Computing maximum

Given is an array of n distinct positive integers. The task is to compute the maximum of all elements.

The simplest sequential way is as follows:

input int A[_];
output int max = 0;

for (int i = 0; i < A.size; i++)
  if (A[i] > max) max = A[i];
Assuming n is a power of two, we can use the same techinque as for sum to get logarithmic time with linear work as follows:
input int A[_];
int n = A.size;

output int max;

for (int t = 0; 2 ^ t < n; t++) 
  pardo(i : n / 2 ^ (t + 1)) {
    int z = A[2 * i + 1];
    if (A[2 * i] > A[2 * i + 1]) z = A[2 * i];
    A[i] = z;
  }     
    
max = A[0];
Using the cCRCW mode we can have constant time with quadratic work
#mode cCRCW
input int A[_];

int n = A.size;
output int max;

pardo(i:n) {
    int lmax=1;
    pardo (j:n)
      if (A[j]>A[i]) lmax=0;
    if (lmax)
      max=A[i];
}
With the previous version as subprogram, we can use the `square-root-recursion` to get O(log log n) time and O(n log log n) work
#mode cCRCW
int max_sqrt(int A[_]) {
  int n = A.size;
  if (n < 9) return max_const(A);
  int sn = sqrt(n);
  int B[sn];

  pardo(i : sn) {
    int AA[sn];
    pardo(j : sn) AA[j] = 0;
    pardo(j : sn) if (i * sn + j < n) AA[j] = A[i * sn + j];
    B[i] = max_sqrt(AA);
  }
  return max_const(B);
}
Finally, we can use the accelerated cascading technique to obtain linear work while keeping the time O(log log n)
int shorten(int A[_], int iter) {
  int n = 2 ^ log(A.size);
  int Y[n];
  pardo(i : n) Y[i] = -1;
  pardo(i : A.size) Y[i] = A[i];

  for (int t = 0; t < iter; t++) pardo(i : n / 2 ^ (t + 1)) {
      int z;
      if (Y[2 * i] > Y[2 * i + 1])
        z = Y[2 * i];
      else
        z = Y[2 * i + 1];
      Y[i] = z;
    }

  pardo(i : A.size) A[i] = Y[i];
  return n / 2 ^ iter;
}

if (n < 16)
  max = max_const(A);
else {
  int iter = log(log(n));
  int nn = shorten(A, iter);
  int AA[nn];
  pardo(i : nn) AA[i] = A[i];
  max = max_sqrt(AA);
}