/*********************************************************************/
/*                  pC++/Sage++  Copyright (C) 1993                  */
/*  Indiana University  University of Oregon  University of Rennes   */
/*********************************************************************/


/* file: ker_fun.c */

/**********************************************************************/
/*  This file contains the routines called in sets.c that do all cache*/
/*  analysis and estimation routines.                                 */
/**********************************************************************/

#include <stdio.h>
#include "defs.h"
#include "bif.h"
#include "ll.h"
#include "symb.h"
#include "sets.h"

/* Direction ("trouble") codes.  In the code visible here only PLUS and   */
/* ZPLUS are used: comp_ker() stores them into entries of its troub[]     */
/* argument.  The remaining codes are presumably consumed by the callers  */
/* of this file -- TODO confirm.                                          */
#define PLUS 2
#define ZPLUS 3
#define MINUS 4
#define ZMINUS 5
#define PLUSMINUS 6
#define NODEP -1

/* Only declared when __SPF is defined; unif_gen() calls it right after   */
/* each malloc() with the source line, file name and the new pointer.     */
#ifdef __SPF
extern void addToCollection(const int line, const char *file, void *pointer, int type);
#endif

/* Defined elsewhere; only mentioned in commented-out debug code below.   */
extern int show_deps;

/* Old-style (unprototyped) declarations of routines used by this file    */
/* or by its neighbours.                                                  */
void *malloc();
PTR_SETS alloc_sets();
PTR_REFL alloc_ref();
int disp_refl();
PTR_REFL copy_refl();
PTR_REFL union_refl();
/* Work matrix shared by unif_gen(), comp_ker() and print_a_arr().  It is */
/* allocated on the first call of unif_gen() as MAX_NEST_DEPTH pointers   */
/* to AR_DIM_MAX + MAX_NEST_DEPTH ints each; a_allocd records that the    */
/* allocation has been done.  Indexed as a_array[column][row].            */
int **a_array;
int a_allocd = 0;
/* NOTE(review): comp_ker() writes x[0..depth-1] and c[0..depth-1]; the   */
/* fixed size 20 must therefore be >= MAX_NEST_DEPTH -- confirm.          */
int x[20];                      /* a temporary used to compute the vector c */
int c[20];                      /* such that h(c) = dist                    */
int gcd();
int make_induct_list();
int comp_ker();
int find_mults();

/*
 * unif_gen -- test whether two array references are uniformly generated
 * and, when they are, run comp_ker() on them.
 *
 *   sor, des        the two references; their ->stmt fields are handed to
 *                   make_induct_list() and sor->refer supplies the array
 *                   reference whose index list is counted.
 *   source, destin  per-dimension subscript descriptors of the two
 *                   references (fields used: decidable, offset, coefs[]).
 *   vec, troub      output vectors, passed through to comp_ker() untouched.
 *
 * The references count as uniform when, for every array dimension, the
 * "decidable" flags agree and the coefficients of the first "depth" loop
 * levels agree.  "depth" is the length of the common prefix of the two
 * induction-variable lists.  The per-dimension offset difference
 * source - destin is the vector handed to comp_ker() as "dist".
 *
 * Returns 1 when the pair is uniform (comp_ker() has then been called),
 * 0 otherwise (vec and troub are left untouched).
 *
 * Side effect: on the first call the global work matrix a_array is
 * allocated; it is reused by every later call.
 */
int unif_gen(sor, des, vec, troub, source, destin)
int vec[], troub[];
struct ref *sor;
struct ref *des;
struct subscript *source;
struct subscript *destin;
{
  PTR_SYMB sor_ind_l[MAX_NEST_DEPTH], des_ind_l[MAX_NEST_DEPTH];
  struct subscript il_lo[MAX_NEST_DEPTH];
  struct subscript il_hi[MAX_NEST_DEPTH];
  PTR_LLND ll, tl;
  int arr_dim, uniform;
  int v[AR_DIM_MAX];
  int i, j, sd, dd, depth;

  /* the a array that is used here is allocated once and used */
  /* again in future calls */

  if (a_allocd == 0) {
      a_allocd = 1;
      a_array = (int **)malloc(MAX_NEST_DEPTH * (sizeof(int *)));
#ifdef __SPF
      addToCollection(__LINE__, __FILE__,a_array, 0);
#endif
      for (i = 0; i < MAX_NEST_DEPTH; i++)
      {
          a_array[i] = (int *)malloc((AR_DIM_MAX + MAX_NEST_DEPTH) * (sizeof(int)));
#ifdef __SPF
          addToCollection(__LINE__, __FILE__,a_array[i], 0);
#endif
      }
  }
  for (i = 0; i < MAX_NEST_DEPTH; i++) {
    sor_ind_l[i] = NULL;
    des_ind_l[i] = NULL;
  }

  /* il_lo / il_hi are scratch output only: the second call overwrites */
  /* what the first one stored and neither is read afterwards.         */
  dd = make_induct_list(des->stmt, des_ind_l, il_lo, il_hi);
  sd = make_induct_list(sor->stmt, sor_ind_l, il_lo, il_hi);

  depth = (sd < dd) ? sd : dd;

  /* keep only the leading loop levels whose induction symbols coincide */
  i = 0;
  while ((i < depth) && (des_ind_l[i] == sor_ind_l[i]))
    i++;
  if (i < depth)
    depth = i;

  arr_dim = 0;
  /* compute the dimension of the array */
  ll = sor->refer;
  if (ll->variant == ARRAY_REF) {
    tl = ll->entry.array_ref.index;
    while (tl != NULL) {
      /* Only list nodes carry a "next" link.  Any other node ends the  */
      /* walk; without this the loop would never advance tl and would   */
      /* spin forever.                                                  */
      if ((tl->variant != VAR_LIST) &&
          (tl->variant != EXPR_LIST) &&
          (tl->variant != RANGE_LIST))
        break;
      tl = tl->entry.list.next;
      arr_dim++;
    }
  }
  /* NOTE(review): v[] holds AR_DIM_MAX entries; this assumes the index */
  /* list never has more than AR_DIM_MAX elements -- confirm.           */
  uniform = 1;
  for (i = 0; i < arr_dim; i++) {
    if (source[i].decidable != destin[i].decidable)
      uniform = 0;
    v[i] = source[i].offset - destin[i].offset;
    for (j = 0; j < depth; j++)
      if (source[i].coefs[j] != destin[i].coefs[j])
        uniform = 0;
  }
  if (uniform == 1) {
    /* the kernel dimension returned by comp_ker() is not needed here */
    (void) comp_ker(arr_dim, depth, source, a_array, sor_ind_l, v, vec, troub);
  }
  /* else if (show_deps) fprintf(stderr, "not uniform\n"); */
  return (uniform);

}

/* comp_ker is a function that takes the matrix "h" associated with        */
/* a uniformly generated (potential) dependence and an offset vector       */
/* "dist" and computes the distance vector "vec" and a trouble vector      */
/* "troub".  The matrix is associated with the access function of an array */
/* reference where the array is of dimension "adim" and the depth of       */
/* nesting is "depth".  The "a" array is a matrix that is allocated by the */
/* caller and upon return contains a factorization of "h".  The array is   */
/* "depth" rows by depth+adim columns but is viewed as its transpose       */
/* mathematically.  It should be allocated as MAX_NEST_DEPTH by            */
/* AR_DIM_MAX+MAX_NEST_DEPTH.                                              */
/* In other words "a" is first initialized as
             
                  |<- depth ->|
           -------|           |
            ^     |           |
           adim   |    h      |
            v     |           |
           -------|-----------|  where rows in C are columns.
            ^     |           |
           depth  |    I      |
            v     |           |
           --------------------

   A factorization takes place which converts this to the form where the
h component is now the matrix L and the identity block I is now a square
matrix B such that
             L = hB

and L is lower triangular and B and L  are integer valued.  

What this means is that
if dist = Lx, for some x then let c be such that c = Bx and we have 
dist = Lx = hBx = hc.  (note x and c are global and returned by side effect.)
and c is the distance vector.

Furthermore, comp_ker returns the dimension of ker(h), and the rightmost
dim(ker(h)) columns of B form a basis of the kernel.

*/

  
/*
 * comp_ker -- factor the access matrix h and solve h*c = dist in integers.
 *
 *   adim       number of array dimensions (rows of h).
 *   depth      number of loop levels considered (columns of h).
 *   sa         subscript descriptors; sa[i].coefs[j] supplies h(i,j).
 *   a          caller-allocated work matrix indexed a[column][row]; needs
 *              "depth" column pointers with adim+depth ints each.  It is
 *              overwritten (rows 0..adim-1 end up holding L, rows
 *              adim..adim+depth-1 hold B) and its column pointers may be
 *              exchanged.
 *   sor_ind_l  unused.
 *   dist       offset vector, adim entries.
 *   vec        output: vec[1..depth] receives c.  vec[0] is never written,
 *              and nothing is written when no solution exists.
 *   troub      in/out: troub[0] is set to 1 when there is no integer
 *              solution or the direction test rejects the vector.
 *              troub[k+1] is set to PLUS for the leading non-zero row k of
 *              every kernel column, and may then be changed to ZPLUS.
 *              Entries not set here are read as supplied by the caller.
 *
 * The globals x[] and c[] are overwritten (x with the solution of
 * dist = Lx, c with Bx).
 *
 * Returns depth minus the number of pivot columns found, i.e. the
 * dimension of ker(h).
 */
int comp_ker(adim, depth, sa, a, sor_ind_l, dist, vec, troub)
int adim, depth;
struct subscript *sa;
int **a;
PTR_SYMB sor_ind_l[];
int dist[];
int vec[], troub[];
{
  int i, j, k, piv_row, piv_col, cols_done, m, mval, cur_x;
  int nosolution;
  int p, q, r, s, z;
  int *tmp;

  (void) sor_ind_l;             /* parameter is not used */

  /* h components in first adim rows of matrix */
  for (i = 0; i < adim; i++) {
    for (j = 0; j < depth; j++)
      a[j][i] = sa[i].coefs[j];
  }

  /* depth by depth square identity in second block of matrix */
  for (i = adim; i < adim + depth; i++) {
    for (j = 0; j < depth; j++)
      if ((i - adim) == j)
        a[j][i] = 1;
      else
        a[j][i] = 0;
  }
  /* if(show_deps) print_a_arr(adim+depth,depth); */
  /* The following is a factorization of the  array H from the */
  /* function h (stored as the upper part of a ) into a lower  */
  /* triangular matrix L and a matrix B such that L = HB          */
  /* now do column operations to reduce top to lower triangular */
  /* remember that a is transposed to use pointers for columns */
  /* for each row ... */
  cols_done = 0;
  for (i = 0; i < adim; i++) {
    piv_row = i;
    piv_col = cols_done;
    /* The bound is tested first so that a[depth] is never read when */
    /* the rest of the row is all zeros.                             */
    while ((piv_col < depth) && (a[piv_col][piv_row] == 0))
      piv_col++;
    if (piv_col < depth) {
      m = piv_col;
      mval = a[m][piv_row];
      mval = mval * mval;       /* squares are compared to ignore the sign */
      /* pick min non-zero term on row to right of cols_done */
      for (j = cols_done; j < depth; j++)
        if ((a[j][piv_row] != 0) &&
            ((a[j][piv_row] * a[j][piv_row]) < mval)) {
          m = j;
          mval = a[j][piv_row] * a[j][piv_row];
        }
      /* now move col m to col cols_done */
      tmp = a[m];
      a[m] = a[cols_done];
      a[cols_done] = tmp;
      /* now eliminate rest of row */
      for (j = cols_done + 1; j < depth; j++)
        if (a[j][piv_row] != 0) {
          find_mults(a[cols_done][piv_row],
                     a[j][piv_row], &p, &q, &r, &s);
          /* apply the 2x2 integer transform to both whole columns */
          for (k = 0; k < adim + depth; k++) {
            z = a[cols_done][k] * p + a[j][k] * q;
            a[j][k] = a[cols_done][k] * r
              + a[j][k] * s;
            a[cols_done][k] = z;
          }
          /* the surviving non-zero may have landed in column j; */
          /* bring it back to the pivot position                 */
          if (a[cols_done][piv_row] == 0) {
            tmp = a[j];
            a[j] = a[cols_done];
            a[cols_done] = tmp;
          }
        }
      cols_done++;
    }
  }
  /* reduce system by gcd of each column */
  for (j = 0; j < depth; j++) {
    z = gcd(depth + adim, a[j]);
    if (z != 1 && z != 0) {
      for (k = 0; k < adim + depth; k++)
        a[j][k] = a[j][k] / z;
    }
  }

  /* now back solve for x in dist = Lx */
  nosolution = 0;
  cur_x = 0;
  for (j = 0; (j < adim && cur_x < depth); j++) {
    z = 0;
    for (k = 0; k < cur_x; k++)
      z = z + a[k][j] * x[k];
    if (a[cur_x][j] == 0) {
      if (z != dist[j]) {
        nosolution = 1;
      }
      /* this equation is consistent, so skip it */
    }
    else {
      r = (dist[j] - z) / a[cur_x][j];
      /* a non-exact division means there is no integer solution */
      if (r * a[cur_x][j] != dist[j] - z) {
        nosolution = 1;
      }
      x[cur_x] = r;
      cur_x++;
    }
  }
  for (j = cur_x; j < depth; j++) x[j] = 0;


  /* the following is a double check on the solution */

  for (j = 0; j < adim; j++) {
    z = 0;
    for (k = 0; k < depth; k++)
      z = z + a[k][j] * x[k];
    if (z != dist[j])
      nosolution = 1;
  }
  /* if there is no solution then there is no dependence! */
  if (nosolution) {
    troub[0] = 1;
    return (depth - cols_done);
  }
  /* because L = HB where B is the lower block of a       */
  /* and dist = Lx  we have dist = HBx, so if c = Bx, dist = Hc    */
  for (j = 0; j < depth; j++) {
    c[j] = 0;
    for (k = 0; k < depth; k++)
      c[j] = c[j] + a[k][j + adim] * x[k];
  }
  /* to compute vec and troub, we start by setting */
  /* vec to c.  (if ker(h) =0) we are done then  */
  for (j = 0; j < depth; j++)
    vec[j + 1] = c[j];
  /* we now modify by the leading terms of the ker basis */
  for (j = cols_done; j < depth; j++) {
    /* find leading non-zero */
    z = -1;
    for (k = 0; k < depth; k++)
      if (z == -1 && a[j][k + adim] != 0)
        z = k;
    if (z > -1) {
      troub[z + 1] = PLUS;
    }
  }
  /* z remembers the latest level that is PLUS or has a positive      */
  /* distance; 100 means that no such level has been seen yet.        */
  z = 100;
  for (j = 1; j < depth + 1; j++) {
    if (troub[j] == PLUS || vec[j] > 0)
      z = j;
    if (troub[j] != PLUS && vec[j] < 0 && z == 100) {
      troub[0] = 1;
      /* fprintf(stderr, " reject - wrong direction \n"); */
      return (depth - cols_done);
    }
    if (z < j && troub[j] == PLUS && vec[j] < 0)
      troub[j] = ZPLUS;
  }

  /* print_a_arr(adim+depth,depth); */
  return (depth - cols_done);
}

/* myabs -- return the magnitude of x (x itself when it is not negative). */
static int myabs(x)
int x;
{
  return ((x < 0) ? -x : x);
}

/*
 * eval_h -- return entry i of the coefficient vector c multiplied by val.
 * The depth argument is accepted but not used.
 */
int eval_h(c, depth, i, val)
int c[];
int depth, i, val;
{
  int coef;

  (void) depth;                 /* unused */

  coef = c[i];
  return (coef * val);
}

/*
 * find_mults -- find integer multipliers that cancel one of a, b.
 *
 * Runs a subtractive Euclidean reduction on the pair (a, b) while
 * recording the operations in p, q, r, s, so that throughout
 *
 *        a_current = a*p + b*q      and      b_current = a*r + b*s
 *
 * The loop stops as soon as either value is zero, so on return at least
 * one of a*p + b*q and a*r + b*s is 0.  If a or b is already zero on
 * entry, the identity (p=1, q=0, r=0, s=1) is returned.
 *
 *   a, b            the two values to combine.
 *   p1, q1, r1, s1  receive p, q, r and s.
 *
 * Always returns 1.  " reduce failed!" is written to stderr if the
 * relation above does not hold at the end.
 */
int find_mults(a, b, p1, q1, r1, s1)
int a, b;
int *p1;
int *q1;
int *r1;
int *s1;
{
  int p, q, r, s, olda, oldb;
  int same_sign;

  olda = a;
  oldb = b;
  p = 1;
  q = 0;
  r = 0;
  s = 1;
  /* Zero and sign are tested directly.  The product a * b can overflow */
  /* and can even wrap to exactly 0, which would end the loop with      */
  /* nothing reduced.                                                   */
  while ((a != 0) && (b != 0)) {
    same_sign = ((a > 0) == (b > 0));
    if (a == b) {
      r = r - p;
      s = s - q;
      b = 0;
    }
    else if (a == -b) {
      r = r + p;
      s = s + q;
      b = 0;
    }
    else if (myabs(a) < myabs(b)) {
      /* shrink b, the larger magnitude, by one multiple of a */
      if (same_sign) {
        r = r - p;
        s = s - q;
        b = b - a;
      }
      else {
        r = r + p;
        s = s + q;
        b = b + a;
      }
    }
    else {
      /* shrink a, the larger magnitude, by one multiple of b */
      if (same_sign) {
        p = p - r;
        q = q - s;
        a = a - b;
      }
      else {
        p = p + r;
        q = q + s;
        a = a + b;
      }
    }
  }                             /* end while */

  /* sanity check of the relation stated in the header comment */
  if ((a != (olda * p + oldb * q)) || (b != (olda * r + oldb * s)))
    fprintf(stderr, " reduce failed!\n");
  *p1 = p;
  *q1 = q;
  *r1 = r;
  *s1 = s;
  return 1;
}

/*
 * print_a_arr -- debugging aid that writes the global work matrix
 * a_array to stderr, one text line per row.  The matrix is stored as
 * a_array[column][row], so each printed line gathers one entry from
 * every column.
 *
 *   rows  number of rows to print.
 *   cols  number of columns to print.
 */
void print_a_arr(rows, cols)
int rows, cols;
{
  int row, col;

  for (row = 0; row < rows; row++) {
    fprintf(stderr, "    | ");
    for (col = 0; col < cols; col++)
      fprintf(stderr, " %d ", a_array[col][row]);
    /* the closing bar is only printed after an actual last entry */
    if (cols > 0)
      fprintf(stderr, "        |\n");
  }
}