/*******************************************************************
*                                                                  *
*  MODULE : reduction.c                                            *
*                                                                  *
*  Function: Realization of Reduction Operations                   *
*                                                                  *
*  Attention: only global reduction between all processors         *
*                                                                  *
*******************************************************************/

#include "system.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#undef DEBUG

/* Process id that travels together with the reduced value in
   process_pos_reduction: it is unpacked from the first int of the
   broadcast buffer at the end of that routine and is later handed to
   process_broadcast by dalib_loc_exchange__.
   Per the original note this is the id of the process holding the
   minimal/maximal value; which id wins is decided by the *_pos_*
   combiner, which is not visible in this file. */
int pos_pid;

/* process_reduction: global reduction over all node processes.

   data         in:  contribution of this process (size bytes)
                out: buffer handed to general_broadcast, i.e. the value
                     distributed from process 1
   size         number of bytes in data
   f_reduction  called as f_reduction (data, hdata), where hdata holds the
                value just received from the partner process; it is expected
                to combine hdata into data (the combiners themselves live in
                another module)

   Process 0 (the host) contributes nothing and only takes part in the
   final broadcast.  Processes 1 .. pcb.p combine their values pairwise
   with doubling distance, see the pattern below.

   The function has always been (implicitly) of type int; the type is now
   spelled out and 0 is returned.  If the scratch buffer cannot be
   allocated the process prints a message and terminates, because
   continuing would pass a null pointer to areceive / f_reduction. */

int process_reduction (data,size,f_reduction)
char *data;  /* pointer to the data element */
int size;    /* size of the data element in bytes */
void (*f_reduction) (); /* reduction operator          */

/*   Communication Patterns

     0    1    2    3    4    5    6    7    8    9   10  
            <-        <-        <-        <-        <-
            <-------            <--------         
            <-----------------
            <-------------------------------------

        broadcast of processor 1 

       <-   --->---->---->---->---->---->---->--->

*/

{ int steph, distance;
  char *hdata;   /* scratch buffer for the value of the partner process */
  int i, n;
 
  i = pcb.i;     /* own process id, 0 is the host */
  n = pcb.p;     /* highest node process id taking part */

  if (i != 0) /* host has no contribution */
 
   { /* allocate additional memory on the heap for binary operations */

     hdata = (char *) malloc (size);
     if (hdata == NULL && size > 0)   /* malloc (0) may legally return NULL */
       { /* NOTE(review): a reduction is collective, so returning here would
            leave the partner processes blocked; if the library offers an
            abort routine that also stops the other processes, call it
            instead of exit. */
         printf ("dalib: reduction, out of memory (%d bytes)\n", size);
         exit (1);
       }
#ifdef MEIKO_CS2
  ew_touchBuf(hdata, size);
#endif
     distance = 1;
     while (distance < n)    /* log n (base 2) loop */
       { steph = 2*distance;
         if ( ((i-1) % steph) == 0)
            { /* if i+distance exists get a result and combine */
              if ( (i+distance) <= n)
                {  areceive (i,i+distance,hdata,size);
                   f_reduction (data, hdata);
                }
            }
         /* the upper partner of each pair passes its partial result down */
         if ( ((i-1) % steph) == distance)
            asend (i,i-distance,data,size);
         distance = steph;
       }

     free (hdata);
    
   } /* now host is considered again */

  /* process 1 has now the final result */

  general_broadcast (data, size, 1, 1);  /* with host */

  /* now all processes have in data the global result */

  return 0;
}

/*******************************************************************
*                                                                  *
*  same reduction, but saves the position of process id            *
*                                                                  *
*******************************************************************/

/* process_pos_reduction: global reduction that carries a process id along
   with the reduced value.

   data         in:  contribution of this process (size bytes)
                out: value part of the buffer distributed from process 1
   size         number of bytes in data
   f_reduction  called as f_reduction (id, id1, val, val1) with pointers to
                the own process id, the partner's process id, the own value
                and the partner's value; it is expected to leave the winning
                id/value pair in the first/third argument (the combiners
                themselves live in another module)

   Side effect: the global pos_pid is set from the id part of the
   distributed buffer.

   Buffer layout (both scratch buffers):
       bytes 0 .. 7          header, process id stored as int at offset 0
       bytes 8 .. 8+size-1   value

   Process 0 (the host) contributes nothing and only takes part in the
   final broadcast.

   The function has always been (implicitly) of type int; the type is now
   spelled out and 0 is returned.  If a scratch buffer cannot be allocated
   the process prints a message and terminates instead of writing through
   a null pointer. */

int process_pos_reduction (data,size,f_reduction)

char *data;  /* pointer to the data element */
int size;    /* size of the data element in bytes */
void (*f_reduction) (); /* reduction operator          */

/*   Communication Patterns

     0    1    2    3    4    5    6    7    8    9   10  
            <-        <-        <-        <-        <-
            <-------            <--------         
            <-----------------
            <-------------------------------------

        broadcast of processor 1 

       <-   --->---->---->---->---->---->---->--->

*/

{ enum { POS_HDR = 8 };  /* bytes reserved in front of the value for the id */
  int steph, distance;
  char *hdata;    /* own id + value, also used as broadcast buffer */
  char *hdata1;   /* id + value received from the partner process  */
  int i, n;
  int total;      /* length of one packed buffer */
 
  i = pcb.i;      /* own process id, 0 is the host */
  n = pcb.p;      /* highest node process id taking part */

  total = size + POS_HDR;

  hdata = (char *) malloc (total);
  if (hdata == NULL)
    { /* NOTE(review): a reduction is collective, so returning here would
         leave the partner processes blocked; if the library offers an
         abort routine that also stops the other processes, call it
         instead of exit. */
      printf ("dalib: pos_reduction, out of memory (%d bytes)\n", total);
      exit (1);
    }

  if (i != 0) /* host has no contribution */
 
   { /* pack own id and value into hdata */

     /* clear the whole header so that the bytes behind the id are not
        sent uninitialised */
     memset (hdata, 0, POS_HDR);
     *((int *) hdata) = i;
     if (size > 0)
        memcpy (hdata + POS_HDR, data, size);

     hdata1 = (char *) malloc (total);  /* data from other processors */
     if (hdata1 == NULL)
       { printf ("dalib: pos_reduction, out of memory (%d bytes)\n", total);
         exit (1);
       }

     distance = 1;
     while (distance < n)    /* log n (base 2) loop */
       { steph = 2*distance;
         if ( ((i-1) % steph) == 0)
            { /* if i+distance exists get a result and combine */
              if ( (i+distance) <= n)
                {  areceive (i,i+distance,hdata1,total);
                   f_reduction (hdata, hdata1,
                                hdata+POS_HDR, hdata1+POS_HDR);
                }
            }
         /* the upper partner of each pair passes its partial result down */
         if ( ((i-1) % steph) == distance)
            asend (i,i-distance,hdata,total);
         distance = steph;
       }

     free (hdata1);
    
   } /* now host is considered again */

  /* process 1 has now the final result */

  general_broadcast (hdata, total, 1, 1);  /* with host */

  /* unpack hdata */

  pos_pid = * ((int *) hdata);
  if (size > 0)
     memcpy (data, hdata + POS_HDR, size);

  free (hdata);

#ifdef DEBUG
     printf ("Process %d is ready, pos_pid = %d\n", i, pos_pid);
#endif
  /* now all processes have in data, pos_pid the global result */

  return 0;
}

/*******************************************************************
*                                                                  *
*  Import from module cominbers.c                                  *
*                                                                  *
*******************************************************************/

/* Combiners handed to process_reduction; that routine calls them with two
   arguments (own value, received value).  The byte sizes named below are
   the ones dalib_reduction__ passes for each group. */

/* logical values, 4 bytes */
extern void or_bools (), and_bools (), neq_bools ();

/* integers, 4 bytes */
extern void add_ints (), mult_ints (), max_ints (), min_ints ();
extern void and_ints (), or_ints (), eor_ints ();

/* reals, 4 bytes */
extern void add_reals (), mult_reals (), max_reals (), min_reals ();

/* doubles, 8 bytes */
extern void add_doubles (), mult_doubles (), max_doubles (), min_doubles ();

/* complex values, 8 bytes */
extern void add_complexes ();

/* Combiners handed to process_pos_reduction; that routine calls them with
   four arguments (own id, received id, own value, received value). */
extern void max_pos_ints (), min_pos_ints ();
extern void max_pos_reals (), min_pos_reals ();
extern void max_pos_doubles (), min_pos_doubles ();

/*******************************************************************
*                                                                  *
*  FORTRAN - Interface                                             *
*                                                                  *
*******************************************************************/

/* dalib_reduction__: FORTRAN entry point for a global reduction.

   dat  value of this process on entry; passed on to process_reduction,
        which leaves the global result in it
   op   pointer to the operation code, 1 .. 19 (see the table below)

   An operation code outside 1 .. 19 is reported on stdout and dat is
   left untouched. */
void dalib_reduction__ (dat, op)
unsigned char *dat;
int *op;
{
  /* entry k describes operation code k+1: operand size in bytes and the
     combiner to apply */
  static struct { int nbytes; void (*combine) (); } op_table [] = {
    { 4, min_ints      },   /*  1 */
    { 4, min_reals     },   /*  2 */
    { 8, min_doubles   },   /*  3 */
    { 4, max_ints      },   /*  4 */
    { 4, max_reals     },   /*  5 */
    { 8, max_doubles   },   /*  6 */
    { 4, add_ints      },   /*  7 */
    { 4, add_reals     },   /*  8 */
    { 8, add_doubles   },   /*  9 */
    { 4, mult_ints     },   /* 10 */
    { 4, mult_reals    },   /* 11 */
    { 8, mult_doubles  },   /* 12 */
    { 4, and_ints      },   /* 13 */
    { 4, or_ints       },   /* 14 */
    { 4, eor_ints      },   /* 15 */
    { 4, and_bools     },   /* 16 */
    { 4, or_bools      },   /* 17 */
    { 4, neq_bools     },   /* 18 */
    { 8, add_complexes }    /* 19 */
  };
  int code, n_ops;

  code  = *op;
  n_ops = sizeof (op_table) / sizeof (op_table[0]);

  if (code < 1 || code > n_ops)
    { printf ("dalib: reduction, illegal op = %d\n", code);
      return;
    }

  process_reduction (dat, op_table[code-1].nbytes, op_table[code-1].combine);
}

/* dalib_pos_reduction__: FORTRAN entry point for a global min/max
   reduction that also records a process id in pos_pid.

   dat  value of this process on entry; passed on to
        process_pos_reduction, which leaves the global result in it
   op   pointer to the operation code:
          1 min int (4 bytes)    2 min real (4 bytes)   3 min double (8 bytes)
          4 max int (4 bytes)    5 max real (4 bytes)   6 max double (8 bytes)

   Any other operation code is reported on stdout; dat and pos_pid are
   then left untouched.  The message names this entry point so that it
   cannot be mistaken for the one printed by dalib_reduction__, where
   codes 7 .. 19 are legal. */
void dalib_pos_reduction__ (dat, op)
unsigned char *dat;
int *op;
{
  switch (*op) {
  case  1 : process_pos_reduction (dat,4,&min_pos_ints); break;
  case  2 : process_pos_reduction (dat,4,&min_pos_reals); break;
  case  3 : process_pos_reduction (dat,8,&min_pos_doubles); break;
  case  4 : process_pos_reduction (dat,4,&max_pos_ints); break;
  case  5 : process_pos_reduction (dat,4,&max_pos_reals); break;
  case  6 : process_pos_reduction (dat,8,&max_pos_doubles); break;
  default : printf ("dalib: pos_reduction, illegal op = %d\n", *op); break;
  }
}  /* dalib_pos_reduction__ */

/* dalib_loc_exchange__: FORTRAN entry point that forwards data to
   process_broadcast, using the process id stored in pos_pid by the last
   process_pos_reduction.

   data  buffer handed to process_broadcast
   size  pointer to the size value handed to process_broadcast
         (presumably a byte count; confirm against process_broadcast)

   pos_pid is passed as third argument; presumably it selects the sending
   process (process_broadcast is defined elsewhere). */
void dalib_loc_exchange__ (data, size)
int *data;
int *size;
{
  int length, source;

  length = *size;
  source = pos_pid;

  process_broadcast (data, length, source);
}

