/**************************************************************************
*                                                                         *
*  Author      : Falk Zimmermann, GMD, I1.HR                              *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Jun 93                                                   *
*  Last Update : Jun 93                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : transpose1.c                                             *
*                                                                         *
*  Function    : Transposing of a 2-dimensional distributed array         *
*                                                                         *
*  Export      : dalib_transpose__ ()                                     *
*                                                                         *
*  void dalib_transpose__ (target, source, N1, N2, size)                  *
*  unsigned char *source, *target;                                        *
*  int *N1, *N2;                                                          *
*  int *size;                                                             *
*                                                                         *
*                                                                         *
**************************************************************************/

# undef DEBUG

#include "system.h"

# define DIM 4       /* maximal supported dimension */


/*-------------------------------------------------------------------------*
 *  dalib_transpose__ : transpose a 2-dimensional distributed array        *
 *                                                                         *
 *  target, source : byte pointers to the target and the source array      *
 *  N1, N2         : the two extents, passed by reference; source is       *
 *                   addressed with a row pitch of *N2 elements, target    *
 *                   with a row pitch of *N1 elements                      *
 *  size           : bytes per array element, passed by reference          *
 *                                                                         *
 *  The bounds delivered by dalib_local_extensions are used here as        *
 *  1-based and inclusive: "lb - 1" is an element offset and "ub - lb + 1" *
 *  an element count.  Processes are visited as 1 .. pcb.p.                *
 *                                                                         *
 *  NOTE(review): each process sends to all of its peers before it starts  *
 *  receiving; presumably dalib_send_buffer does not wait for the matching *
 *  receive - confirm against the communication layer.                     *
 *-------------------------------------------------------------------------*/

void dalib_transpose__ (target, source, N1, N2, size)
unsigned char *source, *target;
int *N1, *N2;
int *size;

{
  int lo1, hi1;                 /* bounds of a block along the N1 extent   */
  int lo2, hi2;                 /* bounds of a block along the N2 extent   */
  int self, peer;               /* own process id / id of the partner      */
  int own_span1, own_span2;     /* hi - lo of the own blocks (count - 1)   */
  int peer_span;                /* hi - lo of the partner's block          */
  int n1, n2, elem;             /* cached values of *N1, *N2 and *size     */
  int src_pitch, tgt_pitch;     /* bytes between two rows of source/target */
  int row, col;                 /* zero-based counters inside a block      */
  int nbytes;                   /* byte length of a buffer or of a chunk   */
  unsigned char *from, *to;     /* base addresses for the inner loops      */

/*-------------------------------------------------------------------------*
 *  Own bounds along both extents                                          *
 *-------------------------------------------------------------------------*/

  self = dalib_pid_ ();

  n1   = *N1;
  n2   = *N2;
  elem = *size;

  dalib_local_extensions (self, n1, &lo1, &hi1);
  dalib_local_extensions (self, n2, &lo2, &hi2);

  own_span1 = hi1 - lo1;
  own_span2 = hi2 - lo2;

/*-------------------------------------------------------------------------*
 *  Part 1 : elements that stay on this process                            *
 *                                                                         *
 *  With a block distribution these elements form the submatrix on the     *
 *  main diagonal, (own_span1+1) x (own_span2+1) elements large.  Every    *
 *  element is copied on its own: element (row, lo2-1+col) of source       *
 *  becomes element (col, lo1-1+row) of target.                            *
 *-------------------------------------------------------------------------*/

  src_pitch = n2 * elem;
  tgt_pitch = n1 * elem;

  for (col = 0; col <= own_span2; col++)
    {
      from = source + (lo2 - 1 + col) * elem;
      to   = target + ((lo1 - 1) * elem + col * tgt_pitch);

      for (row = 0; row <= own_span1; row++)
        {
          dalib_memcpy (to + row * elem, from + row * src_pitch, elem);
        }
    }

/*-------------------------------------------------------------------------*
 *  Part 2 : elements that are owned by other processes                    *
 *                                                                         *
 *  Sending side: for every partner the elements of its column block are   *
 *  appended to a buffer one by one, walking down the own rows of one      *
 *  column before moving to the next column.  The buffer therefore holds   *
 *  the data in transposed order already.                                  *
 *                                                                         *
 *  lo2 / hi2 are overwritten with the partner's bounds from here on; the  *
 *  own values are not needed any more (own_span2 was saved above).        *
 *-------------------------------------------------------------------------*/

  for (peer = 1; peer <= pcb.p; peer++)
    {
      if (peer == self)
        continue;

      dalib_local_extensions (peer, n2, &lo2, &hi2);
      peer_span = hi2 - lo2;

      nbytes = (own_span1 + 1) * (peer_span + 1) * elem;

      /* second argument 0 on the sending side, 1 on the receiving side;
         presumably the buffer kind - confirm in the buffer module       */
      dalib_create_buffer (nbytes, 0);

      for (col = 0; col <= peer_span; col++)
        {
          from = source + (col + lo2 - 1) * elem;

          for (row = 0; row <= own_span1; row++)
            {
              dalib_fill_buffer (from + row * src_pitch, elem);
            }
        }

      dalib_send_buffer (peer);
      dalib_destroy_buffer ();
    }

/*-------------------------------------------------------------------------*
 *  Receiving side: nothing has to be reordered any more.  For every       *
 *  partner the buffer is emptied in chunks of (peer_span+1) elements,     *
 *  one chunk for each own row of target, stored from element lo1-1 of     *
 *  that row onwards.                                                      *
 *                                                                         *
 *  lo1 / hi1 are overwritten with the partner's bounds here.              *
 *-------------------------------------------------------------------------*/

  for (peer = 1; peer <= pcb.p; peer++)
    {
      if (self == peer)
        continue;

      dalib_local_extensions (peer, n1, &lo1, &hi1);
      peer_span = hi1 - lo1;

      dalib_create_buffer ((own_span2 + 1) * (peer_span + 1) * elem, 1);
      dalib_recv_buffer (peer);

      nbytes = elem * (peer_span + 1);      /* one chunk for one target row */

      for (col = 0; col <= own_span2; col++)
        {
          dalib_get_buffer (target + (lo1 - 1 + col * n1) * elem, nbytes);
        }

      dalib_destroy_buffer ();
    }
}



