LinuxQuestions.org

LinuxQuestions.org (/questions/)
-   Programming (https://www.linuxquestions.org/questions/programming-9/)
-   -   C/Fotran hybrid code using MPI/ScaLAPACK seg faults in a weird way (https://www.linuxquestions.org/questions/programming-9/c-fotran-hybrid-code-using-mpi-scalapack-seg-faults-in-a-weird-way-4175454908/)

ejspeiro 03-20-2013 05:37 PM

C/Fotran hybrid code using MPI/ScaLAPACK seg faults in a weird way
 
Hello,

As many of you have probably read, I managed to get this C/Fortran interaction problem kind of solved in a previous thread.

As I mentioned in that last thread, I am trying to translate this CODE from Fortran into C.

This is what I have so far:

Code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

#include "mpi.h"

#define PRINT_NOTHING            0
#define PRINT_UP_TO_MESGS        1
#define PRINT_UP_TO_ARRAYS        2
#define PRINT_UP_TO_MATRICES      3
#define PRINT_LEVEL              PRINT_UP_TO_MATRICES

/*! Parameters: */
#define TOTMEM                    425053208
#define INTMEM                    13107200
#define NTESTS                    20
#define DLEN_                    9
#define DBLESZ                    8              /*! Size of a DOUBLE PRE. */
#define MEMSIZ                    425053208/8    /*! Memory for DOUBLE PRE. */

#define MASTER                    0
#define MPI_ERROR_CODE_ERROR_GRID 1
#define MPI_ERROR_CODE_NOT_FACT  2
#define MPI_ERROR_CODE_NOT_SOLVE  3

#define BLACS_USER_WONT_FIN_MPI  0
#define BLACS_USER_WILL_FIN_MPI  1

#define DESCRIPTOR_SIZE          7

typedef enum {
  FALSE,
  TRUE
 
} logical;

/*! External subroutines: */
extern void Cblacs_barrier  (int context,
                            char* scope);
extern void Cblacs_exit    (int doneflag);
extern void Cblacs_get      (int ,
                            int ,
                            int *context);
extern void Cblacs_gridexit (int context);
extern void Cblacs_gridinfo (int context,
                            int *our_nprow,
                            int *our_npcol,
                            int *my_prow,
                            int *my_pcol);
extern void Cblacs_gridinit (int* context,
                            char* order,
                            int nproc_rows,
                            int nproc_cols);
extern void Cblacs_pinfo    (int *my_blacs_pid,
                            int *our_blacs_nprocs);

/* http://www.netlib.org/scalapack/explore-html/dd/d22/descinit_8f.html */
extern void descinit_      (int *desc,
                            int *m,
                            int *n,
                            int *mb,
                            int *nb,
                            int *irsrc,
                            int *icsrc,
                            int *ictxt,
                            int *lld,
                            int *info);
/* http://www.netlib.org/scalapack/explore-html/dc/d44/igsum2d___8c.html */
extern void igsum2d_        (int *contxt,
                            char *scope,
                            char *top,
                            int *m,
                            int *n,
                            int *a,
                            int *lda,
                            int *rdest,
                            int *cdest);
/* http://www.netlib.org/scalapack/explore-html/db/da1/pdbmatgen_8f.html */
extern void pdbmatgen_      (int *ictxt,
                            char *aform,
                            char *aform2,
                            int *bwl,
                            int *bwu,
                            int *n,
                            int *mb,
                            int *nb,
                            double *a,
                            int *lda,
                            int *iarow,
                            int *iacol,
                            int *iseed,
                            int *myrow,
                            int *mycol,
                            int *nprow,
                            int *npcol);
/* http://www.netlib.org/scalapack/explore-html/d8/dba/pdchekpad_8f.html */
extern void pdchekpad_      (int * ictxt,
                            char *mess,
                            int *m,
                            int *n,
                            double *a,
                            int *lda,
                            int *ipre,
                            int *ipost,
                            double *chkval);
/* http://www.netlib.org/scalapack/explore-html/d2/dc2/pddblaschk_8f.html */
extern void pddblaschk_    (char *symm,
                            char *uplo,
                            char *trans,
                            int *n,
                            int *bwl,
                            int *bwu,
                            int *nrhs,
                            double *x,
                            int *ix,
                            int *jx,
                            int *descx,
                            int *iaseed,
                            double *a,
                            int *ia,
                            int *ja,
                            int *desca,
                            int *ibseed,
                            double *anorm,
                            double *resid,
                            double *work,
                            int *worksiz);
/* http://www.netlib.org/scalapack/explore-html/d6/d3b/pdfillpad_8f.html */
extern void pdfillpad_      (int *ictxt,
                            int *m,
                            int *n,
                            double *a,
                            int *lda,
                            int *ipre,
                            int *ipost,
                            double *chkval);
/* http://www.netlib.org/scalapack/explore-html/d0/d2b/pdgbinfo_8f.html */
extern void pdgbinfo_      (char *summry,
                            int *nout,
                            char *trans,
                            int *nmat,
                            int *nval,
                            int *ldnval,
                            int *nbw,
                            int *bwlval,
                            int *bwuval,
                            int *ldbwval,
                            int *nnb,
                            int *nbval,
                            int *ldnbval,
                            int *nnr,
                            int *nrval,
                            int *ldnrval,
                            int *nnbr,
                            int *nbrval,
                            int *ldnbrval,
                            int *ngrids,
                            int *pval,
                            int *ldpval,
                            int *qval,
                            int *ldqval,
                            float *thresh,
                           
                            double *work,  /* Array to gather and bcast. */
                           
                            int *iam,
                            int *nprocs);
/* http://www.netlib.org/scalapack/explore-html/dd/dad/pdgbtrf_8f.html */
extern void pdgbtrf_        (int *n,
                            int *bwl,
                            int *bwu,
                            double *a,
                            int *ja,
                            int *desca,
                            int *ipiv,
                            double *af,
                            int *laf,
                            double *work,
                            int *lwork,
                            int *info);
/* http://www.netlib.org/scalapack/explore-html/d9/dda/pdgbtrs_8f.html */
extern void pdgbtrs_        (char *trans,
                            int *n,
                            int *bwl,
                            int *bwu,
                            int *nrhs,
                            double *a,
                            int *ja,
                            int *desca,
                            int *ipiv,
                            double *b,
                            int *ib,
                            int *descb,
                            double *af,
                            int *laf,
                            double *work,
                            int *lwork,
                            int *info);
/*
http://www.netlib.org/scalapack/explore-html/d2/d66/_e_i_g_2pdmatgen_8f.html
*/
extern void pdmatgen_      (int *ictxt,
                            char *aform,
                            char *diag,
                            int *m,
                            int *n,
                            int *mb,
                            int *nb,
                            double *a,
                            int *lda,
                            int *iarow,
                            int *iacol,
                            int *iseed,
                            int *iroff,
                            int *irnum,
                            int *icoff,
                            int *icnum,
                            int *myrow,
                            int *mycol,
                            int *nprow,
                            int *npcol);
/* http://www.netlib.org/scalapack/explore-html/d2/dcd/sltimer_8f_source.html */
extern void slboot_        (void);
extern void slcombine_      (int *ictxt,
                            char *scope,
                            char *op,
                            char *timetype,
                            int *n,
                            int *ibeg,
                            double *times);
extern void sltimer_        ( int *i );

/*! External functions: */
/* http://www.netlib.org/scalapack/explore-html/d4/d48/numroc_8f.html */
extern int numroc_      (int *n,
                        int *nb,
                        int *iproc,
                        int *isrcproc,
                        int *nprocs);
/* http://www.netlib.org/scalapack/explore-html/df/dee/tools_8f.html#a67ae4efe5110e3297b1e9e3a46d8c78b */
extern logical lsame_  (char *ca,
                        char *cb);
/* http://www.netlib.org/scalapack/explore-html/db/dd0/pdlange_8f.html */
extern double pdlange_  (char *norm,
                        int *m,
                        int *n,
                        double *a,
                        int *ia,
                        int *ja,
                        int *desca,
                        double *work );

/*! Intrinsic functions: */
/*      INTRINSIC          DBLE, MAX, MIN, MOD */
inline double DBLE(int xx) { return (double) xx; }
inline double MAX(double xx, double yy) { return xx >= yy? xx: yy; }
inline double MIN(double xx, double yy) { return xx <= yy? xx: yy; }
inline int MOD(int xx, int yy) { return xx%yy; }

/*! PROGRAM PDGBDRIVER: */
int main (int argc, char **argv) {

  /*! Parameters: */
  int ntests = NTESTS;  /* Number of num. tests to be performed */
  //  int      BLOCK_CYCLIC_2D = 1;
  //  int      DTYPE_ = 1;
  //  int      CTXT_ = 2;
  //  int      M_ = 3;
  //  int      N_ = 4;
  //  int      MB_ = 5;
  //  int      NB_ = 6;
  //  int      RSRC_ = 7;
  //  int      CSRC_ = 8;
  //  int      LLD_ = 9;
  //
  //  double    ZERO = 0.0;
  //  double    PADVAL = -9923.0;
  //
  //  int      INT_ONE = 1;
 
  /*! Local scalars: */
  logical  CHECK;        /* Should or shouldn't I perform the num. tests? */
  char      TRANS;        /* Should I transpose the matrix? */
  //  char      PASSED[6];
  char      OUTFILE[80];  /* ? */
  //  int      BWL;
  //  int      BWU;
  //  int      BW_NUM;
  //  int      FILLIN_SIZE;
  //  int      FREE_PTR;
  //  int      H;
  //  int      HH;
  int      I;          /* Iterator per process grid. */
  int      IAM;          /* My MPI process ID. */
  int      IASEED;      /* Seed for random matrix A creation. */
  int      IBSEED;      /* Seed for random matrix B creation. */
  //  int      ICTXT;
  //  int      ICTXTB;
  //  int      IERR_TEMP;
  //  int      IMIDPAD;
  //  int      INFO;
  //  int      IPA;
  //  int      IPB;
  //  int      IPOSTPAD;
  //  int      IPREPAD;
  //  int      IPW;
  //  int      IPW_SIZE;
  //  int      IPW_SOLVE;
  //  int      IPW_SOLVE_SIZE;
  //  int      IP_DRIVER_W;
  //  int      IP_FILLIN;
  //  int      J;
  //  int      K;
 
  /*! Data statements: */
  //  int      KFAIL = 4;
  //  int      KPASS = 4;
  //  int      KSKIP = 4;
  //  int      KTESTS = 4;
 
  /*! Local scalars: (continued) */
  //  int      MYCOL;
  //  int      MYRHS_SIZE;
  //  int      MYROW;
  //  int      N;
  //  int      NB;
  int      NBW;    /* Number of bandwith values per matrix. */
  int      NGRIDS; /* Number of process grids to try for. */
  int      NMAT;  /* Number of matrices  */
  int      NNB;    /* Number of sizes of NB (block column size). */
  int      NNBR;
  int      NNR;
  int      NOUT;  /* Device out... o.O */
  //  int      NP;
  int      NPCOL;  /* Procs per row in processors grid/ */
  int      NPROCS; /* The size of OUR communicator. */
  //  int      NPROCS_REAL;
  int      NPROW;  /* Procs per row in processors grid/ */
  //  int      NQ;
  //  int      NRHS;
  //  int      N_FIRST;
  //  int      N_LAST;
  //  int      WORKSIZ;
 
  float    THRESH; /* Threshold variable for numerical tests. */
 
  //  double    ANORM;
  //  double    NOPS;
  //  double    NOPS2;
  //  double    SRESID;
  //  double    TMFLOPS;
  //  double    TMFLOPS2;
 
  /*! Local arrays: */
  //  int      IPIV[INTMEM];
  int      BWLVAL[NTESTS];  /* Values of lower diagonals per matrix. */
  int      BWUVAL[NTESTS];  /* Values of upper diagonals per matrix. */
  //  int      DESCA[7];
  //  int      DESCA2D[DLEN_];
  //  int      DESCB[7];
  //  int      DESCB2D[DLEN_];
  //  int      IERR[1];
  int      NBRVAL[NTESTS]; /* ? */
  int      NBVAL[NTESTS];  /* Column sizes per matrix. */
  int      NRVAL[NTESTS];  /* ? */
  int      NVAL[NTESTS];  /* Values of N for a given matrix. */
  int      PVAL[NTESTS];  /* Values for proc. rows. */
  int      QVAL[NTESTS];  /* Values for proc. cols. */
 
  //  double    CTIME[2];
  double    *MEM;      /* GLOABL array for broadcasting the information. */
  //  double    WTIME[2];

  /*! Executable statements: */

  /*! Get starting information: */
  Cblacs_pinfo(&IAM, &NPROCS);
  IASEED = 100;
  IBSEED = 200;

  MEM = (double *) malloc(MEMSIZ);
 
  pdgbinfo_(OUTFILE, &NOUT, &TRANS, &NMAT, NVAL, &ntests, &NBW,
            BWLVAL, BWUVAL, &ntests, &NNB, NBVAL, &ntests, &NNR,
            NRVAL, &ntests, &NNBR, NBRVAL, &ntests, &NGRIDS, PVAL,
            &ntests, QVAL, &ntests, &THRESH, MEM, &IAM, &NPROCS);

  CHECK = (THRESH >= 0.0f);

  /*! Print headings: */
  fprintf(stdout, "\n");
  fprintf(stdout, "TIME TR      N  BWL BWU    NB  NRHS    P    Q L*U Time Slv Time  MFLOPS  MFLOP2  CHECK\n");
  fprintf(stdout, "---- -- ------  --- ---  ---- ----- ---- ---- -------- -------- -------- -------- ------\n");
  fprintf(stdout, "\n");
  fflush(stdout);

  /*! Loop over different process grids: */
  for (I = 1; I <= NGRIDS; I++) {

    NPROW = PVAL[I - 1];
    NPCOL = QVAL[I - 1];

    fprintf(stdout, "Solving for a %d x %d grid!\n", NPROW, NPCOL);
  }

  free(MEM);
  Cblacs_exit(0);
  exit(EXIT_SUCCESS);
}

Please excuse the amount of commented-out variables, but like I said, I am basically translating the reference driver, from scratch.

My problem is that it suddenly started seg-faulting, as soon as I wrote the for loop... it was working before. That is a clear sign of a funky memory manipulation issue, which probably arises from issues related to the memory management differences between C and Fortran.

At least, that is my guess.

Does anybody has a clue on what may be going wrong?

Thanks in advanced!

ejspeiro 03-20-2013 06:17 PM

This is what I mean with "weird": I deleted the loop, and added printing statement for the variable MEM:

Code:

...
  pdgbinfo_(OUTFILE, &NOUT, &TRANS, &NMAT, NVAL, &ntests, &NBW,
            BWLVAL, BWUVAL, &ntests, &NNB, NBVAL, &ntests, &NNR,
            NRVAL, &ntests, &NNBR, NBRVAL, &ntests, &NGRIDS, PVAL,
            &ntests, QVAL, &ntests, &THRESH, MEM, &IAM, &NPROCS);

  fprintf(stdout, "MEM 1 = %g\n", MEM[0]);
  fprintf(stdout, "MEM 2 = %g\n", MEM[1]);
  fprintf(stdout, "MEM 3 = %g\n", MEM[2]);
  fprintf(stdout, "MEM 4 = %g\n", MEM[3]);
  fprintf(stdout, "MEM 5 = %g\n", MEM[4]);
  fprintf(stdout, "MEM 6 = %g\n", MEM[5]);
  fprintf(stdout, "MEM 7 = %g\n", MEM[6]);

//  CHECK = (THRESH >= 0.0f);
//
//  /*! Print headings: */
//  fprintf(stdout, "\n");
//  fprintf(stdout, "TIME TR      N  BWL BWU    NB  NRHS    P    Q L*U Time Slv Time  MFLOPS  MFLOP2  CHECK\n");
//  fprintf(stdout, "---- -- ------  --- ---  ---- ----- ---- ---- -------- -------- -------- -------- ------\n");
//  fprintf(stdout, "\n");
//  fflush(stdout);

  /*! Loop over different process grids: */
//  for (I = 1; I <= NGRIDS; I++) {
//
//    NPROW = PVAL[I - 1];
//    NPCOL = QVAL[I - 1];
//
//    fprintf(stdout, "Solving for a %d x %d grid!\n", NPROW, NPCOL);
//  }

  free(MEM);
  Cblacs_exit(0);
  exit(EXIT_SUCCESS);
}

And it now works...

Code:

[ejspeiro@node01 scalapack-ex07]$ make clean; make; make run np=1
rm -f *.o blogs
clear
gcc -I/opt/intel/impi/3.2.0.011/include64/ -c -DAdd_ -g -Wall -Werror blogs.c
gfortran -g -o blogs blogs.o /home/ejspeiro/libraries/scalapack-2.0.2/TESTING/LIN/pdgbinfo.o /home/ejspeiro/libraries/scalapack-2.0.2/libscalapack.a /home/ejspeiro/libraries/lapack-3.4.1/liblapack.a \
        /home/ejspeiro/libraries/BLAS/libblas.a -L/opt/intel/impi/3.2.0.011/lib64 -lmpi -lmpigf -lmpigi -lrt -lpthread -ldl
mpirun -r ssh -f mpd.hosts -np 1 `pwd`/blogs
SCALAPACK banded linear systems.
'MPI machine'                                                                 

Tests of the parallel real double precision band matrix solve
The following scaled residual checks will be computed:
 Solve residual        = ||Ax - b|| / (||x|| * ||A|| * eps * N)
 Factorization residual = ||A - LU|| / (||A|| * eps * N)
The matrix A is randomly generated for each test.

An explanation of the input/output parameters follows:
TIME    : Indicates whether WALL or CPU time was used.
N      : The number of rows and columns in the matrix A.
bwl, bwu      : The number of diagonals in the matrix A.
NB      : The size of the column panels the matrix A is split into. [-1 for default]
NRHS    : The total number of RHS to solve for.
NBRHS  : The number of RHS to be put on a column of processes before going
          on to the next column of processes.
P      : The number of process rows.
Q      : The number of process columns.
THRESH  : If a residual value is less than THRESH, CHECK is flagged as PASSED
Fact time: Time in seconds to factor the matrix
Sol Time: Time in seconds to solve the system.
MFLOPS  : Rate of execution for factor and solve using sequential operation count.
MFLOP2  : Rough estimate of speed using actual op count (accurate big P,N).

The following parameter values will be used:
  N    :            12
  bwl  :            4
  bwu  :            3
  NB  :            -1
  NRHS :            1
  NBRHS:            1
  P    :            1
  Q    :            1

Relative machine precision (eps) is taken to be      0.111022E-15
Routines pass computational tests if scaled residual is less than  3.0000   
MEM 1 = 8.48798e-314
MEM 2 = nan
MEM 3 = 2.122e-314
MEM 4 = 2.122e-314
MEM 5 = 0
MEM 6 = 0
MEM 7 = 0
[ejspeiro@node01 scalapack-ex07]$


ejspeiro 03-20-2013 08:54 PM

Update
 
Here's the code for the PDGBINFO Fortran subroutine:

Code:

      SUBROUTINE PDGBINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW,
    $                    BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL,
    $                    NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL,
    $                    NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH,
    $                    WORK, IAM, NPROCS )
*
*
*
*  -- ScaLAPACK routine (version 1.7) --
*    University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*    and University of California, Berkeley.
*    November 15, 1997
*
*    .. Scalar Arguments ..
      CHARACTER          TRANS
      CHARACTER*(*)      SUMMRY
      INTEGER            IAM,
    $                  LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL,
    $                  LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW,
    $                  NPROCS, NNR, NOUT
      REAL              THRESH
*    ..
*    .. Array Arguments ..
      INTEGER            NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ),
    $                  NRVAL( LDNRVAL ), NVAL( LDNVAL ),
    $                  BWLVAL( LDBWVAL),BWUVAL( LDBWVAL),
    $                  PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * )
*    ..
*
*  Purpose
*  =======
*
*  PDGBINFO get needed startup information for band factorization
*  and transmits it to all processes.
*
*  Arguments
*  =========
*
*  SUMMRY  (global output) CHARACTER*(*)
*          Name of output (summary) file (if any). Only defined for
*          process 0.
*
*  NOUT    (global output) INTEGER
*          The unit number for output file. NOUT = 6, ouput to screen,
*          NOUT = 0, output to stderr.  Only defined for process 0.
*
*
*  NMAT    (global output) INTEGER
*          The number of different values that can be used for N.
*
*  NVAL    (global output) INTEGER array, dimension (LDNVAL)
*          The values of N (number of columns in matrix) to run the
*          code with.
*
*  NBW      (global output) INTEGER
*          The number of different values that can be used for @bw@.
*  BWLVAL  (global output) INTEGER array, dimension (LDNVAL)
*          The values of BWL (number of subdiagonals in matrix) to run
*          the code with.
*  BWUVAL  (global output) INTEGER array, dimension (LDNVAL)
*          The values of BW (number of supdiagonals in matrix) to run
*          the code with.
*
*  LDNVAL  (global input) INTEGER
*          The maximum number of different values that can be used for
*          N, LDNVAL > =  NMAT.
*
*  NNB      (global output) INTEGER
*          The number of different values that can be used for NB.
*
*  NBVAL    (global output) INTEGER array, dimension (LDNBVAL)
*          The values of NB (blocksize) to run the code with.
*
*  LDNBVAL  (global input) INTEGER
*          The maximum number of different values that can be used for
*          NB, LDNBVAL >= NNB.
*
*  NNR      (global output) INTEGER
*          The number of different values that can be used for NRHS.
*
*  NRVAL    (global output) INTEGER array, dimension(LDNRVAL)
*          The values of NRHS (# of Right Hand Sides) to run the code
*          with.
*
*  LDNRVAL  (global input) INTEGER
*          The maximum number of different values that can be used for
*          NRHS, LDNRVAL >= NNR.
*
*  NNBR    (global output) INTEGER
*          The number of different values that can be used for NBRHS.
*
*  NBRVAL  (global output) INTEGER array, dimension (LDNBRVAL)
*          The values of NBRHS (RHS blocksize) to run the code with.
*
*  LDNBRVAL (global input) INTEGER
*          The maximum number of different values that can be used for
*          NBRHS, LDNBRVAL >= NBRVAL.
*
*  NGRIDS  (global output) INTEGER
*          The number of different values that can be used for P & Q.
*
*  PVAL    (global output) INTEGER array, dimension (LDPVAL)
*          Not used (will be returned as all 1s) since proc grid is 1D
*
*  LDPVAL  (global input) INTEGER
*          The maximum number of different values that can be used for
*          P, LDPVAL >= NGRIDS.
*
*  QVAL    (global output) INTEGER array, dimension (LDQVAL)
*          The values of Q (number of process columns) to run the code
*          with.
*
*  LDQVAL  (global input) INTEGER
*          The maximum number of different values that can be used for
*          Q, LDQVAL >= NGRIDS.
*
*  THRESH  (global output) REAL
*          Indicates what error checks shall be run and printed out:
*            = 0 : Perform no error checking
*            > 0 : report all residuals greater than THRESH, perform
*                  factor check only if solve check fails
*
*  WORK    (local workspace) INTEGER array of dimension >=
*          MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL
*    $              +3*LDNVAL)
*          Used to pack input arrays in order to send info in one
*          message.
*
*  IAM      (local input) INTEGER
*          My process number.
*
*  NPROCS  (global input) INTEGER
*          The total number of processes.
*
* ======================================================================
*
* Note: For packing the information we assumed that the length in bytes
* ===== of an integer is equal to the length in bytes of a real single
*      precision.
*
*  =====================================================================
*
*  Code Developer: Andrew J. Cleary, University of Tennessee.
*    Current address: Lawrence Livermore National Labs.
*  This version released: August, 2001.
*
* ======================================================================
*
*    .. Parameters ..
      INTEGER            NIN
      PARAMETER          ( NIN = 11 )
*    ..
*    .. Local Scalars ..
      INTEGER            I, ICTXT
      CHARACTER*79      USRINFO
      DOUBLE PRECISION  EPS
*    ..
*    .. External Subroutines ..
      EXTERNAL          BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT,
    $                  BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D,
    $                  IGEBS2D, SGEBR2D, SGEBS2D
*    ..
*    .. External Functions ..
      LOGICAL            LSAME
      DOUBLE PRECISION  PDLAMCH
      EXTERNAL          LSAME, PDLAMCH
*    ..
*    .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
*    ..
*    .. Executable Statements ..
*
*    Process 0 reads the input data, broadcasts to other processes and
*    writes needed information to NOUT
*
      IF( IAM.EQ.0 ) THEN
*
*        Open file and skip data file header
*
        OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' )
        READ( NIN, FMT = * ) SUMMRY
        SUMMRY = ' '
*
*        Read in user-supplied info about machine type, compiler, etc.
*
        READ( NIN, FMT = 9999 ) USRINFO
*
*        Read name and unit number for summary output file
*
        READ( NIN, FMT = * ) SUMMRY
        READ( NIN, FMT = * ) NOUT
        IF( NOUT.NE.0 .AND. NOUT.NE.6 )
    $      OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' )
*
*        Read and check the parameter values for the tests.
*
*        Get TRANS
*
        READ( NIN, FMT = * ) TRANS
*
*
*        Get number of matrices and their dimensions
*
        READ( NIN, FMT = * ) NMAT
        IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN
            WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL
            GO TO 20
        END IF
        READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT )
*
*        Get bandwidths
*
        READ( NIN, FMT = * ) NBW
        IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN
            WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL
            GO TO 20
        END IF
        READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW )
        READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW )
*
*        Get values of NB
*
        READ( NIN, FMT = * ) NNB
        IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN
            WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL
            GO TO 20
        END IF
        READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB )
*
*        Get values of NRHS
*
        READ( NIN, FMT = * ) NNR
        IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN
            WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL
            GO TO 20
        END IF
        READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR )
*
*        Get values of NBRHS
*
        READ( NIN, FMT = * ) NNBR
        IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN
            WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL
            GO TO 20
        END IF
        READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR )
*
*        Get number of grids
*
        READ( NIN, FMT = * ) NGRIDS
        IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN
            WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL
            GO TO 20
        ELSE IF( NGRIDS.GT.LDQVAL ) THEN
            WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL
            GO TO 20
        END IF
*
*        Processor grid must be 1D so set PVAL to 1
        DO 8738 I = 1, NGRIDS
            PVAL( I ) = 1
 8738    CONTINUE
*
*        Get values of Q
*
        READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS )
*
*        Get level of checking
*
        READ( NIN, FMT = * ) THRESH
*
*        Close input file
*
        CLOSE( NIN )
*
*        For pvm only: if virtual machine not set up, allocate it and
*        spawn the correct number of processes.
*
        IF( NPROCS.LT.1 ) THEN
            NPROCS = 0
            DO 10 I = 1, NGRIDS
              NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) )
  10      CONTINUE
            CALL BLACS_SETUP( IAM, NPROCS )
        END IF
*
*        Temporarily define blacs grid to include all processes so
*        information can be broadcast to all processes.
*
        CALL BLACS_GET( -1, 0, ICTXT )
        CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS )
*
*        Compute machine epsilon
*
        EPS = PDLAMCH( ICTXT, 'eps' )
*
*        Pack information arrays and broadcast
*
        CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 )
        I = 1
        WORK( I ) = NMAT
        I = I+1
        WORK( I ) = NBW
        I = I+1
        WORK( I ) = NNB
        I = I+1
        WORK( I ) = NNR
        I = I+1
        WORK( I ) = NNBR
        I = I+1
        WORK( I ) = NGRIDS
        I = I+1
        IF( LSAME( TRANS, 'N' ) ) THEN
            WORK( I ) = 1
        ELSE
            TRANS = 'T'
            WORK( I ) = 2
        END IF
        I = I+1
*        Send number of elements to be sent
        CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 )
*        Send elements
        CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 )
*
        I = 1
        CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 )
        I = I + NMAT
        CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 )
        I = I + NBW
        CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 )
        I = I + NBW
        CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 )
        I = I + NNB
        CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 )
        I = I + NNR
        CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 )
        I = I + NNBR
        CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 )
        I = I + NGRIDS
        CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 )
        I = I + NGRIDS
        CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 )
*
*        regurgitate input
*
        WRITE( NOUT, FMT = 9999 )
    $                  'SCALAPACK banded linear systems.'
        WRITE( NOUT, FMT = 9999 ) USRINFO
        WRITE( NOUT, FMT = * )
        WRITE( NOUT, FMT = 9999 )
    $                  'Tests of the parallel '//
    $                  'real double precision band matrix solve '
        WRITE( NOUT, FMT = 9999 )
    $                  'The following scaled residual '//
    $                  'checks will be computed:'
        WRITE( NOUT, FMT = 9999 )
    $                  ' Solve residual        = ||Ax - b|| / '//
    $                  '(||x|| * ||A|| * eps * N)'
            WRITE( NOUT, FMT = 9999 )
    $                  ' Factorization residual = ||A - LU|| /'//
    $                  ' (||A|| * eps * N)'
        WRITE( NOUT, FMT = 9999 )
    $                  'The matrix A is randomly '//
    $                  'generated for each test.'
        WRITE( NOUT, FMT = * )
        WRITE( NOUT, FMT = 9999 )
    $                  'An explanation of the input/output '//
    $                  'parameters follows:'
        WRITE( NOUT, FMT = 9999 )
    $                  'TIME    : Indicates whether WALL or '//
    $                  'CPU time was used.'
*
        WRITE( NOUT, FMT = 9999 )
    $                  'N      : The number of rows and columns '//
    $                  'in the matrix A.'
        WRITE( NOUT, FMT = 9999 )
    $                  'bwl, bwu      : The number of diagonals '//
    $                  'in the matrix A.'
        WRITE( NOUT, FMT = 9999 )
    $                  'NB      : The size of the column panels the'//
    $                  ' matrix A is split into. [-1 for default]'
        WRITE( NOUT, FMT = 9999 )
    $                  'NRHS    : The total number of RHS to solve'//
    $                  ' for.'
        WRITE( NOUT, FMT = 9999 )
    $                  'NBRHS  : The number of RHS to be put on '//
    $                  'a column of processes before going'
        WRITE( NOUT, FMT = 9999 )
    $                  '          on to the next column of processes.'
        WRITE( NOUT, FMT = 9999 )
    $                  'P      : The number of process rows.'
        WRITE( NOUT, FMT = 9999 )
    $                  'Q      : The number of process columns.'
        WRITE( NOUT, FMT = 9999 )
    $                  'THRESH  : If a residual value is less than'//
    $                  ' THRESH, CHECK is flagged as PASSED'
        WRITE( NOUT, FMT = 9999 )
    $                  'Fact time: Time in seconds to factor the'//
    $                  ' matrix'
        WRITE( NOUT, FMT = 9999 )
    $                  'Sol Time: Time in seconds to solve the'//
    $                  ' system.'
        WRITE( NOUT, FMT = 9999 )
    $                  'MFLOPS  : Rate of execution for factor '//
    $                  'and solve using sequential operation count.'
        WRITE( NOUT, FMT = 9999 )
    $                  'MFLOP2  : Rough estimate of speed '//
    $                  'using actual op count (accurate big P,N).'
        WRITE( NOUT, FMT = * )
        WRITE( NOUT, FMT = 9999 )
    $                  'The following parameter values will be used:'
        WRITE( NOUT, FMT = 9996 )
    $                  'N    ', ( NVAL(I), I = 1, MIN(NMAT, 10) )
        IF( NMAT.GT.10 )
    $      WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT )
        WRITE( NOUT, FMT = 9996 )
    $                  'bwl  ', ( BWLVAL(I), I = 1, MIN(NBW, 10) )
        IF( NBW.GT.10 )
    $      WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW )
        WRITE( NOUT, FMT = 9996 )
    $                  'bwu  ', ( BWUVAL(I), I = 1, MIN(NBW, 10) )
        IF( NBW.GT.10 )
    $      WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW )
        WRITE( NOUT, FMT = 9996 )
    $                  'NB  ', ( NBVAL(I), I = 1, MIN(NNB, 10) )
        IF( NNB.GT.10 )
    $      WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB )
        WRITE( NOUT, FMT = 9996 )
    $                  'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) )
        IF( NNR.GT.10 )
    $      WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR )
        WRITE( NOUT, FMT = 9996 )
    $                  'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) )
        IF( NNBR.GT.10 )
    $      WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR )
        WRITE( NOUT, FMT = 9996 )
    $                  'P    ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) )
        IF( NGRIDS.GT.10 )
    $      WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS )
        WRITE( NOUT, FMT = 9996 )
    $                  'Q    ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) )
        IF( NGRIDS.GT.10 )
    $      WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS )
        WRITE( NOUT, FMT = * )
        WRITE( NOUT, FMT = 9995 ) EPS
        WRITE( NOUT, FMT = 9998 ) THRESH
        CALL FLUSH(6)
*
      ELSE
*
*        If in pvm, must participate setting up virtual machine
*
        IF( NPROCS.LT.1 )
    $      CALL BLACS_SETUP( IAM, NPROCS )
*
*        Temporarily define blacs grid to include all processes so
*        all processes have needed startup information
*
        CALL BLACS_GET( -1, 0, ICTXT )
        CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS )
*
*        Compute machine epsilon
*
        EPS = PDLAMCH( ICTXT, 'eps' )
*
        CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 )
        CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 )
        CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 )
        I = 1
        NMAT = WORK( I )
        I = I+1
        NBW = WORK( I )
        I = I+1
        NNB = WORK( I )
        I = I+1
        NNR = WORK( I )
        I = I+1
        NNBR = WORK( I )
        I = I+1
        NGRIDS = WORK( I )
        I = I+1
        IF( WORK( I ) .EQ. 1 ) THEN
            TRANS = 'N'
        ELSE
            TRANS = 'T'
        END IF
        I = I+1
*
        I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS
        I = I + NBW
*
        CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 )
        I = 1
        CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 )
        I = I + NMAT
        CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 )
        I = I + NBW
        CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 )
        I = I + NBW
        CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 )
        I = I + NNB
        CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 )
        I = I + NNR
        CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 )
        I = I + NNBR
        CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 )
        I = I + NGRIDS
        CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 )
*
      END IF
*
      CALL BLACS_GRIDEXIT( ICTXT )
*
      RETURN
*
  20 WRITE( NOUT, FMT = 9993 )
      CLOSE( NIN )
      IF( NOUT.NE.6 .AND. NOUT.NE.0 )
    $  CLOSE( NOUT )
*
      CALL BLACS_ABORT( ICTXT, 1 )
      STOP
*
 9999 FORMAT( A )
 9998 FORMAT( 'Routines pass computational tests if scaled residual ',
    $        'is less than ', G12.5 )
 9997 FORMAT( '                ', 10I6 )
 9996 FORMAT( 2X, A5, ':        ', 10I6 )
 9995 FORMAT( 'Relative machine precision (eps) is taken to be ',
    $        E18.6 )
 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ',
    $        'than ', I2 )
 9993 FORMAT( ' Illegal input in file ',40A,'.  Aborting run.' )
*
*    End of PDGBINFO
*
      END

Using GDB, I have traced the error to line 181:

Code:

        READ( NIN, FMT = * ) SUMMRY
SUMMRY is declared as (See LINE 35):

Code:

*  SUMMRY  (global output) CHARACTER*(*)
*          Name of output (summary) file (if any). Only defined for
*          process 0.

The PDGBDRIVER (sample code which uses PDGBINFO) passes a:

Code:

      CHARACTER*80      OUTFILE
Which I try to emulate in C, as follows:

Code:

  char      *OUTFILE;  /* ? */
...
  OUTFILE = (char *) malloc(sizeof(char)*80);
  if (OUTFILE == NULL) {
    fprintf(stderr, "Not enough memory for 80.\n");
    MPI_Abort(MPI_COMM_WORLD, MPI_ERROR_CODE_NOT_MEMRY);
  }

Is that correct?!? Is CHARACTER*80 a string with 80 characters? Or does it mean something else?

Thanks!

ejspeiro 03-20-2013 11:59 PM

Update
 
I tried this:

Code:

  OUTFILE = (unsigned char *) malloc(sizeof(unsigned char)*79);
  if (OUTFILE == NULL) {
    fprintf(stderr, "Not enough memory for 80.\n");
    MPI_Abort(MPI_COMM_WORLD, MPI_ERROR_CODE_NOT_MEMRY);
  }

And the code yields NO OUTPUT AT ALL!:

Code:

ejspeiro@node01 scalapack-ex07]$ make run np=1
mpirun -r ssh -f mpd.hosts -np 1 `pwd`/blogs
[ejspeiro@node01 scalapack-ex07]$

If I change the 79 to an 80, I get the seg fault!

:(


All times are GMT -5. The time now is 06:15 PM.