C/Fotran hybrid code using MPI/ScaLAPACK seg faults in a weird way
ProgrammingThis forum is for all programming questions.
The question does not have to be directly related to Linux and any language is fair game.
Notices
Welcome to LinuxQuestions.org, a friendly and active Linux Community.
You are currently viewing LQ as a guest. By joining our community you will have the ability to post topics, receive our newsletter, use the advanced search, subscribe to threads and access many other special features. Registration is quick, simple and absolutely free. Join our community today!
Note that registered members see fewer ads, and ContentLink is completely disabled once you log in.
If you have any problems with the registration process or your account login, please contact us. If you need to reset your password, click here.
Having a problem logging in? Please visit this page to clear all LQ-related cookies.
Get a virtual cloud desktop with the Linux distro that you want in less than five minutes with Shells! With over 10 pre-installed distros to choose from, the worry-free installation life is here! Whether you are a digital nomad or just looking for flexibility, Shells can put your Linux machine on the device that you want to use.
Exclusive for LQ members, get up to 45% off per month. Click here for more info.
C/Fotran hybrid code using MPI/ScaLAPACK seg faults in a weird way
Hello,
As many of you have probably read, I managed to get this C/Fortran interaction problem kind of solved in a previous thread.
As I mentioned in that last thread, I am trying to translate this CODE from Fortran into C.
This is what I have so far:
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "mpi.h"
#define PRINT_NOTHING 0
#define PRINT_UP_TO_MESGS 1
#define PRINT_UP_TO_ARRAYS 2
#define PRINT_UP_TO_MATRICES 3
#define PRINT_LEVEL PRINT_UP_TO_MATRICES
/*! Parameters: */
#define TOTMEM 425053208
#define INTMEM 13107200
#define NTESTS 20
#define DLEN_ 9
#define DBLESZ 8 /*! Size of a DOUBLE PRE. */
#define MEMSIZ 425053208/8 /*! Memory for DOUBLE PRE. */
#define MASTER 0
#define MPI_ERROR_CODE_ERROR_GRID 1
#define MPI_ERROR_CODE_NOT_FACT 2
#define MPI_ERROR_CODE_NOT_SOLVE 3
#define BLACS_USER_WONT_FIN_MPI 0
#define BLACS_USER_WILL_FIN_MPI 1
#define DESCRIPTOR_SIZE 7
typedef enum {
FALSE,
TRUE
} logical;
/*! External subroutines: */
extern void Cblacs_barrier (int context,
char* scope);
extern void Cblacs_exit (int doneflag);
extern void Cblacs_get (int ,
int ,
int *context);
extern void Cblacs_gridexit (int context);
extern void Cblacs_gridinfo (int context,
int *our_nprow,
int *our_npcol,
int *my_prow,
int *my_pcol);
extern void Cblacs_gridinit (int* context,
char* order,
int nproc_rows,
int nproc_cols);
extern void Cblacs_pinfo (int *my_blacs_pid,
int *our_blacs_nprocs);
/* http://www.netlib.org/scalapack/explore-html/dd/d22/descinit_8f.html */
extern void descinit_ (int *desc,
int *m,
int *n,
int *mb,
int *nb,
int *irsrc,
int *icsrc,
int *ictxt,
int *lld,
int *info);
/* http://www.netlib.org/scalapack/explore-html/dc/d44/igsum2d___8c.html */
extern void igsum2d_ (int *contxt,
char *scope,
char *top,
int *m,
int *n,
int *a,
int *lda,
int *rdest,
int *cdest);
/* http://www.netlib.org/scalapack/explore-html/db/da1/pdbmatgen_8f.html */
extern void pdbmatgen_ (int *ictxt,
char *aform,
char *aform2,
int *bwl,
int *bwu,
int *n,
int *mb,
int *nb,
double *a,
int *lda,
int *iarow,
int *iacol,
int *iseed,
int *myrow,
int *mycol,
int *nprow,
int *npcol);
/* http://www.netlib.org/scalapack/explore-html/d8/dba/pdchekpad_8f.html */
extern void pdchekpad_ (int * ictxt,
char *mess,
int *m,
int *n,
double *a,
int *lda,
int *ipre,
int *ipost,
double *chkval);
/* http://www.netlib.org/scalapack/explore-html/d2/dc2/pddblaschk_8f.html */
extern void pddblaschk_ (char *symm,
char *uplo,
char *trans,
int *n,
int *bwl,
int *bwu,
int *nrhs,
double *x,
int *ix,
int *jx,
int *descx,
int *iaseed,
double *a,
int *ia,
int *ja,
int *desca,
int *ibseed,
double *anorm,
double *resid,
double *work,
int *worksiz);
/* http://www.netlib.org/scalapack/explore-html/d6/d3b/pdfillpad_8f.html */
extern void pdfillpad_ (int *ictxt,
int *m,
int *n,
double *a,
int *lda,
int *ipre,
int *ipost,
double *chkval);
/* http://www.netlib.org/scalapack/explore-html/d0/d2b/pdgbinfo_8f.html */
extern void pdgbinfo_ (char *summry,
int *nout,
char *trans,
int *nmat,
int *nval,
int *ldnval,
int *nbw,
int *bwlval,
int *bwuval,
int *ldbwval,
int *nnb,
int *nbval,
int *ldnbval,
int *nnr,
int *nrval,
int *ldnrval,
int *nnbr,
int *nbrval,
int *ldnbrval,
int *ngrids,
int *pval,
int *ldpval,
int *qval,
int *ldqval,
float *thresh,
double *work, /* Array to gather and bcast. */
int *iam,
int *nprocs);
/* http://www.netlib.org/scalapack/explore-html/dd/dad/pdgbtrf_8f.html */
extern void pdgbtrf_ (int *n,
int *bwl,
int *bwu,
double *a,
int *ja,
int *desca,
int *ipiv,
double *af,
int *laf,
double *work,
int *lwork,
int *info);
/* http://www.netlib.org/scalapack/explore-html/d9/dda/pdgbtrs_8f.html */
extern void pdgbtrs_ (char *trans,
int *n,
int *bwl,
int *bwu,
int *nrhs,
double *a,
int *ja,
int *desca,
int *ipiv,
double *b,
int *ib,
int *descb,
double *af,
int *laf,
double *work,
int *lwork,
int *info);
/*
http://www.netlib.org/scalapack/explore-html/d2/d66/_e_i_g_2pdmatgen_8f.html
*/
extern void pdmatgen_ (int *ictxt,
char *aform,
char *diag,
int *m,
int *n,
int *mb,
int *nb,
double *a,
int *lda,
int *iarow,
int *iacol,
int *iseed,
int *iroff,
int *irnum,
int *icoff,
int *icnum,
int *myrow,
int *mycol,
int *nprow,
int *npcol);
/* http://www.netlib.org/scalapack/explore-html/d2/dcd/sltimer_8f_source.html */
extern void slboot_ (void);
extern void slcombine_ (int *ictxt,
char *scope,
char *op,
char *timetype,
int *n,
int *ibeg,
double *times);
extern void sltimer_ ( int *i );
/*! External functions: */
/* http://www.netlib.org/scalapack/explore-html/d4/d48/numroc_8f.html */
extern int numroc_ (int *n,
int *nb,
int *iproc,
int *isrcproc,
int *nprocs);
/* http://www.netlib.org/scalapack/explore-html/df/dee/tools_8f.html#a67ae4efe5110e3297b1e9e3a46d8c78b */
extern logical lsame_ (char *ca,
char *cb);
/* http://www.netlib.org/scalapack/explore-html/db/dd0/pdlange_8f.html */
extern double pdlange_ (char *norm,
int *m,
int *n,
double *a,
int *ia,
int *ja,
int *desca,
double *work );
/*! Intrinsic functions: */
/* INTRINSIC DBLE, MAX, MIN, MOD */
inline double DBLE(int xx) { return (double) xx; }
inline double MAX(double xx, double yy) { return xx >= yy? xx: yy; }
inline double MIN(double xx, double yy) { return xx <= yy? xx: yy; }
inline int MOD(int xx, int yy) { return xx%yy; }
/*! PROGRAM PDGBDRIVER: */
int main (int argc, char **argv) {
/*! Parameters: */
int ntests = NTESTS; /* Number of num. tests to be performed */
// int BLOCK_CYCLIC_2D = 1;
// int DTYPE_ = 1;
// int CTXT_ = 2;
// int M_ = 3;
// int N_ = 4;
// int MB_ = 5;
// int NB_ = 6;
// int RSRC_ = 7;
// int CSRC_ = 8;
// int LLD_ = 9;
//
// double ZERO = 0.0;
// double PADVAL = -9923.0;
//
// int INT_ONE = 1;
/*! Local scalars: */
logical CHECK; /* Should or shouldn't I perform the num. tests? */
char TRANS; /* Should I transpose the matrix? */
// char PASSED[6];
char OUTFILE[80]; /* ? */
// int BWL;
// int BWU;
// int BW_NUM;
// int FILLIN_SIZE;
// int FREE_PTR;
// int H;
// int HH;
int I; /* Iterator per process grid. */
int IAM; /* My MPI process ID. */
int IASEED; /* Seed for random matrix A creation. */
int IBSEED; /* Seed for random matrix B creation. */
// int ICTXT;
// int ICTXTB;
// int IERR_TEMP;
// int IMIDPAD;
// int INFO;
// int IPA;
// int IPB;
// int IPOSTPAD;
// int IPREPAD;
// int IPW;
// int IPW_SIZE;
// int IPW_SOLVE;
// int IPW_SOLVE_SIZE;
// int IP_DRIVER_W;
// int IP_FILLIN;
// int J;
// int K;
/*! Data statements: */
// int KFAIL = 4;
// int KPASS = 4;
// int KSKIP = 4;
// int KTESTS = 4;
/*! Local scalars: (continued) */
// int MYCOL;
// int MYRHS_SIZE;
// int MYROW;
// int N;
// int NB;
int NBW; /* Number of bandwith values per matrix. */
int NGRIDS; /* Number of process grids to try for. */
int NMAT; /* Number of matrices */
int NNB; /* Number of sizes of NB (block column size). */
int NNBR;
int NNR;
int NOUT; /* Device out... o.O */
// int NP;
int NPCOL; /* Procs per row in processors grid/ */
int NPROCS; /* The size of OUR communicator. */
// int NPROCS_REAL;
int NPROW; /* Procs per row in processors grid/ */
// int NQ;
// int NRHS;
// int N_FIRST;
// int N_LAST;
// int WORKSIZ;
float THRESH; /* Threshold variable for numerical tests. */
// double ANORM;
// double NOPS;
// double NOPS2;
// double SRESID;
// double TMFLOPS;
// double TMFLOPS2;
/*! Local arrays: */
// int IPIV[INTMEM];
int BWLVAL[NTESTS]; /* Values of lower diagonals per matrix. */
int BWUVAL[NTESTS]; /* Values of upper diagonals per matrix. */
// int DESCA[7];
// int DESCA2D[DLEN_];
// int DESCB[7];
// int DESCB2D[DLEN_];
// int IERR[1];
int NBRVAL[NTESTS]; /* ? */
int NBVAL[NTESTS]; /* Column sizes per matrix. */
int NRVAL[NTESTS]; /* ? */
int NVAL[NTESTS]; /* Values of N for a given matrix. */
int PVAL[NTESTS]; /* Values for proc. rows. */
int QVAL[NTESTS]; /* Values for proc. cols. */
// double CTIME[2];
double *MEM; /* GLOABL array for broadcasting the information. */
// double WTIME[2];
/*! Executable statements: */
/*! Get starting information: */
Cblacs_pinfo(&IAM, &NPROCS);
IASEED = 100;
IBSEED = 200;
MEM = (double *) malloc(MEMSIZ);
pdgbinfo_(OUTFILE, &NOUT, &TRANS, &NMAT, NVAL, &ntests, &NBW,
BWLVAL, BWUVAL, &ntests, &NNB, NBVAL, &ntests, &NNR,
NRVAL, &ntests, &NNBR, NBRVAL, &ntests, &NGRIDS, PVAL,
&ntests, QVAL, &ntests, &THRESH, MEM, &IAM, &NPROCS);
CHECK = (THRESH >= 0.0f);
/*! Print headings: */
fprintf(stdout, "\n");
fprintf(stdout, "TIME TR N BWL BWU NB NRHS P Q L*U Time Slv Time MFLOPS MFLOP2 CHECK\n");
fprintf(stdout, "---- -- ------ --- --- ---- ----- ---- ---- -------- -------- -------- -------- ------\n");
fprintf(stdout, "\n");
fflush(stdout);
/*! Loop over different process grids: */
for (I = 1; I <= NGRIDS; I++) {
NPROW = PVAL[I - 1];
NPCOL = QVAL[I - 1];
fprintf(stdout, "Solving for a %d x %d grid!\n", NPROW, NPCOL);
}
free(MEM);
Cblacs_exit(0);
exit(EXIT_SUCCESS);
}
Please excuse the amount of commented-out variables, but like I said, I am basically translating the reference driver, from scratch.
My problem is that it suddenly started seg-faulting, as soon as I wrote the for loop... it was working before. That is a clear sign of a funky memory manipulation issue, which probably arises from issues related to the memory management differences between C and Fortran.
At least, that is my guess.
Does anybody has a clue on what may be going wrong?
[ejspeiro@node01 scalapack-ex07]$ make clean; make; make run np=1
rm -f *.o blogs
clear
gcc -I/opt/intel/impi/3.2.0.011/include64/ -c -DAdd_ -g -Wall -Werror blogs.c
gfortran -g -o blogs blogs.o /home/ejspeiro/libraries/scalapack-2.0.2/TESTING/LIN/pdgbinfo.o /home/ejspeiro/libraries/scalapack-2.0.2/libscalapack.a /home/ejspeiro/libraries/lapack-3.4.1/liblapack.a \
/home/ejspeiro/libraries/BLAS/libblas.a -L/opt/intel/impi/3.2.0.011/lib64 -lmpi -lmpigf -lmpigi -lrt -lpthread -ldl
mpirun -r ssh -f mpd.hosts -np 1 `pwd`/blogs
SCALAPACK banded linear systems.
'MPI machine'
Tests of the parallel real double precision band matrix solve
The following scaled residual checks will be computed:
Solve residual = ||Ax - b|| / (||x|| * ||A|| * eps * N)
Factorization residual = ||A - LU|| / (||A|| * eps * N)
The matrix A is randomly generated for each test.
An explanation of the input/output parameters follows:
TIME : Indicates whether WALL or CPU time was used.
N : The number of rows and columns in the matrix A.
bwl, bwu : The number of diagonals in the matrix A.
NB : The size of the column panels the matrix A is split into. [-1 for default]
NRHS : The total number of RHS to solve for.
NBRHS : The number of RHS to be put on a column of processes before going
on to the next column of processes.
P : The number of process rows.
Q : The number of process columns.
THRESH : If a residual value is less than THRESH, CHECK is flagged as PASSED
Fact time: Time in seconds to factor the matrix
Sol Time: Time in seconds to solve the system.
MFLOPS : Rate of execution for factor and solve using sequential operation count.
MFLOP2 : Rough estimate of speed using actual op count (accurate big P,N).
The following parameter values will be used:
N : 12
bwl : 4
bwu : 3
NB : -1
NRHS : 1
NBRHS: 1
P : 1
Q : 1
Relative machine precision (eps) is taken to be 0.111022E-15
Routines pass computational tests if scaled residual is less than 3.0000
MEM 1 = 8.48798e-314
MEM 2 = nan
MEM 3 = 2.122e-314
MEM 4 = 2.122e-314
MEM 5 = 0
MEM 6 = 0
MEM 7 = 0
[ejspeiro@node01 scalapack-ex07]$
Here's the code for the PDGBINFO Fortran subroutine:
Code:
SUBROUTINE PDGBINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW,
$ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL,
$ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL,
$ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH,
$ WORK, IAM, NPROCS )
*
*
*
* -- ScaLAPACK routine (version 1.7) --
* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
* and University of California, Berkeley.
* November 15, 1997
*
* .. Scalar Arguments ..
CHARACTER TRANS
CHARACTER*(*) SUMMRY
INTEGER IAM,
$ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL,
$ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW,
$ NPROCS, NNR, NOUT
REAL THRESH
* ..
* .. Array Arguments ..
INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ),
$ NRVAL( LDNRVAL ), NVAL( LDNVAL ),
$ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL),
$ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * )
* ..
*
* Purpose
* =======
*
* PDGBINFO get needed startup information for band factorization
* and transmits it to all processes.
*
* Arguments
* =========
*
* SUMMRY (global output) CHARACTER*(*)
* Name of output (summary) file (if any). Only defined for
* process 0.
*
* NOUT (global output) INTEGER
* The unit number for output file. NOUT = 6, ouput to screen,
* NOUT = 0, output to stderr. Only defined for process 0.
*
*
* NMAT (global output) INTEGER
* The number of different values that can be used for N.
*
* NVAL (global output) INTEGER array, dimension (LDNVAL)
* The values of N (number of columns in matrix) to run the
* code with.
*
* NBW (global output) INTEGER
* The number of different values that can be used for @bw@.
* BWLVAL (global output) INTEGER array, dimension (LDNVAL)
* The values of BWL (number of subdiagonals in matrix) to run
* the code with.
* BWUVAL (global output) INTEGER array, dimension (LDNVAL)
* The values of BW (number of supdiagonals in matrix) to run
* the code with.
*
* LDNVAL (global input) INTEGER
* The maximum number of different values that can be used for
* N, LDNVAL > = NMAT.
*
* NNB (global output) INTEGER
* The number of different values that can be used for NB.
*
* NBVAL (global output) INTEGER array, dimension (LDNBVAL)
* The values of NB (blocksize) to run the code with.
*
* LDNBVAL (global input) INTEGER
* The maximum number of different values that can be used for
* NB, LDNBVAL >= NNB.
*
* NNR (global output) INTEGER
* The number of different values that can be used for NRHS.
*
* NRVAL (global output) INTEGER array, dimension(LDNRVAL)
* The values of NRHS (# of Right Hand Sides) to run the code
* with.
*
* LDNRVAL (global input) INTEGER
* The maximum number of different values that can be used for
* NRHS, LDNRVAL >= NNR.
*
* NNBR (global output) INTEGER
* The number of different values that can be used for NBRHS.
*
* NBRVAL (global output) INTEGER array, dimension (LDNBRVAL)
* The values of NBRHS (RHS blocksize) to run the code with.
*
* LDNBRVAL (global input) INTEGER
* The maximum number of different values that can be used for
* NBRHS, LDNBRVAL >= NBRVAL.
*
* NGRIDS (global output) INTEGER
* The number of different values that can be used for P & Q.
*
* PVAL (global output) INTEGER array, dimension (LDPVAL)
* Not used (will be returned as all 1s) since proc grid is 1D
*
* LDPVAL (global input) INTEGER
* The maximum number of different values that can be used for
* P, LDPVAL >= NGRIDS.
*
* QVAL (global output) INTEGER array, dimension (LDQVAL)
* The values of Q (number of process columns) to run the code
* with.
*
* LDQVAL (global input) INTEGER
* The maximum number of different values that can be used for
* Q, LDQVAL >= NGRIDS.
*
* THRESH (global output) REAL
* Indicates what error checks shall be run and printed out:
* = 0 : Perform no error checking
* > 0 : report all residuals greater than THRESH, perform
* factor check only if solve check fails
*
* WORK (local workspace) INTEGER array of dimension >=
* MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL
* $ +3*LDNVAL)
* Used to pack input arrays in order to send info in one
* message.
*
* IAM (local input) INTEGER
* My process number.
*
* NPROCS (global input) INTEGER
* The total number of processes.
*
* ======================================================================
*
* Note: For packing the information we assumed that the length in bytes
* ===== of an integer is equal to the length in bytes of a real single
* precision.
*
* =====================================================================
*
* Code Developer: Andrew J. Cleary, University of Tennessee.
* Current address: Lawrence Livermore National Labs.
* This version released: August, 2001.
*
* ======================================================================
*
* .. Parameters ..
INTEGER NIN
PARAMETER ( NIN = 11 )
* ..
* .. Local Scalars ..
INTEGER I, ICTXT
CHARACTER*79 USRINFO
DOUBLE PRECISION EPS
* ..
* .. External Subroutines ..
EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT,
$ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D,
$ IGEBS2D, SGEBR2D, SGEBS2D
* ..
* .. External Functions ..
LOGICAL LSAME
DOUBLE PRECISION PDLAMCH
EXTERNAL LSAME, PDLAMCH
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Process 0 reads the input data, broadcasts to other processes and
* writes needed information to NOUT
*
IF( IAM.EQ.0 ) THEN
*
* Open file and skip data file header
*
OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' )
READ( NIN, FMT = * ) SUMMRY
SUMMRY = ' '
*
* Read in user-supplied info about machine type, compiler, etc.
*
READ( NIN, FMT = 9999 ) USRINFO
*
* Read name and unit number for summary output file
*
READ( NIN, FMT = * ) SUMMRY
READ( NIN, FMT = * ) NOUT
IF( NOUT.NE.0 .AND. NOUT.NE.6 )
$ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' )
*
* Read and check the parameter values for the tests.
*
* Get TRANS
*
READ( NIN, FMT = * ) TRANS
*
*
* Get number of matrices and their dimensions
*
READ( NIN, FMT = * ) NMAT
IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN
WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL
GO TO 20
END IF
READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT )
*
* Get bandwidths
*
READ( NIN, FMT = * ) NBW
IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN
WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL
GO TO 20
END IF
READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW )
READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW )
*
* Get values of NB
*
READ( NIN, FMT = * ) NNB
IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN
WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL
GO TO 20
END IF
READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB )
*
* Get values of NRHS
*
READ( NIN, FMT = * ) NNR
IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN
WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL
GO TO 20
END IF
READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR )
*
* Get values of NBRHS
*
READ( NIN, FMT = * ) NNBR
IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN
WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL
GO TO 20
END IF
READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR )
*
* Get number of grids
*
READ( NIN, FMT = * ) NGRIDS
IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN
WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL
GO TO 20
ELSE IF( NGRIDS.GT.LDQVAL ) THEN
WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL
GO TO 20
END IF
*
* Processor grid must be 1D so set PVAL to 1
DO 8738 I = 1, NGRIDS
PVAL( I ) = 1
8738 CONTINUE
*
* Get values of Q
*
READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS )
*
* Get level of checking
*
READ( NIN, FMT = * ) THRESH
*
* Close input file
*
CLOSE( NIN )
*
* For pvm only: if virtual machine not set up, allocate it and
* spawn the correct number of processes.
*
IF( NPROCS.LT.1 ) THEN
NPROCS = 0
DO 10 I = 1, NGRIDS
NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) )
10 CONTINUE
CALL BLACS_SETUP( IAM, NPROCS )
END IF
*
* Temporarily define blacs grid to include all processes so
* information can be broadcast to all processes.
*
CALL BLACS_GET( -1, 0, ICTXT )
CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS )
*
* Compute machine epsilon
*
EPS = PDLAMCH( ICTXT, 'eps' )
*
* Pack information arrays and broadcast
*
CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 )
I = 1
WORK( I ) = NMAT
I = I+1
WORK( I ) = NBW
I = I+1
WORK( I ) = NNB
I = I+1
WORK( I ) = NNR
I = I+1
WORK( I ) = NNBR
I = I+1
WORK( I ) = NGRIDS
I = I+1
IF( LSAME( TRANS, 'N' ) ) THEN
WORK( I ) = 1
ELSE
TRANS = 'T'
WORK( I ) = 2
END IF
I = I+1
* Send number of elements to be sent
CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 )
* Send elements
CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 )
*
I = 1
CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 )
I = I + NMAT
CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 )
I = I + NBW
CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 )
I = I + NBW
CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 )
I = I + NNB
CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 )
I = I + NNR
CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 )
I = I + NNBR
CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 )
I = I + NGRIDS
CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 )
I = I + NGRIDS
CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 )
*
* regurgitate input
*
WRITE( NOUT, FMT = 9999 )
$ 'SCALAPACK banded linear systems.'
WRITE( NOUT, FMT = 9999 ) USRINFO
WRITE( NOUT, FMT = * )
WRITE( NOUT, FMT = 9999 )
$ 'Tests of the parallel '//
$ 'real double precision band matrix solve '
WRITE( NOUT, FMT = 9999 )
$ 'The following scaled residual '//
$ 'checks will be computed:'
WRITE( NOUT, FMT = 9999 )
$ ' Solve residual = ||Ax - b|| / '//
$ '(||x|| * ||A|| * eps * N)'
WRITE( NOUT, FMT = 9999 )
$ ' Factorization residual = ||A - LU|| /'//
$ ' (||A|| * eps * N)'
WRITE( NOUT, FMT = 9999 )
$ 'The matrix A is randomly '//
$ 'generated for each test.'
WRITE( NOUT, FMT = * )
WRITE( NOUT, FMT = 9999 )
$ 'An explanation of the input/output '//
$ 'parameters follows:'
WRITE( NOUT, FMT = 9999 )
$ 'TIME : Indicates whether WALL or '//
$ 'CPU time was used.'
*
WRITE( NOUT, FMT = 9999 )
$ 'N : The number of rows and columns '//
$ 'in the matrix A.'
WRITE( NOUT, FMT = 9999 )
$ 'bwl, bwu : The number of diagonals '//
$ 'in the matrix A.'
WRITE( NOUT, FMT = 9999 )
$ 'NB : The size of the column panels the'//
$ ' matrix A is split into. [-1 for default]'
WRITE( NOUT, FMT = 9999 )
$ 'NRHS : The total number of RHS to solve'//
$ ' for.'
WRITE( NOUT, FMT = 9999 )
$ 'NBRHS : The number of RHS to be put on '//
$ 'a column of processes before going'
WRITE( NOUT, FMT = 9999 )
$ ' on to the next column of processes.'
WRITE( NOUT, FMT = 9999 )
$ 'P : The number of process rows.'
WRITE( NOUT, FMT = 9999 )
$ 'Q : The number of process columns.'
WRITE( NOUT, FMT = 9999 )
$ 'THRESH : If a residual value is less than'//
$ ' THRESH, CHECK is flagged as PASSED'
WRITE( NOUT, FMT = 9999 )
$ 'Fact time: Time in seconds to factor the'//
$ ' matrix'
WRITE( NOUT, FMT = 9999 )
$ 'Sol Time: Time in seconds to solve the'//
$ ' system.'
WRITE( NOUT, FMT = 9999 )
$ 'MFLOPS : Rate of execution for factor '//
$ 'and solve using sequential operation count.'
WRITE( NOUT, FMT = 9999 )
$ 'MFLOP2 : Rough estimate of speed '//
$ 'using actual op count (accurate big P,N).'
WRITE( NOUT, FMT = * )
WRITE( NOUT, FMT = 9999 )
$ 'The following parameter values will be used:'
WRITE( NOUT, FMT = 9996 )
$ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) )
IF( NMAT.GT.10 )
$ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT )
WRITE( NOUT, FMT = 9996 )
$ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) )
IF( NBW.GT.10 )
$ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW )
WRITE( NOUT, FMT = 9996 )
$ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) )
IF( NBW.GT.10 )
$ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW )
WRITE( NOUT, FMT = 9996 )
$ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) )
IF( NNB.GT.10 )
$ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB )
WRITE( NOUT, FMT = 9996 )
$ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) )
IF( NNR.GT.10 )
$ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR )
WRITE( NOUT, FMT = 9996 )
$ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) )
IF( NNBR.GT.10 )
$ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR )
WRITE( NOUT, FMT = 9996 )
$ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) )
IF( NGRIDS.GT.10 )
$ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS )
WRITE( NOUT, FMT = 9996 )
$ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) )
IF( NGRIDS.GT.10 )
$ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS )
WRITE( NOUT, FMT = * )
WRITE( NOUT, FMT = 9995 ) EPS
WRITE( NOUT, FMT = 9998 ) THRESH
CALL FLUSH(6)
*
ELSE
*
* If in pvm, must participate setting up virtual machine
*
IF( NPROCS.LT.1 )
$ CALL BLACS_SETUP( IAM, NPROCS )
*
* Temporarily define blacs grid to include all processes so
* all processes have needed startup information
*
CALL BLACS_GET( -1, 0, ICTXT )
CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS )
*
* Compute machine epsilon
*
EPS = PDLAMCH( ICTXT, 'eps' )
*
CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 )
CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 )
CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 )
I = 1
NMAT = WORK( I )
I = I+1
NBW = WORK( I )
I = I+1
NNB = WORK( I )
I = I+1
NNR = WORK( I )
I = I+1
NNBR = WORK( I )
I = I+1
NGRIDS = WORK( I )
I = I+1
IF( WORK( I ) .EQ. 1 ) THEN
TRANS = 'N'
ELSE
TRANS = 'T'
END IF
I = I+1
*
I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS
I = I + NBW
*
CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 )
I = 1
CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 )
I = I + NMAT
CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 )
I = I + NBW
CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 )
I = I + NBW
CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 )
I = I + NNB
CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 )
I = I + NNR
CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 )
I = I + NNBR
CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 )
I = I + NGRIDS
CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 )
*
END IF
*
CALL BLACS_GRIDEXIT( ICTXT )
*
RETURN
*
20 WRITE( NOUT, FMT = 9993 )
CLOSE( NIN )
IF( NOUT.NE.6 .AND. NOUT.NE.0 )
$ CLOSE( NOUT )
*
CALL BLACS_ABORT( ICTXT, 1 )
STOP
*
9999 FORMAT( A )
9998 FORMAT( 'Routines pass computational tests if scaled residual ',
$ 'is less than ', G12.5 )
9997 FORMAT( ' ', 10I6 )
9996 FORMAT( 2X, A5, ': ', 10I6 )
9995 FORMAT( 'Relative machine precision (eps) is taken to be ',
$ E18.6 )
9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ',
$ 'than ', I2 )
9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' )
*
* End of PDGBINFO
*
END
Using GDB, I have traced the error to line 181:
Code:
READ( NIN, FMT = * ) SUMMRY
SUMMRY is declared as (See LINE 35):
Code:
* SUMMRY (global output) CHARACTER*(*)
* Name of output (summary) file (if any). Only defined for
* process 0.
The PDGBDRIVER (sample code which uses PDGBINFO) passes a:
LinuxQuestions.org is looking for people interested in writing
Editorials, Articles, Reviews, and more. If you'd like to contribute
content, let us know.