Actual source code: ex18.c

static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\
Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n";

#include <petscmat.h>

int main(int argc,char **args)
{
  Mat            A;
  Vec            x, rhs, y;
  PetscInt       i,j,k,b,m = 3,n,nlocal=2,bs=1,Ii,J;
  PetscInt       *boundary_nodes, nboundary_nodes, *boundary_indices;
  PetscMPIInt    rank,size;
  PetscErrorCode ierr;
  PetscScalar    v,v0,v1,v2,a0=0.1,a,rhsval, *boundary_values,diag = 1.0;
  PetscReal      norm;
  char           convname[64];
  PetscBool      upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE;

  ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  n = nlocal*size;

  PetscOptionsGetInt(NULL,NULL, "-bs", &bs, NULL);
  PetscOptionsGetBool(NULL,NULL, "-nonlocal_bc", &nonlocalBC, NULL);
  PetscOptionsGetScalar(NULL,NULL, "-diag", &diag, NULL);
  PetscOptionsGetString(NULL,NULL,"-convname",convname,sizeof(convname),&convert);
  PetscOptionsGetBool(NULL,NULL, "-zerorhs", &zerorhs, NULL);

  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n*bs,m*n*bs);
  MatSetFromOptions(A);
  MatSetUp(A);

  MatCreateVecs(A, NULL, &rhs);
  VecSetFromOptions(rhs);
  VecSetUp(rhs);

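  /* Assemble a 5-point stencil (plus an optional upwind advection term) on an m x n grid with bs
     unknowns per node: matrix row (j + n*i)*bs + b corresponds to dof b at grid point (i,j).
     Each rank generates the rows for its slice of nlocal grid columns j, which in parallel need
     not coincide with the rows it owns, so some entries are added off-process. */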
  rhsval = 0.0;
  for (i=0; i<m; i++) {
    for (j=nlocal*rank; j<nlocal*(rank+1); j++) {
      a = a0;
      for (b=0; b<bs; b++) {
        /* let's start with a 5-point stencil diffusion term */
        v = -1.0;  Ii = (j + n*i)*bs + b;
        if (i>0)   {J = Ii - n*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (i<m-1) {J = Ii + n*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j>0)   {J = Ii - 1*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j<n-1) {J = Ii + 1*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);
        if (upwind) {
          /* now add a 2nd order upwind advection term to add a little asymmetry */
          if (j>2) {
            J = Ii-2*bs; v2 = 0.5*a; v1 = -2.0*a; v0 = 1.5*a;
            MatSetValues(A,1,&Ii,1,&J,&v2,ADD_VALUES);
          } else {
            /* fall back to 1st order upwind */
            v1 = -1.0*a; v0 = 1.0*a;
          }
          if (j>1) {J = Ii-1*bs; MatSetValues(A,1,&Ii,1,&J,&v1,ADD_VALUES);}
          MatSetValues(A,1,&Ii,1,&Ii,&v0,ADD_VALUES);
          a /= 10.; /* use a different velocity for the next component */
          /* add a coupling to the previous and next components */
          v = 0.5;
          if (b>0) {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
          if (b<bs-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        }
        /* make up some rhs */
        VecSetValue(rhs, Ii, rhsval, INSERT_VALUES);
        rhsval += 1.0;
      }
    }
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  if (convert) { /* Test different Mat implementations */
    Mat B;

    MatConvert(A,convname,MAT_INITIAL_MATRIX,&B);
    MatDestroy(&A);
    A    = B;
  }

  VecAssemblyBegin(rhs);
  VecAssemblyEnd(rhs);
  /* set rhs to zero to simplify */
  if (zerorhs) {
    VecZeroEntries(rhs);
  }

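  /* Build the list of Dirichlet boundary nodes: the first grid row (i=0, nodes 0..n-1) plus the
     first grid column (j=0, nodes n, 2n, ..., (m-1)*n).  With -nonlocal_bc the lists are assembled
     by ranks that do not necessarily own the nodes; otherwise each rank lists only the nodes whose
     rows it owns. */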
  if (nonlocalBC) {
    /* version where boundary conditions are set by processes that don't necessarily own the nodes */
    if (rank == 0) {
      nboundary_nodes = size>m ? nlocal : m-size+nlocal;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      k = 0;
      for (i=size; i<m; i++,k++) {boundary_nodes[k] = n*i;}
    } else if (rank < m) {
      nboundary_nodes = nlocal+1;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      boundary_nodes[0] = rank*n;
      k = 1;
    } else {
      nboundary_nodes = nlocal;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      k = 0;
    }
    for (j=nlocal*rank; j<nlocal*(rank+1); j++,k++) {boundary_nodes[k] = j;}
  } else {
    /* version where boundary conditions are set by the node owners only */
    PetscMalloc1(m*n,&boundary_nodes);
    k = 0;
    for (j=0; j<n; j++) {
      Ii = j;
      if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
    }
    for (i=1; i<m; i++) {
      Ii = n*i;
      if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
    }
    nboundary_nodes = k;
  }

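  /* Prescribe the boundary values that go into x: dof b of boundary node I gets the value I + 0.1*b.
     The (rank, index, value) triples are echoed with PetscSynchronizedPrintf() as part of the test
     output. */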
  VecDuplicate(rhs, &x);
  VecZeroEntries(x);
  PetscMalloc2(nboundary_nodes*bs,&boundary_indices,nboundary_nodes*bs,&boundary_values);
  for (k=0; k<nboundary_nodes; k++) {
    Ii = boundary_nodes[k]*bs;
    v = 1.0*boundary_nodes[k];
    for (b=0; b<bs; b++, Ii++) {
      boundary_indices[k*bs+b] = Ii;
      boundary_values[k*bs+b] = v;
      PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %D %f\n", rank, Ii, (double)PetscRealPart(v));
      v += 0.1;
    }
  }
  PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL);
  VecSetValues(x, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(x);
  VecAssemblyEnd(x);

  /* We can check the rhs returned by MatZeroRowsColumns() by computing y = rhs - A*x and overwriting the boundary entries with diag times the boundary values */
  VecDuplicate(x, &y);
  MatMult(A, x, y);
  VecAYPX(y, -1.0, rhs);
  for (k=0; k<nboundary_nodes*bs; k++) boundary_values[k] *= diag;
  VecSetValues(y, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(y);
  VecAssemblyEnd(y);

  PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n");
  MatView(A, PETSC_VIEWER_STDOUT_WORLD);
  VecView(x,PETSC_VIEWER_STDOUT_WORLD);

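  /* MatZeroRowsColumns() zeros the listed rows and columns and places diag on their diagonal;
     given x and rhs it also fixes up rhs: the boundary entries become diag*x and the contributions
     of the eliminated columns are subtracted from the remaining entries, so the result should
     match the vector y computed above. */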
  MatZeroRowsColumns(A, nboundary_nodes*bs, boundary_indices, diag, x, rhs);
  PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n");
  VecView(rhs,PETSC_VIEWER_STDOUT_WORLD);
  VecAXPY(y, -1.0, rhs);
  VecNorm(y, NORM_INFINITY, &norm);
  if (norm > 1.0e-10) {
    PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm);
    VecView(y,PETSC_VIEWER_STDOUT_WORLD);
    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns");
  }

  PetscFree(boundary_nodes);
  PetscFree2(boundary_indices,boundary_values);
  VecDestroy(&x);
  VecDestroy(&y);
  VecDestroy(&rhs);
  MatDestroy(&A);

  ierr = PetscFinalize();
  return ierr;
}

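/* Example invocation (assuming a standard MPI-enabled PETSc build and the conventional executable
   name ex18), corresponding to the test with suffix 10 below:

       mpiexec -n 2 ./ex18 -bs 2 -nonlocal_bc
*/
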
/*TEST

   test:
      diff_args: -j
      suffix: 0

   test:
      diff_args: -j
      suffix: 1
      nsize: 2

   test:
      diff_args: -j
      suffix: 10
      nsize: 2
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 11
      nsize: 7
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 12
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 13
      nsize: 2
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 14
      nsize: 7
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 2
      nsize: 7

   test:
      diff_args: -j
      suffix: 3
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 4
      nsize: 2
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 5
      nsize: 7
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 6
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 7
      nsize: 2
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 8
      nsize: 7
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 9
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 15
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 16
      nsize: 2
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 17
      args: -bs 2 -nonlocal_bc -convname dense

   testset:
      diff_args: -j
      suffix: full
      nsize: {{1 3}separate output}
      args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_1
      nsize: 1
      args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output}

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_3
      nsize: 3
      args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse

TEST*/