Actual source code: flow.c
2: static char help[] = "FUN3D - 3-D, Unstructured Incompressible Euler Solver.\n\
3: originally written by W. K. Anderson of NASA Langley, \n\
4: and ported into PETSc by D. K. Kaushik, ODU and ICASE.\n\n";
6: #include <petscsnes.h>
7: #include <petsctime.h>
8: #include <petscao.h>
9: #include "user.h"
10: #if defined(_OPENMP)
11: #include "omp.h"
12: #if !defined(HAVE_REDUNDANT_WORK)
13: #include "metis.h"
14: #endif
15: #endif
17: #define ICALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
18: #define FCALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
20: typedef struct {
21: Vec qnew,qold,func;
22: double fnorm_ini,dt_ini,cfl_ini;
23: double ptime;
24: double cfl_max,max_time;
25: double fnorm,dt,cfl;
26: double fnorm_ratio;
27: int ires,iramp,itstep;
28: int max_steps,print_freq;
29: int LocalTimeStepping;
30: } TstepCtx;
32: typedef struct { /*============================*/
33: GRID *grid; /* Pointer to Grid info */
34: TstepCtx *tsCtx; /* Pointer to Time Stepping Context */
35: PetscBool PreLoading;
36: } AppCtx; /*============================*/
38: extern int FormJacobian(SNES,Vec,Mat,Mat,void*),
39: FormFunction(SNES,Vec,Vec,void*),
40: FormInitialGuess(SNES,GRID*),
41: Update(SNES,void*),
42: ComputeTimeStep(SNES,int,void*),
43: GetLocalOrdering(GRID*),
44: SetPetscDS(GRID *,TstepCtx*);
45: static PetscErrorCode WritePVTU(AppCtx*,const char*,PetscBool);
46: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
47: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncolor,int *ncount);
48: #endif
49: /* Global Variables */
51: /*============================*/
52: CINFO *c_info; /* Pointer to COMMON INFO */
53: CRUNGE *c_runge; /* Pointer to COMMON RUNGE */
54: CGMCOM *c_gmcom; /* Pointer to COMMON GMCOM */
55: /*============================*/
56: int rank,size,rstart;
57: REAL memSize = 0.0,grad_time = 0.0;
58: #if defined(_OPENMP)
59: int max_threads = 2,tot_threads,my_thread_id;
60: #endif
62: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
63: int event0,event1;
64: Scalar time_counters;
65: long long counter0,counter1;
66: #endif
67: int ntran[max_nbtran]; /* transition stuff put here to make global */
68: REAL dxtran[max_nbtran];
70: /* ======================== MAIN ROUTINE =================================== */
71: /* */
72: /* Finite volume flux split solver for general polygons */
73: /* */
74: /*===========================================================================*/
76: int main(int argc,char **args)
77: {
78: AppCtx user;
79: GRID f_pntr;
80: TstepCtx tsCtx;
81: SNES snes; /* nonlinear solver context */
82: Mat Jpc; /* Jacobian and Preconditioner matrices */
83: PetscScalar *qnode;
84: int ierr;
85: PetscBool flg,write_pvtu,pvtu_base64;
86: MPI_Comm comm;
87: PetscInt maxfails = 10000;
88: char pvtu_fname[PETSC_MAX_PATH_LEN] = "incomp";
90: ierr = PetscInitialize(&argc,&args,NULL,help);if (ierr) return ierr;
91: PetscInitializeFortran();
92: PetscOptionsInsertFile(PETSC_COMM_WORLD,NULL,"petsc.opt",PETSC_FALSE);
94: comm = PETSC_COMM_WORLD;
95: f77FORLINK(); /* Link FORTRAN and C COMMONS */
97: MPI_Comm_rank(comm,&rank);
98: MPI_Comm_size(comm,&size);
100: flg = PETSC_FALSE;
101: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
102: if (flg) {PetscMemorySetGetMaximumUsage();}
104: /*======================================================================*/
105: /* Initialize stuff related to time stepping */
106: /*======================================================================*/
107: tsCtx.fnorm_ini = 0.0; tsCtx.cfl_ini = 50.0; tsCtx.cfl_max = 1.0e+05;
108: tsCtx.max_steps = 50; tsCtx.max_time = 1.0e+12; tsCtx.iramp = -50;
109: tsCtx.dt = -5.0; tsCtx.fnorm_ratio = 1.0e+10;
110: tsCtx.LocalTimeStepping = 1;
111: PetscOptionsGetInt(NULL,NULL,"-max_st",&tsCtx.max_steps,NULL);
112: PetscOptionsGetReal(NULL,NULL,"-ts_rtol",&tsCtx.fnorm_ratio,NULL);
113: PetscOptionsGetReal(NULL,NULL,"-cfl_ini",&tsCtx.cfl_ini,NULL);
114: PetscOptionsGetReal(NULL,NULL,"-cfl_max",&tsCtx.cfl_max,NULL);
115: tsCtx.print_freq = tsCtx.max_steps;
116: PetscOptionsGetInt(NULL,NULL,"-print_freq",&tsCtx.print_freq,&flg);
117: PetscOptionsGetString(NULL,NULL,"-pvtu",pvtu_fname,sizeof(pvtu_fname),&write_pvtu);
118: pvtu_base64 = PETSC_FALSE;
119: PetscOptionsGetBool(NULL,NULL,"-pvtu_base64",&pvtu_base64,NULL);
121: c_info->alpha = 3.0;
122: c_info->beta = 15.0;
123: c_info->ivisc = 0;
125: c_gmcom->ilu0 = 1;
126: c_gmcom->nsrch = 10;
128: c_runge->nitfo = 0;
130: PetscMemzero(&f_pntr,sizeof(f_pntr));
131: f_pntr.jvisc = c_info->ivisc;
132: f_pntr.ileast = 4;
133: PetscOptionsGetReal(NULL,NULL,"-alpha",&c_info->alpha,NULL);
134: PetscOptionsGetReal(NULL,NULL,"-beta",&c_info->beta,NULL);
136: /*======================================================================*/
138: /*Set the maximum number of threads for OpenMP */
139: #if defined(_OPENMP)
140: PetscOptionsGetInt(NULL,NULL,"-max_threads",&max_threads,&flg);
141: omp_set_num_threads(max_threads);
142: PetscPrintf(comm,"Using %d threads for each MPI process\n",max_threads);
143: #endif
145: /* Get the grid information into local ordering */
146: GetLocalOrdering(&f_pntr);
148: /* Allocate Memory for Some Other Grid Arrays */
149: set_up_grid(&f_pntr);
151: /* If using least squares for the gradients,calculate the r's */
152: if (f_pntr.ileast == 4) f77SUMGS(&f_pntr.nnodesLoc,&f_pntr.nedgeLoc,f_pntr.eptr,f_pntr.xyz,f_pntr.rxy,&rank,&f_pntr.nvertices);
154: user.grid = &f_pntr;
155: user.tsCtx = &tsCtx;
157: /* SAWs Stuff */
159: /*
160: Preload the executable to get accurate timings. This runs the following chunk of
161: code twice, first to get the executable pages into memory and the second time for
162: accurate timings.
163: */
164: PetscPreLoadBegin(PETSC_TRUE,"Time integration");
165: user.PreLoading = PetscPreLoading;
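  /* Note (explanatory, not from the original comments): PetscPreLoadBegin()/
     PetscPreLoadEnd() bracket the timed region; with preloading enabled the
     region runs twice and only the second pass gives meaningful timings, the
     first merely faulting in the executable pages and warming the caches.
     The behavior is normally toggled with the -preload runtime option. */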
167: /* Create nonlinear solver */
168: SetPetscDS(&f_pntr,&tsCtx);
169: SNESCreate(comm,&snes);
170: SNESSetType(snes,"newtonls");
172: /* Set various routines and options */
173: SNESSetFunction(snes,user.grid->res,FormFunction,&user);
174: flg = PETSC_FALSE;
175: PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
176: if (flg) {
177: /* Use matrix-free to define Newton system; use explicit (approx) Jacobian for preconditioner */
178: MatCreateSNESMF(snes,&Jpc);
179: SNESSetJacobian(snes,Jpc,user.grid->A,FormJacobian,&user);
180: } else {
181: /* Use explicit (approx) Jacobian to define Newton system and preconditioner */
182: SNESSetJacobian(snes,user.grid->A,user.grid->A,FormJacobian,&user);
183: }
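  /* Illustration (a sketch, not part of the original code): with -matrix_free
     the Newton system uses a finite-difference, matrix-free operator from
     MatCreateSNESMF(), while user.grid->A still holds the explicitly assembled
     approximate Jacobian from which the preconditioner is built.  A hypothetical
     invocation, assuming the binary is named "flow" and the default file names
     used elsewhere in this file, might look like

         mpiexec -n 4 ./flow -matrix_free -mesh uns3d.msh -partition part_vec.part.4
  */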
185: SNESSetMaxLinearSolveFailures(snes,maxfails);
186: SNESSetFromOptions(snes);
188: /* Initialize the flowfield */
189: FormInitialGuess(snes,user.grid);
191: /* Solve nonlinear system */
192: Update(snes,&user);
194: /* Write restart file */
195: VecGetArray(user.grid->qnode,&qnode);
196: /*f77WREST(&user.grid->nnodes,qnode,user.grid->turbre,user.grid->amut);*/
198: /* Write Tecplot solution file */
199: #if 0
200: if (rank == 0)
201: f77TECFLO(&user.grid->nnodes,
202: &user.grid->nnbound,&user.grid->nvbound,&user.grid->nfbound,
203: &user.grid->nnfacet,&user.grid->nvfacet,&user.grid->nffacet,
204: &user.grid->nsnode, &user.grid->nvnode, &user.grid->nfnode,
205: c_info->title,
206: user.grid->x, user.grid->y, user.grid->z,
207: qnode,
208: user.grid->nnpts, user.grid->nntet, user.grid->nvpts,
209: user.grid->nvtet, user.grid->nfpts, user.grid->nftet,
210: user.grid->f2ntn, user.grid->f2ntv, user.grid->f2ntf,
211: user.grid->isnode, user.grid->ivnode, user.grid->ifnode,
212: &rank);
213: #endif
214: if (write_pvtu) {WritePVTU(&user,pvtu_fname,pvtu_base64);}
216: /* Write residual,lift,drag,and moment history file */
217: /*
218: if (rank == 0) f77PLLAN(&user.grid->nnodes,&rank);
219: */
221: VecRestoreArray(user.grid->qnode,&qnode);
222: flg = PETSC_FALSE;
223: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
224: if (flg) {
225: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage before destroying\n");
226: }
228: VecDestroy(&user.grid->qnode);
229: VecDestroy(&user.grid->qnodeLoc);
230: VecDestroy(&user.tsCtx->qold);
231: VecDestroy(&user.tsCtx->func);
232: VecDestroy(&user.grid->res);
233: VecDestroy(&user.grid->grad);
234: VecDestroy(&user.grid->gradLoc);
235: MatDestroy(&user.grid->A);
236: flg = PETSC_FALSE;
237: PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
238: if (flg) { MatDestroy(&Jpc);}
239: SNESDestroy(&snes);
240: VecScatterDestroy(&user.grid->scatter);
241: VecScatterDestroy(&user.grid->gradScatter);
242: flg = PETSC_FALSE;
243: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
244: if (flg) {
245: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after destroying\n");
246: }
247: PetscPreLoadEnd();
249: /* allocated in set_up_grid() */
250: PetscFree(user.grid->isface);
251: PetscFree(user.grid->ivface);
252: PetscFree(user.grid->ifface);
253: PetscFree(user.grid->us);
254: PetscFree(user.grid->vs);
255: PetscFree(user.grid->as);
257: /* Allocated in GetLocalOrdering() */
258: PetscFree(user.grid->eptr);
259: PetscFree(user.grid->ia);
260: PetscFree(user.grid->ja);
261: PetscFree(user.grid->loc2glo);
262: PetscFree(user.grid->loc2pet);
263: PetscFree(user.grid->xyzn);
264: #if defined(_OPENMP)
265: # if defined(HAVE_REDUNDANT_WORK)
266: PetscFree(user.grid->resd);
267: # else
268: PetscFree(user.grid->part_thr);
269: PetscFree(user.grid->nedge_thr);
270: PetscFree(user.grid->edge_thr);
271: PetscFree(user.grid->xyzn_thr);
272: # endif
273: #endif
274: PetscFree(user.grid->xyz);
275: PetscFree(user.grid->area);
277: PetscFree(user.grid->nntet);
278: PetscFree(user.grid->nnpts);
279: PetscFree(user.grid->f2ntn);
280: PetscFree(user.grid->isnode);
281: PetscFree(user.grid->sxn);
282: PetscFree(user.grid->syn);
283: PetscFree(user.grid->szn);
284: PetscFree(user.grid->sa);
285: PetscFree(user.grid->sface_bit);
287: PetscFree(user.grid->nvtet);
288: PetscFree(user.grid->nvpts);
289: PetscFree(user.grid->f2ntv);
290: PetscFree(user.grid->ivnode);
291: PetscFree(user.grid->vxn);
292: PetscFree(user.grid->vyn);
293: PetscFree(user.grid->vzn);
294: PetscFree(user.grid->va);
295: PetscFree(user.grid->vface_bit);
297: PetscFree(user.grid->nftet);
298: PetscFree(user.grid->nfpts);
299: PetscFree(user.grid->f2ntf);
300: PetscFree(user.grid->ifnode);
301: PetscFree(user.grid->fxn);
302: PetscFree(user.grid->fyn);
303: PetscFree(user.grid->fzn);
304: PetscFree(user.grid->fa);
305: PetscFree(user.grid->cdt);
306: PetscFree(user.grid->phi);
307: PetscFree(user.grid->rxy);
309: PetscPrintf(comm,"Time taken in gradient calculation %g sec.\n",grad_time);
311: PetscFinalize();
312: return ierr;
313: }
315: /*---------------------------------------------------------------------*/
316: /* --------------------- Form initial approximation ----------------- */
317: int FormInitialGuess(SNES snes,GRID *grid)
318: /*---------------------------------------------------------------------*/
319: {
320: int ierr;
321: PetscScalar *qnode;
324: VecGetArray(grid->qnode,&qnode);
325: f77INIT(&grid->nnodesLoc,qnode,grid->turbre,grid->amut,&grid->nvnodeLoc,grid->ivnode,&rank);
326: VecRestoreArray(grid->qnode,&qnode);
327: return(0);
328: }
330: /*---------------------------------------------------------------------*/
331: /* --------------------- Evaluate Function F(x) --------------------- */
332: int FormFunction(SNES snes,Vec x,Vec f,void *dummy)
333: /*---------------------------------------------------------------------*/
334: {
335: AppCtx *user = (AppCtx*) dummy;
336: GRID *grid = user->grid;
337: TstepCtx *tsCtx = user->tsCtx;
338: PetscScalar *qnode,*res,*qold;
339: PetscScalar *grad;
340: PetscScalar temp;
341: VecScatter scatter = grid->scatter;
342: VecScatter gradScatter = grid->gradScatter;
343: Vec localX = grid->qnodeLoc;
344: Vec localGrad = grid->gradLoc;
345: int i,j,in,ierr;
346: int nbface,ires;
347: PetscScalar time_ini,time_fin;
350: /* Get X into the local work vector */
351: VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
352: VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
353: /* VecCopy(x,localX); */
354: /* access the local work f,grad,and input */
355: VecGetArray(f,&res);
356: VecGetArray(grid->grad,&grad);
357: VecGetArray(localX,&qnode);
358: ires = tsCtx->ires;
360: PetscTime(&time_ini);
361: f77LSTGS(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,qnode,grad,grid->xyz,grid->rxy,
362: &rank,&grid->nvertices);
363: PetscTime(&time_fin);
364: grad_time += time_fin - time_ini;
365: VecRestoreArray(grid->grad,&grad);
367: VecScatterBegin(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
368: VecScatterEnd(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
369: /*VecCopy(grid->grad,localGrad);*/
371: VecGetArray(localGrad,&grad);
372: nbface = grid->nsface + grid->nvface + grid->nfface;
373: f77GETRES(&grid->nnodesLoc,&grid->ncell, &grid->nedgeLoc, &grid->nsface,
374: &grid->nvface,&grid->nfface, &nbface,
375: &grid->nsnodeLoc,&grid->nvnodeLoc, &grid->nfnodeLoc,
376: grid->isface, grid->ivface, grid->ifface, &grid->ileast,
377: grid->isnode, grid->ivnode, grid->ifnode,
378: &grid->nnfacetLoc,grid->f2ntn, &grid->nnbound,
379: &grid->nvfacetLoc,grid->f2ntv, &grid->nvbound,
380: &grid->nffacetLoc,grid->f2ntf, &grid->nfbound,
381: grid->eptr,
382: grid->sxn, grid->syn, grid->szn,
383: grid->vxn, grid->vyn, grid->vzn,
384: grid->fxn, grid->fyn, grid->fzn,
385: grid->xyzn,
386: qnode, grid->cdt,
387: grid->xyz, grid->area,
388: grad, res,
389: grid->turbre,
390: grid->slen, grid->c2n,
391: grid->c2e,
392: grid->us, grid->vs, grid->as,
393: grid->phi,
394: grid->amut, &ires,
395: #if defined(_OPENMP)
396: &max_threads,
397: #if defined(HAVE_EDGE_COLORING)
398: &grid->ncolor, grid->ncount,
399: #elif defined(HAVE_REDUNDANT_WORK)
400: grid->resd,
401: #else
402: &grid->nedgeAllThr,
403: grid->part_thr,grid->nedge_thr,grid->edge_thr,grid->xyzn_thr,
404: #endif
405: #endif
406: &tsCtx->LocalTimeStepping,&rank,&grid->nvertices);
408: /* Add the contribution due to time stepping */
409: if (ires == 1) {
410: VecGetArray(tsCtx->qold,&qold);
411: #if defined(INTERLACING)
412: for (i = 0; i < grid->nnodesLoc; i++) {
413: temp = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
414: for (j = 0; j < 4; j++) {
415: in = 4*i + j;
416: res[in] += temp*(qnode[in] - qold[in]);
417: }
418: }
419: #else
420: for (j = 0; j < 4; j++) {
421: for (i = 0; i < grid->nnodesLoc; i++) {
422: temp = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
423: in = grid->nnodesLoc*j + i;
424: res[in] += temp*(qnode[in] - qold[in]);
425: }
426: }
427: #endif
428: VecRestoreArray(tsCtx->qold,&qold);
429: }
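  /* For reference, the loop above adds the pseudo-transient continuation term

        res[in] += (area[i] / (cfl * cdt[i])) * (qnode[in] - qold[in])

     a diagonal "volume over time step" scaling of the state change that
     vanishes as the iterates converge (qnode -> qold), recovering the steady
     residual. */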
430: VecRestoreArray(localX,&qnode);
431: VecRestoreArray(f,&res);
432: VecRestoreArray(localGrad,&grad);
433: return(0);
434: }
436: /*---------------------------------------------------------------------*/
437: /* -------------------- Evaluate Jacobian F'(x) -------------------- */
439: int FormJacobian(SNES snes,Vec x,Mat Jac,Mat pc_mat,void *dummy)
440: /*---------------------------------------------------------------------*/
441: {
442: AppCtx *user = (AppCtx*) dummy;
443: GRID *grid = user->grid;
444: TstepCtx *tsCtx = user->tsCtx;
445: Vec localX = grid->qnodeLoc;
446: PetscScalar *qnode;
447: int ierr;
450: /* VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
451: VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD); */
452: /* VecCopy(x,localX); */
453: MatSetUnfactored(pc_mat);
455: VecGetArray(localX,&qnode);
456: f77FILLA(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,
457: &grid->nsface,
458: grid->isface,grid->fxn,grid->fyn,grid->fzn,
459: grid->sxn,grid->syn,grid->szn,
460: &grid->nsnodeLoc,&grid->nvnodeLoc,&grid->nfnodeLoc,grid->isnode,
461: grid->ivnode,grid->ifnode,qnode,&pc_mat,grid->cdt,
462: grid->area,grid->xyzn,&tsCtx->cfl,
463: &rank,&grid->nvertices);
464: VecRestoreArray(localX,&qnode);
465: MatAssemblyBegin(Jac,MAT_FINAL_ASSEMBLY);
466: MatAssemblyEnd(Jac,MAT_FINAL_ASSEMBLY);
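  /* Note: in the default (non -matrix_free) path SNESSetJacobian() received
     user.grid->A as both the Jacobian and the preconditioner matrix, so Jac and
     pc_mat alias the same Mat here and assembling Jac also finalizes the matrix
     filled by f77FILLA() above. */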
467: #if defined(MATRIX_VIEW)
468: if ((tsCtx->itstep != 0) &&(tsCtx->itstep % tsCtx->print_freq) == 0) {
469: PetscViewer viewer;
470: char mat_file[PETSC_MAX_PATH_LEN];
471: sprintf(mat_file,"mat_bin.%d",tsCtx->itstep);
472: PetscViewerBinaryOpen(MPI_COMM_WORLD,mat_file,FILE_MODE_WRITE,&viewer);
473: MatView(pc_mat,viewer);
474: PetscViewerDestroy(&viewer);
475: }
476: #endif
477: return(0);
478: }
480: /*---------------------------------------------------------------------*/
481: int Update(SNES snes,void *ctx)
482: /*---------------------------------------------------------------------*/
483: {
484: AppCtx *user = (AppCtx*) ctx;
485: GRID *grid = user->grid;
486: TstepCtx *tsCtx = user->tsCtx;
487: VecScatter scatter = grid->scatter;
488: Vec localX = grid->qnodeLoc;
489: PetscScalar *qnode,*res;
490: PetscScalar clift,cdrag,cmom;
491: int ierr,its;
492: PetscScalar fratio;
493: PetscScalar time1,time2,cpuloc,cpuglo;
494: int max_steps;
495: PetscBool print_flag = PETSC_FALSE;
496: FILE *fptr = 0;
497: int nfailsCum = 0,nfails = 0;
498: /*Scalar cpu_ini,cpu_fin,cpu_time;*/
499: /*int event0 = 14,event1 = 25,gen_start,gen_read;
500: PetscScalar time_start_counters,time_read_counters;
501: long long counter0,counter1;*/
504: PetscOptionsGetBool(NULL,NULL,"-print",&print_flag,NULL);
505: if (print_flag) {
506: PetscFOpen(PETSC_COMM_WORLD,"history.out","w",&fptr);
507: PetscFPrintf(PETSC_COMM_WORLD,fptr,"VARIABLES = iter,cfl,fnorm,clift,cdrag,cmom,cpu\n");
508: }
509: if (user->PreLoading) max_steps = 1;
510: else max_steps = tsCtx->max_steps;
511: fratio = 1.0;
512: /*tsCtx->ptime = 0.0;*/
513: VecCopy(grid->qnode,tsCtx->qold);
514: PetscTime(&time1);
515: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
516: /* if (!user->PreLoading) {
517: PetscBool flg = PETSC_FALSE;
518: PetscOptionsGetInt(NULL,NULL,"-e0",&event0,&flg);
519: PetscOptionsGetInt(NULL,NULL,"-e1",&event1,&flg);
520: PetscTime(&time_start_counters);
521: if ((gen_start = start_counters(event0,event1)) < 0)
522: SETERRQ(PETSC_COMM_SELF,1,"Error in start_counters\n");
523: }*/
524: #endif
525: /*cpu_ini = PetscGetCPUTime();*/
526: for (tsCtx->itstep = 0; (tsCtx->itstep < max_steps) &&
527: (fratio <= tsCtx->fnorm_ratio); tsCtx->itstep++) {
528: ComputeTimeStep(snes,tsCtx->itstep,user);
529: /*tsCtx->ptime += tsCtx->dt;*/
531: SNESSolve(snes,NULL,grid->qnode);
532: SNESGetIterationNumber(snes,&its);
534: SNESGetNonlinearStepFailures(snes,&nfails);
535: nfailsCum += nfails; nfails = 0;
536: if (nfailsCum >= 2) SETERRQ(PETSC_COMM_SELF,1,"Unable to find a Newton Step");
537: if (print_flag) {
538: PetscPrintf(PETSC_COMM_WORLD,"At Time Step %d cfl = %g and fnorm = %g\n",
539: tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm);
540: }
541: VecCopy(grid->qnode,tsCtx->qold);
543: c_info->ntt = tsCtx->itstep+1;
544: PetscTime(&time2);
545: cpuloc = time2-time1;
546: cpuglo = 0.0;
547: MPI_Allreduce(&cpuloc,&cpuglo,1,MPIU_REAL,MPIU_MAX,PETSC_COMM_WORLD);
548: c_info->tot = cpuglo; /* Total CPU time used upto this time step */
550: VecScatterBegin(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
551: VecScatterEnd(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
552: /* VecCopy(grid->qnode,localX); */
554: VecGetArray(grid->res,&res);
555: VecGetArray(localX,&qnode);
557: f77FORCE(&grid->nnodesLoc,&grid->nedgeLoc,
558: grid->isnode, grid->ivnode,
559: &grid->nnfacetLoc,grid->f2ntn,&grid->nnbound,
560: &grid->nvfacetLoc,grid->f2ntv,&grid->nvbound,
561: grid->eptr, qnode,
562: grid->xyz,
563: grid->sface_bit,grid->vface_bit,
564: &clift,&cdrag,&cmom,&rank,&grid->nvertices);
565: if (print_flag) {
566: PetscPrintf(PETSC_COMM_WORLD,"%d\t%g\t%g\t%g\t%g\t%g\n",tsCtx->itstep,
567: tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom);
568: PetscPrintf(PETSC_COMM_WORLD,"Wall clock time needed %g seconds for %d time steps\n",
569: cpuglo,tsCtx->itstep);
570: PetscFPrintf(PETSC_COMM_WORLD,fptr,"%d\t%g\t%g\t%g\t%g\t%g\t%g\n",
571: tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom,cpuglo);
572: }
573: VecRestoreArray(localX,&qnode);
574: VecRestoreArray(grid->res,&res);
575: fratio = tsCtx->fnorm_ini/tsCtx->fnorm;
576: MPI_Barrier(PETSC_COMM_WORLD);
578: } /* End of time step loop */
580: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
581: if (!user->PreLoading) {
582: int eve0,eve1;
583: FILE *cfp0,*cfp1;
584: char str[256];
585: /* if ((gen_read = read_counters(event0,&counter0,event1,&counter1)) < 0)
586: SETERRQ(PETSC_COMM_SELF,1,"Error in read_counter\n");
587: PetscTime(&time_read_counters);
588: if (gen_read != gen_start) {
589: SETERRQ(PETSC_COMM_SELF,1,"Lost Counters!! Aborting ...\n");
590: }*/
591: /*sprintf(str,"counters%d_and_%d",event0,event1);
592: cfp0 = fopen(str,"a");*/
593: /*print_counters(event0,counter0,event1,counter1);*/
594: /*fprintf(cfp0,"%lld %lld %g\n",counter0,counter1,
595: time_counters);
596: fclose(cfp0);*/
597: }
598: #endif
599: PetscPrintf(PETSC_COMM_WORLD,"Total wall clock time needed %g seconds for %d time steps\n",
600: cpuglo,tsCtx->itstep);
601: PetscPrintf(PETSC_COMM_WORLD,"cfl = %g fnorm = %g\n",tsCtx->cfl,tsCtx->fnorm);
602: PetscPrintf(PETSC_COMM_WORLD,"clift = %g cdrag = %g cmom = %g\n",clift,cdrag,cmom);
604: if (rank == 0 && print_flag) fclose(fptr);
605: if (user->PreLoading) {
606: tsCtx->fnorm_ini = 0.0;
607: PetscPrintf(PETSC_COMM_WORLD,"Preloading done ...\n");
608: }
609: return(0);
610: }
612: /*---------------------------------------------------------------------*/
613: int ComputeTimeStep(SNES snes,int iter,void *ctx)
614: /*---------------------------------------------------------------------*/
615: {
616: AppCtx *user = (AppCtx*) ctx;
617: TstepCtx *tsCtx = user->tsCtx;
618: Vec func = tsCtx->func;
619: PetscScalar inc = 1.1;
620: PetscScalar newcfl;
621: int ierr;
622: /*int iramp = tsCtx->iramp;*/
625: tsCtx->ires = 0;
626: FormFunction(snes,tsCtx->qold,func,user);
627: tsCtx->ires = 1;
628: VecNorm(func,NORM_2,&tsCtx->fnorm);
629: /* first time through so compute initial function norm */
630: if (tsCtx->fnorm_ini == 0.0) {
631: tsCtx->fnorm_ini = tsCtx->fnorm;
632: tsCtx->cfl = tsCtx->cfl_ini;
633: } else {
634: newcfl = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
635: tsCtx->cfl = PetscMin(newcfl,tsCtx->cfl_max);
636: }
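  /* Worked example of the CFL ramp above (sometimes called switched evolution
     relaxation), assuming the defaults inc = 1.1, cfl_ini = 50, cfl_max = 1.0e5
     and a residual drop fnorm_ini/fnorm = 100:

        newcfl = 1.1 * 50 * 100 = 5500,   cfl = min(5500, 1.0e5) = 5500

     so the CFL number grows in proportion to the residual reduction until it
     saturates at cfl_max. */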
638: /* if (iramp < 0) {
639: newcfl = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
640: } else {
641: if (tsCtx->dt < 0 && iramp > 0)
642: if (iter > iramp) newcfl = tsCtx->cfl_max;
643: else newcfl = tsCtx->cfl_ini + (tsCtx->cfl_max - tsCtx->cfl_ini)*
644: (double) iter/(double) iramp;
645: }
646: tsCtx->cfl = MIN(newcfl,tsCtx->cfl_max);*/
647: /*printf("In ComputeTime Step - fnorm is %f\n",tsCtx->fnorm);*/
648: /*VecDestroy(&func);*/
649: return(0);
650: }
652: /*---------------------------------------------------------------------*/
653: int GetLocalOrdering(GRID *grid)
654: /*---------------------------------------------------------------------*/
655: {
656: int ierr,i,j,k,inode,isurf,nte,nb,node1,node2,node3;
657: int nnodes,nedge,nnz,jstart,jend;
658: int nnodesLoc,nvertices,nedgeLoc,nnodesLocEst;
659: int nedgeLocEst,remEdges,readEdges,remNodes,readNodes;
660: int nnfacet,nvfacet,nffacet;
661: int nnfacetLoc,nvfacetLoc,nffacetLoc;
662: int nsnode,nvnode,nfnode;
663: int nsnodeLoc,nvnodeLoc,nfnodeLoc;
664: int nnbound,nvbound,nfbound;
665: int bs = 4;
666: int fdes = 0;
667: off_t currentPos = 0,newPos = 0;
668: int grid_param = 13;
669: int cross_edges = 0;
670: int *edge_bit,*pordering;
671: int *l2p,*l2a,*p2l,*a2l,*v2p,*eperm;
672: int *tmp,*tmp1,*tmp2;
673: PetscScalar time_ini,time_fin;
674: PetscScalar *ftmp,*ftmp1;
675: char mesh_file[PETSC_MAX_PATH_LEN] = "";
676: AO ao;
677: FILE *fptr,*fptr1;
678: PetscBool flg;
679: MPI_Comm comm = PETSC_COMM_WORLD;
682: /* Read the integer grid parameters */
683: ICALLOC(grid_param,&tmp);
684: if (rank == 0) {
685: PetscBool exists;
686: PetscOptionsGetString(NULL,NULL,"-mesh",mesh_file,sizeof(mesh_file),&flg);
687: PetscTestFile(mesh_file,'r',&exists);
688: if (!exists) { /* try uns3d.msh as the file name */
689: PetscStrcpy(mesh_file,"uns3d.msh");
690: }
691: PetscBinaryOpen(mesh_file,FILE_MODE_READ,&fdes);
692: }
693: PetscBinarySynchronizedRead(comm,fdes,tmp,grid_param,PETSC_INT);
694: grid->ncell = tmp[0];
695: grid->nnodes = tmp[1];
696: grid->nedge = tmp[2];
697: grid->nnbound = tmp[3];
698: grid->nvbound = tmp[4];
699: grid->nfbound = tmp[5];
700: grid->nnfacet = tmp[6];
701: grid->nvfacet = tmp[7];
702: grid->nffacet = tmp[8];
703: grid->nsnode = tmp[9];
704: grid->nvnode = tmp[10];
705: grid->nfnode = tmp[11];
706: grid->ntte = tmp[12];
707: grid->nsface = 0;
708: grid->nvface = 0;
709: grid->nfface = 0;
710: PetscFree(tmp);
711: PetscPrintf(comm,"nnodes = %d,nedge = %d,nnfacet = %d,nsnode = %d,nfnode = %d\n",
712: grid->nnodes,grid->nedge,grid->nnfacet,grid->nsnode,grid->nfnode);
714: nnodes = grid->nnodes;
715: nedge = grid->nedge;
716: nnfacet = grid->nnfacet;
717: nvfacet = grid->nvfacet;
718: nffacet = grid->nffacet;
719: nnbound = grid->nnbound;
720: nvbound = grid->nvbound;
721: nfbound = grid->nfbound;
722: nsnode = grid->nsnode;
723: nvnode = grid->nvnode;
724: nfnode = grid->nfnode;
726: /* Read the partitioning vector generated by MeTiS */
727: ICALLOC(nnodes,&l2a);
728: ICALLOC(nnodes,&v2p);
729: ICALLOC(nnodes,&a2l);
730: nnodesLoc = 0;
732: for (i = 0; i < nnodes; i++) a2l[i] = -1;
733: PetscTime(&time_ini);
735: if (rank == 0) {
736: if (size == 1) {
737: PetscMemzero(v2p,nnodes*sizeof(int));
738: } else {
739: char spart_file[PETSC_MAX_PATH_LEN],part_file[PETSC_MAX_PATH_LEN];
740: PetscBool exists;
742: PetscOptionsGetString(NULL,NULL,"-partition",spart_file,sizeof(spart_file),&flg);
743: PetscTestFile(spart_file,'r',&exists);
744: if (!exists) { /* try appending the number of processors */
745: sprintf(part_file,"part_vec.part.%d",size);
746: PetscStrcpy(spart_file,part_file);
747: }
748: fptr = fopen(spart_file,"r");
749: if (!fptr) SETERRQ1(PETSC_COMM_SELF,1,"Cannot open file %s\n",spart_file);
750: for (inode = 0; inode < nnodes; inode++) {
751: fscanf(fptr,"%d\n",&node1);
752: v2p[inode] = node1;
753: }
754: fclose(fptr);
755: }
756: }
757: MPI_Bcast(v2p,nnodes,MPI_INT,0,comm);
758: for (inode = 0; inode < nnodes; inode++) {
759: if (v2p[inode] == rank) {
760: l2a[nnodesLoc] = inode;
761: a2l[inode] = nnodesLoc;
762: nnodesLoc++;
763: }
764: }
766: PetscTime(&time_fin);
767: time_fin -= time_ini;
768: PetscPrintf(comm,"Partition Vector read successfully\n");
769: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
771: MPI_Scan(&nnodesLoc,&rstart,1,MPI_INT,MPI_SUM,comm);
772: rstart -= nnodesLoc;
773: ICALLOC(nnodesLoc,&pordering);
774: for (i=0; i < nnodesLoc; i++) pordering[i] = rstart + i;
775: AOCreateBasic(comm,nnodesLoc,l2a,pordering,&ao);
776: PetscFree(pordering);
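  /* The AO built above maps the application (global mesh) numbering in l2a[]
     to a contiguous PETSc numbering in which this process owns indices
     rstart .. rstart+nnodesLoc-1; it is applied later via AOApplicationToPetsc()
     to translate loc2pet[] and the ja[] column indices. */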
778: /* Now count the local number of edges - including edges with
779: ghost nodes but edges between ghost nodes are NOT counted */
780: nedgeLoc = 0;
781: nvertices = nnodesLoc;
782: /* Choose an estimated number of local edges. The choice
783: nedgeLocEst = 1000000 looks reasonable as it will read
784: the edge and edge normal arrays in 8 MB chunks */
785: /*nedgeLocEst = nedge/size;*/
786: nedgeLocEst = PetscMin(nedge,1000000);
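  /* Sanity check of the "8 MB chunks" remark above (assuming 4-byte ints and
     8-byte PetscScalars): each pass reads at most nedgeLocEst entries, i.e.
     2 x 1,000,000 x 4 bytes = 8 MB for the two integer edge-endpoint arrays and
     1,000,000 x 8 bytes = 8 MB per edge-normal component. */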
787: remEdges = nedge;
788: ICALLOC(2*nedgeLocEst,&tmp);
789: PetscBinarySynchronizedSeek(comm,fdes,0,PETSC_BINARY_SEEK_CUR,&currentPos);
790: PetscTime(&time_ini);
791: while (remEdges > 0) {
792: readEdges = PetscMin(remEdges,nedgeLocEst);
793: /*time_ini = PetscTime();*/
794: PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
795: PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
796: PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
797: PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
798: /*time_fin += PetscTime()-time_ini;*/
799: for (j = 0; j < readEdges; j++) {
800: node1 = tmp[j]-1;
801: node2 = tmp[j+readEdges]-1;
802: if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
803: nedgeLoc++;
804: if (a2l[node1] == -1) {
805: l2a[nvertices] = node1;
806: a2l[node1] = nvertices;
807: nvertices++;
808: }
809: if (a2l[node2] == -1) {
810: l2a[nvertices] = node2;
811: a2l[node2] = nvertices;
812: nvertices++;
813: }
814: }
815: }
816: remEdges = remEdges - readEdges;
817: MPI_Barrier(comm);
818: }
819: PetscTime(&time_fin);
820: time_fin -= time_ini;
821: PetscPrintf(comm,"Local edges counted with MPI_Bcast %d\n",nedgeLoc);
822: PetscPrintf(comm,"Local vertices counted %d\n",nvertices);
823: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
825: /* Now store the local edges */
826: ICALLOC(2*nedgeLoc,&grid->eptr);
827: ICALLOC(nedgeLoc,&edge_bit);
828: ICALLOC(nedgeLoc,&eperm);
829: i = 0; j = 0; k = 0;
830: remEdges = nedge;
831: PetscBinarySynchronizedSeek(comm,fdes,currentPos,PETSC_BINARY_SEEK_SET,&newPos);
832: currentPos = newPos;
834: PetscTime(&time_ini);
835: while (remEdges > 0) {
836: readEdges = PetscMin(remEdges,nedgeLocEst);
837: PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
838: PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
839: PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
840: PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
841: for (j = 0; j < readEdges; j++) {
842: node1 = tmp[j]-1;
843: node2 = tmp[j+readEdges]-1;
844: if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
845: grid->eptr[k] = a2l[node1];
846: grid->eptr[k+nedgeLoc] = a2l[node2];
847: edge_bit[k] = i; /* Record global file index of the edge */
848: eperm[k] = k;
849: k++;
850: }
851: i++;
852: }
853: remEdges = remEdges - readEdges;
854: MPI_Barrier(comm);
855: }
856: PetscBinarySynchronizedSeek(comm,fdes,currentPos+2*nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_SET,&newPos);
857: PetscTime(&time_fin);
858: time_fin -= time_ini;
859: PetscPrintf(comm,"Local edges stored\n");
860: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
862: PetscFree(tmp);
863: ICALLOC(2*nedgeLoc,&tmp);
864: PetscMemcpy(tmp,grid->eptr,2*nedgeLoc*sizeof(int));
865: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
866: EdgeColoring(nvertices,nedgeLoc,grid->eptr,eperm,&grid->ncolor,grid->ncount);
867: #else
868: /* Now reorder the edges for better cache locality */
869: /*
870: tmp[0]=7;tmp[1]=6;tmp[2]=3;tmp[3]=9;tmp[4]=2;tmp[5]=0;
871: PetscSortIntWithPermutation(6,tmp,eperm);
872: for (i=0; i<6; i++)
873: printf("%d %d %d\n",i,tmp[i],eperm[i]);
874: */
875: flg = PETSC_FALSE;
876: PetscOptionsGetBool(NULL,NULL,"-no_edge_reordering",&flg,NULL);
877: if (!flg) {
878: PetscSortIntWithPermutation(nedgeLoc,tmp,eperm);
879: }
880: #endif
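  /* In the default (non edge-coloring) build, PetscSortIntWithPermutation()
     above fills eperm[] so that edges are visited in nondecreasing order of
     their first end point; the loop below applies that permutation when storing
     eptr[], so consecutive edges touch nearby nodes and reuse cache lines. */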
881: PetscMallocValidate(__LINE__,PETSC_FUNCTION_NAME,__FILE__);
882: k = 0;
883: for (i = 0; i < nedgeLoc; i++) {
884: int cross_node=nnodesLoc/2;
885: node1 = tmp[eperm[i]] + 1;
886: node2 = tmp[nedgeLoc+eperm[i]] + 1;
887: #if defined(INTERLACING)
888: grid->eptr[k++] = node1;
889: grid->eptr[k++] = node2;
890: #else
891: grid->eptr[i] = node1;
892: grid->eptr[nedgeLoc+i] = node2;
893: #endif
894: /* if (node1 > node2)
895: printf("On processor %d, for edge %d node1 = %d, node2 = %d\n",
896: rank,i,node1,node2);*/
897: if ((node1 <= cross_node) && (node2 > cross_node)) cross_edges++;
898: }
899: PetscPrintf(comm,"Number of cross edges %d\n", cross_edges);
900: PetscFree(tmp);
901: #if defined(_OPENMP) && !defined(HAVE_REDUNDANT_WORK) && !defined(HAVE_EDGE_COLORING)
902: /* Now make the local 'ia' and 'ja' arrays */
903: ICALLOC(nvertices+1,&grid->ia);
904: /* Use tmp for a work array */
905: ICALLOC(nvertices,&tmp);
906: f77GETIA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
907: nnz = grid->ia[nvertices] - 1;
908: ICALLOC(nnz,&grid->ja);
909: f77GETJA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
910: PetscFree(tmp);
911: #else
912: /* Now make the local 'ia' and 'ja' arrays */
913: ICALLOC(nnodesLoc+1,&grid->ia);
914: /* Use tmp for a work array */
915: ICALLOC(nnodesLoc,&tmp);
916: f77GETIA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
917: nnz = grid->ia[nnodesLoc] - 1;
918: #if defined(BLOCKING)
919: PetscPrintf(comm,"The Jacobian has %d non-zero blocks with block size = %d\n",nnz,bs);
920: #else
921: PetscPrintf(comm,"The Jacobian has %d non-zeros\n",nnz);
922: #endif
923: ICALLOC(nnz,&grid->ja);
924: f77GETJA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
925: PetscFree(tmp);
926: #endif
927: ICALLOC(nvertices,&grid->loc2glo);
928: PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
929: PetscFree(l2a);
930: l2a = grid->loc2glo;
931: ICALLOC(nvertices,&grid->loc2pet);
932: l2p = grid->loc2pet;
933: PetscMemcpy(l2p,l2a,nvertices*sizeof(int));
934: AOApplicationToPetsc(ao,nvertices,l2p);
936: /* Renumber unit normals of dual face (from node1 to node2)
937: and the area of the dual mesh face */
938: FCALLOC(nedgeLocEst,&ftmp);
939: FCALLOC(nedgeLoc,&ftmp1);
940: FCALLOC(4*nedgeLoc,&grid->xyzn);
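  /* Layout note for the INTERLACING case: after the four passes below,
     grid->xyzn[4*i+0..3] holds (nx, ny, nz, area) of the dual face for local
     edge i, permuted by eperm[] to match the reordered edge list; edge_bit[]
     selects the locally kept edges from each chunk read off disk. */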
941: /* Do the x-component */
942: i = 0; k = 0;
943: remEdges = nedge;
944: PetscTime(&time_ini);
945: while (remEdges > 0) {
946: readEdges = PetscMin(remEdges,nedgeLocEst);
947: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
948: for (j = 0; j < readEdges; j++)
949: if (edge_bit[k] == (i+j)) {
950: ftmp1[k] = ftmp[j];
951: k++;
952: }
953: i += readEdges;
954: remEdges = remEdges - readEdges;
955: MPI_Barrier(comm);
956: }
957: for (i = 0; i < nedgeLoc; i++)
958: #if defined(INTERLACING)
959: grid->xyzn[4*i] = ftmp1[eperm[i]];
960: #else
961: grid->xyzn[i] = ftmp1[eperm[i]];
962: #endif
963: /* Do the y-component */
964: i = 0; k = 0;
965: remEdges = nedge;
966: while (remEdges > 0) {
967: readEdges = PetscMin(remEdges,nedgeLocEst);
968: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
969: for (j = 0; j < readEdges; j++)
970: if (edge_bit[k] == (i+j)) {
971: ftmp1[k] = ftmp[j];
972: k++;
973: }
974: i += readEdges;
975: remEdges = remEdges - readEdges;
976: MPI_Barrier(comm);
977: }
978: for (i = 0; i < nedgeLoc; i++)
979: #if defined(INTERLACING)
980: grid->xyzn[4*i+1] = ftmp1[eperm[i]];
981: #else
982: grid->xyzn[nedgeLoc+i] = ftmp1[eperm[i]];
983: #endif
984: /* Do the z-component */
985: i = 0; k = 0;
986: remEdges = nedge;
987: while (remEdges > 0) {
988: readEdges = PetscMin(remEdges,nedgeLocEst);
989: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
990: for (j = 0; j < readEdges; j++)
991: if (edge_bit[k] == (i+j)) {
992: ftmp1[k] = ftmp[j];
993: k++;
994: }
995: i += readEdges;
996: remEdges = remEdges - readEdges;
997: MPI_Barrier(comm);
998: }
999: for (i = 0; i < nedgeLoc; i++)
1000: #if defined(INTERLACING)
1001: grid->xyzn[4*i+2] = ftmp1[eperm[i]];
1002: #else
1003: grid->xyzn[2*nedgeLoc+i] = ftmp1[eperm[i]];
1004: #endif
1005: /* Do the area */
1006: i = 0; k = 0;
1007: remEdges = nedge;
1008: while (remEdges > 0) {
1009: readEdges = PetscMin(remEdges,nedgeLocEst);
1010: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
1011: for (j = 0; j < readEdges; j++)
1012: if (edge_bit[k] == (i+j)) {
1013: ftmp1[k] = ftmp[j];
1014: k++;
1015: }
1016: i += readEdges;
1017: remEdges = remEdges - readEdges;
1018: MPI_Barrier(comm);
1019: }
1020: for (i = 0; i < nedgeLoc; i++)
1021: #if defined(INTERLACING)
1022: grid->xyzn[4*i+3] = ftmp1[eperm[i]];
1023: #else
1024: grid->xyzn[3*nedgeLoc+i] = ftmp1[eperm[i]];
1025: #endif
1027: PetscFree(edge_bit);
1028: PetscFree(eperm);
1029: PetscFree(ftmp);
1030: PetscFree(ftmp1);
1031: PetscTime(&time_fin);
1032: time_fin -= time_ini;
1033: PetscPrintf(comm,"Edge normals partitioned\n");
1034: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
1035: #if defined(_OPENMP)
1036: /*Arrange for the division of work among threads*/
1037: #if defined(HAVE_EDGE_COLORING)
1038: #elif defined(HAVE_REDUNDANT_WORK)
1039: FCALLOC(4*nnodesLoc, &grid->resd);
1040: #else
1041: {
1042: /* Get the local adjacency structure of the graph for partitioning the local
1043: graph into max_threads pieces */
1044: int *ia,*ja,*vwtg=0,*adjwgt=0,options[5];
1045: int numflag = 0, wgtflag = 0, edgecut;
1046: int thr1,thr2,nedgeAllThreads,ned1,ned2;
1047: ICALLOC((nvertices+1),&ia);
1048: ICALLOC((2*nedgeLoc),&ja);
1049: ia[0] = 0;
1050: for (i = 1; i <= nvertices; i++) ia[i] = grid->ia[i]-i-1;
1051: for (i = 0; i < nvertices; i++) {
1052: int jstart,jend;
1053: jstart = grid->ia[i]-1;
1054: jend = grid->ia[i+1]-1;
1055: k = ia[i];
1056: for (j=jstart; j < jend; j++) {
1057: inode = grid->ja[j]-1;
1058: if (inode != i) ja[k++] = inode;
1059: }
1060: }
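      /* At this point ia[0..nvertices] and ja[] form a zero-based CSR adjacency
         of the local (including ghost) vertices with self connections removed,
         which is the graph format passed to METIS_PartGraphRecursive() below
         (numflag = 0, i.e. C-style numbering). */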
1061: ICALLOC(nvertices,&grid->part_thr);
1062: PetscMemzero(grid->part_thr,nvertices*sizeof(int));
1063: options[0] = 0;
1064: /* Call the pmetis library routine */
1065: if (max_threads > 1)
1066: METIS_PartGraphRecursive(&nvertices,ia,ja,vwtg,adjwgt,
1067: &wgtflag,&numflag,&max_threads,options,&edgecut,grid->part_thr);
1068: PetscPrintf(MPI_COMM_WORLD,"The number of cut edges is %d\n", edgecut);
1069: /* Write the partition vector to disk */
1070: flg = PETSC_FALSE;
1071: PetscOptionsGetBool(NULL,NULL,"-omp_partitioning",&flg,NULL);
1072: if (flg) {
1073: int *partv_loc, *partv_glo;
1074: int *disp,*counts,*loc2glo_glo;
1075: char part_file[PETSC_MAX_PATH_LEN];
1076: FILE *fp;
1078: ICALLOC(nnodes, &partv_glo);
1079: ICALLOC(nnodesLoc, &partv_loc);
1080: for (i = 0; i < nnodesLoc; i++)
1081: /*partv_loc[i] = grid->part_thr[i]*size + rank;*/
1082: partv_loc[i] = grid->part_thr[i] + max_threads*rank;
1083: ICALLOC(size,&disp);
1084: ICALLOC(size,&counts);
1085: MPI_Allgather(&nnodesLoc,1,MPI_INT,counts,1,MPI_INT,MPI_COMM_WORLD);
1086: disp[0] = 0;
1087: for (i = 1; i < size; i++) disp[i] = counts[i-1] + disp[i-1];
1088: ICALLOC(nnodes, &loc2glo_glo);
1089: MPI_Gatherv(grid->loc2glo,nnodesLoc,MPI_INT,loc2glo_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1090: MPI_Gatherv(partv_loc,nnodesLoc,MPI_INT,partv_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1091: if (rank == 0) {
1092: PetscSortIntWithArray(nnodes,loc2glo_glo,partv_glo);
1093: sprintf(part_file,"hyb_part_vec.%d",2*size);
1094: fp = fopen(part_file,"w");
1095: for (i = 0; i < nnodes; i++) fprintf(fp,"%d\n",partv_glo[i]);
1096: fclose(fp);
1097: }
1098: PetscFree(partv_loc);
1099: PetscFree(partv_glo);
1100: PetscFree(disp);
1101: PetscFree(counts);
1102: PetscFree(loc2glo_glo);
1103: }
1105: /* Divide the work among threads */
1106: k = 0;
1107: ICALLOC((max_threads+1),&grid->nedge_thr);
1108: PetscMemzero(grid->nedge_thr,(max_threads+1)*sizeof(int));
1109: cross_edges = 0;
1110: for (i = 0; i < nedgeLoc; i++) {
1111: node1 = grid->eptr[k++]-1;
1112: node2 = grid->eptr[k++]-1;
1113: thr1 = grid->part_thr[node1];
1114: thr2 = grid->part_thr[node2];
1115: grid->nedge_thr[thr1]+=1;
1116: if (thr1 != thr2) {
1117: grid->nedge_thr[thr2]+=1;
1118: cross_edges++;
1119: }
1120: }
1121: PetscPrintf(MPI_COMM_WORLD,"The number of cross edges after Metis partitioning is %d\n",cross_edges);
1122: ned1 = grid->nedge_thr[0];
1123: grid->nedge_thr[0] = 1;
1124: for (i = 1; i <= max_threads; i++) {
1125: ned2 = grid->nedge_thr[i];
1126: grid->nedge_thr[i] = grid->nedge_thr[i-1]+ned1;
1127: ned1 = ned2;
1128: }
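      /* The loop above turns the per-thread edge counts into 1-based starting
         offsets, e.g. counts {3,2,4} become offsets {1,4,6,10}; a cut edge was
         counted once for each of the two threads sharing it, so the totals
         include that duplication. */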
1129: /* Allocate a shared edge array. Note that a cut edge is evaluated
1130: by both the threads but updates are done only for the locally
1131: owned node */
1132: grid->nedgeAllThr = nedgeAllThreads = grid->nedge_thr[max_threads]-1;
1133: ICALLOC(2*nedgeAllThreads, &grid->edge_thr);
1134: ICALLOC(max_threads,&tmp);
1135: FCALLOC(4*nedgeAllThreads,&grid->xyzn_thr);
1136: for (i = 0; i < max_threads; i++) tmp[i] = grid->nedge_thr[i]-1;
1137: k = 0;
1138: for (i = 0; i < nedgeLoc; i++) {
1139: int ie1,ie2,ie3;
1140: node1 = grid->eptr[k++];
1141: node2 = grid->eptr[k++];
1142: thr1 = grid->part_thr[node1-1];
1143: thr2 = grid->part_thr[node2-1];
1144: ie1 = 2*tmp[thr1];
1145: ie2 = 4*tmp[thr1];
1146: ie3 = 4*i;
1148: grid->edge_thr[ie1] = node1;
1149: grid->edge_thr[ie1+1] = node2;
1150: grid->xyzn_thr[ie2] = grid->xyzn[ie3];
1151: grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1152: grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1153: grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];
1155: tmp[thr1]+=1;
1156: if (thr1 != thr2) {
1157: ie1 = 2*tmp[thr2];
1158: ie2 = 4*tmp[thr2];
1160: grid->edge_thr[ie1] = node1;
1161: grid->edge_thr[ie1+1] = node2;
1162: grid->xyzn_thr[ie2] = grid->xyzn[ie3];
1163: grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1164: grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1165: grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];
1167: tmp[thr2]+=1;
1168: }
1169: }
1170: }
1171: #endif
1172: #endif
1174: /* Remap coordinates */
1175: /*nnodesLocEst = nnodes/size;*/
1176: nnodesLocEst = PetscMin(nnodes,500000);
1177: FCALLOC(nnodesLocEst,&ftmp);
1178: FCALLOC(3*nvertices,&grid->xyz);
1179: remNodes = nnodes;
1180: i = 0;
1181: PetscTime(&time_ini);
1182: while (remNodes > 0) {
1183: readNodes = PetscMin(remNodes,nnodesLocEst);
1184: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1185: for (j = 0; j < readNodes; j++) {
1186: if (a2l[i+j] >= 0) {
1187: #if defined(INTERLACING)
1188: grid->xyz[3*a2l[i+j]] = ftmp[j];
1189: #else
1190: grid->xyz[a2l[i+j]] = ftmp[j];
1191: #endif
1192: }
1193: }
1194: i += nnodesLocEst;
1195: remNodes -= nnodesLocEst;
1196: MPI_Barrier(comm);
1197: }
1199: remNodes = nnodes;
1200: i = 0;
1201: while (remNodes > 0) {
1202: readNodes = PetscMin(remNodes,nnodesLocEst);
1203: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1204: for (j = 0; j < readNodes; j++) {
1205: if (a2l[i+j] >= 0) {
1206: #if defined(INTERLACING)
1207: grid->xyz[3*a2l[i+j]+1] = ftmp[j];
1208: #else
1209: grid->xyz[nnodesLoc+a2l[i+j]] = ftmp[j];
1210: #endif
1211: }
1212: }
1213: i += nnodesLocEst;
1214: remNodes -= nnodesLocEst;
1215: MPI_Barrier(comm);
1216: }
1218: remNodes = nnodes;
1219: i = 0;
1220: while (remNodes > 0) {
1221: readNodes = PetscMin(remNodes,nnodesLocEst);
1222: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1223: for (j = 0; j < readNodes; j++) {
1224: if (a2l[i+j] >= 0) {
1225: #if defined(INTERLACING)
1226: grid->xyz[3*a2l[i+j]+2] = ftmp[j];
1227: #else
1228: grid->xyz[2*nnodesLoc+a2l[i+j]] = ftmp[j];
1229: #endif
1230: }
1231: }
1232: i += nnodesLocEst;
1233: remNodes -= nnodesLocEst;
1234: MPI_Barrier(comm);
1235: }
1237: /* Renumber dual volume "area" */
1238: FCALLOC(nvertices,&grid->area);
1239: remNodes = nnodes;
1240: i = 0;
1241: while (remNodes > 0) {
1242: readNodes = PetscMin(remNodes,nnodesLocEst);
1243: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1244: for (j = 0; j < readNodes; j++)
1245: if (a2l[i+j] >= 0)
1246: grid->area[a2l[i+j]] = ftmp[j];
1247: i += nnodesLocEst;
1248: remNodes -= nnodesLocEst;
1249: MPI_Barrier(comm);
1250: }
1252: PetscFree(ftmp);
1253: PetscTime(&time_fin);
1254: time_fin -= time_ini;
1255: PetscPrintf(comm,"Coordinates remapped\n");
1256: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
1258: /* Now,handle all the solid boundaries - things to be done :
1259: * 1. Identify the nodes belonging to the solid
1260: * boundaries and count them.
1261: * 2. Put proper indices into f2ntn array,after making it
1262: * of suitable size.
1263: * 3. Remap the normals and areas of solid faces (sxn,syn,szn,
1264: * and sa arrays).
1265: */
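  /* The same three steps are repeated nearly verbatim for the viscous and free
     boundaries further below: tmp1[] collects the locally present boundary
     nodes, tmp2[] maps a local node number to its position in that compacted
     list, and tmp[] accumulates the renumbered facet connectivity. */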
1266: ICALLOC(nnbound, &grid->nntet);
1267: ICALLOC(nnbound, &grid->nnpts);
1268: ICALLOC(4*nnfacet,&grid->f2ntn);
1269: ICALLOC(nsnode,&grid->isnode);
1270: FCALLOC(nsnode,&grid->sxn);
1271: FCALLOC(nsnode,&grid->syn);
1272: FCALLOC(nsnode,&grid->szn);
1273: FCALLOC(nsnode,&grid->sa);
1274: PetscBinarySynchronizedRead(comm,fdes,grid->nntet,nnbound,PETSC_INT);
1275: PetscBinarySynchronizedRead(comm,fdes,grid->nnpts,nnbound,PETSC_INT);
1276: PetscBinarySynchronizedRead(comm,fdes,grid->f2ntn,4*nnfacet,PETSC_INT);
1277: PetscBinarySynchronizedRead(comm,fdes,grid->isnode,nsnode,PETSC_INT);
1278: PetscBinarySynchronizedRead(comm,fdes,grid->sxn,nsnode,PETSC_SCALAR);
1279: PetscBinarySynchronizedRead(comm,fdes,grid->syn,nsnode,PETSC_SCALAR);
1280: PetscBinarySynchronizedRead(comm,fdes,grid->szn,nsnode,PETSC_SCALAR);
1282: isurf = 0;
1283: nsnodeLoc = 0;
1284: nnfacetLoc = 0;
1285: nb = 0;
1286: nte = 0;
1287: ICALLOC(3*nnfacet,&tmp);
1288: ICALLOC(nsnode,&tmp1);
1289: ICALLOC(nnodes,&tmp2);
1290: FCALLOC(4*nsnode,&ftmp);
1291: PetscMemzero(tmp,3*nnfacet*sizeof(int));
1292: PetscMemzero(tmp1,nsnode*sizeof(int));
1293: PetscMemzero(tmp2,nnodes*sizeof(int));
1295: j = 0;
1296: for (i = 0; i < nsnode; i++) {
1297: node1 = a2l[grid->isnode[i] - 1];
1298: if (node1 >= 0) {
1299: tmp1[nsnodeLoc] = node1;
1300: tmp2[node1] = nsnodeLoc;
1301: ftmp[j++] = grid->sxn[i];
1302: ftmp[j++] = grid->syn[i];
1303: ftmp[j++] = grid->szn[i];
1304: ftmp[j++] = grid->sa[i];
1305: nsnodeLoc++;
1306: }
1307: }
1308: for (i = 0; i < nnbound; i++) {
1309: for (j = isurf; j < isurf + grid->nntet[i]; j++) {
1310: node1 = a2l[grid->isnode[grid->f2ntn[j] - 1] - 1];
1311: node2 = a2l[grid->isnode[grid->f2ntn[nnfacet + j] - 1] - 1];
1312: node3 = a2l[grid->isnode[grid->f2ntn[2*nnfacet + j] - 1] - 1];
1314: if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1315: nnfacetLoc++;
1316: nte++;
1317: tmp[nb++] = tmp2[node1];
1318: tmp[nb++] = tmp2[node2];
1319: tmp[nb++] = tmp2[node3];
1320: }
1321: }
1322: isurf += grid->nntet[i];
1323: /*printf("grid->nntet[%d] before reordering is %d\n",i,grid->nntet[i]);*/
1324: grid->nntet[i] = nte;
1325: /*printf("grid->nntet[%d] after reordering is %d\n",i,grid->nntet[i]);*/
1326: nte = 0;
1327: }
1328: PetscFree(grid->f2ntn);
1329: PetscFree(grid->isnode);
1330: PetscFree(grid->sxn);
1331: PetscFree(grid->syn);
1332: PetscFree(grid->szn);
1333: PetscFree(grid->sa);
1334: ICALLOC(4*nnfacetLoc,&grid->f2ntn);
1335: ICALLOC(nsnodeLoc,&grid->isnode);
1336: FCALLOC(nsnodeLoc,&grid->sxn);
1337: FCALLOC(nsnodeLoc,&grid->syn);
1338: FCALLOC(nsnodeLoc,&grid->szn);
1339: FCALLOC(nsnodeLoc,&grid->sa);
1340: j = 0;
1341: for (i = 0; i < nsnodeLoc; i++) {
1342: grid->isnode[i] = tmp1[i] + 1;
1343: grid->sxn[i] = ftmp[j++];
1344: grid->syn[i] = ftmp[j++];
1345: grid->szn[i] = ftmp[j++];
1346: grid->sa[i] = ftmp[j++];
1347: }
1348: j = 0;
1349: for (i = 0; i < nnfacetLoc; i++) {
1350: grid->f2ntn[i] = tmp[j++] + 1;
1351: grid->f2ntn[nnfacetLoc+i] = tmp[j++] + 1;
1352: grid->f2ntn[2*nnfacetLoc+i] = tmp[j++] + 1;
1353: }
1354: PetscFree(tmp);
1355: PetscFree(tmp1);
1356: PetscFree(tmp2);
1357: PetscFree(ftmp);
1359: /* Now identify the triangles on which the current processor
1360: would perform force calculation */
1361: ICALLOC(nnfacetLoc,&grid->sface_bit);
1362: PetscMemzero(grid->sface_bit,nnfacetLoc*sizeof(int));
1363: for (i = 0; i < nnfacetLoc; i++) {
1364: node1 = l2a[grid->isnode[grid->f2ntn[i] - 1] - 1];
1365: node2 = l2a[grid->isnode[grid->f2ntn[nnfacetLoc + i] - 1] - 1];
1366: node3 = l2a[grid->isnode[grid->f2ntn[2*nnfacetLoc + i] - 1] - 1];
1367: if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1368: && (v2p[node3] >= rank)) &&
1369: ((v2p[node1] == rank) || (v2p[node2] == rank)
1370: || (v2p[node3] == rank)))
1371: grid->sface_bit[i] = 1;
1372: }
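  /* The test above keeps a facet for the force computation only on the lowest
     numbered rank owning one of its three nodes (all owners have rank >= this
     one and at least one equals it), so each shared boundary triangle is, in
     effect, counted once in f77FORCE(). */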
1373: /*printf("On processor %d total solid triangles = %d,locally owned = %d alpha = %d\n",rank,totTr,myTr,alpha);*/
1374: PetscPrintf(comm,"Solid boundaries partitioned\n");
1376: /* Now,handle all the viscous boundaries - things to be done :
1377: * 1. Identify the nodes belonging to the viscous
1378: * boundaries and count them.
1379: * 2. Put proper indices into f2ntv array,after making it
1380: * of suitable size
1381: * 3. Remap the normals and areas of viscous faces (vxn,vyn,vzn,
1382: * and va arrays).
1383: */
1384: ICALLOC(nvbound, &grid->nvtet);
1385: ICALLOC(nvbound, &grid->nvpts);
1386: ICALLOC(4*nvfacet,&grid->f2ntv);
1387: ICALLOC(nvnode,&grid->ivnode);
1388: FCALLOC(nvnode,&grid->vxn);
1389: FCALLOC(nvnode,&grid->vyn);
1390: FCALLOC(nvnode,&grid->vzn);
1391: FCALLOC(nvnode,&grid->va);
1392: PetscBinarySynchronizedRead(comm,fdes,grid->nvtet,nvbound,PETSC_INT);
1393: PetscBinarySynchronizedRead(comm,fdes,grid->nvpts,nvbound,PETSC_INT);
1394: PetscBinarySynchronizedRead(comm,fdes,grid->f2ntv,4*nvfacet,PETSC_INT);
1395: PetscBinarySynchronizedRead(comm,fdes,grid->ivnode,nvnode,PETSC_INT);
1396: PetscBinarySynchronizedRead(comm,fdes,grid->vxn,nvnode,PETSC_SCALAR);
1397: PetscBinarySynchronizedRead(comm,fdes,grid->vyn,nvnode,PETSC_SCALAR);
1398: PetscBinarySynchronizedRead(comm,fdes,grid->vzn,nvnode,PETSC_SCALAR);
1400: isurf = 0;
1401: nvnodeLoc = 0;
1402: nvfacetLoc = 0;
1403: nb = 0;
1404: nte = 0;
1405: ICALLOC(3*nvfacet,&tmp);
1406: ICALLOC(nvnode,&tmp1);
1407: ICALLOC(nnodes,&tmp2);
1408: FCALLOC(4*nvnode,&ftmp);
1409: PetscMemzero(tmp,3*nvfacet*sizeof(int));
1410: PetscMemzero(tmp1,nvnode*sizeof(int));
1411: PetscMemzero(tmp2,nnodes*sizeof(int));
1413: j = 0;
1414: for (i = 0; i < nvnode; i++) {
1415: node1 = a2l[grid->ivnode[i] - 1];
1416: if (node1 >= 0) {
1417: tmp1[nvnodeLoc] = node1;
1418: tmp2[node1] = nvnodeLoc;
1419: ftmp[j++] = grid->vxn[i];
1420: ftmp[j++] = grid->vyn[i];
1421: ftmp[j++] = grid->vzn[i];
1422: ftmp[j++] = grid->va[i];
1423: nvnodeLoc++;
1424: }
1425: }
1426: for (i = 0; i < nvbound; i++) {
1427: for (j = isurf; j < isurf + grid->nvtet[i]; j++) {
1428: node1 = a2l[grid->ivnode[grid->f2ntv[j] - 1] - 1];
1429: node2 = a2l[grid->ivnode[grid->f2ntv[nvfacet + j] - 1] - 1];
1430: node3 = a2l[grid->ivnode[grid->f2ntv[2*nvfacet + j] - 1] - 1];
1431: if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1432: nvfacetLoc++;
1433: nte++;
1434: tmp[nb++] = tmp2[node1];
1435: tmp[nb++] = tmp2[node2];
1436: tmp[nb++] = tmp2[node3];
1437: }
1438: }
1439: isurf += grid->nvtet[i];
1440: grid->nvtet[i] = nte;
1441: nte = 0;
1442: }
1443: PetscFree(grid->f2ntv);
1444: PetscFree(grid->ivnode);
1445: PetscFree(grid->vxn);
1446: PetscFree(grid->vyn);
1447: PetscFree(grid->vzn);
1448: PetscFree(grid->va);
1449: ICALLOC(4*nvfacetLoc,&grid->f2ntv);
1450: ICALLOC(nvnodeLoc,&grid->ivnode);
1451: FCALLOC(nvnodeLoc,&grid->vxn);
1452: FCALLOC(nvnodeLoc,&grid->vyn);
1453: FCALLOC(nvnodeLoc,&grid->vzn);
1454: FCALLOC(nvnodeLoc,&grid->va);
1455: j = 0;
1456: for (i = 0; i < nvnodeLoc; i++) {
1457: grid->ivnode[i] = tmp1[i] + 1;
1458: grid->vxn[i] = ftmp[j++];
1459: grid->vyn[i] = ftmp[j++];
1460: grid->vzn[i] = ftmp[j++];
1461: grid->va[i] = ftmp[j++];
1462: }
1463: j = 0;
1464: for (i = 0; i < nvfacetLoc; i++) {
1465: grid->f2ntv[i] = tmp[j++] + 1;
1466: grid->f2ntv[nvfacetLoc+i] = tmp[j++] + 1;
1467: grid->f2ntv[2*nvfacetLoc+i] = tmp[j++] + 1;
1468: }
1469: PetscFree(tmp);
1470: PetscFree(tmp1);
1471: PetscFree(tmp2);
1472: PetscFree(ftmp);
1474: /* Now identify the triangles on which the current processor
1475: would perform force calculation */
1476: ICALLOC(nvfacetLoc,&grid->vface_bit);
1477: PetscMemzero(grid->vface_bit,nvfacetLoc*sizeof(int));
1478: for (i = 0; i < nvfacetLoc; i++) {
1479: node1 = l2a[grid->ivnode[grid->f2ntv[i] - 1] - 1];
1480: node2 = l2a[grid->ivnode[grid->f2ntv[nvfacetLoc + i] - 1] - 1];
1481: node3 = l2a[grid->ivnode[grid->f2ntv[2*nvfacetLoc + i] - 1] - 1];
1482: if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1483: && (v2p[node3] >= rank)) &&
1484: ((v2p[node1] == rank) || (v2p[node2] == rank)
1485: || (v2p[node3] == rank))) {
1486: grid->vface_bit[i] = 1;
1487: }
1488: }
1489: PetscFree(v2p);
1490: PetscPrintf(comm,"Viscous boundaries partitioned\n");
1492: /* Now,handle all the free boundaries - things to be done :
1493: * 1. Identify the nodes belonging to the free
1494: * boundaries and count them.
1495: * 2. Put proper indices into f2ntf array,after making it
1496: * of suitable size
1497: * 3. Remap the normals and areas of free bound. faces (fxn,fyn,fzn,
1498: * and fa arrays).
1499: */
1501: ICALLOC(nfbound, &grid->nftet);
1502: ICALLOC(nfbound, &grid->nfpts);
1503: ICALLOC(4*nffacet,&grid->f2ntf);
1504: ICALLOC(nfnode,&grid->ifnode);
1505: FCALLOC(nfnode,&grid->fxn);
1506: FCALLOC(nfnode,&grid->fyn);
1507: FCALLOC(nfnode,&grid->fzn);
1508: FCALLOC(nfnode,&grid->fa);
1509: PetscBinarySynchronizedRead(comm,fdes,grid->nftet,nfbound,PETSC_INT);
1510: PetscBinarySynchronizedRead(comm,fdes,grid->nfpts,nfbound,PETSC_INT);
1511: PetscBinarySynchronizedRead(comm,fdes,grid->f2ntf,4*nffacet,PETSC_INT);
1512: PetscBinarySynchronizedRead(comm,fdes,grid->ifnode,nfnode,PETSC_INT);
1513: PetscBinarySynchronizedRead(comm,fdes,grid->fxn,nfnode,PETSC_SCALAR);
1514: PetscBinarySynchronizedRead(comm,fdes,grid->fyn,nfnode,PETSC_SCALAR);
1515: PetscBinarySynchronizedRead(comm,fdes,grid->fzn,nfnode,PETSC_SCALAR);
1517: isurf = 0;
1518: nfnodeLoc = 0;
1519: nffacetLoc = 0;
1520: nb = 0;
1521: nte = 0;
1522: ICALLOC(3*nffacet,&tmp);
1523: ICALLOC(nfnode,&tmp1);
1524: ICALLOC(nnodes,&tmp2);
1525: FCALLOC(4*nfnode,&ftmp);
1526: PetscMemzero(tmp,3*nffacet*sizeof(int));
1527: PetscMemzero(tmp1,nfnode*sizeof(int));
1528: PetscMemzero(tmp2,nnodes*sizeof(int));
1530: j = 0;
1531: for (i = 0; i < nfnode; i++) {
1532: node1 = a2l[grid->ifnode[i] - 1];
1533: if (node1 >= 0) {
1534: tmp1[nfnodeLoc] = node1;
1535: tmp2[node1] = nfnodeLoc;
1536: ftmp[j++] = grid->fxn[i];
1537: ftmp[j++] = grid->fyn[i];
1538: ftmp[j++] = grid->fzn[i];
1539: ftmp[j++] = grid->fa[i];
1540: nfnodeLoc++;
1541: }
1542: }
1543: for (i = 0; i < nfbound; i++) {
1544: for (j = isurf; j < isurf + grid->nftet[i]; j++) {
1545: node1 = a2l[grid->ifnode[grid->f2ntf[j] - 1] - 1];
1546: node2 = a2l[grid->ifnode[grid->f2ntf[nffacet + j] - 1] - 1];
1547: node3 = a2l[grid->ifnode[grid->f2ntf[2*nffacet + j] - 1] - 1];
1548: if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1549: nffacetLoc++;
1550: nte++;
1551: tmp[nb++] = tmp2[node1];
1552: tmp[nb++] = tmp2[node2];
1553: tmp[nb++] = tmp2[node3];
1554: }
1555: }
1556: isurf += grid->nftet[i];
1557: grid->nftet[i] = nte;
1558: nte = 0;
1559: }
1560: PetscFree(grid->f2ntf);
1561: PetscFree(grid->ifnode);
1562: PetscFree(grid->fxn);
1563: PetscFree(grid->fyn);
1564: PetscFree(grid->fzn);
1565: PetscFree(grid->fa);
1566: ICALLOC(4*nffacetLoc,&grid->f2ntf);
1567: ICALLOC(nfnodeLoc,&grid->ifnode);
1568: FCALLOC(nfnodeLoc,&grid->fxn);
1569: FCALLOC(nfnodeLoc,&grid->fyn);
1570: FCALLOC(nfnodeLoc,&grid->fzn);
1571: FCALLOC(nfnodeLoc,&grid->fa);
1572: j = 0;
1573: for (i = 0; i < nfnodeLoc; i++) {
1574: grid->ifnode[i] = tmp1[i] + 1;
1575: grid->fxn[i] = ftmp[j++];
1576: grid->fyn[i] = ftmp[j++];
1577: grid->fzn[i] = ftmp[j++];
1578: grid->fa[i] = ftmp[j++];
1579: }
1580: j = 0;
1581: for (i = 0; i < nffacetLoc; i++) {
1582: grid->f2ntf[i] = tmp[j++] + 1;
1583: grid->f2ntf[nffacetLoc+i] = tmp[j++] + 1;
1584: grid->f2ntf[2*nffacetLoc+i] = tmp[j++] + 1;
1585: }
1587: PetscFree(tmp);
1588: PetscFree(tmp1);
1589: PetscFree(tmp2);
1590: PetscFree(ftmp);
1591: PetscPrintf(comm,"Free boundaries partitioned\n");
1593: flg = PETSC_FALSE;
1594: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
1595: if (flg) {
1596: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after partitioning\n");
1597: }
1599: /* Put different mappings and other info into grid */
1600: /* ICALLOC(nvertices,&grid->loc2pet);
1601: ICALLOC(nvertices,&grid->loc2glo);
1602: PetscMemcpy(grid->loc2pet,l2p,nvertices*sizeof(int));
1603: PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
1604: PetscFree(l2a);
1605: PetscFree(l2p);*/
1607: grid->nnodesLoc = nnodesLoc;
1608: grid->nedgeLoc = nedgeLoc;
1609: grid->nvertices = nvertices;
1610: grid->nsnodeLoc = nsnodeLoc;
1611: grid->nvnodeLoc = nvnodeLoc;
1612: grid->nfnodeLoc = nfnodeLoc;
1613: grid->nnfacetLoc = nnfacetLoc;
1614: grid->nvfacetLoc = nvfacetLoc;
1615: grid->nffacetLoc = nffacetLoc;
1616: /*
1617: * FCALLOC(nvertices*4, &grid->gradx);
1618: * FCALLOC(nvertices*4, &grid->grady);
1619: * FCALLOC(nvertices*4, &grid->gradz);
1620: */
1621: FCALLOC(nvertices, &grid->cdt);
1622: FCALLOC(nvertices*4, &grid->phi);
1623: /*
1624: FCALLOC(nvertices, &grid->r11);
1625: FCALLOC(nvertices, &grid->r12);
1626: FCALLOC(nvertices, &grid->r13);
1627: FCALLOC(nvertices, &grid->r22);
1628: FCALLOC(nvertices, &grid->r23);
1629: FCALLOC(nvertices, &grid->r33);
1630: */
1631: FCALLOC(7*nnodesLoc, &grid->rxy);
1633: /* Map the 'ja' array in petsc ordering */
1634: for (i = 0; i < nnz; i++) grid->ja[i] = l2a[grid->ja[i] - 1];
1635: AOApplicationToPetsc(ao,nnz,grid->ja);
1636: AODestroy(&ao);
1638: /* Print the different mappings
1639: *
1640: */
1641: {
1642: int partLoc[7],partMax[7],partMin[7],partSum[7];
1643: partLoc[0] = nnodesLoc;
1644: partLoc[1] = nvertices;
1645: partLoc[2] = nedgeLoc;
1646: partLoc[3] = nnfacetLoc;
1647: partLoc[4] = nffacetLoc;
1648: partLoc[5] = nsnodeLoc;
1649: partLoc[6] = nfnodeLoc;
1650: for (i = 0; i < 7; i++) {
1651: partMin[i] = 0;
1652: partMax[i] = 0;
1653: partSum[i] = 0;
1654: }
1656: MPI_Allreduce(partLoc,partMax,7,MPI_INT,MPI_MAX,comm);
1657: MPI_Allreduce(partLoc,partMin,7,MPI_INT,MPI_MIN,comm);
1658: MPI_Allreduce(partLoc,partSum,7,MPI_INT,MPI_SUM,comm);
1659: PetscPrintf(comm,"==============================\n");
1660: PetscPrintf(comm,"Partitioning quality info ....\n");
1661: PetscPrintf(comm,"==============================\n");
1662: PetscPrintf(comm,"------------------------------------------------------------\n");
1663: PetscPrintf(comm,"Item Min Max Average Total\n");
1664: PetscPrintf(comm,"------------------------------------------------------------\n");
1665: PetscPrintf(comm,"Local Nodes %9d %9d %9d %9d\n",
1666: partMin[0],partMax[0],partSum[0]/size,partSum[0]);
1667: PetscPrintf(comm,"Local+Ghost Nodes %9d %9d %9d %9d\n",
1668: partMin[1],partMax[1],partSum[1]/size,partSum[1]);
1669: PetscPrintf(comm,"Local Edges %9d %9d %9d %9d\n",
1670: partMin[2],partMax[2],partSum[2]/size,partSum[2]);
1671: PetscPrintf(comm,"Local solid faces %9d %9d %9d %9d\n",
1672: partMin[3],partMax[3],partSum[3]/size,partSum[3]);
1673: PetscPrintf(comm,"Local free faces %9d %9d %9d %9d\n",
1674: partMin[4],partMax[4],partSum[4]/size,partSum[4]);
1675: PetscPrintf(comm,"Local solid nodes %9d %9d %9d %9d\n",
1676: partMin[5],partMax[5],partSum[5]/size,partSum[5]);
1677: PetscPrintf(comm,"Local free nodes %9d %9d %9d %9d\n",
1678: partMin[6],partMax[6],partSum[6]/size,partSum[6]);
1679: PetscPrintf(comm,"------------------------------------------------------------\n");
1680: }
1681: flg = PETSC_FALSE;
1682: PetscOptionsGetBool(NULL,NULL,"-partition_info",&flg,NULL);
1683: if (flg) {
1684: char part_file[PETSC_MAX_PATH_LEN];
1685: sprintf(part_file,"output.%d",rank);
1686: fptr1 = fopen(part_file,"w");
1688: fprintf(fptr1,"---------------------------------------------\n");
1689: fprintf(fptr1,"Local and Global Grid Parameters are :\n");
1690: fprintf(fptr1,"---------------------------------------------\n");
1691: fprintf(fptr1,"Local\t\t\t\tGlobal\n");
1692: fprintf(fptr1,"nnodesLoc = %d\t\tnnodes = %d\n",nnodesLoc,nnodes);
1693: fprintf(fptr1,"nedgeLoc = %d\t\t\tnedge = %d\n",nedgeLoc,nedge);
1694: fprintf(fptr1,"nnfacetLoc = %d\t\tnnfacet = %d\n",nnfacetLoc,nnfacet);
1695: fprintf(fptr1,"nvfacetLoc = %d\t\t\tnvfacet = %d\n",nvfacetLoc,nvfacet);
1696: fprintf(fptr1,"nffacetLoc = %d\t\t\tnffacet = %d\n",nffacetLoc,nffacet);
1697: fprintf(fptr1,"nsnodeLoc = %d\t\t\tnsnode = %d\n",nsnodeLoc,nsnode);
1698: fprintf(fptr1,"nvnodeLoc = %d\t\t\tnvnode = %d\n",nvnodeLoc,nvnode);
1699: fprintf(fptr1,"nfnodeLoc = %d\t\t\tnfnode = %d\n",nfnodeLoc,nfnode);
1700: fprintf(fptr1,"\n");
1701: fprintf(fptr1,"nvertices = %d\n",nvertices);
1702: fprintf(fptr1,"nnbound = %d\n",nnbound);
1703: fprintf(fptr1,"nvbound = %d\n",nvbound);
1704: fprintf(fptr1,"nfbound = %d\n",nfbound);
1705: fprintf(fptr1,"\n");
1707: fprintf(fptr1,"---------------------------------------------\n");
1708: fprintf(fptr1,"Different Orderings\n");
1709: fprintf(fptr1,"---------------------------------------------\n");
1710: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1711: fprintf(fptr1,"---------------------------------------------\n");
1712: for (i = 0; i < nvertices; i++) fprintf(fptr1,"%d\t\t%d\t\t%d\n",i,grid->loc2pet[i],grid->loc2glo[i]);
1713: fprintf(fptr1,"\n");
1715: fprintf(fptr1,"---------------------------------------------\n");
1716: fprintf(fptr1,"Solid Boundary Nodes\n");
1717: fprintf(fptr1,"---------------------------------------------\n");
1718: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1719: fprintf(fptr1,"---------------------------------------------\n");
1720: for (i = 0; i < nsnodeLoc; i++) {
1721: j = grid->isnode[i]-1;
1722: fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1723: }
1724: fprintf(fptr1,"\n");
1725: fprintf(fptr1,"---------------------------------------------\n");
1726: fprintf(fptr1,"f2ntn array\n");
1727: fprintf(fptr1,"---------------------------------------------\n");
1728: for (i = 0; i < nnfacetLoc; i++) {
1729: fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntn[i],
1730: grid->f2ntn[nnfacetLoc+i],grid->f2ntn[2*nnfacetLoc+i]);
1731: }
1732: fprintf(fptr1,"\n");
1734: fprintf(fptr1,"---------------------------------------------\n");
1735: fprintf(fptr1,"Viscous Boundary Nodes\n");
1736: fprintf(fptr1,"---------------------------------------------\n");
1737: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1738: fprintf(fptr1,"---------------------------------------------\n");
1739: for (i = 0; i < nvnodeLoc; i++) {
1740: j = grid->ivnode[i]-1;
1741: fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1742: }
1743: fprintf(fptr1,"\n");
1744: fprintf(fptr1,"---------------------------------------------\n");
1745: fprintf(fptr1,"f2ntv array\n");
1746: fprintf(fptr1,"---------------------------------------------\n");
1747: for (i = 0; i < nvfacetLoc; i++) {
1748: fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntv[i],
1749: grid->f2ntv[nvfacetLoc+i],grid->f2ntv[2*nvfacetLoc+i]);
1750: }
1751: fprintf(fptr1,"\n");
1753: fprintf(fptr1,"---------------------------------------------\n");
1754: fprintf(fptr1,"Free Boundary Nodes\n");
1755: fprintf(fptr1,"---------------------------------------------\n");
1756: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1757: fprintf(fptr1,"---------------------------------------------\n");
1758: for (i = 0; i < nfnodeLoc; i++) {
1759: j = grid->ifnode[i]-1;
1760: fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1761: }
1762: fprintf(fptr1,"\n");
1763: fprintf(fptr1,"---------------------------------------------\n");
1764: fprintf(fptr1,"f2ntf array\n");
1765: fprintf(fptr1,"---------------------------------------------\n");
1766: for (i = 0; i < nffacetLoc; i++) {
1767: fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntf[i],
1768: grid->f2ntf[nffacetLoc+i],grid->f2ntf[2*nffacetLoc+i]);
1769: }
1770: fprintf(fptr1,"\n");
1772: fprintf(fptr1,"---------------------------------------------\n");
1773: fprintf(fptr1,"Neighborhood Info In Various Ordering\n");
1774: fprintf(fptr1,"---------------------------------------------\n");
1775: ICALLOC(nnodes,&p2l);
1776: for (i = 0; i < nvertices; i++) p2l[grid->loc2pet[i]] = i;
1777: for (i = 0; i < nnodesLoc; i++) {
1778: jstart = grid->ia[grid->loc2glo[i]] - 1;
1779: jend = grid->ia[grid->loc2glo[i]+1] - 1;
1780: fprintf(fptr1,"Neighbors of Node %d in Local Ordering are :",i);
1781: for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",p2l[grid->ja[j]]);
1782: fprintf(fptr1,"\n");
1784: fprintf(fptr1,"Neighbors of Node %d in PETSc ordering are :",grid->loc2pet[i]);
1785: for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->ja[j]);
1786: fprintf(fptr1,"\n");
1788: fprintf(fptr1,"Neighbors of Node %d in Global Ordering are :",grid->loc2glo[i]);
1789: for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->loc2glo[p2l[grid->ja[j]]]);
1790: fprintf(fptr1,"\n");
1792: }
1793: fprintf(fptr1,"\n");
1794: PetscFree(p2l);
1795: fclose(fptr1);
1796: }
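/*
   With -partition_info every rank writes its own "output.<rank>" file.  The three
   columns printed for each mapping are the local numbering used inside this process,
   the PETSc numbering (contiguous per process, established by the partitioning), and
   the original application numbering of the mesh.
*/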
1798: /* Free the temporary arrays */
1799: PetscFree(a2l);
1800: MPI_Barrier(comm);
1801: return(0);
1802: }
1804: /*
1805: encode 3 8-bit binary bytes as 4 '6-bit' characters; len is the number of bytes remaining, of which at most 3 are used
1806: */
1807: void *base64_encodeblock(void *vout,const void *vin,int len)
1808: {
1809: unsigned char *out = (unsigned char*)vout,in[3] = {0,0,0};
1810: /* Translation Table as described in RFC1113 */
1811: static const char cb64[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1812: memcpy(in,vin,PetscMin(len,3));
1813: out[0] = cb64[in[0] >> 2];
1814: out[1] = cb64[((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4)];
1815: out[2] = (unsigned char) (len > 1 ? cb64[((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6)] : '=');
1816: out[3] = (unsigned char) (len > 2 ? cb64[in[2] & 0x3f] : '=');
1817: return (void*)(out+4);
1818: }
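/*
   A small usage sketch (illustration only, not called anywhere in this file): the three
   ASCII bytes "Man" (0x4D,0x61,0x6E) encode to the four characters "TWFu", and a final
   group with len < 3 is padded with '='.  The caller must always provide 4 output bytes
   per call.

   unsigned char buf[5];
   base64_encodeblock(buf,"Man",3);
   buf[4] = 0;   // buf now holds the string "TWFu"
*/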
1820: /* Write binary data; byte swapping is not performed. */
1821: static PetscErrorCode PetscFWrite_FUN3D(MPI_Comm comm,FILE *fp,void *data,PetscInt n,PetscDataType dtype,PetscBool base64)
1822: {
1824: PetscMPIInt rank;
1827: if (n < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Trying to write a negative amount of data %D",n);
1828: if (!n) return(0);
1829: MPI_Comm_rank(comm,&rank);
1830: if (rank == 0) {
1831: size_t count,size;
1832: int bytes;
1833: switch (dtype) {
1834: case PETSC_DOUBLE:
1835: size = sizeof(double);
1836: break;
1837: case PETSC_FLOAT:
1838: size = sizeof(float);
1839: break;
1840: case PETSC_INT:
1841: size = sizeof(PetscInt);
1842: break;
1843: case PETSC_CHAR:
1844: size = sizeof(char);
1845: break;
1846: default: SETERRQ(comm,PETSC_ERR_SUP,"Data type not supported");
1847: }
1848: bytes = size*n;
1849: if (base64) {
1850: unsigned char *buf,*ptr;
1851: int i;
1852: size_t b64alloc = 9 + (n*size*4) / 3 + (n*size*4) % 3;
1853: PetscMalloc(b64alloc,&buf);
1854: ptr = buf;
1855: ptr = (unsigned char*)base64_encodeblock(ptr,&bytes,3);
1856: ptr = (unsigned char*)base64_encodeblock(ptr,((char*)&bytes)+3,1);
1857: for (i=0; i<bytes; i+=3) {
1858: int left = bytes - i;
1859: ptr = (unsigned char*)base64_encodeblock(ptr,((char*)data)+i,left);
1860: }
1861: *ptr++ = '\n';
1862: /* printf("encoded 4+%d raw bytes in %zd base64 chars, allocated for %zd\n",bytes,ptr-buf,b64alloc); */
1863: count = fwrite(buf,1,ptr-buf,fp);
1864: if (count < (size_t)(ptr-buf)) {
1865: perror("");
1866: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %D of %D bytes",(PetscInt)count,(PetscInt)(ptr-buf));
1867: }
1868: PetscFree(buf);
1869: } else {
1870: count = fwrite(&bytes,sizeof(int),1,fp);
1871: if (count != 1) {
1872: perror("");
1873: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Error writing byte count");
1874: }
1875: count = fwrite(data,size,(size_t)n,fp);
1876: if ((int)count != n) {
1877: perror("");
1878: SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %D/%D array members of size %D",(PetscInt)count,n,(PetscInt)size);
1879: }
1880: }
1881: }
1882: return(0);
1883: }
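/*
   In raw mode each array is written as a 4-byte int holding the payload size followed by
   the payload itself, which is the layout a VTK "appended" data block expects; in base64
   mode the same header+payload stream is pushed through base64_encodeblock() three bytes
   at a time and terminated with a newline.  Because no byte swapping is done, WritePVTU()
   below records the native byte order in the VTK header.
*/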
1885: static void SortInt2(PetscInt *a,PetscInt *b)
1886: {
1887: if (*b < *a) {
1888: PetscInt c = *b;
1889: *b = *a;
1890: *a = c;
1891: }
1892: }
1894: /* b = intersection(a,b) */
1895: static PetscErrorCode IntersectInt(PetscInt na,const PetscInt *a,PetscInt *nb,PetscInt *b)
1896: {
1897: PetscInt i,n,j;
1900: j = 0;
1901: n = 0;
1902: for (i=0; i<*nb; i++) {
1903: while (j<na && a[j]<b[i]) j++;
1904: if (j<na && a[j]==b[i]) {
1905: b[n++] = b[i];
1906: j++;
1907: }
1908: }
1909: *nb = n;
1910: return(0);
1911: }
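/*
   Both arrays are assumed to be sorted in increasing order (the caller sorts each row of
   the upper-triangular adjacency with PetscSortInt() first).  For example, with
   a = {1,3,5,7}, *nb = 4 and b = {2,3,7,9}, the call leaves b = {3,7} and *nb = 2.
*/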
1913: /*
1914: This function currently has a semantic bug: it only produces cells whose edges are all stored locally. Since the local mesh
1915: does not even store edges between unowned nodes, primal cells that are effectively shared between processes will not
1916: be constructed. This causes visualization artifacts.
1918: This issue could be resolved by either (a) storing more edges from the original mesh or (b) communicating an extra
1919: layer of edges in this function.
1920: */
1921: static PetscErrorCode InferLocalCellConnectivity(PetscInt nnodes,PetscInt nedge,const PetscInt *eptr,PetscInt *incell,PetscInt **iconn)
1922: {
1924: PetscInt ncell,acell,(*conn)[4],node0,node1,node2,node3,i,j,k,l,rowmax;
1925: PetscInt *ui,*uj,*utmp,*tmp1,*tmp2,*tmp3,ntmp1,ntmp2,ntmp3;
1926: #if defined(INTERLACING)
1927: # define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i*2+0]-1; n2 = eptr[i*2+1]-1; } while (0)
1928: #else
1929: # define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i+0*nedge]-1; n2 = eptr[i+1*nedge]-1; } while (0)
1930: #endif
1933: *incell = -1;
1934: *iconn = NULL;
1935: acell = 100000; /* allocate for this many cells */
1936: PetscMalloc1(acell,&conn);
1937: PetscMalloc2(nnodes+1,&ui,nedge,&uj);
1938: PetscCalloc1(nnodes,&utmp);
1939: /* count the number of edges in the upper-triangular matrix u */
1940: for (i=0; i<nedge; i++) { /* count number of nonzeros in upper triangular matrix */
1941: GetEdge(eptr,i,node0,node1);
1942: utmp[PetscMin(node0,node1)]++;
1943: }
1944: rowmax = 0;
1945: ui[0] = 0;
1946: for (i=0; i<nnodes; i++) {
1947: rowmax = PetscMax(rowmax,utmp[i]);
1948: ui[i+1] = ui[i] + utmp[i]; /* convert from count to row offsets */
1949: utmp[i] = 0;
1950: }
1951: for (i=0; i<nedge; i++) { /* assemble upper triangular matrix U */
1952: GetEdge(eptr,i,node0,node1);
1953: SortInt2(&node0,&node1);
1954: uj[ui[node0] + utmp[node0]++] = node1;
1955: }
1956: PetscFree(utmp);
1957: for (i=0; i<nnodes; i++) { /* sort every row */
1958: PetscInt n = ui[i+1] - ui[i];
1959: PetscSortInt(n,&uj[ui[i]]);
1960: }
1962: /* Infer cells */
1963: ncell = 0;
1964: PetscMalloc3(rowmax,&tmp1,rowmax,&tmp2,rowmax,&tmp3);
1965: for (i=0; i<nnodes; i++) {
1966: node0 = i;
1967: ntmp1 = ui[node0+1] - ui[node0]; /* Number of candidates for node1 */
1968: PetscMemcpy(tmp1,&uj[ui[node0]],ntmp1*sizeof(PetscInt));
1969: for (j=0; j<ntmp1; j++) {
1970: node1 = tmp1[j];
1971: if (node1 < 0 || nnodes <= node1) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node1,nnodes);
1972: if (node1 <= node0) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node0,node1);
1973: ntmp2 = ui[node1+1] - ui[node1];
1974: PetscMemcpy(tmp2,&uj[ui[node1]],ntmp2*sizeof(PetscInt));
1975: IntersectInt(ntmp1,tmp1,&ntmp2,tmp2);
1976: for (k=0; k<ntmp2; k++) {
1977: node2 = tmp2[k];
1978: if (node2 < 0 || nnodes <= node2) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node2,nnodes);
1979: if (node2 <= node1) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node1,node2);
1980: ntmp3 = ui[node2+1] - ui[node2];
1981: PetscMemcpy(tmp3,&uj[ui[node2]],ntmp3*sizeof(PetscInt));
1982: IntersectInt(ntmp2,tmp2,&ntmp3,tmp3);
1983: for (l=0; l<ntmp3; l++) {
1984: node3 = tmp3[l];
1985: if (node3 < 0 || nnodes <= node3) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node3,nnodes);
1986: if (node3 <= node2) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node2,node3);
1987: if (ncell >= acell) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer exceeded");
1988: if (ntmp3 < 3) continue;
1989: conn[ncell][0] = node0;
1990: conn[ncell][1] = node1;
1991: conn[ncell][2] = node2;
1992: conn[ncell][3] = node3;
1993: if (0) {
1994: PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
1995: PetscViewerASCIIPrintf(viewer,"created cell %d: %d %d %d %d\n",ncell,node0,node1,node2,node3);
1996: PetscIntView(ntmp1,tmp1,viewer);
1997: PetscIntView(ntmp2,tmp2,viewer);
1998: PetscIntView(ntmp3,tmp3,viewer);
1999: /* uns3d.msh has a handful of "tetrahedra" that overlap by violating the following condition. As far as I
2000: * can tell, that means it is an invalid mesh. I don't know what the intent was. */
2001: if (ntmp3 > 2) SETERRQ(PETSC_COMM_SELF,1,"More than two ways to complete a tetrahedron using a common triangle");
2002: }
2003: ncell++;
2004: }
2005: }
2006: }
2007: }
2008: PetscFree3(tmp1,tmp2,tmp3);
2009: PetscFree2(ui,uj);
2011: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"Inferred %D cells with nnodes=%D nedge=%D\n",ncell,nnodes,nedge);
2012: PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
2013: *incell = ncell;
2014: *iconn = (PetscInt*)conn;
2015: return(0);
2016: }
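/*
   In outline, the loops above enumerate tetrahedra as 4-cliques of the edge graph:
   node0 < node1 < node2 < node3 is accepted only when node1 is a forward neighbor of
   node0, node2 is a forward neighbor of both (first IntersectInt), and node3 is a
   forward neighbor of all three (second IntersectInt), i.e. all six edges of the cell
   are present in the local edge list.
*/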
2018: static PetscErrorCode GridCompleteOverlap(GRID *grid,PetscInt *invertices,PetscInt *inedgeOv,PetscInt **ieptrOv)
2019: {
2021: PetscInt nedgeLoc,nedgeOv,i,j,cnt,node0,node1,node0p,node1p,nnodes,nnodesLoc,nvertices,rstart,nodeEdgeCountAll,nodeEdgeRstart;
2022: PetscInt *nodeEdgeCount,*nodeEdgeOffset,*eIdxOv,*p2l,*eptrOv;
2023: Vec VNodeEdge,VNodeEdgeInfo,VNodeEdgeInfoOv,VNodeEdgeOv;
2024: PetscScalar *vne,*vnei;
2025: IS isglobal,isedgeOv;
2026: VecScatter nescat,neiscat;
2027: PetscBool flg;
2030: nnodes = grid->nnodes; /* Total number of global nodes */
2031: nnodesLoc = grid->nnodesLoc; /* Number of owned nodes */
2032: nvertices = grid->nvertices; /* Number of owned+ghosted nodes */
2033: nedgeLoc = grid->nedgeLoc; /* Number of edges connected to owned nodes */
2035: /* Count the number of neighbors of each owned node */
2036: MPI_Scan(&nnodesLoc,&rstart,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);
2037: rstart -= nnodesLoc;
2038: PetscMalloc2(nnodesLoc,&nodeEdgeCount,nnodesLoc,&nodeEdgeOffset);
2039: PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2040: for (i=0; i<nedgeLoc; i++) {
2041: GetEdge(grid->eptr,i,node0,node1);
2042: node0p = grid->loc2pet[node0];
2043: node1p = grid->loc2pet[node1];
2044: if (rstart <= node0p && node0p < rstart+nnodesLoc) nodeEdgeCount[node0p-rstart]++;
2045: if (rstart <= node1p && node1p < rstart+nnodesLoc) nodeEdgeCount[node1p-rstart]++;
2046: }
2047: /* Get the offset in the node-based edge array */
2048: nodeEdgeOffset[0] = 0;
2049: for (i=0; i<nnodesLoc-1; i++) nodeEdgeOffset[i+1] = nodeEdgeOffset[i] + nodeEdgeCount[i];
2050: nodeEdgeCountAll = nodeEdgeCount[nnodesLoc-1] + nodeEdgeOffset[nnodesLoc-1];
2052: /* Pack a Vec that lists, node by node, the edges incident to each owned node; the neighbor nodes are stored by global index */
2053: VecCreateMPI(PETSC_COMM_WORLD,nodeEdgeCountAll,PETSC_DETERMINE,&VNodeEdge);
2054: PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2055: VecGetArray(VNodeEdge,&vne);
2056: for (i=0; i<nedgeLoc; i++) {
2057: GetEdge(grid->eptr,i,node0,node1);
2058: node0p = grid->loc2pet[node0];
2059: node1p = grid->loc2pet[node1];
2060: if (rstart <= node0p && node0p < rstart+nnodesLoc) vne[nodeEdgeOffset[node0p-rstart] + nodeEdgeCount[node0p-rstart]++] = node1p;
2061: if (rstart <= node1p && node1p < rstart+nnodesLoc) vne[nodeEdgeOffset[node1p-rstart] + nodeEdgeCount[node1p-rstart]++] = node0p;
2062: }
2063: VecRestoreArray(VNodeEdge,&vne);
2064: VecGetOwnershipRange(VNodeEdge,&nodeEdgeRstart,NULL);
2066: /* Move the count and offset into a Vec so that we can use VecScatter, translating offset from local to global */
2067: VecCreate(PETSC_COMM_WORLD,&VNodeEdgeInfo);
2068: VecSetSizes(VNodeEdgeInfo,2*nnodesLoc,2*nnodes);
2069: VecSetBlockSize(VNodeEdgeInfo,2);
2070: VecSetType(VNodeEdgeInfo,VECMPI);
2072: VecGetArray(VNodeEdgeInfo,&vnei);
2073: for (i=0; i<nnodesLoc; i++) {
2074: vnei[i*2+0] = nodeEdgeCount[i]; /* Total number of edges from this vertex */
2075: vnei[i*2+1] = nodeEdgeOffset[i] + nodeEdgeRstart; /* Now the global index in the next comm round */
2076: }
2077: VecRestoreArray(VNodeEdgeInfo,&vnei);
2078: PetscFree2(nodeEdgeCount,nodeEdgeOffset);
2080: /* Create a Vec to receive the edge count and global offset for each node in owned+ghosted, get them, and clean up */
2081: VecCreate(PETSC_COMM_SELF,&VNodeEdgeInfoOv);
2082: VecSetSizes(VNodeEdgeInfoOv,2*nvertices,2*nvertices);
2083: VecSetBlockSize(VNodeEdgeInfoOv,2);
2084: VecSetType(VNodeEdgeInfoOv,VECSEQ);
2086: ISCreateBlock(PETSC_COMM_WORLD,2,nvertices,grid->loc2pet,PETSC_COPY_VALUES,&isglobal); /* Address the nodes in overlap to get info from */
2087: VecScatterCreate(VNodeEdgeInfo,isglobal,VNodeEdgeInfoOv,NULL,&neiscat);
2088: VecScatterBegin(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2089: VecScatterEnd(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2090: VecScatterDestroy(&neiscat);
2091: VecDestroy(&VNodeEdgeInfo);
2092: ISDestroy(&isglobal);
2094: /* Create a Vec to receive the actual edges for all nodes (owned and ghosted), execute the scatter */
2095: nedgeOv = 0; /* First count the number of edges in the complete overlap */
2096: VecGetArray(VNodeEdgeInfoOv,&vnei);
2097: for (i=0; i<nvertices; i++) nedgeOv += (PetscInt)vnei[2*i+0];
2098: /* Allocate for the global indices in VNodeEdge of the edges to receive */
2099: PetscMalloc1(nedgeOv,&eIdxOv);
2100: for (i=0,cnt=0; i<nvertices; i++) {
2101: for (j=0; j<(PetscInt)vnei[2*i+0]; j++) eIdxOv[cnt++] = (PetscInt)vnei[2*i+1] + j;
2102: }
2103: VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2104: ISCreateGeneral(PETSC_COMM_WORLD,nedgeOv,eIdxOv,PETSC_USE_POINTER,&isedgeOv);
2105: VecCreateSeq(PETSC_COMM_SELF,nedgeOv,&VNodeEdgeOv);
2106: VecScatterCreate(VNodeEdge,isedgeOv,VNodeEdgeOv,NULL,&nescat);
2107: VecScatterBegin(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2108: VecScatterEnd(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2109: VecScatterDestroy(&nescat);
2110: VecDestroy(&VNodeEdge);
2111: ISDestroy(&isedgeOv);
2112: PetscFree(eIdxOv);
2114: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of edges before pruning: %D, half=%D\n",rank,PETSC_FUNCTION_NAME,nedgeOv,nedgeOv/2);
2116: /* Create the non-scalable global-to-local index map. Yuck, but it has already been used elsewhere. */
2117: PetscMalloc1(nnodes,&p2l);
2118: for (i=0; i<nnodes; i++) p2l[i] = -1;
2119: for (i=0; i<nvertices; i++) p2l[grid->loc2pet[i]] = i;
2120: if (1) {
2121: PetscInt m = 0;
2122: for (i=0; i<nnodes; i++) m += (p2l[i] >= 0);
2123: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of global indices that map to local indices: %D; nvertices=%D nnodesLoc=%D nnodes=%D\n",rank,PETSC_FUNCTION_NAME,m,nvertices,nnodesLoc,nnodes);
2124: }
2126: /* Log each edge connecting nodes in owned+ghosted exactly once */
2127: VecGetArray(VNodeEdgeInfoOv,&vnei);
2128: VecGetArray(VNodeEdgeOv,&vne);
2129: /* First count the number of edges to keep */
2130: nedgeOv = 0;
2131: for (i=0,cnt=0; i<nvertices; i++) {
2132: PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2133: node0 = i;
2134: for (j=0; j<n; j++) {
2135: node1p = vne[cnt++];
2136: node1 = p2l[node1p];
2137: if (node0 < node1) nedgeOv++;
2138: }
2139: }
2140: /* Array of edges to keep */
2141: PetscMalloc1(2*nedgeOv,&eptrOv);
2142: nedgeOv = 0;
2143: for (i=0,cnt=0; i<nvertices; i++) {
2144: PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2145: node0 = i;
2146: for (j=0; j<n; j++) {
2147: node1p = vne[cnt++];
2148: node1 = p2l[node1p];
2149: if (node0 < node1) {
2150: eptrOv[2*nedgeOv+0] = node0;
2151: eptrOv[2*nedgeOv+1] = node1;
2152: nedgeOv++;
2153: }
2154: }
2155: }
2156: VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2157: VecRestoreArray(VNodeEdgeOv,&vne);
2158: VecDestroy(&VNodeEdgeInfoOv);
2159: VecDestroy(&VNodeEdgeOv);
2160: PetscFree(p2l);
2162: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: nedgeLoc=%D nedgeOv=%D\n",rank,PETSC_FUNCTION_NAME,nedgeLoc,nedgeOv);
2163: PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
2165: flg = PETSC_TRUE;
2166: PetscOptionsGetBool(NULL,NULL,"-complete_overlap",&flg,NULL);
2167: if (flg) {
2168: *invertices = grid->nvertices; /* We did not change the number of vertices */
2169: *inedgeOv = nedgeOv;
2170: *ieptrOv = eptrOv;
2171: } else {
2172: *invertices = grid->nvertices;
2173: *inedgeOv = nedgeLoc;
2174: PetscFree(eptrOv);
2175: PetscMalloc1(2*nedgeLoc,&eptrOv);
2176: PetscMemcpy(eptrOv,grid->eptr,2*nedgeLoc*sizeof(PetscInt));
2177: *ieptrOv = eptrOv;
2178: }
2179: return(0);
2180: }
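/*
   Communication pattern used above, in outline: each process publishes, per owned node,
   the pair (edge count, global offset) in VNodeEdgeInfo and the packed neighbor lists in
   VNodeEdge; one VecScatter then pulls the (count,offset) pairs for every owned+ghosted
   node, and a second VecScatter pulls the corresponding neighbor lists.  Keeping only the
   pairs with node0 < node1 in local numbering removes the duplicates that arise from
   storing each edge at both of its endpoints.
*/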
2182: static PetscErrorCode WritePVTU(AppCtx *user,const char *fname,PetscBool base64)
2183: {
2184: GRID *grid = user->grid;
2185: TstepCtx *tsCtx = user->tsCtx;
2186: FILE *vtu,*pvtu;
2187: char pvtu_fname[PETSC_MAX_PATH_LEN],vtu_fname[PETSC_MAX_PATH_LEN];
2188: MPI_Comm comm;
2189: PetscMPIInt rank,size;
2190: PetscInt i,nvertices = 0,nedgeLoc = 0,ncells,bs,nloc,boffset = 0,*eptr = NULL;
2191: PetscErrorCode ierr;
2192: Vec Xloc,Xploc,Xuloc;
2193: unsigned char *celltype;
2194: int *celloffset,*conn,*cellrank;
2195: const PetscScalar *x;
2196: PetscScalar *xu,*xp;
2197: const char *byte_order = PetscBinaryBigEndian() ? "BigEndian" : "LittleEndian";
2200: GridCompleteOverlap(user->grid,&nvertices,&nedgeLoc,&eptr);
2201: comm = PETSC_COMM_WORLD;
2202: MPI_Comm_rank(comm,&rank);
2203: MPI_Comm_size(comm,&size);
2204: #if defined(PETSC_USE_COMPLEX) || !defined(PETSC_USE_REAL_DOUBLE) || defined(PETSC_USE_64BIT_INDICES)
2205: SETERRQ(comm,PETSC_ERR_SUP,"This function is only implemented for scalar-type=real precision=double, 32-bit indices");
2206: #endif
2207: PetscSNPrintf(pvtu_fname,sizeof(pvtu_fname),"%s-%D.pvtu",fname,tsCtx->itstep);
2208: PetscSNPrintf(vtu_fname,sizeof(vtu_fname),"%s-%D-%D.vtu",fname,tsCtx->itstep,rank);
2209: PetscFOpen(comm,pvtu_fname,"w",&pvtu);
2210: PetscFPrintf(comm,pvtu,"<?xml version=\"1.0\"?>\n");
2211: PetscFPrintf(comm,pvtu,"<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2212: PetscFPrintf(comm,pvtu," <PUnstructuredGrid GhostLevel=\"0\">\n");
2213: PetscFPrintf(comm,pvtu," <PPointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2214: PetscFPrintf(comm,pvtu," <PDataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" />\n");
2215: PetscFPrintf(comm,pvtu," <PDataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" />\n");
2216: PetscFPrintf(comm,pvtu," </PPointData>\n");
2217: PetscFPrintf(comm,pvtu," <PCellData>\n");
2218: PetscFPrintf(comm,pvtu," <PDataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" />\n");
2219: PetscFPrintf(comm,pvtu," </PCellData>\n");
2220: PetscFPrintf(comm,pvtu," <PPoints>\n");
2221: PetscFPrintf(comm,pvtu," <PDataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" />\n");
2222: PetscFPrintf(comm,pvtu," </PPoints>\n");
2223: PetscFPrintf(comm,pvtu," <PCells>\n");
2224: PetscFPrintf(comm,pvtu," <PDataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" />\n");
2225: PetscFPrintf(comm,pvtu," <PDataArray type=\"Int32\" Name=\"offsets\" NumberOfComponents=\"1\" />\n");
2226: PetscFPrintf(comm,pvtu," <PDataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" />\n");
2227: PetscFPrintf(comm,pvtu," </PCells>\n");
2228: for (i=0; i<size; i++) {
2229: PetscFPrintf(comm,pvtu," <Piece Source=\"%s-%D-%D.vtu\" />\n",fname,tsCtx->itstep,i);
2230: }
2231: PetscFPrintf(comm,pvtu," </PUnstructuredGrid>\n");
2232: PetscFPrintf(comm,pvtu,"</VTKFile>\n");
2233: PetscFClose(comm,pvtu);
2235: Xloc = grid->qnodeLoc;
2236: VecScatterBegin(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2237: VecScatterEnd(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2238: VecGetBlockSize(Xloc,&bs);
2239: if (bs != 4) SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"expected block size 4, got %D",bs);
2240: VecGetSize(Xloc,&nloc);
2241: if (nloc/bs != nvertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"expected nloc/bs=%D to match nvertices=%D",nloc/bs,nvertices);
2242: if (nvertices != grid->nvertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"expected nvertices=%D to match grid->nvertices=%D",nvertices,grid->nvertices);
2243: VecCreateSeq(PETSC_COMM_SELF,nvertices,&Xploc);
2245: VecCreate(PETSC_COMM_SELF,&Xuloc);
2246: VecSetSizes(Xuloc,3*nvertices,3*nvertices);
2247: VecSetBlockSize(Xuloc,3);
2248: VecSetType(Xuloc,VECSEQ);
2250: VecGetArrayRead(Xloc,&x);
2251: VecGetArray(Xploc,&xp);
2252: VecGetArray(Xuloc,&xu);
2253: for (i=0; i<nvertices; i++) {
2254: xp[i] = x[i*4+0];
2255: xu[i*3+0] = x[i*4+1];
2256: xu[i*3+1] = x[i*4+2];
2257: xu[i*3+2] = x[i*4+3];
2258: }
2259: VecRestoreArrayRead(Xloc,&x);
2261: InferLocalCellConnectivity(nvertices,nedgeLoc,eptr,&ncells,&conn);
2263: PetscFOpen(PETSC_COMM_SELF,vtu_fname,"w",&vtu);
2264: PetscFPrintf(PETSC_COMM_SELF,vtu,"<?xml version=\"1.0\"?>\n");
2265: PetscFPrintf(PETSC_COMM_SELF,vtu,"<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2266: PetscFPrintf(PETSC_COMM_SELF,vtu," <UnstructuredGrid>\n");
2267: PetscFPrintf(PETSC_COMM_SELF,vtu," <Piece NumberOfPoints=\"%D\" NumberOfCells=\"%D\">\n",nvertices,ncells);
2269: /* Solution fields */
2270: PetscFPrintf(PETSC_COMM_SELF,vtu," <PointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2271: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2272: boffset += nvertices*sizeof(PetscScalar) + sizeof(int);
2273: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%D\" />\n",boffset);
2274: boffset += nvertices*3*sizeof(PetscScalar) + sizeof(int);
2275: PetscFPrintf(PETSC_COMM_SELF,vtu," </PointData>\n");
2276: PetscFPrintf(PETSC_COMM_SELF,vtu," <CellData>\n");
2277: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2278: boffset += ncells*sizeof(int) + sizeof(int);
2279: PetscFPrintf(PETSC_COMM_SELF,vtu," </CellData>\n");
2280: /* Coordinate positions */
2281: PetscFPrintf(PETSC_COMM_SELF,vtu," <Points>\n");
2282: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%D\" />\n",boffset);
2283: boffset += nvertices*3*sizeof(double) + sizeof(int);
2284: PetscFPrintf(PETSC_COMM_SELF,vtu," </Points>\n");
2285: /* Cell connectivity */
2286: PetscFPrintf(PETSC_COMM_SELF,vtu," <Cells>\n");
2287: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2288: boffset += ncells*4*sizeof(int) + sizeof(int);
2289: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Int32\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2290: boffset += ncells*sizeof(int) + sizeof(int);
2291: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2292: boffset += ncells*sizeof(unsigned char) + sizeof(int);
2293: PetscFPrintf(PETSC_COMM_SELF,vtu," </Cells>\n");
2294: PetscFPrintf(PETSC_COMM_SELF,vtu," </Piece>\n");
2295: PetscFPrintf(PETSC_COMM_SELF,vtu," </UnstructuredGrid>\n");
2296: PetscFPrintf(PETSC_COMM_SELF,vtu," <AppendedData encoding=\"%s\">\n",base64 ? "base64" : "raw");
2297: PetscFPrintf(PETSC_COMM_SELF,vtu,"_");
2299: /* Write pressure */
2300: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xp,nvertices,PETSC_SCALAR,base64);
2302: /* Write velocity */
2303: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xu,nvertices*3,PETSC_SCALAR,base64);
2305: /* Label cell rank, not a measure of computation because nothing is actually computed at cells. This is written
2306: * primarily to aid in debugging. The partition for computation should label vertices. */
2307: PetscMalloc1(ncells,&cellrank);
2308: for (i=0; i<ncells; i++) cellrank[i] = rank;
2309: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,cellrank,ncells,PETSC_INT,base64);
2310: PetscFree(cellrank);
2312: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,grid->xyz,nvertices*3,PETSC_DOUBLE,base64);
2313: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,conn,ncells*4,PETSC_INT,base64);
2314: PetscFree(conn);
2316: PetscMalloc1(ncells,&celloffset);
2317: for (i=0; i<ncells; i++) celloffset[i] = 4*(i+1);
2318: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celloffset,ncells,PETSC_INT,base64);
2319: PetscFree(celloffset);
2321: PetscMalloc1(ncells,&celltype);
2322: for (i=0; i<ncells; i++) celltype[i] = 10; /* VTK_TETRA */
2323: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celltype,ncells,PETSC_CHAR,base64);
2324: PetscFree(celltype);
2326: PetscFPrintf(PETSC_COMM_SELF,vtu,"\n </AppendedData>\n");
2327: PetscFPrintf(PETSC_COMM_SELF,vtu,"</VTKFile>\n");
2328: PetscFClose(PETSC_COMM_SELF,vtu);
2330: VecRestoreArray(Xploc,&xp);
2331: VecRestoreArray(Xuloc,&xu);
2332: VecDestroy(&Xploc);
2333: VecDestroy(&Xuloc);
2334: PetscFree(eptr);
2335: return(0);
2336: }
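/*
   The "offset" attributes written into the .vtu header above are byte offsets into the
   appended data block, so every boffset increment must match exactly what
   PetscFWrite_FUN3D() later writes for that array: sizeof(int) for the size header plus
   the raw payload.  If the two ever disagree, readers such as ParaView will misinterpret
   every subsequent array in the file.
*/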
2338: /*---------------------------------------------------------------------*/
2339: int SetPetscDS(GRID *grid,TstepCtx *tsCtx)
2340: /*---------------------------------------------------------------------*/
2341: {
2342: int ierr,i,j,bs;
2343: int nnodes,jstart,jend,nbrs_diag,nbrs_offd;
2344: int nnodesLoc,nvertices;
2345: int *val_diag,*val_offd,*svertices,*loc2pet;
2346: IS isglobal,islocal;
2347: ISLocalToGlobalMapping isl2g;
2348: PetscBool flg;
2349: MPI_Comm comm = PETSC_COMM_WORLD;
2352: nnodes = grid->nnodes;
2353: nnodesLoc = grid->nnodesLoc;
2354: nvertices = grid->nvertices;
2355: loc2pet = grid->loc2pet;
2356: bs = 4;
2358: /* Set up the PETSc datastructures */
2360: VecCreate(comm,&grid->qnode);
2361: VecSetSizes(grid->qnode,bs*nnodesLoc,bs*nnodes);
2362: VecSetBlockSize(grid->qnode,bs);
2363: VecSetType(grid->qnode,VECMPI);
2365: VecDuplicate(grid->qnode,&grid->res);
2366: VecDuplicate(grid->qnode,&tsCtx->qold);
2367: VecDuplicate(grid->qnode,&tsCtx->func);
2369: VecCreate(MPI_COMM_SELF,&grid->qnodeLoc);
2370: VecSetSizes(grid->qnodeLoc,bs*nvertices,bs*nvertices);
2371: VecSetBlockSize(grid->qnodeLoc,bs);
2372: VecSetType(grid->qnodeLoc,VECSEQ);
2374: VecCreate(comm,&grid->grad);
2375: VecSetSizes(grid->grad,3*bs*nnodesLoc,3*bs*nnodes);
2376: VecSetBlockSize(grid->grad,3*bs);
2377: VecSetType(grid->grad,VECMPI);
2379: VecCreate(MPI_COMM_SELF,&grid->gradLoc);
2380: VecSetSizes(grid->gradLoc,3*bs*nvertices,3*bs*nvertices);
2381: VecSetBlockSize(grid->gradLoc,3*bs);
2382: VecSetType(grid->gradLoc,VECSEQ);
2384: /* Create Scatter between the local and global vectors */
2385: /* First create scatter for qnode */
2386: ISCreateStride(MPI_COMM_SELF,bs*nvertices,0,1,&islocal);
2387: #if defined(INTERLACING)
2388: #if defined(BLOCKING)
2389: ICALLOC(nvertices,&svertices);
2390: for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2391: ISCreateBlock(MPI_COMM_SELF,bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2392: #else
2393: ICALLOC(bs*nvertices,&svertices);
2394: for (i = 0; i < nvertices; i++)
2395: for (j = 0; j < bs; j++) svertices[j+bs*i] = j + bs*loc2pet[i];
2396: ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2397: #endif
2398: #else
2399: ICALLOC(bs*nvertices,&svertices);
2400: for (j = 0; j < bs; j++)
2401: for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2402: ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2403: #endif
2404: PetscFree(svertices);
2405: VecScatterCreate(grid->qnode,isglobal,grid->qnodeLoc,islocal,&grid->scatter);
2406: ISDestroy(&isglobal);
2407: ISDestroy(&islocal);
2409: /* Now create scatter for gradient vector of qnode */
2410: ISCreateStride(MPI_COMM_SELF,3*bs*nvertices,0,1,&islocal);
2411: #if defined(INTERLACING)
2412: #if defined(BLOCKING)
2413: ICALLOC(nvertices,&svertices);
2414: for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2415: ISCreateBlock(MPI_COMM_SELF,3*bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2416: #else
2417: ICALLOC(3*bs*nvertices,&svertices);
2418: for (i = 0; i < nvertices; i++)
2419: for (j = 0; j < 3*bs; j++) svertices[j+3*bs*i] = j + 3*bs*loc2pet[i];
2420: ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2421: #endif
2422: #else
2423: ICALLOC(3*bs*nvertices,&svertices);
2424: for (j = 0; j < 3*bs; j++)
2425: for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2426: ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2427: #endif
2428: PetscFree(svertices);
2429: VecScatterCreate(grid->grad,isglobal,grid->gradLoc,islocal,&grid->gradScatter);
2430: ISDestroy(&isglobal);
2431: ISDestroy(&islocal);
2433: /* Store the number of non-zeroes per row */
2434: #if defined(INTERLACING)
2435: #if defined(BLOCKING)
2436: ICALLOC(nnodesLoc,&val_diag);
2437: ICALLOC(nnodesLoc,&val_offd);
2438: for (i = 0; i < nnodesLoc; i++) {
2439: jstart = grid->ia[i] - 1;
2440: jend = grid->ia[i+1] - 1;
2441: nbrs_diag = 0;
2442: nbrs_offd = 0;
2443: for (j = jstart; j < jend; j++) {
2444: if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2445: else nbrs_offd++;
2446: }
2447: val_diag[i] = nbrs_diag;
2448: val_offd[i] = nbrs_offd;
2449: }
2450: MatCreateBAIJ(comm,bs,bs*nnodesLoc,bs*nnodesLoc,
2451: bs*nnodes,bs*nnodes,PETSC_DEFAULT,val_diag,
2452: PETSC_DEFAULT,val_offd,&grid->A);
2453: #else
2454: ICALLOC(nnodesLoc*4,&val_diag);
2455: ICALLOC(nnodesLoc*4,&val_offd);
2456: for (i = 0; i < nnodesLoc; i++) {
2457: jstart = grid->ia[i] - 1;
2458: jend = grid->ia[i+1] - 1;
2459: nbrs_diag = 0;
2460: nbrs_offd = 0;
2461: for (j = jstart; j < jend; j++) {
2462: if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2463: else nbrs_offd++;
2464: }
2465: for (j = 0; j < 4; j++) {
2466: int row = 4*i + j;
2467: val_diag[row] = nbrs_diag*4;
2468: val_offd[row] = nbrs_offd*4;
2469: }
2470: }
2471: MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2472: bs*nnodes,bs*nnodes,NULL,val_diag,
2473: NULL,val_offd,&grid->A);
2474: #endif
2475: PetscFree(val_diag);
2476: PetscFree(val_offd);
2478: #else
2479: if (size > 1) SETERRQ(PETSC_COMM_SELF,1,"Parallel case not supported in non-interlaced case\n");
2480: ICALLOC(nnodes*4,&val_diag);
2481: ICALLOC(nnodes*4,&val_offd);
2482: for (j = 0; j < 4; j++)
2483: for (i = 0; i < nnodes; i++) {
2484: int row;
2485: row = i + j*nnodes;
2486: jstart = grid->ia[i] - 1;
2487: jend = grid->ia[i+1] - 1;
2488: nbrs_diag = jend - jstart;
2489: val_diag[row] = nbrs_diag*4;
2490: val_offd[row] = 0;
2491: }
2492: /* MatCreateSeqAIJ(MPI_COMM_SELF,nnodes*4,nnodes*4,NULL,
2493: val,&grid->A);*/
2494: MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2495: bs*nnodes,bs*nnodes,NULL,val_diag,
2496: NULL,val_offd,&grid->A);
2497: MatSetBlockSize(grid->A,bs);
2498: PetscFree(val_diag);
2499: PetscFree(val_offd);
2500: #endif
2502: flg = PETSC_FALSE;
2503: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
2504: if (flg) {
2505: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after allocating PETSc data structures\n");
2506: }
2508: /* Set the local-to-global mapping used when matrix elements are set in
2509: local ordering: first set the row-by-row mapping
2510: */
2511: ISLocalToGlobalMappingCreate(MPI_COMM_SELF,bs,nvertices,loc2pet,PETSC_COPY_VALUES,&isl2g);
2512: MatSetLocalToGlobalMapping(grid->A,isl2g,isl2g);
2513: ISLocalToGlobalMappingDestroy(&isl2g);
2514: return(0);
2515: }
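/*
   Preallocation sketch: for owned node i, val_diag[i] and val_offd[i] count the
   neighbors in the ia/ja adjacency whose PETSc indices fall inside and outside this
   process's row range [rstart, rstart+nnodesLoc), and they are handed to
   MatCreateBAIJ()/MatCreateAIJ() as per-(block-)row nonzero counts so that later matrix
   assembly does not trigger additional mallocs.
*/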
2517: /*================================= CLINK ===================================*/
2518: /* */
2519: /* Used in establishing the links between FORTRAN common blocks and C */
2520: /* */
2521: /*===========================================================================*/
2522: EXTERN_C_BEGIN
2523: void f77CLINK(CINFO *p1,CRUNGE *p2,CGMCOM *p3)
2524: {
2525: c_info = p1;
2526: c_runge = p2;
2527: c_gmcom = p3;
2528: }
2529: EXTERN_C_END
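/*
   The assumption here is that the Fortran side calls f77CLINK() once at startup with the
   addresses of its INFO, RUNGE and GMCOM common blocks, after which the C code reads them
   through the global pointers c_info, c_runge and c_gmcom.
*/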
2531: /*========================== SET_UP_GRID====================================*/
2532: /* */
2533: /* Allocates the memory for the fine grid */
2534: /* */
2535: /*==========================================================================*/
2536: int set_up_grid(GRID *grid)
2537: {
2538: int nnodes,nedge;
2539: int nsface,nvface,nfface,nbface;
2540: int tnode,ierr;
2541: /*int vface,lnodes,nnz,ncell,kvisc,ilu0,nsrch,ileast,ifcn,valloc;*/
2542: /*int nsnode,nvnode,nfnode; */
2543: /*int mgzero=0;*/ /* Variable so we don't allocate memory for multigrid */
2544: /*int jalloc;*/ /* If jalloc=1 allocate space for dfp and dfm */
2545: /*
2546: * stuff to read in dave's grids
2547: */
2548: /*int nnbound,nvbound,nfbound,nnfacet,nvfacet,nffacet,ntte;*/
2549: /* end of stuff */
2552: nnodes = grid->nnodes;
2553: tnode = grid->nnodes;
2554: nedge = grid->nedge;
2555: nsface = grid->nsface;
2556: nvface = grid->nvface;
2557: nfface = grid->nfface;
2558: nbface = nsface + nvface + nfface;
2560: /*ncell = grid->ncell;
2561: vface = grid->nedge;
2562: lnodes = grid->nnodes;
2563: nsnode = grid->nsnode;
2564: nvnode = grid->nvnode;
2565: nfnode = grid->nfnode;
2566: nsrch = c_gmcom->nsrch;
2567: ilu0 = c_gmcom->ilu0;
2568: ileast = grid->ileast;
2569: ifcn = c_gmcom->ifcn;
2570: jalloc = 0;
2571: kvisc = grid->jvisc;*/
2573: /* if (ilu0 >=1 && ifcn == 1) jalloc=0;*/
2575: /*
2576: * stuff to read in dave's grids
2577: */
2578: /*nnbound = grid->nnbound;
2579: nvbound = grid->nvbound;
2580: nfbound = grid->nfbound;
2581: nnfacet = grid->nnfacet;
2582: nvfacet = grid->nvfacet;
2583: nffacet = grid->nffacet;
2584: ntte = grid->ntte;*/
2585: /* end of stuff */
2587: /* if (!ileast) lnodes = 1;
2588: printf("In set_up_grid->jvisc = %d\n",grid->jvisc);
2590: if (grid->jvisc != 2 && grid->jvisc != 4 && grid->jvisc != 6)vface = 1;
2591: printf(" vface = %d \n",vface);
2592: if (grid->jvisc < 3) tnode = 1;
2593: valloc = 1;
2594: if (grid->jvisc == 0)valloc = 0;*/
2596: /*PetscPrintf(PETSC_COMM_WORLD," nsnode= %d nvnode= %d nfnode= %d\n",nsnode,nvnode,nfnode);*/
2597: /*PetscPrintf(PETSC_COMM_WORLD," nsface= %d nvface= %d nfface= %d\n",nsface,nvface,nfface);
2598: PetscPrintf(PETSC_COMM_WORLD," nbface= %d\n",nbface);*/
2599: /* Now allocate memory for the other grid arrays */
2600: /* ICALLOC(nedge*2, &grid->eptr); */
2601: ICALLOC(nsface, &grid->isface);
2602: ICALLOC(nvface, &grid->ivface);
2603: ICALLOC(nfface, &grid->ifface);
2604: /* ICALLOC(nsnode, &grid->isnode);
2605: ICALLOC(nvnode, &grid->ivnode);
2606: ICALLOC(nfnode, &grid->ifnode);*/
2607: /*ICALLOC(nnodes, &grid->clist);
2608: ICALLOC(nnodes, &grid->iupdate);
2609: ICALLOC(nsface*2, &grid->sface);
2610: ICALLOC(nvface*2, &grid->vface);
2611: ICALLOC(nfface*2, &grid->fface);
2612: ICALLOC(lnodes, &grid->icount);*/
/*FCALLOC(nnodes, &grid->x);
FCALLOC(nnodes, &grid->y);
FCALLOC(nnodes, &grid->z);
FCALLOC(nnodes, &grid->area);*/
/*
* FCALLOC(nnodes*4, &grid->gradx);
* FCALLOC(nnodes*4, &grid->grady);
* FCALLOC(nnodes*4, &grid->gradz);
* FCALLOC(nnodes, &grid->cdt);
*/
/*
* FCALLOC(nnodes*4, &grid->qnode);
* FCALLOC(nnodes*4, &grid->dq);
* FCALLOC(nnodes*4, &grid->res);
* FCALLOC(jalloc*nnodes*4*4,&grid->A);
* FCALLOC(nnodes*4, &grid->B);
* FCALLOC(jalloc*nedge*4*4,&grid->dfp);
* FCALLOC(jalloc*nedge*4*4,&grid->dfm);
*/
/*FCALLOC(nsnode, &grid->sxn);
FCALLOC(nsnode, &grid->syn);
FCALLOC(nsnode, &grid->szn);
FCALLOC(nsnode, &grid->sa);
FCALLOC(nvnode, &grid->vxn);
FCALLOC(nvnode, &grid->vyn);
FCALLOC(nvnode, &grid->vzn);
FCALLOC(nvnode, &grid->va);
FCALLOC(nfnode, &grid->fxn);
FCALLOC(nfnode, &grid->fyn);
FCALLOC(nfnode, &grid->fzn);
FCALLOC(nfnode, &grid->fa);
FCALLOC(nedge, &grid->xn);
FCALLOC(nedge, &grid->yn);
FCALLOC(nedge, &grid->zn);
FCALLOC(nedge, &grid->rl);*/
line2649">2649: FCALLOC(nbface*15,&grid-&
line2650">2650: FCALLOC(nbface*15,&grid-&
line2651">2651: FCALLOC(nbface*15,&grid-&
/*
* FCALLOC(nnodes*4, &grid->phi);
* FCALLOC(nnodes, &grid->r11);
* FCALLOC(nnodes, &grid->r12);
* FCALLOC(nnodes, &grid->r13);
* FCALLOC(nnodes, &grid->r22);
* FCALLOC(nnodes, &grid->r23);
* FCALLOC(nnodes, &grid->r33);
*/
/*
* Allocate memory for viscous length scale if turbulent
*/
line2664">2664: if (grid->jvisc >
line2665">2665: FCALLOC(tnode, &grid->
line2666">2666: FCALLOC(nnodes, &grid->t
line2667">2667: FCALLOC(nnodes, &grid->
line2668">2668: FCALLOC(tnode, &grid->tu
line2669">2669: FCALLOC(nedge, &grid->
line2670">2670: FCALLOC(nedge, &grid->
line2671">2671:
/*
* Allocate memory for MG transfer
*/
/*
ICALLOC(mgzero*nsface, &grid->isford);
ICALLOC(mgzero*nvface, &grid->ivford);
ICALLOC(mgzero*nfface, &grid->ifford);
ICALLOC(mgzero*nnodes, &grid->nflag);
ICALLOC(mgzero*nnodes, &grid->nnext);
ICALLOC(mgzero*nnodes, &grid->nneigh);
ICALLOC(mgzero*ncell, &grid->ctag);
ICALLOC(mgzero*ncell, &grid->csearch);
ICALLOC(valloc*ncell*4, &grid->c2n);
ICALLOC(valloc*ncell*6, &grid->c2e);
grid->c2c = (int*)grid->dfp;
ICALLOC(ncell*4, &grid->c2c);
ICALLOC(nnodes, &grid->cenc);
if (grid->iup == 1) {
ICALLOC(mgzero*nnodes*3, &grid->icoefup);
FCALLOC(mgzero*nnodes*3, &grid->rcoefup);
}
if (grid->idown == 1) {
ICALLOC(mgzero*nnodes*3, &grid->icoefdn);
FCALLOC(mgzero*nnodes*3, &grid->rcoefdn);
}
FCALLOC(nnodes*4, &grid->ff);
FCALLOC(tnode, &grid->turbff);
*/
/*
* If using GMRES (nsrch>0) allocate memory
*/
/* NoEq = 0;
if (nsrch > 0)NoEq = 4*nnodes;
if (nsrch < 0)NoEq = nnodes;
FCALLOC(NoEq, &grid->AP);
FCALLOC(NoEq, &grid->Xgm);
FCALLOC(NoEq, &grid->temr);
FCALLOC((abs(nsrch)+1)*NoEq,&grid->Fgm);
*/
/*
* stuff to read in dave's grids
*/
/*
ICALLOC(nnbound, &grid->ncolorn);
ICALLOC(nnbound*100,&grid->countn);
ICALLOC(nvbound, &grid->ncolorv);
ICALLOC(nvbound*100,&grid->countv);
ICALLOC(nfbound, &grid->ncolorf);
ICALLOC(nfbound*100,&grid->countf);
*/
/*ICALLOC(nnbound, &grid->nntet);
ICALLOC(nnbound, &grid->nnpts);
ICALLOC(nvbound, &grid->nvtet);
ICALLOC(nvbound, &grid->nvpts);
ICALLOC(nfbound, &grid->nftet);
ICALLOC(nfbound, &grid->nfpts);
ICALLOC(nnfacet*4,&grid->f2ntn);
ICALLOC(nvfacet*4,&grid->f2ntv);
ICALLOC(nffacet*4,&grid->f2ntf);*/
line2731">2731: return
line2732">2732
/*========================== WRITE_FINE_GRID ================================*/
/* */
/* Write memory locations and other information for the fine grid */
/* */
/*===========================================================================*/
line2739">2739: int write_fine_grid(GRID *grid)
line2740">2740
line2741">2741: FILE *
/* open file for output */
/* call the output frame.out */
line2747">2747: if (!(output = fopen("frame.out","a"))) SETERRQ(PETSC_COMM_SELF,1,"can't open frame.out"
line2748">2748: fprintf(output,"information for fine grid\n"
line2749">2749: fprintf(output,"\n"
line2750">2750: fprintf(output," address of fine grid = %p\n",(void*
line2752">2752: fprintf(output,"grid.nnodes = %d\n",grid->n
line2753">2753: fprintf(output,"grid.ncell = %d\n",grid->
line2754">2754: fprintf(output,"grid.nedge = %d\n",grid->
line2755">2755: fprintf(output,"grid.nsface = %d\n",grid->n
line2756">2756: fprintf(output,"grid.nvface = %d\n",grid->n
line2757">2757: fprintf(output,"grid.nfface = %d\n",grid->n
line2758">2758: fprintf(output,"grid.nsnode = %d\n",grid->n
line2759">2759: fprintf(output,"grid.nvnode = %d\n",grid->n
line2760">2760: fprintf(output,"grid.nfnode = %d\n",grid->n
/*
fprintf(output,"grid.eptr = %p\n",grid->eptr);
fprintf(output,"grid.isface = %p\n",grid->isface);
fprintf(output,"grid.ivface = %p\n",grid->ivface);
fprintf(output,"grid.ifface = %p\n",grid->ifface);
fprintf(output,"grid.isnode = %p\n",grid->isnode);
fprintf(output,"grid.ivnode = %p\n",grid->ivnode);
fprintf(output,"grid.ifnode = %p\n",grid->ifnode);
fprintf(output,"grid.c2n = %p\n",grid->c2n);
fprintf(output,"grid.c2e = %p\n",grid->c2e);
fprintf(output,"grid.xyz = %p\n",grid->xyz);
*/
/*fprintf(output,"grid.y = %p\n",grid->xyz);
fprintf(output,"grid.z = %p\n",grid->z);*/
/*
fprintf(output,"grid.area = %p\n",grid->area);
fprintf(output,"grid.qnode = %p\n",grid->qnode);
*/
/*
fprintf(output,"grid.gradx = %p\n",grid->gradx);
fprintf(output,"grid.grady = %p\n",grid->grady);
fprintf(output,"grid.gradz = %p\n",grid->gradz);
*/
/*
fprintf(output,"grid.cdt = %p\n",grid->cdt);
fprintf(output,"grid.sxn = %p\n",grid->sxn);
fprintf(output,"grid.syn = %p\n",grid->syn);
fprintf(output,"grid.szn = %p\n",grid->szn);
fprintf(output,"grid.vxn = %p\n",grid->vxn);
fprintf(output,"grid.vyn = %p\n",grid->vyn);
fprintf(output,"grid.vzn = %p\n",grid->vzn);
fprintf(output,"grid.fxn = %p\n",grid->fxn);
fprintf(output,"grid.fyn = %p\n",grid->fyn);
fprintf(output,"grid.fzn = %p\n",grid->fzn);
fprintf(output,"grid.xyzn = %p\n",grid->xyzn);
*/
/*fprintf(output,"grid.yn = %p\n",grid->yn);
fprintf(output,"grid.zn = %p\n",grid->zn);
fprintf(output,"grid.rl = %p\n",grid->rl);*/
line2800">2800: fclose(o
line2801">2801: return
line2802">2802
line2804">2804: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
line2805">2805: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncle,int *counte)
line2806">2806
line2807">2807: int ncolore = *nc
line2808">2808: int iedg = 0,ib = 0,ie = nedge,ta
line2809">2809: int i
line2810">2810: in
line2811">2811: ICALLOC(nnodes,&am
line2812">2812: while (ib <
line2813">2813: for (i = 0; i < nnodes; i++) tag[
line2814">2814: counte[ncolor
line2815">2815: for (i = ib; i < ie;
line2816">2816: n1 =
line2817">2817: n2 = e2n[i+
line2818">2818: tagcount = tag[n1]+t
/* If tagcount = 0 then this edge belongs in this color */
line2820">2820: if (!tagc
line2821">2821: tag[n1]
line2822">2822: tag[n2]
line2823">2823: e2n[i] = e2n
line2824">2824: e2n[i+nedge] = e2n[iedg+
line2825">2825: e2n[iedg]
line2826">2826: e2n[iedg+nedge
line2827">2827: n1 = ep
line2828">2828: eperm[i] = eperm
line2829">2829: eperm[iedg]
line2830">2830:
line2831">2831: counte[ncolor
line2832">2832:
line2833">2833:
line2834">2834: ib
line2835">2835: nco
line2836">2836:
line2837">2837: *ncle = n
line2838">2838: return
line2839">2839
line2840">2840: #endif
line2841">2841: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
line2842">2842: int EventCountersBegin(int *gen_start,PetscScalar *time_start_counters)
line2843">2843
line2845">2845: if ((*gen_start = start_counters(event0,event1)) < 0) SETERRQ(PETSC_COMM_SELF,1,"Error in start_counters\n"
line2846">2846: PetscTime(&time_start_cou
line2847">2847: return
line2848">2848
line2850">2850: int EventCountersEnd(int gen_start,PetscScalar time_start_counters)
line2851">2851
line2852">2852: int gen_rea
line2853">2853: PetscScalar time_read_co
line2854">2854: long long _counter0,_co
line2856">2856: if ((gen_read = read_counters(event0,&_counter0,event1,&_counter1)) < 0) SETERRQ(PETSC_COMM_SELF,1,"Error in read_counter\n"
line2857">2857: PetscTime(&&time_read_cou
line2858">2858: if (gen_read != gen_start) SETERRQ(PETSC_COMM_SELF,1,"Lost Counters!! Aborting ...\n"
line2859">2859: counter0 += _co
line2860">2860: counter1 += _co
line2861">2861: time_counters += time_read_counters-time_start_co
line2862">2862: return
line2863">2863
line2864">2864: #endif