Actual source code: vechip.hip.cpp
1: /*
2: Implementation of the sequential HIP vectors.
4: This file contains the code that can be compiled with a C
5: compiler. The companion file vechip2.hip.cpp contains the code that
6: must be compiled with the hipcc compiler.
7: */
9: #define PETSC_SKIP_SPINLOCK
11: #include <petscconf.h>
12: #include <petsc/private/vecimpl.h>
13: #include <../src/vec/vec/impls/dvecimpl.h>
14: #include <petsc/private/hipvecimpl.h>
16: PetscErrorCode VecHIPGetArrays_Private(Vec v,const PetscScalar** x,const PetscScalar** x_d,PetscOffloadMask* flg)
17: {
20: if (x) {
21: Vec_Seq *h = (Vec_Seq*)v->data;
23: *x = h->array;
24: }
25: if (x_d) {
26: Vec_HIP *d = (Vec_HIP*)v->spptr;
28: *x_d = d ? d->GPUarray : NULL;
29: }
30: if (flg) *flg = v->offloadmask;
31: return(0);
32: }
34: /*
35: Allocates space for the vector array on the Host if it does not exist.
36: Does NOT change the PetscHIPFlag for the vector
37: Does NOT zero the HIP array
38: */
39: PetscErrorCode VecHIPAllocateCheckHost(Vec v)
40: {
42: PetscScalar *array;
43: Vec_Seq *s = (Vec_Seq*)v->data;
44: PetscInt n = v->map->n;
47: if (!s) {
48: PetscNewLog((PetscObject)v,&s);
49: v->data = s;
50: }
51: if (!s->array) {
52: if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
53: PetscMallocSetHIPHost();
54: v->pinned_memory = PETSC_TRUE;
55: }
56: PetscMalloc1(n,&array);
57: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
58: s->array = array;
59: s->array_allocated = array;
60: if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
61: PetscMallocResetHIPHost();
62: }
63: if (v->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
64: v->offloadmask = PETSC_OFFLOAD_CPU;
65: }
66: }
67: return(0);
68: }
70: PetscErrorCode VecCopy_SeqHIP_Private(Vec xin,Vec yin)
71: {
72: PetscScalar *ya;
73: const PetscScalar *xa;
74: PetscErrorCode ierr;
77: VecHIPAllocateCheckHost(xin);
78: VecHIPAllocateCheckHost(yin);
79: if (xin != yin) {
80: VecGetArrayRead(xin,&xa);
81: VecGetArray(yin,&ya);
82: PetscArraycpy(ya,xa,xin->map->n);
83: VecRestoreArrayRead(xin,&xa);
84: VecRestoreArray(yin,&ya);
85: }
86: return(0);
87: }
89: PetscErrorCode VecSetRandom_SeqHIP(Vec xin,PetscRandom r)
90: {
92: PetscInt n = xin->map->n;
93: PetscScalar *xx;
96: VecGetArrayWrite(xin,&xx);
97: PetscRandomGetValues(r,n,xx);
98: VecRestoreArrayWrite(xin,&xx);
99: return(0);
100: }
102: PetscErrorCode VecDestroy_SeqHIP_Private(Vec v)
103: {
104: Vec_Seq *vs = (Vec_Seq*)v->data;
108: PetscObjectSAWsViewOff(v);
109: #if defined(PETSC_USE_LOG)
110: PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
111: #endif
112: if (vs) {
113: if (vs->array_allocated) {
114: if (v->pinned_memory) {
115: PetscMallocSetHIPHost();
116: }
117: PetscFree(vs->array_allocated);
118: if (v->pinned_memory) {
119: PetscMallocResetHIPHost();
120: v->pinned_memory = PETSC_FALSE;
121: }
122: }
123: PetscFree(vs);
124: }
125: return(0);
126: }
128: PetscErrorCode VecResetArray_SeqHIP_Private(Vec vin)
129: {
130: Vec_Seq *v = (Vec_Seq*)vin->data;
133: v->array = v->unplacedarray;
134: v->unplacedarray = 0;
135: return(0);
136: }
138: PetscErrorCode VecResetArray_SeqHIP(Vec vin)
139: {
143: VecHIPCopyFromGPU(vin);
144: VecResetArray_SeqHIP_Private(vin);
145: vin->offloadmask = PETSC_OFFLOAD_CPU;
146: return(0);
147: }
149: PetscErrorCode VecPlaceArray_SeqHIP(Vec vin,const PetscScalar *a)
150: {
154: VecHIPCopyFromGPU(vin);
155: VecPlaceArray_Seq(vin,a);
156: vin->offloadmask = PETSC_OFFLOAD_CPU;
157: return(0);
158: }
160: PetscErrorCode VecReplaceArray_SeqHIP(Vec vin,const PetscScalar *a)
161: {
163: Vec_Seq *vs = (Vec_Seq*)vin->data;
166: if (vs->array != vs->array_allocated) {
167: /* make sure the users array has the latest values */
168: VecHIPCopyFromGPU(vin);
169: }
170: if (vs->array_allocated) {
171: if (vin->pinned_memory) {
172: PetscMallocSetHIPHost();
173: }
174: PetscFree(vs->array_allocated);
175: if (vin->pinned_memory) {
176: PetscMallocResetHIPHost();
177: }
178: }
179: vin->pinned_memory = PETSC_FALSE;
180: vs->array_allocated = vs->array = (PetscScalar*)a;
181: vin->offloadmask = PETSC_OFFLOAD_CPU;
182: return(0);
183: }
185: /*@
186: VecCreateSeqHIP - Creates a standard, sequential array-style vector.
188: Collective
190: Input Parameter:
191: + comm - the communicator, should be PETSC_COMM_SELF
192: - n - the vector length
194: Output Parameter:
195: . v - the vector
197: Notes:
198: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
199: same type as an existing vector.
201: Level: intermediate
203: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
204: @*/
205: PetscErrorCode VecCreateSeqHIP(MPI_Comm comm,PetscInt n,Vec *v)
206: {
210: VecCreate(comm,v);
211: VecSetSizes(*v,n,n);
212: VecSetType(*v,VECSEQHIP);
213: return(0);
214: }
216: PetscErrorCode VecDuplicate_SeqHIP(Vec win,Vec *V)
217: {
221: VecCreateSeqHIP(PetscObjectComm((PetscObject)win),win->map->n,V);
222: PetscLayoutReference(win->map,&(*V)->map);
223: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
224: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
225: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
226: return(0);
227: }
229: PetscErrorCode VecCreate_SeqHIP(Vec V)
230: {
234: PetscHIPInitializeCheck();
235: PetscLayoutSetUp(V->map);
236: VecHIPAllocateCheck(V);
237: VecCreate_SeqHIP_Private(V,((Vec_HIP*)V->spptr)->GPUarray_allocated);
238: VecHIPAllocateCheckHost(V);
239: VecSet(V,0.0);
240: VecSet_Seq(V,0.0);
241: V->offloadmask = PETSC_OFFLOAD_BOTH;
242: return(0);
243: }
245: /*@C
246: VecCreateSeqHIPWithArray - Creates a HIP sequential array-style vector,
247: where the user provides the array space to store the vector values. The array
248: provided must be a GPU array.
250: Collective
252: Input Parameters:
253: + comm - the communicator, should be PETSC_COMM_SELF
254: . bs - the block size
255: . n - the vector length
256: - array - GPU memory where the vector elements are to be stored.
258: Output Parameter:
259: . V - the vector
261: Notes:
262: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
263: same type as an existing vector.
265: If the user-provided array is NULL, then VecHIPPlaceArray() can be used
266: at a later stage to SET the array for storing the vector values.
268: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
269: The user should not free the array until the vector is destroyed.
271: Level: intermediate
273: .seealso: VecCreateMPIHIPWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
274: VecCreateGhost(), VecCreateSeq(), VecHIPPlaceArray(), VecCreateSeqWithArray(),
275: VecCreateMPIWithArray()
276: @*/
277: PetscErrorCode VecCreateSeqHIPWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar array[],Vec *V)
278: {
282: PetscHIPInitializeCheck();
283: VecCreate(comm,V);
284: VecSetSizes(*V,n,n);
285: VecSetBlockSize(*V,bs);
286: VecCreate_SeqHIP_Private(*V,array);
287: return(0);
288: }
290: /*@C
291: VecCreateSeqHIPWithArrays - Creates a HIP sequential array-style vector,
292: where the user provides the array space to store the vector values.
294: Collective
296: Input Parameters:
297: + comm - the communicator, should be PETSC_COMM_SELF
298: . bs - the block size
299: . n - the vector length
300: - cpuarray - CPU memory where the vector elements are to be stored.
301: - gpuarray - GPU memory where the vector elements are to be stored.
303: Output Parameter:
304: . V - the vector
306: Notes:
307: If both cpuarray and gpuarray are provided, the caller must ensure that
308: the provided arrays have identical values.
310: PETSc does NOT free the provided arrays when the vector is destroyed via
311: VecDestroy(). The user should not free the array until the vector is
312: destroyed.
314: Level: intermediate
316: .seealso: VecCreateMPIHIPWithArrays(), VecCreate(), VecCreateSeqWithArray(),
317: VecHIPPlaceArray(), VecCreateSeqHIPWithArray(),
318: VecHIPAllocateCheckHost()
319: @*/
320: PetscErrorCode VecCreateSeqHIPWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec *V)
321: {
325: // set V's gpuarray to be gpuarray, do not allocate memory on host yet.
326: VecCreateSeqHIPWithArray(comm,bs,n,gpuarray,V);
328: if (cpuarray && gpuarray) {
329: Vec_Seq *s = (Vec_Seq*)((*V)->data);
330: s->array = (PetscScalar*)cpuarray;
331: (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
332: } else if (cpuarray) {
333: Vec_Seq *s = (Vec_Seq*)((*V)->data);
334: s->array = (PetscScalar*)cpuarray;
335: (*V)->offloadmask = PETSC_OFFLOAD_CPU;
336: } else if (gpuarray) {
337: (*V)->offloadmask = PETSC_OFFLOAD_GPU;
338: } else {
339: (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
340: }
342: return(0);
343: }
345: PetscErrorCode VecGetArray_SeqHIP(Vec v,PetscScalar **a)
346: {
350: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
351: VecHIPCopyFromGPU(v);
352: } else {
353: VecHIPAllocateCheckHost(v);
354: }
355: *a = *((PetscScalar**)v->data);
356: return(0);
357: }
359: PetscErrorCode VecRestoreArray_SeqHIP(Vec v,PetscScalar **a)
360: {
362: v->offloadmask = PETSC_OFFLOAD_CPU;
363: return(0);
364: }
366: PetscErrorCode VecGetArrayWrite_SeqHIP(Vec v,PetscScalar **a)
367: {
371: VecHIPAllocateCheckHost(v);
372: *a = *((PetscScalar**)v->data);
373: return(0);
374: }
376: PetscErrorCode VecGetArrayAndMemType_SeqHIP(Vec v,PetscScalar** a,PetscMemType *mtype)
377: {
381: if (v->offloadmask & PETSC_OFFLOAD_GPU) { /* Prefer working on GPU when offloadmask is PETSC_OFFLOAD_BOTH */
382: *a = ((Vec_HIP*)v->spptr)->GPUarray;
383: v->offloadmask = PETSC_OFFLOAD_GPU; /* Change the mask once GPU gets write access, don't wait until restore array */
384: if (mtype) *mtype = PETSC_MEMTYPE_HIP;
385: } else {
386: VecHIPAllocateCheckHost(v);
387: *a = *((PetscScalar**)v->data);
388: if (mtype) *mtype = PETSC_MEMTYPE_HOST;
389: }
390: return(0);
391: }
393: PetscErrorCode VecRestoreArrayAndMemType_SeqHIP(Vec v,PetscScalar** a)
394: {
396: if (v->offloadmask & PETSC_OFFLOAD_GPU) {
397: v->offloadmask = PETSC_OFFLOAD_GPU;
398: } else {
399: v->offloadmask = PETSC_OFFLOAD_CPU;
400: }
401: return(0);
402: }
404: PetscErrorCode VecBindToCPU_SeqHIP(Vec V,PetscBool pin)
405: {
409: V->boundtocpu = pin;
410: if (pin) {
411: VecHIPCopyFromGPU(V);
412: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
413: V->ops->dot = VecDot_Seq;
414: V->ops->norm = VecNorm_Seq;
415: V->ops->tdot = VecTDot_Seq;
416: V->ops->scale = VecScale_Seq;
417: V->ops->copy = VecCopy_Seq;
418: V->ops->set = VecSet_Seq;
419: V->ops->swap = VecSwap_Seq;
420: V->ops->axpy = VecAXPY_Seq;
421: V->ops->axpby = VecAXPBY_Seq;
422: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
423: V->ops->pointwisemult = VecPointwiseMult_Seq;
424: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
425: V->ops->setrandom = VecSetRandom_Seq;
426: V->ops->dot_local = VecDot_Seq;
427: V->ops->tdot_local = VecTDot_Seq;
428: V->ops->norm_local = VecNorm_Seq;
429: V->ops->mdot_local = VecMDot_Seq;
430: V->ops->mtdot_local = VecMTDot_Seq;
431: V->ops->maxpy = VecMAXPY_Seq;
432: V->ops->mdot = VecMDot_Seq;
433: V->ops->mtdot = VecMTDot_Seq;
434: V->ops->aypx = VecAYPX_Seq;
435: V->ops->waxpy = VecWAXPY_Seq;
436: V->ops->dotnorm2 = NULL;
437: V->ops->placearray = VecPlaceArray_Seq;
438: V->ops->replacearray = VecReplaceArray_SeqHIP;
439: V->ops->resetarray = VecResetArray_Seq;
440: V->ops->duplicate = VecDuplicate_Seq;
441: V->ops->conjugate = VecConjugate_Seq;
442: V->ops->getlocalvector = NULL;
443: V->ops->restorelocalvector = NULL;
444: V->ops->getlocalvectorread = NULL;
445: V->ops->restorelocalvectorread = NULL;
446: V->ops->getarraywrite = NULL;
447: V->ops->max = VecMax_Seq;
448: V->ops->min = VecMin_Seq;
449: V->ops->reciprocal = VecReciprocal_Default;
450: V->ops->sum = NULL;
451: V->ops->shift = NULL;
452: } else {
453: V->ops->dot = VecDot_SeqHIP;
454: V->ops->norm = VecNorm_SeqHIP;
455: V->ops->tdot = VecTDot_SeqHIP;
456: V->ops->scale = VecScale_SeqHIP;
457: V->ops->copy = VecCopy_SeqHIP;
458: V->ops->set = VecSet_SeqHIP;
459: V->ops->swap = VecSwap_SeqHIP;
460: V->ops->axpy = VecAXPY_SeqHIP;
461: V->ops->axpby = VecAXPBY_SeqHIP;
462: V->ops->axpbypcz = VecAXPBYPCZ_SeqHIP;
463: V->ops->pointwisemult = VecPointwiseMult_SeqHIP;
464: V->ops->pointwisedivide = VecPointwiseDivide_SeqHIP;
465: V->ops->setrandom = VecSetRandom_SeqHIP;
466: V->ops->dot_local = VecDot_SeqHIP;
467: V->ops->tdot_local = VecTDot_SeqHIP;
468: V->ops->norm_local = VecNorm_SeqHIP;
469: V->ops->mdot_local = VecMDot_SeqHIP;
470: V->ops->maxpy = VecMAXPY_SeqHIP;
471: V->ops->mdot = VecMDot_SeqHIP;
472: V->ops->aypx = VecAYPX_SeqHIP;
473: V->ops->waxpy = VecWAXPY_SeqHIP;
474: V->ops->dotnorm2 = VecDotNorm2_SeqHIP;
475: V->ops->placearray = VecPlaceArray_SeqHIP;
476: V->ops->replacearray = VecReplaceArray_SeqHIP;
477: V->ops->resetarray = VecResetArray_SeqHIP;
478: V->ops->destroy = VecDestroy_SeqHIP;
479: V->ops->duplicate = VecDuplicate_SeqHIP;
480: V->ops->conjugate = VecConjugate_SeqHIP;
481: V->ops->getlocalvector = VecGetLocalVector_SeqHIP;
482: V->ops->restorelocalvector = VecRestoreLocalVector_SeqHIP;
483: V->ops->getlocalvectorread = VecGetLocalVectorRead_SeqHIP;
484: V->ops->restorelocalvectorread = VecRestoreLocalVectorRead_SeqHIP;
485: V->ops->getarraywrite = VecGetArrayWrite_SeqHIP;
486: V->ops->getarray = VecGetArray_SeqHIP;
487: V->ops->restorearray = VecRestoreArray_SeqHIP;
488: V->ops->getarrayandmemtype = VecGetArrayAndMemType_SeqHIP;
489: V->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqHIP;
490: V->ops->max = VecMax_SeqHIP;
491: V->ops->min = VecMin_SeqHIP;
492: V->ops->reciprocal = VecReciprocal_SeqHIP;
493: V->ops->sum = VecSum_SeqHIP;
494: V->ops->shift = VecShift_SeqHIP;
495: }
496: return(0);
497: }
499: PetscErrorCode VecCreate_SeqHIP_Private(Vec V,const PetscScalar *array)
500: {
502: Vec_HIP *vechip;
503: PetscMPIInt size;
504: PetscBool option_set;
507: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
508: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQHIP on more than one process");
509: VecCreate_Seq_Private(V,0);
510: PetscObjectChangeTypeName((PetscObject)V,VECSEQHIP);
511: VecBindToCPU_SeqHIP(V,PETSC_FALSE);
512: V->ops->bindtocpu = VecBindToCPU_SeqHIP;
514: /* Later, functions check for the Vec_HIP structure existence, so do not create it without array */
515: if (array) {
516: if (!V->spptr) {
517: PetscReal pinned_memory_min;
518: PetscCalloc(sizeof(Vec_HIP),&V->spptr);
519: vechip = (Vec_HIP*)V->spptr;
520: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
522: pinned_memory_min = 0;
523: /* Need to parse command line for minimum size to use for pinned memory allocations on host here.
524: Note: This same code duplicated in VecHIPAllocateCheck() and VecCreate_MPIHIP_Private(). Is there a good way to avoid this? */
525: PetscOptionsBegin(PetscObjectComm((PetscObject)V),((PetscObject)V)->prefix,"VECHIP Options","Vec");
526: PetscOptionsReal("-vec_pinned_memory_min","Minimum size (in bytes) for an allocation to use pinned memory on host","VecSetPinnedMemoryMin",pinned_memory_min,&pinned_memory_min,&option_set);
527: if (option_set) V->minimum_bytes_pinned_memory = pinned_memory_min;
528: PetscOptionsEnd();
529: }
530: vechip = (Vec_HIP*)V->spptr;
531: vechip->GPUarray = (PetscScalar*)array;
532: V->offloadmask = PETSC_OFFLOAD_GPU;
534: }
535: return(0);
536: }