Actual source code: dgefa2.c
2: /*
3: Inverts 2 by 2 matrix using gaussian elimination with partial pivoting.
5: Used by the sparse factorization routines in
6: src/mat/impls/baij/seq
8: This is a combination of the Linpack routines
9: dgefa() and dgedi() specialized for a size of 2.
11: */
12: #include <petscsys.h>
14: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
15: {
16: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
17: PetscInt k4,j3;
18: MatScalar *aa,*ax,*ay,work[4],stmp;
19: MatReal tmp,max;
22: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
23: shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
25: /* Parameter adjustments */
26: a -= 3;
28: k = 1;
29: kp1 = k + 1;
30: k3 = 2*k;
31: k4 = k3 + k;
33: /* find l = pivot index */
34: i__2 = 3 - k;
35: aa = &a[k4];
36: max = PetscAbsScalar(aa[0]);
37: l = 1;
38: for (ll=1; ll<i__2; ll++) {
39: tmp = PetscAbsScalar(aa[ll]);
40: if (tmp > max) { max = tmp; l = ll+1;}
41: }
42: l += k - 1;
43: ipvt[k-1] = l;
45: if (a[l + k3] == 0.0) {
46: if (shift == 0.0) {
47: if (allowzeropivot) {
49: PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);
50: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
51: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
52: } else {
53: a[l + k3] = shift;
54: }
55: }
57: /* interchange if necessary */
58: if (l != k) {
59: stmp = a[l + k3];
60: a[l + k3] = a[k4];
61: a[k4] = stmp;
62: }
64: /* compute multipliers */
65: stmp = -1. / a[k4];
66: i__2 = 2 - k;
67: aa = &a[1 + k4];
68: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
70: /* row elimination with column indexing */
71: ax = &a[k4+1];
72: for (j = kp1; j <= 2; ++j) {
73: j3 = 2*j;
74: stmp = a[l + j3];
75: if (l != k) {
76: a[l + j3] = a[k + j3];
77: a[k + j3] = stmp;
78: }
80: i__3 = 2 - k;
81: ay = &a[1+k+j3];
82: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
83: }
85: ipvt[1] = 2;
86: if (a[6] == 0.0) {
87: if (allowzeropivot) {
89: PetscInfo1(NULL,"Zero pivot, row %D\n",1);
90: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
91: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);
92: }
94: /* Now form the inverse */
95: /* compute inverse(u) */
96: for (k = 1; k <= 2; ++k) {
97: k3 = 2*k;
98: k4 = k3 + k;
99: a[k4] = 1.0 / a[k4];
100: stmp = -a[k4];
101: i__2 = k - 1;
102: aa = &a[k3 + 1];
103: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
104: kp1 = k + 1;
105: if (2 < kp1) continue;
106: ax = aa;
107: for (j = kp1; j <= 2; ++j) {
108: j3 = 2*j;
109: stmp = a[k + j3];
110: a[k + j3] = 0.0;
111: ay = &a[j3 + 1];
112: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
113: }
114: }
116: /* form inverse(u)*inverse(l) */
117: k = 1;
118: k3 = 2*k;
119: kp1 = k + 1;
120: aa = a + k3;
121: for (i = kp1; i <= 2; ++i) {
122: work[i-1] = aa[i];
123: aa[i] = 0.0;
124: }
125: for (j = kp1; j <= 2; ++j) {
126: stmp = work[j-1];
127: ax = &a[2*j + 1];
128: ay = &a[k3 + 1];
129: ay[0] += stmp*ax[0];
130: ay[1] += stmp*ax[1];
131: }
132: l = ipvt[k-1];
133: if (l != k) {
134: ax = &a[k3 + 1];
135: ay = &a[2*l + 1];
136: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
137: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
138: }
139: return(0);
140: }
142: /* gaussian elimination with partial pivoting */
143: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
144: {
145: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
146: PetscInt k4,j3;
147: MatScalar *aa,*ax,*ay,work[81],stmp;
148: MatReal tmp,max;
151: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
153: /* Parameter adjustments */
154: a -= 10;
156: for (k = 1; k <= 8; ++k) {
157: kp1 = k + 1;
158: k3 = 9*k;
159: k4 = k3 + k;
161: /* find l = pivot index */
162: i__2 = 10 - k;
163: aa = &a[k4];
164: max = PetscAbsScalar(aa[0]);
165: l = 1;
166: for (ll=1; ll<i__2; ll++) {
167: tmp = PetscAbsScalar(aa[ll]);
168: if (tmp > max) { max = tmp; l = ll+1;}
169: }
170: l += k - 1;
171: ipvt[k-1] = l;
173: if (a[l + k3] == 0.0) {
174: if (shift == 0.0) {
175: if (allowzeropivot) {
177: PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);
178: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
179: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
180: } else {
181: a[l + k3] = shift;
182: }
183: }
185: /* interchange if necessary */
186: if (l != k) {
187: stmp = a[l + k3];
188: a[l + k3] = a[k4];
189: a[k4] = stmp;
190: }
192: /* compute multipliers */
193: stmp = -1. / a[k4];
194: i__2 = 9 - k;
195: aa = &a[1 + k4];
196: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
198: /* row elimination with column indexing */
199: ax = &a[k4+1];
200: for (j = kp1; j <= 9; ++j) {
201: j3 = 9*j;
202: stmp = a[l + j3];
203: if (l != k) {
204: a[l + j3] = a[k + j3];
205: a[k + j3] = stmp;
206: }
208: i__3 = 9 - k;
209: ay = &a[1+k+j3];
210: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
211: }
212: }
213: ipvt[8] = 9;
214: if (a[90] == 0.0) {
215: if (allowzeropivot) {
217: PetscInfo1(NULL,"Zero pivot, row %D\n",8);
218: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
219: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",8);
220: }
222: /* Now form the inverse */
223: /* compute inverse(u) */
224: for (k = 1; k <= 9; ++k) {
225: k3 = 9*k;
226: k4 = k3 + k;
227: a[k4] = 1.0 / a[k4];
228: stmp = -a[k4];
229: i__2 = k - 1;
230: aa = &a[k3 + 1];
231: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
232: kp1 = k + 1;
233: if (9 < kp1) continue;
234: ax = aa;
235: for (j = kp1; j <= 9; ++j) {
236: j3 = 9*j;
237: stmp = a[k + j3];
238: a[k + j3] = 0.0;
239: ay = &a[j3 + 1];
240: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
241: }
242: }
244: /* form inverse(u)*inverse(l) */
245: for (kb = 1; kb <= 8; ++kb) {
246: k = 9 - kb;
247: k3 = 9*k;
248: kp1 = k + 1;
249: aa = a + k3;
250: for (i = kp1; i <= 9; ++i) {
251: work[i-1] = aa[i];
252: aa[i] = 0.0;
253: }
254: for (j = kp1; j <= 9; ++j) {
255: stmp = work[j-1];
256: ax = &a[9*j + 1];
257: ay = &a[k3 + 1];
258: ay[0] += stmp*ax[0];
259: ay[1] += stmp*ax[1];
260: ay[2] += stmp*ax[2];
261: ay[3] += stmp*ax[3];
262: ay[4] += stmp*ax[4];
263: ay[5] += stmp*ax[5];
264: ay[6] += stmp*ax[6];
265: ay[7] += stmp*ax[7];
266: ay[8] += stmp*ax[8];
267: }
268: l = ipvt[k-1];
269: if (l != k) {
270: ax = &a[k3 + 1];
271: ay = &a[9*l + 1];
272: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
273: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
274: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
275: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
276: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
277: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
278: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
279: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
280: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
281: }
282: }
283: return(0);
284: }
286: /*
287: Inverts 15 by 15 matrix using gaussian elimination with partial pivoting.
289: Used by the sparse factorization routines in
290: src/mat/impls/baij/seq
292: This is a combination of the Linpack routines
293: dgefa() and dgedi() specialized for a size of 15.
295: */
297: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
298: {
299: PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
300: PetscInt k4,j3;
301: MatScalar *aa,*ax,*ay,stmp;
302: MatReal tmp,max;
305: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
307: /* Parameter adjustments */
308: a -= 16;
310: for (k = 1; k <= 14; ++k) {
311: kp1 = k + 1;
312: k3 = 15*k;
313: k4 = k3 + k;
315: /* find l = pivot index */
316: i__2 = 16 - k;
317: aa = &a[k4];
318: max = PetscAbsScalar(aa[0]);
319: l = 1;
320: for (ll=1; ll<i__2; ll++) {
321: tmp = PetscAbsScalar(aa[ll]);
322: if (tmp > max) { max = tmp; l = ll+1;}
323: }
324: l += k - 1;
325: ipvt[k-1] = l;
327: if (a[l + k3] == 0.0) {
328: if (shift == 0.0) {
329: if (allowzeropivot) {
331: PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);
332: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
333: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
334: } else {
335: a[l + k3] = shift;
336: }
337: }
339: /* interchange if necessary */
340: if (l != k) {
341: stmp = a[l + k3];
342: a[l + k3] = a[k4];
343: a[k4] = stmp;
344: }
346: /* compute multipliers */
347: stmp = -1. / a[k4];
348: i__2 = 15 - k;
349: aa = &a[1 + k4];
350: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
352: /* row elimination with column indexing */
353: ax = &a[k4+1];
354: for (j = kp1; j <= 15; ++j) {
355: j3 = 15*j;
356: stmp = a[l + j3];
357: if (l != k) {
358: a[l + j3] = a[k + j3];
359: a[k + j3] = stmp;
360: }
362: i__3 = 15 - k;
363: ay = &a[1+k+j3];
364: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
365: }
366: }
367: ipvt[14] = 15;
368: if (a[240] == 0.0) {
369: if (allowzeropivot) {
371: PetscInfo1(NULL,"Zero pivot, row %D\n",14);
372: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
373: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",14);
374: }
376: /* Now form the inverse */
377: /* compute inverse(u) */
378: for (k = 1; k <= 15; ++k) {
379: k3 = 15*k;
380: k4 = k3 + k;
381: a[k4] = 1.0 / a[k4];
382: stmp = -a[k4];
383: i__2 = k - 1;
384: aa = &a[k3 + 1];
385: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
386: kp1 = k + 1;
387: if (15 < kp1) continue;
388: ax = aa;
389: for (j = kp1; j <= 15; ++j) {
390: j3 = 15*j;
391: stmp = a[k + j3];
392: a[k + j3] = 0.0;
393: ay = &a[j3 + 1];
394: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
395: }
396: }
398: /* form inverse(u)*inverse(l) */
399: for (kb = 1; kb <= 14; ++kb) {
400: k = 15 - kb;
401: k3 = 15*k;
402: kp1 = k + 1;
403: aa = a + k3;
404: for (i = kp1; i <= 15; ++i) {
405: work[i-1] = aa[i];
406: aa[i] = 0.0;
407: }
408: for (j = kp1; j <= 15; ++j) {
409: stmp = work[j-1];
410: ax = &a[15*j + 1];
411: ay = &a[k3 + 1];
412: ay[0] += stmp*ax[0];
413: ay[1] += stmp*ax[1];
414: ay[2] += stmp*ax[2];
415: ay[3] += stmp*ax[3];
416: ay[4] += stmp*ax[4];
417: ay[5] += stmp*ax[5];
418: ay[6] += stmp*ax[6];
419: ay[7] += stmp*ax[7];
420: ay[8] += stmp*ax[8];
421: ay[9] += stmp*ax[9];
422: ay[10] += stmp*ax[10];
423: ay[11] += stmp*ax[11];
424: ay[12] += stmp*ax[12];
425: ay[13] += stmp*ax[13];
426: ay[14] += stmp*ax[14];
427: }
428: l = ipvt[k-1];
429: if (l != k) {
430: ax = &a[k3 + 1];
431: ay = &a[15*l + 1];
432: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
433: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
434: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
435: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
436: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
437: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
438: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
439: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
440: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
441: stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp;
442: stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
443: stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
444: stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
445: stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
446: stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
447: }
448: }
449: return(0);
450: }