1 #include <omp.h> 2 3 #include <stdio.h> 4 #include <matrix.h> 5 #include <glb.h> 6 #include <math.h> 7 8 #ifdef TANGO 9 #define COUNT 10 #endif 11 12 extern int max_panel_size; 13 14 /* #define DEBUG */ 15 16 NumericSolve() 17 { 18 int k, panel; 19 20 k = 0; 21 22 while (k < L.n) { 23 HandlePanel(k); 24 k += node[k]; 25 } 26 } 27 28 CompleteColumn4(j) 29 { 30 double recip, diag, *theNZ, *last; 31 32 #ifdef DEBUG 33 printf("Complete %d\n", j); 34 #endif 35 36 theNZ = &L.nz[L.firstnz[j]]; 37 last = &L.nz[L.lastnz[j]]; 38 39 diag = *theNZ; 40 if (diag <= 0.0) { 41 printf("Negative pivot, d[%d] = %f\n", j, diag); 42 exit(0); 43 } 44 45 #ifdef TANGO 46 AUG_OFF 47 { extern unsigned int aug_count; aug_count += 50; } 48 AUG_ON 49 #endif 50 51 diag = sqrt(diag); 52 *theNZ++ = diag; 53 recip = 1.0/diag; 54 55 while (theNZ != last) 56 *theNZ++ *= recip; 57 58 #ifdef COUNT 59 AUG_OFF 60 ops += L.lastnz[j] - L.firstnz[j] + 10; 61 AUG_ON 62 #endif 63 64 #ifdef TANGO 65 AUG_OFF 66 Log(); 67 AUG_ON 68 #endif 69 } 70 71 72 ModifyColumn4(k, where) 73 { 74 double ljk; 75 int j, where2, inco; 76 int *leftRow, *rightRow, *last; 77 double *leftNZ, *rightNZ; 78 79 where2 = where + L.startrow[k] - L.firstnz[k]; 80 81 j = L.row[where2]; 82 83 #ifdef DEBUG 84 printf("Modify %d by %d\n", j, k); 85 #endif 86 87 ljk = L.nz[where]; 88 89 leftRow = &L.row[where2+1]; 90 leftNZ = &L.nz[where+1]; 91 last = &L.row[L.lastnz[k]-L.firstnz[k]+L.startrow[k]]; 92 rightRow = &L.row[L.startrow[j]]; 93 rightNZ = &L.nz[L.firstnz[j]]; 94 95 *rightNZ -= ljk*ljk; 96 rightRow++; rightNZ++; 97 98 while (leftRow != last) { 99 while (*rightRow != *leftRow) { 100 rightRow++; 101 rightNZ++; 102 } 103 *rightNZ -= ljk*(*leftNZ); 104 leftRow++; leftNZ++; rightRow++; rightNZ++; 105 } 106 107 #ifdef COUNT 108 AUG_OFF 109 ops += 2*(L.lastnz[k] - where); 110 AUG_ON 111 #endif 112 } 113 114 115 CompletePanel(panel) 116 { 117 int i, j, lastcol; 118 119 lastcol = panel + node[panel]; 120 121 CompleteColumn4(panel); 122 for (i=panel+1; i<lastcol; i++) { 123 j = L.row[L.startrow[panel]+(i-panel)]; 124 ModifyOneByPanel(panel, i, i-panel, &L.nz[L.firstnz[j]]); 125 CompleteColumn4(i); 126 } 127 } 128 129 130 HandlePanel(panel) 131 { 132 int i, j, where, lastcol; 133 int first_nz; 134 int row, dest_panel, dest_last; 135 int theFirst, theLast, length; 136 int is_column; 137 138 #ifdef DEBUG 139 printf("HandlePanel %d\n", panel); 140 #endif 141 142 /* complete task begins here, parameter panel */ 143 144 is_column = (node[panel] == 1); 145 146 CompletePanel(panel); 147 148 lastcol = panel + node[panel]; 149 length = L.lastnz[panel]-L.firstnz[panel]; 150 151 theFirst = node[panel]; 152 while (theFirst < length) { 153 int *local; 154 double *storage; 155 156 row = L.row[L.startrow[panel]+theFirst]; 157 dest_panel = row; 158 if (node[dest_panel] < 0) 159 dest_panel += node[dest_panel]; 160 dest_last = dest_panel + node[dest_panel]; 161 theLast = theFirst+1; 162 while (theLast < length && L.row[L.startrow[panel]+theLast] < dest_last) 163 theLast++; 164 165 166 #ifdef DEBUG 167 printf("Modify %d,%d by %d,%d (%d,%d)\n", 168 dest_panel, dest_last, panel, lastcol, theFirst, theLast); 169 #endif 170 171 if (is_column) { 172 ModifyPanelByColumn(panel, theFirst, theLast, length); 173 } 174 else { 175 if (theLast-theFirst == node[dest_panel] && 176 length-theFirst == L.lastnz[dest_panel]-L.firstnz[dest_panel]) { 177 /* same structure; add update directly into destination */ 178 ModifyPanelByPanel(panel, theFirst, theLast, length, 179 &L.nz[L.firstnz[dest_panel]]); 180 } 181 else { 182 local = (int *) malloc(L.n*sizeof(int)); 183 storage = (double *) malloc(max_panel_size*sizeof(double)); 184 ModifyPanelByPanel(panel, theFirst, theLast, length, storage); 185 FindRelativeIndicesRight(panel, theFirst, dest_panel, local); 186 ScatterPanelUpdate(panel, theFirst, theLast, length-theFirst, 187 dest_panel, storage, local); 188 free(storage); 189 free(local); 190 } 191 } 192 193 theFirst = theLast; 194 195 } 196 } 197 198 ModifyPanelByColumn(src, theFirst, theLast) 199 { 200 int i; 201 202 for (i=theFirst; i<theLast; i++) 203 ModifyColumn4(src, L.firstnz[src]+i); 204 } 205 206 207 ModifyPanelByPanel(src, theFirst, theLast, length, dest) 208 double *dest; 209 { 210 int i, this_length, lastcol; 211 double *destination; 212 213 lastcol = src + node[src]; 214 destination = dest; this_length = length-theFirst; 215 #pragma omp parallel for schedule(static, 1) 216 for (i=theFirst; i<theLast; i++) { 217 ModifyOneByPanel(src, lastcol, i, destination); 218 destination += this_length; this_length--; 219 } 220 } 221 222 223 FindRelativeIndicesRight(panel, theFirst, dest_panel, relative) 224 int *relative; 225 { 226 int i, *leftRow, *rightRow, *topRight, *last; 227 228 leftRow = &L.row[L.startrow[panel]+theFirst]; 229 last = &L.row[L.startrow[panel]+L.lastnz[panel]-L.firstnz[panel]]; 230 topRight = &L.row[L.startrow[dest_panel]]; 231 232 i = 0; 233 rightRow = topRight; 234 while (leftRow != last) { 235 while (*rightRow != *leftRow) 236 rightRow++; 237 relative[i] = (rightRow-topRight); 238 i++; leftRow++; rightRow++; 239 } 240 } 241 242 243 ScatterPanelUpdate(panel, theFirst, theLast, length, 244 dest_panel, update, relative) 245 double *update; 246 int *relative; 247 { 248 int i, row, dest, offset, *leftRow, *last, *rightRow; 249 int *indices_l; 250 double *theTmp, *rightNZ; 251 /* scatter between theFirst and theLast */ 252 253 theTmp = update; 254 255 for (i=theFirst; i<theLast; i++) { 256 dest = L.row[L.startrow[panel]+i]; 257 leftRow = relative+(i-theFirst); 258 last = relative + length; 259 rightNZ = &L.nz[L.firstnz[dest]] + (dest_panel-dest); 260 while (leftRow != last) { 261 rightNZ[*leftRow] += *theTmp; 262 *theTmp = 0.0; 263 theTmp++; leftRow++; 264 } 265 } 266 } 267 268 269 ModifyOneByPanel(panel, lastcol, where, destination) 270 double *destination; 271 { 272 int j, col, theFirst; 273 int lastcol_7_loc = lastcol - 7; 274 j = L.row[L.startrow[panel]+where]; 275 276 #ifdef DEBUG 277 printf("Modify %d by panel %d, theFirst is %d\n", j, panel, 278 where); 279 #endif 280 281 theFirst = where; 282 283 col = panel; 284 for (; col < lastcol_7_loc; ) {//while (col < lastcol_7_loc) { 285 DoEightModifies4(panel, col, theFirst, j, destination); 286 col += 8; 287 } 288 289 for (; col < lastcol -3; ) {// while (col < lastcol - 3) { 290 DoFourModifies4(panel, col, theFirst, j, destination); 291 col += 4; 292 } 293 for (; col < lastcol; ) {//while (col < lastcol) { 294 DoOneModify4(panel, col, theFirst, j, destination); 295 col++; 296 } 297 } 298 299 DoOneModify4(panel, col, theFirst, j, destination) 300 double *destination; 301 { 302 double ljk; 303 int thisFirst; 304 double *theTmp, *theNZ, *last; 305 306 #ifdef DEBUG 307 printf("Modify %d by %d, L[%d][%d] = %f\n", j, col, j, col, 308 L.nz[L.firstnz[col] + theFirst + panel - col]); 309 310 if (L.row[theFirst+L.startrow[col]+(panel-col)] != j) 311 printf("Bogus in col %d, %d should be %d, first %d\n", col, 312 L.row[theFirst+L.startrow[col]+(panel-col)], j, 313 theFirst+(panel-col)); 314 #endif 315 316 thisFirst = theFirst + L.firstnz[col] + (panel-col); 317 318 ljk = L.nz[thisFirst]; 319 320 theNZ = &L.nz[thisFirst+1]; 321 last = &L.nz[L.lastnz[col]]; 322 theTmp = &destination[0]; 323 324 *theTmp++ -= ljk*ljk; 325 326 while (theNZ < last - 3) { 327 *theTmp -= ljk*(*theNZ); 328 *(theTmp+1) -= ljk*(*(theNZ+1)); 329 *(theTmp+2) -= ljk*(*(theNZ+2)); 330 *(theTmp+3) -= ljk*(*(theNZ+3)); 331 theTmp += 4; theNZ += 4; 332 } 333 while (theNZ != last) 334 *theTmp++ -= ljk*(*theNZ++); 335 336 #ifdef COUNT 337 AUG_OFF 338 ops += 2*(L.lastnz[col] - thisFirst); 339 AUG_ON 340 #endif 341 } 342 343 344 DoFourModifies4(panel, col, theFirst, j, destination) 345 double *destination; 346 { 347 double ljk0, ljk1, ljk2, ljk3; 348 int increment; 349 double *theTmp, *theNZ0, *theNZ1, *theNZ2, *theNZ3, *last; 350 double t0; 351 352 last = &L.nz[L.lastnz[col]]; 353 theTmp = &destination[0]; 354 355 increment = L.lastnz[col] - L.firstnz[col] - 1; 356 357 theNZ0 = &L.nz[theFirst + L.firstnz[col] + (panel-col)]; 358 ljk0 = *theNZ0++; 359 increment--; 360 361 theNZ1 = theNZ0 + increment; 362 ljk1 = *theNZ1++; 363 increment--; 364 365 theNZ2 = theNZ1 + increment; 366 ljk2 = *theNZ2++; 367 increment--; 368 369 theNZ3 = theNZ2 + increment; 370 ljk3 = *theNZ3++; 371 372 *theTmp++ -= ljk0*ljk0 + ljk1*ljk1 + ljk2*ljk2 + ljk3*ljk3; 373 374 while (theNZ0 != last) { 375 t0 = *theTmp; 376 t0 -= ljk0*(*theNZ0++); 377 t0 -= ljk1*(*theNZ1++); 378 t0 -= ljk2*(*theNZ2++); 379 t0 -= ljk3*(*theNZ3++); 380 *theTmp++ = t0; 381 } 382 383 #ifdef COUNT 384 AUG_OFF 385 ops += 2*4*(L.lastnz[col] - (theFirst+L.firstnz[col]+(panel-col))); 386 AUG_ON 387 #endif 388 } 389 390 DoEightModifies4(panel, col, theFirst, j, destination) 391 double *destination; 392 { 393 double ljk0, ljk1, ljk2, ljk3, ljk4, ljk5, ljk6, ljk7; 394 int increment; 395 double *theTmp, *last; 396 double *theNZ0, *theNZ1, *theNZ2, *theNZ3, *theNZ4, *theNZ5, *theNZ6,*theNZ7; 397 double t0, t1, t2, t3; 398 399 last = &L.nz[L.lastnz[col]]; 400 theTmp = &destination[0]; 401 402 increment = L.lastnz[col] - L.firstnz[col] - 1; 403 404 theNZ0 = &L.nz[theFirst + L.firstnz[col] + (panel-col)]; 405 ljk0 = *theNZ0++; 406 increment--; 407 408 theNZ1 = theNZ0 + increment; 409 ljk1 = *theNZ1++; 410 increment--; 411 412 theNZ2 = theNZ1 + increment; 413 ljk2 = *theNZ2++; 414 increment--; 415 416 theNZ3 = theNZ2 + increment; 417 ljk3 = *theNZ3++; 418 increment--; 419 420 theNZ4 = theNZ3 + increment; 421 ljk4 = *theNZ4++; 422 increment--; 423 424 theNZ5 = theNZ4 + increment; 425 ljk5 = *theNZ5++; 426 increment--; 427 428 theNZ6 = theNZ5 + increment; 429 ljk6 = *theNZ6++; 430 increment--; 431 432 theNZ7 = theNZ6 + increment; 433 ljk7 = *theNZ7++; 434 435 *theTmp++ -= ljk0*ljk0 + ljk1*ljk1 + ljk2*ljk2 + ljk3*ljk3 436 + ljk4*ljk4 + ljk5*ljk5 + ljk6*ljk6 + ljk7*ljk7; 437 438 439 long i = theNZ0, e = last; 440 // while (theNZ0 != last) { 441 //while (i!=e){ 442 for(; i < e; i += sizeof(double)) { 443 //if ( theNZ0 != last) break; 444 445 t0 = *theTmp; 446 //t0 -= ljk0*(*theNZ0++); 447 t0 -= ljk0*(*theNZ0++); 448 t0 -= ljk1*(*theNZ1++); 449 t0 -= ljk2*(*theNZ2++); 450 t0 -= ljk3*(*theNZ3++); 451 t0 -= ljk4*(*theNZ4++); 452 t0 -= ljk5*(*theNZ5++); 453 t0 -= ljk6*(*theNZ6++); 454 t0 -= ljk7*(*theNZ7++); 455 // *(theTmp+i) = t0; 456 *theTmp++ = t0; 457 } 458 459 #ifdef COUNT 460 AUG_OFF 461 ops += 2*8*(L.lastnz[col] - (theFirst+L.firstnz[col]+(panel-col))); 462 AUG_ON 463 #endif 464 }