1 #include <omp.h>
    2 
    3 #include <stdio.h>
    4 #include <matrix.h>
    5 #include <glb.h>
    6 #include <math.h>
    7 
    8 #ifdef TANGO
    9 #define COUNT
   10 #endif
   11 
   12 extern int max_panel_size;
   13 
   14 /* #define DEBUG */
   15 
   16 NumericSolve()
   17 {
   18     int k, panel;
   19 
   20     k = 0;
   21 
   22     while (k < L.n) {
   23       HandlePanel(k);
   24       k += node[k];
   25     }
   26 }
   27 
   28 CompleteColumn4(j)
   29 {
   30  double recip, diag, *theNZ, *last;
   31 
   32 #ifdef DEBUG
   33  printf("Complete %d\n", j);
   34 #endif
   35 
   36  theNZ = &L.nz[L.firstnz[j]];
   37  last = &L.nz[L.lastnz[j]];
   38 
   39  diag = *theNZ;
   40  if (diag <= 0.0) {
   41   printf("Negative pivot, d[%d] = %f\n", j, diag);
   42   exit(0);
   43   }
   44 
   45 #ifdef TANGO
   46  AUG_OFF
   47  { extern unsigned int aug_count; aug_count += 50; }
   48  AUG_ON
   49 #endif
   50 
   51  diag = sqrt(diag);
   52  *theNZ++ = diag;
   53  recip = 1.0/diag;
   54 
   55  while (theNZ != last)
   56   *theNZ++ *= recip;
   57 
   58 #ifdef COUNT
   59  AUG_OFF
   60  ops += L.lastnz[j] - L.firstnz[j] + 10;
   61  AUG_ON
   62 #endif
   63 
   64 #ifdef TANGO
   65  AUG_OFF
   66  Log();
   67  AUG_ON
   68 #endif
   69 }
   70 
   71 
   72 ModifyColumn4(k, where)
   73 {
   74  double ljk;
   75  int j, where2, inco;
   76  int *leftRow, *rightRow, *last;
   77  double *leftNZ, *rightNZ;
   78 
   79  where2 = where + L.startrow[k] - L.firstnz[k];
   80 
   81  j = L.row[where2];
   82 
   83 #ifdef DEBUG
   84  printf("Modify %d by %d\n", j, k);
   85 #endif
   86 
   87  ljk = L.nz[where];
   88 
   89  leftRow = &L.row[where2+1];
   90  leftNZ = &L.nz[where+1];
   91  last = &L.row[L.lastnz[k]-L.firstnz[k]+L.startrow[k]];
   92  rightRow = &L.row[L.startrow[j]];
   93  rightNZ = &L.nz[L.firstnz[j]];
   94 
   95  *rightNZ -= ljk*ljk;
   96  rightRow++; rightNZ++;
   97 
   98  while (leftRow != last) {
   99   while (*rightRow != *leftRow) {
  100           rightRow++;
  101    rightNZ++;
  102         }
  103          *rightNZ -= ljk*(*leftNZ);
  104   leftRow++; leftNZ++; rightRow++; rightNZ++;
  105   }
  106 
  107 #ifdef COUNT
  108  AUG_OFF
  109  ops += 2*(L.lastnz[k] - where);
  110  AUG_ON
  111 #endif
  112 }
  113 
  114 
  115 CompletePanel(panel)
  116 {
  117   int i, j, lastcol;
  118 
  119   lastcol = panel + node[panel];
  120 
  121   CompleteColumn4(panel);
  122   for (i=panel+1; i<lastcol; i++) {
  123     j = L.row[L.startrow[panel]+(i-panel)];
  124     ModifyOneByPanel(panel, i, i-panel, &L.nz[L.firstnz[j]]);
  125     CompleteColumn4(i);
  126   }
  127 }
  128 
  129 
  130 HandlePanel(panel)
  131 {
  132   int i, j, where, lastcol;
  133   int first_nz;
  134   int row, dest_panel, dest_last;
  135   int theFirst, theLast, length;
  136   int is_column;
  137 
  138 #ifdef DEBUG
  139   printf("HandlePanel %d\n", panel);
  140 #endif
  141 
  142 /* complete task begins here, parameter panel */
  143 
  144   is_column = (node[panel] == 1);
  145 
  146   CompletePanel(panel);
  147 
  148   lastcol = panel + node[panel];
  149   length = L.lastnz[panel]-L.firstnz[panel];
  150 
  151   theFirst = node[panel];
  152   while (theFirst < length) {
  153     int *local;
  154     double *storage;
  155 
  156     row = L.row[L.startrow[panel]+theFirst];
  157     dest_panel = row;
  158     if (node[dest_panel] < 0)
  159       dest_panel += node[dest_panel];
  160     dest_last = dest_panel + node[dest_panel];
  161     theLast = theFirst+1;
  162     while (theLast < length && L.row[L.startrow[panel]+theLast] < dest_last)
  163       theLast++;
  164 
  165 
  166 #ifdef DEBUG
  167   printf("Modify %d,%d by %d,%d (%d,%d)\n",
  168 	 dest_panel, dest_last, panel, lastcol, theFirst, theLast);
  169 #endif
  170 
  171     if (is_column) {
  172       ModifyPanelByColumn(panel, theFirst, theLast, length);
  173     }
  174     else {
  175       if (theLast-theFirst == node[dest_panel] &&
  176 	  length-theFirst == L.lastnz[dest_panel]-L.firstnz[dest_panel]) {
  177 	/* same structure; add update directly into destination */
  178 	ModifyPanelByPanel(panel, theFirst, theLast, length,
  179 			   &L.nz[L.firstnz[dest_panel]]);
  180       }
  181       else {
  182         local = (int *) malloc(L.n*sizeof(int));
  183         storage = (double *) malloc(max_panel_size*sizeof(double));
  184 	ModifyPanelByPanel(panel, theFirst, theLast, length, storage);
  185 	FindRelativeIndicesRight(panel, theFirst, dest_panel, local);
  186 	ScatterPanelUpdate(panel, theFirst, theLast, length-theFirst,
  187 			   dest_panel, storage, local);
  188         free(storage);
  189         free(local);
  190       }
  191     }
  192 
  193     theFirst = theLast;
  194 
  195   }
  196 }
  197 
  198 ModifyPanelByColumn(src, theFirst, theLast)
  199 {
  200   int i;
  201 
  202   for (i=theFirst; i<theLast; i++)
  203     ModifyColumn4(src, L.firstnz[src]+i);
  204 }
  205 
  206 
  207 ModifyPanelByPanel(src, theFirst, theLast, length, dest)
  208 double *dest;
  209 {
  210   int i, this_length, lastcol;
  211   double *destination;
  212 
  213   lastcol = src + node[src];
  214   destination = dest; this_length = length-theFirst;
  215    #pragma omp parallel for schedule(static, 1)
  216   for (i=theFirst; i<theLast; i++) {
  217     ModifyOneByPanel(src, lastcol, i, destination);
  218     destination += this_length; this_length--;
  219   }
  220 }
  221 
  222 
  223 FindRelativeIndicesRight(panel, theFirst, dest_panel, relative)
  224 int *relative;
  225 {
  226   int i, *leftRow, *rightRow, *topRight, *last;
  227 
  228   leftRow = &L.row[L.startrow[panel]+theFirst];
  229   last = &L.row[L.startrow[panel]+L.lastnz[panel]-L.firstnz[panel]];
  230   topRight = &L.row[L.startrow[dest_panel]];
  231 
  232   i = 0;
  233   rightRow = topRight;
  234   while (leftRow != last) {
  235     while (*rightRow != *leftRow)
  236       rightRow++;
  237     relative[i] = (rightRow-topRight);
  238     i++; leftRow++; rightRow++;
  239   }
  240 }
  241 
  242 
  243 ScatterPanelUpdate(panel, theFirst, theLast, length,
  244         dest_panel, update, relative)
  245 double *update;
  246 int *relative;
  247 {
  248   int i, row, dest, offset, *leftRow, *last, *rightRow;
  249   int *indices_l;
  250   double *theTmp, *rightNZ;
  251   /* scatter between theFirst and theLast */
  252 
  253   theTmp = update;
  254 
  255   for (i=theFirst; i<theLast; i++) {
  256     dest = L.row[L.startrow[panel]+i];
  257     leftRow = relative+(i-theFirst);
  258     last = relative + length;
  259     rightNZ = &L.nz[L.firstnz[dest]] + (dest_panel-dest);
  260     while (leftRow != last) {
  261       rightNZ[*leftRow] += *theTmp;
  262       *theTmp = 0.0;
  263       theTmp++; leftRow++;
  264     }
  265   }
  266 }
  267 
  268 
  269 ModifyOneByPanel(panel, lastcol, where, destination)
  270 double *destination;
  271 {
  272   int j, col, theFirst;
  273   int lastcol_7_loc = lastcol - 7;
  274   j = L.row[L.startrow[panel]+where];
  275 
  276 #ifdef DEBUG
  277   printf("Modify %d by panel %d, theFirst is %d\n", j, panel,
  278   where);
  279 #endif
  280 
  281   theFirst = where;
  282 
  283   col = panel;
  284   for (; col < lastcol_7_loc; ) {//while (col < lastcol_7_loc) {
  285     DoEightModifies4(panel, col, theFirst, j, destination);
  286     col += 8;
  287   }
  288 
  289   for (; col < lastcol -3; ) {// while (col < lastcol - 3) {
  290     DoFourModifies4(panel, col, theFirst, j, destination);
  291     col += 4;
  292   }
  293   for (; col < lastcol; ) {//while (col < lastcol) {
  294     DoOneModify4(panel, col, theFirst, j, destination);
  295     col++;
  296   }
  297 }
  298 
  299 DoOneModify4(panel, col, theFirst, j, destination)
  300 double *destination;
  301 {
  302   double ljk;
  303   int thisFirst;
  304   double *theTmp, *theNZ, *last;
  305 
  306 #ifdef DEBUG
  307   printf("Modify %d by %d, L[%d][%d] = %f\n", j, col, j, col,
  308   L.nz[L.firstnz[col] + theFirst + panel - col]);
  309 
  310   if (L.row[theFirst+L.startrow[col]+(panel-col)] != j)
  311     printf("Bogus in col %d, %d should be %d, first %d\n", col,
  312     L.row[theFirst+L.startrow[col]+(panel-col)], j,
  313      theFirst+(panel-col));
  314 #endif
  315 
  316   thisFirst = theFirst + L.firstnz[col] + (panel-col);
  317 
  318   ljk = L.nz[thisFirst];
  319 
  320   theNZ = &L.nz[thisFirst+1];
  321   last = &L.nz[L.lastnz[col]];
  322   theTmp = &destination[0];
  323 
  324   *theTmp++ -= ljk*ljk;
  325 
  326   while (theNZ < last - 3) {
  327     *theTmp -= ljk*(*theNZ);
  328     *(theTmp+1) -= ljk*(*(theNZ+1));
  329     *(theTmp+2) -= ljk*(*(theNZ+2));
  330     *(theTmp+3) -= ljk*(*(theNZ+3));
  331     theTmp += 4; theNZ += 4;
  332   }
  333   while (theNZ != last)
  334     *theTmp++ -= ljk*(*theNZ++);
  335 
  336 #ifdef COUNT
  337   AUG_OFF
  338   ops += 2*(L.lastnz[col] - thisFirst);
  339   AUG_ON
  340 #endif
  341 }
  342 
  343 
  344 DoFourModifies4(panel, col, theFirst, j, destination)
  345 double *destination;
  346 {
  347   double ljk0, ljk1, ljk2, ljk3;
  348   int increment;
  349   double *theTmp, *theNZ0, *theNZ1, *theNZ2, *theNZ3, *last;
  350   double t0;
  351 
  352   last = &L.nz[L.lastnz[col]];
  353   theTmp = &destination[0];
  354 
  355   increment = L.lastnz[col] - L.firstnz[col] - 1;
  356 
  357   theNZ0 = &L.nz[theFirst + L.firstnz[col] + (panel-col)];
  358   ljk0 = *theNZ0++;
  359   increment--;
  360 
  361   theNZ1 = theNZ0 + increment;
  362   ljk1 = *theNZ1++;
  363   increment--;
  364 
  365   theNZ2 = theNZ1 + increment;
  366   ljk2 = *theNZ2++;
  367   increment--;
  368 
  369   theNZ3 = theNZ2 + increment;
  370   ljk3 = *theNZ3++;
  371 
  372   *theTmp++ -= ljk0*ljk0 + ljk1*ljk1 + ljk2*ljk2 + ljk3*ljk3;
  373 
  374   while (theNZ0 != last) {
  375     t0 = *theTmp;
  376     t0 -= ljk0*(*theNZ0++);
  377     t0 -= ljk1*(*theNZ1++);
  378     t0 -= ljk2*(*theNZ2++);
  379     t0 -= ljk3*(*theNZ3++);
  380     *theTmp++ = t0;
  381   }
  382 
  383 #ifdef COUNT
  384   AUG_OFF
  385   ops += 2*4*(L.lastnz[col] - (theFirst+L.firstnz[col]+(panel-col)));
  386   AUG_ON
  387 #endif
  388 }
  389 
  390 DoEightModifies4(panel, col, theFirst, j, destination)
  391 double *destination;
  392 {
  393   double ljk0, ljk1, ljk2, ljk3, ljk4, ljk5, ljk6, ljk7;
  394   int increment;
  395   double *theTmp, *last;
  396   double *theNZ0, *theNZ1, *theNZ2, *theNZ3, *theNZ4, *theNZ5, *theNZ6,*theNZ7;
  397   double t0, t1, t2, t3;
  398 
  399   last = &L.nz[L.lastnz[col]];
  400   theTmp = &destination[0];
  401 
  402   increment = L.lastnz[col] - L.firstnz[col] - 1;
  403 
  404   theNZ0 = &L.nz[theFirst + L.firstnz[col] + (panel-col)];
  405   ljk0 = *theNZ0++;
  406   increment--;
  407 
  408   theNZ1 = theNZ0 + increment;
  409   ljk1 = *theNZ1++;
  410   increment--;
  411 
  412   theNZ2 = theNZ1 + increment;
  413   ljk2 = *theNZ2++;
  414   increment--;
  415 
  416   theNZ3 = theNZ2 + increment;
  417   ljk3 = *theNZ3++;
  418   increment--;
  419 
  420   theNZ4 = theNZ3 + increment;
  421   ljk4 = *theNZ4++;
  422   increment--;
  423 
  424   theNZ5 = theNZ4 + increment;
  425   ljk5 = *theNZ5++;
  426   increment--;
  427 
  428   theNZ6 = theNZ5 + increment;
  429   ljk6 = *theNZ6++;
  430   increment--;
  431 
  432   theNZ7 = theNZ6 + increment;
  433   ljk7 = *theNZ7++;
  434 
  435   *theTmp++ -= ljk0*ljk0 + ljk1*ljk1 + ljk2*ljk2 + ljk3*ljk3
  436      + ljk4*ljk4 + ljk5*ljk5 + ljk6*ljk6 + ljk7*ljk7;
  437 
  438 
  439   long i = theNZ0, e = last;
  440 //  while (theNZ0 != last) {
  441   //while (i!=e){
  442   for(; i < e; i += sizeof(double)) {
  443      //if ( theNZ0 != last) break;
  444 
  445      t0 = *theTmp;
  446     //t0 -= ljk0*(*theNZ0++);
  447     t0 -= ljk0*(*theNZ0++);
  448     t0 -= ljk1*(*theNZ1++);
  449     t0 -= ljk2*(*theNZ2++);
  450     t0 -= ljk3*(*theNZ3++);
  451     t0 -= ljk4*(*theNZ4++);
  452     t0 -= ljk5*(*theNZ5++);
  453     t0 -= ljk6*(*theNZ6++);
  454     t0 -= ljk7*(*theNZ7++);
  455   //  *(theTmp+i) = t0;
  456     *theTmp++ = t0;
  457   }
  458 
  459 #ifdef COUNT
  460   AUG_OFF
  461   ops += 2*8*(L.lastnz[col] - (theFirst+L.firstnz[col]+(panel-col)));
  462   AUG_ON
  463 #endif
  464 }