1 #include <omp.h> 2 #include <stdio.h> 3 #include <matrix.h> 4 #include <glb.h> 5 #include <stdlib.h> 6 /* #define DEBUG */ 7 8 #define ALLOC 20*pM.n 9 10 InitSets(n) 11 { 12 int i; 13 14 link = (int *) GetShMem(n*sizeof(int)); 15 glb_nz = (int *) GetShMem(n*sizeof(int)); 16 17 for (i=0; i<n; i++) { 18 link[i] = -1; 19 } 20 } 21 22 AddNZ(n) 23 { 24 int i; 25 26 printf("Add %d\n", n); 27 28 for (i=first_member; i != -1; i = glb_nz[i]) { 29 printf("%d ", i); 30 if (i == n) { 31 printf("%d already\n", n); 32 return; 33 } 34 } 35 printf("\n"); 36 37 glb_nz[n] = first_member; 38 first_member = n; 39 } 40 41 AddMember(s, n) /* Add n to set s */ 42 { 43 link[n] = link[s]; 44 link[s] = n; 45 } 46 47 PrintMembers(s) 48 { 49 int i; 50 51 for (i=link[s]; i!=-1; i=link[i]) 52 printf("%d ", i); 53 printf("\n"); 54 } 55 56 First(pL, k) 57 SMatrix pL; 58 { 59 int f, i, *theRow; 60 61 f = pL.n; 62 theRow = pL.row + (pL.startrow[k]-pL.firstnz[k]); 63 for (i=pL.firstnz[k]; i<pL.firstnz[k+1]; i++) 64 if (theRow[i] < f && theRow[i] > k) 65 f = theRow[i]; 66 if (f == pL.n) f = k; 67 #ifdef DEBUG 68 printf("First is %d\n", f); 69 #endif 70 return(f); 71 } 72 73 PrintNZ(j) 74 { 75 int i; 76 77 for (i=j; i != -1; i = glb_nz[i]) 78 printf("%d ", i); 79 printf("\n"); 80 81 /* for (i=j; i != -1; i = nz[i]) 82 printf("%d %d\n", i, j); */ 83 } 84 85 Copy(pM, j, k) 86 SMatrix pM; 87 { 88 int i, i_ind, prev; 89 90 first_member = k; 91 prev = first_member; 92 for (i_ind=pM.firstnz[j]; i_ind<pM.firstnz[j+1]; i_ind++) { 93 i = pM.row[i_ind]; 94 if (i > k) { 95 #ifdef DEBUG 96 printf("%d %d\n", i, j); 97 #endif 98 glb_nz[prev] = i; 99 prev = i; 100 } 101 } 102 glb_nz[prev] = -1; 103 104 /* PrintNZ(j); */ 105 } 106 107 Merge(pL, j, k) 108 SMatrix pL; 109 { 110 int i, i_ind, lag, lead; 111 112 lag = j; lead = glb_nz[lag]; 113 114 for (i_ind=pL.firstnz[j]; i_ind < pL.firstnz[j+1]; i_ind++) { 115 i = pL.row[i_ind + pL.startrow[j] - pL.firstnz[j]]; 116 if (i > k) { 117 while (lead != -1 && lead < i) { 118 lag = lead; 119 lead = glb_nz[lag]; 120 } 121 if (lead != i) { 122 glb_nz[i] = lead; 123 glb_nz[lag] = i; 124 lead = i; 125 } 126 } 127 } 128 /* PrintNZ(k); */ 129 } 130 131 Subset(pM, pL, k, j) 132 SMatrix pM, pL; 133 { 134 int i, l_ind; 135 int *theRow; 136 137 #ifdef DEBUG 138 printf("Check sub\n"); 139 140 printf("Is:\n"); 141 142 for (i=pM.firstnz[k]; i<pM.firstnz[k+1]; i++) 143 printf("%d %d\n", pM.row[i], k); 144 145 printf("a subset of? \n"); 146 147 for (i=pL.firstnz[j]; i<pL.firstnz[j+1]; i++) 148 printf("%d %d\n", pL.row[i+pL.startrow[j]-pL.firstnz[j]], j); 149 #endif 150 151 l_ind = pL.firstnz[j]; 152 153 for (i=pM.firstnz[k]; i<pM.firstnz[k+1]; i++) { 154 theRow = pL.row + (pL.startrow[j] - pL.firstnz[j]); 155 if (pM.row[i] > k) { 156 while (l_ind < pL.firstnz[j+1] && theRow[l_ind] < pM.row[i]){ 157 l_ind++; 158 } 159 if (l_ind == pL.firstnz[j+1] || theRow[l_ind] != pM.row[i]) 160 return(0); 161 l_ind++; 162 } 163 } 164 165 #ifdef DEBUG 166 printf("Yes!\n"); 167 #endif 168 169 return(1); 170 } 171 172 173 /* probably doesn't work */ 174 175 TailsTheSame(pL, k, start, finish) 176 SMatrix pL; 177 int *start, *finish; 178 { 179 int prev, which, last; 180 181 if (!k) return(0); 182 183 DumpStructure(pL, k-1); 184 PrintNZ(glb_nz[k]); 185 186 prev = pL.startrow[k-1]; 187 last = pL.startrow[k-1]+pL.firstnz[k]-pL.firstnz[k-1]; 188 while (pL.row[prev] < glb_nz[first_member] && prev < last) 189 prev++; 190 191 if (prev == last) 192 return(0); 193 else if (pL.row[prev] > glb_nz[first_member]) 194 return(0); 195 196 /* it's equal */ 197 198 *start = prev; 199 200 which = glb_nz[first_member]; 201 while (prev < last) { 202 if (pL.row[prev] !=which) 203 break; 204 prev++; 205 which = glb_nz[which]; 206 } 207 208 *finish = which; 209 210 printf("%d %d\n", *start, *finish); 211 212 return(prev == last); 213 } 214 215 216 217 SMatrix SymbolicFactor(pM) 218 SMatrix pM; 219 { 220 int j, k, knz, lmax, inz, lind, start, finish; 221 SMatrix pL; 222 int f, ind=0; 223 224 InitSets(pM.n); 225 226 pL = NewMatrix(pM.n, ALLOC, 0); 227 228 pL.firstnz[0] = 0; 229 230 for (k=0; k<pM.n; k++) { 231 232 #ifdef DEBUG 233 printf("Column %d\n", k); 234 #endif 235 236 if (link[k] != -1 && link[link[k]] == -1 && 237 Subset(pM, pL, k, link[k])) { 238 /* Copy(pL, link[k], k); */ 239 pL.startrow[k] = pL.startrow[link[k]] + 1; 240 knz = pL.firstnz[link[k]+1] - pL.firstnz[link[k]] - 1; 241 /* printf("Skipping, size %d\n", knz); */ 242 } 243 else { 244 #ifdef DEBUG 245 printf("Copy %d\n", k); 246 #endif 247 Copy(pM, k, k); 248 249 lmax = 0; 250 for (j=link[k]; j != -1; j = link[j]) { 251 #ifdef DEBUG 252 printf("Add pL[%d]\n", j); 253 #endif 254 Merge(pL, j, k); 255 256 inz = pL.firstnz[j+1]-pL.firstnz[j]-1; 257 if (inz > lmax) { 258 pL.startrow[k] = pL.startrow[j]+1; 259 lmax = inz; 260 lind = j; 261 } 262 } 263 264 knz = 1; 265 for (j=glb_nz[first_member]; j!=-1; j=glb_nz[j]) 266 knz++; 267 268 if (knz == lmax) { 269 /* printf("Another, %d mirrors %d\n", k, lind); */ 270 } 271 /* else if (TailsTheSame(pL, k, &start, &finish)) { 272 273 pL.startrow[k] = start; 274 275 for (j=finish; j!=-1; j=nz[j]) { 276 if (ind >= ALLOC) { 277 printf("Overflow in symb, iter %d/%d\n", k, 278 pM.n); 279 exit(0); 280 } 281 pL.row[ind] = j; 282 ind++; 283 } 284 } */ 285 else { 286 287 pL.startrow[k] = ind; 288 pL.row[ind++] = k; 289 290 for (j=glb_nz[first_member]; j!=-1; j=glb_nz[j]) { 291 if (ind >= ALLOC) { 292 printf("Overflow in symb, iter %d/%d\n", k, 293 pM.n); 294 exit(0); 295 } 296 pL.row[ind++] = j; 297 #ifdef DEBUG 298 printf("NZ at %d %d\n", j, k); 299 #endif 300 } 301 } 302 } 303 304 pL.firstnz[k+1] = pL.firstnz[k] + knz; 305 306 #ifdef DEBUG 307 printf("size %d: ", knz); 308 DumpStructure(pL, k); 309 #endif 310 311 f = First(pL, k); 312 313 if (f > k) 314 AddMember(f, k); 315 316 /* PrintMembers(k); */ 317 } 318 319 pL.startrow[pL.n] = ind; 320 pL.m = pL.firstnz[pL.n]; 321 322 pL.nz = NewVector(pL.m); 323 324 for (k=0; k<pL.n; k++) 325 pL.lastnz[k] = pL.firstnz[k+1]; 326 327 FillElements(pL, pM); 328 329 printf("pM.n = %d, pM.m = %d, pL.m = %d, pL.ind = %d, %d alloc\n", 330 pM.n, pM.m-pM.n, pL.m-pL.n, ind, ALLOC); 331 printf("%d k for pM, %d k for pL\n", 332 MatSize(pM,1)/1024, MatSize(pL,1)/1024); 333 334 FreeShMem(link); 335 FreeShMem(glb_nz); 336 337 /* WriteSparse3(stdout, pL, 0); */ 338 339 LookForSupernodes(pL); 340 341 return(pL); 342 } 343 344 FillElements(pL, pM) 345 SMatrix pL, pM; 346 { 347 int i, j, ind=0; 348 int *theRow; 349 350 /* for (i=0; i<pM.n; i++) 351 pL.diag[i] = pM.diag[i]; */ 352 353 #pragma omp parallel for private (ind, j, theRow) schedule(dynamic) 354 for (i=0; i<pM.n; i++) { 355 /* ISort(pL, i); */ 356 ind = pL.firstnz[i]; 357 for (j=pM.firstnz[i]; j<pM.lastnz[i]; j++) { 358 theRow = pL.row + (pL.startrow[i]-pL.firstnz[i]); 359 if (i <= pM.row[j]) { 360 while (theRow[ind] != pM.row[j]) { 361 pL.nz[ind++] = 0.0; 362 } 363 pL.nz[ind++] = pM.nz[j]; 364 } 365 } 366 while (ind < pL.lastnz[i]) 367 pL.nz[ind++] = 0.0; 368 } 369 } 370 371 DumpStructure(pL, n) 372 SMatrix pL; 373 { 374 int i, *theRow; 375 376 theRow = pL.row + (pL.startrow[n]-pL.firstnz[n]); 377 378 printf("Structure of %d: ", n); 379 for (i=pL.firstnz[n]; i<pL.firstnz[n+1]; i++) 380 printf("%d ", theRow[i]); 381 printf("\n"); 382 } 383 384 LookForSupernodes(pL) 385 SMatrix pL; 386 { 387 int i, supers, current, current_size, size, max_super = 0; 388 389 node = (int *) GetShMem(pL.n*sizeof(int)); 390 printf("Look : node = %lx\n", node); 391 392 current = 0; 393 current_size = pL.firstnz[1]; 394 supers = 1; 395 size = 1; 396 for (i=1; i<pL.n; i++) 397 if ((pL.startrow[i] == pL.startrow[i-1] + 1) && 398 (pL.firstnz[i+1]-pL.firstnz[i]) == (pL.firstnz[i] - pL.firstnz[i-1] - 1) && 399 (firstchild[i+1] - firstchild[i] == 1)) { 400 node[i] = current-i; 401 current_size += pL.firstnz[i+1]-pL.firstnz[i]; 402 size++; 403 } 404 else { 405 node[current] = size; 406 if (size > max_super) 407 max_super = size; 408 supers++; 409 current = i; 410 current_size = pL.firstnz[i+1]-pL.firstnz[i]; 411 size = 1; 412 } 413 414 node[current] = size; 415 if (size > max_super) 416 max_super = size; 417 418 printf("%d supers, %4.2f nodes/super, %d max super\n", 419 supers, pL.n/(double) supers, max_super); 420 }