1 #include <omp.h> 2 #include <stdio.h> 3 #include <matrix.h> 4 #include <glb.h> 5 6 7 #define AddMember(set, new) { int s, n; s = set; n = new; \ 8 linknz[n] = linknz[s]; linknz[s] = n; } 9 10 /* Compute the elimination tree from L */ 11 12 int *EliminationTree(pM) 13 SMatrix pM; 14 { 15 int *parent, k, f; 16 17 parent = (int *) GetShMem(pM.n*sizeof(int)); 18 19 for (k=0; k<pM.n; k++) { 20 21 f = First(pM, k); 22 23 if (k == f) 24 parent[k] = -1; 25 else parent[k] = f; 26 } 27 28 /* PrintTree(parent, pM.n); */ 29 30 printf("Max depth of %d\n", MaxDepth(parent, pM.n)); 31 32 return(parent); 33 } 34 35 36 ParentToChild(pT, n) 37 int *pT; 38 { 39 int i, k, parent, count=0; 40 int *next; 41 42 firstchild = (int *) GetShMem((n+1)*sizeof(int)); 43 child = (int *) GetShMem(n*sizeof(int)); 44 next = (int *) GetShMem(2*n*sizeof(int)); 45 46 for (i=0; i<n; i++) 47 firstchild[i] = next[i] = -1; 48 49 for (i=n-1; i>=0; i--) { 50 parent = pT[i]; 51 if (parent != -1) { 52 next[i] = firstchild[parent]; 53 firstchild[parent] = i; 54 /* if (firstchild[parent] == -1) 55 firstchild[parent] = i; 56 else { 57 k = firstchild[parent]; 58 while (next[k] != -1) 59 k = next[k]; 60 next[k] = i; 61 } */ 62 } 63 } 64 65 for (i=0; i<n; i++) { 66 k = firstchild[i]; 67 firstchild[i] = count; 68 while (k != -1) { 69 child[count++] = k; 70 k = next[k]; 71 } 72 } 73 firstchild[n] = count; 74 75 /* if (count != n-1) 76 printf("Not enough children\n"); */ 77 78 FreeShMem(next); 79 80 /* for (i=0; i<n; i++) 81 printf("%d: %d %d %d\n", i, pT[i], firstchild[i], child[i]); */ 82 } 83 84 85 PrintTree(parent, size) 86 int *parent; 87 { 88 int k; 89 int *depth, *Depth(); 90 91 depth = Depth(parent, size); 92 93 for (k=0; k<size; k++) 94 printf("%d: Parent %d, depth %d\n", k, parent[k], depth[k]); 95 } 96 97 MaxDepth(parent, size) 98 int *parent; 99 { 100 int k, d, maxd=0, total=0; 101 int *depth, *Depth(); 102 103 depth = Depth(parent, size); 104 105 for (k=0; k<size; k++) { 106 d = depth[k]; 107 if (d > maxd) maxd = d; 108 total += d+1; 109 } 110 111 /* printf("Total is %d\n", total); */ 112 113 node_depth = depth; 114 max_depth = maxd; 115 116 return(maxd); 117 } 118 119 120 /* Assumes parent[n] > n */ 121 122 int *Depth(parent, n) 123 int *parent; 124 { 125 int i, *d; 126 127 d = (int *) GetShMem(n*sizeof(int)); 128 129 for (i=n-1; i>=0; i--) { 130 if (parent[i] == -1) 131 d[i] = 0; 132 else d[i] = d[parent[i]] + 1; 133 } 134 135 return(d); 136 } 137 138 139 140 int *EliminationTreeFromA(A) 141 SMatrix A; 142 { 143 int *parent, *subtree; 144 int i, newi, j, r, nextr, root; 145 int *linknz, *first; 146 147 parent = (int *) GetShMem(A.n*sizeof(int)); 148 subtree = (int *) GetShMem(A.n*sizeof(int)); 149 150 first = (int *) GetShMem(A.n*sizeof(int)); 151 linknz = (int *) GetShMem(A.n*sizeof(int)); 152 for (i=0; i<A.n; i++) 153 linknz[i] = -1; 154 155 for (i=0; i<A.n; i++) 156 parent[i] = subtree[i] = -1; 157 158 for (j=0; j<A.n; j++) { 159 i = linknz[j]; 160 while (i != -1) { 161 newi = linknz[i]; 162 for (r=i; subtree[r] != -1; r = subtree[r]) 163 ; 164 if (r != j) 165 parent[r] = subtree[r] = root = j; 166 else root = r; 167 168 r = i; 169 while (subtree[r] != -1) { 170 nextr = subtree[r]; 171 subtree[r] = root; 172 r = nextr; 173 } 174 175 first[i]++; 176 if (first[i] < A.firstnz[i+1]) 177 AddMember(A.row[A.startrow[i]-A.firstnz[i]+first[i]], i); 178 i = newi; /* move on to next nz */ 179 } 180 181 first[j] = A.firstnz[j]+1; 182 if (first[j] < A.firstnz[j+1]) 183 AddMember(A.row[A.startrow[j]-A.firstnz[j]+first[j]], j); 184 } 185 186 /* PrintTree(parent, A.n); */ 187 188 FreeShMem(subtree); 189 FreeShMem(linknz); 190 FreeShMem(first); 191 192 return(parent); 193 } 194 195 196 ComputeWork(pL) 197 SMatrix pL; 198 { 199 int i, j, newj, k, operations; 200 int *nz, *marker; 201 int *linknz, *first; 202 int pln_local; 203 204 work = (int *) GetShMem(pL.n*sizeof(int)); 205 for (i=0; i<pL.n; i++) 206 work[i] = 0; 207 208 nz = (int *) GetShMem(pL.n*sizeof(int)); 209 marker = (int *) GetShMem(pL.n*sizeof(int)); 210 for (i=0; i<pL.n; i++) 211 nz[i] = 0; 212 213 linknz = (int *) GetShMem(pL.n*sizeof(int)); 214 first = (int *) GetShMem(pL.n*sizeof(int)); 215 for (i=0; i<pL.n; i++) 216 linknz[i] = -1; 217 218 pln_local = pL.n; 219 #pragma omp parallel for private (newj, k, j) schedule(static) 220 for (i=0; i<pln_local; i++) { 221 marker[i] = -1; 222 j = linknz[i]; 223 while (j != -1) { 224 newj = linknz[j]; 225 k = j; 226 while (marker[k] != i && k < i) { 227 nz[k]++; 228 marker[k] = i; 229 k = T[k]; 230 } 231 first[j]++; 232 if (first[j] < M.firstnz[j+1]) 233 AddMember(M.row[M.startrow[j]-M.firstnz[j]+first[j]], j); 234 j = newj; 235 } 236 first[i] = M.firstnz[i]+1; 237 if (first[i] < M.firstnz[i+1]) 238 AddMember(M.row[M.startrow[i]-M.firstnz[i]+first[i]], i); 239 } 240 241 for (i=0; i<pL.n; i++) 242 marker[i] = linknz[i] = -1; 243 244 pln_local = pL.n; 245 #pragma omp parallel for private (newj, k, j) schedule(static) 246 for (i=0; i<pln_local; i++) { 247 work[i] += nz[i]+10; 248 j = linknz[i]; 249 while (j != -1) { 250 newj = linknz[j]; 251 k = j; 252 while (marker[k] != i && k < i) { 253 work[i] += 2*nz[k]; 254 marker[k] = i; 255 nz[k]--; 256 k = T[k]; 257 } 258 first[j]++; 259 if (first[j] < M.firstnz[j+1]) 260 AddMember(M.row[M.startrow[j]-M.firstnz[j]+first[j]], j); 261 j = newj; 262 } 263 first[i] = M.firstnz[i]+1; 264 if (first[i] < M.firstnz[i+1]) 265 AddMember(M.row[M.startrow[i]-M.firstnz[i]+first[i]], i); 266 } 267 268 operations = 0; 269 for (i=0; i<pL.n; i++) 270 operations += work[i]; 271 272 printf("%d operations\n", operations); 273 274 #ifndef TANGO 275 ops = operations; 276 #endif 277 278 work_tree = (int *) GetShMem(pL.n*sizeof(int)); 279 ComputeWorkTree(pL, pL.n-1); 280 281 FreeShMem(nz); FreeShMem(marker); 282 FreeShMem(linknz); FreeShMem(first); 283 } 284 285 286 ComputeWorkTree(pL, root) 287 SMatrix pL; 288 int root; 289 { 290 int i, end, child_ops = 0, child_data = 0; 291 292 for (i=firstchild[root], end = firstchild[root+1]; i<end; i++) { 293 ComputeWorkTree(pL, child[i]); 294 child_ops += work_tree[child[i]]; 295 } 296 297 work_tree[root] = work[root] + child_ops; 298 }