download the original source code.
  1 /*
  2    Example 16
  3 
  4    Interface:      Semi-Structured interface (SStruct)
  5 
  6    Compile with:   make ex16
  7 
  8    Sample run:     mpirun -np 4 ex16 -n 10
  9 
 10    To see options: ex16 -help
 11 
 12    Description:    This code solves the 2D Laplace equation using a high order
 13                    Q3 finite element discretization.  Specifically, we solve
 14                    -Delta u = 1 with zero boundary conditions on a unit square
 15                    domain meshed with a uniform grid.  The mesh is distributed
 16                    across an N x N process grid, with each processor containing
 17                    an n x n sub-mesh of data, so the global mesh is nN x nN.
 18 */
 19 
 20 #include <math.h>
 21 #include "_hypre_utilities.h"
 22 #include "HYPRE_sstruct_mv.h"
 23 #include "HYPRE_sstruct_ls.h"
 24 #include "HYPRE.h"
 25 
 26 #include "vis.c"
 27 
 28 /*
 29    This routine computes the stiffness matrix for the Laplacian on a square of
 30    size h, using bi-cubic elements with degrees of freedom in lexicographical
 31    ordering.  So, the element looks as follows:
 32 
 33                           [12]-[13]-[14]-[15]
 34                             |              |
 35                            [8]  [9] [10] [11]
 36                             |              |
 37                            [4]  [5]  [6]  [7]
 38                             |              |
 39                            [0]--[1]--[2]--[3]
 40 */
 41 void ComputeFEMQ3 (double S[16][16], double F[16], double h)
 42 {
 43    int i, j;
 44    double s = 1.0/33600;
 45    double h2_64 = h*h/64;
 46 
 47    S[ 0][ 0] = 18944*s;
 48    S[ 0][ 1] = -4770*s;
 49    S[ 0][ 2] = 792*s;
 50    S[ 0][ 3] = 574*s;
 51    S[ 0][ 4] = -4770*s;
 52    S[ 0][ 5] = -18711*s;
 53    S[ 0][ 6] = 6075*s;
 54    S[ 0][ 7] = -2439*s;
 55    S[ 0][ 8] = 792*s;
 56    S[ 0][ 9] = 6075*s;
 57    S[ 0][10] = -1944*s;
 58    S[ 0][11] = 747*s;
 59    S[ 0][12] = 574*s;
 60    S[ 0][13] = -2439*s;
 61    S[ 0][14] = 747*s;
 62    S[ 0][15] = -247*s;
 63 
 64    S[ 1][ 1] = 75600*s;
 65    S[ 1][ 2] = -25002*s;
 66    S[ 1][ 3] = 792*s;
 67    S[ 1][ 4] = -18711*s;
 68    S[ 1][ 5] = -39852*s;
 69    S[ 1][ 6] = -7047*s;
 70    S[ 1][ 7] = 6075*s;
 71    S[ 1][ 8] = 6075*s;
 72    S[ 1][ 9] = 9720*s;
 73    S[ 1][10] = 3159*s;
 74    S[ 1][11] = -1944*s;
 75    S[ 1][12] = -2439*s;
 76    S[ 1][13] = -108*s;
 77    S[ 1][14] = -2295*s;
 78    S[ 1][15] = 747*s;
 79 
 80    S[ 2][ 2] = 75600*s;
 81    S[ 2][ 3] = -4770*s;
 82    S[ 2][ 4] = 6075*s;
 83    S[ 2][ 5] = -7047*s;
 84    S[ 2][ 6] = -39852*s;
 85    S[ 2][ 7] = -18711*s;
 86    S[ 2][ 8] = -1944*s;
 87    S[ 2][ 9] = 3159*s;
 88    S[ 2][10] = 9720*s;
 89    S[ 2][11] = 6075*s;
 90    S[ 2][12] = 747*s;
 91    S[ 2][13] = -2295*s;
 92    S[ 2][14] = -108*s;
 93    S[ 2][15] = -2439*s;
 94 
 95    S[ 3][ 3] = 18944*s;
 96    S[ 3][ 4] = -2439*s;
 97    S[ 3][ 5] = 6075*s;
 98    S[ 3][ 6] = -18711*s;
 99    S[ 3][ 7] = -4770*s;
100    S[ 3][ 8] = 747*s;
101    S[ 3][ 9] = -1944*s;
102    S[ 3][10] = 6075*s;
103    S[ 3][11] = 792*s;
104    S[ 3][12] = -247*s;
105    S[ 3][13] = 747*s;
106    S[ 3][14] = -2439*s;
107    S[ 3][15] = 574*s;
108 
109    S[ 4][ 4] = 75600*s;
110    S[ 4][ 5] = -39852*s;
111    S[ 4][ 6] = 9720*s;
112    S[ 4][ 7] = -108*s;
113    S[ 4][ 8] = -25002*s;
114    S[ 4][ 9] = -7047*s;
115    S[ 4][10] = 3159*s;
116    S[ 4][11] = -2295*s;
117    S[ 4][12] = 792*s;
118    S[ 4][13] = 6075*s;
119    S[ 4][14] = -1944*s;
120    S[ 4][15] = 747*s;
121 
122    S[ 5][ 5] = 279936*s;
123    S[ 5][ 6] = -113724*s;
124    S[ 5][ 7] = 9720*s;
125    S[ 5][ 8] = -7047*s;
126    S[ 5][ 9] = -113724*s;
127    S[ 5][10] = 24057*s;
128    S[ 5][11] = 3159*s;
129    S[ 5][12] = 6075*s;
130    S[ 5][13] = 9720*s;
131    S[ 5][14] = 3159*s;
132    S[ 5][15] = -1944*s;
133 
134    S[ 6][ 6] = 279936*s;
135    S[ 6][ 7] = -39852*s;
136    S[ 6][ 8] = 3159*s;
137    S[ 6][ 9] = 24057*s;
138    S[ 6][10] = -113724*s;
139    S[ 6][11] = -7047*s;
140    S[ 6][12] = -1944*s;
141    S[ 6][13] = 3159*s;
142    S[ 6][14] = 9720*s;
143    S[ 6][15] = 6075*s;
144 
145    S[ 7][ 7] = 75600*s;
146    S[ 7][ 8] = -2295*s;
147    S[ 7][ 9] = 3159*s;
148    S[ 7][10] = -7047*s;
149    S[ 7][11] = -25002*s;
150    S[ 7][12] = 747*s;
151    S[ 7][13] = -1944*s;
152    S[ 7][14] = 6075*s;
153    S[ 7][15] = 792*s;
154 
155    S[ 8][ 8] = 75600*s;
156    S[ 8][ 9] = -39852*s;
157    S[ 8][10] = 9720*s;
158    S[ 8][11] = -108*s;
159    S[ 8][12] = -4770*s;
160    S[ 8][13] = -18711*s;
161    S[ 8][14] = 6075*s;
162    S[ 8][15] = -2439*s;
163 
164    S[ 9][ 9] = 279936*s;
165    S[ 9][10] = -113724*s;
166    S[ 9][11] = 9720*s;
167    S[ 9][12] = -18711*s;
168    S[ 9][13] = -39852*s;
169    S[ 9][14] = -7047*s;
170    S[ 9][15] = 6075*s;
171 
172    S[10][10] = 279936*s;
173    S[10][11] = -39852*s;
174    S[10][12] = 6075*s;
175    S[10][13] = -7047*s;
176    S[10][14] = -39852*s;
177    S[10][15] = -18711*s;
178 
179    S[11][11] = 75600*s;
180    S[11][12] = -2439*s;
181    S[11][13] = 6075*s;
182    S[11][14] = -18711*s;
183    S[11][15] = -4770*s;
184 
185    S[12][12] = 18944*s;
186    S[12][13] = -4770*s;
187    S[12][14] = 792*s;
188    S[12][15] = 574*s;
189 
190    S[13][13] = 75600*s;
191    S[13][14] = -25002*s;
192    S[13][15] = 792*s;
193 
194    S[14][14] = 75600*s;
195    S[14][15] = -4770*s;
196 
197    S[15][15] = 18944*s;
198 
199    /* The stiffness matrix is symmetric */
200    for (i = 1; i < 16; i++)
201       for (j = 0; j < i; j++)
202          S[i][j] = S[j][i];
203 
204    F[ 0] = h2_64;
205    F[ 1] = 3*h2_64;
206    F[ 2] = 3*h2_64;
207    F[ 3] = h2_64;
208    F[ 4] = 3*h2_64;
209    F[ 5] = 9*h2_64;
210    F[ 6] = 9*h2_64;
211    F[ 7] = 3*h2_64;
212    F[ 8] = 3*h2_64;
213    F[ 9] = 9*h2_64;
214    F[10] = 9*h2_64;
215    F[11] = 3*h2_64;
216    F[12] = h2_64;
217    F[13] = 3*h2_64;
218    F[14] = 3*h2_64;
219    F[15] = h2_64;
220 }
221 
222 
223 int main (int argc, char *argv[])
224 {
225    int myid, num_procs;
226    int n, N, pi, pj;
227    double h;
228    int vis;
229 
230    HYPRE_SStructGrid     grid;
231    HYPRE_SStructGraph    graph;
232    HYPRE_SStructMatrix   A;
233    HYPRE_SStructVector   b;
234    HYPRE_SStructVector   x;
235 
236    HYPRE_Solver          solver;
237 
238    /* Initialize MPI */
239    MPI_Init(&argc, &argv);
240    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
241    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
242 
243    /* Set default parameters */
244    n = 10;
245    vis = 0;
246 
247    /* Parse command line */
248    {
249       int arg_index = 0;
250       int print_usage = 0;
251 
252       while (arg_index < argc)
253       {
254          if ( strcmp(argv[arg_index], "-n") == 0 )
255          {
256             arg_index++;
257             n = atoi(argv[arg_index++]);
258          }
259          else if ( strcmp(argv[arg_index], "-vis") == 0 )
260          {
261             arg_index++;
262             vis = 1;
263          }
264          else if ( strcmp(argv[arg_index], "-help") == 0 )
265          {
266             print_usage = 1;
267             break;
268          }
269          else
270          {
271             arg_index++;
272          }
273       }
274 
275       if ((print_usage) && (myid == 0))
276       {
277          printf("\n");
278          printf("Usage: %s [<options>]\n", argv[0]);
279          printf("\n");
280          printf("  -n <n>           : problem size per processor (default: 10)\n");
281          printf("  -vis             : save the solution for GLVis visualization\n");
282          printf("\n");
283       }
284 
285       if (print_usage)
286       {
287          MPI_Finalize();
288          return (0);
289       }
290    }
291 
292    /* Figure out the processor grid (N x N).  The local problem size is n^2,
293       while pi and pj indicate the position in the processor grid. */
294    N  = pow(num_procs,1.0/2.0) + 0.5;
295    if (num_procs != N*N)
296    {
297       if (myid == 0)
298       {
299          printf("Can't run on %d processors, try %d.\n", num_procs, N*N);
300       }
301       MPI_Finalize();
302       exit(1);
303    }
304    h  = 1.0 / (N*n);
305    pj = myid / N;
306    pi = myid - pj*N;
307 
308    /* 1. Set up the grid.  For simplicity we use only one part to represent the
309          unit square. */
310    {
311       int ndim = 2;
312       int nparts = 1;
313 
314       /* Create an empty 2D grid object */
315       HYPRE_SStructGridCreate(MPI_COMM_WORLD, ndim, nparts, &grid);
316 
317       /* Set the extents of the grid - each processor sets its grid boxes. */
318       {
319          int part = 0;
320          int ilower[2] = {1 + pi*n, 1 + pj*n};
321          int iupper[2] = {n + pi*n, n + pj*n};
322 
323          HYPRE_SStructGridSetExtents(grid, part, ilower, iupper);
324       }
325 
326       /* Set the variable type and number of variables on each part.  There is
327          one variable of type NODE, two of type XFACE, two of type YFACE, and
328          four of type CELL. */
329       {
330          int i;
331          int nvars = 9;
332 
333          HYPRE_SStructVariable vars[9] = {HYPRE_SSTRUCT_VARIABLE_NODE,
334                                           HYPRE_SSTRUCT_VARIABLE_XFACE,
335                                           HYPRE_SSTRUCT_VARIABLE_XFACE,
336                                           HYPRE_SSTRUCT_VARIABLE_YFACE,
337                                           HYPRE_SSTRUCT_VARIABLE_YFACE,
338                                           HYPRE_SSTRUCT_VARIABLE_CELL,
339                                           HYPRE_SSTRUCT_VARIABLE_CELL,
340                                           HYPRE_SSTRUCT_VARIABLE_CELL,
341                                           HYPRE_SSTRUCT_VARIABLE_CELL};
342          for (i = 0; i < nparts; i++)
343          {
344             HYPRE_SStructGridSetVariables(grid, i, nvars, vars);
345          }
346       }
347 
348       /* Set the ordering of the variables in the finite element problem.  This
349          is done by listing the variable numbers and offset directions relative
350          to the element's center.  See the Reference Manual for more details.
351          The ordering and location of the nine variables in each element is as
352          follows (notation is [order# : variable#]):
353 
354                           [12:0]-[13:3]-[14:4]-[15:0]
355                              |                    |
356                              |                    |
357                            [8:2]  [9:7] [10:8] [11:2]
358                              |                    |
359                              |                    |
360                            [4:1]  [5:5]  [6:6]  [7:1]
361                              |                    |
362                              |                    |
363                            [0:0]--[1:3]--[2:4]--[3:0]
364       */
365       {
366          int part = 0;
367          int ordering[48] = { 0,-1,-1,   3, 0,-1,   4, 0,-1,   0,+1,-1,
368                               1,-1, 0,   5, 0, 0,   6, 0, 0,   1,+1, 0,
369                               2,-1, 0,   7, 0, 0,   8, 0, 0,   2,+1, 0,
370                               0,-1,+1,   3, 0,+1,   4, 0,+1,   0,+1,+1  };
371 
372          HYPRE_SStructGridSetFEMOrdering(grid, part, ordering);
373       }
374 
375       /* Now the grid is ready to be used */
376       HYPRE_SStructGridAssemble(grid);
377    }
378 
379    /* 2. Set up the Graph - this determines the non-zero structure of the
380          matrix. */
381    {
382       int part = 0;
383 
384       /* Create the graph object */
385       HYPRE_SStructGraphCreate(MPI_COMM_WORLD, grid, &graph);
386 
387       /* See MatrixSetObjectType below */
388       HYPRE_SStructGraphSetObjectType(graph, HYPRE_PARCSR);
389 
390       /* Indicate that this problem uses finite element stiffness matrices and
391          load vectors, instead of stencils. */
392       HYPRE_SStructGraphSetFEM(graph, part);
393 
394       /* The local stiffness matrix is full, so there is no need to call
395          HYPRE_SStructGraphSetFEMSparsity() to set its sparsity pattern. */
396 
397       /* Assemble the graph */
398       HYPRE_SStructGraphAssemble(graph);
399    }
400 
401    /* 3. Set up the SStruct Matrix and right-hand side vector */
402    {
403       int part = 0;
404 
405       /* Create the matrix object */
406       HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, graph, &A);
407       /* Use a ParCSR storage */
408       HYPRE_SStructMatrixSetObjectType(A, HYPRE_PARCSR);
409       /* Indicate that the matrix coefficients are ready to be set */
410       HYPRE_SStructMatrixInitialize(A);
411 
412       /* Create an empty vector object */
413       HYPRE_SStructVectorCreate(MPI_COMM_WORLD, grid, &b);
414       /* Use a ParCSR storage */
415       HYPRE_SStructVectorSetObjectType(b, HYPRE_PARCSR);
416       /* Indicate that the vector coefficients are ready to be set */
417       HYPRE_SStructVectorInitialize(b);
418 
419       /* Set the matrix and vector entries by finite element assembly */
420       {
421          /* Local stifness matrix and load vector */
422          double S[16][16], F[16];
423 
424          int i, j;
425          int index[2];
426 
427          for (j = 1; j <= n; j++)
428          {
429             for (i = 1; i <= n; i++)
430             {
431                index[0] = i + pi*n;
432                index[1] = j + pj*n;
433 
434                /* Compute the FEM matrix and rhs */
435                ComputeFEMQ3(S, F, h);
436 
437                /* Set boundary conditions */
438                {
439                   int ii, jj, bdy, dd;
440                   int set_bc[4] = {0, 0, 0, 0};
441                   int bc_dofs[4][4] = {{ 0,  4,  8, 12},  /* x = 0 boundary */
442                                        { 0,  1,  2,  3},  /* y = 0 boundary */
443                                        { 3,  7, 11, 15},  /* x = 1 boundary */
444                                        {12, 13, 14, 15}}; /* y = 1 boundary */
445 
446                   /* Determine the boundary conditions to be set */
447                   if (index[0] == 1)   set_bc[0] = 1;  /* x = 0 boundary */
448                   if (index[1] == 1)   set_bc[1] = 1;  /* y = 0 boundary */
449                   if (index[0] == N*n) set_bc[2] = 1;  /* x = 1 boundary */
450                   if (index[1] == N*n) set_bc[3] = 1;  /* y = 1 boundary */
451 
452                   /* Modify the FEM matrix and rhs on each boundary by setting
453                      rows and columns of S to the identity and F to zero */
454                   for (bdy = 0; bdy < 4; bdy++)
455                   {
456                      /* Only modify if boundary condition needs to be set */
457                      if (set_bc[bdy])
458                      {
459                         for (dd = 0; dd < 4; dd++)
460                         {
461                            for (jj = 0; jj < 16; jj++)
462                            {
463                               ii = bc_dofs[bdy][dd];
464                               S[ii][jj] = 0.0; /* row */
465                               S[jj][ii] = 0.0; /* col */
466                            }
467                            S[ii][ii] = 1.0; /* diagonal */
468                            F[ii]     = 0.0; /* rhs */
469                         }
470                      }
471                   }
472                }
473 
474                /* Add this elements contribution to the matrix */
475                HYPRE_SStructMatrixAddFEMValues(A, part, index, &S[0][0]);
476 
477                /* Add this elements contribution to the rhs */
478                HYPRE_SStructVectorAddFEMValues(b, part, index, F);
479             }
480          }
481       }
482    }
483 
484    /* Collective calls finalizing the matrix and vector assembly */
485    HYPRE_SStructMatrixAssemble(A);
486    HYPRE_SStructVectorAssemble(b);
487 
488    /* 4. Set up SStruct Vector for the solution vector x */
489    {
490       int part = 0;
491       int var, nvars = 9;
492       int nvalues = (n+1)*(n+1);
493       double *values;
494 
495       values = calloc(nvalues, sizeof(double));
496 
497       /* Create an empty vector object */
498       HYPRE_SStructVectorCreate(MPI_COMM_WORLD, grid, &x);
499       /* Set the object type to ParCSR */
500       HYPRE_SStructVectorSetObjectType(x, HYPRE_PARCSR);
501       /* Indicate that the vector coefficients are ready to be set */
502       HYPRE_SStructVectorInitialize(x);
503 
504       /* Set the values for the initial guess one variable at a time.  Since the
505          SetBoxValues() calls below set the values to the right and up from the
506          cell center, ilower needs to be adjusted. */
507       for (var = 0; var < nvars; var++)
508       {
509          int ilower[2] = {1 + pi*n, 1 + pj*n};
510          int iupper[2] = {n + pi*n, n + pj*n};
511 
512          switch(var)
513          {
514             case 0: /* NODE */
515                ilower[0]--;
516                ilower[1]--;
517                break;
518             case 1: case 2: /* XFACE */
519                ilower[0]--;
520                break;
521             case 3: case 4: /* YFACE */
522                ilower[1]--;
523                break;
524          }
525 
526          HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
527       }
528 
529       free(values);
530 
531       /* Finalize the vector assembly */
532       HYPRE_SStructVectorAssemble(x);
533    }
534 
535    /* 5. Set up and call the solver (Solver options can be found in the
536          Reference Manual.) */
537    {
538       double final_res_norm;
539       int its;
540 
541       HYPRE_ParCSRMatrix    par_A;
542       HYPRE_ParVector       par_b;
543       HYPRE_ParVector       par_x;
544 
545       /* Extract the ParCSR objects needed in the solver */
546       HYPRE_SStructMatrixGetObject(A, (void **) &par_A);
547       HYPRE_SStructVectorGetObject(b, (void **) &par_b);
548       HYPRE_SStructVectorGetObject(x, (void **) &par_x);
549 
550       /* Here we construct a BoomerAMG solver.  See the other SStruct examples
551          as well as the Reference manual for additional solver choices. */
552       HYPRE_BoomerAMGCreate(&solver);
553       HYPRE_BoomerAMGSetCoarsenType(solver, 6);
554       HYPRE_BoomerAMGSetStrongThreshold(solver, 0.25);
555       HYPRE_BoomerAMGSetTol(solver, 1e-6);
556       HYPRE_BoomerAMGSetPrintLevel(solver, 2);
557       HYPRE_BoomerAMGSetMaxIter(solver, 50);
558 
559       /* call the setup */
560       HYPRE_BoomerAMGSetup(solver, par_A, par_b, par_x);
561 
562       /* call the solve */
563       HYPRE_BoomerAMGSolve(solver, par_A, par_b, par_x);
564 
565       /* get some info */
566       HYPRE_BoomerAMGGetNumIterations(solver, &its);
567       HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver,
568                                                   &final_res_norm);
569       /* clean up */
570       HYPRE_BoomerAMGDestroy(solver);
571 
572       /* Gather the solution vector */
573       HYPRE_SStructVectorGather(x);
574 
575       /* Save the solution for GLVis visualization, see vis/glvis-ex16.sh */
576       if (vis)
577       {
578          FILE *file;
579          char  filename[255];
580 
581          int part = 0;
582          int i, j, k, index[2];
583          int nvalues = n*n*16;
584          double X[16], *values;
585 
586          /* GLVis-to-hypre local renumbering */
587          int g2h[16] = {0, 3, 15, 12, 1, 2, 7, 11, 14, 13, 8, 4, 5, 6, 9, 10};
588 
589          values = calloc(nvalues, sizeof(double));
590 
591          nvalues = 0;
592          for (j = 1; j <= n; j++)
593          {
594             for (i = 1; i <= n; i++)
595             {
596                index[0] = i + pi*n;
597                index[1] = j + pj*n;
598 
599                /* Get local element solution values X */
600                HYPRE_SStructVectorGetFEMValues(x, part, index, X);
601 
602                /* Copy local solution X into values array */
603                for (k = 0; k < 16; k++)
604                {
605                   values[nvalues] = X[g2h[k]];
606                   nvalues++;
607                }
608             }
609          }
610 
611          sprintf(filename, "%s.%06d", "vis/ex16.sol", myid);
612          if ((file = fopen(filename, "w")) == NULL)
613          {
614             printf("Error: can't open output file %s\n", filename);
615             MPI_Finalize();
616             exit(1);
617          }
618 
619          /* Finite element space header */
620          fprintf(file, "FiniteElementSpace\n");
621          fprintf(file, "FiniteElementCollection: Local_Quad_Q3\n");
622          fprintf(file, "VDim: 1\n");
623          fprintf(file, "Ordering: 0\n\n");
624 
625          /* Save solution with replicated shared data */
626          for (i = 0; i < nvalues; i++)
627             fprintf(file, "%.14e\n", values[i]);
628 
629          fflush(file);
630          fclose(file);
631          free(values);
632 
633          /* Save local finite element mesh */
634          GLVis_PrintLocalSquareMesh("vis/ex16.mesh", n, n, h,
635                                     pi*h*n, pj*h*n, myid);
636 
637          /* Additional visualization data */
638          if (myid == 0)
639          {
640             sprintf(filename, "%s", "vis/ex16.data");
641             file = fopen(filename, "w");
642             fprintf(file, "np %d\n", num_procs);
643             fflush(file);
644             fclose(file);
645          }
646       }
647 
648       if (myid == 0)
649       {
650          printf("\n");
651          printf("Iterations = %d\n", its);
652          printf("Final Relative Residual Norm = %g\n", final_res_norm);
653          printf("\n");
654       }
655    }
656 
657    /* Free memory */
658    HYPRE_SStructGridDestroy(grid);
659    HYPRE_SStructGraphDestroy(graph);
660    HYPRE_SStructMatrixDestroy(A);
661    HYPRE_SStructVectorDestroy(b);
662    HYPRE_SStructVectorDestroy(x);
663 
664    /* Finalize MPI */
665    MPI_Finalize();
666 
667    return 0;
668 }


syntax highlighted by Code2HTML, v. 0.9.1