download the original source code.
   1 /*
   2    Example 15big
   3 
   4    Interface:      Semi-Structured interface (SStruct)
   5 
   6    Compile with:   make ex15big
   7 
   8    Sample run:     mpirun -np 8 ex15big -n 10
   9 
  10    To see options: ex15big -help
  11 
  12    Description:    This example is a slight modification of Example 15 that
  13                    illustrates the 64-bit integer support in hypre needed to
  14                    runproblems with more than 2B unknowns.
  15 
  16                    Specifically, the changes compared to Example 15 are as
  17                    follows:
  18 
  19                    1) All integer arguments to HYPRE functions should be
  20                       declared of type HYPRE_Int.
  21 
  22                    2) Variables of type HYPRE_Int are 64-bit integers, so
  23                       they should be printed in the %lld format (not %d).
  24 
  25                    To enable the 64-bit integer support, you need to build
  26                    hypre with the --enable-bigint option of 'configure'.
  27                    We recommend comparing this example with Example 15.
  28 */
  29 
  30 #include <math.h>
  31 #include "_hypre_utilities.h"
  32 #include "HYPRE_sstruct_mv.h"
  33 #include "HYPRE_sstruct_ls.h"
  34 #include "_hypre_parcsr_ls.h"
  35 #include "HYPRE.h"
  36 
  37 
  38 int optionAlpha, optionBeta;
  39 
  40 /* Curl-curl coefficient alpha = mu^{-1} */
  41 double alpha(double x, double y, double z)
  42 {
  43    switch (optionAlpha)
  44    {
  45       case 0: /* uniform coefficient */
  46          return 1.0;
  47       case 1: /* smooth coefficient */
  48          return x*x+exp(y)+sin(z);
  49       case 2: /* small outside of an interior cube */
  50          if ((fabs(x-0.5) < 0.25) && (fabs(y-0.5) < 0.25) && (fabs(z-0.5) < 0.25))
  51             return 1.0;
  52          else
  53             return 1.0e-6;
  54       case 3: /* small outside of an interior ball */
  55          if (((x-0.5)*(x-0.5)+(y-0.5)*(y-0.5)+(z-0.5)*(z-0.5)) < 0.0625)
  56             return 1.0;
  57          else
  58             return 1.0e-6;
  59       case 4: /* random coefficient */
  60          return hypre_Rand();
  61       default:
  62          return 1.0;
  63    }
  64 }
  65 
  66 /* Mass coefficient beta = sigma */
  67 double beta(double x, double y, double z)
  68 {
  69    switch (optionBeta)
  70    {
  71       case 0: /* uniform coefficient */
  72          return 1.0;
  73       case 1: /* smooth coefficient */
  74          return x*x+exp(y)+sin(z);
  75       case 2:/* small outside of interior cube */
  76          if ((fabs(x-0.5) < 0.25) && (fabs(y-0.5) < 0.25) && (fabs(z-0.5) < 0.25))
  77             return 1.0;
  78          else
  79             return 1.0e-6;
  80       case 3: /* small outside of an interior ball */
  81          if (((x-0.5)*(x-0.5)+(y-0.5)*(y-0.5)+(z-0.5)*(z-0.5)) < 0.0625)
  82             return 1.0;
  83          else
  84             return 1.0e-6;
  85       case 4: /* random coefficient */
  86          return hypre_Rand();
  87       default:
  88          return 1.0;
  89    }
  90 }
  91 
  92 /*
  93    This routine computes the lowest order Nedelec, or "edge" finite element
  94    stiffness matrix and load vector on a cube of size h.  The 12 edges {e_i}
  95    are numbered in terms of the vertices as follows:
  96 
  97            [7]------[6]
  98            /|       /|     e_0 = 01, e_1 = 12, e_2  = 32, e_3  = 03,
  99           / |      / |     e_4 = 45, e_5 = 56, e_6  = 76, e_7  = 47,
 100         [4]------[5] |     e_8 = 04, e_9 = 15, e_10 = 26, e_11 = 37.
 101          | [3]----|-[2]
 102          | /      | /      The edges are oriented from first to the
 103          |/       |/       second vertex, e.g. e_0 is from [0] to [1].
 104         [0]------[1]
 105 
 106    We allow for different scaling of the curl-curl and the mass parts of the
 107    matrix with coefficients alpha and beta respectively:
 108 
 109          S_ij = alpha (curl phi_i,curl phi_j) + beta (phi_i, phi_j).
 110 
 111    The load vector corresponding to a right-hand side of {1,1,1} is
 112 
 113                         F_j = (1,phi_j) = h^2/4.
 114 */
 115 void ComputeFEMND1(double S[12][12], double F[12],
 116                    double x, double y, double z, double h)
 117 {
 118    int i, j;
 119 
 120    double h2_4 = h*h/4;
 121 
 122    double cS1 = alpha(x,y,z)/(6.0*h), cS2 = 2*cS1, cS4 = 2*cS2;
 123    double cM1 = beta(x,y,z)*h/36.0,   cM2 = 2*cM1, cM4 = 2*cM2;
 124 
 125    S[ 0][ 0] =  cS4 + cM4;   S[ 0][ 1] =  cS2;         S[ 0][ 2] = -cS1 + cM2;
 126    S[ 0][ 3] = -cS2;         S[ 0][ 4] = -cS1 + cM2;   S[ 0][ 5] =  cS1;
 127    S[ 0][ 6] = -cS2 + cM1;   S[ 0][ 7] = -cS1;         S[ 0][ 8] = -cS2;
 128    S[ 0][ 9] =  cS2;         S[ 0][10] =  cS1;         S[ 0][11] = -cS1;
 129 
 130    S[ 1][ 1] =  cS4 + cM4;   S[ 1][ 2] = -cS2;         S[ 1][ 3] = -cS1 + cM2;
 131    S[ 1][ 4] =  cS1;         S[ 1][ 5] = -cS1 + cM2;   S[ 1][ 6] = -cS1;
 132    S[ 1][ 7] = -cS2 + cM1;   S[ 1][ 8] = -cS1;         S[ 1][ 9] = -cS2;
 133    S[ 1][10] =  cS2;         S[ 1][11] =  cS1;
 134 
 135    S[ 2][ 2] =  cS4 + cM4;   S[ 2][ 3] =  cS2;         S[ 2][ 4] = -cS2 + cM1;
 136    S[ 2][ 5] = -cS1;         S[ 2][ 6] = -cS1 + cM2;   S[ 2][ 7] =  cS1;
 137    S[ 2][ 8] = -cS1;         S[ 2][ 9] =  cS1;         S[ 2][10] =  cS2;
 138    S[ 2][11] = -cS2;
 139 
 140    S[ 3][ 3] =  cS4 + cM4;   S[ 3][ 4] = -cS1;         S[ 3][ 5] = -cS2 + cM1;
 141    S[ 3][ 6] =  cS1;         S[ 3][ 7] = -cS1 + cM2;   S[ 3][ 8] = -cS2;
 142    S[ 3][ 9] = -cS1;         S[ 3][10] =  cS1;         S[ 3][11] =  cS2;
 143 
 144    S[ 4][ 4] =  cS4 + cM4;   S[ 4][ 5] =  cS2;         S[ 4][ 6] = -cS1 + cM2;
 145    S[ 4][ 7] = -cS2;         S[ 4][ 8] =  cS2;         S[ 4][ 9] = -cS2;
 146    S[ 4][10] = -cS1;         S[ 4][11] =  cS1;
 147 
 148    S[ 5][ 5] =  cS4 + cM4;   S[ 5][ 6] = -cS2;         S[ 5][ 7] = -cS1 + cM2;
 149    S[ 5][ 8] =  cS1;         S[ 5][ 9] =  cS2;         S[ 5][10] = -cS2;
 150    S[ 5][11] = -cS1;
 151 
 152    S[ 6][ 6] =  cS4 + cM4;   S[ 6][ 7] =  cS2;         S[ 6][ 8] =  cS1;
 153    S[ 6][ 9] = -cS1;         S[ 6][10] = -cS2;         S[ 6][11] =  cS2;
 154 
 155    S[ 7][ 7] =  cS4 + cM4;   S[ 7][ 8] =  cS2;         S[ 7][ 9] =  cS1;
 156    S[ 7][10] = -cS1;         S[ 7][11] = -cS2;
 157 
 158    S[ 8][ 8] =  cS4 + cM4;   S[ 8][ 9] = -cS1 + cM2;   S[ 8][10] = -cS2 + cM1;
 159    S[ 8][11] = -cS1 + cM2;
 160 
 161    S[ 9][ 9] =  cS4 + cM4;   S[ 9][10] = -cS1 + cM2;   S[ 9][11] = -cS2 + cM1;
 162 
 163    S[10][10] =  cS4 + cM4;   S[10][11] = -cS1 + cM2;
 164 
 165    S[11][11] =  cS4 + cM4;
 166 
 167    /* The stiffness matrix is symmetric */
 168    for (i = 1; i < 12; i++)
 169       for (j = 0; j < i; j++)
 170          S[i][j] = S[j][i];
 171 
 172    for (i = 0; i < 12; i++)
 173       F[i] = h2_4;
 174 }
 175 
 176 
 177 int main (int argc, char *argv[])
 178 {
 179    int myid, num_procs;
 180    int n, N, pi, pj, pk;
 181    double h;
 182    int print_solution;
 183 
 184    double tol, theta;
 185    int maxit, cycle_type;
 186    int rlx_type, rlx_sweeps, rlx_weight, rlx_omega;
 187    int amg_coarsen_type, amg_agg_levels, amg_rlx_type;
 188    int amg_interp_type, amg_Pmax;
 189    int singular_problem ;
 190 
 191    HYPRE_Int time_index;
 192 
 193    HYPRE_SStructGrid     edge_grid;
 194    HYPRE_SStructGraph    A_graph;
 195    HYPRE_SStructMatrix   A;
 196    HYPRE_SStructVector   b;
 197    HYPRE_SStructVector   x;
 198    HYPRE_SStructGrid     node_grid;
 199    HYPRE_SStructGraph    G_graph;
 200    HYPRE_SStructStencil  G_stencil[3];
 201    HYPRE_SStructMatrix   G;
 202    HYPRE_SStructVector   xcoord, ycoord, zcoord;
 203 
 204    HYPRE_Solver          solver, precond;
 205 
 206    /* Initialize MPI */
 207    MPI_Init(&argc, &argv);
 208    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
 209    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
 210 
 211    /* Set default parameters */
 212    n                = 10;
 213    print_solution   = 0;
 214    optionAlpha      = 0;
 215    optionBeta       = 0;
 216    maxit            = 100;
 217    tol              = 1e-6;
 218    cycle_type       = 13;
 219    rlx_type         = 2;
 220    rlx_sweeps       = 1;
 221    rlx_weight       = 1.0;
 222    rlx_omega        = 1.0;
 223    amg_coarsen_type = 10;
 224    amg_agg_levels   = 1;
 225    amg_rlx_type     = 6;
 226    theta            = 0.25;
 227    amg_interp_type  = 6;
 228    amg_Pmax         = 4;
 229    singular_problem = 0;
 230 
 231    /* Parse command line */
 232    {
 233       int arg_index = 0;
 234       int print_usage = 0;
 235 
 236       while (arg_index < argc)
 237       {
 238          if ( strcmp(argv[arg_index], "-n") == 0 )
 239          {
 240             arg_index++;
 241             n = atoi(argv[arg_index++]);
 242          }
 243          else if ( strcmp(argv[arg_index], "-a") == 0 )
 244          {
 245             arg_index++;
 246             optionAlpha = atoi(argv[arg_index++]);
 247          }
 248          else if ( strcmp(argv[arg_index], "-b") == 0 )
 249          {
 250             arg_index++;
 251             optionBeta = atoi(argv[arg_index++]);
 252          }
 253          else if ( strcmp(argv[arg_index], "-print_solution") == 0 )
 254          {
 255             arg_index++;
 256             print_solution = 1;
 257          }
 258          else if ( strcmp(argv[arg_index], "-maxit") == 0 )
 259          {
 260             arg_index++;
 261             maxit = atoi(argv[arg_index++]);
 262          }
 263          else if ( strcmp(argv[arg_index], "-tol") == 0 )
 264          {
 265             arg_index++;
 266             tol = atof(argv[arg_index++]);
 267          }
 268          else if ( strcmp(argv[arg_index], "-type") == 0 )
 269          {
 270             arg_index++;
 271             cycle_type = atoi(argv[arg_index++]);
 272          }
 273          else if ( strcmp(argv[arg_index], "-rlx") == 0 )
 274          {
 275             arg_index++;
 276             rlx_type = atoi(argv[arg_index++]);
 277          }
 278          else if ( strcmp(argv[arg_index], "-rlxn") == 0 )
 279          {
 280             arg_index++;
 281             rlx_sweeps = atoi(argv[arg_index++]);
 282          }
 283          else if ( strcmp(argv[arg_index], "-rlxw") == 0 )
 284          {
 285             arg_index++;
 286             rlx_weight = atof(argv[arg_index++]);
 287          }
 288          else if ( strcmp(argv[arg_index], "-rlxo") == 0 )
 289          {
 290             arg_index++;
 291             rlx_omega = atof(argv[arg_index++]);
 292          }
 293          else if ( strcmp(argv[arg_index], "-ctype") == 0 )
 294          {
 295             arg_index++;
 296             amg_coarsen_type = atoi(argv[arg_index++]);
 297          }
 298          else if ( strcmp(argv[arg_index], "-amgrlx") == 0 )
 299          {
 300             arg_index++;
 301             amg_rlx_type = atoi(argv[arg_index++]);
 302          }
 303          else if ( strcmp(argv[arg_index], "-agg") == 0 )
 304          {
 305             arg_index++;
 306             amg_agg_levels = atoi(argv[arg_index++]);
 307          }
 308          else if ( strcmp(argv[arg_index], "-itype") == 0 )
 309          {
 310             arg_index++;
 311             amg_interp_type = atoi(argv[arg_index++]);
 312          }
 313          else if ( strcmp(argv[arg_index], "-pmax") == 0 )
 314          {
 315             arg_index++;
 316             amg_Pmax = atoi(argv[arg_index++]);
 317          }
 318          else if ( strcmp(argv[arg_index], "-sing") == 0 )
 319          {
 320             arg_index++;
 321             singular_problem = 1;
 322          }
 323          else if ( strcmp(argv[arg_index], "-theta") == 0 )
 324          {
 325             arg_index++;
 326             theta = atof(argv[arg_index++]);
 327          }
 328 
 329          else if ( strcmp(argv[arg_index], "-help") == 0 )
 330          {
 331             print_usage = 1;
 332             break;
 333          }
 334          else
 335          {
 336             arg_index++;
 337          }
 338       }
 339 
 340       if ((print_usage) && (myid == 0))
 341       {
 342          printf("\n");
 343          printf("Usage: %s [<options>]\n", argv[0]);
 344          printf("\n");
 345          printf("  -n <n>              : problem size per processor (default: 10)\n");
 346          printf("  -a <alpha_opt>      : choice for the curl-curl coefficient (default: 1.0)\n");
 347          printf("  -b <beta_opt>       : choice for the mass coefficient (default: 1.0)\n");
 348          printf("  -print_solution     : print the solution vector\n");
 349          printf("\n");
 350          printf("PCG-AMS solver options:                                     \n");
 351          printf("  -maxit <num>        : maximum number of iterations (100)  \n");
 352          printf("  -tol <num>          : convergence tolerance (1e-6)        \n");
 353          printf("  -type <num>         : 3-level cycle type (0-8, 11-14)     \n");
 354          printf("  -theta <num>        : BoomerAMG threshold (0.25)          \n");
 355          printf("  -ctype <num>        : BoomerAMG coarsening type           \n");
 356          printf("  -agg <num>          : Levels of BoomerAMG agg. coarsening \n");
 357          printf("  -amgrlx <num>       : BoomerAMG relaxation type           \n");
 358          printf("  -itype <num>        : BoomerAMG interpolation type        \n");
 359          printf("  -pmax <num>         : BoomerAMG interpolation truncation  \n");
 360          printf("  -rlx <num>          : relaxation type                     \n");
 361          printf("  -rlxn <num>         : number of relaxation sweeps         \n");
 362          printf("  -rlxw <num>         : damping parameter (usually <=1)     \n");
 363          printf("  -rlxo <num>         : SOR parameter (usually in (0,2))    \n");
 364          printf("  -sing               : curl-curl only (singular) problem   \n");
 365          printf("\n");
 366          printf("\n");
 367       }
 368 
 369       if (print_usage)
 370       {
 371          MPI_Finalize();
 372          return (0);
 373       }
 374    }
 375 
 376    /* Figure out the processor grid (N x N x N).  The local problem size is n^3,
 377       while pi, pj and pk indicate the position in the processor grid. */
 378    N  = pow(num_procs,1.0/3.0) + 0.5;
 379    if (num_procs != N*N*N)
 380    {
 381       if (myid == 0) printf("Can't run on %d processors, try %d.\n",
 382                             num_procs, N*N*N);
 383       MPI_Finalize();
 384       exit(1);
 385    }
 386    h  = 1.0 / (N*n);
 387    pk = myid / (N*N);
 388    pj = myid/N - pk*N;
 389    pi = myid - pj*N - pk*N*N;
 390 
 391    /* Start timing */
 392    time_index = hypre_InitializeTiming("SStruct Setup");
 393    hypre_BeginTiming(time_index);
 394 
 395    /* 1. Set up the edge and nodal grids.  Note that we do this simultaneously
 396          to make sure that they have the same extends.  For simplicity we use
 397          only one part to represent the unit cube. */
 398    {
 399       HYPRE_Int ndim = 3;
 400       HYPRE_Int nparts = 1;
 401 
 402       /* Create empty 2D grid objects */
 403       HYPRE_SStructGridCreate(MPI_COMM_WORLD, ndim, nparts, &node_grid);
 404       HYPRE_SStructGridCreate(MPI_COMM_WORLD, ndim, nparts, &edge_grid);
 405 
 406       /* Set the extents of the grid - each processor sets its grid boxes. */
 407       {
 408          HYPRE_Int part = 0;
 409          HYPRE_Int ilower[3] = {1 + pi*n, 1 + pj*n, 1 + pk*n};
 410          HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
 411 
 412          HYPRE_SStructGridSetExtents(node_grid, part, ilower, iupper);
 413          HYPRE_SStructGridSetExtents(edge_grid, part, ilower, iupper);
 414       }
 415 
 416       /* Set the variable type and number of variables on each grid. */
 417       {
 418          HYPRE_Int i;
 419          HYPRE_Int nnodevars = 1;
 420          HYPRE_Int nedgevars = 3;
 421 
 422          HYPRE_SStructVariable nodevars[1] = {HYPRE_SSTRUCT_VARIABLE_NODE};
 423          HYPRE_SStructVariable edgevars[3] = {HYPRE_SSTRUCT_VARIABLE_XEDGE,
 424                                               HYPRE_SSTRUCT_VARIABLE_YEDGE,
 425                                               HYPRE_SSTRUCT_VARIABLE_ZEDGE};
 426          for (i = 0; i < nparts; i++)
 427          {
 428             HYPRE_SStructGridSetVariables(node_grid, i, nnodevars, nodevars);
 429             HYPRE_SStructGridSetVariables(edge_grid, i, nedgevars, edgevars);
 430          }
 431       }
 432 
 433       /* Since there is only one part, there is no need to call the
 434          SetNeighborPart or SetSharedPart functions, which determine the spatial
 435          relation between the parts.  See Examples 12, 13 and 14 for
 436          illustrations of these calls. */
 437 
 438       /* Now the grids are ready to be used */
 439       HYPRE_SStructGridAssemble(node_grid);
 440       HYPRE_SStructGridAssemble(edge_grid);
 441    }
 442 
 443    /* 2. Create the finite element stiffness matrix A and load vector b. */
 444    {
 445       HYPRE_Int part = 0; /* this problem has only one part */
 446 
 447       /* Set the ordering of the variables in the finite element problem.  This
 448          is done by listing the variable offset directions relative to the
 449          element's center.  See the Reference Manual for more details. */
 450       {
 451          HYPRE_Int ordering[48] = { 0,  0, -1, -1,    /* x-edge [0]-[1] */
 452                                     1, +1,  0, -1,    /* y-edge [1]-[2] */
 453          /*     [7]------[6]  */    0,  0, +1, -1,    /* x-edge [3]-[2] */
 454          /*     /|       /|   */    1, -1,  0, -1,    /* y-edge [0]-[3] */
 455          /*    / |      / |   */    0,  0, -1, +1,    /* x-edge [4]-[5] */
 456          /*  [4]------[5] |   */    1, +1,  0, +1,    /* y-edge [5]-[6] */
 457          /*   | [3]----|-[2]  */    0,  0, +1, +1,    /* x-edge [7]-[6] */
 458          /*   | /      | /    */    1, -1,  0, +1,    /* y-edge [4]-[7] */
 459          /*   |/       |/     */    2, -1, -1,  0,    /* z-edge [0]-[4] */
 460          /*  [0]------[1]     */    2, +1, -1,  0,    /* z-edge [1]-[5] */
 461                                     2, +1, +1,  0,    /* z-edge [2]-[6] */
 462                                     2, -1, +1,  0 };  /* z-edge [3]-[7] */
 463 
 464          HYPRE_SStructGridSetFEMOrdering(edge_grid, part, ordering);
 465       }
 466 
 467       /* Set up the Graph - this determines the non-zero structure of the
 468          matrix. */
 469       {
 470          HYPRE_Int part = 0;
 471 
 472          /* Create the graph object */
 473          HYPRE_SStructGraphCreate(MPI_COMM_WORLD, edge_grid, &A_graph);
 474 
 475          /* See MatrixSetObjectType below */
 476          HYPRE_SStructGraphSetObjectType(A_graph, HYPRE_PARCSR);
 477 
 478          /* Indicate that this problem uses finite element stiffness matrices and
 479             load vectors, instead of stencils. */
 480          HYPRE_SStructGraphSetFEM(A_graph, part);
 481 
 482          /* The edge finite element matrix is full, so there is no need to call the
 483             HYPRE_SStructGraphSetFEMSparsity() function. */
 484 
 485          /* Assemble the graph */
 486          HYPRE_SStructGraphAssemble(A_graph);
 487       }
 488 
 489       /* Set up the SStruct Matrix and right-hand side vector */
 490       {
 491          /* Create the matrix object */
 492          HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, A_graph, &A);
 493          /* Use a ParCSR storage */
 494          HYPRE_SStructMatrixSetObjectType(A, HYPRE_PARCSR);
 495          /* Indicate that the matrix coefficients are ready to be set */
 496          HYPRE_SStructMatrixInitialize(A);
 497 
 498          /* Create an empty vector object */
 499          HYPRE_SStructVectorCreate(MPI_COMM_WORLD, edge_grid, &b);
 500          /* Use a ParCSR storage */
 501          HYPRE_SStructVectorSetObjectType(b, HYPRE_PARCSR);
 502          /* Indicate that the vector coefficients are ready to be set */
 503          HYPRE_SStructVectorInitialize(b);
 504       }
 505 
 506       /* Set the matrix and vector entries by finite element assembly */
 507       {
 508          /* local stiffness matrix and load vector */
 509          double S[12][12], F[12];
 510 
 511          int i, j, k;
 512          HYPRE_Int index[3];
 513 
 514          for (i = 1; i <= n; i++)
 515             for (j = 1; j <= n; j++)
 516                for (k = 1; k <= n; k++)
 517                {
 518                   /* Compute the FEM matrix and r.h.s. for cell (i,j,k) with
 519                      coefficients evaluated at the cell center. */
 520                   index[0] = i + pi*n; index[1] = j + pj*n; index[2] = k + pk*n;
 521                   ComputeFEMND1(S,F,(pi*n+i)*h-h/2,(pj*n+j)*h-h/2,(pk*n+k)*h-h/2,h);
 522 
 523                   /* Eliminate boundary conditions on x = 0 */
 524                   if (index[0] == 1)
 525                   {
 526                      int ii, jj, bc_edges[4] = { 3, 11, 7, 8 };
 527                      for (ii = 0; ii < 4; ii++)
 528                      {
 529                         for (jj = 0; jj < 12; jj++)
 530                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
 531                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
 532                         F[bc_edges[ii]] = 0.0;
 533                      }
 534                   }
 535                   /* Eliminate boundary conditions on y = 0 */
 536                   if (index[1] == 1)
 537                   {
 538                      int ii, jj, bc_edges[4] = { 0, 9, 4, 8 };
 539                      for (ii = 0; ii < 4; ii++)
 540                      {
 541                         for (jj = 0; jj < 12; jj++)
 542                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
 543                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
 544                         F[bc_edges[ii]] = 0.0;
 545                      }
 546                   }
 547                   /* Eliminate boundary conditions on z = 0 */
 548                   if (index[2] == 1)
 549                   {
 550                      int ii, jj, bc_edges[4] = { 0, 1, 2, 3 };
 551                      for (ii = 0; ii < 4; ii++)
 552                      {
 553                         for (jj = 0; jj < 12; jj++)
 554                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
 555                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
 556                         F[bc_edges[ii]] = 0.0;
 557                      }
 558                   }
 559                   /* Eliminate boundary conditions on x = 1 */
 560                   if (index[0] == N*n)
 561                   {
 562                      int ii, jj, bc_edges[4] = { 1, 10, 5, 9 };
 563                      for (ii = 0; ii < 4; ii++)
 564                      {
 565                         for (jj = 0; jj < 12; jj++)
 566                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
 567                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
 568                         F[bc_edges[ii]] = 0.0;
 569                      }
 570                   }
 571                   /* Eliminate boundary conditions on y = 1 */
 572                   if (index[1] == N*n)
 573                   {
 574                      int ii, jj, bc_edges[4] = { 2, 10, 6, 11 };
 575                      for (ii = 0; ii < 4; ii++)
 576                      {
 577                         for (jj = 0; jj < 12; jj++)
 578                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
 579                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
 580                         F[bc_edges[ii]] = 0.0;
 581                      }
 582                   }
 583                   /* Eliminate boundary conditions on z = 1 */
 584                   if (index[2] == N*n)
 585                   {
 586                      int ii, jj, bc_edges[4] = { 4, 5, 6, 7 };
 587                      for (ii = 0; ii < 4; ii++)
 588                      {
 589                         for (jj = 0; jj < 12; jj++)
 590                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
 591                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
 592                         F[bc_edges[ii]] = 0.0;
 593                      }
 594                   }
 595 
 596                   /* Assemble the matrix */
 597                   HYPRE_SStructMatrixAddFEMValues(A, part, index, &S[0][0]);
 598 
 599                   /* Assemble the vector */
 600                   HYPRE_SStructVectorAddFEMValues(b, part, index, F);
 601                }
 602       }
 603 
 604       /* Collective calls finalizing the matrix and vector assembly */
 605       HYPRE_SStructMatrixAssemble(A);
 606       HYPRE_SStructVectorAssemble(b);
 607    }
 608 
 609    /* 3. Create the discrete gradient matrix G, which is needed in AMS. */
 610    {
 611       HYPRE_Int part = 0;
 612       HYPRE_Int stencil_size = 2;
 613 
 614       /* Define the discretization stencil relating the edges and nodes of the
 615          grid. */
 616       {
 617          HYPRE_Int ndim = 3;
 618          HYPRE_Int entry;
 619          HYPRE_Int var = 0; /* the node variable */
 620 
 621          /* The discrete gradient stencils connect edge to node variables. */
 622          HYPRE_Int Gx_offsets[2][3] = {{-1,0,0},{0,0,0}};  /* x-edge [7]-[6] */
 623          HYPRE_Int Gy_offsets[2][3] = {{0,-1,0},{0,0,0}};  /* y-edge [5]-[6] */
 624          HYPRE_Int Gz_offsets[2][3] = {{0,0,-1},{0,0,0}};  /* z-edge [2]-[6] */
 625 
 626          HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[0]);
 627          HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[1]);
 628          HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[2]);
 629 
 630          for (entry = 0; entry < stencil_size; entry++)
 631          {
 632             HYPRE_SStructStencilSetEntry(G_stencil[0], entry, Gx_offsets[entry], var);
 633             HYPRE_SStructStencilSetEntry(G_stencil[1], entry, Gy_offsets[entry], var);
 634             HYPRE_SStructStencilSetEntry(G_stencil[2], entry, Gz_offsets[entry], var);
 635          }
 636       }
 637 
 638       /* Set up the Graph - this determines the non-zero structure of the
 639          matrix. */
 640       {
 641          HYPRE_Int nvars = 3;
 642          HYPRE_Int var; /* the edge variables */
 643 
 644          /* Create the discrete gradient graph object */
 645          HYPRE_SStructGraphCreate(MPI_COMM_WORLD, edge_grid, &G_graph);
 646 
 647          /* See MatrixSetObjectType below */
 648          HYPRE_SStructGraphSetObjectType(G_graph, HYPRE_PARCSR);
 649 
 650          /* Since the discrete gradient relates edge and nodal variables (it is a
 651             rectangular matrix), we have to specify the domain (column) grid. */
 652          HYPRE_SStructGraphSetDomainGrid(G_graph, node_grid);
 653 
 654          /* Tell the graph which stencil to use for each edge variable on each
 655             part (we only have one part). */
 656          for (var = 0; var < nvars; var++)
 657             HYPRE_SStructGraphSetStencil(G_graph, part, var, G_stencil[var]);
 658 
 659          /* Assemble the graph */
 660          HYPRE_SStructGraphAssemble(G_graph);
 661       }
 662 
 663       /* Set up the SStruct Matrix */
 664       {
 665          /* Create the matrix object */
 666          HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, G_graph, &G);
 667          /* Use a ParCSR storage */
 668          HYPRE_SStructMatrixSetObjectType(G, HYPRE_PARCSR);
 669          /* Indicate that the matrix coefficients are ready to be set */
 670          HYPRE_SStructMatrixInitialize(G);
 671       }
 672 
 673       /* Set the discrete gradient values, assuming a "natural" orientation of
 674          the edges (i.e. one in agreement with the coordinate directions). */
 675       {
 676          int i;
 677          int nedges = n*(n+1)*(n+1);
 678          double *values;
 679          HYPRE_Int stencil_indices[2] = {0,1}; /* the nodes of each edge */
 680 
 681          values = calloc(2*nedges, sizeof(double));
 682 
 683          /* The edge orientation is fixed: from first to second node */
 684          for (i = 0; i < nedges; i++)
 685          {
 686             values[2*i]   = -1.0;
 687             values[2*i+1] =  1.0;
 688          }
 689 
 690          /* Set the values in the discrete gradient x-edges */
 691          {
 692             HYPRE_Int var = 0;
 693             HYPRE_Int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
 694             HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
 695             HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
 696                                             stencil_size, stencil_indices,
 697                                             values);
 698          }
 699          /* Set the values in the discrete gradient y-edges */
 700          {
 701             HYPRE_Int var = 1;
 702             HYPRE_Int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
 703             HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
 704             HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
 705                                             stencil_size, stencil_indices,
 706                                             values);
 707          }
 708          /* Set the values in the discrete gradient z-edges */
 709          {
 710             HYPRE_Int var = 2;
 711             HYPRE_Int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
 712             HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
 713             HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
 714                                             stencil_size, stencil_indices,
 715                                             values);
 716          }
 717 
 718          free(values);
 719       }
 720 
 721       /* Finalize the matrix assembly */
 722       HYPRE_SStructMatrixAssemble(G);
 723    }
 724 
 725    /* 4. Create the vectors of nodal coordinates xcoord, ycoord and zcoord,
 726          which are needed in AMS. */
 727    {
 728       int i, j, k;
 729       HYPRE_Int part = 0;
 730       HYPRE_Int var = 0; /* the node variable */
 731       HYPRE_Int index[3];
 732       double xval, yval, zval;
 733 
 734       /* Create empty vector objects */
 735       HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &xcoord);
 736       HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &ycoord);
 737       HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &zcoord);
 738       /* Set the object type to ParCSR */
 739       HYPRE_SStructVectorSetObjectType(xcoord, HYPRE_PARCSR);
 740       HYPRE_SStructVectorSetObjectType(ycoord, HYPRE_PARCSR);
 741       HYPRE_SStructVectorSetObjectType(zcoord, HYPRE_PARCSR);
 742       /* Indicate that the vector coefficients are ready to be set */
 743       HYPRE_SStructVectorInitialize(xcoord);
 744       HYPRE_SStructVectorInitialize(ycoord);
 745       HYPRE_SStructVectorInitialize(zcoord);
 746 
 747       /* Compute and set the coordinates of the nodes */
 748       for (i = 0; i <= n; i++)
 749          for (j = 0; j <= n; j++)
 750             for (k = 0; k <= n; k++)
 751             {
 752                index[0] = i + pi*n; index[1] = j + pj*n; index[2] = k + pk*n;
 753 
 754                xval = index[0]*h;
 755                yval = index[1]*h;
 756                zval = index[2]*h;
 757 
 758                HYPRE_SStructVectorSetValues(xcoord, part, index, var, &xval);
 759                HYPRE_SStructVectorSetValues(ycoord, part, index, var, &yval);
 760                HYPRE_SStructVectorSetValues(zcoord, part, index, var, &zval);
 761             }
 762 
 763       /* Finalize the vector assembly */
 764       HYPRE_SStructVectorAssemble(xcoord);
 765       HYPRE_SStructVectorAssemble(ycoord);
 766       HYPRE_SStructVectorAssemble(zcoord);
 767    }
 768 
 769    /* 5. Set up a SStruct Vector for the solution vector x */
 770    {
 771       HYPRE_Int part = 0;
 772       int nvalues = n*(n+1)*(n+1);
 773       double *values;
 774 
 775       values = calloc(nvalues, sizeof(double));
 776 
 777       /* Create an empty vector object */
 778       HYPRE_SStructVectorCreate(MPI_COMM_WORLD, edge_grid, &x);
 779       /* Set the object type to ParCSR */
 780       HYPRE_SStructVectorSetObjectType(x, HYPRE_PARCSR);
 781       /* Indicate that the vector coefficients are ready to be set */
 782       HYPRE_SStructVectorInitialize(x);
 783 
 784       /* Set the values for the initial guess x-edge */
 785       {
 786          HYPRE_Int var = 0;
 787          HYPRE_Int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
 788          HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
 789          HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
 790       }
 791       /* Set the values for the initial guess y-edge */
 792       {
 793          HYPRE_Int var = 1;
 794          HYPRE_Int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
 795          HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
 796          HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
 797       }
 798       /* Set the values for the initial guess z-edge */
 799       {
 800          HYPRE_Int var = 2;
 801          HYPRE_Int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
 802          HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
 803          HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
 804       }
 805 
 806       free(values);
 807 
 808       /* Finalize the vector assembly */
 809       HYPRE_SStructVectorAssemble(x);
 810    }
 811 
 812    /* Finalize current timing */
 813    hypre_EndTiming(time_index);
 814    hypre_PrintTiming("SStruct phase times", MPI_COMM_WORLD);
 815    hypre_FinalizeTiming(time_index);
 816    hypre_ClearTiming();
 817 
 818    /* 6. Set up and call the PCG-AMS solver (Solver options can be found in the
 819          Reference Manual.) */
 820    {
 821       double final_res_norm;
 822       HYPRE_Int its;
 823 
 824       HYPRE_ParCSRMatrix    par_A;
 825       HYPRE_ParVector       par_b;
 826       HYPRE_ParVector       par_x;
 827 
 828       HYPRE_ParCSRMatrix    par_G;
 829       HYPRE_ParVector       par_xcoord;
 830       HYPRE_ParVector       par_ycoord;
 831       HYPRE_ParVector       par_zcoord;
 832 
 833       /* Extract the ParCSR objects needed in the solver */
 834       HYPRE_SStructMatrixGetObject(A, (void **) &par_A);
 835       HYPRE_SStructVectorGetObject(b, (void **) &par_b);
 836       HYPRE_SStructVectorGetObject(x, (void **) &par_x);
 837       HYPRE_SStructMatrixGetObject(G, (void **) &par_G);
 838       HYPRE_SStructVectorGetObject(xcoord, (void **) &par_xcoord);
 839       HYPRE_SStructVectorGetObject(ycoord, (void **) &par_ycoord);
 840       HYPRE_SStructVectorGetObject(zcoord, (void **) &par_zcoord);
 841 
 842       if (myid == 0)
 843          printf("Problem size: %lld\n\n",
 844              hypre_ParCSRMatrixGlobalNumRows((hypre_ParCSRMatrix*)par_A));
 845 
 846       /* Start timing */
 847       time_index = hypre_InitializeTiming("AMS Setup");
 848       hypre_BeginTiming(time_index);
 849 
 850       /* Create solver */
 851       HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver);
 852 
 853       /* Set some parameters (See Reference Manual for more parameters) */
 854       HYPRE_PCGSetMaxIter(solver, maxit); /* max iterations */
 855       HYPRE_PCGSetTol(solver, tol); /* conv. tolerance */
 856       HYPRE_PCGSetTwoNorm(solver, 0); /* use the two norm as the stopping criteria */
 857       HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
 858       HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
 859 
 860       /* Create AMS preconditioner */
 861       HYPRE_AMSCreate(&precond);
 862 
 863       /* Set AMS parameters */
 864       HYPRE_AMSSetMaxIter(precond, 1);
 865       HYPRE_AMSSetTol(precond, 0.0);
 866       HYPRE_AMSSetCycleType(precond, cycle_type);
 867       HYPRE_AMSSetPrintLevel(precond, 1);
 868 
 869       /* Set discrete gradient */
 870       HYPRE_AMSSetDiscreteGradient(precond, par_G);
 871 
 872       /* Set vertex coordinates */
 873       HYPRE_AMSSetCoordinateVectors(precond,
 874                                     par_xcoord, par_ycoord, par_zcoord);
 875 
 876       if (singular_problem)
 877          HYPRE_AMSSetBetaPoissonMatrix(precond, NULL);
 878 
 879       /* Smoothing and AMG options */
 880       HYPRE_AMSSetSmoothingOptions(precond,
 881                                    rlx_type, rlx_sweeps,
 882                                    rlx_weight, rlx_omega);
 883       HYPRE_AMSSetAlphaAMGOptions(precond,
 884                                   amg_coarsen_type, amg_agg_levels,
 885                                   amg_rlx_type, theta, amg_interp_type,
 886                                   amg_Pmax);
 887       HYPRE_AMSSetBetaAMGOptions(precond,
 888                                  amg_coarsen_type, amg_agg_levels,
 889                                  amg_rlx_type, theta, amg_interp_type,
 890                                  amg_Pmax);
 891 
 892       /* Set the PCG preconditioner */
 893       HYPRE_PCGSetPrecond(solver,
 894                           (HYPRE_PtrToSolverFcn) HYPRE_AMSSolve,
 895                           (HYPRE_PtrToSolverFcn) HYPRE_AMSSetup,
 896                           precond);
 897 
 898       /* Call the setup */
 899       HYPRE_ParCSRPCGSetup(solver, par_A, par_b, par_x);
 900 
 901       /* Finalize current timing */
 902       hypre_EndTiming(time_index);
 903       hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD);
 904       hypre_FinalizeTiming(time_index);
 905       hypre_ClearTiming();
 906 
 907       /* Start timing again */
 908       time_index = hypre_InitializeTiming("AMS Solve");
 909       hypre_BeginTiming(time_index);
 910 
 911       /* Call the solve */
 912       HYPRE_ParCSRPCGSolve(solver, par_A, par_b, par_x);
 913 
 914       /* Finalize current timing */
 915       hypre_EndTiming(time_index);
 916       hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD);
 917       hypre_FinalizeTiming(time_index);
 918       hypre_ClearTiming();
 919 
 920       /* Get some info */
 921       HYPRE_PCGGetNumIterations(solver, &its);
 922       HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
 923 
 924       /* Clean up */
 925       HYPRE_AMSDestroy(precond);
 926       HYPRE_ParCSRPCGDestroy(solver);
 927 
 928       /* Gather the solution vector */
 929       HYPRE_SStructVectorGather(x);
 930 
 931       /* Print the solution with replicated shared data */
 932       if (print_solution)
 933       {
 934          FILE *file;
 935          char  filename[255];
 936 
 937          HYPRE_Int part = 0;
 938          int nvalues = n*(n+1)*(n+1);
 939          double *xvalues, *yvalues, *zvalues;
 940 
 941          xvalues = calloc(nvalues, sizeof(double));
 942          yvalues = calloc(nvalues, sizeof(double));
 943          zvalues = calloc(nvalues, sizeof(double));
 944 
 945          /* Get local solution in the x-edges */
 946          {
 947             HYPRE_Int var = 0;
 948             HYPRE_Int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
 949             HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
 950             HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
 951                                             var, xvalues);
 952          }
 953          /* Get local solution in the y-edges */
 954          {
 955             HYPRE_Int var = 1;
 956             HYPRE_Int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
 957             HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
 958             HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
 959                                             var, yvalues);
 960          }
 961          /* Get local solution in the z-edges */
 962          {
 963             HYPRE_Int var = 2;
 964             HYPRE_Int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
 965             HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
 966             HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
 967                                             var, zvalues);
 968          }
 969 
 970          sprintf(filename, "sstruct.out.x.00.00.%05d", myid);
 971          if ((file = fopen(filename, "w")) == NULL)
 972          {
 973             printf("Error: can't open output file %s\n", filename);
 974             MPI_Finalize();
 975             exit(1);
 976          }
 977 
 978          /* Save the edge values, element by element, using the same numbering
 979             as the local finite element degrees of freedom. */
 980          {
 981             int i, j, k, s;
 982 
 983             /* Initial x-, y- and z-edge indices in the values arrays */
 984             int oi[4] = { 0, n, n*(n+1), n*(n+1)+n }; /* e_0, e_2,  e_4,  e_6 */
 985             int oj[4] = { 0, 1, n*(n+1), n*(n+1)+1 }; /* e_3, e_1,  e_7,  e_5 */
 986             int ok[4] = { 0, 1,     n+1,       n+2 }; /* e_8, e_9, e_11, e_10 */
 987             /* Loop over the cells while updating the above offsets */
 988             for (k = 0; k < n; k++)
 989             {
 990                for (j = 0; j < n; j++)
 991                {
 992                   for (i = 0; i < n; i++)
 993                   {
 994                      fprintf(file,
 995                              "%.14e\n%.14e\n%.14e\n%.14e\n"
 996                              "%.14e\n%.14e\n%.14e\n%.14e\n"
 997                              "%.14e\n%.14e\n%.14e\n%.14e\n",
 998                              xvalues[oi[0]], yvalues[oj[1]], xvalues[oi[1]], yvalues[oj[0]],
 999                              xvalues[oi[2]], yvalues[oj[3]], xvalues[oi[3]], yvalues[oj[2]],
1000                              zvalues[ok[0]], zvalues[ok[1]], zvalues[ok[3]], zvalues[ok[2]]);
1001 
1002                      for (s=0; s<4; s++) oi[s]++, oj[s]++, ok[s]++;
1003                   }
1004                   for (s=0; s<4; s++) oj[s]++, ok[s]++;
1005                }
1006                for (s=0; s<4; s++) oi[s]+=n, ok[s]+=n+1;
1007             }
1008          }
1009 
1010          fflush(file);
1011          fclose(file);
1012 
1013          free(xvalues);
1014          free(yvalues);
1015          free(zvalues);
1016       }
1017 
1018       if (myid == 0)
1019       {
1020          printf("\n");
1021          printf("Iterations = %lld\n", its);
1022          printf("Final Relative Residual Norm = %g\n", final_res_norm);
1023          printf("\n");
1024       }
1025    }
1026 
1027    /* Free memory */
1028    HYPRE_SStructGridDestroy(edge_grid);
1029    HYPRE_SStructGraphDestroy(A_graph);
1030    HYPRE_SStructMatrixDestroy(A);
1031    HYPRE_SStructVectorDestroy(b);
1032    HYPRE_SStructVectorDestroy(x);
1033    HYPRE_SStructGridDestroy(node_grid);
1034    HYPRE_SStructGraphDestroy(G_graph);
1035    HYPRE_SStructStencilDestroy(G_stencil[0]);
1036    HYPRE_SStructStencilDestroy(G_stencil[1]);
1037    HYPRE_SStructStencilDestroy(G_stencil[2]);
1038    HYPRE_SStructMatrixDestroy(G);
1039    HYPRE_SStructVectorDestroy(xcoord);
1040    HYPRE_SStructVectorDestroy(ycoord);
1041    HYPRE_SStructVectorDestroy(zcoord);
1042 
1043    /* Finalize MPI */
1044    MPI_Finalize();
1045 
1046    return 0;
1047 }


syntax highlighted by Code2HTML, v. 0.9.1