Hello.

The following patch fixes the Fortran FAIL_ALLOC predictor: it introduces
a new predictor (PRED_FORTRAN_REALLOC) and properly marks the returned
values, as shown in the following two examples:

A) allocate_allocatable

original annotation:

    if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) (overflow.343 != 
0), 0, 33)) // overflow
      {
        stat.341 = 5014;
      }
    else
      {
        if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) (bx_ilow.data 
!= 0B), 0, 34)) // fail alloc
          {
            stat.341 = 5014;
          }
        else
          {
            stat.341 = 0;
            bx_ilow.data = (void * restrict) __builtin_malloc (MAX_EXPR 
<size.342, 1>);
            if (bx_ilow.data == 0B)
              {
                stat.341 = 5014;
              }
          }
      }      
    if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) (stat.341 == 0), 
1, 34)) // fail alloc
      {
        bx_ilow.dtype = 539;
        bx_ilow.dim[0].lbound = (integer(kind=8)) xstart;
        bx_ilow.dim[0].ubound = 1;
        bx_ilow.dim[0].stride = 1;
        bx_ilow.dim[1].lbound = (integer(kind=8)) ystart;
        bx_ilow.dim[1].ubound = D.5342;
        bx_ilow.dim[1].stride = D.5341;
        bx_ilow.dim[2].lbound = (integer(kind=8)) zstart;
        bx_ilow.dim[2].ubound = D.5346;
        bx_ilow.dim[2].stride = D.5345;
        bx_ilow.offset = D.5352;
      }


I changed it to:

    if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) (overflow.343 != 
0), 0, 33)) // overflow
      {
        stat.341 = 5014;
      }
    else
      {
        if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) (bx_ilow.data 
!= 0B), 0, 35)) // repeated allocation/deallocation
          {
            stat.341 = 5014;
          }
        else
          {
            stat.341 = 0;
            bx_ilow.data = (void * restrict) __builtin_malloc (MAX_EXPR 
<size.342, 1>);
            if ((logical(kind=4)) __builtin_expect ((integer(kind=8)) 
(bx_ilow.data == 0B), 0, 34)) // fail alloc
              {
                stat.341 = 5014;
              }
          }
      }
    if (stat.341 == 0) // no expectation
      {
        bx_ilow.dtype = 539;
        bx_ilow.dim[0].lbound = (integer(kind=8)) xstart;
        bx_ilow.dim[0].ubound = 1;
        bx_ilow.dim[0].stride = 1;
        bx_ilow.dim[1].lbound = (integer(kind=8)) ystart;
        bx_ilow.dim[1].ubound = D.5342;
        bx_ilow.dim[1].stride = D.5341;
        bx_ilow.dim[2].lbound = (integer(kind=8)) zstart;
        bx_ilow.dim[2].ubound = D.5346;
        bx_ilow.dim[2].stride = D.5345;
        bx_ilow.offset = D.5352;
      }

B) array allocation

  <bb 9>:
  # size.1478_3210 = PHI <0(7), size.1478_3743(8)>
  _21 = _3740 != 0;
  _22 = (integer(kind=8)) _21;
  _23 = BUILTIN_EXPECT (_22, 0, 33); // overflow
  _24 = (logical(kind=4)) _23;
  if (_24 != 0)
    goto <bb 13>;
  else
    goto <bb 10>;

  <bb 10>:
  _25 = hrval.data;
  _26 = _25 != 0B;
  _27 = (integer(kind=8)) _26;
  _28 = BUILTIN_EXPECT (_27, 0, 34); // fail malloc
  _29 = (logical(kind=4)) _28;
  if (_29 != 0)
    goto <bb 13>;
  else
    goto <bb 11>;

  <bb 11>:
  _30 = MAX_EXPR <size.1478_3210, 1>;
  _31 = __builtin_malloc (_30);
  hrval.data = _31;
  if (_31 == 0B)
    goto <bb 12>;
  else
    goto <bb 13>;

  <bb 12>:

  <bb 13>:
  # stat.1477_3202 = PHI <5014(9), 5014(10), 0(11), 5014(12)>
  _33 = stat.1477_3202 == 0;
  _34 = (integer(kind=8)) _33;
  _35 = BUILTIN_EXPECT (_34, 1, 34); // fail malloc
  _36 = (logical(kind=4)) _35;
  if (_36 != 0)
    goto <bb 14>;
  else
    goto <bb 15>;

The dump above is the original annotation; with the patch applied, it currently looks as follows:

  <bb 9>:
  # size.1478_3210 = PHI <0(7), size.1478_3743(8)>
  _21 = _3740 != 0;
  _22 = (integer(kind=8)) _21;
  _23 = BUILTIN_EXPECT (_22, 0, 33); // overflow
  _24 = (logical(kind=4)) _23;
  if (_24 != 0)
    goto <bb 13>;
  else
    goto <bb 10>;

  <bb 10>:
  _25 = hrval.data;
  _26 = _25 != 0B;
  _27 = (integer(kind=8)) _26;
  _28 = BUILTIN_EXPECT (_27, 0, 35); // repeated allocation/deallocation
  _29 = (logical(kind=4)) _28;
  if (_29 != 0)
    goto <bb 13>;
  else
    goto <bb 11>;

  <bb 11>:
  _30 = MAX_EXPR <size.1478_3210, 1>;
  _31 = __builtin_malloc (_30);
  hrval.data = _31;
  _33 = _31 == 0B;
  _34 = (integer(kind=8)) _33;
  _35 = BUILTIN_EXPECT (_34, 0, 34); // fail alloc
  _36 = (logical(kind=4)) _35;
  if (_36 != 0)
    goto <bb 12>;
  else
    goto <bb 13>;

  <bb 12>:

  <bb 13>:
  # stat.1477_3202 = PHI <5014(9), 5014(10), 0(11), 5014(12)>
  if (stat.1477_3202 == 0) // no prediction
    goto <bb 14>;
  else
    goto <bb 15>;

I get following numbers with the patch applied:

1) polyhedron benchmark (aermod.f90.061i.profile):
HEURISTICS                           BRANCHES  (REL)  HITRATE                COVERAGE COVERAGE  (REL)
repeated allocation/deallocation          194   4.1% 100.00% / 100.00%            194   194.00   0.0%
fail alloc                                377   7.9% 100.00% / 100.00%            377   377.00   0.0%

2) 459.GemsFDTD SPEC2006 benchmark:
HEURISTICS                           BRANCHES  (REL)  HITRATE                COVERAGE COVERAGE  (REL)
repeated allocation/deallocation          203   4.3% 100.00% / 100.00%            203   203.00   0.0%
fail alloc                                378   8.0% 100.00% / 100.00%            378   378.00   0.0%

Even though all hit rates are 100%, I would suggest setting FAIL_ALLOC to
PROB_VERY_LIKELY and
PRED_FORTRAN_REALLOC to PROB_LIKELY.

Is the patch ready to be installed, provided it survives the regression tests?

Thanks,
Martin

Reply via email to