01-Feb-2013 20:08, Sparsh Mittal пишет:
Here is the code:


Here is my iteration on it, with a bit of help from std.parallelism.
std.parallelism uses a thread pool, so it's somewhat faster than creating threads anew. Still, it's instantaneous for me, in the range of 30-40 ms, even with a grid size of 1024 and 5M iterations.

Have you enabled all of the optimizations? The correct switches are:

dmd -inline -O -release optimize_me.d

or

rdmd -inline -O -release optimize_me.d

to compile and then run it immediately.


import std.stdio;
import std.parallelism;
import std.datetime;
import std.conv;


immutable int gridSize = 1024;
immutable int MAXSTEPS = 5000_000;       /* Maximum number of iterations  */
immutable double TOL_VAL =0.00001;         /* Numerical Tolerance */
immutable double omega =  0.376;
immutable double one_minus_omega = 1.0 - 0.376;


immutable int numberOfThreads = 2;


double MAX_FUNC(double a, double b)
{
  return a> b? a: b;
}

double ABS_VAL(double a)
{
  return a> 0? a: -a;
}

shared double[gridSize+2][gridSize+2] gridInfo;
shared double maxError = 0.0;

void main(string args[])
{

  for(int i=0; i<gridSize+2; i++)
  {
    for(int j=0; j<gridSize+2; j++)
    {
      if(i==0)
        gridInfo[i][j] = 1.0;
      else
        gridInfo[i][j] = 0.0;
    }
  }

  bool shouldCheck = false;
  bool isConverged = false;
  for(int iter = 1; iter <= MAXSTEPS; iter++)
  {
    shouldCheck = false;
    if(iter % 400 ==0)
    {
      shouldCheck = true;
      maxError = 0.0;
    }


    alias MyTask = typeof(task!(SolverSlave)(0, 0, false));
      //This is Phase 1
    {
      MyTask[numberOfThreads] tasks;
      foreach(cc; 0..numberOfThreads)
      {
        tasks[cc] = task!(SolverSlave)(cc, 0, shouldCheck);
        taskPool.put(tasks[cc]);
      }
      foreach(cc; 0..numberOfThreads)
        tasks[cc].yieldForce();
    }

     //This is Phase 2
    {
      MyTask[numberOfThreads] tasks;
      foreach(cc; 0..numberOfThreads)
      {
        tasks[cc] = task!(SolverSlave)(cc, 1, shouldCheck);
        taskPool.put(tasks[cc]);
      }
      foreach(cc; 0..numberOfThreads)
        tasks[cc].yieldForce();

    }

    if( maxError <  TOL_VAL)
      {
        isConverged = true;
        break;
      }

  }
  /*if(isConverged)
    writeln("It converged");
  else
    writeln("It did not converge");*/
}



void SolverSlave(int myNumber, int remainder, bool shouldCheckHere)
{

  double sum =0;

  //Divide task among threads
  int iStart = ((myNumber*gridSize)/numberOfThreads) + 1;
  int iEnd =  (((myNumber+1)*gridSize)/numberOfThreads) ;


  for(int i=iStart; i<= iEnd; i++)
  {
    for(int j=1; j< gridSize+1; j++)
    {
      if( ((i+j)%2 ==remainder)) //Phase 1 or 2
      {
        sum = ( gridInfo[i  ][j+1] + gridInfo[i+1][j  ] +
            gridInfo[i-1][j  ] + gridInfo[i  ][j-1] )*0.25;

        //Should not check everytime to reduce synchronization overhead
        if(shouldCheckHere)
        {
maxError = MAX_FUNC(ABS_VAL(omega *(sum-gridInfo[i][j])), maxError);
        }
        gridInfo[i][j] = one_minus_omega*gridInfo[i][j] + omega*sum;
      }

    }
  }

}









--
Dmitry Olshansky

Reply via email to