[Users] Einstein Toolkit Meeting Reminder

2023-08-16 Thread rhaas
Hello,

Please consider joining the weekly Einstein Toolkit phone call at
9:00 am US central time on Thursdays. For details on how to connect
and what agenda items are to be discussed, use the link below.

https://docs.einsteintoolkit.org/et-docs/Main_Page#Weekly_Users_Call

--The Maintainers
___
Users mailing list
Users@einsteintoolkit.org
http://lists.einsteintoolkit.org/mailman/listinfo/users


[Users] Problems running BBH example

2023-08-16 Thread Praveer Krishna
Hello,

I've downloaded and configured the Einstein Toolkit on my desktop, but I'm
having an issue when I try to run the BBH example provided here and the BNS
simulation from here. The job finishes almost instantly, producing the
attached error file. (I'm using the command "simfactory/bin/sim
create-submit GW150914_28 --define N 28 --parfile
par/GW150914/GW150914.rpar --procs 24 --walltime 24:00:00".)

There seems to be an issue with the 'num_pus' to 'num_cores' ratio in the
"system_topology.cc" file, but oddly the error persists even when I
manually set them equal, or remove line 471 altogether. The error does not
appear when I run the tov_ET example or the HelloWorld one, and I'm also
able to run this mini BNS tutorial without any issues, so I'm a bit
baffled. I've attached my .ini file and the .cc file along with the error
file in case they help.
Please take a look and let me know if I'm missing something obvious.

PS: My PC has 128 GB of memory, a 13900K (24 cores / 32 threads) processor,
and 1 TB of storage.
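
To illustrate what I suspect is happening, here is a minimal standalone
sketch (my own, not taken from the attached file): on a hybrid CPU like
the 13900K, the 8 P-cores each expose two hardware threads while the 16
E-cores expose one, so the PU count (32) is not an integer multiple of the
core count (24), and any topology check that assumes a uniform
threads-per-core ratio will fail:

#include <cstdio>

int main() {
  // Hypothetical values for an i9-13900K-like hybrid CPU:
  // 8 P-cores with 2 hardware threads each, 16 E-cores with 1 each.
  const int num_cores = 8 + 16;       // 24 cores
  const int num_pus = 8 * 2 + 16 * 1; // 32 processing units (PUs)

  // A homogeneous-SMT assumption (PUs evenly divided among cores)
  // does not hold here: 32 % 24 != 0.
  if (num_pus % num_cores != 0) {
    std::printf("non-integral PUs-per-core ratio: %d PUs on %d cores\n",
                num_pus, num_cores);
    return 1;
  }
  return 0;
}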

Regards,
Praveer


GW150914_28.err
Description: Binary data


Astra.U.ini
Description: Binary data
#include <cctk.h>
#include <cctk_Parameters.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#ifdef HAVE_CAPABILITY_MPI
#include <mpi.h>

#if defined __bgq__
// The processor names on a Blue Gene/Q include the MPI rank, and thus
// do not uniquely identify the host name. We therefore roll our own.
// See .
#include <mpix.h>
namespace {
void MPI_Get_processor_name1(char *name, int *resultlen) {
  // Query the Blue Gene/Q hardware coordinates of this process
  MPIX_Hardware_t hw;
  MPIX_Hardware(&hw);
  *resultlen =
      snprintf(name, MPI_MAX_PROCESSOR_NAME, "(%u,%u,%u,%u,%u)", hw.Coords[0],
               hw.Coords[1], hw.Coords[2], hw.Coords[3], hw.Coords[4]);
  // ignoring hw.Coords[5], which is the core number inside a node
}
} // namespace
#define MPI_Get_processor_name MPI_Get_processor_name1
#endif
#endif

#ifdef _OPENMP
#include <omp.h>
#else
// Provide fallback stubs when OpenMP is not available
int omp_get_max_threads() { return 1; }
int omp_get_num_threads() { return 1; }
int omp_get_thread_num() { return 0; }
#endif

#include <hwloc.h>
// On a Blue Gene/Q, hwloc reports per-process hardware information
// instead of per-node hardware information. We need to correct for
// this.
#ifdef __bgq__
#define HWLOC_PER_PROCESS
#endif

using namespace std;

namespace {
// Round up the quotient a/b (ceiling division)
int divup(int a, int b) {
  assert(a >= 0);
  assert(b > 0);
  return (a + b - 1) / b;
}

// Is a a positive power of 2?
bool is_pow2(int a) {
  if (a <= 0)
    return false;
  while (a != 1) {
    if (a % 2)
      return false;
    a /= 2;
  }
  return true;
}
} // namespace
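
// Illustrative note (not part of the original file): on a hybrid CPU
// with 32 PUs and 24 cores, as in the report above, divup(32, 24) == 2
// and is_pow2(2) holds, yet 32 is not an integer multiple of 24 -- the
// kind of non-uniform topology that checks later in this file (around
// the reported line 471) can reject.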

namespace {

// Check that OpenMP counts and numbers threads as expected
void check_openmp() {
  bool found_inconsistency = false;

  // Count OpenMP threads directly, and ask the OpenMP run-time
  int num_threads_direct = 0;
#pragma omp parallel reduction(+ : num_threads_direct)
  { ++num_threads_direct; }
  int num_threads_omp = -1;
#pragma omp parallel
  {
#pragma omp master
    { num_threads_omp = omp_get_num_threads(); }
  }
  if (num_threads_direct != num_threads_omp) {
    found_inconsistency = true;
    CCTK_VWarn(CCTK_WARN_ALERT, __LINE__, __FILE__, CCTK_THORNSTRING,
               "Number of OpenMP threads is inconsistent: counting %d threads, "
               "but OpenMP run-time reports %d threads",
               num_threads_direct, num_threads_omp);
  }

  // Check OpenMP thread numbers
  vector<int> thread_nums;
#pragma omp parallel
  {
#pragma omp critical
    { thread_nums.push_back(omp_get_thread_num()); }
  }
  // Prevent insanity
  assert(int(thread_nums.size()) == num_threads_direct);
  int max_thread_num = -1;
  for (int i = 0; i < int(thread_nums.size()); ++i) {
    max_thread_num = max(max_thread_num, thread_nums.at(i));
  }
  // Count how often each thread number occurs
  vector<int> thread_counts(max_thread_num + 1, 0);
  for (int i = 0; i < int(thread_nums.size()); ++i) {
    ++thread_counts.at(thread_nums.at(i));
  }
  int num_threads_direct_again = 0;
  for (size_t i = 0; i < thread_counts.size(); ++i) {
    num_threads_direct_again += thread_counts.at(i);
  }
  // Prevent insanity
  assert(num_threads_direct_again == num_threads_direct);
  // Each thread number below num_threads_direct must occur exactly once,
  // and no other thread number may occur at all
  bool thread_counts_bad = int(thread_counts.size()) < num_threads_direct;
  for (int i = 0; i < int(thread_counts.size()); ++i) {
    thread_counts_bad =
        thread_counts_bad or thread_counts.at(i) != (i < num_threads_direct);
  }
  if (thread_counts_bad) {
    found_inconsistency = true;
    printf("OpenMP thread numbers:");
    for (int i = 0; i < int(thread_counts.size()); ++i) {
      for (int j = 0; j < thread_counts.at(i); ++j) {
        printf(" %d", i);
      }
    }
    printf("\n");
    CCTK_WARN(CCTK_WARN_ALERT, "OpenMP threads are numbered inconsistently");
  }

  if (found_inconsistency) {
    CCTK_ERROR("Severe OpenMP inconsistency detected -- aborting");
  }
}
} // namespace

namespace {

struct mpi_host_mapping_t {
  int