On Sat, 27 Oct 2012, Roy Stogner wrote:
I'll post again when I've got a patch I can't break.
Not quite "a patch I can't break" yet, so don't waste time doing any
non-automated testing, but here's "a patch I can't break quite as
easily" if anyone else wants to run it through a gauntlet on the
configurations I can't break myself. It seems to work for SerialMesh
with or without threads, and for ParallelMesh without threads, but it
still breaks pretty easily when run threaded on a ParallelMesh.
---
Roy
Index: include/base/sparsity_pattern.h
===================================================================
--- include/base/sparsity_pattern.h (revision 6236)
+++ include/base/sparsity_pattern.h (working copy)
@@ -48,6 +48,8 @@
typedef std::vector<unsigned int, Threads::scalable_allocator<unsigned int>
> Row;
class Graph : public std::vector<Row> {};
+ class NonlocalGraph : public std::map<unsigned int, Row> {};
+
/**
* Splices the two sorted ranges [begin,middle) and [middle,end)
* into one sorted range [begin,end). This method is much like
@@ -85,6 +87,8 @@
public:
SparsityPattern::Graph sparsity_pattern;
+ SparsityPattern::NonlocalGraph nonlocal_pattern;
+
std::vector<unsigned int> n_nz;
std::vector<unsigned int> n_oz;
@@ -116,7 +120,9 @@
void operator()(const ConstElemRange &range);
- void join (const Build &other);
+ void join (Build &other);
+
+ void parallel_sync ();
};
#if defined(__GNUC__) && (__GNUC__ < 4) && !defined(__INTEL_COMPILER)
Index: src/base/dof_map.C
===================================================================
--- src/base/dof_map.C (revision 6236)
+++ src/base/dof_map.C (working copy)
@@ -86,9 +86,11 @@
implicit_neighbor_dofs,
need_full_sparsity_pattern));
- Threads::parallel_reduce (ConstElemRange (mesh.active_elements_begin(),
- mesh.active_elements_end()), *sp);
+ Threads::parallel_reduce (ConstElemRange (mesh.active_local_elements_begin(),
+ mesh.active_local_elements_end()),
*sp);
+ sp->parallel_sync();
+
#ifndef NDEBUG
// Avoid declaring these variables unless asserts are enabled.
const unsigned int proc_id = mesh.processor_id();
@@ -2158,10 +2160,13 @@
{
const unsigned int ig = element_dofs[i];
- // Only bother if this matrix row will be stored
- // on this processor.
+ SparsityPattern::Row *row;
+
+ // We save non-local row components for now so we can
+ // communicate them to other processors later.
+
if ((ig >= first_dof_on_proc) &&
- (ig < end_dof_on_proc))
+ (ig < end_dof_on_proc))
{
// This is what I mean
// libmesh_assert_greater_equal ((ig - first_dof_on_proc), 0);
@@ -2170,100 +2175,105 @@
libmesh_assert_greater_equal (ig, first_dof_on_proc);
libmesh_assert_less ((ig - first_dof_on_proc),
sparsity_pattern.size());
- SparsityPattern::Row &row = sparsity_pattern[ig -
first_dof_on_proc];
+ row = &sparsity_pattern[ig - first_dof_on_proc];
+ }
+ else
+ {
+ row = &nonlocal_pattern[ig];
+ }
- // If the row is empty we will add *all* the element DOFs,
- // so just do that.
- if (row.empty())
- {
- row.insert(row.end(),
- element_dofs.begin(),
- element_dofs.end());
- }
- else
- {
- // Build a list of the DOF indices not found in the
- // sparsity pattern
- dofs_to_add.clear();
+ // If the row is empty we will add *all* the element DOFs,
+ // so just do that.
+ if (row->empty())
+ {
+ row->insert(row->end(),
+ element_dofs.begin(),
+ element_dofs.end());
+ }
+ else
+ {
+ // Build a list of the DOF indices not found in the
+ // sparsity pattern
+ dofs_to_add.clear();
- // Cache iterators. Low will move forward, subsequent
- // searches will be on smaller ranges
- SparsityPattern::Row::iterator
- low = std::lower_bound (row.begin(), row.end(),
element_dofs.front()),
- high = std::upper_bound (low, row.end(),
element_dofs.back());
+ // Cache iterators. Low will move forward, subsequent
+ // searches will be on smaller ranges
+ SparsityPattern::Row::iterator
+ low = std::lower_bound (row->begin(), row->end(),
element_dofs.front()),
+ high = std::upper_bound (low, row->end(),
element_dofs.back());
- for (unsigned int j=0; j<n_dofs_on_element; j++)
- {
- const unsigned int jg = element_dofs[j];
+ for (unsigned int j=0; j<n_dofs_on_element; j++)
+ {
+ const unsigned int jg = element_dofs[j];
- // See if jg is in the sorted range
- std::pair<SparsityPattern::Row::iterator,
- SparsityPattern::Row::iterator>
- pos = std::equal_range (low, high, jg);
+ // See if jg is in the sorted range
+ std::pair<SparsityPattern::Row::iterator,
+ SparsityPattern::Row::iterator>
+ pos = std::equal_range (low, high, jg);
- // Must add jg if it wasn't found
- if (pos.first == pos.second)
- dofs_to_add.push_back(jg);
+ // Must add jg if it wasn't found
+ if (pos.first == pos.second)
+ dofs_to_add.push_back(jg);
- // pos.first is now a valid lower bound for any
- // remaining element DOFs. (That's why we sorted
them.)
- // Use it for the next search
- low = pos.first;
- }
+ // pos.first is now a valid lower bound for any
+ // remaining element DOFs. (That's why we sorted them.)
+ // Use it for the next search
+ low = pos.first;
+ }
- // Add to the sparsity pattern
- if (!dofs_to_add.empty())
- {
- const unsigned int old_size = row.size();
+ // Add to the sparsity pattern
+ if (!dofs_to_add.empty())
+ {
+ const unsigned int old_size = row->size();
- row.insert (row.end(),
- dofs_to_add.begin(),
- dofs_to_add.end());
+ row->insert (row->end(),
+ dofs_to_add.begin(),
+ dofs_to_add.end());
- SparsityPattern::sort_row (row.begin(),
row.begin()+old_size, row.end());
- }
- }
+ SparsityPattern::sort_row
+ (row->begin(), row->begin()+old_size, row->end());
+ }
+ }
- // Now (possibly) add dofs from neighboring elements
- // TODO:[BSK] optimize this like above!
- if (implicit_neighbor_dofs)
- for (unsigned int s=0; s<elem->n_sides(); s++)
- if (elem->neighbor(s) != NULL)
- {
- const Elem* const neighbor_0 = elem->neighbor(s);
+ // Now (possibly) add dofs from neighboring elements
+ // TODO:[BSK] optimize this like above!
+ if (implicit_neighbor_dofs)
+ for (unsigned int s=0; s<elem->n_sides(); s++)
+ if (elem->neighbor(s) != NULL)
+ {
+ const Elem* const neighbor_0 = elem->neighbor(s);
#ifdef LIBMESH_ENABLE_AMR
-
neighbor_0->active_family_tree_by_neighbor(active_neighbors,elem);
+
neighbor_0->active_family_tree_by_neighbor(active_neighbors,elem);
#else
- active_neighbors.clear();
- active_neighbors.push_back(neighbor_0);
+ active_neighbors.clear();
+ active_neighbors.push_back(neighbor_0);
#endif
- for (unsigned int a=0; a != active_neighbors.size();
++a)
- {
- const Elem *neighbor = active_neighbors[a];
+ for (unsigned int a=0; a != active_neighbors.size(); ++a)
+ {
+ const Elem *neighbor = active_neighbors[a];
- dof_map.dof_indices (neighbor, neighbor_dofs);
+ dof_map.dof_indices (neighbor, neighbor_dofs);
#ifdef LIBMESH_ENABLE_CONSTRAINTS
- dof_map.find_connected_dofs (neighbor_dofs);
+ dof_map.find_connected_dofs (neighbor_dofs);
#endif
- const unsigned int n_dofs_on_neighbor =
neighbor_dofs.size();
+ const unsigned int n_dofs_on_neighbor =
neighbor_dofs.size();
- for (unsigned int j=0; j<n_dofs_on_neighbor; j++)
- {
- const unsigned int jg = neighbor_dofs[j];
+ for (unsigned int j=0; j<n_dofs_on_neighbor; j++)
+ {
+ const unsigned int jg = neighbor_dofs[j];
- // See if jg is in the sorted range
- std::pair<SparsityPattern::Row::iterator,
- SparsityPattern::Row::iterator>
- pos = std::equal_range (row.begin(),
row.end(), jg);
+ // See if jg is in the sorted range
+ std::pair<SparsityPattern::Row::iterator,
+ SparsityPattern::Row::iterator>
+ pos = std::equal_range (row->begin(),
row->end(), jg);
- // Insert jg if it wasn't found
- if (pos.first == pos.second)
- row.insert (pos.first, jg);
- }
- }
- }
- }
+ // Insert jg if it wasn't found
+ if (pos.first == pos.second)
+ row->insert (pos.first, jg);
+ }
+ }
+ }
}
}
}
@@ -2333,13 +2343,11 @@
{
const unsigned int ig = element_dofs_i[i];
- // We are only interested in computing the sparsity
pattern
- // for the rows in [range.begin(),range.end()). So, if
this
- // element's DOFs are not in that range just go ahead to
the
- // next one.
- //
- // Also, only bother if this matrix row will be stored
- // on this processor.
+ SparsityPattern::Row *row;
+
+ // We save non-local row components for now so we can
+ // communicate them to other processors later.
+
if ((ig >= first_dof_on_proc) &&
(ig < end_dof_on_proc))
{
@@ -2351,99 +2359,107 @@
libmesh_assert_less (ig, (sparsity_pattern.size() +
first_dof_on_proc));
- SparsityPattern::Row &row = sparsity_pattern[ig -
first_dof_on_proc];
+ row = &sparsity_pattern[ig - first_dof_on_proc];
+ }
+ else
+ {
+ row = &nonlocal_pattern[ig];
+ }
- // If the row is empty we will add *all* the element
j DOFs,
- // so just do that.
- if (row.empty())
- {
- row.insert(row.end(),
- element_dofs_j.begin(),
- element_dofs_j.end());
- }
- else
- {
- // Build a list of the DOF indices not found in
the
- // sparsity pattern
- dofs_to_add.clear();
+ // If the row is empty we will add *all* the element j
DOFs,
+ // so just do that.
+ if (row->empty())
+ {
+ row->insert(row->end(),
+ element_dofs_j.begin(),
+ element_dofs_j.end());
+ }
+ else
+ {
+ // Build a list of the DOF indices not found in the
+ // sparsity pattern
+ dofs_to_add.clear();
- // Cache iterators. Low will move forward,
subsequent
- // searches will be on smaller ranges
- SparsityPattern::Row::iterator
- low = std::lower_bound (row.begin(),
row.end(), element_dofs_j.front()),
- high = std::upper_bound (low,
row.end(), element_dofs_j.back());
+ // Cache iterators. Low will move forward,
subsequent
+ // searches will be on smaller ranges
+ SparsityPattern::Row::iterator
+ low = std::lower_bound
+ (row->begin(), row->end(),
element_dofs_j.front()),
+ high = std::upper_bound
+ (low, row->end(),
element_dofs_j.back());
- for (unsigned int j=0; j<n_dofs_on_element_j; j++)
- {
- const unsigned int jg = element_dofs_j[j];
+ for (unsigned int j=0; j<n_dofs_on_element_j; j++)
+ {
+ const unsigned int jg = element_dofs_j[j];
- // See if jg is in the sorted range
- std::pair<SparsityPattern::Row::iterator,
- SparsityPattern::Row::iterator>
- pos = std::equal_range (low, high, jg);
+ // See if jg is in the sorted range
+ std::pair<SparsityPattern::Row::iterator,
+ SparsityPattern::Row::iterator>
+ pos = std::equal_range (low, high, jg);
- // Must add jg if it wasn't found
- if (pos.first == pos.second)
- dofs_to_add.push_back(jg);
+ // Must add jg if it wasn't found
+ if (pos.first == pos.second)
+ dofs_to_add.push_back(jg);
- // pos.first is now a valid lower bound for
any
- // remaining element j DOFs. (That's why we
sorted them.)
- // Use it for the next search
- low = pos.first;
- }
+ // pos.first is now a valid lower bound for any
+ // remaining element j DOFs. (That's why we
sorted them.)
+ // Use it for the next search
+ low = pos.first;
+ }
- // Add to the sparsity pattern
- if (!dofs_to_add.empty())
- {
- const unsigned int old_size = row.size();
+ // Add to the sparsity pattern
+ if (!dofs_to_add.empty())
+ {
+ const unsigned int old_size = row->size();
- row.insert (row.end(),
- dofs_to_add.begin(),
- dofs_to_add.end());
+ row->insert (row->end(),
+ dofs_to_add.begin(),
+ dofs_to_add.end());
- SparsityPattern::sort_row (row.begin(),
row.begin()+old_size, row.end());
- }
- }
- // Now (possibly) add dofs from neighboring elements
- // TODO:[BSK] optimize this like above!
- if (implicit_neighbor_dofs)
- for (unsigned int s=0; s<elem->n_sides(); s++)
- if (elem->neighbor(s) != NULL)
- {
- const Elem* const neighbor_0 =
elem->neighbor(s);
- #ifdef LIBMESH_ENABLE_AMR
-
neighbor_0->active_family_tree_by_neighbor(active_neighbors,elem);
- #else
- active_neighbors.clear();
- active_neighbors.push_back(neighbor_0);
- #endif
+ SparsityPattern::sort_row
+ (row->begin(), row->begin()+old_size,
+ row->end());
+ }
+ }
+ // Now (possibly) add dofs from neighboring elements
+ // TODO:[BSK] optimize this like above!
+ if (implicit_neighbor_dofs)
+ for (unsigned int s=0; s<elem->n_sides(); s++)
+ if (elem->neighbor(s) != NULL)
+ {
+ const Elem* const neighbor_0 =
elem->neighbor(s);
+#ifdef LIBMESH_ENABLE_AMR
+
neighbor_0->active_family_tree_by_neighbor(active_neighbors,elem);
+#else
+ active_neighbors.clear();
+ active_neighbors.push_back(neighbor_0);
+#endif
- for (unsigned int a=0; a !=
active_neighbors.size(); ++a)
- {
- const Elem *neighbor =
active_neighbors[a];
+ for (unsigned int a=0; a !=
active_neighbors.size(); ++a)
+ {
+ const Elem *neighbor = active_neighbors[a];
- dof_map.dof_indices (neighbor,
neighbor_dofs);
- #ifdef LIBMESH_ENABLE_CONSTRAINTS
- dof_map.find_connected_dofs (neighbor_dofs);
- #endif
- const unsigned int n_dofs_on_neighbor =
neighbor_dofs.size();
+ dof_map.dof_indices (neighbor,
neighbor_dofs);
+#ifdef LIBMESH_ENABLE_CONSTRAINTS
+ dof_map.find_connected_dofs (neighbor_dofs);
+#endif
+ const unsigned int n_dofs_on_neighbor =
neighbor_dofs.size();
- for (unsigned int j=0;
j<n_dofs_on_neighbor; j++)
- {
- const unsigned int jg =
neighbor_dofs[j];
+ for (unsigned int j=0;
j<n_dofs_on_neighbor; j++)
+ {
+ const unsigned int jg =
neighbor_dofs[j];
- // See if jg is in the sorted range
-
std::pair<SparsityPattern::Row::iterator,
-
SparsityPattern::Row::iterator>
- pos = std::equal_range (row.begin(),
row.end(), jg);
+ // See if jg is in the sorted range
+
std::pair<SparsityPattern::Row::iterator,
+
SparsityPattern::Row::iterator>
+ pos = std::equal_range (row->begin(),
row->end(), jg);
- // Insert jg if it wasn't found
- if (pos.first == pos.second)
- row.insert (pos.first, jg);
- }
- }
- }
- }
+ // Insert jg if it wasn't found
+ if (pos.first == pos.second)
+ row->insert (pos.first, jg);
+ }
+ }
+ }
}
}
}
@@ -2474,7 +2490,7 @@
-void SparsityPattern::Build::join (const SparsityPattern::Build &other)
+void SparsityPattern::Build::join (SparsityPattern::Build &other)
{
const unsigned int proc_id = mesh.processor_id();
const unsigned int n_global_dofs = dof_map.n_dofs();
@@ -2529,10 +2545,161 @@
n_oz[r] += other.n_oz[r]; n_oz[r] = std::min(n_oz[r],
n_global_dofs-n_nz[r]);
}
}
+
+ // Move nonlocal row information to ourselves; the other thread
+ // won't need it in the map after that.
+ NonlocalGraph::iterator it = other.nonlocal_pattern.begin();
+ while (it != other.nonlocal_pattern.end())
+ {
+ const unsigned int dof_id = it->first;
+
+#ifndef NDEBUG
+ unsigned int proc_id = 0;
+ while (dof_id >= dof_map.end_dof(proc_id))
+ proc_id++;
+ libmesh_assert (proc_id != libMesh::processor_id());
+#endif
+
+ SparsityPattern::Row &their_row = it->second;
+
+ // We should have no empty values in a map
+ libmesh_assert (!their_row.empty());
+
+ NonlocalGraph::iterator my_it = nonlocal_pattern.find(it->first);
+ if (my_it == nonlocal_pattern.end())
+ {
+ nonlocal_pattern[it->first].swap(their_row);
+ }
+ else
+ {
+ SparsityPattern::Row &my_row = my_it->second;
+
+ my_row.insert (my_row.end(),
+ their_row.begin(),
+ their_row.end());
+
+ // We cannot use SparsityPattern::sort_row() here because it expects
+ // the [begin,middle) [middle,end) to be non-overlapping. This is not
+ // necessarily the case here, so use std::sort()
+ std::sort (my_row.begin(), my_row.end());
+
+ my_row.erase(std::unique (my_row.begin(), my_row.end()),
my_row.end());
+ }
+
+ other.nonlocal_pattern.erase(it++);
+ }
}
+void SparsityPattern::Build::parallel_sync ()
+{
+ const unsigned int local_first_dof = dof_map.first_dof();
+ const unsigned int local_end_dof = dof_map.end_dof();
+
+ // Trade sparsity rows with other processors
+ for (unsigned int p=1; p != libMesh::n_processors(); ++p)
+ {
+ // Push to processor procup while receiving from procdown
+ unsigned int procup = (libMesh::processor_id() + p) %
+ libMesh::n_processors();
+ unsigned int procdown = (libMesh::n_processors() +
+ libMesh::processor_id() - p) %
+ libMesh::n_processors();
+
+ // Pack the sparsity pattern rows to push to procup
+ std::vector<unsigned int> pushed_row_ids,
+ pushed_row_ids_to_me;
+ std::vector<std::vector<unsigned int> > pushed_rows,
+ pushed_rows_to_me;
+
+ // Move nonlocal row information to a structure to send it from;
+ // we don't need it in the map after that.
+ NonlocalGraph::iterator it = nonlocal_pattern.begin();
+ while (it != nonlocal_pattern.end())
+ {
+ const unsigned int dof_id = it->first;
+ unsigned int proc_id = 0;
+ while (dof_id >= dof_map.end_dof(proc_id))
+ proc_id++;
+
+ libmesh_assert (proc_id != libMesh::processor_id());
+
+ if (proc_id == procup)
+ {
+ pushed_row_ids.push_back(dof_id);
+
+ // We can't just do the swap trick here, thanks to the
+ // differing vector allocators?
+ pushed_rows.push_back(std::vector<unsigned int>());
+ pushed_rows.back().assign
+ (it->second.begin(), it->second.end());
+
+ nonlocal_pattern.erase(it++);
+ }
+ else
+ ++it;
+ }
+
+ Parallel::send_receive(procup, pushed_row_ids,
+ procdown, pushed_row_ids_to_me);
+ Parallel::send_receive(procup, pushed_rows,
+ procdown, pushed_rows_to_me);
+ pushed_row_ids.clear();
+ pushed_rows.clear();
+
+ const unsigned int n_rows = pushed_row_ids_to_me.size();
+ for (unsigned int i=0; i != n_rows; ++i)
+ {
+ const unsigned int r = pushed_row_ids_to_me[i];
+ const unsigned int my_r = r - local_first_dof;
+
+ SparsityPattern::Row &my_row =
+ sparsity_pattern[my_r];
+
+ std::vector<unsigned int> &their_row = pushed_rows_to_me[i];
+
+ // They wouldn't have sent an empty row
+ libmesh_assert(!their_row.empty());
+
+ // We can end up with an empty row on a dof that touches our
+ // inactive elements but not our active ones
+ if (my_row.empty())
+ {
+ my_row.assign (their_row.begin(),
+ their_row.end());
+ }
+ else
+ {
+ my_row.insert (my_row.end(),
+ their_row.begin(),
+ their_row.end());
+
+ // We cannot use SparsityPattern::sort_row() here because it
expects
+ // the [begin,middle) [middle,end) to be non-overlapping. This
is not
+ // necessarily the case here, so use std::sort()
+ std::sort (my_row.begin(), my_row.end());
+
+ my_row.erase(std::unique (my_row.begin(), my_row.end()),
my_row.end());
+ }
+
+ // fix the number of on and off-processor nonzeros in this row
+ n_nz[my_r] = n_oz[my_r] = 0;
+
+ for (unsigned int j=0; j<my_row.size(); j++)
+ if ((my_row[j] < local_first_dof) || (my_row[j] >= local_end_dof))
+ n_oz[my_r]++;
+ else
+ n_nz[my_r]++;
+ }
+ }
+
+ // We should have sent everything at this point.
+ libmesh_assert (nonlocal_pattern.empty());
+}
+
+
+
void DofMap::print_info(std::ostream& os) const
{
os << this->get_info();
------------------------------------------------------------------------------
The Windows 8 Center - In partnership with Sourceforge
Your idea - your app - 30 days.
Get started!
http://windows8center.sourceforge.net/
what-html-developers-need-to-know-about-coding-windows-8-metro-style-apps/
_______________________________________________
Libmesh-devel mailing list
Libmesh-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/libmesh-devel