Re: [ovs-dev] [OVN Patch v4 2/2] Add support for configuring parallelization via unixctl

2021-10-25 Thread Anton Ivanov
Hi Han,

I had requests to move this to cli. This also allows a unified format if we add 
parallelization to ovsdb or controller. 

On 25 October 2021 04:05:39 BST, Han Zhou  wrote:
>On Tue, Sep 21, 2021 at 8:48 AM  wrote:
>>
>> From: Anton Ivanov 
>>
>> libs: add configuration support to parallel-hmap.[c,h]
>> northd: add support for configuring parallelization to northd
>
>Hi Anton,
>
>This patch seems to replace the NB option use_parallel_build with unix
>command configuration. Could you explain the motivation of this? I feel
>that NB option is better, because with HA we only need to set in one place
>for all northds.
>BTW, there is no documentation change for the NB options, if it is supposed
>to be removed.
>
>Thanks,
>Han
>
>>
>> Signed-off-by: Anton Ivanov 
>> ---
>>  lib/ovn-parallel-hmap.c | 185 ++--
>>  lib/ovn-parallel-hmap.h |  63 +-
>>  northd/northd.c |  30 +++
>>  northd/northd.h |   2 -
>>  northd/ovn-northd.c |   5 +-
>>  tests/ovn-macros.at |  16 +++-
>>  6 files changed, 263 insertions(+), 38 deletions(-)
>>
>> diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
>> index 1b3883441..6a6488a17 100644
>> --- a/lib/ovn-parallel-hmap.c
>> +++ b/lib/ovn-parallel-hmap.c
>> @@ -33,6 +33,7 @@
>>  #include "ovs-thread.h"
>>  #include "ovs-numa.h"
>>  #include "random.h"
>> +#include "unixctl.h"
>>
>>  VLOG_DEFINE_THIS_MODULE(ovn_parallel_hmap);
>>
>> @@ -46,6 +47,7 @@ VLOG_DEFINE_THIS_MODULE(ovn_parallel_hmap);
>>   */
>>  static atomic_bool initial_pool_setup = ATOMIC_VAR_INIT(false);
>>  static bool can_parallelize = false;
>> +static bool should_parallelize = false;
>>
>>  /* This is set only in the process of exit and the set is
>>   * accompanied by a fence. It does not need to be atomic or be
>> @@ -83,7 +85,7 @@ static void *standard_helper_thread(void *arg);
>>
>>  struct worker_pool *ovn_add_standard_pool(int size)
>>  {
>> -return add_worker_pool(standard_helper_thread, size);
>> +return add_worker_pool(standard_helper_thread, size, "default",
>true);
>>  }
>>
>>  bool
>> @@ -92,6 +94,19 @@ ovn_stop_parallel_processing(struct worker_pool *pool)
>>  return pool->workers_must_exit;
>>  }
>>
>> +bool
>> +ovn_set_parallel_processing(bool enable)
>> +{
>> +should_parallelize = enable;
>> +return can_parallelize;
>> +}
>> +
>> +bool
>> +ovn_get_parallel_processing(void)
>> +{
>> +return can_parallelize && should_parallelize;
>> +}
>> +
>>  bool
>>  ovn_can_parallelize_hashes(bool force_parallel)
>>  {
>> @@ -117,6 +132,7 @@ destroy_pool(struct worker_pool *pool) {
>>  sem_close(pool->done);
>>  sprintf(sem_name, MAIN_SEM_NAME, sembase, pool);
>>  sem_unlink(sem_name);
>> +free(pool->name);
>>  free(pool);
>>  }
>>
>> @@ -127,6 +143,10 @@ ovn_resize_pool(struct worker_pool *pool, int size)
>>
>>  ovs_assert(pool != NULL);
>>
>> +if (!pool->is_mutable) {
>> +return false;
>> +}
>> +
>>  if (!size) {
>>  size = pool_size;
>>  }
>> @@ -166,7 +186,8 @@ cleanup:
>>
>>
>>  struct worker_pool *
>> -ovn_add_worker_pool(void *(*start)(void *), int size)
>> +ovn_add_worker_pool(void *(*start)(void *), int size, char *name,
>> +bool is_mutable)
>>  {
>>  struct worker_pool *new_pool = NULL;
>>  bool test = false;
>> @@ -194,6 +215,8 @@ ovn_add_worker_pool(void *(*start)(void *), int size)
>>  new_pool = xmalloc(sizeof(struct worker_pool));
>>  new_pool->size = size;
>>  new_pool->start = start;
>> +new_pool->is_mutable = is_mutable;
>> +new_pool->name = xstrdup(name);
>>  sprintf(sem_name, MAIN_SEM_NAME, sembase, new_pool);
>>  new_pool->done = sem_open(sem_name, O_CREAT, S_IRWXU, 0);
>>  if (new_pool->done == SEM_FAILED) {
>> @@ -226,6 +249,7 @@ cleanup:
>>  sprintf(sem_name, MAIN_SEM_NAME, sembase, new_pool);
>>  sem_unlink(sem_name);
>>  }
>> +free(new_pool->name);
>>  ovs_mutex_unlock(_mutex);
>>  return NULL;
>>  }
>> @@ -342,8 +366,7 @@ ovn_complete_pool_callback(struct worker_pool *pool,
>> 

[ovs-dev] [OVN Patch] libs: Replace weak with strong compare and exchange

2021-10-20 Thread anton . ivanov
From: Anton Ivanov 

Weak may fail for reasons unrelated to the comparison (not
on x86, where these are practically equivalent).

Thus, it can and should be used only in cases where there will
be a repeat of the event which runs the code (in this case
semaphore post).

On non-x86 architectures using weak may result in a failed
comparison without any more sem post events - a hang. Reported
for arm, likely for other platforms where weak may fail.

Reported-by: wentao@easystack.cn

Signed-off-by: Anton Ivanov 
---
 lib/ovn-parallel-hmap.c | 2 +-
 ovs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index b8c7ac786..56ceed8e8 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -267,7 +267,7 @@ ovn_run_pool_callback(struct worker_pool *pool,
  * (most likely acq_rel) to ensure that the main thread
  * sees all of the results produced by the worker.
  */
-if (atomic_compare_exchange_weak(
+if (atomic_compare_exchange_strong(
 >controls[index].finished,
 ,
 false)) {
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v3] northd: Optimize dp/lflow postprocessing

2021-10-11 Thread anton . ivanov
From: Anton Ivanov 

1. Compute dp group hash only if there will be dp group processing.
2. Remove hmapx interim storage and related hmapx computation for
single dp flows and replace it with a pre-sized hmap.

Signed-off-by: Anton Ivanov 
---
 northd/northd.c | 50 -
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index e42795ca0..49948d568 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -13331,10 +13331,20 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 stopwatch_start(LFLOWS_DP_GROUPS_STOPWATCH_NAME, time_msec());
 /* Collecting all unique datapath groups. */
 struct hmap dp_groups = HMAP_INITIALIZER(_groups);
-struct hmapx single_dp_lflows = HMAPX_INITIALIZER(_dp_lflows);
-struct ovn_lflow *lflow;
-HMAP_FOR_EACH (lflow, hmap_node, ) {
-uint32_t hash = hash_int(hmapx_count(>od_group), 0);
+struct hmap single_dp_lflows;
+
+/* Single dp_flows will never grow bigger than lflows,
+ * thus the two hmaps will remain the same size regardless
+ * of how many elements we remove from lflows and add to
+ * single_dp_lflows.
+ * Note - lflows is always sized for at least 128 flows.
+ */
+fast_hmap_size_for(_dp_lflows, max_seen_lflow_size);
+
+struct ovn_lflow *lflow, *next_lflow;
+struct hmapx_node *node;
+HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, ) {
+uint32_t hash;
 struct ovn_dp_group *dpg;
 
 ovs_assert(hmapx_count(>od_group));
@@ -13342,17 +13352,24 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 if (hmapx_count(>od_group) == 1) {
 /* There is only one datapath, so it should be moved out of the
  * group to a single 'od'. */
-const struct hmapx_node *node;
 HMAPX_FOR_EACH (node, >od_group) {
 lflow->od = node->data;
 break;
 }
 hmapx_clear(>od_group);
-/* Logical flow should be re-hashed later to allow lookups. */
-hmapx_add(_dp_lflows, lflow);
+
+/* Logical flow should be re-hashed to allow lookups. */
+hash = hmap_node_hash(>hmap_node);
+/* Remove from lflows. */
+hmap_remove(, >hmap_node);
+hash = ovn_logical_flow_hash_datapath(>od->sb->header_.uuid,
+  hash);
+/* Add to single_dp_lflows. */
+hmap_insert_fast(_dp_lflows, >hmap_node, hash);
 continue;
 }
 
+hash = hash_int(hmapx_count(>od_group), 0);
 dpg = ovn_dp_group_find(_groups, >od_group, hash);
 if (!dpg) {
 dpg = xzalloc(sizeof *dpg);
@@ -13362,19 +13379,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 lflow->dpg = dpg;
 }
 
-/* Adding datapath to the flow hash for logical flows that have only one,
- * so they could be found by the southbound db record. */
-const struct hmapx_node *node;
-uint32_t hash;
-HMAPX_FOR_EACH (node, _dp_lflows) {
-lflow = node->data;
-hash = hmap_node_hash(>hmap_node);
-hmap_remove(, >hmap_node);
-hash = ovn_logical_flow_hash_datapath(>od->sb->header_.uuid,
-  hash);
-hmap_insert(, >hmap_node, hash);
-}
-hmapx_destroy(_dp_lflows);
+/* Merge multiple and single dp hashes. */
+
+fast_hmap_merge(, _dp_lflows);
+
+hmap_destroy(_dp_lflows);
 
 /* Push changes to the Logical_Flow table to database. */
 const struct sbrec_logical_flow *sbflow, *next_sbflow;
@@ -13507,7 +13516,6 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 }
 
 stopwatch_stop(LFLOWS_DP_GROUPS_STOPWATCH_NAME, time_msec());
-struct ovn_lflow *next_lflow;
 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, ) {
 const char *pipeline = ovn_stage_get_pipeline_name(lflow->stage);
 uint8_t table = ovn_stage_get_table(lflow->stage);
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v2] northd: Optimize dp/lflow postprocessing

2021-10-08 Thread anton . ivanov
From: Anton Ivanov 

1. Compute dp group hash only if there will be dp group processing.
2. Remove hmapx interim storage and related hmapx computation for
single dp flows and replace it with a pre-sized hmap.

Signed-off-by: Anton Ivanov 
---
 northd/northd.c | 48 
 ovs |  2 +-
 2 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index e42795ca0..a3c3dbcf9 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -13331,10 +13331,20 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 stopwatch_start(LFLOWS_DP_GROUPS_STOPWATCH_NAME, time_msec());
 /* Collecting all unique datapath groups. */
 struct hmap dp_groups = HMAP_INITIALIZER(_groups);
-struct hmapx single_dp_lflows = HMAPX_INITIALIZER(_dp_lflows);
-struct ovn_lflow *lflow;
-HMAP_FOR_EACH (lflow, hmap_node, ) {
-uint32_t hash = hash_int(hmapx_count(>od_group), 0);
+struct hmap single_dp_lflows;
+
+/* Single dp_flows will never grow bigger than lflows,
+ * thus the two hmaps will remain the same size regardless
+ * of how many elements we remove from lflows and add to
+ * single_dp_lflows.
+ * Note - lflows is always sized for at least 128 flows.
+ */
+fast_hmap_size_for(_dp_lflows, max_seen_lflow_size);
+
+struct ovn_lflow *lflow, *next_lflow;
+struct hmapx_node *node;
+HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, ) {
+uint32_t hash;
 struct ovn_dp_group *dpg;
 
 ovs_assert(hmapx_count(>od_group));
@@ -13342,17 +13352,24 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 if (hmapx_count(>od_group) == 1) {
 /* There is only one datapath, so it should be moved out of the
  * group to a single 'od'. */
-const struct hmapx_node *node;
 HMAPX_FOR_EACH (node, >od_group) {
 lflow->od = node->data;
 break;
 }
 hmapx_clear(>od_group);
+
 /* Logical flow should be re-hashed later to allow lookups. */
-hmapx_add(_dp_lflows, lflow);
+hash = hmap_node_hash(>hmap_node);
+/* Remove from lflows. */
+hmap_remove(, >hmap_node);
+hash = ovn_logical_flow_hash_datapath(>od->sb->header_.uuid,
+  hash);
+/* Add to single_dp_lflows. */
+hmap_insert_fast(_dp_lflows, >hmap_node, hash);
 continue;
 }
 
+hash = hash_int(hmapx_count(>od_group), 0);
 dpg = ovn_dp_group_find(_groups, >od_group, hash);
 if (!dpg) {
 dpg = xzalloc(sizeof *dpg);
@@ -13362,19 +13379,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 lflow->dpg = dpg;
 }
 
-/* Adding datapath to the flow hash for logical flows that have only one,
- * so they could be found by the southbound db record. */
-const struct hmapx_node *node;
-uint32_t hash;
-HMAPX_FOR_EACH (node, _dp_lflows) {
-lflow = node->data;
-hash = hmap_node_hash(>hmap_node);
-hmap_remove(, >hmap_node);
-hash = ovn_logical_flow_hash_datapath(>od->sb->header_.uuid,
-  hash);
-hmap_insert(, >hmap_node, hash);
-}
-hmapx_destroy(_dp_lflows);
+/* Merge multiple and single dp hashes. */
+
+fast_hmap_merge(, _dp_lflows);
+
+hmap_destroy(_dp_lflows);
 
 /* Push changes to the Logical_Flow table to database. */
 const struct sbrec_logical_flow *sbflow, *next_sbflow;
@@ -13507,7 +13516,6 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 }
 
 stopwatch_stop(LFLOWS_DP_GROUPS_STOPWATCH_NAME, time_msec());
-struct ovn_lflow *next_lflow;
 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, ) {
 const char *pipeline = ovn_stage_get_pipeline_name(lflow->stage);
 uint8_t table = ovn_stage_get_table(lflow->stage);
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2 3/3] northd.c: Optimize parallel build performance with hash based locks.

2021-10-04 Thread Anton Ivanov


On 04/10/2021 16:58, Han Zhou wrote:



On Mon, Oct 4, 2021 at 2:31 AM Anton Ivanov mailto:anton.iva...@cambridgegreys.com>> wrote:
>
>
> On 03/10/2021 23:45, Han Zhou wrote:
> > The current implementation of parallel build in northd with dp-groups
> > enabled results in bad performance when the below assumption is not
> > true:
> >
> >   * 3. Most RW ops are not flow adds, but changes to the
> >   * od groups.
> >
> > In fact most (if not all) of the ovn port based flows don't share
> > dp-groups, so the assumption doesn't hold in the reality, and in a scale
> > test environment with ovn-k8s topology of 800 nodes, the parallel build
> > shows 5 times more time spent for one northd iteration than without
> > parallel build on a test machine with 24 cores (24 northd worker
> > threads). This is because of lock contention on the global rw lock
> > protecting the lflow hmap.
> >
> > This patch optimizes it by using an array of hash based locks instead of
> > a single global lock. It is similar to the approach prior to the commit
> > 8c980ce6, but with two major differences:
> >
> > 1) It uses a fixed length mutex array instead of the dynamic array of
> >     the struct hashrow_locks. It is equally efficient considering the low
> >     chance of contention in a large array of locks, but without the burden
> >     of resizing every time the hmap size changes. The uniqueness of the
> >     lock is guaranteed by combining the masks of both the hmap and the
> >     mutex array.
> >
> > 2) It fixes the corrupted hmap size after each round of parallel flow
> >     build. The hash based lock protects the list in each bucket, but
> >     doesn't protect the hmap size. The patch uses thread-local counters
> >     and aggregate them at the end of each iteration, which is lock free.
> >     This approach has lower cost than alternatively using atomic
> >     incrementing a global counter.
> >
> > This patch ends up with 8 times speedup than the current parallel build
> > with dp-group enabled for the same scale test (which is 30% faster than
> > without parallel).
> >
> > Test command: ovn-nbctl --print-wait-time --wait=sb sync
> >
> > Before:
> >
> > no parallel:
> > ovn-northd completion: 7807ms
> >
> > parallel:
> > ovn-northd completion: 41267ms
> >
> > After:
> >
> > no parallel: (no change)
> >
> > parallel:
> > ovn-northd completion: 5081ms
> > (8x faster than before, 30% faster than no parallel)
> >
> > (Note: all the above tests are with dp-groups enabled)
> >
> > Signed-off-by: Han Zhou mailto:hz...@ovn.org>>
> > ---
> > v1 -> v2: Addressed comments from Anton
> >      - Fixes the hmap size after each round of parallel flow building.
> >      - Refactored lflow_hash_lock and removed unnecessary functions to avoid
> >        clang warnings.
> >
> >   northd/northd.c | 140 
> >   1 file changed, 70 insertions(+), 70 deletions(-)
> >
> > diff --git a/northd/northd.c b/northd/northd.c
> > index afd812700..88ab0c929 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -4319,41 +4319,44 @@ ovn_dp_group_add_with_reference(struct ovn_lflow 
*lflow_ref,
> >       return true;
> >   }
> >
> > -/* Adds a row with the specified contents to the Logical_Flow table.
> > - * Version to use with dp_groups + parallel - when locking is required.
> > - *
> > - * Assumptions:
> > - *
> > - * 1. A large proportion of the operations are lookups (reads).
> > - * 2. RW operations are a small proportion of overall adds.
> > - * 3. Most RW ops are not flow adds, but changes to the
> > - * od groups.
> > +/* The lflow_hash_lock is a mutex array that protects updates to the shared
> > + * lflow table across threads when parallel lflow build and dp-group are 
both
> > + * enabled. To avoid high contention between threads, a big array of 
mutexes
> > + * are used instead of just one. This is possible because when parallel 
build
> > + * is used we only use hmap_insert_fast() to update the hmap, which would 
not
> > + * touch the bucket array but only the list in a single bucket. We only 
need to
> > + * make sure that when adding lflows to the same hash bucket, the same 
lock is
> > + * used, so that no two threads can add to the bucket at the same time.  
It is
> > + * ok that the same lock is used to protect multiple buckets, so a fixed 
sized
> > + * mutex array is used instead of 1-1 mapping to the h

Re: [ovs-dev] [PATCH ovn 2/2] ovn-parallel-hmap: Remove the unused mutex in worker_control.

2021-10-04 Thread Anton Ivanov



On 04/10/2021 02:37, Han Zhou wrote:

It is not used, and seems not going to be needed, so remove it.

Signed-off-by: Han Zhou 
---
  lib/ovn-parallel-hmap.c | 1 -
  lib/ovn-parallel-hmap.h | 1 -
  2 files changed, 2 deletions(-)

diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index b8c7ac786..3c9f3a0ff 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -132,7 +132,6 @@ ovn_add_worker_pool(void *(*start)(void *))
  new_control->id = i;
  new_control->done = new_pool->done;
  new_control->data = NULL;
-ovs_mutex_init(_control->mutex);
  new_control->finished = ATOMIC_VAR_INIT(false);
  sprintf(sem_name, WORKER_SEM_NAME, sembase, new_pool, i);
  new_control->fire = sem_open(sem_name, O_CREAT, S_IRWXU, 0);
diff --git a/lib/ovn-parallel-hmap.h b/lib/ovn-parallel-hmap.h
index 897208ef8..f3cd7a210 100644
--- a/lib/ovn-parallel-hmap.h
+++ b/lib/ovn-parallel-hmap.h
@@ -79,7 +79,6 @@ struct worker_control {
  atomic_bool finished; /* Set to true after achunk of work is complete. */
  sem_t *fire; /* Work start semaphore - sem_post starts the worker. */
  sem_t *done; /* Work completion semaphore - sem_post on completion. */
-struct ovs_mutex mutex; /* Guards the data. */
  void *data; /* Pointer to data to be processed. */
  void *workload; /* back-pointer to the worker pool structure. */
  pthread_t worker;


I was actually using this in the next series of patches.

If we remove it, the next series will be putting it right back in.

--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn 1/2] ovn-parallel-hmap.h: Minor fixes for hashrow_lock.

2021-10-04 Thread Anton Ivanov



On 04/10/2021 02:37, Han Zhou wrote:

Although not used currently, it is better to fix:
1. The type of the mask field should be the same as hmap->mask: size_t.
2. Calculating the index is better to use & instead of %.

Signed-off-by: Han Zhou 
---
  lib/ovn-parallel-hmap.h | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/ovn-parallel-hmap.h b/lib/ovn-parallel-hmap.h
index 2df132ea8..897208ef8 100644
--- a/lib/ovn-parallel-hmap.h
+++ b/lib/ovn-parallel-hmap.h
@@ -224,7 +224,7 @@ static inline void wait_for_work_completion(struct 
worker_pool *pool)
   */
  
  struct hashrow_locks {

-ssize_t mask;
+size_t mask;
  struct ovs_mutex *row_locks;
  };
  
@@ -235,13 +235,13 @@ void ovn_update_hashrow_locks(struct hmap *lflows, struct hashrow_locks *hrl);

  /* Lock a hash row */
  static inline void lock_hash_row(struct hashrow_locks *hrl, uint32_t hash)
  {
-ovs_mutex_lock(>row_locks[hash % hrl->mask]);
+ovs_mutex_lock(>row_locks[hash & hrl->mask]);
  }
  
  /* Unlock a hash row */

  static inline void unlock_hash_row(struct hashrow_locks *hrl, uint32_t hash)
  {
-ovs_mutex_unlock(>row_locks[hash % hrl->mask]);
+ovs_mutex_unlock(>row_locks[hash & hrl->mask]);
  }
  
  /* Init the row locks structure */


+1.

--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2 3/3] northd.c: Optimize parallel build performance with hash based locks.

2021-10-04 Thread Anton Ivanov



On 03/10/2021 23:45, Han Zhou wrote:

The current implementation of parallel build in northd with dp-groups
enabled results in bad performance when the below assumption is not
true:

  * 3. Most RW ops are not flow adds, but changes to the
  * od groups.

In fact most (if not all) of the ovn port based flows don't share
dp-groups, so the assumption doesn't hold in the reality, and in a scale
test environment with ovn-k8s topology of 800 nodes, the parallel build
shows 5 times more time spent for one northd iteration than without
parallel build on a test machine with 24 cores (24 northd worker
threads). This is because of lock contention on the global rw lock
protecting the lflow hmap.

This patch optimizes it by using an array of hash based locks instead of
a single global lock. It is similar to the approach prior to the commit
8c980ce6, but with two major differences:

1) It uses a fixed length mutex array instead of the dynamic array of
the struct hashrow_locks. It is equally efficient considering the low
chance of contention in a large array of locks, but without the burden
of resizing every time the hmap size changes. The uniqueness of the
lock is guaranteed by combining the masks of both the hmap and the
mutex array.

2) It fixes the corrupted hmap size after each round of parallel flow
build. The hash based lock protects the list in each bucket, but
doesn't protect the hmap size. The patch uses thread-local counters
and aggregate them at the end of each iteration, which is lock free.
This approach has lower cost than alternatively using atomic
incrementing a global counter.

This patch ends up with 8 times speedup than the current parallel build
with dp-group enabled for the same scale test (which is 30% faster than
without parallel).

Test command: ovn-nbctl --print-wait-time --wait=sb sync

Before:

no parallel:
ovn-northd completion: 7807ms

parallel:
ovn-northd completion: 41267ms

After:

no parallel: (no change)

parallel:
ovn-northd completion: 5081ms
(8x faster than before, 30% faster than no parallel)

(Note: all the above tests are with dp-groups enabled)

Signed-off-by: Han Zhou 
---
v1 -> v2: Addressed comments from Anton
 - Fixes the hmap size after each round of parallel flow building.
 - Refactored lflow_hash_lock and removed unnecessary functions to avoid
   clang warnings.

  northd/northd.c | 140 
  1 file changed, 70 insertions(+), 70 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index afd812700..88ab0c929 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -4319,41 +4319,44 @@ ovn_dp_group_add_with_reference(struct ovn_lflow 
*lflow_ref,
  return true;
  }
  
-/* Adds a row with the specified contents to the Logical_Flow table.

- * Version to use with dp_groups + parallel - when locking is required.
- *
- * Assumptions:
- *
- * 1. A large proportion of the operations are lookups (reads).
- * 2. RW operations are a small proportion of overall adds.
- * 3. Most RW ops are not flow adds, but changes to the
- * od groups.
+/* The lflow_hash_lock is a mutex array that protects updates to the shared
+ * lflow table across threads when parallel lflow build and dp-group are both
+ * enabled. To avoid high contention between threads, a big array of mutexes
+ * are used instead of just one. This is possible because when parallel build
+ * is used we only use hmap_insert_fast() to update the hmap, which would not
+ * touch the bucket array but only the list in a single bucket. We only need to
+ * make sure that when adding lflows to the same hash bucket, the same lock is
+ * used, so that no two threads can add to the bucket at the same time.  It is
+ * ok that the same lock is used to protect multiple buckets, so a fixed sized
+ * mutex array is used instead of 1-1 mapping to the hash buckets. This
+ * simplies the implementation while effectively reduces lock contention
+ * because the chance that different threads contending the same lock amongst
+ * the big number of locks is very low. */
+#define LFLOW_HASH_LOCK_MASK 0x
+static struct ovs_mutex lflow_hash_locks[LFLOW_HASH_LOCK_MASK + 1];
+
+static void
+lflow_hash_lock_init(void)
+{
+for (size_t i = 0; i < LFLOW_HASH_LOCK_MASK + 1; i++) {
+ovs_mutex_init(_hash_locks[i]);
+}
+}
+
+/* This thread-local var is used for parallel lflow building when dp-groups is
+ * enabled. It maintains the number of lflows inserted by the current thread to
+ * the shared lflow hmap in the current iteration. It is needed because the
+ * lflow_hash_lock cannot protect current update of the hmap's size (hmap->n)
+ * by different threads.
   *
- * Principles of operation:
- * 1. All accesses to the flow table are protected by a rwlock.
- * 2. By default, everyone grabs a rd lock so that multiple threads
- * can do lookups simultaneously.
- * 3. If a change to the lflow is needed, the rd lock is released and
- * a wr 

Re: [ovs-dev] [PATCH ovn v2 2/3] northd.c: Lock to protect against possible od->group corruption.

2021-10-04 Thread Anton Ivanov



On 03/10/2021 23:45, Han Zhou wrote:

When parallel build is used, od->group can be updated by threads outside
of the function do_ovn_lflow_add_pd (for lb related flow building). So
use the function ovn_dp_group_add_with_reference() to update it in
function do_ovn_lflow_add() when it is not a newly created flow.

Signed-off-by: Han Zhou 
---
  northd/northd.c | 42 +-
  1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 027c5b170..afd812700 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -4299,6 +4299,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
  }
  }
  
+static bool

+ovn_dp_group_add_with_reference(struct ovn_lflow *lflow_ref,
+struct ovn_datapath *od)
+OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+if (!use_logical_dp_groups || !lflow_ref) {
+return false;
+}
+
+if (use_parallel_build) {
+ovs_mutex_lock(_ref->odg_lock);
+hmapx_add(_ref->od_group, od);
+ovs_mutex_unlock(_ref->odg_lock);
+} else {
+hmapx_add(_ref->od_group, od);
+}
+
+return true;
+}
+
  /* Adds a row with the specified contents to the Logical_Flow table.
   * Version to use with dp_groups + parallel - when locking is required.
   *
@@ -4351,7 +4371,7 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
  old_lflow = ovn_lflow_find(lflow_map, NULL, stage, priority, match,
 actions, ctrl_meter, hash);
  if (old_lflow) {
-hmapx_add(_lflow->od_group, od);
+ovn_dp_group_add_with_reference(old_lflow, od);
  return old_lflow;
  }
  }
@@ -4466,26 +4486,6 @@ ovn_lflow_add_at(struct hmap *lflow_map, struct 
ovn_datapath *od,
 io_port, ctrl_meter, stage_hint, where, hash);
  }
  
-static bool

-ovn_dp_group_add_with_reference(struct ovn_lflow *lflow_ref,
-struct ovn_datapath *od)
-OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-if (!use_logical_dp_groups || !lflow_ref) {
-return false;
-}
-
-if (use_parallel_build) {
-ovs_mutex_lock(_ref->odg_lock);
-hmapx_add(_ref->od_group, od);
-ovs_mutex_unlock(_ref->odg_lock);
-} else {
-hmapx_add(_ref->od_group, od);
-}
-
-return true;
-}
-
  /* Adds a row with the specified contents to the Logical_Flow table. */
  #define ovn_lflow_add_with_hint__(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
ACTIONS, IN_OUT_PORT, CTRL_METER, \



+1

--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2 1/3] northd.c: Remove redundant condition check in ovn_dp_group_add_with_reference().

2021-10-04 Thread Anton Ivanov



On 03/10/2021 23:45, Han Zhou wrote:

Signed-off-by: Han Zhou 
---
  northd/northd.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/northd/northd.c b/northd/northd.c
index cf2467fe1..027c5b170 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -4475,7 +4475,7 @@ ovn_dp_group_add_with_reference(struct ovn_lflow 
*lflow_ref,
  return false;
  }
  
-if (use_parallel_build && use_logical_dp_groups) {

+if (use_parallel_build) {
  ovs_mutex_lock(_ref->odg_lock);
  hmapx_add(_ref->od_group, od);
  ovs_mutex_unlock(_ref->odg_lock);

+1

--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] northd.c: Optimize parallel build performance with hash based locks.

2021-10-02 Thread Anton Ivanov

On 01/10/2021 23:29, Han Zhou wrote:

The current implementation of parallel build in northd with dp-groups
enabled results in bad performance when the below assumption is not
true:

  * 3. Most RW ops are not flow adds, but changes to the
  * od groups.

Hi Han,

Have a look at the ovn-parallel-hmap.h

It has most of the primitives from my previous attempt to use this 
approach, you do not need to redefine them. It also has the relevant 
pragma to make CLANG shut up because it really dislikes taking a lock in 
a function and not releasing it.


The sole difference is that the lock array is sized to current hash 
size, not to max-mask.


Also - when using this approach, using the stock hmap_insert_fast() will 
result in corrupting hmap_size (at the very least - I had a few cases 
where the whole cache line was corrupt).


As a result the hmap_resize() of lflows on the exit of the build 
procedure will either produce a bogus result (on a good day) or coredump 
(on a really bad day).


It needs to be removed and something along the lines of the "rebuild the 
lflows while building dp groups and rehashing the single flows 
correctly" incorporated: 
https://patchwork.ozlabs.org/project/ovn/patch/20210915162144.28369-1-anton.iva...@cambridgegreys.com/


Brgds,




In fact most (if not all) of the ovn port based flows don't share
dp-groups, so the assumption doesn't hold in the reality, and in a scale
test environment with ovn-k8s topology of 800 nodes, the parallel build
shows 5 times more time spent for one northd iteration than without
parallel build on a test machine with 24 cores (24 northd worker
threads). This is because of lock contention on the global rw lock
protecting the lflow hmap.

This patch optimizes it by using an array of hash based locks instead of
a single global lock. It ends up with 8 times speedup than the current
parallel build for the same scale test (which is 30% faster than
without parallel).

Test command: ovn-nbctl --print-wait-time --wait=sb sync

Before:

no parallel:
ovn-northd completion: 7807ms

parallel:
ovn-northd completion: 41267ms

After:

no parallel: (no change)

parallel:
ovn-northd completion: 5081ms
(8x faster than before, 30% faster than no parallel)

(Note: all the above tests are with dp-groups enabled)

Signed-off-by: Han Zhou 
---
  northd/northd.c | 110 ++--
  1 file changed, 42 insertions(+), 68 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index cf2467fe1..f6706e0b3 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -4299,39 +4299,6 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
  }
  }
  
-/* Adds a row with the specified contents to the Logical_Flow table.

- * Version to use with dp_groups + parallel - when locking is required.
- *
- * Assumptions:
- *
- * 1. A large proportion of the operations are lookups (reads).
- * 2. RW operations are a small proportion of overall adds.
- * 3. Most RW ops are not flow adds, but changes to the
- * od groups.
- *
- * Principles of operation:
- * 1. All accesses to the flow table are protected by a rwlock.
- * 2. By default, everyone grabs a rd lock so that multiple threads
- * can do lookups simultaneously.
- * 3. If a change to the lflow is needed, the rd lock is released and
- * a wr lock is acquired instead (the fact that POSIX does not have an
- * "upgrade" on locks is a major pain, but there is nothing we can do
- * - it's not available).
- * 4. WR lock operations in rd/wr locking have LOWER priority than RD.
- * That is by design and spec. So the code after a request for WR lock
- * may wait for a considerable amount of time until it is given a
- * change to run. That means that another thread may get there in the
- * meantime and change the data. Hence all wr operations MUST be coded
- * to ensure that they are not vulnerable to "someone pulled this from
- * under my feet". Re- reads, checks for presense, etc.
- * 5. Operations on the actual od_group hash map are protected by
- * per-flow locks. There is no need for these to be rd, mutex is more
- * appropriate. They are low contention as each protects only its flow
- * and only during modification which happen while holding a rd lock on
- * the flow table.
- */
-static struct ovs_rwlock flowtable_lock;
-
  /* Adds a row with the specified contents to the Logical_Flow table.
   * Version to use when locking is NOT required.
   */
@@ -4374,10 +4341,47 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
  return lflow;
  }
  
+/* The lflow_hash_lock is a mutex array that protects updates to the shared

+ * lflow table across threads when parallel lflow build and dp-group are both
+ * enabled. To avoid high contention between threads, a big array of mutexes
+ * are used instead of just one. This is possible because when parallel build
+ * is used we only use hmap_insert_fast() to update the hmap, which would not
+ * touch the bucket array but only the list in 

Re: [ovs-dev] [OVN Patch v8 3/3] northd: Restore parallel build with dp_groups

2021-10-01 Thread Anton Ivanov

On 01/10/2021 01:32, Han Zhou wrote:



On Thu, Sep 30, 2021 at 2:03 PM Anton Ivanov 
<mailto:anton.iva...@cambridgegreys.com>> wrote:


On 30/09/2021 20:48, Han Zhou wrote:



On Thu, Sep 30, 2021 at 7:34 AM Anton Ivanov
mailto:anton.iva...@cambridgegreys.com>> wrote:

Summary of findings.

1. The numbers on the perf test do not align with heater
which is much closer to a realistic load. On some tests where
heater gives 5-10% end-to-end improvement with
parallelization we get worse results with the perf-test. You
spotted this one correctly.

Example of the northd average pulled out of the test report
via grep and sed.

   127.489353
   131.509458
   116.088205
   94.721911
   119.629756
   114.896258
   124.811069
   129.679160
   106.699905
   134.490338
   112.106713
   135.957658
   132.47
   94.106849
   117.431450
   115.861592
   106.830657
   132.396905
   107.092542
   128.945760
   94.298464
   120.455510
   136.910426
   134.311765
   115.881292
   116.918458

These values are all over the place - this is not a
reproducible test.

2. In the present state you need to re-run it > 30+ times and
take an average. The standard deviation for the values for
the northd loop is > 10%. Compared to that the
reproducibility of ovn-heater is significantly better. I
usually get less than 0.5% difference between runs if there
was no iteration failures. I would suggest using that instead
if you want to do performance comparisons until we have
figured out what affects the perf-test.

3. It is using the short term running average value in
reports which is probably wrong because you have very
significant skew from the last several values.

I will look into all of these.

Thanks for the summary! However, I think there is a bigger
problem (probably related to my environment) than the stability
of the test (make check-perf TESTSUITEFLAGS="--rebuild") itself.
As I mentioned in an earlier email I observed even worse results
with a large scale topology closer to a real world deployment of
ovn-k8s just testing with the command:
    ovn-nbctl --print-wait-time --wait=sb sync

This command simply triggers a change in NB_Global table and wait
for northd to complete all the recompute and update SB. It
doesn't have to use "sync" command but any change to the NB DB
produces similar result (e.g.: ovn-nbctl --print-wait-time
--wait=sb ls-add ls1)

Without parallel:
ovn-northd completion: 7807ms

With parallel:
ovn-northd completion: 41267ms


Is this with current master or prior to these patches?

1. There was an issue prior to these where the hash on first
iteration with an existing database when loading a large database
for the first time was not sized correctly. These numbers sound
about right when this bug was around.

The patches are included. The commit id is 9242f27f63 as mentioned in 
my first email.


2. There should be NO DIFFERENCE in a single compute cycle with an
existing database between a run with parallel and without with dp
groups at present. This is because the first cycle does not use
parallel compute. It is disabled in order to achieve the correct
hash sizings for future cycle by auto-scaling the hash.

Yes, I understand this and I did enable dp-group for the above 
"ovn-nbctl sync" test, so the number I showed above for "with 
parallel" was for the 2nd run and onwards. For the first round the 
result is exactly the same as without parallel.


I just tried disabling DP group for the large scale "ovn-nbctl sync" 
test (after taking some effort squeezing out memory spaces on my 
desktop), and the result shows that parallel build performs slightly 
better (although it is 3x slower than with dp-group & without 
parallel, which is expected). Summarize the result together below:


Without parallel, with dp-group:
ovn-northd completion: 7807ms

With parallel, with dp-group:
ovn-northd completion: 41267ms

without parallel, without dp-group:
ovn-northd completion: 27996ms

with parallel, without dp-group:
ovn-northd completion: 26584ms

Now the interesting part:
I implemented a POC of a hash based mutex array that replaces the rw 
lock in the function do_ovn_lflow_add_pd(), and the performance is 
greatly improved for the dp-group test:


with parallel, with dp-group (hash based mutex):
ovn-northd completion: 5081ms

This is 8x faster than the current parallel one and 30% faster than 
without parallel. This result looks much more reasona

Re: [ovs-dev] [OVN Patch v8 3/3] northd: Restore parallel build with dp_groups

2021-09-30 Thread Anton Ivanov

On 30/09/2021 20:48, Han Zhou wrote:



On Thu, Sep 30, 2021 at 7:34 AM Anton Ivanov 
<mailto:anton.iva...@cambridgegreys.com>> wrote:


Summary of findings.

1. The numbers on the perf test do not align with heater which is
much closer to a realistic load. On some tests where heater gives
5-10% end-to-end improvement with parallelization we get worse
results with the perf-test. You spotted this one correctly.

Example of the northd average pulled out of the test report via
grep and sed.

   127.489353
   131.509458
   116.088205
   94.721911
   119.629756
   114.896258
   124.811069
   129.679160
   106.699905
   134.490338
   112.106713
   135.957658
   132.47
   94.106849
   117.431450
   115.861592
   106.830657
   132.396905
   107.092542
   128.945760
   94.298464
   120.455510
   136.910426
   134.311765
   115.881292
   116.918458

These values are all over the place - this is not a reproducible test.

2. In the present state you need to re-run it > 30+ times and take
an average. The standard deviation for the values for the northd
loop is > 10%. Compared to that the reproducibility of ovn-heater
is significantly better. I usually get less than 0.5% difference
between runs if there was no iteration failures. I would suggest
using that instead if you want to do performance comparisons until
we have figured out what affects the perf-test.

3. It is using the short term running average value in reports
which is probably wrong because you have very significant skew
from the last several values.

I will look into all of these.

Thanks for the summary! However, I think there is a bigger problem 
(probably related to my environment) than the stability of the test 
(make check-perf TESTSUITEFLAGS="--rebuild") itself. As I mentioned in 
an earlier email I observed even worse results with a large scale 
topology closer to a real world deployment of ovn-k8s just testing 
with the command:

    ovn-nbctl --print-wait-time --wait=sb sync

This command simply triggers a change in NB_Global table and wait for 
northd to complete all the recompute and update SB. It doesn't have to 
use "sync" command but any change to the NB DB produces similar result 
(e.g.: ovn-nbctl --print-wait-time --wait=sb ls-add ls1)


Without parallel:
ovn-northd completion: 7807ms

With parallel:
ovn-northd completion: 41267ms


Is this with current master or prior to these patches?

1. There was an issue prior to these where the hash on first iteration 
with an existing database when loading a large database for the first 
time was not sized correctly. These numbers sound about right when this 
bug was around.


2. There should be NO DIFFERENCE in a single compute cycle with an 
existing database between a run with parallel and without with dp groups 
at present. This is because the first cycle does not use parallel 
compute. It is disabled in order to achieve the correct hash sizings for 
future cycle by auto-scaling the hash.


So what exact tag/commit are you running this with and with what options 
are on/off?


A.



This result is stable and consistent when repeating the command on my 
machine. Would you try it on your machine as well? I understand that 
only the lflow generation part can be parallelized and it doesn't 
solve all the bottleneck, but I did expect it to be faster instead of 
slower. If your result always shows that parallel is better, then I 
will have to dig it out myself on my test machine.


Thanks,
Han

Brgds,

On 30/09/2021 08:26, Han Zhou wrote:



    On Thu, Sep 30, 2021 at 12:08 AM Anton Ivanov
mailto:anton.iva...@cambridgegreys.com>> wrote:

After quickly adding some more prints into the testsuite.

Test 1:

Without

  1: ovn-northd basic scale test -- 200 Hypervisors, 200
Logical Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1130
  Average (NB in msec): 620.375000
  Maximum (SB in msec): 23
  Average (SB in msec): 21.468759
  Maximum (northd-loop in msec): 6002
  Minimum (northd-loop in msec): 0
  Average (northd-loop in msec): 914.760417
  Long term average (northd-loop in msec): 104.799340

With

  1: ovn-northd basic scale test -- 200 Hypervisors, 200
Logical Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1148
  Average (NB in msec): 630.25
  Maximum (SB in msec): 24
  Average (SB in msec): 21.468744
  Maximum (northd-loop in msec): 6090
  Minimum (northd-loop in msec): 0
  Average (northd-loop in msec): 762.101565
  Long term average (northd-loop in msec): 80.735192

The metric w

Re: [ovs-dev] [OVN Patch v8 3/3] northd: Restore parallel build with dp_groups

2021-09-30 Thread Anton Ivanov

Summary of findings.

1. The numbers on the perf test do not align with heater which is much closer 
to a realistic load. On some tests where heater gives 5-10% end-to-end 
improvement with parallelization we get worse results with the perf-test. You 
spotted this one correctly.

Example of the northd average pulled out of the test report via grep and sed.

   127.489353
   131.509458
   116.088205
   94.721911
   119.629756
   114.896258
   124.811069
   129.679160
   106.699905
   134.490338
   112.106713
   135.957658
   132.47
   94.106849
   117.431450
   115.861592
   106.830657
   132.396905
   107.092542
   128.945760
   94.298464
   120.455510
   136.910426
   134.311765
   115.881292
   116.918458

These values are all over the place - this is not a reproducible test.

2. In the present state you need to re-run it > 30+ times and take an average. The 
standard deviation for the values for the northd loop is > 10%. Compared to that 
the reproducibility of ovn-heater is significantly better. I usually get less than 
0.5% difference between runs if there was no iteration failures. I would suggest 
using that instead if you want to do performance comparisons until we have figured 
out what affects the perf-test.

3. It is using the short term running average value in reports which is 
probably wrong because you have very significant skew from the last several 
values.

I will look into all of these.

Brgds,

On 30/09/2021 08:26, Han Zhou wrote:



On Thu, Sep 30, 2021 at 12:08 AM Anton Ivanov mailto:anton.iva...@cambridgegreys.com>> wrote:

After quickly adding some more prints into the testsuite.

Test 1:

Without

  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1130
  Average (NB in msec): 620.375000
  Maximum (SB in msec): 23
  Average (SB in msec): 21.468759
  Maximum (northd-loop in msec): 6002
  Minimum (northd-loop in msec): 0
  Average (northd-loop in msec): 914.760417
  Long term average (northd-loop in msec): 104.799340

With

  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1148
  Average (NB in msec): 630.25
  Maximum (SB in msec): 24
  Average (SB in msec): 21.468744
  Maximum (northd-loop in msec): 6090
  Minimum (northd-loop in msec): 0
  Average (northd-loop in msec): 762.101565
  Long term average (northd-loop in msec): 80.735192

The metric which actually matters and which SHOULD be measured - long term 
average is better by 20%. Using short term average instead of long term in the 
test suite is actually a BUG.

Good catch!

Are you running yours under some sort of virtualization?


No, I am testing on a bare-metal.

A.

On 30/09/2021 07:52, Han Zhou wrote:

Thanks Anton for checking. I am using: Intel(R) Core(TM) i9-7920X CPU @ 
2.90GHz, 24 cores.
It is weird why my result is so different. I also verified with a scale 
test script that creates a large scale NB/SB with 800 nodes of simulated k8s 
setup. And then just run:
    ovn-nbctl --print-wait-time --wait=sb sync

Without parallel:
ovn-northd completion: 7807ms

With parallel:
ovn-northd completion: 41267ms

I suspected the hmap size problem but I tried changing the initial size to 64k 
buckets and it didn't help. I will find some time to check the "perf" reports.

Thanks,
Han

On Wed, Sep 29, 2021 at 11:31 PM Anton Ivanov mailto:anton.iva...@cambridgegreys.com>> wrote:

    On 30/09/2021 07:16, Anton Ivanov wrote:

Results on a Ryzen 5 3600 - 6 cores 12 threads


I will also have a look into the "maximum" measurement for multi-thread.

It does not tie up with the drop in average across the board.

A.



Without


  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1256
  Average (NB in msec): 679.463785
  Maximum (SB in msec): 25
  Average (SB in msec): 22.489798
  Maximum (northd-loop in msec): 1347
  Average (northd-loop in msec): 799.944878

  2: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no
  ---
  Maximum (NB in msec): 1956
  Average (NB in msec): 809.387285
  Maximum (SB in msec): 24
  Average (SB in msec): 21.649258
  Maximum (northd-loop in msec): 2011
  Average (northd-loop in msec): 961.718686

  5: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 557
  Average (NB in msec): 474.010

Re: [ovs-dev] [OVN Patch v8 3/3] northd: Restore parallel build with dp_groups

2021-09-30 Thread Anton Ivanov

OK,

I can dig into this later this afternoon.

There is quite a bit of dispersion in tests without parallelization on my 
system which should not be there.

I want to get down to the bottom of where it is coming from and why are we 
getting different results compared to ovn-heater.

I did all the original tests with ovn-heater and they were consistently 5-10% 
better end-to-end with parallelization enabled.

As far as the worker threads never reaching 100% and the northd thread being 
regularly at 100% that is unfortunately how it is. Large sections of northd 
cannot be parallelized at present. The only bit which can be run in parallel is 
lflow compute.

Generation of datapaths, ports, groups - all before the lflows cannot be 
parallelized and it is compute heavy.

Post-processing of flows once they have been generated - hash recompute, 
reconciliation of databases, etc - cannot be parallelized at present. Some of 
it may be run in parallel if there were parallel macros in the OVS source, but 
they are likely to give only marginal effect on performance - 1-2% at most.

Best Regards,

A.

On 30/09/2021 08:26, Han Zhou wrote:



On Thu, Sep 30, 2021 at 12:08 AM Anton Ivanov mailto:anton.iva...@cambridgegreys.com>> wrote:

After quickly adding some more prints into the testsuite.

Test 1:

Without

  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1130
  Average (NB in msec): 620.375000
  Maximum (SB in msec): 23
  Average (SB in msec): 21.468759
  Maximum (northd-loop in msec): 6002
  Minimum (northd-loop in msec): 0
  Average (northd-loop in msec): 914.760417
  Long term average (northd-loop in msec): 104.799340

With

  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1148
  Average (NB in msec): 630.25
  Maximum (SB in msec): 24
  Average (SB in msec): 21.468744
  Maximum (northd-loop in msec): 6090
  Minimum (northd-loop in msec): 0
  Average (northd-loop in msec): 762.101565
  Long term average (northd-loop in msec): 80.735192

The metric which actually matters and which SHOULD be measured - long term 
average is better by 20%. Using short term average instead of long term in the 
test suite is actually a BUG.

Good catch!

Are you running yours under some sort of virtualization?


No, I am testing on a bare-metal.

A.

On 30/09/2021 07:52, Han Zhou wrote:

Thanks Anton for checking. I am using: Intel(R) Core(TM) i9-7920X CPU @ 
2.90GHz, 24 cores.
It is weird why my result is so different. I also verified with a scale 
test script that creates a large scale NB/SB with 800 nodes of simulated k8s 
setup. And then just run:
    ovn-nbctl --print-wait-time --wait=sb sync

Without parallel:
ovn-northd completion: 7807ms

With parallel:
ovn-northd completion: 41267ms

I suspected the hmap size problem but I tried changing the initial size to 64k 
buckets and it didn't help. I will find some time to check the "perf" reports.

Thanks,
Han

On Wed, Sep 29, 2021 at 11:31 PM Anton Ivanov mailto:anton.iva...@cambridgegreys.com>> wrote:

    On 30/09/2021 07:16, Anton Ivanov wrote:

Results on a Ryzen 5 3600 - 6 cores 12 threads


I will also have a look into the "maximum" measurement for multi-thread.

It does not tie up with the drop in average across the board.

A.



Without


  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1256
  Average (NB in msec): 679.463785
  Maximum (SB in msec): 25
  Average (SB in msec): 22.489798
  Maximum (northd-loop in msec): 1347
  Average (northd-loop in msec): 799.944878

  2: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no
  ---
  Maximum (NB in msec): 1956
  Average (NB in msec): 809.387285
  Maximum (SB in msec): 24
  Average (SB in msec): 21.649258
  Maximum (northd-loop in msec): 2011
  Average (northd-loop in msec): 961.718686

  5: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 557
  Average (NB in msec): 474.010337
  Maximum (SB in msec): 15
  Average (SB in msec): 13.927192
  Maximum (northd-loop in msec): 1261
  Average (northd-loop in msec): 580.999122

  6: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no
  ---
  Maximum (NB in msec): 7

Re: [ovs-dev] [OVN Patch v8 3/3] northd: Restore parallel build with dp_groups

2021-09-30 Thread Anton Ivanov

After quickly adding some more prints into the testsuite.

Test 1:

Without

  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1130
  Average (NB in msec): 620.375000
  Maximum (SB in msec): 23
  Average (SB in msec): 21.468759
  Maximum (northd-loop in msec): 6002
  Minimum (northd-loop in msec): 0
  Average (northd-loop in msec): 914.760417
  Long term average (northd-loop in msec): 104.799340

With

  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1148
  Average (NB in msec): 630.25
  Maximum (SB in msec): 24
  Average (SB in msec): 21.468744
  Maximum (northd-loop in msec): 6090
  Minimum (northd-loop in msec): 0
  Average (northd-loop in msec): 762.101565
  Long term average (northd-loop in msec): 80.735192

The metric which actually matters and which SHOULD be measured - long term 
average is better by 20%. Using short term average instead of long term in the 
test suite is actually a BUG.

Are you running yours under some sort of virtualization?

A.

On 30/09/2021 07:52, Han Zhou wrote:

Thanks Anton for checking. I am using: Intel(R) Core(TM) i9-7920X CPU @ 
2.90GHz, 24 cores.
It is weird why my result is so different. I also verified with a scale test 
script that creates a large scale NB/SB with 800 nodes of simulated k8s setup. 
And then just run:
    ovn-nbctl --print-wait-time --wait=sb sync

Without parallel:
ovn-northd completion: 7807ms

With parallel:
ovn-northd completion: 41267ms

I suspected the hmap size problem but I tried changing the initial size to 64k buckets 
and it didn't help. I will find some time to check the "perf" reports.

Thanks,
Han

On Wed, Sep 29, 2021 at 11:31 PM Anton Ivanov mailto:anton.iva...@cambridgegreys.com>> wrote:

On 30/09/2021 07:16, Anton Ivanov wrote:

Results on a Ryzen 5 3600 - 6 cores 12 threads


I will also have a look into the "maximum" measurement for multi-thread.

It does not tie up with the drop in average across the board.

A.



Without


  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1256
  Average (NB in msec): 679.463785
  Maximum (SB in msec): 25
  Average (SB in msec): 22.489798
  Maximum (northd-loop in msec): 1347
  Average (northd-loop in msec): 799.944878

  2: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no
  ---
  Maximum (NB in msec): 1956
  Average (NB in msec): 809.387285
  Maximum (SB in msec): 24
  Average (SB in msec): 21.649258
  Maximum (northd-loop in msec): 2011
  Average (northd-loop in msec): 961.718686

  5: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 557
  Average (NB in msec): 474.010337
  Maximum (SB in msec): 15
  Average (SB in msec): 13.927192
  Maximum (northd-loop in msec): 1261
  Average (northd-loop in msec): 580.999122

  6: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no
  ---
  Maximum (NB in msec): 756
  Average (NB in msec): 625.614724
  Maximum (SB in msec): 15
  Average (SB in msec): 14.181048
  Maximum (northd-loop in msec): 1649
  Average (northd-loop in msec): 746.208332


With

  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 1140
  Average (NB in msec): 631.125000
  Maximum (SB in msec): 24
  Average (SB in msec): 21.453609
  Maximum (northd-loop in msec): 6080
  Average (northd-loop in msec): 759.718815

  2: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no
  ---
  Maximum (NB in msec): 1210
  Average (NB in msec): 673.00
  Maximum (SB in msec): 27
  Average (SB in msec): 22.453125
  Maximum (northd-loop in msec): 6514
  Average (northd-loop in msec): 808.596842

  5: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes
  ---
  Maximum (NB in msec): 798
  Average (NB in msec): 429.75
  Maximum (SB in msec): 15
  Average (SB in msec): 12.998533
  Maximum (northd-loop in msec): 3835
  Average (northd-loop in msec): 564.875986

  6: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no
  ---
  Maximum (NB in msec): 1074
  Average (NB in msec): 593.875000
  Maximum (SB in msec): 14
  Average (SB in msec): 13.655273
  Maximum (northd-loop in

Re: [ovs-dev] [OVN Patch v8 3/3] northd: Restore parallel build with dp_groups

2021-09-30 Thread Anton Ivanov

On 30/09/2021 07:16, Anton Ivanov wrote:

Results on a Ryzen 5 3600 - 6 cores 12 threads


I will also have a look into the "maximum" measurement for multi-thread.

It does not tie up with the drop in average across the board.

A.



Without


  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes

  ---
  Maximum (NB in msec): 1256
  Average (NB in msec): 679.463785
  Maximum (SB in msec): 25
  Average (SB in msec): 22.489798
  Maximum (northd-loop in msec): 1347
  Average (northd-loop in msec): 799.944878

  2: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no

  ---
  Maximum (NB in msec): 1956
  Average (NB in msec): 809.387285
  Maximum (SB in msec): 24
  Average (SB in msec): 21.649258
  Maximum (northd-loop in msec): 2011
  Average (northd-loop in msec): 961.718686

  5: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes

  ---
  Maximum (NB in msec): 557
  Average (NB in msec): 474.010337
  Maximum (SB in msec): 15
  Average (SB in msec): 13.927192
  Maximum (northd-loop in msec): 1261
  Average (northd-loop in msec): 580.999122

  6: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no

  ---
  Maximum (NB in msec): 756
  Average (NB in msec): 625.614724
  Maximum (SB in msec): 15
  Average (SB in msec): 14.181048
  Maximum (northd-loop in msec): 1649
  Average (northd-loop in msec): 746.208332


With

  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes

  ---
  Maximum (NB in msec): 1140
  Average (NB in msec): 631.125000
  Maximum (SB in msec): 24
  Average (SB in msec): 21.453609
  Maximum (northd-loop in msec): 6080
  Average (northd-loop in msec): 759.718815

  2: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no

  ---
  Maximum (NB in msec): 1210
  Average (NB in msec): 673.00
  Maximum (SB in msec): 27
  Average (SB in msec): 22.453125
  Maximum (northd-loop in msec): 6514
  Average (northd-loop in msec): 808.596842

  5: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes

  ---
  Maximum (NB in msec): 798
  Average (NB in msec): 429.75
  Maximum (SB in msec): 15
  Average (SB in msec): 12.998533
  Maximum (northd-loop in msec): 3835
  Average (northd-loop in msec): 564.875986

  6: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no

  ---
  Maximum (NB in msec): 1074
  Average (NB in msec): 593.875000
  Maximum (SB in msec): 14
  Average (SB in msec): 13.655273
  Maximum (northd-loop in msec): 4973
  Average (northd-loop in msec): 771.102605

The only one slower is test 6 which I will look into.

The rest are > 5% faster.

A.

On 30/09/2021 00:56, Han Zhou wrote:



On Wed, Sep 15, 2021 at 5:45 AM <mailto:anton.iva...@cambridgegreys.com>> wrote:

>
> From: Anton Ivanov <mailto:anton.iva...@cambridgegreys.com>>

>
> Restore parallel build with dp groups using rwlock instead
> of per row locking as an underlying mechanism.
>
> This provides improvement ~ 10% end-to-end on ovn-heater
> under virtualization despite awakening some qemu gremlin
> which makes qemu climb to silly CPU usage. The gain on
> bare metal is likely to be higher.
>
Hi Anton,

I am trying to see the benefit of parallel_build, but encountered 
unexpected performance result when running the perf tests with command:

 make check-perf TESTSUITEFLAGS="--rebuild"

It shows significantly worse performance than without parallel_build. 
For dp_group = no cases, it is better, but still ~30% slower than 
without parallel_build. I have 24 cores, but each thread is not 
consuming much CPU except the main thread. I also tried hardcoding the 
number of threads to just 4, which ended up with slightly better 
results, but still far behind "without parallel_build".


             no parallel                                   | 
parallel  (24 pool threads)                  | parallel with (4 pool 
threads)
                                     |                               
        |
    1: ovn-northd basic scale test -- 200 Hypervisors, 200 |    1: 
ovn-northd basic scale test -- 200 Hypervisors, 200 |    1: 
ovn-northd basic scale test -- 200 Hypervisors, 200

    ---  |    ---    |    ---
    Maximum (NB in msec): 1058 |    Maximum (NB in msec): 4269   |   
 Maximum (NB in msec): 4097
    Average (NB in msec): 836.941167 |    Average (NB in msec): 
3697.253931  |    Average (NB in msec): 3498.311525
    Maximum (SB in msec): 30 |    Maximum (SB in msec): 30   |   
 Maximum (SB in msec): 28
    Average (SB in msec): 25.934011  |    Average (SB in msec): 
26.001840    |    Average (SB in

Re: [ovs-dev] [OVN Patch v8 3/3] northd: Restore parallel build with dp_groups

2021-09-30 Thread Anton Ivanov

Results on a Ryzen 5 3600 - 6 cores 12 threads

Without


  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes

  ---
  Maximum (NB in msec): 1256
  Average (NB in msec): 679.463785
  Maximum (SB in msec): 25
  Average (SB in msec): 22.489798
  Maximum (northd-loop in msec): 1347
  Average (northd-loop in msec): 799.944878

  2: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no

  ---
  Maximum (NB in msec): 1956
  Average (NB in msec): 809.387285
  Maximum (SB in msec): 24
  Average (SB in msec): 21.649258
  Maximum (northd-loop in msec): 2011
  Average (northd-loop in msec): 961.718686

  5: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes

  ---
  Maximum (NB in msec): 557
  Average (NB in msec): 474.010337
  Maximum (SB in msec): 15
  Average (SB in msec): 13.927192
  Maximum (northd-loop in msec): 1261
  Average (northd-loop in msec): 580.999122

  6: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no

  ---
  Maximum (NB in msec): 756
  Average (NB in msec): 625.614724
  Maximum (SB in msec): 15
  Average (SB in msec): 14.181048
  Maximum (northd-loop in msec): 1649
  Average (northd-loop in msec): 746.208332


With

  1: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes

  ---
  Maximum (NB in msec): 1140
  Average (NB in msec): 631.125000
  Maximum (SB in msec): 24
  Average (SB in msec): 21.453609
  Maximum (northd-loop in msec): 6080
  Average (northd-loop in msec): 759.718815

  2: ovn-northd basic scale test -- 200 Hypervisors, 200 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no

  ---
  Maximum (NB in msec): 1210
  Average (NB in msec): 673.00
  Maximum (SB in msec): 27
  Average (SB in msec): 22.453125
  Maximum (northd-loop in msec): 6514
  Average (northd-loop in msec): 808.596842

  5: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=yes

  ---
  Maximum (NB in msec): 798
  Average (NB in msec): 429.75
  Maximum (SB in msec): 15
  Average (SB in msec): 12.998533
  Maximum (northd-loop in msec): 3835
  Average (northd-loop in msec): 564.875986

  6: ovn-northd basic scale test -- 500 Hypervisors, 50 Logical 
Ports/Hypervisor -- ovn-northd -- dp-groups=no

  ---
  Maximum (NB in msec): 1074
  Average (NB in msec): 593.875000
  Maximum (SB in msec): 14
  Average (SB in msec): 13.655273
  Maximum (northd-loop in msec): 4973
  Average (northd-loop in msec): 771.102605

The only one slower is test 6 which I will look into.

The rest are > 5% faster.

A.

On 30/09/2021 00:56, Han Zhou wrote:



On Wed, Sep 15, 2021 at 5:45 AM <mailto:anton.iva...@cambridgegreys.com>> wrote:

>
> From: Anton Ivanov <mailto:anton.iva...@cambridgegreys.com>>

>
> Restore parallel build with dp groups using rwlock instead
> of per row locking as an underlying mechanism.
>
> This provides improvement ~ 10% end-to-end on ovn-heater
> under virtualization despite awakening some qemu gremlin
> which makes qemu climb to silly CPU usage. The gain on
> bare metal is likely to be higher.
>
Hi Anton,

I am trying to see the benefit of parallel_build, but encountered 
unexpected performance results when running the perf tests with the command:

 make check-perf TESTSUITEFLAGS="--rebuild"

It shows significantly worse performance than without parallel_build. 
For dp_group = no cases, it is better, but still ~30% slower than 
without parallel_build. I have 24 cores, but each thread is not 
consuming much CPU except the main thread. I also tried hardcoding the 
number of threads to just 4, which ended up with slightly better results, 
but still far behind "without parallel_build".


             no parallel                                   | parallel  
(24 pool threads)                  | parallel with (4 pool threads)

                                   |                                   |
    1: ovn-northd basic scale test -- 200 Hypervisors, 200 |    1: 
ovn-northd basic scale test -- 200 Hypervisors, 200 |    1: ovn-northd 
basic scale test -- 200 Hypervisors, 200
    ---                                                    |    ---   
                                                 |    ---
    Maximum (NB in msec): 1058                             |   
 Maximum (NB in msec): 4269                             |    Maximum 
(NB in msec): 4097
    Average (NB in msec): 836.941167                       |   
 Average (NB in msec): 3697.253931                      |    Average 
(NB in msec): 3498.311525
    Maximum (SB in msec): 30                               |   
 Maximum (SB in msec): 30                               |    Maximum 
(SB in msec): 28
    Average (SB in mse

Re: [ovs-dev] [OVN Patch v8 3/3] northd: Restore parallel build with dp_groups

2021-09-29 Thread Anton Ivanov

I need to have a look.

I use the ovn-heater end-to-end test, that was showing a substantial 
improvement.


What are you running this on?

A.

On 30/09/2021 00:56, Han Zhou wrote:



On Wed, Sep 15, 2021 at 5:45 AM <mailto:anton.iva...@cambridgegreys.com>> wrote:

>
> From: Anton Ivanov <mailto:anton.iva...@cambridgegreys.com>>

>
> Restore parallel build with dp groups using rwlock instead
> of per row locking as an underlying mechanism.
>
> This provides improvement ~ 10% end-to-end on ovn-heater
> under virtualization despite awakening some qemu gremlin
> which makes qemu climb to silly CPU usage. The gain on
> bare metal is likely to be higher.
>
Hi Anton,

I am trying to see the benefit of parallel_build, but encountered 
unexpected performance results when running the perf tests with the command:

 make check-perf TESTSUITEFLAGS="--rebuild"

It shows significantly worse performance than without parallel_build. 
For dp_group = no cases, it is better, but still ~30% slower than 
without parallel_build. I have 24 cores, but each thread is not 
consuming much CPU except the main thread. I also tried hardcoding the 
number of threads to just 4, which ended up with slightly better results, 
but still far behind "without parallel_build".


             no parallel                                   | parallel  
(24 pool threads)                  | parallel with (4 pool threads)

                                   |                                   |
    1: ovn-northd basic scale test -- 200 Hypervisors, 200 |    1: 
ovn-northd basic scale test -- 200 Hypervisors, 200 |    1: ovn-northd 
basic scale test -- 200 Hypervisors, 200
    ---                                                    |    ---   
                                                 |    ---
    Maximum (NB in msec): 1058                             |   
 Maximum (NB in msec): 4269                             |    Maximum 
(NB in msec): 4097
    Average (NB in msec): 836.941167                       |   
 Average (NB in msec): 3697.253931                      |    Average 
(NB in msec): 3498.311525
    Maximum (SB in msec): 30                               |   
 Maximum (SB in msec): 30                               |    Maximum 
(SB in msec): 28
    Average (SB in msec): 25.934011                        |   
 Average (SB in msec): 26.001840                        |    Average 
(SB in msec): 25.685091
    Maximum (northd-loop in msec): 1204                    |   
 Maximum (northd-loop in msec): 4379                    |    Maximum 
(northd-loop in msec): 4251
    Average (northd-loop in msec): 1005.330078             |   
 Average (northd-loop in msec): 4233.871504             |    Average 
(northd-loop in msec): 4022.774208
                                                           |           
                                                |
    2: ovn-northd basic scale test -- 200 Hypervisors, 200 |    2: 
ovn-northd basic scale test -- 200 Hypervisors, 200 |    2: ovn-northd 
basic scale test -- 200 Hypervisors, 200
    ---                                                    |    ---   
                                                 |    ---
    Maximum (NB in msec): 1124                             |   
 Maximum (NB in msec): 1480                             |    Maximum 
(NB in msec): 1331
    Average (NB in msec): 892.403405                       |   
 Average (NB in msec): 1206.189287                      |    Average 
(NB in msec): 1089.378455
    Maximum (SB in msec): 29                               |   
 Maximum (SB in msec): 31                               |    Maximum 
(SB in msec): 30
    Average (SB in msec): 26.922632                        |   
 Average (SB in msec): 26.636706                        |    Average 
(SB in msec): 25.657484
    Maximum (northd-loop in msec): 1275                    |   
 Maximum (northd-loop in msec): 1639                    |    Maximum 
(northd-loop in msec): 1495
    Average (northd-loop in msec): 1074.917873             |   
 Average (northd-loop in msec): 1458.152327             |    Average 
(northd-loop in msec): 1301.057201
                                                           |           
                                                |
    5: ovn-northd basic scale test -- 500 Hypervisors, 50 L|    5: 
ovn-northd basic scale test -- 500 Hypervisors, 50 L|    5: ovn-northd 
basic scale test -- 500 Hypervisors, 50
    ---                                                    |    ---   
                                                 |    ---
    Maximum (NB in msec): 768                              |   
 Maximum (NB in msec): 3086                             |    Maximum 
(NB in msec): 2876
    Average (NB in msec): 614.491938                       |   
 Average (NB in msec): 2681.688365                      |    Average 
(NB in msec): 2531.255444
    Maximum (SB in msec): 18                               |   
 M

[ovs-dev] [PATCH] json: SORT json only if debug is enabled

2021-09-24 Thread anton . ivanov
From: Anton Ivanov 

There is no point in abusing the CPU with sorting (in a worst case scenario -
up to a million rows in OVN) unless a human is reading the result.
The machine on the other side could not care less if it is
sorted or not.

Signed-off-by: Anton Ivanov 
---
 lib/json.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/json.c b/lib/json.c
index 0baf7c622..d4c3d12f7 100644
--- a/lib/json.c
+++ b/lib/json.c
@@ -30,6 +30,9 @@
 #include "unicode.h"
 #include "util.h"
 #include "uuid.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(json);
 
 /* The type of a JSON token. */
 enum json_token_type {
@@ -185,7 +188,11 @@ struct json *
 json_serialized_object_create(const struct json *src)
 {
 struct json *json = json_create(JSON_SERIALIZED_OBJECT);
-json->string = json_to_string(src, JSSF_SORT);
+if (VLOG_IS_DBG_ENABLED()) {
+json->string = json_to_string(src, JSSF_SORT);
+} else {
+json->string = json_to_string(src, 0);
+}
 return json;
 }
 
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] jsonrpc: Sort JSON objects only if debug is on

2021-09-24 Thread anton . ivanov
From: Anton Ivanov 

There is no point to sort JSON objects when nobody is
observing them. Machines do not care if it is sorted or
not.

Signed-off-by: Anton Ivanov 
---
 lib/jsonrpc.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
index c8ce5362e..18777f4d6 100644
--- a/lib/jsonrpc.c
+++ b/lib/jsonrpc.c
@@ -802,7 +802,15 @@ jsonrpc_msg_to_string(const struct jsonrpc_msg *m)
 {
 struct jsonrpc_msg *copy = jsonrpc_msg_clone(m);
 struct json *json = jsonrpc_msg_to_json(copy);
-char *s = json_to_string(json, JSSF_SORT);
+char *s;
+if (VLOG_IS_DBG_ENABLED()) {
+/* We need json sorted only if a human is looking
+ * at it. No point to sort it if debug is not on.
+ */
+s = json_to_string(json, JSSF_SORT);
+} else {
+s = json_to_string(json, 0);
+}
 json_destroy(json);
 return s;
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v4 1/2] Make changes to the parallel processing API to allow pool sizing

2021-09-21 Thread anton . ivanov
From: Anton Ivanov 

1. Make pool size user definable.
2. Expose pool destruction.
3. Make pools resizeable at runtime.
4. Split pool start and completion to allow background execution.
5. Add a simplified API for SAFE walking single hash.

Signed-off-by: Anton Ivanov 
---
 lib/ovn-parallel-hmap.c | 290 +++-
 lib/ovn-parallel-hmap.h |  77 ++-
 northd/northd.c |  72 +++---
 3 files changed, 321 insertions(+), 118 deletions(-)

diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index b8c7ac786..1b3883441 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -51,7 +51,6 @@ static bool can_parallelize = false;
  * accompanied by a fence. It does not need to be atomic or be
  * accessed under a lock.
  */
-static bool workers_must_exit = false;
 
 static struct ovs_list worker_pools = OVS_LIST_INITIALIZER(_pools);
 
@@ -70,10 +69,27 @@ static void merge_hash_results(struct worker_pool *pool 
OVS_UNUSED,
void *fin_result, void *result_frags,
int index);
 
+
+static bool init_control(struct worker_control *control, int id,
+ struct worker_pool *pool);
+
+static void cleanup_control(struct worker_pool *pool, int id);
+
+static void free_controls(struct worker_pool *pool);
+
+static struct worker_control *alloc_controls(int size);
+
+static void *standard_helper_thread(void *arg);
+
+struct worker_pool *ovn_add_standard_pool(int size)
+{
+return add_worker_pool(standard_helper_thread, size);
+}
+
 bool
-ovn_stop_parallel_processing(void)
+ovn_stop_parallel_processing(struct worker_pool *pool)
 {
-return workers_must_exit;
+return pool->workers_must_exit;
 }
 
 bool
@@ -92,11 +108,67 @@ ovn_can_parallelize_hashes(bool force_parallel)
 return can_parallelize;
 }
 
+
+void
+destroy_pool(struct worker_pool *pool) {
+char sem_name[256];
+
+free_controls(pool);
+sem_close(pool->done);
+sprintf(sem_name, MAIN_SEM_NAME, sembase, pool);
+sem_unlink(sem_name);
+free(pool);
+}
+
+bool
+ovn_resize_pool(struct worker_pool *pool, int size)
+{
+int i;
+
+ovs_assert(pool != NULL);
+
+if (!size) {
+size = pool_size;
+}
+
+ovs_mutex_lock(_mutex);
+
+if (can_parallelize) {
+free_controls(pool);
+pool->size = size;
+
+/* Allocate new control structures. */
+
+pool->controls = alloc_controls(size);
+pool->workers_must_exit = false;
+
+for (i = 0; i < pool->size; i++) {
+if (! init_control(>controls[i], i, pool)) {
+goto cleanup;
+}
+}
+}
+ovs_mutex_unlock(_mutex);
+return true;
+cleanup:
+
+/* Something went wrong when opening semaphores. In this case
+ * it is better to shut off parallel procesing altogether
+ */
+
+VLOG_INFO("Failed to initialize parallel processing, error %d", errno);
+can_parallelize = false;
+free_controls(pool);
+
+ovs_mutex_unlock(_mutex);
+return false;
+}
+
+
 struct worker_pool *
-ovn_add_worker_pool(void *(*start)(void *))
+ovn_add_worker_pool(void *(*start)(void *), int size)
 {
 struct worker_pool *new_pool = NULL;
-struct worker_control *new_control;
 bool test = false;
 int i;
 char sem_name[256];
@@ -113,38 +185,29 @@ ovn_add_worker_pool(void *(*start)(void *))
 ovs_mutex_unlock(_mutex);
 }
 
+if (!size) {
+size = pool_size;
+}
+
 ovs_mutex_lock(_mutex);
 if (can_parallelize) {
 new_pool = xmalloc(sizeof(struct worker_pool));
-new_pool->size = pool_size;
-new_pool->controls = NULL;
+new_pool->size = size;
+new_pool->start = start;
 sprintf(sem_name, MAIN_SEM_NAME, sembase, new_pool);
 new_pool->done = sem_open(sem_name, O_CREAT, S_IRWXU, 0);
 if (new_pool->done == SEM_FAILED) {
 goto cleanup;
 }
 
-new_pool->controls =
-xmalloc(sizeof(struct worker_control) * new_pool->size);
+new_pool->controls = alloc_controls(size);
+new_pool->workers_must_exit = false;
 
 for (i = 0; i < new_pool->size; i++) {
-new_control = _pool->controls[i];
-new_control->id = i;
-new_control->done = new_pool->done;
-new_control->data = NULL;
-ovs_mutex_init(_control->mutex);
-new_control->finished = ATOMIC_VAR_INIT(false);
-sprintf(sem_name, WORKER_SEM_NAME, sembase, new_pool, i);
-new_control->fire = sem_open(sem_name, O_CREAT, S_IRWXU, 0);
-if (new_control->fire == SEM_FAILED) {
+if (!init_control(_pool->controls[i], i, new_pool)) {
 goto cleanup;
 }
 }
-
-for (i = 0; i < pool_size; i++) {
-  

[ovs-dev] [OVN Patch v4 2/2] Add support for configuring parallelization via unixctl

2021-09-21 Thread anton . ivanov
From: Anton Ivanov 

libs: add configuration support to parallel-hmap.[c,h]
northd: add support for configuring parallelization to northd

Signed-off-by: Anton Ivanov 
---
 lib/ovn-parallel-hmap.c | 185 ++--
 lib/ovn-parallel-hmap.h |  63 +-
 northd/northd.c |  30 +++
 northd/northd.h |   2 -
 northd/ovn-northd.c |   5 +-
 tests/ovn-macros.at |  16 +++-
 6 files changed, 263 insertions(+), 38 deletions(-)

diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index 1b3883441..6a6488a17 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -33,6 +33,7 @@
 #include "ovs-thread.h"
 #include "ovs-numa.h"
 #include "random.h"
+#include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(ovn_parallel_hmap);
 
@@ -46,6 +47,7 @@ VLOG_DEFINE_THIS_MODULE(ovn_parallel_hmap);
  */
 static atomic_bool initial_pool_setup = ATOMIC_VAR_INIT(false);
 static bool can_parallelize = false;
+static bool should_parallelize = false;
 
 /* This is set only in the process of exit and the set is
  * accompanied by a fence. It does not need to be atomic or be
@@ -83,7 +85,7 @@ static void *standard_helper_thread(void *arg);
 
 struct worker_pool *ovn_add_standard_pool(int size)
 {
-return add_worker_pool(standard_helper_thread, size);
+return add_worker_pool(standard_helper_thread, size, "default", true);
 }
 
 bool
@@ -92,6 +94,19 @@ ovn_stop_parallel_processing(struct worker_pool *pool)
 return pool->workers_must_exit;
 }
 
+bool
+ovn_set_parallel_processing(bool enable)
+{
+should_parallelize = enable;
+return can_parallelize;
+}
+
+bool
+ovn_get_parallel_processing(void)
+{
+return can_parallelize && should_parallelize;
+}
+
 bool
 ovn_can_parallelize_hashes(bool force_parallel)
 {
@@ -117,6 +132,7 @@ destroy_pool(struct worker_pool *pool) {
 sem_close(pool->done);
 sprintf(sem_name, MAIN_SEM_NAME, sembase, pool);
 sem_unlink(sem_name);
+free(pool->name);
 free(pool);
 }
 
@@ -127,6 +143,10 @@ ovn_resize_pool(struct worker_pool *pool, int size)
 
 ovs_assert(pool != NULL);
 
+if (!pool->is_mutable) {
+return false;
+}
+
 if (!size) {
 size = pool_size;
 }
@@ -166,7 +186,8 @@ cleanup:
 
 
 struct worker_pool *
-ovn_add_worker_pool(void *(*start)(void *), int size)
+ovn_add_worker_pool(void *(*start)(void *), int size, char *name,
+bool is_mutable)
 {
 struct worker_pool *new_pool = NULL;
 bool test = false;
@@ -194,6 +215,8 @@ ovn_add_worker_pool(void *(*start)(void *), int size)
 new_pool = xmalloc(sizeof(struct worker_pool));
 new_pool->size = size;
 new_pool->start = start;
+new_pool->is_mutable = is_mutable;
+new_pool->name = xstrdup(name);
 sprintf(sem_name, MAIN_SEM_NAME, sembase, new_pool);
 new_pool->done = sem_open(sem_name, O_CREAT, S_IRWXU, 0);
 if (new_pool->done == SEM_FAILED) {
@@ -226,6 +249,7 @@ cleanup:
 sprintf(sem_name, MAIN_SEM_NAME, sembase, new_pool);
 sem_unlink(sem_name);
 }
+free(new_pool->name);
 ovs_mutex_unlock(_mutex);
 return NULL;
 }
@@ -342,8 +366,7 @@ ovn_complete_pool_callback(struct worker_pool *pool,
 }
 } while (completed < pool->size);
 }
-
-/* Complete a thread pool which uses a callback function to process results
+/* Run a thread pool which uses a callback function to process results
  */
 void
 ovn_run_pool_callback(struct worker_pool *pool,
@@ -352,8 +375,8 @@ ovn_run_pool_callback(struct worker_pool *pool,
   void *fin_result,
   void *result_frags, int index))
 {
-ovn_start_pool(pool);
-ovn_complete_pool_callback(pool, fin_result, result_frags, helper_func);
+start_pool(pool);
+complete_pool_callback(pool, fin_result, result_frags, helper_func);
 }
 
 /* Run a thread pool - basic, does not do results processing.
@@ -401,6 +424,28 @@ ovn_fast_hmap_merge(struct hmap *dest, struct hmap *inc)
 inc->n = 0;
 }
 
+/* Run a thread pool which gathers results in an array
+ * of hashes. Merge results.
+ */
+void
+ovn_complete_pool_hash(struct worker_pool *pool,
+  struct hmap *result,
+  struct hmap *result_frags)
+{
+complete_pool_callback(pool, result, result_frags, merge_hash_results);
+}
+
+/* Run a thread pool which gathers results in an array of lists.
+ * Merge results.
+ */
+void
+ovn_complete_pool_list(struct worker_pool *pool,
+  struct ovs_list *result,
+  struct ovs_list *result_frags)
+{
+complete_pool_callback(pool, result, result_frags, merge_list_results);
+}
+
 /* Run a thread pool which gathers results in an array
  * of hashes. Merge results.
  */
@@ -514,7 +559,7 @@ static struct worker_control *alloc_contro

Re: [ovs-dev] [OVN Patch v8 1/3] northd: Disable parallel processing for logical_dp_groups

2021-09-18 Thread Anton Ivanov

On 17/09/2021 21:44, Mark Michelson wrote:
Based on the successful GHA run I had in my fork, I fixed the 
submodule downgrade (and fixed a couple of grammar errors in 
comments). I've pushed this to master.


Thanks,

I will rebase the thread API improvements on top of that.

A.



On 9/17/21 3:49 PM, Mark Michelson wrote:

On 9/17/21 1:38 PM, Ilya Maximets wrote:

On 9/15/21 14:43, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Work on improving processing with dp_groups enabled has
discovered that the locking mechanism presently in use
is not reliable. Disabling parallel processing if dp_groups
are enabled until the root cause is determined and fixed.

Signed-off-by: Anton Ivanov 
---
  northd/ovn-northd.c | 2 +-
  ovs | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index baaddb73e..3113fafc7 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12974,7 +12974,7 @@ build_lswitch_and_lrouter_flows(struct hmap 
*datapaths, struct hmap *ports,

  }
  }
-    if (use_parallel_build) {
+    if (use_parallel_build && (!use_logical_dp_groups)) {
  struct hmap *lflow_segs;
  struct lswitch_flow_build_info *lsiv;
  int index;
diff --git a/ovs b/ovs
index 748010ff3..50e5523b9 16
--- a/ovs
+++ b/ovs
@@ -1 +1 @@
-Subproject commit 748010ff304b7cd2c43f4eb98a554433f0df07f9
+Subproject commit 50e5523b9b2b154e5fafc5acdcdec85e9cc5a330



This change breaks the build due to submodule downgrade.

Best regards, Ilya Maximets.



Thanks for the note, Ilya. I did a manual rebase (which was 
non-trivial due to the northd split) and pushed to my fork. Seems to 
be OK with the submodule downgrade fixed: 
https://github.com/putnopvut/ovn/runs/3635759940?check_suite_focus=true





--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch v8 1/3] northd: Disable parallel processing for logical_dp_groups

2021-09-17 Thread Anton Ivanov

On 17/09/2021 18:23, Mark Michelson wrote:

Hi Anton,

For the series:

Acked-by: Mark Michelson 

Thanks.


On 9/15/21 8:43 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Work on improving processing with dp_groups enabled has
discovered that the locking mechanism presently in use
is not reliable. Disabling parallel processing if dp_groups
are enabled until the root cause is determined and fixed.

Signed-off-by: Anton Ivanov 
---
  northd/ovn-northd.c | 2 +-
  ovs | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index baaddb73e..3113fafc7 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12974,7 +12974,7 @@ build_lswitch_and_lrouter_flows(struct hmap 
*datapaths, struct hmap *ports,

  }
  }
  -    if (use_parallel_build) {
+    if (use_parallel_build && (!use_logical_dp_groups)) {
  struct hmap *lflow_segs;
  struct lswitch_flow_build_info *lsiv;
  int index;
diff --git a/ovs b/ovs
index 748010ff3..50e5523b9 16


As noted by Ilya, I had a submodule change from ovs filter through, so I 
will resend the series.



--- a/ovs
+++ b/ovs
@@ -1 +1 @@
-Subproject commit 748010ff304b7cd2c43f4eb98a554433f0df07f9
+Subproject commit 50e5523b9b2b154e5fafc5acdcdec85e9cc5a330






--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch] northd: Optimize dp/lflow postprocessing

2021-09-15 Thread anton . ivanov
From: Anton Ivanov 

1. Compute dp group hash only if there will be dp group processing.
2. Remove hmapx interim storage and related hmapx computation for
single dp flows and replace it with a pre-sized hmap.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 55 -
 ovs |  2 +-
 2 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index baaddb73e..9edd1e0e4 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13178,6 +13178,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 igmp_groups, meter_groups, lbs,
 bfd_connections);
 
+/* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+
+hmap_expand();
+
 if (hmap_count() > max_seen_lflow_size) {
 max_seen_lflow_size = hmap_count();
 }
@@ -13185,10 +13190,22 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 stopwatch_start(LFLOWS_DP_GROUPS_STOPWATCH_NAME, time_msec());
 /* Collecting all unique datapath groups. */
 struct hmap dp_groups = HMAP_INITIALIZER(_groups);
-struct hmapx single_dp_lflows = HMAPX_INITIALIZER(_dp_lflows);
-struct ovn_lflow *lflow;
-HMAP_FOR_EACH (lflow, hmap_node, ) {
-uint32_t hash = hash_int(hmapx_count(>od_group), 0);
+struct hmap single_dp_lflows;
+
+/* Single dp_flows will never grow bigger than lflows,
+ * thus the two hmaps will remain the same size regardless
+ * of how many elements we remove from lflows and add to
+ * single_dp_lflows.
+ * Note - lflows is always sized for max_seen_lflow_size.
+ * If this iteration has resulted in a smaller lflow count,
+ * the correct sizing is from the previous ones.
+ */
+fast_hmap_size_for(_dp_lflows, max_seen_lflow_size);
+
+struct ovn_lflow *lflow, *next_lflow;
+struct hmapx_node *node;
+HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, ) {
+uint32_t hash;
 struct ovn_dp_group *dpg;
 
 ovs_assert(hmapx_count(>od_group));
@@ -13196,17 +13213,24 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 if (hmapx_count(>od_group) == 1) {
 /* There is only one datapath, so it should be moved out of the
  * group to a single 'od'. */
-const struct hmapx_node *node;
 HMAPX_FOR_EACH (node, >od_group) {
 lflow->od = node->data;
 break;
 }
 hmapx_clear(>od_group);
+
 /* Logical flow should be re-hashed later to allow lookups. */
-hmapx_add(_dp_lflows, lflow);
+hash = hmap_node_hash(>hmap_node);
+/* Remove from lflows. */
+hmap_remove(, >hmap_node);
+hash = ovn_logical_flow_hash_datapath(>od->sb->header_.uuid,
+  hash);
+/* Add to single_dp_lflows. */
+hmap_insert_fast(_dp_lflows, >hmap_node, hash);
 continue;
 }
 
+hash = hash_int(hmapx_count(>od_group), 0);
 dpg = ovn_dp_group_find(_groups, >od_group, hash);
 if (!dpg) {
 dpg = xzalloc(sizeof *dpg);
@@ -13216,19 +13240,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 lflow->dpg = dpg;
 }
 
-/* Adding datapath to the flow hash for logical flows that have only one,
- * so they could be found by the southbound db record. */
-const struct hmapx_node *node;
-uint32_t hash;
-HMAPX_FOR_EACH (node, _dp_lflows) {
-lflow = node->data;
-hash = hmap_node_hash(>hmap_node);
-hmap_remove(, >hmap_node);
-hash = ovn_logical_flow_hash_datapath(>od->sb->header_.uuid,
-  hash);
-hmap_insert(, >hmap_node, hash);
-}
-hmapx_destroy(_dp_lflows);
+/* Merge multiple and single dp hashes. */
+
+fast_hmap_merge(, _dp_lflows);
+
+hmap_destroy(_dp_lflows);
 
 /* Push changes to the Logical_Flow table to database. */
 const struct sbrec_logical_flow *sbflow, *next_sbflow;
@@ -13361,7 +13377,6 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 }
 
 stopwatch_stop(LFLOWS_DP_GROUPS_STOPWATCH_NAME, time_msec());
-struct ovn_lflow *next_lflow;
 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, ) {
 const char *pipeline = ovn_stage_get_pipeline_name(lflow->stage);
 uint8_t table = ovn_stage_get_table(lflow->stage);
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-15 Thread Anton Ivanov


On 15/09/2021 13:45, Anton Ivanov wrote:

I have dropped the split processing of lflows for now.


Sorry, pressed send too early by mistake.

V8 has the same code as the existing ovn for single thread and code derived 
from my split processing work for multi-thread.

It is tested to be identical to master in single thread and significantly 
faster (albeit at CPU cost) than single thread if you have at least

8+ threads to work on it.

A.



On 10/09/2021 17:13, Ilya Maximets wrote:

On 9/10/21 4:19 PM, Anton Ivanov wrote:

On 10/09/2021 13:29, Ilya Maximets wrote:

On 9/10/21 2:23 PM, Anton Ivanov wrote:

On 10/09/2021 13:18, Anton Ivanov wrote:

On 10/09/2021 12:43, Ilya Maximets wrote:

On 9/9/21 11:02 PM, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized hashmap 
being unused. Should we keep the hashrow_locks structure and its APIs, for 
instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 215 
 1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
 #include "unixctl.h"
 #include "util.h"
 #include "uuid.h"
+#include "ovs-thread.h"
 #include "openvswitch/vlog.h"
   VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
 static bool use_logical_dp_groups = false;
 static bool use_parallel_build = true;
 -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;

With the change from the custom hashrow_locks to using an ovs_rwlock, I think 
it's important to document what data this lock is intending to protect.

   From what I can tell, this lock specifically is intended to protect access 
to the hmaps in the global lflow_state structure, and it's also intended to 
protect all ovn_datapaths' od_group hmaps. This is not something that is 
immediately obvious just from a global rwlock declaration.


+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+ hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+    } else {
+ hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif

The section above needs a comment to explain why -Wthread-safety is ignored.

Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
the diagnostics.


On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message. Assuming it's
with parallelization enabled, right?

Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.


In any case, some performance testing without parallelization is required before
accepting the change, as it's a default configuration.

They were done. I am somewhat surprised by your measurement, can you please 
provide some more details.

I have databases from a previous 120 node ovn-heater run.
I'm loading them to the OVN sandbox like this:

make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db 
--sbdb-source=/tmp/ovnsb_db.db"

And performing a couple of small operations with northbound database just to
trigger the northd processing loop.

Configuration has dp-groups enabled and parallelization disabled.

Can you run v7 versus that. It should fix 3 performance regressions I found in 
v6. Each of them is relatively small, but they may add up for a large database.

v7 seems only slightly better.  By a couple of percent.
Nowhere close to the current master.

And stopwatches in v7 are still messed up.


I will run an ovn-heater with this, but to be honest - I do not expect any 
difference. I did not see any difference before at the scales I was testing.

If it is still slower than the original code, it will be interesting to find 
the culprit. They 

Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-15 Thread Anton Ivanov

I have dropped the split processing of lflows for now.

On 10/09/2021 17:13, Ilya Maximets wrote:

On 9/10/21 4:19 PM, Anton Ivanov wrote:

On 10/09/2021 13:29, Ilya Maximets wrote:

On 9/10/21 2:23 PM, Anton Ivanov wrote:

On 10/09/2021 13:18, Anton Ivanov wrote:

On 10/09/2021 12:43, Ilya Maximets wrote:

On 9/9/21 11:02 PM, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized hashmap 
being unused. Should we keep the hashrow_locks structure and its APIs, for 
instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
     northd/ovn-northd.c | 215 
     1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
     #include "unixctl.h"
     #include "util.h"
     #include "uuid.h"
+#include "ovs-thread.h"
     #include "openvswitch/vlog.h"
       VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
     static bool use_logical_dp_groups = false;
     static bool use_parallel_build = true;
     -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;

With the change from the custom hashrow_locks to using an ovs_rwlock, I think 
it's important to document what data this lock is intending to protect.

   From what I can tell, this lock specifically is intended to protect access 
to the hmaps in the global lflow_state structure, and it's also intended to 
protect all ovn_datapaths' od_group hmaps. This is not something that is 
immediately obvious just from a global rwlock declaration.


+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+    hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif

The section above needs a comment to explain why -Wthread-safety is ignored.

Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
the diagnostics.


On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message. Assuming it's
with parallelization enabled, right?

Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.


In any case, some performance testing without parallelization required before
accepting the change, as it's a default configuration.

They were done. I am somewhat surprised by your measurement, can you please 
provide some more details.

I have databases from a previous 120 node ovn-heater run.
I'm loading them to the OVN sandbox like this:

make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db 
--sbdb-source=/tmp/ovnsb_db.db"

And performing a couple of small operations with northbound database just to
trigger the northd processing loop.

Configuration has dp-groups enabled and parallelization disabled.

Can you run v7 versus that. It should fix 3 performance regressions I found in 
v6. Each of them is relatively small, but they may add up for a large database.

v7 seems only slightly better.  By a couple of percents.
Nowhere close to the current master.

And stopwatches in v7 are still messed up.


I will run an ovn-heater with this, but to be honest - I do not expect any 
difference. I did not see any difference before at the scales I was testing.

If it is still slower than the original code, it will be interesting to find 
the culprit. They should be nearly identical.

A.


You are saying "database from 120 nodes" - are we talking a stopwatch 
measurement here?

Which is the case. The patch moved it where it should not be. Will be fixed in 
the next revision.

Not only stopwatches.  Poll intervals in general grew too.


A.


Best regards, Ilya Maximets.




--
Anton R. Ivanov

[ovs-dev] [OVN Patch v8 3/3] northd: Restore parallel build with dp_groups

2021-09-15 Thread anton . ivanov
From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 150 +---
 1 file changed, 127 insertions(+), 23 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index ed231510e..34e6ad1a9 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
 #include "unixctl.h"
 #include "util.h"
 #include "uuid.h"
+#include "ovs-thread.h"
 #include "openvswitch/vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4294,6 +4295,7 @@ struct ovn_lflow {
 struct hmap_node hmap_node;
 
 struct ovn_datapath *od; /* 'logical_datapath' in SB schema.  */
+struct ovs_mutex odg_lock;   /* Lock guarding access to od_group */
 struct hmapx od_group;   /* Hash map of 'struct ovn_datapath *'. */
 enum ovn_stage stage;
 uint16_t priority;
@@ -4335,6 +4337,11 @@ ovn_lflow_equal(const struct ovn_lflow *a, const struct 
ovn_datapath *od,
 && !strcmp(a->actions, actions)
 && nullable_string_is_equal(a->ctrl_meter, ctrl_meter));
 }
+/* If this option is 'true' northd will combine logical flows that differ by
+ * logical datapath only by creating a datapath group. */
+static bool use_logical_dp_groups = false;
+static bool use_parallel_build = true;
+
 
 static void
 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
@@ -4353,24 +4360,56 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
 lflow->ctrl_meter = ctrl_meter;
 lflow->dpg = NULL;
 lflow->where = where;
+if (use_parallel_build && use_logical_dp_groups) {
+ovs_mutex_init(>odg_lock);
+}
 }
 
-/* If this option is 'true' northd will combine logical flows that differ by
- * logical datapath only by creating a datapath group. */
-static bool use_logical_dp_groups = false;
-static bool use_parallel_build = true;
+ /* Adds a row with the specified contents to the Logical_Flow table.
+  * Version to use with dp_groups + parallel - when locking is required.
+ *
+ * Assumptions:
+ *
+ * 1. A large proportion of the operations are lookups (reads).
+ * 2. RW operations are a small proportion of overall adds.
+ * 3. Most RW ops are not flow adds, but changes to the
+ * od groups.
+ *
+ * Principles of operation:
+ * 1. All accesses to the flow table are protected by a rwlock.
+ * 2. By default, everyone grabs a rd lock so that multiple threads
+ * can do lookups simultaneously.
+ * 3. If a change to the lflow is needed, the rd lock is released and
+ * a wr lock is acquired instead (the fact that POSIX does not have an
+ * "upgrade" on locks is a major pain, but there is nothing we can do
+ * - it's not available).
+ * 4. WR lock operations in rd/wr locking have LOWER priority than RD.
+ * That is by design and spec. So the code after a request for WR lock
+ * may wait for a considerable amount of time until it is given a
+ * chance to run. That means that another thread may get there in the
+ * meantime and change the data. Hence all wr operations MUST be coded
+ * to ensure that they are not vulnerable to "someone pulled this from
+ * under my feet". Re-reads, checks for presence, etc.
+ * 5. Operations on the actual od_group hash map are protected by
+ * per-flow locks. There is no need for these to be rd, mutex is more
+ * appropriate. They are low contention as each protects only its flow
+ * and only during modification which happen while holding a rd lock on
+ * the flow table.
+ */
 
-static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;
 
 /* Adds a row with the specified contents to the Logical_Flow table.
- * Version to use when locking is required.
+ * Version to use when locking is NOT required.
  */
+
 static struct ovn_lflow *
 do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
  uint32_t hash, enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const struct ovsdb_idl_row *stage_hint,
  const char *where, const char *ctrl_meter)
+ OVS_NO_THREAD_SAFETY_ANALYSIS
 {
 
 struct ovn_lflow *old_lflow;
@@ -4403,6 +4442,59 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
 return lflow;
 }
 
+/* Adds a row with the specified contents to the Logical_Flow table.
+ * Version to use when locking IS required.
+ */
+
+static struct ovn_lflow *
+do_ovn_lflow_add_pd(struct hmap *lflow_map, struct ovn_datapath *od,
+uint32_t hash, enum ovn_stage s

[ovs-dev] [OVN Patch v8 2/3] northd: Resize the hash to correct parameters after build

2021-09-15 Thread anton . ivanov
From: Anton Ivanov 

Parallel builds may result in suboptimal hash bucket sizing.
In the absence of dp-groups this does not matter as the hash
is purely storage and not used for lookups during the build.

Such a hash needs to be resized to a correct size at the end
of the build to ensure that any lookups during the lflow
reconciliation phase are done as fast as possible.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 3113fafc7..ed231510e 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13178,6 +13178,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 igmp_groups, meter_groups, lbs,
 bfd_connections);
 
+/* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+
+hmap_expand();
+
 if (hmap_count() > max_seen_lflow_size) {
 max_seen_lflow_size = hmap_count();
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v8 1/3] northd: Disable parallel processing for logical_dp_groups

2021-09-15 Thread anton . ivanov
From: Anton Ivanov 

Work on improving processing with dp_groups enabled has
discovered that the locking mechanism presently in use
is not reliable. Disabling parallel processing if dp_groups
are enabled until the root cause is determined and fixed.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 2 +-
 ovs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index baaddb73e..3113fafc7 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12974,7 +12974,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, 
struct hmap *ports,
 }
 }
 
-if (use_parallel_build) {
+if (use_parallel_build && (!use_logical_dp_groups)) {
 struct hmap *lflow_segs;
 struct lswitch_flow_build_info *lsiv;
 int index;
diff --git a/ovs b/ovs
index 748010ff3..50e5523b9 16
--- a/ovs
+++ b/ovs
@@ -1 +1 @@
-Subproject commit 748010ff304b7cd2c43f4eb98a554433f0df07f9
+Subproject commit 50e5523b9b2b154e5fafc5acdcdec85e9cc5a330
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-13 Thread Anton Ivanov


On 10/09/2021 16:37, Dumitru Ceara wrote:

On 9/10/21 2:18 PM, Anton Ivanov wrote:

On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message.  Assuming
it's
with parallelization enabled, right?

Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.

Just joining the conversation to mention that ovn-heater was
significantly changed/improved and its tests are way different compared
to a few months ago.

If you're not running the new version of ovn-heater (that doesn't rely
on rally-ovs/ovn-scale-test) it's available here:


Thanks, I updated and it is much better than the old one.

I scaled down the config for 20 heavy to 11 to match my rig (I try to run one node 
- one worker).

Brgds,



https://github.com/dceara/ovn-heater

And the test Ilya referred to is:

https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-heavy.yml

To run it (just replace 20 with 120):
https://github.com/dceara/ovn-heater#example-run-20-nodes-density-heavy

We run these tests weekly on a setup with 30 physical servers on which
we provision 120 "fake" nodes:

https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-light.yml
https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-heavy.yml
https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-cluster-density.yml
https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-np-multitenant.yml

Regards,
Dumitru



--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-13 Thread Anton Ivanov

The culprit has been found and neutralized.

It is now marginally faster (~ 0.5%) than master.

I will do full retests and scale tests for all combinations including parallel 
and if it tests out OK post v8 tomorrow.

A.

On 10/09/2021 17:13, Ilya Maximets wrote:

On 9/10/21 4:19 PM, Anton Ivanov wrote:

On 10/09/2021 13:29, Ilya Maximets wrote:

On 9/10/21 2:23 PM, Anton Ivanov wrote:

On 10/09/2021 13:18, Anton Ivanov wrote:

On 10/09/2021 12:43, Ilya Maximets wrote:

On 9/9/21 11:02 PM, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized hashmap 
being unused. Should we keep the hashrow_locks structure and its APIs, for 
instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
     northd/ovn-northd.c | 215 
     1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
     #include "unixctl.h"
     #include "util.h"
     #include "uuid.h"
+#include "ovs-thread.h"
     #include "openvswitch/vlog.h"
       VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
     static bool use_logical_dp_groups = false;
     static bool use_parallel_build = true;
     -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;

With the change from the custom hashrow_locks to using an ovs_rwlock, I think 
it's important to document what data this lock is intending to protect.

   From what I can tell, this lock specifically is intended to protect access 
to the hmaps in the global lflow_state structure, and it's also intended to 
protect all ovn_datapaths' od_group hmaps. This is not something that is 
immediately obvious just from a global rwlock declaration.


+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+    hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif

The section above needs a comment to explain why -Wthread-safety is ignored.

Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
the diagnostics.


On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message. Assuming it's
with parallelization enabled, right?

Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.


In any case, some performance testing without parallelization required before
accepting the change, as it's a default configuration.

They were done. I am somewhat surprised by your measurement, can you please 
provide some more details.

I have databases from a previous 120 node ovn-heater run.
I'm loading them to the OVN sandbox like this:

make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db 
--sbdb-source=/tmp/ovnsb_db.db"

And performing a couple of small operations with northbound database just to
trigger the northd processing loop.

Configuration has dp-groups enabled and parallelization disabled.

Can you run v7 versus that. It should fix 3 performance regressions I found in 
v6. Each of them is relatively small, but they may add up for a large database.

v7 seems only slightly better.  By a couple of percents.
Nowhere close to the current master.

And stopwatches in v7 are still messed up.


I will run an ovn-heater with this, but to be honest - I do not expect any 
difference. I did not see any difference before at the scales I was testing.

If it is still slower than the original code, it will be interesting to find 
the culprit. They should be nearly identical.

A.


You are saying "database from 120 nodes" - are we talking a stopwatch 
measurement here?

Which is the case. The patch moved it where it should not be. Will be fixed in 
the

Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread Anton Ivanov


On 10/09/2021 17:13, Ilya Maximets wrote:

On 9/10/21 4:19 PM, Anton Ivanov wrote:

On 10/09/2021 13:29, Ilya Maximets wrote:

On 9/10/21 2:23 PM, Anton Ivanov wrote:

On 10/09/2021 13:18, Anton Ivanov wrote:

On 10/09/2021 12:43, Ilya Maximets wrote:

On 9/9/21 11:02 PM, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized hashmap 
being unused. Should we keep the hashrow_locks structure and its APIs, for 
instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
     northd/ovn-northd.c | 215 
     1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
     #include "unixctl.h"
     #include "util.h"
     #include "uuid.h"
+#include "ovs-thread.h"
     #include "openvswitch/vlog.h"
       VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
     static bool use_logical_dp_groups = false;
     static bool use_parallel_build = true;
     -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;

With the change from the custom hashrow_locks to using an ovs_rwlock, I think 
it's important to document what data this lock is intending to protect.

   From what I can tell, this lock specifically is intended to protect access 
to the hmaps in the global lflow_state structure, and it's also intended to 
protect all ovn_datapaths' od_group hmaps. This is not something that is 
immediately obvious just from a global rwlock declaration.


+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+    hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif

The section above needs a comment to explain why -Wthread-safety is ignored.

Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
the diagnostics.


On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message. Assuming it's
with parallelization enabled, right?

Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.


In any case, some performance testing without parallelization required before
accepting the change, as it's a default configuration.

They were done. I am somewhat surprised by your measurement, can you please 
provide some more details.

I have databases from a previous 120 node ovn-heater run.
I'm loading them to the OVN sandbox like this:

make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db 
--sbdb-source=/tmp/ovnsb_db.db"

And performing a couple of small operations with northbound database just to
trigger the northd processing loop.

Configuration has dp-groups enabled and parallelization disabled.

Can you run v7 versus that. It should fix 3 performance regressions I found in 
v6. Each of them is relatively small, but they may add up for a large database.

v7 seems only slightly better.  By a couple of percents.
Nowhere close to the current master.

And stopwatches in v7 are still messed up.


Thanks, I will continue digging.

Brgds,

A.


I will run an ovn-heater with this, but to be honest - I do not expect any 
difference. I did not see any difference before at the scales I was testing.

If it is still slower than the original code, it will be interesting to find 
the culprit. They should be nearly identical.

A.


You are saying "database from 120 nodes" - are we talking a stopwatch 
measurement here?

Which is the case. The patch moved it where it should not be. Will be fixed in 
the next revision.

Not only stopwatches.  Poll intervals in general grew too.


A.


Best regards, Ilya Maximets.




--
Anton R. Ivanov

Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread Anton Ivanov

Thanks, I will update to the newer version.

A.

On 10/09/2021 16:37, Dumitru Ceara wrote:

On 9/10/21 2:18 PM, Anton Ivanov wrote:

On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message.  Assuming
it's
with parallelization enabled, right?

Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.

Just joining the conversation to mention that ovn-heater was
significantly changed/improved and its tests are way different compared
to a few months ago.

If you're not running the new version of ovn-heater (that doesn't rely
on rally-ovs/ovn-scale-test) it's available here:

https://github.com/dceara/ovn-heater

And the test Ilya referred to is:

https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-heavy.yml

To run it (just replace 20 with 120):
https://github.com/dceara/ovn-heater#example-run-20-nodes-density-heavy

We run these tests weekly on a setup with 30 physical servers on which
we provision 120 "fake" nodes:

https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-light.yml
https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-heavy.yml
https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-cluster-density.yml
https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-np-multitenant.yml

Regards,
Dumitru




--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread Anton Ivanov


On 10/09/2021 13:29, Ilya Maximets wrote:

On 9/10/21 2:23 PM, Anton Ivanov wrote:

On 10/09/2021 13:18, Anton Ivanov wrote:

On 10/09/2021 12:43, Ilya Maximets wrote:

On 9/9/21 11:02 PM, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized hashmap 
being unused. Should we keep the hashrow_locks structure and its APIs, for 
instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
    northd/ovn-northd.c | 215 
    1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
    #include "unixctl.h"
    #include "util.h"
    #include "uuid.h"
+#include "ovs-thread.h"
    #include "openvswitch/vlog.h"
      VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
    static bool use_logical_dp_groups = false;
    static bool use_parallel_build = true;
    -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;

With the change from the custom hashrow_locks to using an ovs_rwlock, I think 
it's important to document what data this lock is intending to protect.

  From what I can tell, this lock specifically is intended to protect access to 
the hmaps in the global lflow_state structure, and it's also intended to 
protect all ovn_datapaths' od_group hmaps. This is not something that is 
immediately obvious just from a global rwlock declaration.


+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+    hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif

The section above needs a comment to explain why -Wthread-safety is ignored.

Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
the diagnostics.


On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message. Assuming it's
with parallelization enabled, right?

Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.


In any case, some performance testing without parallelization required before
accepting the change, as it's a default configuration.

They were done. I am somewhat surprised by your measurement, can you please 
provide some more details.

I have databases from a previous 120 node ovn-heater run.
I'm loading them to the OVN sandbox like this:

make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db 
--sbdb-source=/tmp/ovnsb_db.db"

And performing a couple of small operations with northbound database just to
trigger the northd processing loop.

Configuration has dp-groups enabled and parallelization disabled.


Can you run v7 versus that. It should fix 3 performance regressions I found in 
v6. Each of them is relatively small, but they may add up for a large database.

I will run an ovn-heater with this, but to be honest - I do not expect any 
difference. I did not see any difference before at the scales I was testing.

If it is still slower than the original code, it will be interesting to find 
the culprit. They should be nearly identical.

A.




You are saying "database from 120 nodes" - are we talking a stopwatch 
measurement here?

Which is the case. The patch moved it where it should not be. Will be fixed in 
the next revision.

Not only stopwatches.  Poll intervals in general grew too.


A.


Best regards, Ilya Maximets.




--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v7 3/4] northd: Optimize dp groups operations

2021-09-10 Thread anton . ivanov
From: Anton Ivanov 

Remove full hash walks to form lflow dp_groups and add them
to the overall parallelizable lflow build.

Make processing of "with dp groups" and "without" in
build_lflows independent to allow these to run in parallel
after the updates to the parallel API have been merged.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 553 +++-
 1 file changed, 344 insertions(+), 209 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 6dfb4327a..aee5b9508 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -86,6 +86,11 @@ struct northd_state {
 bool paused;
 };
 
+struct lflow_state {
+struct hmap single_od;
+struct hmap multiple_od;
+};
+
 static const char *ovnnb_db;
 static const char *ovnsb_db;
 static const char *unixctl_path;
@@ -4306,8 +4311,15 @@ struct ovn_lflow {
 const char *where;
 };
 
-static void ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow);
-static struct ovn_lflow *ovn_lflow_find(const struct hmap *lflows,
+static void ovn_lflow_destroy(struct lflow_state *lflows,
+  struct ovn_lflow *lflow);
+static struct ovn_lflow *do_ovn_lflow_find(const struct hmap *lflows,
+const struct ovn_datapath *od,
+enum ovn_stage stage,
+uint16_t priority, const char *match,
+const char *actions,
+const char *ctrl_meter, uint32_t hash);
+static struct ovn_lflow *ovn_lflow_find(const struct lflow_state *lflows,
 const struct ovn_datapath *od,
 enum ovn_stage stage,
 uint16_t priority, const char *match,
@@ -4366,7 +4378,7 @@ static struct hashrow_locks lflow_locks;
  * Version to use when locking is required.
  */
 static struct ovn_lflow *
-do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
+do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
  uint32_t hash, enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const struct ovsdb_idl_row *stage_hint,
@@ -4377,10 +4389,33 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
 struct ovn_lflow *lflow;
 
 if (use_logical_dp_groups) {
-old_lflow = ovn_lflow_find(lflow_map, NULL, stage, priority, match,
-   actions, ctrl_meter, hash);
+/* Look up in multiple first. */
+old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL, stage,
+  priority, match,
+  actions, ctrl_meter, hash);
 if (old_lflow) {
 hmapx_add(_lflow->od_group, od);
+} else {
+/* Not found, lookup in single od. */
+old_lflow = do_ovn_lflow_find(_map->single_od, NULL,
+  stage, priority, match,
+  actions, ctrl_meter, hash);
+if (old_lflow) {
+hmapx_add(_lflow->od_group, od);
+/* Found, different, od count went up. Move to multiple od. */
+if (hmapx_count(_lflow->od_group) > 1) {
+hmap_remove(_map->single_od, _lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(_map->multiple_od,
+ _lflow->hmap_node, hash);
+} else {
+hmap_insert(_map->multiple_od,
+_lflow->hmap_node, hash);
+}
+}
+}
+}
+if (old_lflow) {
 return old_lflow;
 }
 }
@@ -4395,16 +4430,20 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
nullable_xstrdup(ctrl_meter),
ovn_lflow_hint(stage_hint), where);
 hmapx_add(>od_group, od);
+
+/* Insert "fresh" lflows into single_od. */
+
 if (!use_parallel_build) {
-hmap_insert(lflow_map, >hmap_node, hash);
+hmap_insert(_map->single_od, >hmap_node, hash);
 } else {
-hmap_insert_fast(lflow_map, >hmap_node, hash);
+hmap_insert_fast(_map->single_od, >hmap_node, hash);
 }
 return lflow;
 }
 
 static struct ovn_lflow *
-ovn_lflow_add_at_with_hash(struct hmap *lflow_map, struct ovn_datapath *od,
+ovn_lflow_add_at_with_hash(struct lflow_state *lflow_map,
+   struct ovn_datapath *od,
enum ovn_stage stage, uint16_t priority,
   

[ovs-dev] [OVN Patch v7 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread anton . ivanov
From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 299 ++--
 1 file changed, 234 insertions(+), 65 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index aee5b9508..16e39ec5e 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
 #include "unixctl.h"
 #include "util.h"
 #include "uuid.h"
+#include "ovs-thread.h"
 #include "openvswitch/vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,72 +4373,219 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
 static bool use_logical_dp_groups = false;
 static bool use_parallel_build = true;
 
-static struct hashrow_locks lflow_locks;
+/* This lock is used to lock the lflow table and all related structures.
+ * It cannot be a mutex, because most of the accesses are read and there is
+ * only an occasional write change.
+ */
+
+static struct ovs_rwlock flowtable_lock;
+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+hmapx_add(_lflow->od_group, od);
+hmap_remove(_map->single_od, _lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+} else {
+hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+}
+}
 
 /* Adds a row with the specified contents to the Logical_Flow table.
- * Version to use when locking is required.
+ *
+ * Assumptions:
+ *
+ * 1. A large proportion of the operations are lookups (reads).
+ * 2. RW operations are a small proportion of overall adds.
+ *
+ * Principles of operation:
+ * 1. All accesses to the flow table are protected by a rwlock.
+ * 2. By default, everyone grabs a rd lock so that multiple threads
+ * can do lookups simultaneously.
+ * 3. If a change is needed, the rd lock is released and a wr lock
+ * is acquired instead (the fact that POSIX does not have an "upgrade"
+ * on locks is a major pain, but there is nothing we can do - it's not
+ * there).
+ * 4. WR lock operations in rd/wr locking have LOWER priority than RD.
+ * That is by design and spec. So a request for WR lock may wait for a
+ * considerable amount of time until it is given a chance to lock. That
+ * means that another thread may get there in the meantime and change
+ * the data. Hence all wr operations MUST be coded to ensure that they
+ * are not vulnerable to "someone pulled this from under my feet". Re-
+ * reads, checks for presence, etc.
+ */
+
+/* The code which follows is executed both single-threaded and in parallel.
+ * When executed as a single thread, locking, re-reading after a lock change
+ * from rd to wr, etc. are not needed and that path does not lock.
+ * The clang thread safety analyzer cannot quite get that idea, so we have to
+ * disable it.
  */
+
 static struct ovn_lflow *
 do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
  uint32_t hash, enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const struct ovsdb_idl_row *stage_hint,
  const char *where, const char *ctrl_meter)
+ OVS_NO_THREAD_SAFETY_ANALYSIS
 {
 
 struct ovn_lflow *old_lflow;
 struct ovn_lflow *lflow;
 
 if (use_logical_dp_groups) {
-/* Look up in multiple first. */
-old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL, stage,
-  priority, match,
+if (use_parallel_build) {
+/* Fast Path. In case we run in parallel, see if we
+ * can get away without writing - grab a rdlock and check
+ * if we can get away with as little work as possible.
+ */
+ovs_rwlock_rdlock(_lock);
+}
+
+/* Look up multiple_od first. That is the more common
+ * lookup.
+ */
+
+old_lflow = do_ovn_lflow_find(_map->multiple_od,
+  NULL, stage, priority, match,
   actions, ctrl_meter, hash);
+
 if (old_lflow) {
-hmapx_add(_lflow->od_group, od);
+/* Found, amend od_group. */
+if (!use_parallel_build) {
+hmapx_add(_lflow->od_group, od);
+} else {
+/* See if we need to add this od before upgrading
+

[ovs-dev] [OVN Patch v7 2/4] northd: Resize the hash to correct parameters after build

2021-09-10 Thread anton . ivanov
From: Anton Ivanov 

Parallel builds may result in suboptimal hash bucket sizing.
In the absence of dp-groups this does not matter as the hash
is purely storage and not used for lookups during the build.

Such a hash needs to be resized to a correct size at the end
of the build to ensure that any lookups during the lflow
reconciliation phase are done as fast as possible.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 2474d1ca4..6dfb4327a 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13178,6 +13178,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 igmp_groups, meter_groups, lbs,
 bfd_connections);
 
+/* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+
+hmap_expand();
+
 if (hmap_count() > max_seen_lflow_size) {
 max_seen_lflow_size = hmap_count();
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v7 1/4] northd: Disable parallel processing for logical_dp_groups

2021-09-10 Thread anton . ivanov
From: Anton Ivanov 

Work on improving processing with dp_groups enabled has
discovered that the locking mechanism presently in use
is not reliable. Disabling parallel processing if dp_groups
are enabled until the root cause is determined and fixed.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 2 +-
 ovs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index ee761cef0..2474d1ca4 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12974,7 +12974,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, 
struct hmap *ports,
 }
 }
 
-if (use_parallel_build) {
+if (use_parallel_build && (!use_logical_dp_groups)) {
 struct hmap *lflow_segs;
 struct lswitch_flow_build_info *lsiv;
 int index;
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread Anton Ivanov


On 10/09/2021 13:49, Anton Ivanov wrote:


On 10/09/2021 13:34, Anton Ivanov wrote:


On 10/09/2021 13:29, Ilya Maximets wrote:

On 9/10/21 2:23 PM, Anton Ivanov wrote:

On 10/09/2021 13:18, Anton Ivanov wrote:

On 10/09/2021 12:43, Ilya Maximets wrote:

On 9/9/21 11:02 PM, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized hashmap 
being unused. Should we keep the hashrow_locks structure and its APIs, for 
instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
    northd/ovn-northd.c | 215 
    1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
    #include "unixctl.h"
    #include "util.h"
    #include "uuid.h"
+#include "ovs-thread.h"
    #include "openvswitch/vlog.h"
      VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
    static bool use_logical_dp_groups = false;
    static bool use_parallel_build = true;
    -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;

With the change from the custom hashrow_locks to using an ovs_rwlock, I think 
it's important to document what data this lock is intending to protect.

  From what I can tell, this lock specifically is intended to protect access to 
the hmaps in the global lflow_state structure, and it's also intended to 
protect all ovn_datapaths' od_group hmaps. This is not something that is 
immediately obvious just from a global rwlock declaration.


+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+ hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif

The section above needs a comment to explain why -Wthread-safety is ignored.

Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
the diagnostics.


On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message. Assuming it's
with parallelization enabled, right?

Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.


In any case, some performance testing without parallelization required before
accepting the change, as it's a default configuration.

They were done. I am somewhat surprised by your measurement, can you please 
provide some more details.

I have databases from a previous 120 node ovn-heater run.
I'm loading them to the OVN sandbox like this:

make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db 
--sbdb-source=/tmp/ovnsb_db.db"

And performing a couple of small operations with northbound database just to
trigger the northd processing loop.

Configuration has dp-groups enabled and parallelization disabled.


You are saying "database from 120 nodes" - are we talking a stopwatch 
measurement here?

Which is the case. The patch moved it where it should not be. Will be fixed in 
the next revision.

Not only stopwatches.  Poll intervals in general grew too.


OK. Looking at it. There may be a slight increase due to extra lookup in 
ovn_lflow_destroy, but it should not be anywhere near 10%.


The other one is that while there are more single od flows, in most cases 
ovn_lookup_flow will return a multiple od flows. That is how the arguments to 
ovn_lookup_flow pan out. Ditto for flow insertion. That lookup is probably in 
the wrong order. It should lookup


Apologies, pressed send by mistake.

Wanted to say - it should look at multiple flows first, then at single (though 
it will be good to measure this properly).

A.









A.


Best regards, Ilya Maximets.




--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegr

Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread Anton Ivanov


On 10/09/2021 13:34, Anton Ivanov wrote:


On 10/09/2021 13:29, Ilya Maximets wrote:

On 9/10/21 2:23 PM, Anton Ivanov wrote:

On 10/09/2021 13:18, Anton Ivanov wrote:

On 10/09/2021 12:43, Ilya Maximets wrote:

On 9/9/21 11:02 PM, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized hashmap 
being unused. Should we keep the hashrow_locks structure and its APIs, for 
instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
    northd/ovn-northd.c | 215 
    1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
    #include "unixctl.h"
    #include "util.h"
    #include "uuid.h"
+#include "ovs-thread.h"
    #include "openvswitch/vlog.h"
      VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
    static bool use_logical_dp_groups = false;
    static bool use_parallel_build = true;
    -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;

With the change from the custom hashrow_locks to using an ovs_rwlock, I think 
it's important to document what data this lock is intending to protect.

  From what I can tell, this lock specifically is intended to protect access to 
the hmaps in the global lflow_state structure, and it's also intended to 
protect all ovn_datapaths' od_group hmaps. This is not something that is 
immediately obvious just from a global rwlock declaration.


+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+ hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif

The section above needs a comment to explain why -Wthread-safety is ignored.

Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
the diagnostics.


On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message. Assuming it's
with parallelization enabled, right?

Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.


In any case, some performance testing without parallelization required before
accepting the change, as it's a default configuration.

They were done. I am somewhat surprised by your measurement, can you please 
provide some more details.

I have databases from a previous 120 node ovn-heater run.
I'm loading them to the OVN sandbox like this:

make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db 
--sbdb-source=/tmp/ovnsb_db.db"

And performing a couple of small operations with northbound database just to
trigger the northd processing loop.

Configuration has dp-groups enabled and parallelization disabled.


You are saying "database from 120 nodes" - are we talking a stopwatch 
measurement here?

Which is the case. The patch moved it where it should not be. Will be fixed in 
the next revision.

Not only stopwatches.  Poll intervals in general grew too.


OK. Looking at it. There may be a slight increase due to extra lookup in 
ovn_lflow_destroy, but it should not be anywhere near 10%.


The other one is that while there are more single od flows, in most cases 
ovn_lookup_flow will return a multiple od flows. That is how the arguments to 
ovn_lookup_flow pan out. Ditto for flow insertion. That lookup is probably in 
the wrong order. It should lookup






A.


Best regards, Ilya Maximets.




--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread Anton Ivanov


On 10/09/2021 13:29, Ilya Maximets wrote:

On 9/10/21 2:23 PM, Anton Ivanov wrote:

On 10/09/2021 13:18, Anton Ivanov wrote:

On 10/09/2021 12:43, Ilya Maximets wrote:

On 9/9/21 11:02 PM, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized hashmap 
being unused. Should we keep the hashrow_locks structure and its APIs, for 
instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
    northd/ovn-northd.c | 215 
    1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
    #include "unixctl.h"
    #include "util.h"
    #include "uuid.h"
+#include "ovs-thread.h"
    #include "openvswitch/vlog.h"
      VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
    static bool use_logical_dp_groups = false;
    static bool use_parallel_build = true;
    -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;

With the change from the custom hashrow_locks to using an ovs_rwlock, I think 
it's important to document what data this lock is intending to protect.

  From what I can tell, this lock specifically is intended to protect access to 
the hmaps in the global lflow_state structure, and it's also intended to 
protect all ovn_datapaths' od_group hmaps. This is not something that is 
immediately obvious just from a global rwlock declaration.


+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+    hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif

The section above needs a comment to explain why -Wthread-safety is ignored.

Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
the diagnostics.


On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message. Assuming it's
with parallelization enabled, right?

Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.


In any case, some performance testing without parallelization required before
accepting the change, as it's a default configuration.

They were done. I am somewhat surprised by your measurement, can you please 
provide some more details.

I have databases from a previous 120 node ovn-heater run.
I'm loading them to the OVN sandbox like this:

make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db 
--sbdb-source=/tmp/ovnsb_db.db"

And performing a couple of small operations with northbound database just to
trigger the northd processing loop.

Configuration has dp-groups enabled and parallelization disabled.


You are saying "database from 120 nodes" - are we talking a stopwatch 
measurement here?

Which is the case. The patch moved it where it should not be. Will be fixed in 
the next revision.

Not only stopwatches.  Poll intervals in general grew too.


OK. Looking at it. There may be a slight increase due to extra lookup in 
ovn_lflow_destroy, but it should not be anywhere near 10%.




A.


Best regards, Ilya Maximets.




--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread Anton Ivanov


On 10/09/2021 13:18, Anton Ivanov wrote:


On 10/09/2021 12:43, Ilya Maximets wrote:

On 9/9/21 11:02 PM, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized hashmap 
being unused. Should we keep the hashrow_locks structure and its APIs, for 
instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
   northd/ovn-northd.c | 215 
   1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
   #include "unixctl.h"
   #include "util.h"
   #include "uuid.h"
+#include "ovs-thread.h"
   #include "openvswitch/vlog.h"
     VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
   static bool use_logical_dp_groups = false;
   static bool use_parallel_build = true;
   -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;

With the change from the custom hashrow_locks to using an ovs_rwlock, I think 
it's important to document what data this lock is intending to protect.

 From what I can tell, this lock specifically is intended to protect access to 
the hmaps in the global lflow_state structure, and it's also intended to 
protect all ovn_datapaths' od_group hmaps. This is not something that is 
immediately obvious just from a global rwlock declaration.


+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+    hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif

The section above needs a comment to explain why -Wthread-safety is ignored.

Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
the diagnostics.


On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message. Assuming it's
with parallelization enabled, right?


Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.



In any case, some performance testing without parallelization required before
accepting the change, as it's a default configuration.


They were done. I am somewhat surprised by your measurement, can you please 
provide some more details.

You are saying "database from 120 nodes" - are we talking a stopwatch 
measurement here?


Which is the case. The patch moved it where it should not be. Will be fixed in 
the next revision.

A.





Best regards, Ilya Maximets.


--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread Anton Ivanov


On 10/09/2021 12:43, Ilya Maximets wrote:

On 9/9/21 11:02 PM, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized hashmap 
being unused. Should we keep the hashrow_locks structure and its APIs, for 
instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
   northd/ovn-northd.c | 215 
   1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
   #include "unixctl.h"
   #include "util.h"
   #include "uuid.h"
+#include "ovs-thread.h"
   #include "openvswitch/vlog.h"
     VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
   static bool use_logical_dp_groups = false;
   static bool use_parallel_build = true;
   -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;

With the change from the custom hashrow_locks to using an ovs_rwlock, I think 
it's important to document what data this lock is intending to protect.

 From what I can tell, this lock specifically is intended to protect access to 
the hmaps in the global lflow_state structure, and it's also intended to 
protect all ovn_datapaths' od_group hmaps. This is not something that is 
immediately obvious just from a global rwlock declaration.


+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+    hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif

The section above needs a comment to explain why -Wthread-safety is ignored.

Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
the diagnostics.


On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message.  Assuming it's
with parallelization enabled, right?


Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.



In any case, some performance testing without parallelization required before
accepting the change, as it's a default configuration.


They were done. I am somewhat surprised by your measurement, can you please 
provide some more details.

You are saying "database from 120 nodes" - are we talking a stopwatch 
measurement here?



Best regards, Ilya Maximets.


--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread Anton Ivanov

Hi Mark,

I have it rebased and amended to address the comments on my branch with the "Thread 
API changes"  where the thread pools can be adjusted and that is ready to be posted.

I can do a version off master as well, but that will need a rebase if we merge 
the thread api changes.

All up to you - which one do you want and in which order?

A.

On 09/09/2021 22:02, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized hashmap 
being unused. Should we keep the hashrow_locks structure and its APIs, for 
instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
  northd/ovn-northd.c | 215 
  1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
  #include "unixctl.h"
  #include "util.h"
  #include "uuid.h"
+#include "ovs-thread.h"
  #include "openvswitch/vlog.h"
    VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
  static bool use_logical_dp_groups = false;
  static bool use_parallel_build = true;
  -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;


With the change from the custom hashrow_locks to using an ovs_rwlock, I think 
it's important to document what data this lock is intending to protect.

From what I can tell, this lock specifically is intended to protect access to 
the hmaps in the global lflow_state structure, and it's also intended to 
protect all ovn_datapaths' od_group hmaps. This is not something that is 
immediately obvious just from a global rwlock declaration.


+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+    hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif


The section above needs a comment to explain why -Wthread-safety is ignored.


    /* Adds a row with the specified contents to the Logical_Flow table.
   * Version to use when locking is required.
@@ -4388,55 +4408,127 @@ do_ovn_lflow_add(struct lflow_state *lflow_map, struct 
ovn_datapath *od,
  struct ovn_lflow *old_lflow;
  struct ovn_lflow *lflow;
  +    /* Fast Path.
+ * See if we can get away without writing - grab a rdlock and check
+ * if we can get away with as little work as possible.
+ */
+
  if (use_logical_dp_groups) {
-    old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage,
-  priority, match,
+    if (use_parallel_build) {
+    ovs_rwlock_rdlock(_lock);
+    }
+    old_lflow = do_ovn_lflow_find(_map->single_od,
+  NULL, stage, priority, match,
    actions, ctrl_meter, hash);
  if (old_lflow) {
-    hmapx_add(_lflow->od_group, od);
-    /* Found, different, od count went up. Move to multiple od. */
-    if (hmapx_count(_lflow->od_group) > 1) {
-    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (!hmapx_contains(_lflow->od_group, od)) {
+    /* od not in od_group, we need to add it and move to
+ * multiple. */
  if (use_parallel_build) {
- hmap_insert_fast(_map->multiple_od,
- _lflow->hmap_node, hash);
-    } else {
-    hmap_insert(_map->multiple_od,
-    _lflow->hmap_node, hash);
+    /* Upgrade the lock to write, we are likely to
+ * modify data. */
+    ovs_rwlock_unlock(_lock);
+    ovs_rwlock_wrlock(_lock);
+
+    /* Check if someone got ahead of us and the flow is already
+ * in multiple. */
+    if (!hmap_contains(_map->single_od,
+ _lflow->hmap_node)) {


The l

Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread Anton Ivanov

On 09/09/2021 22:02, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized 
hashmap being unused. Should we keep the hashrow_locks structure and 
its APIs, for instance?


See below for more comments.


Addressing those separately from the API discussion.



On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
  northd/ovn-northd.c | 215 
  1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
  #include "unixctl.h"
  #include "util.h"
  #include "uuid.h"
+#include "ovs-thread.h"
  #include "openvswitch/vlog.h"
    VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,

  static bool use_logical_dp_groups = false;
  static bool use_parallel_build = true;
  -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;


With the change from the custom hashrow_locks to using an ovs_rwlock, 
I think it's important to document what data this lock is intending to 
protect.


From what I can tell, this lock specifically is intended to protect 
access to the hmaps in the global lflow_state structure, and it's also 
intended to protect all ovn_datapaths' od_group hmaps. This is not 
something that is immediately obvious just from a global rwlock 
declaration.



Ack.





+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+    hmap_insert_fast(_map->multiple_od, 
_lflow->hmap_node, hash);

+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, 
hash);

+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif


The section above needs a comment to explain why -Wthread-safety is 
ignored.



Ack.





    /* Adds a row with the specified contents to the Logical_Flow table.
   * Version to use when locking is required.
@@ -4388,55 +4408,127 @@ do_ovn_lflow_add(struct lflow_state 
*lflow_map, struct ovn_datapath *od,

  struct ovn_lflow *old_lflow;
  struct ovn_lflow *lflow;
  +    /* Fast Path.
+ * See if we can get away without writing - grab a rdlock and check
+ * if we can get away with as little work as possible.
+ */
+
  if (use_logical_dp_groups) {
-    old_lflow = do_ovn_lflow_find(_map->single_od, NULL, 
stage,

-  priority, match,
+    if (use_parallel_build) {
+    ovs_rwlock_rdlock(_lock);
+    }
+    old_lflow = do_ovn_lflow_find(_map->single_od,
+  NULL, stage, priority, match,
    actions, ctrl_meter, hash);
  if (old_lflow) {
-    hmapx_add(_lflow->od_group, od);
-    /* Found, different, od count went up. Move to multiple 
od. */

-    if (hmapx_count(_lflow->od_group) > 1) {
-    hmap_remove(_map->single_od, 
_lflow->hmap_node);

+    if (!hmapx_contains(_lflow->od_group, od)) {
+    /* od not in od_group, we need to add it and move to
+ * multiple. */
  if (use_parallel_build) {
- hmap_insert_fast(_map->multiple_od,
- _lflow->hmap_node, hash);
-    } else {
-    hmap_insert(_map->multiple_od,
-    _lflow->hmap_node, hash);
+    /* Upgrade the lock to write, we are likely to
+ * modify data. */
+    ovs_rwlock_unlock(_lock);
+    ovs_rwlock_wrlock(_lock);
+
+    /* Check if someone got ahead of us and the flow 
is already

+ * in multiple. */
+    if (!hmap_contains(_map->single_od,
+ _lflow->hmap_node)) {


The logic here is fine, but that comment paired with that if statement 
is strange. Either


a) Change the comment to say "Check if someone got ahead of us and the 
flow has been removed from single."


Ack.



or

b) Change the if statement

Re: [ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-10 Thread Anton Ivanov

On 09/09/2021 22:02, Mark Michelson wrote:

Hi Anton,

On a high level, this change results in some parts of the parallelized 
hashmap being unused. Should we keep the hashrow_locks structure and 
its APIs, for instance?


It is a general method of fine grain locking of hashes for parallel 
processing. While it is no longer used in lflow generation, it may stay 
as it may come in handy elsewhere.


Some renaming is probably in order - the names are too lflow specific.

A.



See below for more comments.

On 9/2/21 9:27 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
  northd/ovn-northd.c | 215 
  1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
  #include "unixctl.h"
  #include "util.h"
  #include "uuid.h"
+#include "ovs-thread.h"
  #include "openvswitch/vlog.h"
    VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,

  static bool use_logical_dp_groups = false;
  static bool use_parallel_build = true;
  -static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;


With the change from the custom hashrow_locks to using an ovs_rwlock, 
I think it's important to document what data this lock is intending to 
protect.


From what I can tell, this lock specifically is intended to protect 
access to the hmaps in the global lflow_state structure, and it's also 
intended to protect all ovn_datapaths' od_group hmaps. This is not 
something that is immediately obvious just from a global rwlock 
declaration.



+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+    hmapx_add(_lflow->od_group, od);
+    hmap_remove(_map->single_od, _lflow->hmap_node);
+    if (use_parallel_build) {
+    hmap_insert_fast(_map->multiple_od, 
_lflow->hmap_node, hash);

+    } else {
+    hmap_insert(_map->multiple_od, _lflow->hmap_node, 
hash);

+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif


The section above needs a comment to explain why -Wthread-safety is 
ignored.



    /* Adds a row with the specified contents to the Logical_Flow table.
   * Version to use when locking is required.
@@ -4388,55 +4408,127 @@ do_ovn_lflow_add(struct lflow_state 
*lflow_map, struct ovn_datapath *od,

  struct ovn_lflow *old_lflow;
  struct ovn_lflow *lflow;
  +    /* Fast Path.
+ * See if we can get away without writing - grab a rdlock and check
+ * if we can get away with as little work as possible.
+ */
+
  if (use_logical_dp_groups) {
-    old_lflow = do_ovn_lflow_find(_map->single_od, NULL, 
stage,

-  priority, match,
+    if (use_parallel_build) {
+    ovs_rwlock_rdlock(_lock);
+    }
+    old_lflow = do_ovn_lflow_find(_map->single_od,
+  NULL, stage, priority, match,
    actions, ctrl_meter, hash);
  if (old_lflow) {
-    hmapx_add(_lflow->od_group, od);
-    /* Found, different, od count went up. Move to multiple 
od. */

-    if (hmapx_count(_lflow->od_group) > 1) {
-    hmap_remove(_map->single_od, 
_lflow->hmap_node);

+    if (!hmapx_contains(_lflow->od_group, od)) {
+    /* od not in od_group, we need to add it and move to
+ * multiple. */
  if (use_parallel_build) {
- hmap_insert_fast(_map->multiple_od,
- _lflow->hmap_node, hash);
-    } else {
-    hmap_insert(_map->multiple_od,
-    _lflow->hmap_node, hash);
+    /* Upgrade the lock to write, we are likely to
+ * modify data. */
+    ovs_rwlock_unlock(_lock);
+    ovs_rwlock_wrlock(_lock);
+
+    /* Check if someone got ahead of us and the flow 
is already

+ * in multiple. */
+    if (!hmap_contains(_map->single_od,
+ _lflow->hmap_node)) {


The logic here is fine, but that comment paired with that if statement 
is strange. E

Re: [ovs-dev] [OVN Patch] Make changes to the parallel processing API to allow pool sizing

2021-09-07 Thread Anton Ivanov

I also have a rebase of the dp-groups improvements and parallelization on top of
the API changes.

I will hold off sending them so that we do not have conflicting patch versions 
and dependencies in patchwork.

IMHO it will be easier if we get the API changes in first, then the dp groups 
and their parallelization.

A.

On 02/09/2021 19:30, Numan Siddique wrote:

On Mon, Aug 16, 2021 at 1:31 PM  wrote:


From: Anton Ivanov 

1. Make pool size user-definable.
2. Expose pool destruction.
3. Make pools resizeable at runtime.

Signed-off-by: Anton Ivanov 


Hi Anton,

Thanks for the patch.  If I understand correctly this patch adds the
ability to override the pool
size.  What is missing is the usage of the API from ovn-northd.  Is
there a plan to have a follow up
patch which makes use of this ?

I'd expect a config option for the user to specify the pool size and
ovn-northd calling add_worker_pool()
with the configured pool size.  Right now I see it's called with 0.

Also in order for us to be more confident and adopt parallel
processing we need to enable
parallel runs in CI.

For this to happen, there should be a config option in NB_Global to
force parallel processing.
The parallel processing library function - setup_worker_pools() takes
a 'bool' param to force
enable the parallel processing,  but it is never used by ovn-northd.c

So I'd suggest adding this support so that we can test the parallel
processing patches in CI.

One comment below.



---
  lib/ovn-parallel-hmap.c | 202 ++--
  lib/ovn-parallel-hmap.h |  23 -
  northd/ovn-northd.c |  58 +---
  ovs |   2 +-
  4 files changed, 194 insertions(+), 91 deletions(-)

diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index b8c7ac786..6c5199fb3 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -51,7 +51,6 @@ static bool can_parallelize = false;
   * accompanied by a fence. It does not need to be atomic or be
   * accessed under a lock.
   */
-static bool workers_must_exit = false;

  static struct ovs_list worker_pools = OVS_LIST_INITIALIZER(_pools);

@@ -70,10 +69,20 @@ static void merge_hash_results(struct worker_pool *pool 
OVS_UNUSED,
 void *fin_result, void *result_frags,
 int index);

+
+static bool init_control(struct worker_control *control, int id,
+ struct worker_pool *pool);
+
+static void cleanup_control(struct worker_pool *pool, int id);
+
+static void free_controls(struct worker_pool *pool);
+
+static struct worker_control *alloc_controls(int size);
+
  bool
-ovn_stop_parallel_processing(void)
+ovn_stop_parallel_processing(struct worker_pool *pool)
  {
-return workers_must_exit;
+return pool->workers_must_exit;
  }

  bool
@@ -92,11 +101,67 @@ ovn_can_parallelize_hashes(bool force_parallel)
  return can_parallelize;
  }

+
+void
+destroy_pool(struct worker_pool *pool) {
+char sem_name[256];
+
+free_controls(pool);
+sem_close(pool->done);
+sprintf(sem_name, MAIN_SEM_NAME, sembase, pool);
+sem_unlink(sem_name);
+free(pool);
+}
+
+bool
+ovn_resize_pool(struct worker_pool *pool, int size)
+{
+int i;
+
+ovs_assert(pool != NULL);
+
+if (!size) {
+size = pool_size;
+}
+
+ovs_mutex_lock(_mutex);
+
+if (can_parallelize) {
+free_controls(pool);
+pool->size = size;
+
+/* Allocate new control structures. */
+
+pool->controls = alloc_controls(size);
+pool->workers_must_exit = false;
+
+for (i = 0; i < pool->size; i++) {
+if (! init_control(>controls[i], i, pool)) {
+goto cleanup;
+}
+}
+}
+ovs_mutex_unlock(_mutex);
+return true;
+cleanup:
+
+/* Something went wrong when opening semaphores. In this case
+ * it is better to shut off parallel procesing altogether
+ */
+
+VLOG_INFO("Failed to initialize parallel processing, error %d", errno);
+can_parallelize = false;
+free_controls(pool);
+
+ovs_mutex_unlock(_mutex);
+return false;
+}
+
+
  struct worker_pool *
-ovn_add_worker_pool(void *(*start)(void *))
+ovn_add_worker_pool(void *(*start)(void *), int size)
  {
  struct worker_pool *new_pool = NULL;
-struct worker_control *new_control;
  bool test = false;
  int i;
  char sem_name[256];
@@ -113,38 +178,29 @@ ovn_add_worker_pool(void *(*start)(void *))
  ovs_mutex_unlock(_mutex);
  }

+if (!size) {
+size = pool_size;
+}
+
  ovs_mutex_lock(_mutex);
  if (can_parallelize) {
  new_pool = xmalloc(sizeof(struct worker_pool));
-new_pool->size = pool_size;
-new_pool->controls = NULL;
+new_pool->size = size;
+new_pool->start = start;
  sprintf(sem_name, MAIN_SEM_NAME, sembase, new_pool);
  new_pool-

[ovs-dev] [OVN Patch v3 2/2] Add support for configuring parallelization via unixctl

2021-09-06 Thread anton . ivanov
From: Anton Ivanov 

Signed-off-by: Anton Ivanov 
---
 lib/ovn-parallel-hmap.c | 209 ++--
 lib/ovn-parallel-hmap.h |  63 +++-
 northd/ovn-northd.c |  26 ++---
 tests/ovn-macros.at |  16 ++-
 4 files changed, 283 insertions(+), 31 deletions(-)

diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index 30de457b5..8a055b2c6 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -33,6 +33,7 @@
 #include "ovs-thread.h"
 #include "ovs-numa.h"
 #include "random.h"
+#include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(ovn_parallel_hmap);
 
@@ -46,6 +47,7 @@ VLOG_DEFINE_THIS_MODULE(ovn_parallel_hmap);
  */
 static atomic_bool initial_pool_setup = ATOMIC_VAR_INIT(false);
 static bool can_parallelize = false;
+static bool should_parallelize = false;
 
 /* This is set only in the process of exit and the set is
  * accompanied by a fence. It does not need to be atomic or be
@@ -85,6 +87,19 @@ ovn_stop_parallel_processing(struct worker_pool *pool)
 return pool->workers_must_exit;
 }
 
+bool
+ovn_set_parallel_processing(bool enable)
+{
+should_parallelize = enable;
+return can_parallelize;
+}
+
+bool
+ovn_get_parallel_processing(void)
+{
+return can_parallelize && should_parallelize;
+}
+
 bool
 ovn_can_parallelize_hashes(bool force_parallel)
 {
@@ -110,6 +125,7 @@ destroy_pool(struct worker_pool *pool) {
 sem_close(pool->done);
 sprintf(sem_name, MAIN_SEM_NAME, sembase, pool);
 sem_unlink(sem_name);
+free(pool->name);
 free(pool);
 }
 
@@ -120,6 +136,10 @@ ovn_resize_pool(struct worker_pool *pool, int size)
 
 ovs_assert(pool != NULL);
 
+if (!pool->is_mutable) {
+return false;
+}
+
 if (!size) {
 size = pool_size;
 }
@@ -159,7 +179,8 @@ cleanup:
 
 
 struct worker_pool *
-ovn_add_worker_pool(void *(*start)(void *), int size)
+ovn_add_worker_pool(void *(*start)(void *), int size, char *name,
+bool is_mutable)
 {
 struct worker_pool *new_pool = NULL;
 bool test = false;
@@ -187,6 +208,8 @@ ovn_add_worker_pool(void *(*start)(void *), int size)
 new_pool = xmalloc(sizeof(struct worker_pool));
 new_pool->size = size;
 new_pool->start = start;
+new_pool->is_mutable = is_mutable;
+new_pool->name = xstrdup(name);
 sprintf(sem_name, MAIN_SEM_NAME, sembase, new_pool);
 new_pool->done = sem_open(sem_name, O_CREAT, S_IRWXU, 0);
 if (new_pool->done == SEM_FAILED) {
@@ -219,6 +242,7 @@ cleanup:
 sprintf(sem_name, MAIN_SEM_NAME, sembase, new_pool);
 sem_unlink(sem_name);
 }
+free(new_pool->name);
 ovs_mutex_unlock(_mutex);
 return NULL;
 }
@@ -267,13 +291,9 @@ ovn_fast_hmap_size_for(struct hmap *hmap, int size)
 /* Run a thread pool which uses a callback function to process results
  */
 void
-ovn_run_pool_callback(struct worker_pool *pool,
-  void *fin_result, void *result_frags,
-  void (*helper_func)(struct worker_pool *pool,
-  void *fin_result,
-  void *result_frags, int index))
+ovn_start_pool(struct worker_pool *pool)
 {
-int index, completed;
+int index;
 
 /* Ensure that all worker threads see the same data as the
  * main thread.
@@ -284,8 +304,19 @@ ovn_run_pool_callback(struct worker_pool *pool,
 for (index = 0; index < pool->size; index++) {
 sem_post(pool->controls[index].fire);
 }
+}
+
 
-completed = 0;
+/* Run a thread pool which uses a callback function to process results
+ */
+void
+ovn_complete_pool_callback(struct worker_pool *pool,
+  void *fin_result, void *result_frags,
+  void (*helper_func)(struct worker_pool *pool,
+  void *fin_result,
+  void *result_frags, int index))
+{
+int index, completed = 0;
 
 do {
 bool test;
@@ -327,6 +358,18 @@ ovn_run_pool_callback(struct worker_pool *pool,
 }
 } while (completed < pool->size);
 }
+/* Run a thread pool which uses a callback function to process results
+ */
+void
+ovn_run_pool_callback(struct worker_pool *pool,
+  void *fin_result, void *result_frags,
+  void (*helper_func)(struct worker_pool *pool,
+  void *fin_result,
+  void *result_frags, int index))
+{
+start_pool(pool);
+complete_pool_callback(pool, fin_result, result_frags, helper_func);
+}
 
 /* Run a thread pool - basic, does not do results processing.
  */
@@ -373,6 +416,28 @@ ovn_fast_hmap_merge(struct hmap *dest, struct hmap *inc)
 inc->n = 0;
 }
 
+/* Run a thread pool which gathers results in

[ovs-dev] [OVN Patch v3 1/2] Make changes to the parallel processing API to allow pool sizing

2021-09-06 Thread anton . ivanov
From: Anton Ivanov 

1. Make pool size user-definable.
2. Expose pool destruction.
3. Make pools resizeable at runtime.

Signed-off-by: Anton Ivanov 
---
 lib/ovn-parallel-hmap.c | 202 ++--
 lib/ovn-parallel-hmap.h |  23 -
 northd/ovn-northd.c |  58 +---
 ovs |   2 +-
 4 files changed, 194 insertions(+), 91 deletions(-)

diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index b8c7ac786..30de457b5 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -51,7 +51,6 @@ static bool can_parallelize = false;
  * accompanied by a fence. It does not need to be atomic or be
  * accessed under a lock.
  */
-static bool workers_must_exit = false;
 
 static struct ovs_list worker_pools = OVS_LIST_INITIALIZER(_pools);
 
@@ -70,10 +69,20 @@ static void merge_hash_results(struct worker_pool *pool 
OVS_UNUSED,
void *fin_result, void *result_frags,
int index);
 
+
+static bool init_control(struct worker_control *control, int id,
+ struct worker_pool *pool);
+
+static void cleanup_control(struct worker_pool *pool, int id);
+
+static void free_controls(struct worker_pool *pool);
+
+static struct worker_control *alloc_controls(int size);
+
 bool
-ovn_stop_parallel_processing(void)
+ovn_stop_parallel_processing(struct worker_pool *pool)
 {
-return workers_must_exit;
+return pool->workers_must_exit;
 }
 
 bool
@@ -92,11 +101,67 @@ ovn_can_parallelize_hashes(bool force_parallel)
 return can_parallelize;
 }
 
+
+void
+destroy_pool(struct worker_pool *pool) {
+char sem_name[256];
+
+free_controls(pool);
+sem_close(pool->done);
+sprintf(sem_name, MAIN_SEM_NAME, sembase, pool);
+sem_unlink(sem_name);
+free(pool);
+}
+
+bool
+ovn_resize_pool(struct worker_pool *pool, int size)
+{
+int i;
+
+ovs_assert(pool != NULL);
+
+if (!size) {
+size = pool_size;
+}
+
+ovs_mutex_lock(_mutex);
+
+if (can_parallelize) {
+free_controls(pool);
+pool->size = size;
+
+/* Allocate new control structures. */
+
+pool->controls = alloc_controls(size);
+pool->workers_must_exit = false;
+
+for (i = 0; i < pool->size; i++) {
+if (! init_control(>controls[i], i, pool)) {
+goto cleanup;
+}
+}
+}
+ovs_mutex_unlock(_mutex);
+return true;
+cleanup:
+
+/* Something went wrong when opening semaphores. In this case
+ * it is better to shut off parallel procesing altogether
+ */
+
+VLOG_INFO("Failed to initialize parallel processing, error %d", errno);
+can_parallelize = false;
+free_controls(pool);
+
+ovs_mutex_unlock(_mutex);
+return false;
+}
+
+
 struct worker_pool *
-ovn_add_worker_pool(void *(*start)(void *))
+ovn_add_worker_pool(void *(*start)(void *), int size)
 {
 struct worker_pool *new_pool = NULL;
-struct worker_control *new_control;
 bool test = false;
 int i;
 char sem_name[256];
@@ -113,38 +178,29 @@ ovn_add_worker_pool(void *(*start)(void *))
 ovs_mutex_unlock(_mutex);
 }
 
+if (!size) {
+size = pool_size;
+}
+
 ovs_mutex_lock(_mutex);
 if (can_parallelize) {
 new_pool = xmalloc(sizeof(struct worker_pool));
-new_pool->size = pool_size;
-new_pool->controls = NULL;
+new_pool->size = size;
+new_pool->start = start;
 sprintf(sem_name, MAIN_SEM_NAME, sembase, new_pool);
 new_pool->done = sem_open(sem_name, O_CREAT, S_IRWXU, 0);
 if (new_pool->done == SEM_FAILED) {
 goto cleanup;
 }
 
-new_pool->controls =
-xmalloc(sizeof(struct worker_control) * new_pool->size);
+new_pool->controls = alloc_controls(size);
+new_pool->workers_must_exit = false;
 
 for (i = 0; i < new_pool->size; i++) {
-new_control = _pool->controls[i];
-new_control->id = i;
-new_control->done = new_pool->done;
-new_control->data = NULL;
-ovs_mutex_init(_control->mutex);
-new_control->finished = ATOMIC_VAR_INIT(false);
-sprintf(sem_name, WORKER_SEM_NAME, sembase, new_pool, i);
-new_control->fire = sem_open(sem_name, O_CREAT, S_IRWXU, 0);
-if (new_control->fire == SEM_FAILED) {
+if (!init_control(_pool->controls[i], i, new_pool)) {
 goto cleanup;
 }
 }
-
-for (i = 0; i < pool_size; i++) {
-new_pool->controls[i].worker =
-ovs_thread_create("worker pool helper", start, 
_pool->controls[i]);
-}
 ovs_list_push_back(_pools, _pool->list_node);
 }
 ovs_mutex_unlock(_mutex);
@@ -157,16 +213

[ovs-dev] [OVN Patch v2] Make changes to the parallel processing API to allow pool sizing

2021-09-03 Thread anton . ivanov
From: Anton Ivanov 

1. Make pool size user-definable.
2. Expose pool destruction.
3. Make pools resizeable at runtime.

Signed-off-by: Anton Ivanov 
---
 lib/ovn-parallel-hmap.c | 202 ++--
 lib/ovn-parallel-hmap.h |  23 -
 northd/ovn-northd.c |  58 +---
 ovs |   2 +-
 4 files changed, 194 insertions(+), 91 deletions(-)

diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index b8c7ac786..30de457b5 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -51,7 +51,6 @@ static bool can_parallelize = false;
  * accompanied by a fence. It does not need to be atomic or be
  * accessed under a lock.
  */
-static bool workers_must_exit = false;
 
 static struct ovs_list worker_pools = OVS_LIST_INITIALIZER(_pools);
 
@@ -70,10 +69,20 @@ static void merge_hash_results(struct worker_pool *pool 
OVS_UNUSED,
void *fin_result, void *result_frags,
int index);
 
+
+static bool init_control(struct worker_control *control, int id,
+ struct worker_pool *pool);
+
+static void cleanup_control(struct worker_pool *pool, int id);
+
+static void free_controls(struct worker_pool *pool);
+
+static struct worker_control *alloc_controls(int size);
+
 bool
-ovn_stop_parallel_processing(void)
+ovn_stop_parallel_processing(struct worker_pool *pool)
 {
-return workers_must_exit;
+return pool->workers_must_exit;
 }
 
 bool
@@ -92,11 +101,67 @@ ovn_can_parallelize_hashes(bool force_parallel)
 return can_parallelize;
 }
 
+
+void
+destroy_pool(struct worker_pool *pool) {
+char sem_name[256];
+
+free_controls(pool);
+sem_close(pool->done);
+sprintf(sem_name, MAIN_SEM_NAME, sembase, pool);
+sem_unlink(sem_name);
+free(pool);
+}
+
+bool
+ovn_resize_pool(struct worker_pool *pool, int size)
+{
+int i;
+
+ovs_assert(pool != NULL);
+
+if (!size) {
+size = pool_size;
+}
+
+ovs_mutex_lock(_mutex);
+
+if (can_parallelize) {
+free_controls(pool);
+pool->size = size;
+
+/* Allocate new control structures. */
+
+pool->controls = alloc_controls(size);
+pool->workers_must_exit = false;
+
+for (i = 0; i < pool->size; i++) {
+if (! init_control(>controls[i], i, pool)) {
+goto cleanup;
+}
+}
+}
+ovs_mutex_unlock(_mutex);
+return true;
+cleanup:
+
+/* Something went wrong when opening semaphores. In this case
+ * it is better to shut off parallel procesing altogether
+ */
+
+VLOG_INFO("Failed to initialize parallel processing, error %d", errno);
+can_parallelize = false;
+free_controls(pool);
+
+ovs_mutex_unlock(_mutex);
+return false;
+}
+
+
 struct worker_pool *
-ovn_add_worker_pool(void *(*start)(void *))
+ovn_add_worker_pool(void *(*start)(void *), int size)
 {
 struct worker_pool *new_pool = NULL;
-struct worker_control *new_control;
 bool test = false;
 int i;
 char sem_name[256];
@@ -113,38 +178,29 @@ ovn_add_worker_pool(void *(*start)(void *))
 ovs_mutex_unlock(_mutex);
 }
 
+if (!size) {
+size = pool_size;
+}
+
 ovs_mutex_lock(_mutex);
 if (can_parallelize) {
 new_pool = xmalloc(sizeof(struct worker_pool));
-new_pool->size = pool_size;
-new_pool->controls = NULL;
+new_pool->size = size;
+new_pool->start = start;
 sprintf(sem_name, MAIN_SEM_NAME, sembase, new_pool);
 new_pool->done = sem_open(sem_name, O_CREAT, S_IRWXU, 0);
 if (new_pool->done == SEM_FAILED) {
 goto cleanup;
 }
 
-new_pool->controls =
-xmalloc(sizeof(struct worker_control) * new_pool->size);
+new_pool->controls = alloc_controls(size);
+new_pool->workers_must_exit = false;
 
 for (i = 0; i < new_pool->size; i++) {
-new_control = _pool->controls[i];
-new_control->id = i;
-new_control->done = new_pool->done;
-new_control->data = NULL;
-ovs_mutex_init(_control->mutex);
-new_control->finished = ATOMIC_VAR_INIT(false);
-sprintf(sem_name, WORKER_SEM_NAME, sembase, new_pool, i);
-new_control->fire = sem_open(sem_name, O_CREAT, S_IRWXU, 0);
-if (new_control->fire == SEM_FAILED) {
+if (!init_control(_pool->controls[i], i, new_pool)) {
 goto cleanup;
 }
 }
-
-for (i = 0; i < pool_size; i++) {
-new_pool->controls[i].worker =
-ovs_thread_create("worker pool helper", start, 
_pool->controls[i]);
-}
 ovs_list_push_back(_pools, _pool->list_node);
 }
 ovs_mutex_unlock(_mutex);
@@ -157,16 +213

Re: [ovs-dev] [OVN Patch] Make changes to the parallel processing API to allow pool sizing

2021-09-03 Thread Anton Ivanov


On 02/09/2021 21:20, Numan Siddique wrote:

On Thu, Sep 2, 2021 at 3:20 PM Anton Ivanov
 wrote:

On 02/09/2021 19:30, Numan Siddique wrote:

On Mon, Aug 16, 2021 at 1:31 PM  wrote:

From: Anton Ivanov 

1. Make pool size user-definable.
2. Expose pool destruction.
3. Make pools resizeable at runtime.

Signed-off-by: Anton Ivanov 

Hi Anton,

Thanks for the patch.  If I understand correctly this patch adds the
ability to override the pool
size.  What is missing is the usage of the API from ovn-northd.  Is
there a plan to have a follow up
patch which makes use of this ?

1. Yes. I did not want to define options or change options without
discussing.

2. Even before that, it allows covering one special case - a pool of 1
thread. Basically - sending a thread into background to work on
something so if you have two independent data sets (f.e. multi-data-path
lflows and single datapath lflows) they can be worked in parallel.


I'd expect a config option for the user to specify the pool size and
ovn-northd calling add_worker_pool()
with the configured pool size.  Right now I see it's called with 0.

I can add it as a separate patch. We should make sure it is something
which makes sense syntactically in different OVN (and not just OVN - I
want to port this to OVS) executables.


Also in order for us to be more confident and adopt parallel
processing we need to enable
parallel runs in CI.

For this to happen, there should be a config option in NB_Global to
force parallel processing.

There is a boolean already for northd. Going back a few paragraphs, one
possible approach is to redefine it as an integer

-1 - never use parallel

0 - auto

  > 1 use N threads.

Right now the option "use_parallel_build" is a bool.  Will it be good
to change to int now ?


I think I got my head around it.

We need two more params during creation for this.

1. Name.

2. Mutability - can the size and other parameters of the pool be changed later.

Example - pools of one thread for background processing. They cannot be changed 
- you are not supposed to run more than one thread in such a pool. It is 
created as one and it stays as one. We do not have this yet, it is coming after 
freeze for the next version - postprocessing single od lflows in a single 
background thread while foreground is processing the multiple od. There are a 
few cases elsewhere for this as well, it is a fairly common pattern.

Such pools should be immutable.

Name is fairly obvious. If you have more than one pool (f.e. if lflows, 
post-processing, uuid prefetch, etc), which one are you trying to resize (with 
possible wildcard).

So overall things look like two commands and options:

use-parallel-processing on/off

note: processing, not build. We are aiming at multiple things in parallel not 
just lflow build

option - num-worker-threads N - set global default at startup

command num-worker-threads PoolName N  - set threads for PoolName N (if allowed)

command list-pools - show the pools

All of that should probably sit under thread-pools/ so it is the same in 
whichever ovn (and hopefully ovs) executable where it is enabled.

How does this sound?

A.





The parallel processing library function - setup_worker_pools() takes
a 'bool' param to force
enable the parallel processing,  but it is never used by ovn-northd.c

So I'd suggest adding this support so that we can test the parallel
processing patches in CI.

One comment below.



---
   lib/ovn-parallel-hmap.c | 202 ++--
   lib/ovn-parallel-hmap.h |  23 -
   northd/ovn-northd.c |  58 +---
   ovs |   2 +-
   4 files changed, 194 insertions(+), 91 deletions(-)

diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index b8c7ac786..6c5199fb3 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -51,7 +51,6 @@ static bool can_parallelize = false;
* accompanied by a fence. It does not need to be atomic or be
* accessed under a lock.
*/
-static bool workers_must_exit = false;

   static struct ovs_list worker_pools = OVS_LIST_INITIALIZER(_pools);

@@ -70,10 +69,20 @@ static void merge_hash_results(struct worker_pool *pool 
OVS_UNUSED,
  void *fin_result, void *result_frags,
  int index);

+
+static bool init_control(struct worker_control *control, int id,
+ struct worker_pool *pool);
+
+static void cleanup_control(struct worker_pool *pool, int id);
+
+static void free_controls(struct worker_pool *pool);
+
+static struct worker_control *alloc_controls(int size);
+
   bool
-ovn_stop_parallel_processing(void)
+ovn_stop_parallel_processing(struct worker_pool *pool)
   {
-return workers_must_exit;
+return pool->workers_must_exit;
   }

   bool
@@ -92,11 +101,67 @@ ovn_can_parallelize_hashes(bool force_parallel)
   return can_parallelize;
   }

+
+void
+destroy_pool(struct worker_pool *pool) {
+c

Re: [ovs-dev] [OVN Patch] Make changes to the parallel processing API to allow pool sizing

2021-09-03 Thread Anton Ivanov



On 03/09/2021 08:56, Dumitru Ceara wrote:

On 9/2/21 10:20 PM, Numan Siddique wrote:

[...]


Can we have some suggestions for option names please?


How about num_parallel_workers ?

So user/CMS has to set 2 options (unfortunately)

use_parallel_build=true
num_parallel_workers=6

Or we can deprecate 'use_parallel_build' and use
'num_parallel_workers' instead as you suggested.

This can be a string/integer.

i.e num_parallel_workers=auto  will enable parallel processing and use
the system default pool size.
num_parallel_workers=0  -> will disable parallel processing (this will
be the default value if not defined by user)
num_parallel_workers = N where N > 0 enable parallel processing with pool size.

and still honor 'use_parallel_build' if 'num_parallel_workers' is not
set.  Does this sound good ?

I wonder if it's better however if we don't make 'num_parallel_workers'
a persistent config value and let the CMS pass it as a command line
argument to ovn-northd.

I'm thinking of the case when different ovn-northd instances, connecting
to the same NB DB, run on different nodes of a heterogeneous cluster
(with different hardware specs for the nodes running northd).
Configuring all of them to run with the same number of threads is
inefficient.

Another point is that whenever debugging offline a NB database from a
deployment with parallelization enabled, we'll have to first somehow
adapt the number of threads ovn-northd would use.

Thoughts?


Fair points.

Changing them at runtime would need to be a cli command instead of a config 
option.

A.


We need to document this in ovn-nb.8.xml.

Thanks
Numan


Regards,
Dumitru



--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch] Make changes to the parallel processing API to allow pool sizing

2021-09-02 Thread Anton Ivanov

On 02/09/2021 19:30, Numan Siddique wrote:

On Mon, Aug 16, 2021 at 1:31 PM  wrote:

From: Anton Ivanov 

1. Make pool size user definable.
2. Expose pool destruction.
3. Make pools resizeable at runtime.

Signed-off-by: Anton Ivanov 

Hi Anton,

Thanks for the patch.  If I understand correctly this patch adds the
ability to override the pool
size.  What is missing is the usage of the API from ovn-northd.  Is
there a plan to have a follow up
patch which makes use of this ?


1. Yes. I did not want to define options or change options without 
discussing.


2. Even before that it allows to cover one special case - a pool of 1 
thread. Basically - sending a thread into background to work on 
something so if you have two independent data sets (f.e. multi-data-path 
lflows and single datapath lflows) they can be worked in parallel.




I'd expect a config option for the user to specify the pool size and
ovn-northd calling add_worker_pool()
with the configured pool size.  Right now I see it's called with 0.


I can add it as a separate patch. We should make sure it is something 
which makes sense syntactically in different OVN (and not just OVN - I 
want to port this to OVS) executables.




Also in order for us to be more confident and adopt parallel
processing we need to enable
parallel runs in CI.

For this to happen, there should be a config option in NB_Global to
force parallel processing.


There is a boolean already for northd. Going back a few paragraphs, one
possible approach is to redefine it as an integer


-1 - never use parallel

0 - auto

> 1 use N threads.


The parallel processing library function - setup_worker_pools() takes
a 'bool' param to force
enable the parallel processing, but it is never used by ovn-northd.c

So I'd suggest adding this support so that we can test the parallel
processing patches in CI.

One comment below.



---
  lib/ovn-parallel-hmap.c | 202 ++--
  lib/ovn-parallel-hmap.h |  23 -
  northd/ovn-northd.c |  58 +---
  ovs |   2 +-
  4 files changed, 194 insertions(+), 91 deletions(-)

diff --git a/lib/ovn-parallel-hmap.c b/lib/ovn-parallel-hmap.c
index b8c7ac786..6c5199fb3 100644
--- a/lib/ovn-parallel-hmap.c
+++ b/lib/ovn-parallel-hmap.c
@@ -51,7 +51,6 @@ static bool can_parallelize = false;
   * accompanied by a fence. It does not need to be atomic or be
   * accessed under a lock.
   */
-static bool workers_must_exit = false;

  static struct ovs_list worker_pools = OVS_LIST_INITIALIZER(&worker_pools);

@@ -70,10 +69,20 @@ static void merge_hash_results(struct worker_pool *pool 
OVS_UNUSED,
 void *fin_result, void *result_frags,
 int index);

+
+static bool init_control(struct worker_control *control, int id,
+ struct worker_pool *pool);
+
+static void cleanup_control(struct worker_pool *pool, int id);
+
+static void free_controls(struct worker_pool *pool);
+
+static struct worker_control *alloc_controls(int size);
+
  bool
-ovn_stop_parallel_processing(void)
+ovn_stop_parallel_processing(struct worker_pool *pool)
  {
-return workers_must_exit;
+return pool->workers_must_exit;
  }

  bool
@@ -92,11 +101,67 @@ ovn_can_parallelize_hashes(bool force_parallel)
  return can_parallelize;
  }

+
+void
+destroy_pool(struct worker_pool *pool) {
+char sem_name[256];
+
+free_controls(pool);
+sem_close(pool->done);
+sprintf(sem_name, MAIN_SEM_NAME, sembase, pool);
+sem_unlink(sem_name);
+free(pool);
+}
+
+bool
+ovn_resize_pool(struct worker_pool *pool, int size)
+{
+int i;
+
+ovs_assert(pool != NULL);
+
+if (!size) {
+size = pool_size;
+}
+
+ovs_mutex_lock(_mutex);
+
+if (can_parallelize) {
+free_controls(pool);
+pool->size = size;
+
+/* Allocate new control structures. */
+
+pool->controls = alloc_controls(size);
+pool->workers_must_exit = false;
+
+for (i = 0; i < pool->size; i++) {
+if (! init_control(>controls[i], i, pool)) {
+goto cleanup;
+}
+}
+}
+ovs_mutex_unlock(_mutex);
+return true;
+cleanup:
+
+/* Something went wrong when opening semaphores. In this case
+ * it is better to shut off parallel processing altogether
+ */
+
+VLOG_INFO("Failed to initialize parallel processing, error %d", errno);
+can_parallelize = false;
+free_controls(pool);
+
+ovs_mutex_unlock(_mutex);
+return false;
+}
+
+
  struct worker_pool *
-ovn_add_worker_pool(void *(*start)(void *))
+ovn_add_worker_pool(void *(*start)(void *), int size)
  {
  struct worker_pool *new_pool = NULL;
-struct worker_control *new_control;
  bool test = false;
  int i;
  char sem_name[256];
@@ -113,38 +178,29 @@ ovn_add_worker_pool(void *(*start)(void *))
  ovs_mutex_unlock(_mutex);
  }

+i

[ovs-dev] [OVN Patch v6 4/4] northd: Restore parallel build with dp_groups

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 215 
 1 file changed, 159 insertions(+), 56 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
 #include "unixctl.h"
 #include "util.h"
 #include "uuid.h"
+#include "ovs-thread.h"
 #include "openvswitch/vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
 static bool use_logical_dp_groups = false;
 static bool use_parallel_build = true;
 
-static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;
+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+hmapx_add(&old_lflow->od_group, od);
+hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
+} else {
+hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
+}
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif
 
 /* Adds a row with the specified contents to the Logical_Flow table.
  * Version to use when locking is required.
@@ -4388,55 +4408,127 @@ do_ovn_lflow_add(struct lflow_state *lflow_map, struct 
ovn_datapath *od,
 struct ovn_lflow *old_lflow;
 struct ovn_lflow *lflow;
 
+/* Fast Path.
+ * See if we can get away without writing - grab a rdlock and check
+ * if we can get away with as little work as possible.
+ */
+
 if (use_logical_dp_groups) {
-old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage,
-  priority, match,
+if (use_parallel_build) {
+ovs_rwlock_rdlock(_lock);
+}
+old_lflow = do_ovn_lflow_find(_map->single_od,
+  NULL, stage, priority, match,
   actions, ctrl_meter, hash);
 if (old_lflow) {
-hmapx_add(_lflow->od_group, od);
-/* Found, different, od count went up. Move to multiple od. */
-if (hmapx_count(_lflow->od_group) > 1) {
-hmap_remove(_map->single_od, _lflow->hmap_node);
+if (!hmapx_contains(_lflow->od_group, od)) {
+/* od not in od_group, we need to add it and move to
+ * multiple. */
 if (use_parallel_build) {
-hmap_insert_fast(_map->multiple_od,
- _lflow->hmap_node, hash);
-} else {
-hmap_insert(_map->multiple_od,
-_lflow->hmap_node, hash);
+/* Upgrade the lock to write, we are likely to
+ * modify data. */
+ovs_rwlock_unlock(_lock);
+ovs_rwlock_wrlock(_lock);
+
+/* Check if someone got ahead of us and the flow is already
+ * in multiple. */
+if (!hmap_contains(_map->single_od,
+   _lflow->hmap_node)) {
+/* Someone did get ahead of us, add the od to the
+ * group. */
+hmapx_add(_lflow->od_group, od);
+goto done_update_unlock;
+}
 }
+ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
+goto done_update_unlock;
 }
-} else {
-/* Not found, lookup in multiple od. */
+}
+if (!old_lflow) {
+/* Not found in single, lookup in multiple od. */
 old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL,
   stage, priority, match,
   actions, ctrl_meter, hash);
 if (old_lflow) {
-hmapx_add(_lflow->od_group, od);
+if (!hmapx_contains(_lflow->od_group, od)) {
+if (use_parallel_build) {
+/* Upgrade lock to write.*/
+ovs_rwlock_unloc

[ovs-dev] [OVN Patch v6 3/4] northd: Optimize dp groups operations

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Remove full hash walks to form lflow dp_groups and add them
to the overall parallelizable lflow build.

Make processing of "with dp groups" and "without" in
build_lflows independent to allow these to run in parallel
after the updates to the parallel API have been merged.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 546 +++-
 1 file changed, 337 insertions(+), 209 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 6dfb4327a..1f903882b 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -86,6 +86,11 @@ struct northd_state {
 bool paused;
 };
 
+struct lflow_state {
+struct hmap single_od;
+struct hmap multiple_od;
+};
+
 static const char *ovnnb_db;
 static const char *ovnsb_db;
 static const char *unixctl_path;
@@ -4306,8 +4311,15 @@ struct ovn_lflow {
 const char *where;
 };
 
-static void ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow);
-static struct ovn_lflow *ovn_lflow_find(const struct hmap *lflows,
+static void ovn_lflow_destroy(struct lflow_state *lflows,
+  struct ovn_lflow *lflow);
+static struct ovn_lflow *do_ovn_lflow_find(const struct hmap *lflows,
+const struct ovn_datapath *od,
+enum ovn_stage stage,
+uint16_t priority, const char *match,
+const char *actions,
+const char *ctrl_meter, uint32_t hash);
+static struct ovn_lflow *ovn_lflow_find(const struct lflow_state *lflows,
 const struct ovn_datapath *od,
 enum ovn_stage stage,
 uint16_t priority, const char *match,
@@ -4366,7 +4378,7 @@ static struct hashrow_locks lflow_locks;
  * Version to use when locking is required.
  */
 static struct ovn_lflow *
-do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
+do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
  uint32_t hash, enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const struct ovsdb_idl_row *stage_hint,
@@ -4377,10 +4389,32 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
 struct ovn_lflow *lflow;
 
 if (use_logical_dp_groups) {
-old_lflow = ovn_lflow_find(lflow_map, NULL, stage, priority, match,
-   actions, ctrl_meter, hash);
+old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage,
+  priority, match,
+  actions, ctrl_meter, hash);
 if (old_lflow) {
 hmapx_add(_lflow->od_group, od);
+/* Found, different, od count went up. Move to multiple od. */
+if (hmapx_count(_lflow->od_group) > 1) {
+hmap_remove(_map->single_od, _lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(_map->multiple_od,
+ _lflow->hmap_node, hash);
+} else {
+hmap_insert(_map->multiple_od,
+_lflow->hmap_node, hash);
+}
+}
+} else {
+/* Not found, lookup in multiple od. */
+old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL,
+  stage, priority, match,
+  actions, ctrl_meter, hash);
+if (old_lflow) {
+hmapx_add(_lflow->od_group, od);
+}
+}
+if (old_lflow) {
 return old_lflow;
 }
 }
@@ -4395,16 +4429,20 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
nullable_xstrdup(ctrl_meter),
ovn_lflow_hint(stage_hint), where);
 hmapx_add(>od_group, od);
+
+/* Insert "fresh" lflows into single_od. */
+
 if (!use_parallel_build) {
-hmap_insert(lflow_map, >hmap_node, hash);
+hmap_insert(_map->single_od, >hmap_node, hash);
 } else {
-hmap_insert_fast(lflow_map, >hmap_node, hash);
+hmap_insert_fast(_map->single_od, >hmap_node, hash);
 }
 return lflow;
 }
 
 static struct ovn_lflow *
-ovn_lflow_add_at_with_hash(struct hmap *lflow_map, struct ovn_datapath *od,
+ovn_lflow_add_at_with_hash(struct lflow_state *lflow_map,
+   struct ovn_datapath *od,
enum ovn_stage stage, uint16_t priority,
const char *match, const char *actions,
const 

[ovs-dev] [OVN Patch v6 2/4] northd: Resize the hash to correct parameters after build

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Parallel builds may result in suboptimal hash bucket sizing.
In the absence of dp-groups this does not matter as the hash
is purely storage and not used for lookups during the build.

Such a hash needs to be resized to a correct size at the end
of the build to ensure that any lookups during the lflow
reconciliation phase are done as fast as possible.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 2474d1ca4..6dfb4327a 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13178,6 +13178,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 igmp_groups, meter_groups, lbs,
 bfd_connections);
 
+/* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+
+hmap_expand();
+
 if (hmap_count() > max_seen_lflow_size) {
 max_seen_lflow_size = hmap_count();
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v6 1/4] northd: Disable parallel processing for logical_dp_groups

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Work on improving processing with dp_groups enabled has
discovered that the locking mechanism presently in use
is not reliable. Disabling parallel processing if dp_groups
are enabled until the root cause is determined and fixed.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 2 +-
 ovs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index ee761cef0..2474d1ca4 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12974,7 +12974,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, 
struct hmap *ports,
 }
 }
 
-if (use_parallel_build) {
+if (use_parallel_build && (!use_logical_dp_groups)) {
 struct hmap *lflow_segs;
 struct lswitch_flow_build_info *lsiv;
 int index;
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v5 4/4] northd: Restore parallel build with dp_groups

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 183 +---
 1 file changed, 137 insertions(+), 46 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 28af790bc..f5d49143d 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
 #include "unixctl.h"
 #include "util.h"
 #include "uuid.h"
+#include "ovs-thread.h"
 #include "openvswitch/vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4369,7 +4370,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
 static bool use_logical_dp_groups = false;
 static bool use_parallel_build = true;
 
-static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;
+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+hmapx_add(&old_lflow->od_group, od);
+hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
+} else {
+hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
+}
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif
 
 /* Adds a row with the specified contents to the Logical_Flow table.
  * Version to use when locking is required.
@@ -4385,57 +4405,133 @@ do_ovn_lflow_add(struct lflow_state *lflow_map, struct 
ovn_datapath *od,
 struct ovn_lflow *old_lflow;
 struct ovn_lflow *lflow;
 
+/* Fast Path.
+ * See if we can get away without writing - grab a rdlock and check
+ * if we can get away with as little work as possible.
+ */
+
 if (use_logical_dp_groups) {
-old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage,
-  priority, match,
+if (use_parallel_build) {
+ovs_rwlock_rdlock(_lock);
+}
+old_lflow = do_ovn_lflow_find(_map->single_od,
+  NULL, stage, priority, match,
   actions, ctrl_meter, hash);
 if (old_lflow) {
-hmapx_add(_lflow->od_group, od);
-/* Found, different, od count went up. Move to multiple od. */
-if (hmapx_count(_lflow->od_group) > 1) {
-hmap_remove(_map->single_od, _lflow->hmap_node);
+if (!hmapx_contains(_lflow->od_group, od)) {
+/* od not in od_group, we need to add it and move to
+ * multiple. */
 if (use_parallel_build) {
-hmap_insert_fast(_map->multiple_od,
- _lflow->hmap_node, hash);
-} else {
-hmap_insert(_map->multiple_od,
-_lflow->hmap_node, hash);
+/* Upgrade the lock to write, we are likely to
+ * modify data. */
+ovs_rwlock_unlock(_lock);
+ovs_rwlock_wrlock(_lock);
+
+/* Check if someone got ahead of us and the flow is already
+ * in multiple. */
+if (!hmap_contains(_map->single_od,
+   _lflow->hmap_node)) {
+/* Someone did get ahead of us, add the od to the
+ * group. */
+hmapx_add(_lflow->od_group, od);
+goto done_update_unlock;
+}
 }
+ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
+goto done_update_unlock;
 }
-} else {
-/* Not found, lookup in multiple od. */
+}
+if (!old_lflow) {
+/* Not found in single, lookup in multiple od. */
 old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL,
   stage, priority, match,
   actions, ctrl_meter, hash);
 if (old_lflow) {
-hmapx_add(_lflow->od_group, od);
+if (!hmapx_contains(_lflow->od_group, od)) {
+if (use_parallel_build) {
+/* Upgrade lock to write.*/
+ovs_rwlock_unloc

[ovs-dev] [OVN Patch v5 3/4] northd: Optimize dp groups operations

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Remove full hash walks to form lflow dp_groups and add them
to the overall parallelizable lflow build.

Make processing of "with dp groups" and "without" in
build_lflows independent to allow these to run in parallel
after the updates to the parallel API have been merged.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 537 +++-
 1 file changed, 332 insertions(+), 205 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 71458ff4e..28af790bc 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -86,6 +86,11 @@ struct northd_state {
 bool paused;
 };
 
+struct lflow_state {
+struct hmap single_od;
+struct hmap multiple_od;
+};
+
 static const char *ovnnb_db;
 static const char *ovnsb_db;
 static const char *unixctl_path;
@@ -4304,8 +4309,15 @@ struct ovn_lflow {
 const char *where;
 };
 
-static void ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow);
-static struct ovn_lflow *ovn_lflow_find(const struct hmap *lflows,
+static void ovn_lflow_destroy(struct lflow_state *lflows,
+  struct ovn_lflow *lflow);
+static struct ovn_lflow *do_ovn_lflow_find(const struct hmap *lflows,
+const struct ovn_datapath *od,
+enum ovn_stage stage,
+uint16_t priority, const char *match,
+const char *actions,
+const char *ctrl_meter, uint32_t hash);
+static struct ovn_lflow *ovn_lflow_find(const struct lflow_state *lflows,
 const struct ovn_datapath *od,
 enum ovn_stage stage,
 uint16_t priority, const char *match,
@@ -4363,7 +4375,7 @@ static struct hashrow_locks lflow_locks;
  * Version to use when locking is required.
  */
 static void
-do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
+do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
  uint32_t hash, enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const struct ovsdb_idl_row *stage_hint,
@@ -4374,10 +4386,32 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
 struct ovn_lflow *lflow;
 
 if (use_logical_dp_groups) {
-old_lflow = ovn_lflow_find(lflow_map, NULL, stage, priority, match,
-   actions, ctrl_meter, hash);
+old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage,
+  priority, match,
+  actions, ctrl_meter, hash);
 if (old_lflow) {
 hmapx_add(_lflow->od_group, od);
+/* Found, different, od count went up. Move to multiple od. */
+if (hmapx_count(_lflow->od_group) > 1) {
+hmap_remove(_map->single_od, _lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(_map->multiple_od,
+ _lflow->hmap_node, hash);
+} else {
+hmap_insert(_map->multiple_od,
+_lflow->hmap_node, hash);
+}
+}
+} else {
+/* Not found, lookup in multiple od. */
+old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL,
+  stage, priority, match,
+  actions, ctrl_meter, hash);
+if (old_lflow) {
+hmapx_add(_lflow->od_group, od);
+}
+}
+if (old_lflow) {
 return;
 }
 }
@@ -4392,16 +4426,19 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
nullable_xstrdup(ctrl_meter),
ovn_lflow_hint(stage_hint), where);
 hmapx_add(>od_group, od);
+
+/* Insert "fresh" lflows into single_od. */
+
 if (!use_parallel_build) {
-hmap_insert(lflow_map, >hmap_node, hash);
+hmap_insert(_map->single_od, >hmap_node, hash);
 } else {
-hmap_insert_fast(lflow_map, >hmap_node, hash);
+hmap_insert_fast(_map->single_od, >hmap_node, hash);
 }
 }
 
 /* Adds a row with the specified contents to the Logical_Flow table. */
 static void
-ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
+ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
  enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const char *ctrl_meter,
@@

[ovs-dev] [OVN Patch v5 2/4] northd: Resize the hash to correct parameters after build

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Parallel builds may result in suboptimal hash bucket sizing.
In the absence of dp-groups this does not matter as the hash
is purely storage and not used for lookups during the build.

Such a hash needs to be resized to a correct size at the end
of the build to ensure that any lookups during the lflow
reconciliation phase are done as fast as possible.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 8c653cf52..71458ff4e 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13057,6 +13057,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 igmp_groups, meter_groups, lbs,
 bfd_connections);
 
+/* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+
+hmap_expand();
+
 if (hmap_count() > max_seen_lflow_size) {
 max_seen_lflow_size = hmap_count();
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v5 1/4] northd: Disable parallel processing for logical_dp_groups

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Work on improving processing with dp_groups enabled has
discovered that the locking mechanism presently in use
is not reliable. Disabling parallel processing if dp_groups
are enabled until the root cause is determined and fixed.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 2 +-
 ovs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index af413aba4..8c653cf52 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12861,7 +12861,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, 
struct hmap *ports,
 }
 }
 
-if (use_parallel_build) {
+if (use_parallel_build && (!use_logical_dp_groups)) {
 struct hmap *lflow_segs;
 struct lswitch_flow_build_info *lsiv;
 int index;
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v5 4/4] northd: Restore parallel build with dp_groups

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 183 +---
 1 file changed, 137 insertions(+), 46 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 28af790bc..f5d49143d 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
 #include "unixctl.h"
 #include "util.h"
 #include "uuid.h"
+#include "ovs-thread.h"
 #include "openvswitch/vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4369,7 +4370,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
 static bool use_logical_dp_groups = false;
 static bool use_parallel_build = true;
 
-static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;
+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+hmapx_add(&old_lflow->od_group, od);
+hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
+} else {
+hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
+}
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif
 
 /* Adds a row with the specified contents to the Logical_Flow table.
  * Version to use when locking is required.
@@ -4385,57 +4405,133 @@ do_ovn_lflow_add(struct lflow_state *lflow_map, struct 
ovn_datapath *od,
 struct ovn_lflow *old_lflow;
 struct ovn_lflow *lflow;
 
+/* Fast Path.
+ * See if we can get away without writing - grab a rdlock and check
+ * if we can get away with as little work as possible.
+ */
+
 if (use_logical_dp_groups) {
-old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage,
-  priority, match,
+if (use_parallel_build) {
+ovs_rwlock_rdlock(_lock);
+}
+old_lflow = do_ovn_lflow_find(_map->single_od,
+  NULL, stage, priority, match,
   actions, ctrl_meter, hash);
 if (old_lflow) {
-hmapx_add(_lflow->od_group, od);
-/* Found, different, od count went up. Move to multiple od. */
-if (hmapx_count(_lflow->od_group) > 1) {
-hmap_remove(_map->single_od, _lflow->hmap_node);
+if (!hmapx_contains(_lflow->od_group, od)) {
+/* od not in od_group, we need to add it and move to
+ * multiple. */
 if (use_parallel_build) {
-hmap_insert_fast(_map->multiple_od,
- _lflow->hmap_node, hash);
-} else {
-hmap_insert(_map->multiple_od,
-_lflow->hmap_node, hash);
+/* Upgrade the lock to write, we are likely to
+ * modify data. */
+ovs_rwlock_unlock(_lock);
+ovs_rwlock_wrlock(_lock);
+
+/* Check if someone got ahead of us and the flow is already
+ * in multiple. */
+if (!hmap_contains(_map->single_od,
+   _lflow->hmap_node)) {
+/* Someone did get ahead of us, add the od to the
+ * group. */
+hmapx_add(_lflow->od_group, od);
+goto done_update_unlock;
+}
 }
+ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
+goto done_update_unlock;
 }
-} else {
-/* Not found, lookup in multiple od. */
+}
+if (!old_lflow) {
+/* Not found in single, lookup in multiple od. */
 old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL,
   stage, priority, match,
   actions, ctrl_meter, hash);
 if (old_lflow) {
-hmapx_add(_lflow->od_group, od);
+if (!hmapx_contains(_lflow->od_group, od)) {
+if (use_parallel_build) {
+/* Upgrade lock to write.*/
+ovs_rwlock_unloc

[ovs-dev] [OVN Patch v5 3/4] northd: Optimize dp groups operations

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Remove full hash walks to form lflow dp_groups and add them
to the overall parallelizable lflow build.

Make processing of "with dp groups" and "without" in
build_lflows independent to allow these to run in parallel
after the updates to the parallel API have been merged.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 537 +++-
 1 file changed, 332 insertions(+), 205 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 71458ff4e..28af790bc 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -86,6 +86,11 @@ struct northd_state {
 bool paused;
 };
 
+struct lflow_state {
+struct hmap single_od;
+struct hmap multiple_od;
+};
+
 static const char *ovnnb_db;
 static const char *ovnsb_db;
 static const char *unixctl_path;
@@ -4304,8 +4309,15 @@ struct ovn_lflow {
 const char *where;
 };
 
-static void ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow);
-static struct ovn_lflow *ovn_lflow_find(const struct hmap *lflows,
+static void ovn_lflow_destroy(struct lflow_state *lflows,
+  struct ovn_lflow *lflow);
+static struct ovn_lflow *do_ovn_lflow_find(const struct hmap *lflows,
+const struct ovn_datapath *od,
+enum ovn_stage stage,
+uint16_t priority, const char *match,
+const char *actions,
+const char *ctrl_meter, uint32_t hash);
+static struct ovn_lflow *ovn_lflow_find(const struct lflow_state *lflows,
 const struct ovn_datapath *od,
 enum ovn_stage stage,
 uint16_t priority, const char *match,
@@ -4363,7 +4375,7 @@ static struct hashrow_locks lflow_locks;
  * Version to use when locking is required.
  */
 static void
-do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
+do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
  uint32_t hash, enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const struct ovsdb_idl_row *stage_hint,
@@ -4374,10 +4386,32 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
 struct ovn_lflow *lflow;
 
 if (use_logical_dp_groups) {
-old_lflow = ovn_lflow_find(lflow_map, NULL, stage, priority, match,
-   actions, ctrl_meter, hash);
+old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage,
+  priority, match,
+  actions, ctrl_meter, hash);
 if (old_lflow) {
 hmapx_add(_lflow->od_group, od);
+/* Found, different, od count went up. Move to multiple od. */
+if (hmapx_count(_lflow->od_group) > 1) {
+hmap_remove(_map->single_od, _lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(_map->multiple_od,
+ _lflow->hmap_node, hash);
+} else {
+hmap_insert(_map->multiple_od,
+_lflow->hmap_node, hash);
+}
+}
+} else {
+/* Not found, lookup in multiple od. */
+old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL,
+  stage, priority, match,
+  actions, ctrl_meter, hash);
+if (old_lflow) {
+hmapx_add(_lflow->od_group, od);
+}
+}
+if (old_lflow) {
 return;
 }
 }
@@ -4392,16 +4426,19 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
nullable_xstrdup(ctrl_meter),
ovn_lflow_hint(stage_hint), where);
 hmapx_add(>od_group, od);
+
+/* Insert "fresh" lflows into single_od. */
+
 if (!use_parallel_build) {
-hmap_insert(lflow_map, >hmap_node, hash);
+hmap_insert(_map->single_od, >hmap_node, hash);
 } else {
-hmap_insert_fast(lflow_map, >hmap_node, hash);
+hmap_insert_fast(_map->single_od, >hmap_node, hash);
 }
 }
 
 /* Adds a row with the specified contents to the Logical_Flow table. */
 static void
-ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
+ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
  enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const char *ctrl_meter,
@@

[ovs-dev] [OVN Patch v5 1/4] northd: Disable parallel processing for logical_dp_groups

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Work on improving processing with dp_groups enabled has
discovered that the locking mechanism presently in use
is not reliable. Disabling parallel processing if dp_groups
are enabled until the root cause is determined and fixed.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 2 +-
 ovs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index af413aba4..8c653cf52 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12861,7 +12861,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, 
struct hmap *ports,
 }
 }
 
-if (use_parallel_build) {
+if (use_parallel_build && (!use_logical_dp_groups)) {
 struct hmap *lflow_segs;
 struct lswitch_flow_build_info *lsiv;
 int index;
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v5 2/4] northd: Resize the hash to correct parameters after build

2021-09-02 Thread anton . ivanov
From: Anton Ivanov 

Parallel builds may result in suboptimal hash bucket sizing.
In the absence of dp-groups this does not matter as the hash
is purely storage and not used for lookups during the build.

Such a hash needs to be resized to a correct size at the end
of the build to ensure that any lookups during the lflow
reconciliation phase are done as fast as possible.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 8c653cf52..71458ff4e 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13057,6 +13057,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 igmp_groups, meter_groups, lbs,
 bfd_connections);
 
+/* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+
+hmap_expand();
+
 if (hmap_count() > max_seen_lflow_size) {
 max_seen_lflow_size = hmap_count();
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] jsonrpc: Turn sorting of json objects off in messages

2021-08-30 Thread Anton Ivanov
There are options for that.

Debug mode should never be a default. 

On 30 August 2021 12:29:16 BST, Ilya Maximets  wrote:
>On 8/26/21 7:31 AM, Anton Ivanov wrote:
>> On 25/08/2021 22:12, Ilya Maximets wrote:
>>> On 8/25/21 5:17 PM, anton.iva...@cambridgegreys.com wrote:
>>>> From: Anton Ivanov 
>>>>
>>>> A JSON object sort order is by definition arbitrary. OVS
>>>> parser(s) do not care about object order - the result is
>>>> loaded into a SHASH losing any order on the wire.
>>>>
>>>> Having the objects sorted is a performance penalty, especially
>>>> for large objects like f.e. lflow state. That is represented
>>>> as {"table_name":{"uuid":data, "uuid":data, "uuid":data}}
>>>>
>>>> Sorting in a case like this has no meaning, neither to a human
>>>> nor to a computer.
>>> There is a meaning for both human and computer in some cases.
>>>
>>> While sorting by UUIDs doesn't make a lot of sense, I agree,
>>> having sorted columns inside the database row is important
>>> for debugging purposes.  I'm using something like this:
>>>    sed 's/{"_uuid"/\n{"_uuid"/g'
>>> very frequently, while inspecting database transactions in
>>> order to split different rows to different lines, so the
>>> text editor can work with the result or some other scripts
>>> can process them in a pipeline.  And this command relies on
>>> a fact that '_uuid' goes first in a row.
>>>
>>> Also, the thing that unit tests are not failing with this
>>> change is a pure luck, because, IIUC, we do have a fair amount
>>> of tests that looks for exact order in transactions.  The
>>> problem here that order will depend on the CPU architecture,
>>> because different ways of hash computation will be in use.
>> 
>> JSON which has a "special" requirement to be sorted is NOT JSON.
>
>There is no requirement, but there is convenience in having them sorted.
>
>> 
>> 1. The change does not change transaction order, because order of operations 
>> in transaction is expressed via ARRAYS. Any part which is expressed as an 
>> object is an arbitrary order.
>
>I didn't say it changes transaction order, I said that it changes
>order of columns in rows.
>
>> 
>> 2. The change is specific solely to JSON RPC which for some reason someone 
>> at some point decided to have sorted. All other parts of OVS do not use sort.
>
>I believe it was done for debugability reasons.  The same that I
>have described.
>
>> 
>> 3. There is no benefit to it. Only a penalty which is paid for every 
>> transaction. Every time.
>
>As I described previously, there is a benefit.  It's not a performance,
>obviously, but convenience and higher debugability.
>
>> 
>>>
>>> In general, I'd consider this change as a step back in
>>> debugability.  So, unless it provides a huge performance
>>> benefit, I'd like to not have it.  And tests should be
>>> modified in a way that doesn't require exact order of columns,
>>> for sure.
>>>
>>> I'll give it a shot in a scale test run once I have a chance.
>>>
>>> Best regards, Ilya Maximets.
>>>
>>> If, however, there are 0.5M such records, it
>>>> is a substantial CPU (and latency) penalty.
>>>>
>>>> Signed-off-by: Anton Ivanov 
>>>> ---
>>>>   lib/jsonrpc.c | 2 +-
>>>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>>>
>>>> diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
>>>> index c8ce5362e..3b44f73fe 100644
>>>> --- a/lib/jsonrpc.c
>>>> +++ b/lib/jsonrpc.c
>>>> @@ -802,7 +802,7 @@ jsonrpc_msg_to_string(const struct jsonrpc_msg *m)
>>>>   {
>>>>   struct jsonrpc_msg *copy = jsonrpc_msg_clone(m);
>>>>   struct json *json = jsonrpc_msg_to_json(copy);
>>>> -    char *s = json_to_string(json, JSSF_SORT);
>>>> +    char *s = json_to_string(json, 0);
>>>>   json_destroy(json);
>>>>   return s;
>>>>   }
>>>>
>>>
>> 
>
>

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v4 4/4] northd: Restore parallel build with dp_groups

2021-08-27 Thread anton . ivanov
From: Anton Ivanov 

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 156 
 1 file changed, 114 insertions(+), 42 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 6493780d8..abf2ad9d8 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
 #include "unixctl.h"
 #include "util.h"
 #include "uuid.h"
+#include "ovs-thread.h"
 #include "openvswitch/vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4368,7 +4369,21 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
ovn_datapath *od,
 static bool use_logical_dp_groups = false;
 static bool use_parallel_build = true;
 
-static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;
+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+  struct ovn_datapath *od,
+  struct lflow_state *lflow_map,
+  uint32_t hash)
+{
+hmapx_add(_lflow->od_group, od);
+hmap_remove(_map->single_od, _lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(_map->multiple_od, _lflow->hmap_node, hash);
+} else {
+hmap_insert(_map->multiple_od, _lflow->hmap_node, hash);
+}
+}
 
 /* Adds a row with the specified contents to the Logical_Flow table.
  * Version to use when locking is required.
@@ -4383,52 +4398,120 @@ do_ovn_lflow_add(struct lflow_state *lflow_map, struct 
ovn_datapath *od,
 
 struct ovn_lflow *old_lflow;
 struct ovn_lflow *lflow;
+bool can_write = false;
+
+/* Fast Path.
+ * See if we can get away without writing - grab a rdlock and check if we 
can get away 
+ * with as little work as possible.
+ */
 
 if (use_logical_dp_groups) {
+if (use_parallel_build) {
+ovs_rwlock_rdlock(_lock);
+}
 old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage, 
priority, match,
actions, ctrl_meter, hash);
 if (old_lflow) {
-hmapx_add(_lflow->od_group, od);
-/* Found, different, od count went up. Move to multiple od. */
-if (hmapx_count(_lflow->od_group) > 1) {
-hmap_remove(_map->single_od, _lflow->hmap_node);
+if (!hmapx_contains(_lflow->od_group, od)) {
+/* od not in od_group, we need to add it and move to multiple. 
*/
 if (use_parallel_build) {
-hmap_insert_fast(_map->multiple_od, 
_lflow->hmap_node, hash);
-} else {
-hmap_insert(_map->multiple_od, 
_lflow->hmap_node, hash);
+/* Upgrade the lock to write, we are likely to modify data 
*/
+if (!can_write) {
+ovs_rwlock_unlock(_lock);
+ovs_rwlock_wrlock(_lock);
+}
+can_write = true;
+/* Search again, someone may have gotten here ahead of us
+ * and moved it to multiple. */
+if (!hmap_contains(_map->single_od, 
_lflow->hmap_node)) {
+old_lflow = NULL;
+/* Someone did get ahead of us. */
+goto amend_multiple;
+}
 }
+ovn_make_multi_lflow(old_lflow, od, lflow_map, hash); 
 }
-} else {
-/* Not found, lookup in multiple od. */
+}
+amend_multiple:
+if (!old_lflow) {
+/* Not found in single, lookup in multiple od. */
 old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL, 
stage, priority, match,
actions, ctrl_meter, hash);
 if (old_lflow) {
-hmapx_add(_lflow->od_group, od);
+if (!hmapx_contains(_lflow->od_group, od)) {
+if (use_parallel_build && (!can_write)) {
+/* Upgrade lock to write.*/
+ovs_rwlock_unlock(_lock);
+ovs_rwlock_wrlock(_lock);
+}
+hmapx_add(_lflow->od_group, od);
+}
 }
 }
+if (use_parallel_build) {
+ovs_rwlock_unlock(_lock);
+}
 if (old_lflow) {
 return;
 }
 }
 
-lflow = xmalloc(sizeof *lflow);
-/* While adding new logical flows we're not setting single datapath, but
- * collec

[ovs-dev] [OVN Patch v4 3/4] northd: Optimize dp groups operations

2021-08-27 Thread anton . ivanov
From: Anton Ivanov 

Remove full hash walks to form lflow dp_groups and add them
to the overall parallelizable lflow build.

Make processing of "with dp groups" and "without" in
build_lflows independent to allow these to run in parallel
after the updates to the parallel API have been merged.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 512 ++--
 1 file changed, 309 insertions(+), 203 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 71458ff4e..6493780d8 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -86,6 +86,11 @@ struct northd_state {
 bool paused;
 };
 
+struct lflow_state {
+struct hmap single_od;
+struct hmap multiple_od;
+};
+
 static const char *ovnnb_db;
 static const char *ovnsb_db;
 static const char *unixctl_path;
@@ -4304,8 +4309,14 @@ struct ovn_lflow {
 const char *where;
 };
 
-static void ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow);
-static struct ovn_lflow *ovn_lflow_find(const struct hmap *lflows,
+static void ovn_lflow_destroy(struct lflow_state *lflows, struct ovn_lflow 
*lflow);
+static struct ovn_lflow *do_ovn_lflow_find(const struct hmap *lflows,
+const struct ovn_datapath *od,
+enum ovn_stage stage,
+uint16_t priority, const char *match,
+const char *actions,
+const char *ctrl_meter, uint32_t hash);
+static struct ovn_lflow *ovn_lflow_find(const struct lflow_state *lflows,
 const struct ovn_datapath *od,
 enum ovn_stage stage,
 uint16_t priority, const char *match,
@@ -4363,7 +4374,7 @@ static struct hashrow_locks lflow_locks;
  * Version to use when locking is required.
  */
 static void
-do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
+do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
  uint32_t hash, enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const struct ovsdb_idl_row *stage_hint,
@@ -4374,10 +4385,28 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
 struct ovn_lflow *lflow;
 
 if (use_logical_dp_groups) {
-old_lflow = ovn_lflow_find(lflow_map, NULL, stage, priority, match,
+old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage, 
priority, match,
actions, ctrl_meter, hash);
 if (old_lflow) {
 hmapx_add(_lflow->od_group, od);
+/* Found, different, od count went up. Move to multiple od. */
+if (hmapx_count(_lflow->od_group) > 1) {
+hmap_remove(_map->single_od, _lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(_map->multiple_od, 
_lflow->hmap_node, hash);
+} else {
+hmap_insert(_map->multiple_od, 
_lflow->hmap_node, hash);
+}
+}
+} else {
+/* Not found, lookup in multiple od. */
+old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL, 
stage, priority, match,
+   actions, ctrl_meter, hash);
+if (old_lflow) {
+hmapx_add(_lflow->od_group, od);
+}
+}
+if (old_lflow) {
 return;
 }
 }
@@ -4392,16 +4421,19 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
nullable_xstrdup(ctrl_meter),
ovn_lflow_hint(stage_hint), where);
 hmapx_add(>od_group, od);
+
+/* Insert "fresh" lflows into single_od. */
+
 if (!use_parallel_build) {
-hmap_insert(lflow_map, >hmap_node, hash);
+hmap_insert(_map->single_od, >hmap_node, hash);
 } else {
-hmap_insert_fast(lflow_map, >hmap_node, hash);
+hmap_insert_fast(_map->single_od, >hmap_node, hash);
 }
 }
 
 /* Adds a row with the specified contents to the Logical_Flow table. */
 static void
-ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
+ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
  enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const char *ctrl_meter,
@@ -4465,27 +4497,74 @@ ovn_lflow_add_at(struct hmap *lflow_map, struct 
ovn_datapath *od,
   ACTIONS, NULL, CTRL_METER, NULL)
 
 static struct ovn_lflow *
-ovn_lflow_find(const struct hmap *lflows, const struct ovn_datapath *od,
+do_ovn_lflow_find(c

[ovs-dev] [OVN Patch v4 2/4] northd: Resize the hash to correct parameters after build

2021-08-27 Thread anton . ivanov
From: Anton Ivanov 

Parallel builds may result in suboptimal hash bucket sizing.
In the absence of dp-groups this does not matter as the hash
is purely storage and not used for lookups during the build.

Such a hash needs to be resized to a correct size at the end
of the build to ensure that any lookups during the lflow
reconciliation phase are done as fast as possible.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 8c653cf52..71458ff4e 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13057,6 +13057,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 igmp_groups, meter_groups, lbs,
 bfd_connections);
 
+/* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+
+hmap_expand();
+
 if (hmap_count() > max_seen_lflow_size) {
 max_seen_lflow_size = hmap_count();
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v4 1/4] northd: Disable parallel processing for logical_dp_groups

2021-08-27 Thread anton . ivanov
From: Anton Ivanov 

Work on improving processing with dp_groups enabled has
discovered that the locking mechanism presently in use
is not reliable. Disabling parallel processing if dp_groups
are enabled until the root cause is determined and fixed.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 2 +-
 ovs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index af413aba4..8c653cf52 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12861,7 +12861,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, 
struct hmap *ports,
 }
 }
 
-if (use_parallel_build) {
+if (use_parallel_build && (!use_logical_dp_groups)) {
 struct hmap *lflow_segs;
 struct lswitch_flow_build_info *lsiv;
 int index;
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v3 3/3] northd: Optimize dp groups operations

2021-08-26 Thread anton . ivanov
From: Anton Ivanov 

Remove full hash walks to form lflow dp_groups and add them
to the overall parallelizable lflow build.

Make processing of "with dp groups" and "without" in
build_lflows independent to allow these to run in parallel
after the updates to the parallel API have been merged.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 512 ++--
 1 file changed, 309 insertions(+), 203 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 71458ff4e..6493780d8 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -86,6 +86,11 @@ struct northd_state {
 bool paused;
 };
 
+struct lflow_state {
+struct hmap single_od;
+struct hmap multiple_od;
+};
+
 static const char *ovnnb_db;
 static const char *ovnsb_db;
 static const char *unixctl_path;
@@ -4304,8 +4309,14 @@ struct ovn_lflow {
 const char *where;
 };
 
-static void ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow);
-static struct ovn_lflow *ovn_lflow_find(const struct hmap *lflows,
+static void ovn_lflow_destroy(struct lflow_state *lflows, struct ovn_lflow 
*lflow);
+static struct ovn_lflow *do_ovn_lflow_find(const struct hmap *lflows,
+const struct ovn_datapath *od,
+enum ovn_stage stage,
+uint16_t priority, const char *match,
+const char *actions,
+const char *ctrl_meter, uint32_t hash);
+static struct ovn_lflow *ovn_lflow_find(const struct lflow_state *lflows,
 const struct ovn_datapath *od,
 enum ovn_stage stage,
 uint16_t priority, const char *match,
@@ -4363,7 +4374,7 @@ static struct hashrow_locks lflow_locks;
  * Version to use when locking is required.
  */
 static void
-do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
+do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
  uint32_t hash, enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const struct ovsdb_idl_row *stage_hint,
@@ -4374,10 +4385,28 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
 struct ovn_lflow *lflow;
 
 if (use_logical_dp_groups) {
-old_lflow = ovn_lflow_find(lflow_map, NULL, stage, priority, match,
+old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage, 
priority, match,
actions, ctrl_meter, hash);
 if (old_lflow) {
 hmapx_add(_lflow->od_group, od);
+/* Found, different, od count went up. Move to multiple od. */
+if (hmapx_count(_lflow->od_group) > 1) {
+hmap_remove(_map->single_od, _lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(_map->multiple_od, 
_lflow->hmap_node, hash);
+} else {
+hmap_insert(_map->multiple_od, 
_lflow->hmap_node, hash);
+}
+}
+} else {
+/* Not found, lookup in multiple od. */
+old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL, 
stage, priority, match,
+   actions, ctrl_meter, hash);
+if (old_lflow) {
+hmapx_add(_lflow->od_group, od);
+}
+}
+if (old_lflow) {
 return;
 }
 }
@@ -4392,16 +4421,19 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
nullable_xstrdup(ctrl_meter),
ovn_lflow_hint(stage_hint), where);
 hmapx_add(>od_group, od);
+
+/* Insert "fresh" lflows into single_od. */
+
 if (!use_parallel_build) {
-hmap_insert(lflow_map, >hmap_node, hash);
+hmap_insert(_map->single_od, >hmap_node, hash);
 } else {
-hmap_insert_fast(lflow_map, >hmap_node, hash);
+hmap_insert_fast(_map->single_od, >hmap_node, hash);
 }
 }
 
 /* Adds a row with the specified contents to the Logical_Flow table. */
 static void
-ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
+ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
  enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const char *ctrl_meter,
@@ -4465,27 +4497,74 @@ ovn_lflow_add_at(struct hmap *lflow_map, struct 
ovn_datapath *od,
   ACTIONS, NULL, CTRL_METER, NULL)
 
 static struct ovn_lflow *
-ovn_lflow_find(const struct hmap *lflows, const struct ovn_datapath *od,
+do_ovn_lflow_find(c

[ovs-dev] [OVN Patch v3 1/3] northd: Disable parallel processing for logical_dp_groups

2021-08-26 Thread anton . ivanov
From: Anton Ivanov 

Work on improving processing with dp_groups enabled has
discovered that the locking mechanism presently in use
is not reliable. Disabling parallel processing if dp_groups
are enabled until the root cause is determined and fixed.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 2 +-
 ovs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index af413aba4..8c653cf52 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12861,7 +12861,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, 
struct hmap *ports,
 }
 }
 
-if (use_parallel_build) {
+if (use_parallel_build && (!use_logical_dp_groups)) {
 struct hmap *lflow_segs;
 struct lswitch_flow_build_info *lsiv;
 int index;
diff --git a/ovs b/ovs
index daf627f45..50e5523b9 16
--- a/ovs
+++ b/ovs
@@ -1 +1 @@
-Subproject commit daf627f459ffbc7171d42a2c01f80754bfd54edc
+Subproject commit 50e5523b9b2b154e5fafc5acdcdec85e9cc5a330
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v3 2/3] northd: Resize the hash to correct parameters after build

2021-08-26 Thread anton . ivanov
From: Anton Ivanov 

Parallel builds may result in suboptimal hash bucket sizing.
In the absence of dp-groups this does not matter as the hash
is purely storage and not used for lookups during the build.

Such a hash needs to be resized to a correct size at the end
of the build to ensure that any lookups during the lflow
reconciliation phase are done as fast as possible.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 8c653cf52..71458ff4e 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13057,6 +13057,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 igmp_groups, meter_groups, lbs,
 bfd_connections);
 
+/* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+
+hmap_expand();
+
 if (hmap_count() > max_seen_lflow_size) {
 max_seen_lflow_size = hmap_count();
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v2 3/3] northd: Optimize dp groups operations

2021-08-26 Thread anton . ivanov
From: Anton Ivanov 

Remove full hash walks to form lflow dp_groups and add them
to the overall parallelizable lflow build.

Make processing of "with dp groups" and "without" in
build_lflows independent to allow these to run in parallel
after the updates to the parallel API have been merged.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 514 +++-
 1 file changed, 311 insertions(+), 203 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 71458ff4e..297b7873d 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -86,6 +86,11 @@ struct northd_state {
 bool paused;
 };
 
+struct lflow_state {
+struct hmap single_od;
+struct hmap multiple_od;
+};
+
 static const char *ovnnb_db;
 static const char *ovnsb_db;
 static const char *unixctl_path;
@@ -4304,8 +4309,14 @@ struct ovn_lflow {
 const char *where;
 };
 
-static void ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow);
-static struct ovn_lflow *ovn_lflow_find(const struct hmap *lflows,
+static void ovn_lflow_destroy(struct lflow_state *lflows, struct ovn_lflow 
*lflow);
+static struct ovn_lflow *do_ovn_lflow_find(const struct hmap *lflows,
+const struct ovn_datapath *od,
+enum ovn_stage stage,
+uint16_t priority, const char *match,
+const char *actions,
+const char *ctrl_meter, uint32_t hash);
+static struct ovn_lflow *ovn_lflow_find(const struct lflow_state *lflows,
 const struct ovn_datapath *od,
 enum ovn_stage stage,
 uint16_t priority, const char *match,
@@ -4363,7 +4374,7 @@ static struct hashrow_locks lflow_locks;
  * Version to use when locking is required.
  */
 static void
-do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
+do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
  uint32_t hash, enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const struct ovsdb_idl_row *stage_hint,
@@ -4374,10 +4385,28 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
 struct ovn_lflow *lflow;
 
 if (use_logical_dp_groups) {
-old_lflow = ovn_lflow_find(lflow_map, NULL, stage, priority, match,
+old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage, 
priority, match,
actions, ctrl_meter, hash);
 if (old_lflow) {
 hmapx_add(_lflow->od_group, od);
+/* Found, different, od count went up. Move to multiple od. */
+if (hmapx_count(_lflow->od_group) > 1) {
+hmap_remove(_map->single_od, _lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(_map->multiple_od, 
_lflow->hmap_node, hash);
+} else {
+hmap_insert(_map->multiple_od, 
_lflow->hmap_node, hash);
+}
+}
+} else {
+/* Not found, lookup in multiple od. */
+old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL, 
stage, priority, match,
+   actions, ctrl_meter, hash);
+if (old_lflow) {
+hmapx_add(_lflow->od_group, od);
+}
+}
+if (old_lflow) {
 return;
 }
 }
@@ -4392,16 +4421,21 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
nullable_xstrdup(ctrl_meter),
ovn_lflow_hint(stage_hint), where);
 hmapx_add(>od_group, od);
+
+/* Insert "fresh" lflows into single_od. */
+
 if (!use_parallel_build) {
-hmap_insert(lflow_map, >hmap_node, hash);
+hmap_insert(_map->single_od, >hmap_node, hash);
 } else {
-hmap_insert_fast(lflow_map, >hmap_node, hash);
+hmap_insert_fast(_map->single_od, >hmap_node, hash);
 }
 }
 
+struct ovs_mutex test_mutex = OVS_MUTEX_INITIALIZER;
+
 /* Adds a row with the specified contents to the Logical_Flow table. */
 static void
-ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
+ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
  enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const char *ctrl_meter,
@@ -4465,27 +4499,74 @@ ovn_lflow_add_at(struct hmap *lflow_map, struct 
ovn_datapath *od,
   ACTIONS, NULL, CTRL_METER, NULL)
 
 static struct ovn_lflow *
-ovn_lflow_find(const struct hmap *lfl

[ovs-dev] [OVN Patch v2 2/3] northd: Resize the hash to correct parameters after build

2021-08-26 Thread anton . ivanov
From: Anton Ivanov 

Parallel builds may result in suboptimal hash bucket sizing.
In the absence of dp-groups this does not matter as the hash
is purely storage and not used for lookups during the build.

Such a hash needs to be resized to a correct size at the end
of the build to ensure that any lookups during the lflow
reconciliation phase are done as fast as possible.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 8c653cf52..71458ff4e 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13057,6 +13057,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 igmp_groups, meter_groups, lbs,
 bfd_connections);
 
+/* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+
+hmap_expand();
+
 if (hmap_count() > max_seen_lflow_size) {
 max_seen_lflow_size = hmap_count();
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch v2 1/3] northd: Disable parallel processing for logical_dp_groups

2021-08-26 Thread anton . ivanov
From: Anton Ivanov 

Work on improving processing with dp_groups enabled has
discovered that the locking mechanism presently in use
is not reliable. Disabling parallel processing if dp_groups
are enabled until the root cause is determined and fixed.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 2 +-
 ovs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index af413aba4..8c653cf52 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12861,7 +12861,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, 
struct hmap *ports,
 }
 }
 
-if (use_parallel_build) {
+if (use_parallel_build && (!use_logical_dp_groups)) {
 struct hmap *lflow_segs;
 struct lswitch_flow_build_info *lsiv;
 int index;
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 3/3] northd: Optimize dp groups operations

2021-08-26 Thread anton . ivanov
From: Anton Ivanov 

Remove full hash walks to form lflow dp_groups and add them
to the overall parallelizable lflow build.

Make processing of "with dp groups" and "without" in
build_lflows independent to allow these to run in parallel
after the updates to the parallel API have been merged.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 514 +++-
 1 file changed, 311 insertions(+), 203 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 71458ff4e..297b7873d 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -86,6 +86,11 @@ struct northd_state {
 bool paused;
 };
 
+struct lflow_state {
+struct hmap single_od;
+struct hmap multiple_od;
+};
+
 static const char *ovnnb_db;
 static const char *ovnsb_db;
 static const char *unixctl_path;
@@ -4304,8 +4309,14 @@ struct ovn_lflow {
 const char *where;
 };
 
-static void ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow);
-static struct ovn_lflow *ovn_lflow_find(const struct hmap *lflows,
+static void ovn_lflow_destroy(struct lflow_state *lflows, struct ovn_lflow 
*lflow);
+static struct ovn_lflow *do_ovn_lflow_find(const struct hmap *lflows,
+const struct ovn_datapath *od,
+enum ovn_stage stage,
+uint16_t priority, const char *match,
+const char *actions,
+const char *ctrl_meter, uint32_t hash);
+static struct ovn_lflow *ovn_lflow_find(const struct lflow_state *lflows,
 const struct ovn_datapath *od,
 enum ovn_stage stage,
 uint16_t priority, const char *match,
@@ -4363,7 +4374,7 @@ static struct hashrow_locks lflow_locks;
  * Version to use when locking is required.
  */
 static void
-do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
+do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
  uint32_t hash, enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const struct ovsdb_idl_row *stage_hint,
@@ -4374,10 +4385,28 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
 struct ovn_lflow *lflow;
 
 if (use_logical_dp_groups) {
-old_lflow = ovn_lflow_find(lflow_map, NULL, stage, priority, match,
+old_lflow = do_ovn_lflow_find(_map->single_od, NULL, stage, 
priority, match,
actions, ctrl_meter, hash);
 if (old_lflow) {
 hmapx_add(_lflow->od_group, od);
+/* Found, different, od count went up. Move to multiple od. */
+if (hmapx_count(_lflow->od_group) > 1) {
+hmap_remove(_map->single_od, _lflow->hmap_node);
+if (use_parallel_build) {
+hmap_insert_fast(_map->multiple_od, 
_lflow->hmap_node, hash);
+} else {
+hmap_insert(_map->multiple_od, 
_lflow->hmap_node, hash);
+}
+}
+} else {
+/* Not found, lookup in multiple od. */
+old_lflow = do_ovn_lflow_find(_map->multiple_od, NULL, 
stage, priority, match,
+   actions, ctrl_meter, hash);
+if (old_lflow) {
+hmapx_add(_lflow->od_group, od);
+}
+}
+if (old_lflow) {
 return;
 }
 }
@@ -4392,16 +4421,21 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
nullable_xstrdup(ctrl_meter),
ovn_lflow_hint(stage_hint), where);
 hmapx_add(>od_group, od);
+
+/* Insert "fresh" lflows into single_od. */
+
 if (!use_parallel_build) {
-hmap_insert(lflow_map, >hmap_node, hash);
+hmap_insert(_map->single_od, >hmap_node, hash);
 } else {
-hmap_insert_fast(lflow_map, >hmap_node, hash);
+hmap_insert_fast(_map->single_od, >hmap_node, hash);
 }
 }
 
+struct ovs_mutex test_mutex = OVS_MUTEX_INITIALIZER;
+
 /* Adds a row with the specified contents to the Logical_Flow table. */
 static void
-ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
+ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
  enum ovn_stage stage, uint16_t priority,
  const char *match, const char *actions, const char *io_port,
  const char *ctrl_meter,
@@ -4465,27 +4499,74 @@ ovn_lflow_add_at(struct hmap *lflow_map, struct 
ovn_datapath *od,
   ACTIONS, NULL, CTRL_METER, NULL)
 
 static struct ovn_lflow *
-ovn_lflow_find(const struct hmap *lfl

[ovs-dev] [PATCH v2 2/3] northd: Resize the hash to correct parameters after build

2021-08-26 Thread anton . ivanov
From: Anton Ivanov 

Parallel builds may result in suboptimal hash bucket sizing.
In the absence of dp-groups this does not matter as the hash
is purely storage and not used for lookups during the build.

Such a hash needs to be resized to a correct size at the end
of the build to ensure that any lookups during the lflow
reconciliation phase are done as fast as possible.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 8c653cf52..71458ff4e 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13057,6 +13057,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 igmp_groups, meter_groups, lbs,
 bfd_connections);
 
+/* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+
+hmap_expand();
+
 if (hmap_count() > max_seen_lflow_size) {
 max_seen_lflow_size = hmap_count();
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 1/3] northd: Disable parallel processing for logical_dp_groups

2021-08-26 Thread anton . ivanov
From: Anton Ivanov 

Work on improving processing with dp_groups enabled has
discovered that the locking mechanism presently in use
is not reliable. Disabling parallel processing if dp_groups
are enabled until the root cause is determined and fixed.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 2 +-
 ovs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index af413aba4..8c653cf52 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12861,7 +12861,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, 
struct hmap *ports,
 }
 }
 
-if (use_parallel_build) {
+if (use_parallel_build && (!use_logical_dp_groups)) {
 struct hmap *lflow_segs;
 struct lswitch_flow_build_info *lsiv;
 int index;
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] jsonrpc: Turn sorting of json objects off in messages

2021-08-25 Thread Anton Ivanov

On 25/08/2021 22:12, Ilya Maximets wrote:

On 8/25/21 5:17 PM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

A JSON object sort order is by definition arbitrary. OVS
parser(s) do not care about object order - the result is
loaded into a SHASH losing any order on the wire.

Having the objects sorted is a performance penalty, especially
for large objects like f.e. lflow state. That is represented
as {"table_name":{"uuid":data, "uuid":data, "uuid":data}}

Sorting in a case like this has no meaning, either to a human
or to a computer.

There is a meaning for both human and computer in some cases.

While sorting by UUIDs doesn't make a lot of sense, I agree,
having sorted columns inside the database row is important
for debugging purposes.  I'm using something like this:
   sed 's/{"_uuid"/\n{"_uuid"/g'
very frequently, while inspecting database transactions in
order to split different rows to different lines, so the
text editor can work with the result or some other scripts
can process them in a pipeline.  And this command relies on
a fact that '_uuid' goes first in a row.

Also, the fact that unit tests are not failing with this
change is pure luck, because, IIUC, we do have a fair amount
of tests that looks for exact order in transactions.  The
problem here that order will depend on the CPU architecture,
because different ways of hash computation will be in use.


JSON which has a "special" requirement to be sorted is NOT JSON.

1. The change does not change transaction order, because order of 
operations in transaction is expressed via ARRAYS. Any part which is 
expressed as an object is an arbitrary order.


2. The change is specific solely to JSON RPC which for some reason 
someone at some point decided to have sorted. All other parts of OVS do 
not use sort.


3. There is no benefit to it. Only a penalty which is paid for every 
transaction. Every time.




In general, I'd consider this change as a step back in
debuggability.  So, unless it provides a huge performance
benefit, I'd like to not have it.  And tests should be
modified in a way that doesn't require exact order of columns,
for sure.

I'll give it a shot in a scale test run once I have a chance.

Best regards, Ilya Maximets.

If, however, there are 0.5M such records, it

is a substantial CPU (and latency) penalty.

Signed-off-by: Anton Ivanov 
---
  lib/jsonrpc.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
index c8ce5362e..3b44f73fe 100644
--- a/lib/jsonrpc.c
+++ b/lib/jsonrpc.c
@@ -802,7 +802,7 @@ jsonrpc_msg_to_string(const struct jsonrpc_msg *m)
  {
  struct jsonrpc_msg *copy = jsonrpc_msg_clone(m);
  struct json *json = jsonrpc_msg_to_json(copy);
-char *s = json_to_string(json, JSSF_SORT);
+char *s = json_to_string(json, 0);
  json_destroy(json);
  return s;
  }





--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] jsonrpc: Turn sorting of json objects off in messages

2021-08-25 Thread anton . ivanov
From: Anton Ivanov 

A JSON object sort order is by definition arbitrary. OVS
parser(s) do not care about object order - the result is
loaded into a SHASH losing any order on the wire.

Having the objects sorted is a performance penalty, especially
for large objects like f.e. lflow state. That is represented
as {"table_name":{"uuid":data, "uuid":data, "uuid":data}}

Sorting in a case like this has no meaning, either to a human
or to a computer. If, however, there are 0.5M such records, it
is a substantial CPU (and latency) penalty.

Signed-off-by: Anton Ivanov 
---
 lib/jsonrpc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
index c8ce5362e..3b44f73fe 100644
--- a/lib/jsonrpc.c
+++ b/lib/jsonrpc.c
@@ -802,7 +802,7 @@ jsonrpc_msg_to_string(const struct jsonrpc_msg *m)
 {
 struct jsonrpc_msg *copy = jsonrpc_msg_clone(m);
 struct json *json = jsonrpc_msg_to_json(copy);
-char *s = json_to_string(json, JSSF_SORT);
+char *s = json_to_string(json, 0);
 json_destroy(json);
 return s;
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [OVN Patch 2/2] northd: Alter initial hash sizing for dp_groups+parallel

2021-08-25 Thread Anton Ivanov




On 25/08/2021 08:35, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

When running with dp_groups and parallelization enabled we have
a possible worst case scenario where northd connects for the
first time to a pre-populated database.
If we do not size the hash to a sufficiently big size to
accommodate for this, we end up with severe lock contention
and very slow lookups.
If both dp_groups and parallelization are enabled we have
no choice, but to allocate for a worst case scenario on the
first run and adjust later.

Signed-off-by: Anton Ivanov 
---
  northd/ovn-northd.c | 21 -
  1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 14659c407..3a5ab1d9f 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13034,7 +13034,7 @@ ovn_sb_set_lflow_logical_dp_group(
  sbrec_logical_flow_set_logical_dp_group(sbflow, dpg->dp_group);
  }
  
-static ssize_t max_seen_lflow_size = 128;

+static ssize_t max_seen_lflow_size = 0;
  
  /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,

   * constructing their contents based on the OVN_NB database. */
@@ -13048,6 +13048,25 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
  {
  struct hmap lflows;
  
+if (!max_seen_lflow_size) {

+if (use_logical_dp_groups && use_parallel_build) {
+/* We are running for the first time. The user has
+ * requested both dp_groups and parallelisation. We
+ * may encounter a very large amount of flows on first
+ * run and we have no way to guess the flow hash size.
+ * We allocate for worst a case scenario on the first
+ * run. This will be resized to a sane size later. */
+max_seen_lflow_size = INT_MAX;


This ends up being too big for most systems. First run has to be in single 
threaded mode.


+} else {
+/* If the build is not parallel, this will be resized
+ * to a correct size. If it is parallel, but without
+ * dp_groups, the sizing is irrelevant as the hash is
+ * not used for lookups during build. We resize it to
+ * a correct size after that. */
+max_seen_lflow_size = 128;
+}
+}
+
  fast_hmap_size_for(, max_seen_lflow_size);
  if (use_parallel_build) {
  update_hashrow_locks(, _locks);



--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v9] ovsdb: provide raft and command interfaces with priority

2021-08-25 Thread Anton Ivanov



On 25/08/2021 13:08, Ilya Maximets wrote:

On 8/20/21 11:16 AM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Set a soft time limit of "raft election timer"/2 on ovsdb
processing.

This improves behaviour in large heavily loaded clusters.
While it cannot fully eliminate spurious raft elections
under heavy load, it significantly decreases their number.

Processing is (to the extent possible) restarted where it
stopped on the previous iteration to ensure that sessions
towards the tail of the session list are not starved.

Signed-off-by: Anton Ivanov 
---
  ovsdb/jsonrpc-server.c | 98 --
  ovsdb/jsonrpc-server.h |  2 +-
  ovsdb/ovsdb-server.c   | 16 ++-
  ovsdb/raft.c   |  6 +++
  ovsdb/raft.h   |  3 ++
  ovsdb/storage.c| 12 ++
  ovsdb/storage.h|  2 +
  7 files changed, 124 insertions(+), 15 deletions(-)

diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c
index 351c39d8a..457e1c040 100644
--- a/ovsdb/jsonrpc-server.c
+++ b/ovsdb/jsonrpc-server.c
@@ -60,7 +60,8 @@ static struct ovsdb_jsonrpc_session 
*ovsdb_jsonrpc_session_create(
  struct ovsdb_jsonrpc_remote *, struct jsonrpc_session *, bool);
  static void ovsdb_jsonrpc_session_preremove_db(struct ovsdb_jsonrpc_remote *,
 struct ovsdb *);
-static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *);
+static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *,
+  uint64_t limit);
  static void ovsdb_jsonrpc_session_wait_all(struct ovsdb_jsonrpc_remote *);
  static void ovsdb_jsonrpc_session_get_memory_usage_all(
  const struct ovsdb_jsonrpc_remote *, struct simap *usage);
@@ -128,6 +129,9 @@ struct ovsdb_jsonrpc_server {
  bool read_only;/* This server is does not accept any
transactions that can modify the database. 
*/
  struct shash remotes;  /* Contains "struct ovsdb_jsonrpc_remote *"s. 
*/
+struct ovs_list worklist; /* List of remotes to work on. */
+bool must_wake_up; /* The processing loop must be re-run. It was
+  interrupted due to exceeding a time constraint. */
  };
  
  /* A configured remote.  This is either a passive stream listener plus a list

@@ -137,6 +141,7 @@ struct ovsdb_jsonrpc_remote {
  struct ovsdb_jsonrpc_server *server;
  struct pstream *listener;   /* Listener, if passive. */
  struct ovs_list sessions;   /* List of "struct ovsdb_jsonrpc_session"s. */
+struct ovs_list work_node;
  uint8_t dscp;
  bool read_only;
  char *role;
@@ -158,6 +163,7 @@ ovsdb_jsonrpc_server_create(bool read_only)
  struct ovsdb_jsonrpc_server *server = xzalloc(sizeof *server);
  ovsdb_server_init(>up);
  shash_init(>remotes);
+ovs_list_init(>worklist);
  server->read_only = read_only;
  return server;
  }
@@ -255,6 +261,7 @@ ovsdb_jsonrpc_server_set_remotes(struct 
ovsdb_jsonrpc_server *svr,
  
  ovsdb_jsonrpc_session_set_all_options(remote, options);

  }
+ovs_list_init(>worklist); /* Reset any pending work. */
  }
  
  static struct ovsdb_jsonrpc_remote *

@@ -280,6 +287,7 @@ ovsdb_jsonrpc_server_add_remote(struct ovsdb_jsonrpc_server 
*svr,
  remote->read_only = options->read_only;
  remote->role = nullable_xstrdup(options->role);
  shash_add(>remotes, name, remote);
+ovs_list_init(>worklist); /* Reset any pending work. */
  
  if (!listener) {

  ovsdb_jsonrpc_session_create(remote, jsonrpc_session_open(name, true),
@@ -292,10 +300,10 @@ static void
  ovsdb_jsonrpc_server_del_remote(struct shash_node *node)
  {
  struct ovsdb_jsonrpc_remote *remote = node->data;
-
  ovsdb_jsonrpc_session_close_all(remote);
  pstream_close(remote->listener);
  shash_delete(>server->remotes, node);
+ovs_list_init(>server->worklist); /* Reset any pending work. */
  free(remote->role);
  free(remote);
  }
@@ -378,32 +386,55 @@ ovsdb_jsonrpc_server_set_read_only(struct 
ovsdb_jsonrpc_server *svr,
  }
  
  void

-ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr)
+ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr, uint64_t limit)
  {
  struct shash_node *node;
+uint64_t elapsed = 0;
+uint64_t start_time = time_msec();
+struct ovsdb_jsonrpc_remote *work;
  
-SHASH_FOR_EACH (node, >remotes) {

-struct ovsdb_jsonrpc_remote *remote = node->data;
+svr->must_wake_up = false;
  
-if (remote->listener) {

+if (ovs_list_is_empty(>worklist)) {
+SHASH_FOR_EACH (node, >remotes) {
+struct ovsdb_jsonrpc_remote *remote = node->data;
+ovs_list_push_back(>worklist, >work_node);
+}
+}

If there is unfinished work, the loop below will o

[ovs-dev] [OVN Patch 1/2] northd: Resize the hash to correct parameters after build

2021-08-25 Thread anton . ivanov
From: Anton Ivanov 

Parallel builds may result in suboptimal hash bucket sizing.
In the absence of dp-groups this does not matter as the hash
is purely storage and not used for lookups during the build.

Such a hash needs to be resized to a correct size at the end
of the build to ensure that any lookups during the lflow
reconciliation phase are done as fast as possible.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 5 +
 ovs | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index af413aba4..14659c407 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13057,6 +13057,11 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 igmp_groups, meter_groups, lbs,
 bfd_connections);
 
+/* Parallel build may result in a suboptimal hash. Resize the
+ * hash to a correct size before doing lookups */
+
+hmap_expand();
+
 if (hmap_count() > max_seen_lflow_size) {
 max_seen_lflow_size = hmap_count();
 }
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN Patch 2/2] northd: Alter initial hash sizing for dp_groups+parallel

2021-08-25 Thread anton . ivanov
From: Anton Ivanov 

When running with dp_groups and parallelization enabled we have
a possible worst case scenario where northd connects for the
first time to a pre-populated database.
If we do not size the hash to a sufficiently big size to
accommodate for this, we end up with severe lock contention
and very slow lookups.
If both dp_groups and parallelization are enabled we have
no choice, but to allocate for a worst case scenario on the
first run and adjust later.

Signed-off-by: Anton Ivanov 
---
 northd/ovn-northd.c | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 14659c407..3a5ab1d9f 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -13034,7 +13034,7 @@ ovn_sb_set_lflow_logical_dp_group(
 sbrec_logical_flow_set_logical_dp_group(sbflow, dpg->dp_group);
 }
 
-static ssize_t max_seen_lflow_size = 128;
+static ssize_t max_seen_lflow_size = 0;
 
 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
  * constructing their contents based on the OVN_NB database. */
@@ -13048,6 +13048,25 @@ build_lflows(struct northd_context *ctx, struct hmap 
*datapaths,
 {
 struct hmap lflows;
 
+if (!max_seen_lflow_size) {
+if (use_logical_dp_groups && use_parallel_build) {
+/* We are running for the first time. The user has
+ * requested both dp_groups and parallelisation. We
+ * may encounter a very large amount of flows on first
+ * run and we have no way to guess the flow hash size.
+ * We allocate for worst a case scenario on the first
+ * run. This will be resized to a sane size later. */
+max_seen_lflow_size = INT_MAX;
+} else {
+/* If the build is not parallel, this will be resized
+ * to a correct size. If it is parallel, but without
+ * dp_groups, the sizing is irrelevant as the hash is
+ * not used for lookups during build. We resize it to
+ * a correct size after that. */
+max_seen_lflow_size = 128;
+}
+}
+
 fast_hmap_size_for(, max_seen_lflow_size);
 if (use_parallel_build) {
 update_hashrow_locks(, _locks);
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] ovn-northd: Fix extremely inefficient usage of lflow hash map.

2021-08-25 Thread Anton Ivanov

On 24/08/2021 22:49, Ilya Maximets wrote:

On 8/24/21 10:07 PM, Anton Ivanov wrote:

On 23/08/2021 22:36, Ilya Maximets wrote:

On 8/23/21 10:37 PM, Anton Ivanov wrote:

On 23/08/2021 21:26, Ilya Maximets wrote:

On 8/23/21 10:20 PM, Anton Ivanov wrote:

Should not be the case.

The map is pre-sized for the size from the previous iterations.

Line 12861 in my tree which is probably a few commits out of date:

   fast_hmap_size_for(, max_seen_lflow_size);

And immediately after building the lflows:

   if (hmap_count() > max_seen_lflow_size) {
   max_seen_lflow_size = hmap_count();
   }

So the map SHOULD be sized correctly - to the most recent seen lflow count.

Please, re-read the commit message.  It's a first run of the loop
and the 'max_seen_lflow_size' is default 128 at this point.

Ack,

Not using auto-resizing in single threaded mode is a bug. Thanks for fixing it.

  From that perspective the patch is a straight +1 from me.

  From the perspective of the use case stated in the commit message- I am not 
sure it addresses it.

If northd is in single-threaded mode and is tackling a GIGANTIC database, it 
may never complete the first iteration before the
expiration of the timers and everyone deciding that northd is AWOL.

Well, how do you suggest to fix that?  Obviously, we can always create
a database that northd will never be able to process in a reasonable
amount of time.  And it doesn't matter if it's single- or multi-threaded.

I will send the additional "baseline" fixes for parallel - resizing and initial 
sizing tomorrow, they are fairly trivial and have tested out OK.

However, they do not solve the fact that the overall "heatmap" with dp_groups has moved. 
A lot of processing once again happens out of the "parallel" portion and in the single 
threaded part.

Unless I am mistaken, this can be improved.

Namely, at present, after computing lflows with dp_groups they are walked in 
full, single dp flows separated into a hmap and reprocessed. That is suboptimal 
for parallel (and possibly suboptimal for single threaded).

Unless I am mistaken, when dp_groups are enabled, all lflows can be initially inserted 
into a separate "single datapath" hmap. If the dp_group for an lflow grows to 
more than one, the flow is then moved to the main lflow hmap. This way, the computation 
will generate both types of flows straight away (optionally in parallel) and there will 
be no need to do a full single threaded walk of lflows after they have been generated.

One question (so I can get some idea on which optimizations are worth it and 
which aren't). What is the percentage and overall numbers of single datapath 
lflows?

 From the DB that I have I extracted following information:

Total lflows generated : 9.916.227
Ended up in SbDB: 540.795 (462.196 has no dp_group, 78.599 has a dp_group)
On disk size of this DB with dp groups enabled is 270 MB.

So, the lflows hashmap contains ~540K flows, 460K of them are single
flows.  But still it's 20 times less than number of lflows that northd
generated.  So, performance improvement from parallelization of this
part might be not significant if dp-groups enabled.


Actually, removing a 460K flow map creation, 460K flow map walk and 
replacing a "pop"-"push" map moves with a fast merge (all of that is 
possible) should give a significant performance boost even in single 
thread. All of that is parallelizable too (with some locking).



  If disabled, it
will be very hard for both northd and SbDB to handle database of this
size even from the memory consumption standpoint.  Database will take
around 5 GB on disk.  In memory as a parsed json object, it will be huge.
I'd not advise running a setup of that size without dp groups.  Node
will run out of memory very fast.



Brgds,

A.


In this case NbDB is only 9MB in size, which is very reasonable, and
northd on my laptop takes more than 15 minutes to process it (I killed
it at this point).  With the patch applied it took only 11 seconds.
So, for me, this patch pretty much fixes the issue.  11 seconds is not
that bad, e.g. ovn-k8s configures inactivity probes for clients to 180.
It would be great to reduce, but we're not there yet.


In that case, if it is multi-threaded from the start, it should probably
grab the sizing from the lflow table hash in south db. That would be a
good approximation for the first run.

This will not work for a case where SbDB is empty for any reason while
NbDB is not.  And there is still a case where northd initially connects
to semi-empty databases and after few iterations NbDB receives a big
transaction and generates a big update for northd.


A.


A.

On 23/08/2021 21:02, Ilya Maximets wrote:

'lflow_map' is never expanded because northd always uses fast
insertion.  This leads to the case where we have a hash map
with only 128 initial buckets and every ovn_lflow_find() ends
up iterating over n_lflows / 128 en

Re: [ovs-dev] [PATCH ovn] ovn-northd: Fix extremely inefficient usage of lflow hash map.

2021-08-24 Thread Anton Ivanov

On 23/08/2021 22:36, Ilya Maximets wrote:

On 8/23/21 10:37 PM, Anton Ivanov wrote:

On 23/08/2021 21:26, Ilya Maximets wrote:

On 8/23/21 10:20 PM, Anton Ivanov wrote:

Should not be the case.

The map is pre-sized for the size from the previous iterations.

Line 12861 in my tree which is probably a few commits out of date:

  fast_hmap_size_for(, max_seen_lflow_size);

And immediately after building the lflows:

  if (hmap_count() > max_seen_lflow_size) {
  max_seen_lflow_size = hmap_count();
  }

So the map SHOULD be sized correctly - to the most recent seen lflow count.

Please, re-read the commit message.  It's a first run of the loop
and the 'max_seen_lflow_size' is default 128 at this point.

Ack,

Not using auto-resizing in single threaded mode is a bug. Thanks for fixing it.

 From that perspective the patch is a straight +1 from me.

 From the perspective of the use case stated in the commit message- I am not 
sure it addresses it.

If northd is in single-threaded mode and is tackling a GIGANTIC database, it 
may never complete the first iteration before the
expiration of the timers and everyone deciding that northd is AWOL.

Well, how do you suggest to fix that?  Obviously, we can always create
a database that northd will never be able to process in a reasonable
amount of time.  And it doesn't matter if it's single- or multi-threaded.


I will send the additional "baseline" fixes for parallel - resizing and 
initial sizing tomorrow, they are fairly trivial and have tested out OK.


However, they do not solve the fact that the overall "heatmap" with 
dp_groups has moved. A lot of processing once again happens out of the 
"parallel" portion and in the single threaded part.


Unless I am mistaken, this can be improved.

Namely, at present, after computing lflows with dp_groups they are 
walked in full, single dp flows separated into a hmap and reprocessed. 
That is suboptimal for parallel (and possibly suboptimal for single 
threaded).


Unless I am mistaken, when dp_groups are enabled, all lflows can be 
initially inserted into a separate "single datapath" hmap. If the 
dp_group for an lflow grows to more than one, the flow is then moved to 
the main lflow hmap. This way, the computation will generate both types 
of flows straight away (optionally in parallel) and there will be no 
need to do a full single threaded walk of lflows after they have been 
generated.


One question (so I can get some idea on which optimizations are worth it 
and which aren't). What is the percentage and overall numbers of single 
datapath lflows?


Brgds,

A.



In this case NbDB is only 9MB in size, which is very reasonable, and
northd on my laptop takes more than 15 minutes to process it (I killed
it at this point).  With the patch applied it took only 11 seconds.
So, for me, this patch pretty much fixes the issue.  11 seconds is not
that bad, e.g. ovn-k8s configures inactivity probes for clients to 180.
It would be great to reduce, but we're not there yet.


In that case, if it is multi-threaded from the start, it should probably
grab the sizing from the lflow table hash in south db. That would be a
good approximation for the first run.

This will not work for a case where SbDB is empty for any reason while
NbDB is not.  And there is still a case where northd initially connects
to semi-empty databases and after few iterations NbDB receives a big
transaction and generates a big update for northd.


A.


A.

On 23/08/2021 21:02, Ilya Maximets wrote:

'lflow_map' is never expanded because northd always uses fast
insertion.  This leads to the case where we have a hash map
with only 128 initial buckets and every ovn_lflow_find() ends
up iterating over n_lflows / 128 entries.  It's thousands of
logical flows or even more.  For example, it takes forever for
ovn-northd to start up with the Northbound Db from the 120 node
density-heavy test from ovn-heater, because every lookup is slower
than previous one.  I aborted the process after 15 minutes of
waiting, because there was no sign that it will converge.  With
this change applied the loop completes in only 11 seconds.

Hash map will be pre-allocated to the maximum seen number of
logical flows on a second iteration, but this doesn't help for
the first iteration when northd first time connects to a big
Northbound database, which is a common case during failover or
cluster upgrade.  And there is an even trickier case where big
NbDB transaction that explodes the number of logical flows received
on not the first run.

We can't expand the hash map in case of parallel build, so this
should be fixed separately.

CC: Anton Ivanov 
Fixes: 74daa0607c7f ("ovn-northd: Introduce parallel lflow build")
Signed-off-by: Ilya Maximets 
---
    northd/ovn-northd.c | 6 +-
    1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 3d8e21a4f..40cf957c0 10

Re: [ovs-dev] [PATCH ovn] ovn-northd: Fix extremely inefficient usage of lflow hash map.

2021-08-24 Thread Anton Ivanov

On 24/08/2021 19:16, Ilya Maximets wrote:

On 8/24/21 7:48 PM, Numan Siddique wrote:

On Tue, Aug 24, 2021 at 1:24 PM Anton Ivanov
 wrote:


On 24/08/2021 17:35, Ilya Maximets wrote:

On 8/24/21 6:25 PM, Numan Siddique wrote:

On Tue, Aug 24, 2021 at 7:56 AM Anton Ivanov
 wrote:

On 24/08/2021 12:46, Ilya Maximets wrote:

On 8/24/21 1:18 PM, Anton Ivanov wrote:

On 24/08/2021 12:05, Ilya Maximets wrote:

On 8/24/21 7:36 AM, Anton Ivanov wrote:

On 23/08/2021 22:36, Ilya Maximets wrote:

On 8/23/21 10:37 PM, Anton Ivanov wrote:

On 23/08/2021 21:26, Ilya Maximets wrote:

On 8/23/21 10:20 PM, Anton Ivanov wrote:

Should not be the case.

The map is pre-sized for the size from the previous iterations.

Line 12861 in my tree which is probably a few commits out of date:

  fast_hmap_size_for(, max_seen_lflow_size);

And immediately after building the lflows:

  if (hmap_count() > max_seen_lflow_size) {
  max_seen_lflow_size = hmap_count();
  }

So the map SHOULD be sized correctly - to the most recent seen lflow count.

Please, re-read the commit message.  It's a first run of the loop
and the 'max_seen_lflow_size' is default 128 at this point.

Ack,

Not using auto-resizing in single threaded mode is a bug. Thanks for fixing it.

 From that perspective the patch is a straight +1 from me.

 From the perspective of the use case stated in the commit message- I am 
not sure it addresses it.

If northd is in single-threaded mode and is tackling a GIGANTIC database, it 
may never complete the first iteration before the
expiration of the timers and everyone deciding that northd is AWOL.

Well, how do you suggest to fix that?  Obviously, we can always create
a database that northd will never be able to process in a reasonable
amount of time.  And it doesn't matter if it's single- or multi-threaded.

In this case NbDB is only 9MB in size, which is very reasonable, and
northd on my laptop takes more than 15 minutes to process it (I killed
it at this point).  With the patch applied it took only 11 seconds.
So, for me, this patch pretty much fixes the issue.  11 seconds is not
that bad, e.g. ovn-k8s configures inactivity probes for clients to 180.
It would be great to reduce, but we're not there yet.


In that case, if it is multi-threaded from the start, it should probably
grab the sizing from the lflow table hash in south db. That would be a
good approximation for the first run.

This will not work for a case where SbDB is empty for any reason while
NbDB is not.  And there is still a case where northd initially connects
to semi-empty databases and after few iterations NbDB receives a big
transaction and generates a big update for northd.

A partial fix is to resize to optimum size the hash after lflow processing.

I'm not sure I understand what you mean here, because resizing after
lflow processing will not help.  The lflow processing itself is the
very slow part that we're trying to make faster here.

That can be the case only with dpgroups. Otherwise lflows are just a 
destination to dump data and the bucket sizing is irrelevant because there is 
never any lookup inside lflows during processing. The lflow map is just used to 
store data. So if it is suboptimal at the exit of build_lflows() the resize 
will fix it before the first lookup shortly thereafter.

Are you running it with dpgroups enabled? In that case there are lookups inside 
lflows during build which happen under a per-bucket lock. So in addition to 
suboptimal size when searching the contention depends on the number of buckets. 
If they are too few, the system becomes heavily contended resulting in 
ridiculous computation sizes.

Oh, I see.  Indeed, without dp-groups there is no lookup during
lflow build.  I missed that.  So, yes, I agree that for a case
without dp-groups, re-sizing after lflow processing should work.
We need that for parallel case.

Current patch (use hmap_insert() that resizes if needed) helps
for all non-parallel cases.

Indeed. It should go in.


Why can't we have hmap_insert() for both parallel and non parallel configs
to start with and switch over to hmap_insert_fast() when ovn-northd
has successfully connected to SB DB and has approximated on the
more accurate hmap size ?

We can't use hmap_insert() for parallel, because resize of a hash map
will crash threads that are working on it at the moment, IIUC.

We actually can for non-dp-groups case, but the merge will become much slower. 
Example - the last version of the snapshot and monitor parallelization for OVS. 
It no longer uses pre-sized fixed hmaps because it is nearly impossible to 
presize an RFC7047 structure correctly.

For the dp-groups case we can't. Locking of the lflows which are used for 
lookups is per hash bucket. You cannot change the number of buckets in the 
middle of the run and other locking mechanisms will be more coarse. So the 
marginal benefit with dp-groups at present will become none (or even slower).


We

Re: [ovs-dev] [PATCH ovn] ovn-northd: Fix extremely inefficient usage of lflow hash map.

2021-08-24 Thread Anton Ivanov

On 24/08/2021 18:48, Numan Siddique wrote:

On Tue, Aug 24, 2021 at 1:24 PM Anton Ivanov
 wrote:


On 24/08/2021 17:35, Ilya Maximets wrote:

On 8/24/21 6:25 PM, Numan Siddique wrote:

On Tue, Aug 24, 2021 at 7:56 AM Anton Ivanov
 wrote:

On 24/08/2021 12:46, Ilya Maximets wrote:

On 8/24/21 1:18 PM, Anton Ivanov wrote:

On 24/08/2021 12:05, Ilya Maximets wrote:

On 8/24/21 7:36 AM, Anton Ivanov wrote:

On 23/08/2021 22:36, Ilya Maximets wrote:

On 8/23/21 10:37 PM, Anton Ivanov wrote:

On 23/08/2021 21:26, Ilya Maximets wrote:

On 8/23/21 10:20 PM, Anton Ivanov wrote:

Should not be the case.

The map is pre-sized for the size from the previous iterations.

Line 12861 in my tree which is probably a few commits out of date:

  fast_hmap_size_for(&lflows, max_seen_lflow_size);

And immediately after building the lflows:

  if (hmap_count(&lflows) > max_seen_lflow_size) {
      max_seen_lflow_size = hmap_count(&lflows);
  }

So the map SHOULD be sized correctly - to the most recent seen lflow count.

Please, re-read the commit message.  It's a first run of the loop
and the 'max_seen_lflow_size' is default 128 at this point.

Ack,

Not using auto-resizing in single threaded mode is a bug. Thanks for fixing it.

 From that perspective the patch is a straight +1 from me.

 From the perspective of the use case stated in the commit message- I am 
not sure it addresses it.

If northd is in single-threaded mode and is tackling a GIGANTIC database, it 
may never complete the first iteration before the
expiration of the timers and everyone deciding that northd is AWOL.

Well, how do you suggest to fix that?  Obviously, we can always create
a database that northd will never be able to process in a reasonable
amount of time.  And it doesn't matter if it's single- or multi-threaded.

In this case NbDB is only 9MB in size, which is very reasonable, and
northd on my laptop takes more than 15 minutes to process it (I killed
it at this point).  With the patch applied it took only 11 seconds.
So, for me, this patch pretty much fixes the issue.  11 seconds is not
that bad, e.g. ovn-k8s configures inactivity probes for clients to 180.
It would be great to reduce, but we're not there yet.


In that case, if it is multi-threaded from the start, it should probably
grab the sizing from the lflow table hash in south db. That would be a
good approximation for the first run.

This will not work for a case where SbDB is empty for any reason while
NbDB is not.  And there is still a case where northd initially connects
to semi-empty databases and after few iterations NbDB receives a big
transaction and generates a big update for northd.

A partial fix is to resize to optimum size the hash after lflow processing.

I'm not sure I understand what you mean here, because resizing after
lflow processing will not help.  The lflow processing itself is the
very slow part that we're trying to make faster here.

That can be the case only with dpgroups. Otherwise lflows are just a 
destination to dump data and the bucket sizing is irrelevant because there is 
never any lookup inside lflows during processing. The lflow map is just used to 
store data. So if it is suboptimal at the exit of build_lflows() the resize 
will fix it before the first lookup shortly thereafter.

Are you running it with dpgroups enabled? In that case there are lookups inside 
lflows during build which happen under a per-bucket lock. So in addition to 
suboptimal size when searching the contention depends on the number of buckets. 
If they are too few, the system becomes heavily contended resulting in 
ridiculous computation sizes.

Oh, I see.  Indeed, without dp-groups there is no lookup during
lflow build.  I missed that.  So, yes, I agree that for a case
without dp-groups, re-sizing after lflow processing should work.
We need that for parallel case.

Current patch (use hmap_insert() that resizes if needed) helps
for all non-parallel cases.

Indeed. It should go in.


Why can't we have hmap_insert() for both parallel and non parallel configs
to start with and switch over to hmap_insert_fast() when ovn-northd
has successfully connected to SB DB and has approximated on the
more accurate hmap size ?

We can't use hmap_insert() for parallel, because resize of a hash map
will crash threads that are working on it at the moment, IIUC.

We actually can for non-dp-groups case, but the merge will become much slower. 
Example - the last version of the snapshot and monitor parallelization for OVS. 
It no longer uses pre-sized fixed hmaps because it is nearly impossible to 
presize an RFC7047 structure correctly.

For the dp-groups case we can't. Locking of the lflows which are used for 
lookups is per hash bucket. You cannot change the number of buckets in the 
middle of the run and other locking mechanisms will be more coarse. So the 
marginal benefit with dp-groups at present will become none (or even slower).


We could disable parallel for first s

Re: [ovs-dev] [PATCH ovn] ovn-northd: Fix extremely inefficient usage of lflow hash map.

2021-08-24 Thread Anton Ivanov



On 24/08/2021 17:35, Ilya Maximets wrote:

On 8/24/21 6:25 PM, Numan Siddique wrote:

On Tue, Aug 24, 2021 at 7:56 AM Anton Ivanov
 wrote:


On 24/08/2021 12:46, Ilya Maximets wrote:

On 8/24/21 1:18 PM, Anton Ivanov wrote:

On 24/08/2021 12:05, Ilya Maximets wrote:

On 8/24/21 7:36 AM, Anton Ivanov wrote:

On 23/08/2021 22:36, Ilya Maximets wrote:

On 8/23/21 10:37 PM, Anton Ivanov wrote:

On 23/08/2021 21:26, Ilya Maximets wrote:

On 8/23/21 10:20 PM, Anton Ivanov wrote:

Should not be the case.

The map is pre-sized for the size from the previous iterations.

Line 12861 in my tree which is probably a few commits out of date:

 fast_hmap_size_for(&lflows, max_seen_lflow_size);

And immediately after building the lflows:

 if (hmap_count(&lflows) > max_seen_lflow_size) {
     max_seen_lflow_size = hmap_count(&lflows);
 }

So the map SHOULD be sized correctly - to the most recent seen lflow count.

Please, re-read the commit message.  It's a first run of the loop
and the 'max_seen_lflow_size' is default 128 at this point.

Ack,

Not using auto-resizing in single threaded mode is a bug. Thanks for fixing it.

From that perspective the patch is a straight +1 from me.

From the perspective of the use case stated in the commit message- I am not 
sure it addresses it.

If northd is in single-threaded mode and is tackling a GIGANTIC database, it 
may never complete the first iteration before the
expiration of the timers and everyone deciding that northd is AWOL.

Well, how do you suggest to fix that?  Obviously, we can always create
a database that northd will never be able to process in a reasonable
amount of time.  And it doesn't matter if it's single- or multi-threaded.

In this case NbDB is only 9MB in size, which is very reasonable, and
northd on my laptop takes more than 15 minutes to process it (I killed
it at this point).  With the patch applied it took only 11 seconds.
So, for me, this patch pretty much fixes the issue.  11 seconds is not
that bad, e.g. ovn-k8s configures inactivity probes for clients to 180.
It would be great to reduce, but we're not there yet.


In that case, if it is multi-threaded from the start, it should probably
grab the sizing from the lflow table hash in south db. That would be a
good approximation for the first run.

This will not work for a case where SbDB is empty for any reason while
NbDB is not.  And there is still a case where northd initially connects
to semi-empty databases and after few iterations NbDB receives a big
transaction and generates a big update for northd.

A partial fix is to resize to optimum size the hash after lflow processing.

I'm not sure I understand what you mean here, because resizing after
lflow processing will not help.  The lflow processing itself is the
very slow part that we're trying to make faster here.

That can be the case only with dpgroups. Otherwise lflows are just a 
destination to dump data and the bucket sizing is irrelevant because there is 
never any lookup inside lflows during processing. The lflow map is just used to 
store data. So if it is suboptimal at the exit of build_lflows() the resize 
will fix it before the first lookup shortly thereafter.

Are you running it with dpgroups enabled? In that case there are lookups inside 
lflows during build which happen under a per-bucket lock. So in addition to 
suboptimal size when searching the contention depends on the number of buckets. 
If they are too few, the system becomes heavily contended resulting in 
ridiculous computation sizes.

Oh, I see.  Indeed, without dp-groups there is no lookup during
lflow build.  I missed that.  So, yes, I agree that for a case
without dp-groups, re-sizing after lflow processing should work.
We need that for parallel case.

Current patch (use hmap_insert() that resizes if needed) helps
for all non-parallel cases.

Indeed. It should go in.


Why can't we have hmap_insert() for both parallel and non parallel configs
to start with and switch over to hmap_insert_fast() when ovn-northd
has successfully connected to SB DB and has approximated on the
more accurate hmap size ?

We can't use hmap_insert() for parallel, because resize of a hash map
will crash threads that are working on it at the moment, IIUC.


We actually can for non-dp-groups case, but the merge will become much slower. 
Example - the last version of the snapshot and monitor parallelization for OVS. 
It no longer uses pre-sized fixed hmaps because it is nearly impossible to 
presize an RFC7047 structure correctly.

For the dp-groups case we can't. Locking of the lflows which are used for 
lookups is per hash bucket. You cannot change the number of buckets in the 
middle of the run and other locking mechanisms will be more coarse. So the 
marginal benefit with dp-groups at present will become none (or even slower).



We could disable parallel for first several iterations, but still,
this doesn't cover all the cases.  i.e. the one where we have a big
updat

Re: [ovs-dev] [PATCH ovn] ovn-northd: Fix extremely inefficient usage of lflow hash map.

2021-08-24 Thread Anton Ivanov



On 24/08/2021 17:25, Numan Siddique wrote:

On Tue, Aug 24, 2021 at 7:56 AM Anton Ivanov
 wrote:


On 24/08/2021 12:46, Ilya Maximets wrote:

On 8/24/21 1:18 PM, Anton Ivanov wrote:

On 24/08/2021 12:05, Ilya Maximets wrote:

On 8/24/21 7:36 AM, Anton Ivanov wrote:

On 23/08/2021 22:36, Ilya Maximets wrote:

On 8/23/21 10:37 PM, Anton Ivanov wrote:

On 23/08/2021 21:26, Ilya Maximets wrote:

On 8/23/21 10:20 PM, Anton Ivanov wrote:

Should not be the case.

The map is pre-sized for the size from the previous iterations.

Line 12861 in my tree which is probably a few commits out of date:

 fast_hmap_size_for(&lflows, max_seen_lflow_size);

And immediately after building the lflows:

 if (hmap_count(&lflows) > max_seen_lflow_size) {
     max_seen_lflow_size = hmap_count(&lflows);
 }

So the map SHOULD be sized correctly - to the most recent seen lflow count.

Please, re-read the commit message.  It's a first run of the loop
and the 'max_seen_lflow_size' is default 128 at this point.

Ack,

Not using auto-resizing in single threaded mode is a bug. Thanks for fixing it.

From that perspective the patch is a straight +1 from me.

From the perspective of the use case stated in the commit message- I am not 
sure it addresses it.

If northd is in single-threaded mode and is tackling a GIGANTIC database, it 
may never complete the first iteration before the
expiration of the timers and everyone deciding that northd is AWOL.

Well, how do you suggest to fix that?  Obviously, we can always create
a database that northd will never be able to process in a reasonable
amount of time.  And it doesn't matter if it's single- or multi-threaded.

In this case NbDB is only 9MB in size, which is very reasonable, and
northd on my laptop takes more than 15 minutes to process it (I killed
it at this point).  With the patch applied it took only 11 seconds.
So, for me, this patch pretty much fixes the issue.  11 seconds is not
that bad, e.g. ovn-k8s configures inactivity probes for clients to 180.
It would be great to reduce, but we're not there yet.


In that case, if it is multi-threaded from the start, it should probably
grab the sizing from the lflow table hash in south db. That would be a
good approximation for the first run.

This will not work for a case where SbDB is empty for any reason while
NbDB is not.  And there is still a case where northd initially connects
to semi-empty databases and after few iterations NbDB receives a big
transaction and generates a big update for northd.

A partial fix is to resize to optimum size the hash after lflow processing.

I'm not sure I understand what you mean here, because resizing after
lflow processing will not help.  The lflow processing itself is the
very slow part that we're trying to make faster here.

That can be the case only with dpgroups. Otherwise lflows are just a 
destination to dump data and the bucket sizing is irrelevant because there is 
never any lookup inside lflows during processing. The lflow map is just used to 
store data. So if it is suboptimal at the exit of build_lflows() the resize 
will fix it before the first lookup shortly thereafter.

Are you running it with dpgroups enabled? In that case there are lookups inside 
lflows during build which happen under a per-bucket lock. So in addition to 
suboptimal size when searching the contention depends on the number of buckets. 
If they are too few, the system becomes heavily contended resulting in 
ridiculous computation sizes.

Oh, I see.  Indeed, without dp-groups there is no lookup during
lflow build.  I missed that.  So, yes, I agree that for a case
without dp-groups, re-sizing after lflow processing should work.
We need that for parallel case.

Current patch (use hmap_insert() that resizes if needed) helps
for all non-parallel cases.

Indeed. It should go in.


Why can't we have hmap_insert() for both parallel and non parallel configs
to start with and switch over to hmap_insert_fast() when ovn-northd
has successfully connected to SB DB and has approximated on the
more accurate hmap size ?


That is possible, but not for dp_groups.

If it is just the lflow compute, you can use hmap_insert, but that does not 
actually have any benefit. In fact, you will consume much more CPU than merging 
into a suboptimal hmap and then resizing it at the end.

For dp_groups, the locking is per hash bucket. If you change the number of 
buckets (as upon resize) your locks are no longer valid and you end up 
corrupting the data.

I am running tests on dp_groups and I am starting to think that we should 
abandon the parallelization of lflow compute altogether for the dp_groups case.

I get at best the same results and sometimes worse results. Looking at the 
picture on ovn-central node of ovn-heater the threads never ramp up to more 
than single digit percents - they are waiting on locks. Compared to that the 
brute-force lflow compute has threads ramping up to 100% and clear benefi

Re: [ovs-dev] [PATCH ovn] ovn-northd: Fix extremely inefficient usage of lflow hash map.

2021-08-24 Thread Anton Ivanov


On 24/08/2021 12:46, Ilya Maximets wrote:

On 8/24/21 1:18 PM, Anton Ivanov wrote:

On 24/08/2021 12:05, Ilya Maximets wrote:

On 8/24/21 7:36 AM, Anton Ivanov wrote:

On 23/08/2021 22:36, Ilya Maximets wrote:

On 8/23/21 10:37 PM, Anton Ivanov wrote:

On 23/08/2021 21:26, Ilya Maximets wrote:

On 8/23/21 10:20 PM, Anton Ivanov wrote:

Should not be the case.

The map is pre-sized for the size from the previous iterations.

Line 12861 in my tree which is probably a few commits out of date:

    fast_hmap_size_for(&lflows, max_seen_lflow_size);

And immediately after building the lflows:

    if (hmap_count(&lflows) > max_seen_lflow_size) {
        max_seen_lflow_size = hmap_count(&lflows);
    }

So the map SHOULD be sized correctly - to the most recent seen lflow count.

Please, re-read the commit message.  It's a first run of the loop
and the 'max_seen_lflow_size' is default 128 at this point.

Ack,

Not using auto-resizing in single threaded mode is a bug. Thanks for fixing it.

   From that perspective the patch is a straight +1 from me.

   From the perspective of the use case stated in the commit message- I am not 
sure it addresses it.

If northd is in single-threaded mode and is tackling a GIGANTIC database, it 
may never complete the first iteration before the
expiration of the timers and everyone deciding that northd is AWOL.

Well, how do you suggest to fix that?  Obviously, we can always create
a database that northd will never be able to process in a reasonable
amount of time.  And it doesn't matter if it's single- or multi-threaded.

In this case NbDB is only 9MB in size, which is very reasonable, and
northd on my laptop takes more than 15 minutes to process it (I killed
it at this point).  With the patch applied it took only 11 seconds.
So, for me, this patch pretty much fixes the issue.  11 seconds is not
that bad, e.g. ovn-k8s configures inactivity probes for clients to 180.
It would be great to reduce, but we're not there yet.


In that case, if it is multi-threaded from the start, it should probably
grab the sizing from the lflow table hash in south db. That would be a
good approximation for the first run.

This will not work for a case where SbDB is empty for any reason while
NbDB is not.  And there is still a case where northd initially connects
to semi-empty databases and after few iterations NbDB receives a big
transaction and generates a big update for northd.

A partial fix is to resize to optimum size the hash after lflow processing.

I'm not sure I understand what you mean here, because resizing after
lflow processing will not help.  The lflow processing itself is the
very slow part that we're trying to make faster here.

That can be the case only with dpgroups. Otherwise lflows are just a 
destination to dump data and the bucket sizing is irrelevant because there is 
never any lookup inside lflows during processing. The lflow map is just used to 
store data. So if it is suboptimal at the exit of build_lflows() the resize 
will fix it before the first lookup shortly thereafter.

Are you running it with dpgroups enabled? In that case there are lookups inside 
lflows during build which happen under a per-bucket lock. So in addition to 
suboptimal size when searching the contention depends on the number of buckets. 
If they are too few, the system becomes heavily contended resulting in 
ridiculous computation sizes.

Oh, I see.  Indeed, without dp-groups there is no lookup during
lflow build.  I missed that.  So, yes, I agree that for a case
without dp-groups, re-sizing after lflow processing should work.
We need that for parallel case.

Current patch (use hmap_insert() that resizes if needed) helps
for all non-parallel cases.


Indeed. It should go in.

I will sort out the other cases to the extent possible.

Brgds,

A.



I'm mostly running dp-groups + non-parallel which is a default
case for ovn-heater/ovn-k8s.


For the case of "dpgroups + parallel + first iteration + pre-existing large 
database" there is no cure short of pre-allocating the hash to maximum size.

Yeah, dp-groups + parallel is a hard case.


I am scale testing that as well as resize (for non-dp-groups cases) at present.

Brgds,

A.


If the sizing was correct - 99.9% of the case this will be a noop.

If the sizing was incorrect, it will be resized so that the DP searches and all 
other ops which were recently added for flow reduction will work optimally.

This still does not work for lflow compute with dpgroups + parallel upon 
initial connect and without a SB database to use for size guidance. It will 
work for all other cases.

I will send two separate patches to address the cases which can be easily 
addressed and see what can be done with the dp+parallel upon initial connect to 
an empty sb database.

Brgds,

A


A.


A.

On 23/08/2021 21:02, Ilya Maximets wrote:

'lflow_map' is never expanded because northd always uses fast
insertion.  This leads to the case where we have a hash map
with 

Re: [ovs-dev] [PATCH ovn] ovn-northd: Fix extremely inefficient usage of lflow hash map.

2021-08-24 Thread Anton Ivanov


On 24/08/2021 12:05, Ilya Maximets wrote:

On 8/24/21 7:36 AM, Anton Ivanov wrote:

On 23/08/2021 22:36, Ilya Maximets wrote:

On 8/23/21 10:37 PM, Anton Ivanov wrote:

On 23/08/2021 21:26, Ilya Maximets wrote:

On 8/23/21 10:20 PM, Anton Ivanov wrote:

Should not be the case.

The map is pre-sized for the size from the previous iterations.

Line 12861 in my tree which is probably a few commits out of date:

   fast_hmap_size_for(&lflows, max_seen_lflow_size);

And immediately after building the lflows:

   if (hmap_count(&lflows) > max_seen_lflow_size) {
       max_seen_lflow_size = hmap_count(&lflows);
   }

So the map SHOULD be sized correctly - to the most recent seen lflow count.

Please, re-read the commit message.  It's a first run of the loop
and the 'max_seen_lflow_size' is default 128 at this point.

Ack,

Not using auto-resizing in single threaded mode is a bug. Thanks for fixing it.

  From that perspective the patch is a straight +1 from me.

  From the perspective of the use case stated in the commit message- I am not 
sure it addresses it.

If northd is in single-threaded mode and is tackling a GIGANTIC database, it 
may never complete the first iteration before the
expiration of the timers and everyone deciding that northd is AWOL.

Well, how do you suggest to fix that?  Obviously, we can always create
a database that northd will never be able to process in a reasonable
amount of time.  And it doesn't matter if it's single- or multi-threaded.

In this case NbDB is only 9MB in size, which is very reasonable, and
northd on my laptop takes more than 15 minutes to process it (I killed
it at this point).  With the patch applied it took only 11 seconds.
So, for me, this patch pretty much fixes the issue.  11 seconds is not
that bad, e.g. ovn-k8s configures inactivity probes for clients to 180.
It would be great to reduce, but we're not there yet.


In that case, if it is multi-threaded from the start, it should probably
grab the sizing from the lflow table hash in south db. That would be a
good approximation for the first run.

This will not work for a case where SbDB is empty for any reason while
NbDB is not.  And there is still a case where northd initially connects
to semi-empty databases and after few iterations NbDB receives a big
transaction and generates a big update for northd.

A partial fix is to resize to optimum size the hash after lflow processing.

I'm not sure I understand what you mean here, because resizing after
lflow processing will not help.  The lflow processing itself is the
very slow part that we're trying to make faster here.


That can be the case only with dpgroups. Otherwise lflows are just a 
destination to dump data and the bucket sizing is irrelevant because there is 
never any lookup inside lflows during processing. The lflow map is just used to 
store data. So if it is suboptimal at the exit of build_lflows() the resize 
will fix it before the first lookup shortly thereafter.

Are you running it with dpgroups enabled? In that case there are lookups inside 
lflows during build which happen under a per-bucket lock. So in addition to 
suboptimal size when searching the contention depends on the number of buckets. 
If they are too few, the system becomes heavily contended resulting in 
ridiculous computation sizes.

For the case of "dpgroups + parallel + first iteration + pre-existing large 
database" there is no cure short of pre-allocating the hash to maximum size.

I am scale testing that as well as resize (for non-dp-groups cases) at present.

Brgds,

A.




If the sizing was correct - 99.9% of the case this will be a noop.

If the sizing was incorrect, it will be resized so that the DP searches and all 
other ops which were recently added for flow reduction will work optimally.

This still does not work for lflow compute with dpgroups + parallel upon 
initial connect and without a SB database to use for size guidance. It will 
work for all other cases.

I will send two separate patches to address the cases which can be easily 
addressed and see what can be done with the dp+parallel upon initial connect to 
an empty sb database.

Brgds,

A


A.


A.

On 23/08/2021 21:02, Ilya Maximets wrote:

'lflow_map' is never expanded because northd always uses fast
insertion.  This leads to the case where we have a hash map
with only 128 initial buckets and every ovn_lflow_find() ends
up iterating over n_lflows / 128 entries.  It's thousands of
logical flows or even more.  For example, it takes forever for
ovn-northd to start up with the Northbound Db from the 120 node
density-heavy test from ovn-heater, because every lookup is slower
than previous one.  I aborted the process after 15 minutes of
waiting, because there was no sign that it will converge.  With
this change applied the loop completes in only 11 seconds.

Hash map will be pre-allocated to the maximum seen number of
logical flows on a second iteration, but this doesn't help for
the first iteration w

Re: [ovs-dev] [PATCH ovn] ovn-northd: Fix extremely inefficient usage of lflow hash map.

2021-08-23 Thread Anton Ivanov

On 23/08/2021 22:36, Ilya Maximets wrote:

On 8/23/21 10:37 PM, Anton Ivanov wrote:

On 23/08/2021 21:26, Ilya Maximets wrote:

On 8/23/21 10:20 PM, Anton Ivanov wrote:

Should not be the case.

The map is pre-sized for the size from the previous iterations.

Line 12861 in my tree which is probably a few commits out of date:

  fast_hmap_size_for(&lflows, max_seen_lflow_size);

And immediately after building the lflows:

  if (hmap_count(&lflows) > max_seen_lflow_size) {
      max_seen_lflow_size = hmap_count(&lflows);
  }

So the map SHOULD be sized correctly - to the most recent seen lflow count.

Please, re-read the commit message.  It's a first run of the loop
and the 'max_seen_lflow_size' is default 128 at this point.

Ack,

Not using auto-resizing in single threaded mode is a bug. Thanks for fixing it.

 From that perspective the patch is a straight +1 from me.

 From the perspective of the use case stated in the commit message- I am not 
sure it addresses it.

If northd is in single-threaded mode and is tackling a GIGANTIC database, it 
may never complete the first iteration before the
expiration of the timers and everyone deciding that northd is AWOL.

Well, how do you suggest to fix that?  Obviously, we can always create
a database that northd will never be able to process in a reasonable
amount of time.  And it doesn't matter if it's single- or multi-threaded.

In this case NbDB is only 9MB in size, which is very reasonable, and
northd on my laptop takes more than 15 minutes to process it (I killed
it at this point).  With the patch applied it took only 11 seconds.
So, for me, this patch pretty much fixes the issue.  11 seconds is not
that bad, e.g. ovn-k8s configures inactivity probes for clients to 180.
It would be great to reduce, but we're not there yet.


In that case, if it is multi-threaded from the start, it should probably
grab the sizing from the lflow table hash in south db. That would be a
good approximation for the first run.

This will not work for a case where SbDB is empty for any reason while
NbDB is not.  And there is still a case where northd initially connects
to semi-empty databases and after few iterations NbDB receives a big
transaction and generates a big update for northd.


A partial fix is to resize to optimum size the hash after lflow processing.

If the sizing was correct - 99.9% of the case this will be a noop.

If the sizing was incorrect, it will be resized so that the DP searches 
and all other ops which were recently added for flow reduction will work 
optimally.


This still does not work for lflow compute with dpgroups + parallel upon 
initial connect and without a SB database to use for size guidance. It 
will work for all other cases.


I will send two separate patches to address the cases which can be 
easily addressed and see what can be done with the dp+parallel upon 
initial connect to an empty sb database.


Brgds,

A




A.


A.

On 23/08/2021 21:02, Ilya Maximets wrote:

'lflow_map' is never expanded because northd always uses fast
insertion.  This leads to the case where we have a hash map
with only 128 initial buckets and every ovn_lflow_find() ends
up iterating over n_lflows / 128 entries.  It's thousands of
logical flows or even more.  For example, it takes forever for
ovn-northd to start up with the Northbound Db from the 120 node
density-heavy test from ovn-heater, because every lookup is slower
than previous one.  I aborted the process after 15 minutes of
waiting, because there was no sign that it will converge.  With
this change applied the loop completes in only 11 seconds.

Hash map will be pre-allocated to the maximum seen number of
logical flows on a second iteration, but this doesn't help for
the first iteration when northd first time connects to a big
Northbound database, which is a common case during failover or
cluster upgrade.  And there is an even trickier case where big
NbDB transaction that explodes the number of logical flows received
on not the first run.

We can't expand the hash map in case of parallel build, so this
should be fixed separately.

CC: Anton Ivanov 
Fixes: 74daa0607c7f ("ovn-northd: Introduce parallel lflow build")
Signed-off-by: Ilya Maximets 
---
    northd/ovn-northd.c | 6 +-
    1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 3d8e21a4f..40cf957c0 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -4387,7 +4387,11 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
   nullable_xstrdup(ctrl_meter),
   ovn_lflow_hint(stage_hint), where);
    hmapx_add(>od_group, od);
-    hmap_insert_fast(lflow_map, >hmap_node, hash);
+    if (!use_parallel_build) {
+    hmap_insert(lflow_map, >hmap_node, hash);
+    } else {
+    hmap_insert_fast(lflow_map, >hmap_node, hash);
+    }
    }
      /* Adds a row with the specified contents to the Logical_Flow tabl

Re: [ovs-dev] [PATCH ovn] ovn-northd: Fix extremely inefficient usage of lflow hash map.

2021-08-23 Thread Anton Ivanov

On 23/08/2021 21:26, Ilya Maximets wrote:

On 8/23/21 10:20 PM, Anton Ivanov wrote:

Should not be the case.

The map is pre-sized for the size from the previous iterations.

Line 12861 in my tree which is probably a few commits out of date:

     fast_hmap_size_for(&lflows, max_seen_lflow_size);

And immediately after building the lflows:

     if (hmap_count(&lflows) > max_seen_lflow_size) {
         max_seen_lflow_size = hmap_count(&lflows);
     }

So the map SHOULD be sized correctly - to the most recent seen lflow count.

Please, re-read the commit message.  It's a first run of the loop
and the 'max_seen_lflow_size' is default 128 at this point.


Ack,

Not using auto-resizing in single threaded mode is a bug. Thanks for 
fixing it.


From that perspective the patch is a straight +1 from me.

From the perspective of the use case stated in the commit message, I am
not sure it addresses it.


If northd is in single-threaded mode and is tackling a GIGANTIC 
database, it may never complete the first iteration before the 
expiration of the timers and everyone deciding that northd is AWOL.


In that case, if it is multi-threaded from the start, it should probably 
grab the sizing from the lflow table hash in south db. That would be a 
good approximation for the first run.


A.




A.

On 23/08/2021 21:02, Ilya Maximets wrote:

'lflow_map' is never expanded because northd always uses fast
insertion.  This leads to the case where we have a hash map
with only 128 initial buckets and every ovn_lflow_find() ends
up iterating over n_lflows / 128 entries.  It's thousands of
logical flows or even more.  For example, it takes forever for
ovn-northd to start up with the Northbound Db from the 120 node
density-heavy test from ovn-heater, because every lookup is slower
than previous one.  I aborted the process after 15 minutes of
waiting, because there was no sign that it will converge.  With
this change applied the loop completes in only 11 seconds.

Hash map will be pre-allocated to the maximum seen number of
logical flows on a second iteration, but this doesn't help for
the first iteration, when northd connects to a big Northbound
database for the first time, which is a common case during
failover or cluster upgrade.  And there is an even trickier case
where a big NbDB transaction that explodes the number of logical
flows is received on a run other than the first.

We can't expand the hash map in case of parallel build, so this
should be fixed separately.

CC: Anton Ivanov 
Fixes: 74daa0607c7f ("ovn-northd: Introduce parallel lflow build")
Signed-off-by: Ilya Maximets 
---
   northd/ovn-northd.c | 6 +-
   1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 3d8e21a4f..40cf957c0 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -4387,7 +4387,11 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
  nullable_xstrdup(ctrl_meter),
  ovn_lflow_hint(stage_hint), where);
   hmapx_add(>od_group, od);
-    hmap_insert_fast(lflow_map, >hmap_node, hash);
+    if (!use_parallel_build) {
+    hmap_insert(lflow_map, >hmap_node, hash);
+    } else {
+    hmap_insert_fast(lflow_map, >hmap_node, hash);
+    }
   }
     /* Adds a row with the specified contents to the Logical_Flow table. */






--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] ovn-northd: Fix extremely inefficient usage of lflow hash map.

2021-08-23 Thread Anton Ivanov

Should not be the case.

The map is pre-sized for the size from the previous iterations.

Line 12861 in my tree which is probably a few commits out of date:

    fast_hmap_size_for(&lflows, max_seen_lflow_size);

And immediately after building the lflows:

    if (hmap_count(&lflows) > max_seen_lflow_size) {
        max_seen_lflow_size = hmap_count(&lflows);
    }

So the map SHOULD be sized correctly - to the most recent seen lflow count.

A.

On 23/08/2021 21:02, Ilya Maximets wrote:

'lflow_map' is never expanded because northd always uses fast
insertion.  This leads to the case where we have a hash map
with only 128 initial buckets and every ovn_lflow_find() ends
up iterating over n_lflows / 128 entries.  It's thousands of
logical flows or even more.  For example, it takes forever for
ovn-northd to start up with the Northbound Db from the 120 node
density-heavy test from ovn-heater, because every lookup is slower
than previous one.  I aborted the process after 15 minutes of
waiting, because there was no sign that it will converge.  With
this change applied the loop completes in only 11 seconds.

Hash map will be pre-allocated to the maximum seen number of
logical flows on a second iteration, but this doesn't help for
the first iteration, when northd connects to a big Northbound
database for the first time, which is a common case during
failover or cluster upgrade.  And there is an even trickier case
where a big NbDB transaction that explodes the number of logical
flows is received on a run other than the first.

We can't expand the hash map in case of parallel build, so this
should be fixed separately.

CC: Anton Ivanov 
Fixes: 74daa0607c7f ("ovn-northd: Introduce parallel lflow build")
Signed-off-by: Ilya Maximets 
---
  northd/ovn-northd.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 3d8e21a4f..40cf957c0 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -4387,7 +4387,11 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct 
ovn_datapath *od,
 nullable_xstrdup(ctrl_meter),
 ovn_lflow_hint(stage_hint), where);
  hmapx_add(>od_group, od);
-hmap_insert_fast(lflow_map, >hmap_node, hash);
+if (!use_parallel_build) {
+hmap_insert(lflow_map, >hmap_node, hash);
+} else {
+hmap_insert_fast(lflow_map, >hmap_node, hash);
+}
  }
  
  /* Adds a row with the specified contents to the Logical_Flow table. */



--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v9] ovsdb: provide raft and command interfaces with priority

2021-08-20 Thread anton . ivanov
From: Anton Ivanov 

Set a soft time limit of "raft election timer"/2 on ovsdb
processing.

This improves behaviour in large heavily loaded clusters.
While it cannot fully eliminate spurious raft elections
under heavy load, it significantly decreases their number.

Processing is (to the extent possible) restarted where it
stopped on the previous iteration to ensure that sessions
towards the tail of the session list are not starved.

Signed-off-by: Anton Ivanov 
---
 ovsdb/jsonrpc-server.c | 98 --
 ovsdb/jsonrpc-server.h |  2 +-
 ovsdb/ovsdb-server.c   | 16 ++-
 ovsdb/raft.c   |  6 +++
 ovsdb/raft.h   |  3 ++
 ovsdb/storage.c| 12 ++
 ovsdb/storage.h|  2 +
 7 files changed, 124 insertions(+), 15 deletions(-)

diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c
index 351c39d8a..457e1c040 100644
--- a/ovsdb/jsonrpc-server.c
+++ b/ovsdb/jsonrpc-server.c
@@ -60,7 +60,8 @@ static struct ovsdb_jsonrpc_session 
*ovsdb_jsonrpc_session_create(
 struct ovsdb_jsonrpc_remote *, struct jsonrpc_session *, bool);
 static void ovsdb_jsonrpc_session_preremove_db(struct ovsdb_jsonrpc_remote *,
struct ovsdb *);
-static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *);
+static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *,
+  uint64_t limit);
 static void ovsdb_jsonrpc_session_wait_all(struct ovsdb_jsonrpc_remote *);
 static void ovsdb_jsonrpc_session_get_memory_usage_all(
 const struct ovsdb_jsonrpc_remote *, struct simap *usage);
@@ -128,6 +129,9 @@ struct ovsdb_jsonrpc_server {
 bool read_only;/* This server is does not accept any
   transactions that can modify the database. */
 struct shash remotes;  /* Contains "struct ovsdb_jsonrpc_remote *"s. */
+struct ovs_list worklist; /* List of remotes to work on. */
+bool must_wake_up; /* The processing loop must be re-run. It was
+  interrupted due to exceeding a time constraint. */
 };
 
 /* A configured remote.  This is either a passive stream listener plus a list
@@ -137,6 +141,7 @@ struct ovsdb_jsonrpc_remote {
 struct ovsdb_jsonrpc_server *server;
 struct pstream *listener;   /* Listener, if passive. */
 struct ovs_list sessions;   /* List of "struct ovsdb_jsonrpc_session"s. */
+struct ovs_list work_node;
 uint8_t dscp;
 bool read_only;
 char *role;
@@ -158,6 +163,7 @@ ovsdb_jsonrpc_server_create(bool read_only)
 struct ovsdb_jsonrpc_server *server = xzalloc(sizeof *server);
 ovsdb_server_init(>up);
 shash_init(>remotes);
+ovs_list_init(>worklist);
 server->read_only = read_only;
 return server;
 }
@@ -255,6 +261,7 @@ ovsdb_jsonrpc_server_set_remotes(struct 
ovsdb_jsonrpc_server *svr,
 
 ovsdb_jsonrpc_session_set_all_options(remote, options);
 }
+ovs_list_init(>worklist); /* Reset any pending work. */
 }
 
 static struct ovsdb_jsonrpc_remote *
@@ -280,6 +287,7 @@ ovsdb_jsonrpc_server_add_remote(struct ovsdb_jsonrpc_server 
*svr,
 remote->read_only = options->read_only;
 remote->role = nullable_xstrdup(options->role);
 shash_add(>remotes, name, remote);
+ovs_list_init(>worklist); /* Reset any pending work. */
 
 if (!listener) {
 ovsdb_jsonrpc_session_create(remote, jsonrpc_session_open(name, true),
@@ -292,10 +300,10 @@ static void
 ovsdb_jsonrpc_server_del_remote(struct shash_node *node)
 {
 struct ovsdb_jsonrpc_remote *remote = node->data;
-
 ovsdb_jsonrpc_session_close_all(remote);
 pstream_close(remote->listener);
 shash_delete(>server->remotes, node);
+ovs_list_init(>server->worklist); /* Reset any pending work. */
 free(remote->role);
 free(remote);
 }
@@ -378,32 +386,55 @@ ovsdb_jsonrpc_server_set_read_only(struct 
ovsdb_jsonrpc_server *svr,
 }
 
 void
-ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr)
+ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr, uint64_t limit)
 {
 struct shash_node *node;
+uint64_t elapsed = 0;
+uint64_t start_time = time_msec();
+struct ovsdb_jsonrpc_remote *work;
 
-SHASH_FOR_EACH (node, >remotes) {
-struct ovsdb_jsonrpc_remote *remote = node->data;
+svr->must_wake_up = false;
 
-if (remote->listener) {
+if (ovs_list_is_empty(>worklist)) {
+SHASH_FOR_EACH (node, >remotes) {
+struct ovsdb_jsonrpc_remote *remote = node->data;
+ovs_list_push_back(>worklist, >work_node);
+}
+}
+
+LIST_FOR_EACH_POP (work, work_node, >worklist) {
+if (work->listener) {
 struct stream *stream;
 int error;
 
-error = pstream_accept(remote->listener, );
+

[ovs-dev] [PATCH v3] Replace DIY AES with openssl if openssl is available

2021-08-19 Thread anton . ivanov
From: Anton Ivanov 

This allows leveraging the OpenSSL implementation, which can use
hardware crypto on supported platforms.

UUID generation speed is improved by ~ 12% on an AMD Ryzen with
support for AES instructions.

Signed-off-by: Anton Ivanov 
---
 lib/aes128.c| 79 +++--
 lib/aes128.h| 13 +---
 lib/uuid.c  |  6 ++--
 tests/test-aes128.c |  6 ++--
 4 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/lib/aes128.c b/lib/aes128.c
index 98447d14b..8236e8ba0 100644
--- a/lib/aes128.c
+++ b/lib/aes128.c
@@ -28,6 +28,75 @@
 
 #include "util.h"
 
+#ifdef HAVE_OPENSSL
+
+#include 
+#include 
+#include 
+#include 
+#include "entropy.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(aes);
+
+struct aes128 {
+EVP_CIPHER_CTX *ctx;
+};
+
+void *aes128_schedule(const uint8_t key[16])
+{
+uint8_t iv[16];
+
+struct aes128 *aes = xmalloc(sizeof *aes);
+
+aes->ctx = EVP_CIPHER_CTX_new();
+memset(iv, 0, sizeof iv);
+if (EVP_EncryptInit_ex(aes->ctx, EVP_aes_128_cbc(), NULL, key, iv) != 1) {
+unsigned long ssl_error = ERR_get_error();
+
+ERR_load_crypto_strings();
+VLOG_FATAL("Encryption init failed. Error %s",
+   ERR_error_string(ssl_error, NULL));
+}
+return aes;
+}
+
+void aes128_encrypt(void *aes, const void *plain, void *cipher)
+{
+int len;
+struct aes128 *aes_ctx = aes;
+
+if (1 != EVP_EncryptUpdate(aes_ctx->ctx, cipher, , plain, 16)) {
+unsigned long ssl_error = ERR_get_error();
+
+ERR_load_crypto_strings();
+VLOG_FATAL("Encryption failed. Error %s",
+   ERR_error_string(ssl_error, NULL));
+}
+}
+
+#else
+
+struct aes128 {
+uint32_t rk[128/8 + 28];
+};
+
+void *aes128_schedule(const uint8_t key[16])
+{
+return ovs_aes128_schedule(key);
+}
+
+void aes128_encrypt(void *aes, const void *input_, void *output_)
+{
+ovs_aes128_encrypt(aes, input_, output_);
+}
+
+#endif
+
+struct ovs_aes128 {
+uint32_t rk[128/8 + 28];
+};
+
 static const uint32_t Te0[256] = {
 0xc66363a5U, 0xf87c7c84U, 0xee99U, 0xf67b7b8dU,
 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
@@ -390,8 +459,9 @@ put_u32(uint8_t *p, uint32_t x)
 
 /* Expands 128-bit 'key' into the encryption key 'schedule'. */
 void
-aes128_schedule(struct aes128 *aes, const uint8_t key[16])
+*ovs_aes128_schedule(const uint8_t key[16])
 {
+struct ovs_aes128 *aes = xmalloc(sizeof *aes);
 uint32_t *rk = aes->rk;
 int i;
 
@@ -412,14 +482,16 @@ aes128_schedule(struct aes128 *aes, const uint8_t key[16])
 rk[7] = rk[3] ^ rk[6];
 }
 ovs_assert(rk == >rk[40]);
+return aes;
 }
 
 void
-aes128_encrypt(const struct aes128 *aes, const void *input_, void *output_)
+ovs_aes128_encrypt(void *aes, const void *input_, void *output_)
 {
 const uint8_t *input = input_;
 uint8_t *output = output_;
-const uint32_t *rk = aes->rk;
+struct ovs_aes128 *ovs_aes = aes;
+const uint32_t *rk = ovs_aes->rk;
 uint32_t s0, s1, s2, s3;
 uint32_t t0, t1, t2, t3;
 int r;
@@ -507,3 +579,4 @@ aes128_encrypt(const struct aes128 *aes, const void 
*input_, void *output_)
   ^ rk[3]);
 put_u32(output + 12, s3);
 }
+
diff --git a/lib/aes128.h b/lib/aes128.h
index f0f55d7cf..3e04e00c4 100644
--- a/lib/aes128.h
+++ b/lib/aes128.h
@@ -27,11 +27,14 @@
 
 #include 
 
-struct aes128 {
-uint32_t rk[128/8 + 28];
-};
 
-void aes128_schedule(struct aes128 *, const uint8_t key[16]);
-void aes128_encrypt(const struct aes128 *, const void *, void *);
+void *aes128_schedule(const uint8_t key[16]);
+void aes128_encrypt(void *, const void *, void *);
+
+/* These are exposed for unit test purposes. */
+
+
+void *ovs_aes128_schedule(const uint8_t key[16]);
+void ovs_aes128_encrypt(void *, const void *, void *);
 
 #endif  /* aes128.h */
diff --git a/lib/uuid.c b/lib/uuid.c
index 8a16606da..3e14dcb5f 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -36,7 +36,7 @@
 
 VLOG_DEFINE_THIS_MODULE(uuid);
 
-static struct aes128 key;
+static void *key;
 static uint64_t counter[2];
 BUILD_ASSERT_DECL(sizeof counter == 16);
 
@@ -164,7 +164,7 @@ uuid_generate(struct uuid *uuid)
 ovs_mutex_unlock();
 
 /* AES output is exactly 16 bytes, so we encrypt directly into 'uuid'. */
-aes128_encrypt(, copy, uuid);
+aes128_encrypt(key, copy, uuid);
 
 uuid_set_bits_v4(uuid);
 
@@ -370,7 +370,7 @@ do_init(void)
 
 /* Generate key. */
 BUILD_ASSERT(sizeof sha1 >= 16);
-aes128_schedule(, sha1);
+key = aes128_schedule(sha1);
 
 /* Generate initial counter. */
 get_entropy_or_die(counter, sizeof counter);
diff --git a/tests/test-aes128.c b/tests/test-aes128.c
index 7960551be..8706a7c7c 100644
--- a/tests/test-aes128.c
+++ b/tests/test-aes128.c
@@ -46,7 +46,7 @@ error:
 static void
 test_aes128_main(int argc, char *ar

  1   2   3   4   5   >