On 7/1/20 4:10 PM, Han Zhou wrote:
> Hi Dumitru,
> 
> Just a few minor comments. With them addressed:
> Acked-by: Han Zhou <[email protected] <mailto:[email protected]>>
> 
> 
> On Thu, Jun 18, 2020 at 1:45 PM Dumitru Ceara <[email protected]
> <mailto:[email protected]>> wrote:
>>
>> Adds a generic recovery mechanism which triggers an IDL retry with fast
>> resync disabled in case the IDL has detected that it ended up in an
>> inconsistent state due to other bugs in the ovsdb-server/ovsdb-idl
>> implementation.
>>
>> Additionally, this commit also:
>> - bumps IDL semantic error logs to level ERR to make them more
>>   visible.
>> - triggers an IDL retry in cases when the IDL client used to try to
>>   recover (i.e., trying to add an existing row, trying to remove a non
>>   existent row). This is required because even though the specific
>>   inconsistency can be fixed locally the client has no clue about the
>>   state of the server. For example, if the server hit a bug and isn't
>>   accepting any other transactions from its cluster leader the client
>>   will have no other chance of detecting this.
> 
> I think this is not a good example. In this case even if we retry we
> could still connect to the same stale server and get stale data
> (normally such servers should inform clients that they are disconnect
> from the cluster and client IDL should retry other servers, but we are
> talking about bug situation). The change here is in fact useful when the
> server is already recovered from bugs (probably with operator
> intervention), but clients already have inconsistent data, and now it is
> an opportunity to correct all inconsistency on the client side. I'd
> suggest removing the example.
> 

Hi Han,

Thanks for the review. Sure, I'll remove this part.

>>
>> CC: Andy Zhou <[email protected] <mailto:[email protected]>>
>> CC: Han Zhou <[email protected] <mailto:[email protected]>>
>> CC: Ilya Maximets <[email protected] <mailto:[email protected]>>
>> Fixes: db2b5757328c ("lib: add monitor2 support in ovsdb-idl.")
>> Signed-off-by: Dumitru Ceara <[email protected]
> <mailto:[email protected]>>
>>
>> ---
>> V7:
>> - Address Ilya's comments:
>>   - improve result returning in ovsdb_idl_process_update2
>> - For consistency use the same way of returning results in
>>   ovsdb_idl_process_update and make error handling generic.
>> - Bump semantic log level to ERR to make them more visible and force IDL
>>   retry on every semantic error.
>> ---
>>  lib/ovsdb-idl.c | 169
> ++++++++++++++++++++++++++++++++++++--------------------
>>  1 file changed, 109 insertions(+), 60 deletions(-)
>>
>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>> index 0a18261..ef3b97b 100644
>> --- a/lib/ovsdb-idl.c
>> +++ b/lib/ovsdb-idl.c
>> @@ -321,14 +321,19 @@ static bool
> ovsdb_idl_handle_monitor_canceled(struct ovsdb_idl *,
>>  static void ovsdb_idl_db_parse_update(struct ovsdb_idl_db *,
>>                                        const struct json *table_updates,
>>                                        enum ovsdb_idl_monitor_method
> method);
>> -static bool ovsdb_idl_process_update(struct ovsdb_idl_table *,
>> -                                     const struct uuid *,
>> -                                     const struct json *old,
>> -                                     const struct json *new);
>> -static bool ovsdb_idl_process_update2(struct ovsdb_idl_table *,
>> -                                      const struct uuid *,
>> -                                      const char *operation,
>> -                                      const struct json *row);
>> +enum update_result {
>> +    OVSDB_IDL_UPDATE_DB_CHANGED,
>> +    OVSDB_IDL_UPDATE_NO_CHANGES,
>> +    OVSDB_IDL_UPDATE_INCONSISTENT,
>> +};
>> +static enum update_result ovsdb_idl_process_update(struct
> ovsdb_idl_table *,
>> +                                                   const struct uuid *,
>> +                                                   const struct json
> *old,
>> +                                                   const struct json
> *new);
>> +static enum update_result ovsdb_idl_process_update2(struct
> ovsdb_idl_table *,
>> +                                                    const struct uuid *,
>> +                                                    const char
> *operation,
>> +                                                    const struct json
> *row);
>>  static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const struct
> json *);
>>  static void ovsdb_idl_delete_row(struct ovsdb_idl_row *);
>>  static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct
> json *);
>> @@ -2418,6 +2423,7 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db,
>>                                        version_suffix,
> table->class_->name);
>>          }
>>          SHASH_FOR_EACH (table_node, json_object(table_update)) {
>> +            enum update_result result = OVSDB_IDL_UPDATE_NO_CHANGES;
>>              const struct json *row_update = table_node->data;
>>              struct uuid uuid;
>>
>> @@ -2450,13 +2456,13 @@ ovsdb_idl_db_parse_update__(struct
> ovsdb_idl_db *db,
>>                      operation = ops[i];
>>                      row = shash_find_data(json_object(row_update),
> operation);
>>
>> -                    if (row)  {
>> -                        if (ovsdb_idl_process_update2(table, &uuid,
> operation,
>> -                                                      row)) {
>> -                            db->change_seqno++;
>> -                        }
>> -                        break;
>> +                    if (!row) {
>> +                        continue;
>>                      }
>> +
>> +                    result = ovsdb_idl_process_update2(table, &uuid,
>> +                                                       operation, row);
>> +                    break;
>>                  }
>>
>>                  /* row_update2 should contain one of the objects */
>> @@ -2487,10 +2493,24 @@ ovsdb_idl_db_parse_update__(struct
> ovsdb_idl_db *db,
>>                                                "and \"new\" members");
>>                  }
>>
>> -                if (ovsdb_idl_process_update(table, &uuid, old_json,
>> -                                             new_json)) {
>> -                    db->change_seqno++;
>> -                }
>> +                result = ovsdb_idl_process_update(table, &uuid, old_json,
>> +                                                  new_json);
>> +            }
>> +
>> +            switch (result) {
>> +            case OVSDB_IDL_UPDATE_DB_CHANGED:
>> +                db->change_seqno++;
>> +                break;
>> +            case OVSDB_IDL_UPDATE_NO_CHANGES:
>> +                break;
>> +            case OVSDB_IDL_UPDATE_INCONSISTENT:
>> +                memset(&db->last_id, 0, sizeof db->last_id);
>> +                ovsdb_idl_retry(db->idl);
>> +                return ovsdb_error(NULL,
>> +                                   "<row_update%s> received for
> inconsistent "
>> +                                   "IDL: reconnecting IDL and resync
> all "
>> +                                   "data",
>> +                                   version_suffix);
>>              }
>>          }
>>      }
>> @@ -2523,9 +2543,22 @@ ovsdb_idl_get_row(struct ovsdb_idl_table
> *table, const struct uuid *uuid)
>>      return NULL;
>>  }
>>
>> -/* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
>> - * otherwise. */
>> -static bool
>> +/* Returns OVSDB_IDL_UPDATE_DB_CHANGED if a column with mode
>> + * OVSDB_IDL_MODE_RW changed.
>> + *
>> + * Some IDL inconsistencies can be detected when processing updates:
>> + * - trying to insert an already existing row
>> + * - trying to update a missing row
>> + * - trying to delete a non existent row
>> + *
>> + * In such cases OVSDB_IDL_UPDATE_INCONSISTENT is returned.
>> + * Even though the IDL client could recover, it's best to report the
>> + * inconsistent state because the state the server is in is unknown
> so the
>> + * safest thing to do is to retry (potentially connecting to a new
> server).
>> + *
>> + * Returns OVSDB_IDL_UPDATE_NO_CHANGES otherwise.
>> + */
>> +static enum update_result
>>  ovsdb_idl_process_update(struct ovsdb_idl_table *table,
>>                           const struct uuid *uuid, const struct json *old,
>>                           const struct json *new)
>> @@ -2539,10 +2572,10 @@ ovsdb_idl_process_update(struct
> ovsdb_idl_table *table,
>>              /* XXX perhaps we should check the 'old' values? */
>>              ovsdb_idl_delete_row(row);
>>          } else {
>> -            VLOG_WARN_RL(&semantic_rl, "cannot delete missing row
> "UUID_FMT" "
>> -                         "from table %s",
>> -                         UUID_ARGS(uuid), table->class_->name);
>> -            return false;
>> +            VLOG_ERR_RL(&semantic_rl, "cannot delete missing row
> "UUID_FMT" "
>> +                        "from table %s",
>> +                        UUID_ARGS(uuid), table->class_->name);
>> +            return OVSDB_IDL_UPDATE_INCONSISTENT;
>>          }
>>      } else if (!old) {
>>          /* Insert row. */
>> @@ -2551,35 +2584,50 @@ ovsdb_idl_process_update(struct
> ovsdb_idl_table *table,
>>          } else if (ovsdb_idl_row_is_orphan(row)) {
>>              ovsdb_idl_insert_row(row, new);
>>          } else {
>> -            VLOG_WARN_RL(&semantic_rl, "cannot add existing row
> "UUID_FMT" to "
>> -                         "table %s", UUID_ARGS(uuid),
> table->class_->name);
>> -            return ovsdb_idl_modify_row(row, new);
>> +            VLOG_ERR_RL(&semantic_rl, "cannot add existing row
> "UUID_FMT" to "
>> +                        "table %s", UUID_ARGS(uuid),
> table->class_->name);
>> +            return OVSDB_IDL_UPDATE_INCONSISTENT;
>>          }
>>      } else {
>>          /* Modify row. */
>>          if (row) {
>>              /* XXX perhaps we should check the 'old' values? */
>>              if (!ovsdb_idl_row_is_orphan(row)) {
>> -                return ovsdb_idl_modify_row(row, new);
>> +                return ovsdb_idl_modify_row(row, new)
>> +                       ? OVSDB_IDL_UPDATE_DB_CHANGED
>> +                       : OVSDB_IDL_UPDATE_NO_CHANGES;
>>              } else {
>> -                VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
>> -                             "referenced row "UUID_FMT" in table %s",
>> -                             UUID_ARGS(uuid), table->class_->name);
>> -                ovsdb_idl_insert_row(row, new);
>> +                VLOG_ERR_RL(&semantic_rl, "cannot modify missing but "
>> +                            "referenced row "UUID_FMT" in table %s",
>> +                            UUID_ARGS(uuid), table->class_->name);
>> +                return OVSDB_IDL_UPDATE_INCONSISTENT;
>>              }
>>          } else {
>> -            VLOG_WARN_RL(&semantic_rl, "cannot modify missing row
> "UUID_FMT" "
>> -                         "in table %s", UUID_ARGS(uuid),
> table->class_->name);
>> -            ovsdb_idl_insert_row(ovsdb_idl_row_create(table, uuid), new);
>> +            VLOG_ERR_RL(&semantic_rl, "cannot modify missing row
> "UUID_FMT" "
>> +                        "in table %s", UUID_ARGS(uuid),
> table->class_->name);
>> +            return OVSDB_IDL_UPDATE_INCONSISTENT;
>>          }
>>      }
>>
>> -    return true;
>> +    return OVSDB_IDL_UPDATE_DB_CHANGED;
> 
> This return should never be reached.
> 

I'm not sure I understand your point here. There are valid code paths
that can lead to this return. Do you mean to move it to all places that
might lead up to here?

>>  }
>>
>> -/* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
>> - * otherwise. */
>> -static bool
>> +/* Returns OVSDB_IDL_UPDATE_DB_CHANGED if a column with mode
>> + * OVSDB_IDL_MODE_RW changed.
>> + *
>> + * Some IDL inconsistencies can be detected when processing updates:
>> + * - trying to insert an already existing row
>> + * - trying to update a missing row
>> + * - trying to delete a non existent row
>> + *
>> + * In such cases OVSDB_IDL_UPDATE_INCONSISTENT is returned.
>> + * Even though the IDL client could recover, it's best to report the
>> + * inconsistent state because the state the server is in is unknown
> so the
>> + * safest thing to do is to retry (potentially connecting to a new
> server).
>> + *
>> + * Otherwise OVSDB_IDL_UPDATE_NO_CHANGES is returned.
>> + */
>> +static enum update_result
>>  ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
>>                            const struct uuid *uuid,
>>                            const char *operation,
>> @@ -2593,10 +2641,10 @@ ovsdb_idl_process_update2(struct
> ovsdb_idl_table *table,
>>          if (row && !ovsdb_idl_row_is_orphan(row)) {
>>              ovsdb_idl_delete_row(row);
>>          } else {
>> -            VLOG_WARN_RL(&semantic_rl, "cannot delete missing row
> "UUID_FMT" "
>> -                         "from table %s",
>> -                         UUID_ARGS(uuid), table->class_->name);
>> -            return false;
>> +            VLOG_ERR_RL(&semantic_rl, "cannot delete missing row
> "UUID_FMT" "
>> +                        "from table %s",
>> +                        UUID_ARGS(uuid), table->class_->name);
>> +            return OVSDB_IDL_UPDATE_INCONSISTENT;
>>          }
>>      } else if (!strcmp(operation, "insert") || !strcmp(operation,
> "initial")) {
>>          /* Insert row. */
>> @@ -2605,34 +2653,35 @@ ovsdb_idl_process_update2(struct
> ovsdb_idl_table *table,
>>          } else if (ovsdb_idl_row_is_orphan(row)) {
>>              ovsdb_idl_insert_row(row, json_row);
>>          } else {
>> -            VLOG_WARN_RL(&semantic_rl, "cannot add existing row
> "UUID_FMT" to "
>> -                         "table %s", UUID_ARGS(uuid),
> table->class_->name);
>> -            ovsdb_idl_delete_row(row);
>> -            ovsdb_idl_insert_row(row, json_row);
>> +            VLOG_ERR_RL(&semantic_rl, "cannot add existing row
> "UUID_FMT" to "
>> +                        "table %s", UUID_ARGS(uuid),
> table->class_->name);
>> +            return OVSDB_IDL_UPDATE_INCONSISTENT;
>>          }
>>      } else if (!strcmp(operation, "modify")) {
>>          /* Modify row. */
>>          if (row) {
>>              if (!ovsdb_idl_row_is_orphan(row)) {
>> -                return ovsdb_idl_modify_row_by_diff(row, json_row);
>> +                return ovsdb_idl_modify_row_by_diff(row, json_row)
>> +                       ? OVSDB_IDL_UPDATE_DB_CHANGED
>> +                       : OVSDB_IDL_UPDATE_NO_CHANGES;
>>              } else {
>> -                VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
>> -                             "referenced row "UUID_FMT" in table %s",
>> -                             UUID_ARGS(uuid), table->class_->name);
>> -                return false;
>> +                VLOG_ERR_RL(&semantic_rl, "cannot modify missing but "
>> +                            "referenced row "UUID_FMT" in table %s",
>> +                            UUID_ARGS(uuid), table->class_->name);
>> +                return OVSDB_IDL_UPDATE_INCONSISTENT;
>>              }
>>          } else {
>> -            VLOG_WARN_RL(&semantic_rl, "cannot modify missing row
> "UUID_FMT" "
>> -                         "in table %s", UUID_ARGS(uuid),
> table->class_->name);
>> -            return false;
>> +            VLOG_ERR_RL(&semantic_rl, "cannot modify missing row
> "UUID_FMT" "
>> +                        "in table %s", UUID_ARGS(uuid),
> table->class_->name);
>> +            return OVSDB_IDL_UPDATE_INCONSISTENT;
>>          }
>>      } else {
>> -        VLOG_WARN_RL(&semantic_rl, "unknown operation %s to "
>> -                     "table %s", operation, table->class_->name);
>> -        return false;
>> +        VLOG_ERR_RL(&semantic_rl, "unknown operation %s to "
>> +                    "table %s", operation, table->class_->name);
>> +        return OVSDB_IDL_UPDATE_NO_CHANGES;
>>      }
>>
>> -    return true;
>> +    return OVSDB_IDL_UPDATE_DB_CHANGED;
> 
> This return should never be reached.
> 

Same as for ovsdb_idl_process_update(), there are valid paths that lead
here.

Thanks,
Dumitru

>>  }
>>
>>  /* Recursively add rows to tracked change lists for current row
>> --
>> 1.8.3.1
>>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to