On 5/10/21 12:12 PM, Dumitru Ceara wrote:
> On 4/13/21 12:00 AM, Ilya Maximets wrote:
>> This library provides interfaces to open replay files and
>> read/write records. Will be used later for stream record/replay
>> functionality, i.e. to record all the incoming connections and
>> data and replay it later for debugging and performance analysis
>> purposes.
>>
>> Signed-off-by: Ilya Maximets <[email protected]>
>> ---
>
> Hi Ilya,
>
> I only have two nits below and a question regarding partial record reads
> (although I'm not sure that's a requirement).
>
> The rest of the code looks good to me:
>
> Acked-by: Dumitru Ceara <[email protected]>
>
> Thanks,
> Dumitru
>
>> lib/automake.mk | 5 +
>> lib/ovs-replay-syn.man | 3 +
>> lib/ovs-replay.c | 237 +++++++++++++++++++++++++++++++++++++++++
>> lib/ovs-replay.h | 163 ++++++++++++++++++++++++++++
>> lib/ovs-replay.man | 16 +++
>> lib/ovs-replay.xml | 35 ++++++
>> 6 files changed, 459 insertions(+)
>> create mode 100644 lib/ovs-replay-syn.man
>> create mode 100644 lib/ovs-replay.c
>> create mode 100644 lib/ovs-replay.h
>> create mode 100644 lib/ovs-replay.man
>> create mode 100644 lib/ovs-replay.xml
>>
>> diff --git a/lib/automake.mk b/lib/automake.mk
>> index 39901bd6d..b558692c6 100644
>> --- a/lib/automake.mk
>> +++ b/lib/automake.mk
>> @@ -236,6 +236,8 @@ lib_libopenvswitch_la_SOURCES = \
>> lib/ovs-numa.h \
>> lib/ovs-rcu.c \
>> lib/ovs-rcu.h \
>> + lib/ovs-replay.c \
>> + lib/ovs-replay.h \
>> lib/ovs-router.h \
>> lib/ovs-router.c \
>> lib/ovs-thread.c \
>> @@ -532,6 +534,7 @@ EXTRA_DIST += \
>> lib/daemon.xml \
>> lib/dirs.c.in \
>> lib/db-ctl-base.xml \
>> + lib/ovs-replay.xml \
>> lib/ssl.xml \
>> lib/ssl-bootstrap.xml \
>> lib/ssl-peer-ca-cert.xml \
>> @@ -554,6 +557,8 @@ MAN_FRAGMENTS += \
>> lib/dpif-netdev-unixctl.man \
>> lib/ofp-version.man \
>> lib/ovs.tmac \
>> + lib/ovs-replay.man \
>> + lib/ovs-replay-syn.man \
>> lib/service.man \
>> lib/service-syn.man \
>> lib/ssl-bootstrap.man \
>> diff --git a/lib/ovs-replay-syn.man b/lib/ovs-replay-syn.man
>> new file mode 100644
>> index 000000000..f0c78656b
>> --- /dev/null
>> +++ b/lib/ovs-replay-syn.man
>> @@ -0,0 +1,3 @@
>> +.IP "Replay options:"
>> +[\fB\-\-record-replay\fR[\fB=\fIdirectory\fR]]
>> +[\fB\-\-replay\fR[\fB=\fIdirectory\fR]]
>> diff --git a/lib/ovs-replay.c b/lib/ovs-replay.c
>> new file mode 100644
>> index 000000000..fcdb4edbb
>> --- /dev/null
>> +++ b/lib/ovs-replay.c
>> @@ -0,0 +1,237 @@
>> +/*
>> + * Copyright (c) 2021, Red Hat, Inc.
>> + *
>> + * Licensed under the Apache License, Version 2.0 (the "License");
>> + * you may not use this file except in compliance with the License.
>> + * You may obtain a copy of the License at:
>> + *
>> + * http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing, software
>> + * distributed under the License is distributed on an "AS IS" BASIS,
>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> + * See the License for the specific language governing permissions and
>> + * limitations under the License.
>> + */
>> +
>> +#include <config.h>
>> +#include <ctype.h>
>> +#include <errno.h>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +#include <sys/socket.h>
>> +#include <sys/types.h>
>> +#include <unistd.h>
>> +#include "dirs.h"
>> +#include "ovs-atomic.h"
>> +#include "ovs-replay.h"
>> +#include "util.h"
>> +#include "openvswitch/vlog.h"
>> +
>> +VLOG_DEFINE_THIS_MODULE(ovs_replay);
>> +
>> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 25);
>> +
>> +static struct ovs_mutex replay_mutex = OVS_MUTEX_INITIALIZER;
>> +static int replay_seqno OVS_GUARDED_BY(replay_mutex) = 0;
>> +static atomic_int replay_state = ATOMIC_VAR_INIT(OVS_REPLAY_NONE);
>> +
>> +static char *dirname = NULL;
>> +
>> +void
>> +ovs_replay_set_state(enum ovs_replay_state state)
>> +{
>> + atomic_store_relaxed(&replay_state, state);
>> +}
>> +
>> +enum ovs_replay_state
>> +ovs_replay_get_state(void)
>> +{
>> + int state;
>> +
>> + atomic_read_relaxed(&replay_state, &state);
>> + return state;
>> +}
>> +
>> +void
>> +ovs_replay_set_dirname(const char *new_dirname)
>> +{
>> + if (new_dirname) {
>> + free(dirname);
>> + dirname = xstrdup(new_dirname);
>> + }
>> +}
>> +
>> +
>
> Nit: I'd remove one of the newlines here.
Fixed.
>
>> +void
>> +ovs_replay_lock(void)
>> + OVS_ACQUIRES(replay_mutex)
>> +{
>> + ovs_mutex_lock(&replay_mutex);
>> +}
>> +
>> +void
>> +ovs_replay_unlock(void)
>> + OVS_RELEASES(replay_mutex)
>> +{
>> + ovs_mutex_unlock(&replay_mutex);
>> +}
>> +
>> +int
>> +ovs_replay_seqno(void)
>> + OVS_REQUIRES(replay_mutex)
>> +{
>> + return replay_seqno;
>> +}
>> +
>> +static char *
>> +ovs_replay_file_name(const char *name, int seqno)
>> +{
>> + char *local_name = xstrdup(name);
>> + char *filename, *p, *c;
>> + bool skip = false;
>> +
>> + /* Replace all the numbers and special symbols with single underscore.
>> + * Numbers might be PIDs or port numbers that could change between
>> record
>> + * and replay phases, special symbols might be not good as a filename.
>> + * We have a unique seuqence number as part of the name, so we don't
>> care
>> + * keeping too much information. */
>> + for (c = p = local_name; *p; p++) {
>> + if (!isalpha((unsigned char) *p)) {
>> + if (!skip) {
>> + *c++ = '_';
>> + skip = true;
>> + }
>> + } else {
>> + *c++ = *p;
>> + skip = false;
>> + }
>> + }
>> + if (skip) {
>> + c--;
>> + }
>> + *c = '\0';
>> + filename = xasprintf("%s/replay_%s_%d", dirname ? dirname : "",
>> + local_name, seqno);
>> + VLOG_DBG("Constructing replay filename: '%s' --> '%s' --> '%s'.",
>> + name, local_name, filename);
>> + free(local_name);
>> +
>> + return filename;
>> +}
>> +
>> +int
>> +ovs_replay_file_open(const char *name, replay_file_t *f, int *seqno)
>> + OVS_REQUIRES(replay_mutex)
>> +{
>> + char *file_path, *filename;
>> + int state = ovs_replay_get_state();
>> +
>> + ovs_assert(state != OVS_REPLAY_NONE);
>> +
>> + filename = ovs_replay_file_name(name, replay_seqno);
>> + if (filename[0] != '/') {
>> + file_path = abs_file_name(ovs_rundir(), filename);
>> + free(filename);
>> + } else {
>> + file_path = filename;
>> + }
>> +
>> + *f = fopen(file_path, state == OVS_REPLAY_WRITE ? "wb" : "rb");
>> + if (!*f) {
>> + VLOG_ERR("%s: fopen failed: %s", file_path, ovs_strerror(errno));
>> + free(file_path);
>> + return errno;
>> + }
>> + free(file_path);
>> +
>> + if (state == OVS_REPLAY_READ
>> + && fread(seqno, sizeof *seqno, 1, *f) != 1) {
>> + VLOG_INFO("%s: failed to read seqno: replay might be empty.", name);
>> + *seqno = INT_MAX;
>> + }
>> + replay_seqno++; /* New file opened. */
>> + return 0;
>> +}
>> +
>> +void
>> +ovs_replay_file_close(replay_file_t f)
>> +{
>> + fclose(f);
>> +}
>> +
>> +int
>> +ovs_replay_write(replay_file_t f, const void *buffer, int n, bool is_read)
>> + OVS_EXCLUDED(replay_mutex)
>> +{
>> + int state = ovs_replay_get_state();
>> + int seqno_to_write;
>> + int retval = 0;
>> +
>> + if (OVS_LIKELY(state != OVS_REPLAY_WRITE)) {
>> + return 0;
>> + }
>> +
>> + ovs_replay_lock();
>> +
>> + seqno_to_write = is_read ? replay_seqno : -replay_seqno;
>> + if (fwrite(&seqno_to_write, sizeof seqno_to_write, 1, f) != 1) {
>> + VLOG_ERR_RL(&rl, "Failed to write seqno.");
>> + retval = -1;
>> + goto out;
>> + }
>> + if (fwrite(&n, sizeof n, 1, f) != 1) {
>> + VLOG_ERR_RL(&rl, "Failed to write length.");
>> + retval = -1;
>> + goto out;
>> + }
>> + if (n > 0 && is_read && fwrite(buffer, 1, n, f) != n) {
>> + VLOG_ERR_RL(&rl, "Failed to write data.");
>> + retval = -1;
>> + }
>> +out:
>> + replay_seqno++; /* Write completed. */
>> + ovs_replay_unlock();
>> + fflush(f);
>> + return retval;
>> +}
>> +
>> +int
>> +ovs_replay_read(replay_file_t f, void *buffer, int buffer_size,
>> + int *len, int *seqno, bool is_read)
>> + OVS_REQUIRES(replay_mutex)
>> +{
>> + int retval = EINVAL;
>> +
>> + if (fread(len, sizeof *len, 1, f) != 1
>> + || (is_read && *len > buffer_size)) {
>> + VLOG_ERR("Failed to read replay length.");
>
> Would it make sense to distinguish between the two cases? That is,
> should we allow partial reads into buffers that can't store the whole
> record?
If the buffer size is not sufficient, it means that replay went
wrong. :) And almost any error during replay is lkely fatal.
So, I don't think that we need to allow partial reads here.
For the splitting the 'if', I did that to separate error messages,
so it would be a bit easier to debug replay functionality itself.
>
>> + goto out;
>> + }
>> +
>> + if (*len > 0 && is_read && fread(buffer, 1, *len, f) != *len) {
>> + VLOG_ERR("Failed to read replay buffer.");
>> + goto out;
>> + }
>> +
>> + if (fread(seqno, sizeof *seqno, 1, f) != 1) {
>> + *seqno = INT_MAX; /* Most likely EOF. */
>> + if (ferror(f)) {
>> + VLOG_INFO("Failed to read replay seqno.");
>> + goto out;
>> + }
>> + }
>> +
>> + retval = 0;
>> +out:
>> + replay_seqno++; /* Read completed. */
>> + return retval;
>> +}
>> +
>> +void
>> +ovs_replay_usage(void)
>> +{
>> + printf("\nReplay options:\n"
>> + " --replay-record[=DIR] turn on writing replay files\n"
>
> nit: I wonder if it would be more clear if this was renamed to
> "--record". If so, we'd have to also update the man pages.
Yeah, it makes sense. I changed this option to just '--record'.
I also messed up few parts of documentation by using 'record-replay'
instead of 'replay-record', so I fixed this too.
>
>> + " --replay[=DIR] run from replay files\n");
>> +}
>> diff --git a/lib/ovs-replay.h b/lib/ovs-replay.h
>> new file mode 100644
>> index 000000000..03cddeb13
>> --- /dev/null
>> +++ b/lib/ovs-replay.h
>> @@ -0,0 +1,163 @@
>> +/*
>> + * Copyright (c) 2021, Red Hat, Inc.
>> + *
>> + * Licensed under the Apache License, Version 2.0 (the "License");
>> + * you may not use this file except in compliance with the License.
>> + * You may obtain a copy of the License at:
>> + *
>> + * http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing, software
>> + * distributed under the License is distributed on an "AS IS" BASIS,
>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> + * See the License for the specific language governing permissions and
>> + * limitations under the License.
>> + */
>> +
>> +#ifndef OVS_REPLAY_H
>> +#define OVS_REPLAY_H 1
>> +
>> +/*
>> + * Library to work with 'replay' files.
>> + *
>> + * ovs_replay_file_open() should be used to open a new replay file.
>> + * 'replay' file contains records. If current state is OVS_REPLAY_WRITE,
>> + * files are opened in a write mode and new records could be written by
>> + * ovs_replay_write(). If current mode is OVS_REPLAY_READ, files are
>> + * opened in a read mode and records could be read with ovs_replay_read().
>> + *
>> + * Each record has several fields:
>> + * <seqno> <len> [<data>]
>> + *
>> + * Here <seqno> is a global sequence number of the record, it is unique
>> + * across all the replay files. By comparing normalized version of this
>> + * number (ovs_replay_normalized_seqno()) with the current global sequence
>> + * number (ovs_replay_seqno()) users may detect if this record should be
>> + * replayed now.
>> + *
>> + * Non-normalized versions of seqno are used to distinguish 'read' and
>> 'write'
>> + * records. 'read' records are records that corresponds to incoming events.
>> + * Only 'read' records contains <data>. 'write' records contains outgoing
>> + * events, i.e. stream_write() and contains only the size of outgoing
>> message.
>> + *
>> + * For 'read' records, <len> is a size of a <data> stored in this record in
>> + * bytes. For 'write' records, it is a size of outgoing message, but there
>> + * is no <data>. If it contains negative value, it means that this record
>> + * holds some recorded error and no data available.
>> + */
>> +
>> +#include <stdbool.h>
>> +#include <stdio.h>
>> +
>> +typedef FILE *replay_file_t;
>> +
>> +/* Replay state. */
>> +enum ovs_replay_state {
>> + OVS_REPLAY_NONE,
>> + OVS_REPLAY_WRITE,
>> + OVS_REPLAY_READ,
>> +};
>> +
>> +void ovs_replay_set_state(enum ovs_replay_state);
>> +enum ovs_replay_state ovs_replay_get_state(void);
>> +
>> +static inline bool
>> +ovs_replay_is_active(void)
>> +{
>> + return ovs_replay_get_state() != OVS_REPLAY_NONE;
>> +}
>> +
>> +/* Returns 'true' if provided sequence number belongs to 'read' record. */
>> +static inline bool
>> +ovs_replay_seqno_is_read(int seqno)
>> +{
>> + return seqno >= 0;
>> +}
>> +
>> +/* Normalizes sequence number, so it can be used to compare with result of
>> + * ovs_replay_seqno(). */
>> +static inline int
>> +ovs_replay_normalized_seqno(int seqno)
>> +{
>> + return seqno >= 0 ? seqno : -seqno;
>> +}
>> +
>> +/* Locks the replay module.
>> + * Locking required to use ovs_replay_file_open() and ovs_replay_read(). */
>> +void ovs_replay_lock(void);
>> +
>> +/* Unlocks the replay module. */
>> +void ovs_replay_unlock(void);
>> +
>> +/* Returns current global replay sequence number. */
>> +int ovs_replay_seqno(void);
>> +
>> +/* In write mode creates a new replay file to write stream replay.
>> + * In read mode opens an existing replay file.
>> + *
>> + * Requires replay being locked with ovs_replay_lock().
>> + *
>> + * On success returns 0, 'f' points to the opened file. If current mode is
>> + * OVS_REPLAY_READ, sets 'seqno' to the sequence number of the first record
>> in
>> + * the file.
>> + *
>> + * On failure returns positive errno. */
>> +int ovs_replay_file_open(const char *name, replay_file_t *f, int *seqno);
>> +
>> +/* Closes replay file. */
>> +void ovs_replay_file_close(replay_file_t f);
>> +
>> +/* Writes a new record of 'n' bytes from 'buffer' to a replay file.
>> + * 'is_read' should be true if the record belongs to 'read' operation
>> + * Depending on 'is_read', creates 'read' or 'write' record. 'write'
>> records
>> + * contains only the size of a bufer ('n').
>> + * If 'n' is negative, writes 'n' as an error status.
>> + *
>> + * On success returns 0. Otherwise, positive errno. */
>> +int ovs_replay_write(replay_file_t f, const void *buffer, int n, bool
>> is_read);
>> +
>> +/* Reads one record from a replay file to 'buffer'. 'buffer_size' should be
>> + * equal to the size of a memory available.
>> + *
>> + * On success, actual size of the read record will be set to 'len', 'seqno'
>> + * will be set to the sequence number of the next record in the file. If it
>> + * was the last record, sets 'seqno' to INT_MAX.
>> + * Negative 'len' means that record contained an error status.
>> + *
>> + * Depending on 'is_read', tries to read 'read' or 'write' record. For the
>> + * 'write' record, only 'len' and 'seqno' updated, no data read to 'buffer'.
>> + *
>> + * On success returns 0. Otherwise, positive errno. */
>> +int ovs_replay_read(replay_file_t f, void *buffer, int buffer_size,
>> + int *len, int *seqno, bool is_read);
>> +
>> +/* Helpers for cmdline options. */
>> +#define OVS_REPLAY_OPTION_ENUMS \
>> + OPT_OVS_REPLAY_REC, \
>> + OPT_OVS_REPLAY
>> +
>> +#define OVS_REPLAY_LONG_OPTIONS \
>> + {"replay-record", optional_argument, NULL, OPT_OVS_REPLAY_REC}, \
>> + {"replay", optional_argument, NULL, OPT_OVS_REPLAY}
>> +
>> +#define OVS_REPLAY_OPTION_HANDLERS \
>> + case OPT_OVS_REPLAY_REC: \
>> + ovs_replay_set_state(OVS_REPLAY_WRITE); \
>> + ovs_replay_set_dirname(optarg); \
>> + break; \
>> + \
>> + case OPT_OVS_REPLAY: \
>> + ovs_replay_set_state(OVS_REPLAY_READ); \
>> + ovs_replay_set_dirname(optarg); \
>> + break;
>> +
>> +#define OVS_REPLAY_CASES \
>> + case OPT_OVS_REPLAY_REC: case OPT_OVS_REPLAY:
>> +
>> +/* Prints usage information. */
>> +void ovs_replay_usage(void);
>> +
>> +/* Sets path to the directory where replay files should be stored. */
>> +void ovs_replay_set_dirname(const char *new_dirname);
>> +
>> +#endif /* OVS_REPLAY_H */
>> diff --git a/lib/ovs-replay.man b/lib/ovs-replay.man
>> new file mode 100644
>> index 000000000..f81b9fbed
>> --- /dev/null
>> +++ b/lib/ovs-replay.man
>> @@ -0,0 +1,16 @@
>> +.IP "\fB\-\-record-replay[=\fIdirectory\fR]"
>> +Sets the process in "recording" mode, in which it will record all the
>> +connections, data from streams (Unix domain and network sockets) and some
>> other
>> +important necessary bits, so they could be replayed later.
>> +Recorded data is stored in replay files in specified \fIdirectory\fR.
>> +If \fIdirectory\fR does not begin with \fB/\fR, it is interpreted as
>> relative
>> +to \fB@RUNDIR@\fR. If \fIdirectory\fR is not specified, \fB@RUNDIR@\fR will
>> +be used.
>> +.
>> +.IP "\fB\-\-replay[=\fIdirectory\fR]"
>> +Sets the process in "replay" mode, in which it will read information about
>> +connections, data from streams (Unix domain and network sockets) and some
>> +other necessary bits directly from replay files instead of using real
>> sockets.
>> +Replay files from the \fIdirectory\fR will be used. If \fIdirectory\fR does
>> +not begin with \fB/\fR, it is interpreted as relative to \fB@RUNDIR@\fR.
>> +If \fIdirectory\fR is not specified, \fB@RUNDIR@\fR will be used.
>> diff --git a/lib/ovs-replay.xml b/lib/ovs-replay.xml
>> new file mode 100644
>> index 000000000..7392bd540
>> --- /dev/null
>> +++ b/lib/ovs-replay.xml
>> @@ -0,0 +1,35 @@
>> +<?xml version="1.0" encoding="utf-8"?>
>> +<dl>
>> + <dt><code>--record-replay</code>[<code>=</code><var>directory</var>]</dt>
>> + <dd>
>> + <p>
>> + Sets the process in "recording" mode, in which it will record all the
>> + connections, data from streams (Unix domain and network sockets) and
>> some
>> + other necessary bits, so they could be replayed later.
>> + </p>
>> + <p>
>> + Recorded data is stored in replay files in specified
>> + <var>directory</var>. If <var>directory</var> does not begin with
>> + <code>/<code>, it is interpreted as relative to <code>@RUNDIR@</code>.
>> + If <var>directory</var> is not specified, <code>@RUNDIR@</code> will
>> + be used.
>> + </p>
>> + </dd>
>> +
>> + <dt><code>--replay</code>[<code>=</code><var>directory</var>]</dt>
>> + <dd>
>> + <p>
>> + Sets the process in "replay" mode, in which it will read information
>> + about connections, data from streams (Unix domain and network sockets)
>> + and some other necessary bits directly from replay files instead of
>> using
>> + real sockets.
>> + </p>
>> + <p>
>> + Replay files from the <var>directory</var> will be used.
>> + If <var>directory</var> does not begin with <code>/<code>, it is
>> + interpreted as relative to <code>@RUNDIR@</code>. If
>> + <var>directory</var> is not specified, <code>@RUNDIR@</code> will be
>> + used.
>> + </p>
>> + </dd>
>> +</dl>
>>
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev