On 4/13/21 12:00 AM, Ilya Maximets wrote: > This library provides interfaces to open replay files and > read/write records. Will be used later for stream record/replay > functionality, i.e. to record all the incoming connections and > data and replay it later for debugging and performance analysis > purposes. > > Signed-off-by: Ilya Maximets <[email protected]> > ---
Hi Ilya, I only have two nits below and a question regarding partial record reads (although I'm not sure that's a requirement). The rest of the code looks good to me: Acked-by: Dumitru Ceara <[email protected]> Thanks, Dumitru > lib/automake.mk | 5 + > lib/ovs-replay-syn.man | 3 + > lib/ovs-replay.c | 237 +++++++++++++++++++++++++++++++++++++++++ > lib/ovs-replay.h | 163 ++++++++++++++++++++++++++++ > lib/ovs-replay.man | 16 +++ > lib/ovs-replay.xml | 35 ++++++ > 6 files changed, 459 insertions(+) > create mode 100644 lib/ovs-replay-syn.man > create mode 100644 lib/ovs-replay.c > create mode 100644 lib/ovs-replay.h > create mode 100644 lib/ovs-replay.man > create mode 100644 lib/ovs-replay.xml > > diff --git a/lib/automake.mk b/lib/automake.mk > index 39901bd6d..b558692c6 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -236,6 +236,8 @@ lib_libopenvswitch_la_SOURCES = \ > lib/ovs-numa.h \ > lib/ovs-rcu.c \ > lib/ovs-rcu.h \ > + lib/ovs-replay.c \ > + lib/ovs-replay.h \ > lib/ovs-router.h \ > lib/ovs-router.c \ > lib/ovs-thread.c \ > @@ -532,6 +534,7 @@ EXTRA_DIST += \ > lib/daemon.xml \ > lib/dirs.c.in \ > lib/db-ctl-base.xml \ > + lib/ovs-replay.xml \ > lib/ssl.xml \ > lib/ssl-bootstrap.xml \ > lib/ssl-peer-ca-cert.xml \ > @@ -554,6 +557,8 @@ MAN_FRAGMENTS += \ > lib/dpif-netdev-unixctl.man \ > lib/ofp-version.man \ > lib/ovs.tmac \ > + lib/ovs-replay.man \ > + lib/ovs-replay-syn.man \ > lib/service.man \ > lib/service-syn.man \ > lib/ssl-bootstrap.man \ > diff --git a/lib/ovs-replay-syn.man b/lib/ovs-replay-syn.man > new file mode 100644 > index 000000000..f0c78656b > --- /dev/null > +++ b/lib/ovs-replay-syn.man > @@ -0,0 +1,3 @@ > +.IP "Replay options:" > +[\fB\-\-record-replay\fR[\fB=\fIdirectory\fR]] > +[\fB\-\-replay\fR[\fB=\fIdirectory\fR]] > diff --git a/lib/ovs-replay.c b/lib/ovs-replay.c > new file mode 100644 > index 000000000..fcdb4edbb > --- /dev/null > +++ b/lib/ovs-replay.c > @@ -0,0 +1,237 @@ > +/* > + * Copyright (c) 2021, Red Hat, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#include <config.h> > +#include <ctype.h> > +#include <errno.h> > +#include <stdlib.h> > +#include <string.h> > +#include <sys/socket.h> > +#include <sys/types.h> > +#include <unistd.h> > +#include "dirs.h" > +#include "ovs-atomic.h" > +#include "ovs-replay.h" > +#include "util.h" > +#include "openvswitch/vlog.h" > + > +VLOG_DEFINE_THIS_MODULE(ovs_replay); > + > +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 25); > + > +static struct ovs_mutex replay_mutex = OVS_MUTEX_INITIALIZER; > +static int replay_seqno OVS_GUARDED_BY(replay_mutex) = 0; > +static atomic_int replay_state = ATOMIC_VAR_INIT(OVS_REPLAY_NONE); > + > +static char *dirname = NULL; > + > +void > +ovs_replay_set_state(enum ovs_replay_state state) > +{ > + atomic_store_relaxed(&replay_state, state); > +} > + > +enum ovs_replay_state > +ovs_replay_get_state(void) > +{ > + int state; > + > + atomic_read_relaxed(&replay_state, &state); > + return state; > +} > + > +void > +ovs_replay_set_dirname(const char *new_dirname) > +{ > + if (new_dirname) { > + free(dirname); > + dirname = xstrdup(new_dirname); > + } > +} > + > + Nit: I'd remove one of the newlines here. > +void > +ovs_replay_lock(void) > + OVS_ACQUIRES(replay_mutex) > +{ > + ovs_mutex_lock(&replay_mutex); > +} > + > +void > +ovs_replay_unlock(void) > + OVS_RELEASES(replay_mutex) > +{ > + ovs_mutex_unlock(&replay_mutex); > +} > + > +int > +ovs_replay_seqno(void) > + OVS_REQUIRES(replay_mutex) > +{ > + return replay_seqno; > +} > + > +static char * > +ovs_replay_file_name(const char *name, int seqno) > +{ > + char *local_name = xstrdup(name); > + char *filename, *p, *c; > + bool skip = false; > + > + /* Replace all the numbers and special symbols with single underscore. > + * Numbers might be PIDs or port numbers that could change between record > + * and replay phases, special symbols might be not good as a filename. > + * We have a unique seuqence number as part of the name, so we don't care > + * keeping too much information. */ > + for (c = p = local_name; *p; p++) { > + if (!isalpha((unsigned char) *p)) { > + if (!skip) { > + *c++ = '_'; > + skip = true; > + } > + } else { > + *c++ = *p; > + skip = false; > + } > + } > + if (skip) { > + c--; > + } > + *c = '\0'; > + filename = xasprintf("%s/replay_%s_%d", dirname ? dirname : "", > + local_name, seqno); > + VLOG_DBG("Constructing replay filename: '%s' --> '%s' --> '%s'.", > + name, local_name, filename); > + free(local_name); > + > + return filename; > +} > + > +int > +ovs_replay_file_open(const char *name, replay_file_t *f, int *seqno) > + OVS_REQUIRES(replay_mutex) > +{ > + char *file_path, *filename; > + int state = ovs_replay_get_state(); > + > + ovs_assert(state != OVS_REPLAY_NONE); > + > + filename = ovs_replay_file_name(name, replay_seqno); > + if (filename[0] != '/') { > + file_path = abs_file_name(ovs_rundir(), filename); > + free(filename); > + } else { > + file_path = filename; > + } > + > + *f = fopen(file_path, state == OVS_REPLAY_WRITE ? "wb" : "rb"); > + if (!*f) { > + VLOG_ERR("%s: fopen failed: %s", file_path, ovs_strerror(errno)); > + free(file_path); > + return errno; > + } > + free(file_path); > + > + if (state == OVS_REPLAY_READ > + && fread(seqno, sizeof *seqno, 1, *f) != 1) { > + VLOG_INFO("%s: failed to read seqno: replay might be empty.", name); > + *seqno = INT_MAX; > + } > + replay_seqno++; /* New file opened. */ > + return 0; > +} > + > +void > +ovs_replay_file_close(replay_file_t f) > +{ > + fclose(f); > +} > + > +int > +ovs_replay_write(replay_file_t f, const void *buffer, int n, bool is_read) > + OVS_EXCLUDED(replay_mutex) > +{ > + int state = ovs_replay_get_state(); > + int seqno_to_write; > + int retval = 0; > + > + if (OVS_LIKELY(state != OVS_REPLAY_WRITE)) { > + return 0; > + } > + > + ovs_replay_lock(); > + > + seqno_to_write = is_read ? replay_seqno : -replay_seqno; > + if (fwrite(&seqno_to_write, sizeof seqno_to_write, 1, f) != 1) { > + VLOG_ERR_RL(&rl, "Failed to write seqno."); > + retval = -1; > + goto out; > + } > + if (fwrite(&n, sizeof n, 1, f) != 1) { > + VLOG_ERR_RL(&rl, "Failed to write length."); > + retval = -1; > + goto out; > + } > + if (n > 0 && is_read && fwrite(buffer, 1, n, f) != n) { > + VLOG_ERR_RL(&rl, "Failed to write data."); > + retval = -1; > + } > +out: > + replay_seqno++; /* Write completed. */ > + ovs_replay_unlock(); > + fflush(f); > + return retval; > +} > + > +int > +ovs_replay_read(replay_file_t f, void *buffer, int buffer_size, > + int *len, int *seqno, bool is_read) > + OVS_REQUIRES(replay_mutex) > +{ > + int retval = EINVAL; > + > + if (fread(len, sizeof *len, 1, f) != 1 > + || (is_read && *len > buffer_size)) { > + VLOG_ERR("Failed to read replay length."); Would it make sense to distinguish between the two cases? That is, should we allow partial reads into buffers that can't store the whole record? > + goto out; > + } > + > + if (*len > 0 && is_read && fread(buffer, 1, *len, f) != *len) { > + VLOG_ERR("Failed to read replay buffer."); > + goto out; > + } > + > + if (fread(seqno, sizeof *seqno, 1, f) != 1) { > + *seqno = INT_MAX; /* Most likely EOF. */ > + if (ferror(f)) { > + VLOG_INFO("Failed to read replay seqno."); > + goto out; > + } > + } > + > + retval = 0; > +out: > + replay_seqno++; /* Read completed. */ > + return retval; > +} > + > +void > +ovs_replay_usage(void) > +{ > + printf("\nReplay options:\n" > + " --replay-record[=DIR] turn on writing replay files\n" nit: I wonder if it would be more clear if this was renamed to "--record". If so, we'd have to also update the man pages. > + " --replay[=DIR] run from replay files\n"); > +} > diff --git a/lib/ovs-replay.h b/lib/ovs-replay.h > new file mode 100644 > index 000000000..03cddeb13 > --- /dev/null > +++ b/lib/ovs-replay.h > @@ -0,0 +1,163 @@ > +/* > + * Copyright (c) 2021, Red Hat, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#ifndef OVS_REPLAY_H > +#define OVS_REPLAY_H 1 > + > +/* > + * Library to work with 'replay' files. > + * > + * ovs_replay_file_open() should be used to open a new replay file. > + * 'replay' file contains records. If current state is OVS_REPLAY_WRITE, > + * files are opened in a write mode and new records could be written by > + * ovs_replay_write(). If current mode is OVS_REPLAY_READ, files are > + * opened in a read mode and records could be read with ovs_replay_read(). > + * > + * Each record has several fields: > + * <seqno> <len> [<data>] > + * > + * Here <seqno> is a global sequence number of the record, it is unique > + * across all the replay files. By comparing normalized version of this > + * number (ovs_replay_normalized_seqno()) with the current global sequence > + * number (ovs_replay_seqno()) users may detect if this record should be > + * replayed now. > + * > + * Non-normalized versions of seqno are used to distinguish 'read' and > 'write' > + * records. 'read' records are records that corresponds to incoming events. > + * Only 'read' records contains <data>. 'write' records contains outgoing > + * events, i.e. stream_write() and contains only the size of outgoing > message. > + * > + * For 'read' records, <len> is a size of a <data> stored in this record in > + * bytes. For 'write' records, it is a size of outgoing message, but there > + * is no <data>. If it contains negative value, it means that this record > + * holds some recorded error and no data available. > + */ > + > +#include <stdbool.h> > +#include <stdio.h> > + > +typedef FILE *replay_file_t; > + > +/* Replay state. */ > +enum ovs_replay_state { > + OVS_REPLAY_NONE, > + OVS_REPLAY_WRITE, > + OVS_REPLAY_READ, > +}; > + > +void ovs_replay_set_state(enum ovs_replay_state); > +enum ovs_replay_state ovs_replay_get_state(void); > + > +static inline bool > +ovs_replay_is_active(void) > +{ > + return ovs_replay_get_state() != OVS_REPLAY_NONE; > +} > + > +/* Returns 'true' if provided sequence number belongs to 'read' record. */ > +static inline bool > +ovs_replay_seqno_is_read(int seqno) > +{ > + return seqno >= 0; > +} > + > +/* Normalizes sequence number, so it can be used to compare with result of > + * ovs_replay_seqno(). */ > +static inline int > +ovs_replay_normalized_seqno(int seqno) > +{ > + return seqno >= 0 ? seqno : -seqno; > +} > + > +/* Locks the replay module. > + * Locking required to use ovs_replay_file_open() and ovs_replay_read(). */ > +void ovs_replay_lock(void); > + > +/* Unlocks the replay module. */ > +void ovs_replay_unlock(void); > + > +/* Returns current global replay sequence number. */ > +int ovs_replay_seqno(void); > + > +/* In write mode creates a new replay file to write stream replay. > + * In read mode opens an existing replay file. > + * > + * Requires replay being locked with ovs_replay_lock(). > + * > + * On success returns 0, 'f' points to the opened file. If current mode is > + * OVS_REPLAY_READ, sets 'seqno' to the sequence number of the first record > in > + * the file. > + * > + * On failure returns positive errno. */ > +int ovs_replay_file_open(const char *name, replay_file_t *f, int *seqno); > + > +/* Closes replay file. */ > +void ovs_replay_file_close(replay_file_t f); > + > +/* Writes a new record of 'n' bytes from 'buffer' to a replay file. > + * 'is_read' should be true if the record belongs to 'read' operation > + * Depending on 'is_read', creates 'read' or 'write' record. 'write' records > + * contains only the size of a bufer ('n'). > + * If 'n' is negative, writes 'n' as an error status. > + * > + * On success returns 0. Otherwise, positive errno. */ > +int ovs_replay_write(replay_file_t f, const void *buffer, int n, bool > is_read); > + > +/* Reads one record from a replay file to 'buffer'. 'buffer_size' should be > + * equal to the size of a memory available. > + * > + * On success, actual size of the read record will be set to 'len', 'seqno' > + * will be set to the sequence number of the next record in the file. If it > + * was the last record, sets 'seqno' to INT_MAX. > + * Negative 'len' means that record contained an error status. > + * > + * Depending on 'is_read', tries to read 'read' or 'write' record. For the > + * 'write' record, only 'len' and 'seqno' updated, no data read to 'buffer'. > + * > + * On success returns 0. Otherwise, positive errno. */ > +int ovs_replay_read(replay_file_t f, void *buffer, int buffer_size, > + int *len, int *seqno, bool is_read); > + > +/* Helpers for cmdline options. */ > +#define OVS_REPLAY_OPTION_ENUMS \ > + OPT_OVS_REPLAY_REC, \ > + OPT_OVS_REPLAY > + > +#define OVS_REPLAY_LONG_OPTIONS \ > + {"replay-record", optional_argument, NULL, OPT_OVS_REPLAY_REC}, \ > + {"replay", optional_argument, NULL, OPT_OVS_REPLAY} > + > +#define OVS_REPLAY_OPTION_HANDLERS \ > + case OPT_OVS_REPLAY_REC: \ > + ovs_replay_set_state(OVS_REPLAY_WRITE); \ > + ovs_replay_set_dirname(optarg); \ > + break; \ > + \ > + case OPT_OVS_REPLAY: \ > + ovs_replay_set_state(OVS_REPLAY_READ); \ > + ovs_replay_set_dirname(optarg); \ > + break; > + > +#define OVS_REPLAY_CASES \ > + case OPT_OVS_REPLAY_REC: case OPT_OVS_REPLAY: > + > +/* Prints usage information. */ > +void ovs_replay_usage(void); > + > +/* Sets path to the directory where replay files should be stored. */ > +void ovs_replay_set_dirname(const char *new_dirname); > + > +#endif /* OVS_REPLAY_H */ > diff --git a/lib/ovs-replay.man b/lib/ovs-replay.man > new file mode 100644 > index 000000000..f81b9fbed > --- /dev/null > +++ b/lib/ovs-replay.man > @@ -0,0 +1,16 @@ > +.IP "\fB\-\-record-replay[=\fIdirectory\fR]" > +Sets the process in "recording" mode, in which it will record all the > +connections, data from streams (Unix domain and network sockets) and some > other > +important necessary bits, so they could be replayed later. > +Recorded data is stored in replay files in specified \fIdirectory\fR. > +If \fIdirectory\fR does not begin with \fB/\fR, it is interpreted as relative > +to \fB@RUNDIR@\fR. If \fIdirectory\fR is not specified, \fB@RUNDIR@\fR will > +be used. > +. > +.IP "\fB\-\-replay[=\fIdirectory\fR]" > +Sets the process in "replay" mode, in which it will read information about > +connections, data from streams (Unix domain and network sockets) and some > +other necessary bits directly from replay files instead of using real > sockets. > +Replay files from the \fIdirectory\fR will be used. If \fIdirectory\fR does > +not begin with \fB/\fR, it is interpreted as relative to \fB@RUNDIR@\fR. > +If \fIdirectory\fR is not specified, \fB@RUNDIR@\fR will be used. > diff --git a/lib/ovs-replay.xml b/lib/ovs-replay.xml > new file mode 100644 > index 000000000..7392bd540 > --- /dev/null > +++ b/lib/ovs-replay.xml > @@ -0,0 +1,35 @@ > +<?xml version="1.0" encoding="utf-8"?> > +<dl> > + <dt><code>--record-replay</code>[<code>=</code><var>directory</var>]</dt> > + <dd> > + <p> > + Sets the process in "recording" mode, in which it will record all the > + connections, data from streams (Unix domain and network sockets) and > some > + other necessary bits, so they could be replayed later. > + </p> > + <p> > + Recorded data is stored in replay files in specified > + <var>directory</var>. If <var>directory</var> does not begin with > + <code>/<code>, it is interpreted as relative to <code>@RUNDIR@</code>. > + If <var>directory</var> is not specified, <code>@RUNDIR@</code> will > + be used. > + </p> > + </dd> > + > + <dt><code>--replay</code>[<code>=</code><var>directory</var>]</dt> > + <dd> > + <p> > + Sets the process in "replay" mode, in which it will read information > + about connections, data from streams (Unix domain and network sockets) > + and some other necessary bits directly from replay files instead of > using > + real sockets. > + </p> > + <p> > + Replay files from the <var>directory</var> will be used. > + If <var>directory</var> does not begin with <code>/<code>, it is > + interpreted as relative to <code>@RUNDIR@</code>. If > + <var>directory</var> is not specified, <code>@RUNDIR@</code> will be > + used. > + </p> > + </dd> > +</dl> > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
