neels has uploaded this change for review. ( 
https://gerrit.osmocom.org/c/osmo-hnbgw/+/36540?usp=email )


Change subject: nft_kpi: retrieve counters in a separate thread
......................................................................

nft_kpi: retrieve counters in a separate thread

Introduce an NFT thread which does:
- periodically run nftables command to read all counters
- parse the response
- update rate_ctr values.

The main thread still runs the rule addition / removal when a HNB
registers or deregisters. See the comment added in nft_kpi.c, starting
with "A more scalable solution...".

This patch requires the new osmo_stats_report_lock(), see 'Depends'.

Related: SYS#6773
Depends: libosmocore Ib335bea7d2a440ca284e6c439066f96456bf2c2d
Change-Id: I9dc54e6bc94c553f45adfa71ae8ad70be4afbc8f
---
M src/osmo-hnbgw/hnbgw.c
M src/osmo-hnbgw/nft_kpi.c
M src/osmo-hnbgw/osmo_hnbgw_main.c
M src/osmo-hnbgw/tdefs.c
4 files changed, 171 insertions(+), 34 deletions(-)



  git pull ssh://gerrit.osmocom.org:29418/osmo-hnbgw refs/changes/40/36540/1

diff --git a/src/osmo-hnbgw/hnbgw.c b/src/osmo-hnbgw/hnbgw.c
index 6340d10..fa516ea 100644
--- a/src/osmo-hnbgw/hnbgw.c
+++ b/src/osmo-hnbgw/hnbgw.c
@@ -513,6 +513,12 @@
        if (!hnbp)
                return NULL;

+       /* nft_kpi.c updates hnb stats, and it locks osmo_stats_report_lock() 
while it does so. This here should run
+        * in the same thread as stats reporting, so there should be no 
conflict with stats. But to avoid a hnb in flux
+        * while nft_kpi.c looks up hnb, also obtain the 
osmo_stats_report_lock() while manipulating hnb records. */
+       osmo_stats_report_lock(true);
+       /* { */
+
        hnbp->id = *id;
        hnbp->id_str = talloc_strdup(hnbp, umts_cell_id_name(id));
        hnbp->ctrs = rate_ctr_group_alloc(hnbp, &hnb_ctrg_desc, 0);
@@ -525,14 +531,21 @@
        osmo_stat_item_group_set_name(hnbp->statg, hnbp->id_str);

        llist_add(&hnbp->list, &g_hnbgw->hnb_persistent_list);
+       /* success */
+       goto out_unlock;

-       return hnbp;
-
+       /* failure */
 out_free_ctrs:
        rate_ctr_group_free(hnbp->ctrs);
 out_free:
        talloc_free(hnbp);
-       return NULL;
+       hnbp = NULL;
+
+       /* for both success and failure: */
+out_unlock:
+       /* } */
+       osmo_stats_report_lock(false);
+       return hnbp;
 }

 struct hnb_persistent *hnb_persistent_find_by_id(const struct umts_cell_id *id)
diff --git a/src/osmo-hnbgw/nft_kpi.c b/src/osmo-hnbgw/nft_kpi.c
index f585415..039c891 100644
--- a/src/osmo-hnbgw/nft_kpi.c
+++ b/src/osmo-hnbgw/nft_kpi.c
@@ -15,7 +15,11 @@
  * GNU Affero General Public License for more details.
  */

+#include <pthread.h>
+#include <unistd.h>
+
 #include <osmocom/core/logging.h>
+#include <osmocom/core/stats.h>
 #include <osmocom/hnbgw/hnbgw.h>

 #include "config.h"
@@ -46,13 +50,68 @@
 #include <osmocom/hnbgw/nft_kpi.h>
 #include <osmocom/hnbgw/tdefs.h>

+/* Threading and locks in this implementation:
+ *
+ * - osmo_stats_report_lock() held while updating rate_ctr from nft results.
+ * - g_nft_kpi_state.lock held while running an nftables command buffer.
+ *
+ * contrived example:
+ *
+ *    Main Thread
+ *        |
+ *     osmo_stats_report_use_lock(true)
+ *        |
+ *     nft_kpi_init()
+ *     create nft ctx, create table
+ *        |
+ *        +--------------------------->  NFT thread
+ *        |                                 |
+ *        |                               sleep(X34)
+ *        |                                 |
+ *        |                               LOCK(g_nft_kpi_state.lock)
+ *        |                                         |
+ *      osmo_stats_report()                        query all nft counters
+ *      LOCK(osmo_stats_report_lock)                |
+ *              |                                  LOCK(osmo_stats_report_lock)
+ *             collect stats                        : wait because libosmocore 
stats reporting is busy
+ *              |                                   :
+ *      UNLOCK(osmo_stats_report_lock)             LOCK------|
+ *      send out stats                                      for all hnbp: 
rate_ctr_add2()
+ *        |                                                  |
+ *        |                                        
UNLOCK(osmo_stats_report_lock)
+ *        |                                         |
+ *        |                               UNLOCK(g_nft_kpi_state.lock)
+ *        |                                 |
+ *      hnbgw_rx_hnb_register_req()       sleep(X34)
+ *      hnb_nft_kpi_start()                 |
+ *      LOCK(g_nft_kpi_state.lock)         ...
+ *                 |
+ *                nftables: add new rule
+ *                 |
+ *      UNLOCK(g_nft_kpi_state.lock)
+ *        |
+ *       ...
+ *
+ * So the NFT thread only retrieves counters. The main thread adds and removes 
NFT rules for counters. It is possible
+ * that a HNBAP HNB Register or HNB De-Register occurrs while the NFT thread 
holds the g_nft_kpi_state.lock, so that the
+ * main thread blocks until the NFT thread is done reading counters. Note, 
this happens only for HNB attach or detach.
+ *
+ * A more scalable solution is to move all NFT interaction to the thread. 
Instead of submitting rules from the main
+ * thread, we could submit instructions to an inter-thread queue that the NFT 
thread works off. This would add
+ * considerable complexity -- for now we accept the possible but rarely 
occurring short delay for HNB de/registration.
+ */
+
 struct nft_kpi_state {
+       /* lock this while modifying g_nft_kpi_state */
+       pthread_mutex_t lock;
+
        struct {
                struct nft_ctx *nft_ctx;
                char *table_name;
                bool table_created;
        } nft;
-       struct osmo_timer_list period;
+
+       pthread_t thread;
 };

 static struct nft_kpi_state g_nft_kpi_state = {};
@@ -98,15 +157,22 @@
        return 0;
 }

-static void nft_kpi_period_cb(void *data);
+static void nft_kpi_period_cb(void);

-static void nft_kpi_period_schedule(void)
+void *nft_kpi_thread(void *arg)
 {
-       unsigned long period = osmo_tdef_get(hnbgw_T_defs, -34, OSMO_TDEF_S, 
10);
-       if (period < 1)
-               period = 1;
-       osmo_timer_setup(&g_nft_kpi_state.period, nft_kpi_period_cb, NULL);
-       osmo_timer_schedule(&g_nft_kpi_state.period, period, 0);
+       OSMO_ASSERT(osmo_ctx_init(__func__) == 0);
+       while (1) {
+               /* Let's just hope that the unsigned long in the hnbgw_T_defs 
is not modified non-atomically while
+                * reading the timeout value. */
+               unsigned long period = osmo_tdef_get(hnbgw_T_defs, -34, 
OSMO_TDEF_US, 1000000);
+               if (period < 1)
+                       period = 1;
+               usleep(period);
+
+               nft_kpi_period_cb();
+       }
+       return NULL;
 }
 
 int nft_kpi_init(const char *table_name)
@@ -145,7 +211,11 @@
                return -EIO;

        s->nft.table_created = true;
-       nft_kpi_period_schedule();
+
+       /* Up to here, it was fine to dispatch nft without locking, because 
this is the initialization from the main
+        * thread. From now on, whoever wants to use the g_nft_ctx must lock 
this mutex first. */
+       pthread_mutex_init(&s->lock, NULL);
+       pthread_create(&s->thread, NULL, nft_kpi_thread, NULL);
        return 0;
 }

@@ -163,14 +233,19 @@
                return -EINVAL;
        }

+       /* Manipulating nft state, obtain lock */
+       pthread_mutex_lock(&s->lock);
+       /* { */
+
        if (!osmo_sockaddr_str_cmp(gtpu_remote, &hnbp->nft_kpi.addr_remote)) {
                /* The remote address is unchanged, no need to update the nft 
probe */
-               return 0;
+               rc = 0;
+               goto unlock_return;
        }

        /* When there is no table created, it means nft is disabled. Do not 
attempt to set up counters. */
        if (!s->nft.table_created)
-               return 0;
+               goto unlock_return;

        /* The remote address has changed. Cancel previous probe, if any, and 
start a new one. */
        if (osmo_sockaddr_str_is_nonzero(&hnbp->nft_kpi.addr_remote))
@@ -196,6 +271,10 @@
                /* There was an error running the rule, clear addr_remote to 
indicate that no rule exists. */
                hnbp->nft_kpi.addr_remote = (struct osmo_sockaddr_str){};
        }
+
+unlock_return:
+       /* } */
+       pthread_mutex_unlock(&s->lock);
        return rc;
 }

@@ -206,15 +285,19 @@
 {
        struct nft_kpi_state *s = &g_nft_kpi_state;
        char *cmd;
+       int rc = 0;

        /* When there is no table created, neither can there be any rules to be 
deleted.
         * The rules get removed, but the table remains present for as long as 
osmo-hnbgw runs. */
        if (!s->nft.table_created)
                return 0;

+       pthread_mutex_lock(&s->lock);
+       /* { */
+
        /* presence of addr_remote indicates whether an nft rule has been 
submitted and still needs to be removed */
        if (!osmo_sockaddr_str_is_nonzero(&hnbp->nft_kpi.addr_remote))
-               return 0;
+               goto unlock_return;

        if (!hnbp->nft_kpi.last.ul.handle_present
            || !hnbp->nft_kpi.last.dl.handle_present) {
@@ -235,7 +318,12 @@
                              hnbp->nft_kpi.last.ul.handle,
                              s->nft.table_name,
                              hnbp->nft_kpi.last.dl.handle);
-       return nft_run_now(cmd);
+       rc = nft_run_now(cmd);
+
+unlock_return:
+       /* } */
+       pthread_mutex_unlock(&s->lock);
+       return rc;
 }

 static void update_ctr(struct rate_ctr_group *cg, int cgidx, uint64_t 
*last_val, uint64_t new_val)
@@ -383,6 +471,7 @@
        LOGP(DNFT, LOGL_DEBUG, "read %d counters from nft table %s\n", count, 
s->nft.table_name);
 }

+/* The caller must hold the g_nft_kpi_state.lock! */
 static void nft_kpi_read_counters(void)
 {
        int rc;
@@ -399,6 +488,12 @@
        OSMO_STRBUF_PRINTF(sb, "list table inet %s", s->nft.table_name);
        OSMO_ASSERT(sb.chars_needed < sizeof(cmd));

+       size_t l = strlen(cmd);
+       LOGP(DNFT, LOGL_DEBUG, "running nft request, %zu chars: \"%s%s\"\n",
+            l,
+            osmo_escape_cstr_c(OTC_SELECT, cmd, OSMO_MIN(logmax, l)),
+            l > logmax ? "..." : "");
+
        rc = nft_ctx_buffer_output(nft);
        if (rc) {
                LOGP(DNFT, LOGL_ERROR, "error: nft_ctx_buffer_output() returned 
failure: rc=%d cmd=%s\n",
@@ -413,29 +508,29 @@
        }

        output = nft_ctx_get_output_buffer(nft);
-       if (log_check_level(DNFT, LOGL_DEBUG)) {
-               size_t l = strlen(cmd);
-               LOGP(DNFT, LOGL_DEBUG, "ran nft request, %zu chars: \"%s%s\"\n",
-                    l,
-                    osmo_escape_cstr_c(OTC_SELECT, cmd, OSMO_MIN(logmax, l)),
-                    l > logmax ? "..." : "");
-               l = strlen(output);
-               LOGP(DNFT, LOGL_DEBUG, "got nft response, %zu chars: 
\"%s%s\"\n",
-                    l,
-                    osmo_escape_cstr_c(OTC_SELECT, output, OSMO_MIN(logmax, 
l)),
-                    l > logmax ? "..." : "");
-       }
+       l = strlen(output);
+       LOGP(DNFT, LOGL_DEBUG, "got nft response, %zu chars: \"%s%s\"\n",
+            l,
+            osmo_escape_cstr_c(OTC_SELECT, output, OSMO_MIN(logmax, l)),
+            l > logmax ? "..." : "");

+       osmo_stats_report_lock(true);
+       /* { */
        decode_nft_response(output);
+       /* } */
+       osmo_stats_report_lock(false);

 unbuffer_and_exit:
        nft_ctx_unbuffer_output(nft);
 }

-static void nft_kpi_period_cb(void *data)
+static void nft_kpi_period_cb(void)
 {
+       pthread_mutex_lock(&g_nft_kpi_state.lock);
+       /* { */
        nft_kpi_read_counters();
-       nft_kpi_period_schedule();
+       /* } */
+       pthread_mutex_unlock(&g_nft_kpi_state.lock);
 }

 #endif // ENABLE_NFTABLES
diff --git a/src/osmo-hnbgw/osmo_hnbgw_main.c b/src/osmo-hnbgw/osmo_hnbgw_main.c
index 1052d18..36180ef 100644
--- a/src/osmo-hnbgw/osmo_hnbgw_main.c
+++ b/src/osmo-hnbgw/osmo_hnbgw_main.c
@@ -328,16 +328,23 @@
        /* If UPF is configured, set up PFCP socket and send Association Setup 
Request to UPF */
        hnbgw_pfcp_init();
 #endif
-#if ENABLE_NFTABLES
+
        /* If nftables is enabled, initialize the nft table now or fail 
startup. This is important to immediately let
         * the user know if cap_net_admin privileges are missing, and not only 
when the first hNodeB connects. */
        if (g_hnbgw->config.nft_kpi.enable) {
+#if ENABLE_NFTABLES
                if (nft_kpi_init(g_hnbgw->config.nft_kpi.table_name)) {
-                       perror("Failed to initialize nft KPI, probably missing 
cap_net_admin");
+                       perror("ERROR: Failed to initialize nft KPI, probably 
missing cap_net_admin");
                        exit(1);
                }
-       }
+               /* nft_kpi.c manipulates rate_ctr state directly. Enable the 
mutex lock around stats reporting, so
+                * nft_kpi.c can make use of it. */
+               osmo_stats_report_use_lock(true);
+#else
+               fprintf(stderr, "ERROR: Cannot enable nft KPI, this binary was 
built without nftables support\n");
+               exit(1);
 #endif
+       }

        hnbgw_cnpool_start(&g_hnbgw->sccp.cnpool_iucs);
        hnbgw_cnpool_start(&g_hnbgw->sccp.cnpool_iups);
diff --git a/src/osmo-hnbgw/tdefs.c b/src/osmo-hnbgw/tdefs.c
index 3a7f0bc..cfcd4c2 100644
--- a/src/osmo-hnbgw/tdefs.c
+++ b/src/osmo-hnbgw/tdefs.c
@@ -35,8 +35,8 @@
        {.T = 3113, .default_val = 15, .desc = "Time to keep Paging record, for 
CN pools with more than one link" },
        {.T = 4, .default_val = 5, .desc = "Timeout to receive RANAP RESET 
ACKNOWLEDGE from an MSC/SGSN" },
        {.T = -31, .default_val = 15, .desc = "Timeout for establishing and 
releasing context maps (RUA <-> SCCP)" },
+       {.T = -34, .default_val = 1000, .unit = OSMO_TDEF_MS, .desc = "Period 
to query network traffic stats from netfilter" },
        {.T = -1002, .default_val = 10, .desc = "Timeout for the HNB to respond 
to PS RAB Assignment Request" },
-       {.T = -34, .default_val = 1, .desc = "Period to query network traffic 
stats from netfilter" },
        { }
 };


--
To view, visit https://gerrit.osmocom.org/c/osmo-hnbgw/+/36540?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: osmo-hnbgw
Gerrit-Branch: master
Gerrit-Change-Id: I9dc54e6bc94c553f45adfa71ae8ad70be4afbc8f
Gerrit-Change-Number: 36540
Gerrit-PatchSet: 1
Gerrit-Owner: neels <[email protected]>
Gerrit-MessageType: newchange

Reply via email to