Timur Davydov has uploaded this change for review. ( 
https://gerrit.osmocom.org/c/osmo-trx/+/42248?usp=email )


Change subject: transceiver: add optional Laurent burst LUT modulation (int16)
......................................................................

transceiver: add optional Laurent burst LUT modulation (int16)

Introduce an optional Burst LUT optimization for Laurent-based burst
modulation, enabled via --with-burst-lut.

- Add precomputed Laurent LUT tables (float and int16 I/Q variants)
- Generate LUT at initialization (prepareBurstLUT)
- Use LUT-based modulation for sps=4 when enabled
- Keep original modulateBurstLaurent() as fallback
- Add configure.ac option and USE_BURST_LUT define

This reduces runtime computation by replacing per-burst Laurent
processing with memcpy-based LUT lookups.

Change-Id: I1715c2d33dc55fe1c7be5c6e7259d378ea5e80b2
---
M Transceiver52M/sigProcLib.cpp
M configure.ac
2 files changed, 180 insertions(+), 0 deletions(-)



  git pull ssh://gerrit.osmocom.org:29418/osmo-trx refs/changes/48/42248/1

diff --git a/Transceiver52M/sigProcLib.cpp b/Transceiver52M/sigProcLib.cpp
index 5fac365..bcea848 100644
--- a/Transceiver52M/sigProcLib.cpp
+++ b/Transceiver52M/sigProcLib.cpp
@@ -51,6 +51,29 @@
 /** Lookup tables for trigonometric approximation */
 static float sincTable[TABLESIZE+1]; // add 1 element for wrap around

+/** Lookup tables for burst processing */
+#ifdef USE_BURST_LUT
+/* Factor applied to the float LUT samples when they are converted to int16. */
+#define BURST_SCALING 32700 /* Scaling factor */
+
+/* Set to true at the end of prepareBurstLUT(); while false the LUT-based
+ * modulator is bypassed in favour of modulateBurstLaurent(). */
+static bool burst_lut_prepared = false;
+
+/** Complex Laurent LUT table (float complex samples).
+ * Layout: prologue, per-4bit-combination filters and epilogue. Each
+ * entry contains complex<float> samples used for fast burst modulation.
+ *
+ * lut_laurent4 is indexed as 64 * q + 4 * idx + k, where q (0..3) selects
+ * one of four groups, idx (0..15) is the value of the 4-bit sliding window
+ * and k (0..3) is the sample within the segment. The modulators select
+ * q = 3 - (bit index % 4).
+ *
+ * The prologue and epilogue arrays hold 16 samples each, of which the
+ * modulators copy only the first 12.
+ */
+static complex lut_laurent4_prologue[4 * 4];  /* 4 prologue filters, each with 4 taps */
+static complex lut_laurent4[16 * 4 * 4];      /* 16 possible 4-bit combinations, each with 4 filters of 4 taps */
+static complex lut_laurent4_epilogue[4 * 4];  /* 4 epilogue filters, each with 4 taps */
+
+/** Integer LUT table (interleaved I/Q int16 samples).
+ * These tables contain int16_t I/Q pairs (interleaved) generated from
+ * the complex LUT and scaled by `BURST_SCALING` for integer output.
+ * Element 2 * n holds the real part and element 2 * n + 1 the imaginary
+ * part of complex entry n of the corresponding float table.
+ */
+static int16_t i16_lut_laurent4_prologue[4 * 4 * 2];  /* 4 prologue filters, each with 4 taps */
+static int16_t i16_lut_laurent4[16 * 4 * 4 * 2];      /* 16 possible 4-bit combinations */
+static int16_t i16_lut_laurent4_epilogue[4 * 4 * 2];  /* 4 epilogue filters, each with 4 taps */
+#endif /* USE_BURST_LUT */
+
 /** Constants */
 static const float M_PI_F = (float)M_PI;

@@ -669,6 +692,146 @@
   return c0_shaped;
 }

+#ifdef USE_BURST_LUT
+/**
+ * @brief Modulate a bit sequence into interleaved int16 I/Q samples using the
+ *        precomputed Laurent LUT (4 samples per bit).
+ *
+ * The output is assembled purely from table copies: 12 prologue samples, one
+ * 4-sample segment for every bit from bits[2] onwards, one further segment
+ * for a zero bit shifted in after the last input bit, and 12 epilogue
+ * samples.
+ *
+ * @param bits   The input bit vector to be modulated (bit values 0/1, one
+ *               bit per byte). bits[0] and bits[1] only seed the 4-bit
+ *               sliding window.
+ * @param bitlen The length of the input bit vector; must be at least 2.
+ * @param outbuf Output buffer to store interleaved int16 I/Q samples
+ *               (I0,Q0,I1,Q1,...). It must have room for
+ *               4 * bitlen + 20 I/Q pairs, i.e. 8 * bitlen + 40 int16_t
+ *               values.
+ * @return The number of complex samples (I/Q pairs) written to `outbuf`
+ *         (4 * bitlen + 20), or 0 if `bits` or `outbuf` is NULL or
+ *         `bitlen` is smaller than 2; nothing is written in that case.
+ * @note The tables read here are filled by prepareBurstLUT(); this function
+ *       does not check whether that has happened.
+ */
+unsigned modulateBits(uint8_t *bits, unsigned bitlen, int16_t *outbuf)
+{
+    const unsigned seg_len = 4 * 2;   /* int16 values per bit: 4 samples, I and Q */
+    const unsigned edge_len = 12 * 2; /* int16 values copied for prologue/epilogue */
+
+    /* The sliding window is seeded from bits[0] and bits[1], so shorter
+     * inputs cannot be modulated. */
+    if (!bits || !outbuf || bitlen < 2)
+        return 0;
+
+    unsigned off = 0;
+    unsigned lut_idx = (bits[0] << 1) | (bits[1]);
+
+    memcpy(outbuf + off, i16_lut_laurent4_prologue, sizeof(int16_t) * edge_len);
+    off += edge_len;
+
+    /* i deliberately runs one step past the last bit: the final pass shifts
+     * a zero into the window and emits its segment. */
+    for (unsigned i = 2; i <= bitlen; ++i) {
+        const unsigned bit = (i < bitlen) ? bits[i] : 0;
+
+        lut_idx = ((lut_idx << 1) | bit) & 0xf;
+        memcpy(outbuf + off, &i16_lut_laurent4[2 * (64 * (3 - (i & 0x3)) + 4 * lut_idx)], sizeof(int16_t) * seg_len);
+        off += seg_len;
+    }
+
+    memcpy(outbuf + off, i16_lut_laurent4_epilogue, sizeof(int16_t) * edge_len);
+    off += edge_len;
+
+    /* Two int16 values make one complex sample. */
+    return off >> 1;
+}
+
+/**
+ * @brief Modulate a burst using the precomputed Laurent approximation LUT.
+ *
+ * Despite the name, this routine copies from the float complex tables
+ * (lut_laurent4*), not from the int16 ones. It writes 12 prologue samples,
+ * one 4-sample segment per bit from bits[2] onwards, one further segment
+ * for a zero bit shifted in after the last input bit, and 12 epilogue
+ * samples: 4 * bits.size() + 20 samples in total. Samples beyond that are
+ * left as the signalVector constructor initialised them.
+ *
+ * @param bits The input bit vector to be modulated.
+ * @return A newly allocated `signalVector` containing complex<float>
+ *         samples (I/Q) ready for transmission. The vector length is 625
+ *         samples. The caller is responsible for deleting the returned
+ *         `signalVector`. If `bits` holds fewer than 2 bits, or so many
+ *         that the output would not fit into 625 samples, the result of
+ *         modulateBurstLaurent(bits) is returned instead.
+ */
+static signalVector *modulateBurstLaurentLutInt(const BitVector& bits)
+{
+    const unsigned burst_len = 625;
+    const unsigned edge_len = 12;  /* samples copied for prologue and for epilogue */
+    const unsigned seg_len = 4;    /* samples per bit */
+    const unsigned nbits = bits.size();
+
+    /* bits[0] and bits[1] seed the sliding window, and the copies below
+     * write 4 * nbits + 20 samples; anything outside that range would read
+     * or write out of bounds, so leave it to the reference implementation. */
+    if (nbits < 2 || nbits > (burst_len - 20) / 4)
+        return modulateBurstLaurent(bits);
+
+    /* NOTE(review): the second constructor argument (16) is taken over
+     * unchanged; its meaning is not visible from this function. */
+    signalVector *burst = new signalVector(burst_len, 16);
+    signalVector::iterator it = burst->begin();
+
+    memcpy((void *)it, lut_laurent4_prologue, sizeof(complex) * edge_len);
+    it += edge_len;
+
+    unsigned lut_idx = (bits[0] << 1) | (bits[1]);
+
+    /* i deliberately runs one step past the last bit: the final pass shifts
+     * a zero into the window and emits its segment. */
+    for (unsigned i = 2; i <= nbits; i++) {
+        const unsigned bit = (i < nbits) ? bits[i] : 0;
+
+        lut_idx = ((lut_idx << 1) | bit) & 0xf;
+        memcpy((void *)it, &lut_laurent4[64 * (3 - (i % 4)) + 4 * lut_idx], sizeof(complex) * seg_len);
+        it += seg_len;
+    }
+
+    memcpy((void *)it, lut_laurent4_epilogue, sizeof(complex) * edge_len);
+
+    return burst;
+}
+
+/**
+ * @brief Dispatch between the LUT-based and the reference Laurent modulator.
+ * @param bits The input bit vector to be modulated.
+ * @note While `burst_lut_prepared` is false the call is forwarded to
+ *       `modulateBurstLaurent`; once it is true
+ *       `modulateBurstLaurentLutInt` produces the burst.
+ * @return A newly allocated `signalVector` containing complex<float>
+ *         samples (I/Q) ready for transmission. The caller owns the
+ *         returned vector.
+ */
+static signalVector *modulateBurstLaurentLut(const BitVector &bits)
+{
+  if (!burst_lut_prepared)
+    return modulateBurstLaurent(bits);
+
+  return modulateBurstLaurentLutInt(bits);
+}
+
+/**
+ * @brief Convert one float LUT component to int16, scaled by BURST_SCALING.
+ *
+ * Values whose scaled magnitude does not fit into int16_t are clamped,
+ * because converting an out-of-range float to an integer type is undefined.
+ * In-range values are truncated towards zero.
+ */
+static int16_t burstLutToInt16(float v)
+{
+  const float scaled = v * BURST_SCALING;
+
+  if (scaled >= 32767.0f)
+    return 32767;
+  if (scaled <= -32768.0f)
+    return -32768;
+  return (int16_t)scaled;
+}
+
+/**
+ * @brief Prepare the burst modulation LUT by generating the Laurent approximation for all possible
+ * bit combinations and storing them in the LUT tables.
+ *
+ * For each of the 256 values of an 8-bit pattern, a burst of
+ * NORMAL_BURST_NBITS zero bits with the pattern placed at bit positions
+ * 8..15 is modulated with modulateBurstLaurent(). From every result the 16
+ * samples starting at sample 52 are kept in a scratch table, from which the
+ * four groups of lut_laurent4 (and their int16 copies) are composed.
+ * Samples 0..15 and 600..615 are stored as prologue and epilogue.
+ *
+ * `burst_lut_prepared` is set only when every step succeeded.
+ *
+ * @return 0 on success, -1 if modulateBurstLaurent() returned NULL.
+ */
+static int prepareBurstLUT()
+{
+  const unsigned B = 8;            /* width of the swept bit pattern */
+  const unsigned first_bit = 8;    /* burst position of the pattern's first bit */
+  const unsigned epilogue_off = 600;
+  complex modtbl[256 * 4 * 4];     /* 16 samples for each of the 256 patterns */
+
+  for (unsigned b = 0; b < (1u << B); ++b) {
+    char v[NORMAL_BURST_NBITS + 1]; /* extra byte for null terminator */
+    memset(v, '0', NORMAL_BURST_NBITS);
+    v[NORMAL_BURST_NBITS] = 0;
+
+    /* Place pattern b, most significant bit first, at bits 8..15. */
+    for (unsigned c = 0; c < B; ++c) {
+      if ((b >> (B - c - 1)) & 1) {
+        v[first_bit + c] = '1';
+      }
+    }
+
+    BitVector bv(v);
+    signalVector *sv = modulateBurstLaurent(bv);
+    if (!sv)
+      return -1;
+
+    /* Prologue and epilogue are rewritten on every pass; the values taken
+     * from the last pattern are the ones that remain. */
+    for (unsigned t = 0; t < 16; ++t) {
+      lut_laurent4_prologue[t] = (*sv)[t];
+      i16_lut_laurent4_prologue[2 * t + 0] = burstLutToInt16(lut_laurent4_prologue[t].r);
+      i16_lut_laurent4_prologue[2 * t + 1] = burstLutToInt16(lut_laurent4_prologue[t].i);
+    }
+
+    for (unsigned t = 0; t < 16; ++t) {
+      lut_laurent4_epilogue[t] = (*sv)[epilogue_off + t];
+      i16_lut_laurent4_epilogue[2 * t + 0] = burstLutToInt16(lut_laurent4_epilogue[t].r);
+      i16_lut_laurent4_epilogue[2 * t + 1] = burstLutToInt16(lut_laurent4_epilogue[t].i);
+    }
+
+    /* Keep the 16 samples starting 3 bits before the end of the pattern. */
+    const unsigned j_off = (first_bit + B - 3) * 4;
+    for (unsigned k = 0; k < 16; ++k) {
+      modtbl[b * 16 + k] = (*sv)[j_off + k];
+    }
+
+    /* The modulated burst is owned by us and no longer needed. */
+    delete sv;
+  }
+
+  // Compose modulation LUT table
+  for (unsigned q = 0; q < 4; ++q) { // quadrant
+    for (unsigned i = 0; i < 16; ++i) {
+      for (unsigned k = 0; k < 4; ++k) {
+        const unsigned dst = (q * 16 + i) * 4 + k;
+
+        lut_laurent4[dst] = modtbl[(i << q) * 16 + (12 - q * 4) + k];
+
+        i16_lut_laurent4[2 * dst + 0] = burstLutToInt16(lut_laurent4[dst].r);
+        i16_lut_laurent4[2 * dst + 1] = burstLutToInt16(lut_laurent4[dst].i);
+      }
+    }
+  }
+
+  burst_lut_prepared = true;
+  return 0;
+}
+#endif /* USE_BURST_LUT */
+
 static signalVector *rotateEdgeBurst(const signalVector &symbols, int sps)
 {
   signalVector *burst;
@@ -973,7 +1136,11 @@
   if (emptyPulse)
     return rotateBurst(wBurst, guardPeriodLength, sps);
   else if (sps == 4)
+#ifdef USE_BURST_LUT
+    return modulateBurstLaurentLut(wBurst);
+#else
     return modulateBurstLaurent(wBurst);
+#endif
   else
     return modulateBurstBasic(wBurst, guardPeriodLength, sps);
 }
@@ -2141,6 +2308,10 @@
   generateSincTable();
   initGMSKRotationTables();

+#ifdef USE_BURST_LUT
+  prepareBurstLUT();
+#endif
+
   GSMPulse1 = generateGSMPulse(1);
   GSMPulse4 = generateGSMPulse(4);

diff --git a/configure.ac b/configure.ac
index c81b089..910f18b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -185,6 +185,15 @@
         [enable x86 SSE support (default)])
 ])

+dnl Optional LUT-based burst modulation; only enabled when --with-burst-lut
+dnl is passed to configure.
+AC_ARG_WITH(burst-lut, [
+    AC_HELP_STRING([--with-burst-lut],
+        [enable Burst LUT optimization])
+])
+
+dnl Expose the choice to the sources as USE_BURST_LUT.
+AS_IF([test "x$with_burst_lut" = "xyes"], [
+    AC_DEFINE(USE_BURST_LUT, 1, Define to 1 for using Burst LUT)
+])
+
 AS_IF([test "x$with_neon" = "xyes"], [
     AC_DEFINE(HAVE_NEON, 1, Support ARM NEON)
 ])

-- 
To view, visit https://gerrit.osmocom.org/c/osmo-trx/+/42248?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings?usp=email

Gerrit-MessageType: newchange
Gerrit-Project: osmo-trx
Gerrit-Branch: master
Gerrit-Change-Id: I1715c2d33dc55fe1c7be5c6e7259d378ea5e80b2
Gerrit-Change-Number: 42248
Gerrit-PatchSet: 1
Gerrit-Owner: Timur Davydov <[email protected]>

Reply via email to