This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch cpc_no_base64
in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git

commit 853aefac58238a83fa8b3c0c04ffd3de9a723032
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Wed Sep 25 14:40:00 2024 -0700

    removed base64 encoding-decoding from CPC
---
 cpc/cpc_sketch.cpp                                 | 52 ++++++++--------------
 .../cpc_sketch_get_estimate_and_bounds_seed.sqlx   | 19 ++++----
 cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx         | 10 ++---
 cpc/sqlx/cpc_sketch_to_string_seed.sqlx            | 10 ++---
 cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx            | 14 ++----
 5 files changed, 36 insertions(+), 69 deletions(-)

diff --git a/cpc/cpc_sketch.cpp b/cpc/cpc_sketch.cpp
index fcb5eb8..bb70a64 100644
--- a/cpc/cpc_sketch.cpp
+++ b/cpc/cpc_sketch.cpp
@@ -23,11 +23,10 @@
 #include <cpc_sketch.hpp>
 #include <cpc_union.hpp>
 
-#include "../base64.hpp"
-
 const emscripten::val Uint8Array = emscripten::val::global("Uint8Array");
 
 EMSCRIPTEN_BINDINGS(cpc_sketch) {
+  emscripten::register_vector<double>("VectorDouble");
 
   emscripten::function("getExceptionMessage", 
emscripten::optional_override([](intptr_t ptr) {
     return std::string(reinterpret_cast<std::exception*>(ptr)->what());
@@ -45,19 +44,16 @@ EMSCRIPTEN_BINDINGS(cpc_sketch) {
       auto bytes = self.serialize();
       return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
     }))
-    .class_function("deserializeFromB64", 
emscripten::optional_override([](const std::string& b64, uint64_t seed) {
-      std::vector<char> bytes(b64_dec_len(b64.data(), b64.size()));
-      b64_decode(b64.data(), b64.size(), bytes.data());
-      return new 
datasketches::cpc_sketch(datasketches::cpc_sketch::deserialize(bytes.data(), 
bytes.size(), seed));
-    }), emscripten::allow_raw_pointers())
-    .class_function("deserializeFromBytes", 
emscripten::optional_override([](const std::string& bytes, uint64_t seed) {
-      return new 
datasketches::cpc_sketch(datasketches::cpc_sketch::deserialize(bytes.data(), 
bytes.size(), seed));
-    }), emscripten::allow_raw_pointers())
-    .function("getEstimate", &datasketches::cpc_sketch::get_estimate)
-    .function("getLowerBound", &datasketches::cpc_sketch::get_lower_bound)
-    .function("getUpperBound", &datasketches::cpc_sketch::get_upper_bound)
-    .function("toString", &datasketches::cpc_sketch::to_string)
-    .class_function("getMaxSerializedSizeBytes", 
&datasketches::cpc_sketch::get_max_serialized_size_bytes)
+    .class_function("getEstimate", emscripten::optional_override([](const 
std::string& sketch_bytes, uint64_t seed) {
+      return datasketches::cpc_sketch::deserialize(sketch_bytes.data(), 
sketch_bytes.size(), seed).get_estimate();
+    }))
+    .class_function("getEstimateAndBounds", 
emscripten::optional_override([](const std::string& sketch_bytes, uint8_t 
num_std_devs, uint64_t seed) {
+      const auto sketch = 
datasketches::cpc_sketch::deserialize(sketch_bytes.data(), sketch_bytes.size(), 
seed);
+      return std::vector<double>{sketch.get_estimate(), 
sketch.get_lower_bound(num_std_devs), sketch.get_upper_bound(num_std_devs)};
+    }))
+    .class_function("toString", emscripten::optional_override([](const 
std::string& sketch_bytes, uint64_t seed) {
+      return datasketches::cpc_sketch::deserialize(sketch_bytes.data(), 
sketch_bytes.size(), seed).to_string();
+    }))
     ;
 
   emscripten::class_<datasketches::cpc_union>("cpc_union")
@@ -70,29 +66,17 @@ EMSCRIPTEN_BINDINGS(cpc_sketch) {
     .function("updateWithBytes", 
emscripten::optional_override([](datasketches::cpc_union& self, const 
std::string& bytes, uint64_t seed) {
       self.update(datasketches::cpc_sketch::deserialize(bytes.data(), 
bytes.size(), seed));
     }), emscripten::allow_raw_pointers())
-    .function("updateWithB64", 
emscripten::optional_override([](datasketches::cpc_union& self, const 
std::string& b64, uint64_t seed) {
-      std::vector<char> bytes(b64_dec_len(b64.data(), b64.size()));
-      b64_decode(b64.data(), b64.size(), bytes.data());
-      self.update(datasketches::cpc_sketch::deserialize(bytes.data(), 
bytes.size(), seed));
-    }), emscripten::allow_raw_pointers())
-    .function("updateWithBuffer", 
emscripten::optional_override([](datasketches::cpc_union& self, intptr_t bytes, 
size_t size, uint64_t seed) {
-      
self.update(datasketches::cpc_sketch::deserialize(reinterpret_cast<void*>(bytes),
 size, seed));
-    }))
-//    .function("getResultStream", 
emscripten::optional_override([](datasketches::cpc_union& self, intptr_t bytes, 
size_t size) {
-//      std::strstream stream(reinterpret_cast<char*>(bytes), size);
-//      self.get_result().serialize(stream);
-//      return (int) stream.tellp();
-//    }))
     .function("getResultAsUint8Array", 
emscripten::optional_override([](datasketches::cpc_union& self) {
       auto bytes = self.get_result().serialize();
       return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
     }))
-    .function("getResultB64", 
emscripten::optional_override([](datasketches::cpc_union& self) {
-      auto bytes = self.get_result().serialize();
-      std::vector<char> b64(b64_enc_len(bytes.size()));
-      b64_encode((const char*) bytes.data(), bytes.size(), b64.data());
-      return std::string(b64.data(), b64.size());
-    }))
     ;
 
+  emscripten::function("cpcUnion", emscripten::optional_override([](const 
std::string& bytes1, const std::string& bytes2, uint8_t lg_k, uint64_t seed) {
+    datasketches::cpc_union u(lg_k, seed);
+    u.update(datasketches::cpc_sketch::deserialize(bytes1.data(), 
bytes1.size(), seed));
+    u.update(datasketches::cpc_sketch::deserialize(bytes2.data(), 
bytes2.size(), seed));
+    const auto bytes = u.get_result().serialize();
+    return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
+  }));
 }
diff --git a/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx 
b/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx
index 1559592..d47be0c 100644
--- a/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx
+++ b/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx
@@ -24,6 +24,7 @@ RETURNS STRUCT<estimate FLOAT64, lower_bound FLOAT64, 
upper_bound FLOAT64>
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/cpc_sketch.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Gets cardinality estimate and bounds from given sketch.
   
 Param sketch: The given sketch to query as bytes.
@@ -38,18 +39,14 @@ For more information:
 '''
 ) AS R"""
 try {
-  var sketchObject = null;
-  try {
-    sketchObject = Module.cpc_sketch.deserializeFromB64(sketch, seed ? 
BigInt(seed) : BigInt(Module.DEFAULT_SEED));
-    return {
-      estimate: sketchObject.getEstimate(),
-      lower_bound: sketchObject.getLowerBound(num_std_devs),
-      upper_bound: sketchObject.getUpperBound(num_std_devs)
-    };
-  } finally {
-    if (sketchObject != null) sketchObject.delete();
-  }
+  const result = Module.cpc_sketch.getEstimateAndBounds(sketch, 
Number(num_std_devs), seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED));
+  return {
+    estimate: result.get(0),
+    lower_bound: result.get(1),
+    upper_bound: result.get(2)
+  };
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx 
b/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx
index 520eb4c..90722ad 100644
--- a/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx
+++ b/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx
@@ -24,6 +24,7 @@ RETURNS FLOAT64
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/cpc_sketch.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Gets cardinality estimate and bounds from given sketch.
 
 Param sketch: The given sketch to query as BYTES.
@@ -35,14 +36,9 @@ For more information:
 '''
 ) AS R"""
 try {
-  var sketchObject = null;
-  try {
-    sketchObject = Module.cpc_sketch.deserializeFromB64(sketch, seed ? 
BigInt(seed) : BigInt(Module.DEFAULT_SEED));
-    return sketchObject.getEstimate();
-  } finally {
-    if (sketchObject != null) sketchObject.delete();
-  }
+  return Module.cpc_sketch.getEstimate(sketch, seed ? BigInt(seed) : 
BigInt(Module.DEFAULT_SEED));
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/cpc/sqlx/cpc_sketch_to_string_seed.sqlx 
b/cpc/sqlx/cpc_sketch_to_string_seed.sqlx
index ae97dda..3d74051 100644
--- a/cpc/sqlx/cpc_sketch_to_string_seed.sqlx
+++ b/cpc/sqlx/cpc_sketch_to_string_seed.sqlx
@@ -24,6 +24,7 @@ RETURNS STRING
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/cpc_sketch.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Returns a summary string that represents the state of the 
given sketch.
 
 Param sketch the given sketch as BYTES.
@@ -36,14 +37,9 @@ For more information:
 ) AS R"""
 const default_seed = BigInt(Module.DEFAULT_SEED);
 try {
-  var sketchObject = null;
-  try {
-    sketchObject = Module.cpc_sketch.deserializeFromB64(sketch, seed ? 
BigInt(seed) : default_seed);
-    return sketchObject.toString();
-  } finally {
-    if (sketchObject != null) sketchObject.delete();
-  }
+  return Module.cpc_sketch.toString(sketch, seed ? BigInt(seed) : 
default_seed);
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx 
b/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx
index 7d82223..5089d5e 100644
--- a/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx
+++ b/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx
@@ -24,6 +24,7 @@ RETURNS BYTES
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/cpc_sketch.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Computes a sketch that represents the scalar union of the 
two given sketches.
 
 Param sketchA: the first sketch as BYTES.
@@ -36,19 +37,12 @@ For more information:
  - https://datasketches.apache.org/docs/CPC/CpcSketches.html
 '''
 ) AS R"""
-const default_lg_k = 12;
+const default_lg_k = Number(12);
 const default_seed = BigInt(Module.DEFAULT_SEED);
 try {
-  var union = null;
-  try {
-    union = new Module.cpc_union(lg_k ? lg_k : default_lg_k, seed ? 
BigInt(seed) : default_seed);
-    union.updateWithB64(sketchA, seed ? BigInt(seed) : default_seed)
-    union.updateWithB64(sketchB, seed ? BigInt(seed) : default_seed)
-    return union.getResultB64();
-  } finally {
-    if (union != null) union.delete();
-  }
+  return Module.cpcUnion(sketchA, sketchB, lg_k ? Number(lg_k) : default_lg_k, 
seed ? BigInt(seed) : default_seed);
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to