This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch hll_no_base64
in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git

commit 95a5621d43f302f8b6ad62da5abba5c415bc9858
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Wed Sep 25 15:40:36 2024 -0700

    removed base64 encoding-decoding from HLL
---
 hll/hll_sketch.cpp                               | 54 +++++++++---------------
 hll/sqlx/hll_sketch_get_estimate.sqlx            | 10 ++---
 hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx | 19 ++++-----
 hll/sqlx/hll_sketch_to_string.sqlx               | 10 ++---
 hll/sqlx/hll_sketch_union_lgk_type.sqlx          | 14 ++----
 5 files changed, 37 insertions(+), 70 deletions(-)

diff --git a/hll/hll_sketch.cpp b/hll/hll_sketch.cpp
index 9e4c800..35b7f92 100644
--- a/hll/hll_sketch.cpp
+++ b/hll/hll_sketch.cpp
@@ -22,8 +22,6 @@
 
 #include <hll.hpp>
 
-#include "../base64.hpp"
-
 datasketches::target_hll_type convert_tgt_type(const std::string& 
tgt_type_str) {
   if (tgt_type_str == "" || tgt_type_str == "HLL_4") return 
datasketches::HLL_4;
   if (tgt_type_str == "HLL_6") return datasketches::HLL_6;
@@ -34,6 +32,7 @@ datasketches::target_hll_type convert_tgt_type(const 
std::string& tgt_type_str)
 const emscripten::val Uint8Array = emscripten::val::global("Uint8Array");
 
 EMSCRIPTEN_BINDINGS(hll_sketch) {
+  emscripten::register_vector<double>("VectorDouble");
 
   emscripten::function("getExceptionMessage", 
emscripten::optional_override([](intptr_t ptr) {
     return std::string(reinterpret_cast<std::exception*>(ptr)->what());
@@ -48,21 +47,16 @@ EMSCRIPTEN_BINDINGS(hll_sketch) {
       auto bytes = self.serialize_compact();
       return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
     }))
-    .class_function("deserializeFromB64", 
emscripten::optional_override([](const std::string& b64) {
-      std::vector<char> bytes(b64_dec_len(b64.data(), b64.size()));
-      b64_decode(b64.data(), b64.size(), bytes.data());
-      return new 
datasketches::hll_sketch(datasketches::hll_sketch::deserialize(bytes.data(), 
bytes.size()));
-    }), emscripten::allow_raw_pointers())
-    .class_function("deserializeFromBytes", 
emscripten::optional_override([](const std::string& bytes) {
-      return new 
datasketches::hll_sketch(datasketches::hll_sketch::deserialize(bytes.data(), 
bytes.size()));
-    }), emscripten::allow_raw_pointers())
-    .function("getEstimate", &datasketches::hll_sketch::get_estimate)
-    .function("getLowerBound", &datasketches::hll_sketch::get_lower_bound)
-    .function("getUpperBound", &datasketches::hll_sketch::get_upper_bound)
-    .function("toString", emscripten::optional_override([](const 
datasketches::hll_sketch& self) {
-      return self.to_string();
+    .class_function("getEstimate", emscripten::optional_override([](const 
std::string& bytes) {
+      return datasketches::hll_sketch::deserialize(bytes.data(), 
bytes.size()).get_estimate();
+    }))
+    .class_function("getEstimateAndBounds", 
emscripten::optional_override([](const std::string& bytes, uint8_t 
num_std_devs) {
+      const auto sketch = datasketches::hll_sketch::deserialize(bytes.data(), 
bytes.size());
+      return std::vector<double>{sketch.get_estimate(), 
sketch.get_lower_bound(num_std_devs), sketch.get_upper_bound(num_std_devs)};
+    }))
+    .class_function("toString", emscripten::optional_override([](const 
std::string& bytes) {
+      return datasketches::hll_sketch::deserialize(bytes.data(), 
bytes.size()).to_string();
     }))
-    .class_function("getMaxSerializedSizeBytes", 
&datasketches::hll_sketch::get_max_updatable_serialization_bytes)
     ;
 
   emscripten::class_<datasketches::hll_union>("hll_union")
@@ -75,29 +69,19 @@ EMSCRIPTEN_BINDINGS(hll_sketch) {
     .function("updateWithBytes", 
emscripten::optional_override([](datasketches::hll_union& self, const 
std::string& bytes) {
       self.update(datasketches::hll_sketch::deserialize(bytes.data(), 
bytes.size()));
     }), emscripten::allow_raw_pointers())
-    .function("updateWithB64", 
emscripten::optional_override([](datasketches::hll_union& self, const 
std::string& b64) {
-      std::vector<char> bytes(b64_dec_len(b64.data(), b64.size()));
-      b64_decode(b64.data(), b64.size(), bytes.data());
-      self.update(datasketches::hll_sketch::deserialize(bytes.data(), 
bytes.size()));
-    }), emscripten::allow_raw_pointers())
-    .function("updateWithBuffer", 
emscripten::optional_override([](datasketches::hll_union& self, intptr_t bytes, 
size_t size) {
-      
self.update(datasketches::hll_sketch::deserialize(reinterpret_cast<void*>(bytes),
 size));
-    }))
-//    .function("getResultStream", 
emscripten::optional_override([](datasketches::hll_union& self, intptr_t bytes, 
size_t size, datasketches::target_hll_type tgt_type) {
-//      std::strstream stream(reinterpret_cast<char*>(bytes), size);
-//      self.get_result(tgt_type).serialize_compact(stream);
-//      return (int) stream.tellp();
-//    }))
     .function("getResultAsUint8Array", 
emscripten::optional_override([](datasketches::hll_union& self, const 
std::string& tgt_type_str) {
       auto bytes = 
self.get_result(convert_tgt_type(tgt_type_str)).serialize_compact();
       return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
     }))
-    .function("getResultB64", 
emscripten::optional_override([](datasketches::hll_union& self, const 
std::string& tgt_type_str) {
-      auto bytes = 
self.get_result(convert_tgt_type(tgt_type_str)).serialize_compact();
-      std::vector<char> b64(b64_enc_len(bytes.size()));
-      b64_encode((const char*) bytes.data(), bytes.size(), b64.data());
-      return std::string(b64.data(), b64.size());
-    }))
     ;
 
+  emscripten::function("hllUnion", emscripten::optional_override([](
+    const std::string& bytes1, const std::string& bytes2, uint8_t lg_k, const 
std::string& tgt_type_str
+  ) {
+    datasketches::hll_union u(lg_k);
+    u.update(datasketches::hll_sketch::deserialize(bytes1.data(), 
bytes1.size()));
+    u.update(datasketches::hll_sketch::deserialize(bytes2.data(), 
bytes2.size()));
+    const auto bytes = 
u.get_result(convert_tgt_type(tgt_type_str)).serialize_compact();
+    return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
+  }));
 }
diff --git a/hll/sqlx/hll_sketch_get_estimate.sqlx 
b/hll/sqlx/hll_sketch_get_estimate.sqlx
index 2900485..6605806 100644
--- a/hll/sqlx/hll_sketch_get_estimate.sqlx
+++ b/hll/sqlx/hll_sketch_get_estimate.sqlx
@@ -24,6 +24,7 @@ RETURNS FLOAT64
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/hll_sketch.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Returns a summary string that represents the state of the 
given sketch.
 
 Param sketch: the given sketch as BYTES.
@@ -34,14 +35,9 @@ For more information:
 '''
 ) AS R"""
 try {
-  var sketchObject = null;
-  try {
-    sketchObject = Module.hll_sketch.deserializeFromB64(sketch);
-    return sketchObject.getEstimate();
-  } finally {
-    if (sketchObject != null) sketchObject.delete();
-  }
+  return Module.hll_sketch.getEstimate(sketch);
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx 
b/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx
index 4ba7e3a..cb69563 100644
--- a/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx
+++ b/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx
@@ -24,6 +24,7 @@ RETURNS STRUCT<estimate FLOAT64, lower_bound FLOAT64, 
upper_bound FLOAT64>
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/hll_sketch.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Gets cardinality estimate and bounds from given sketch.
 
 Param sketch: The given sketch to query as BYTES.
@@ -37,18 +38,14 @@ For more information:
 '''
 ) AS R"""
 try {
-  var sketchObject = null;
-  try {
-    sketchObject = Module.hll_sketch.deserializeFromB64(sketch);
-    return {
-      estimate: sketchObject.getEstimate(),
-      lower_bound: sketchObject.getLowerBound(num_std_devs),
-      upper_bound: sketchObject.getUpperBound(num_std_devs)
-    };
-  } finally {
-    if (sketchObject != null) sketchObject.delete();
-  }
+  const result = Module.hll_sketch.getEstimateAndBounds(sketch, 
Number(num_std_devs));
+  return {
+    estimate: result.get(0),
+    lower_bound: result.get(1),
+    upper_bound: result.get(2)
+  };
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/hll/sqlx/hll_sketch_to_string.sqlx 
b/hll/sqlx/hll_sketch_to_string.sqlx
index dfc7c72..cd83b2a 100644
--- a/hll/sqlx/hll_sketch_to_string.sqlx
+++ b/hll/sqlx/hll_sketch_to_string.sqlx
@@ -24,6 +24,7 @@ RETURNS STRING
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/hll_sketch.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Returns a summary string that represents the state of the 
given sketch.
 
 Param sketch: the given sketch as BYTES.
@@ -34,14 +35,9 @@ For more information:
 '''
 ) AS R"""
 try {
-  var sketchObject = null;
-  try {
-    sketchObject = Module.hll_sketch.deserializeFromB64(sketch);
-    return sketchObject.toString();
-  } finally {
-    if (sketchObject != null) sketchObject.delete();
-  }
+  return Module.hll_sketch.toString(sketch);
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/hll/sqlx/hll_sketch_union_lgk_type.sqlx 
b/hll/sqlx/hll_sketch_union_lgk_type.sqlx
index ba8c25f..5760630 100644
--- a/hll/sqlx/hll_sketch_union_lgk_type.sqlx
+++ b/hll/sqlx/hll_sketch_union_lgk_type.sqlx
@@ -24,6 +24,7 @@ RETURNS BYTES
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/hll_sketch.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Computes a sketch that represents the union of the two 
given sketches.
 Param sketchA: the first sketch as bytes.
 Param sketchB: the second sketch as bytes.
@@ -35,18 +36,11 @@ For more information:
  - https://datasketches.apache.org/docs/HLL/HllSketches.html
 '''
 ) AS R"""
-const default_lg_k = 12;
+const default_lg_k = Number(12);
 try {
-  var union = null;
-  try {
-    union = new Module.hll_union(lg_k ? lg_k : default_lg_k);
-    union.updateWithB64(sketchA)
-    union.updateWithB64(sketchB)
-    return union.getResultB64(tgt_type ? tgt_type : "");
-  } finally {
-    if (union != null) union.delete();
-  }
+  return Module.hllUnion(sketchA, sketchB, lg_k ? Number(lg_k) : default_lg_k, 
tgt_type ? tgt_type : "");
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to