This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch hll_no_base64 in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git
commit 95a5621d43f302f8b6ad62da5abba5c415bc9858 Author: AlexanderSaydakov <[email protected]> AuthorDate: Wed Sep 25 15:40:36 2024 -0700 removed base64 encoding-decoding from HLL --- hll/hll_sketch.cpp | 54 +++++++++--------------- hll/sqlx/hll_sketch_get_estimate.sqlx | 10 ++--- hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx | 19 ++++----- hll/sqlx/hll_sketch_to_string.sqlx | 10 ++--- hll/sqlx/hll_sketch_union_lgk_type.sqlx | 14 ++---- 5 files changed, 37 insertions(+), 70 deletions(-) diff --git a/hll/hll_sketch.cpp b/hll/hll_sketch.cpp index 9e4c800..35b7f92 100644 --- a/hll/hll_sketch.cpp +++ b/hll/hll_sketch.cpp @@ -22,8 +22,6 @@ #include <hll.hpp> -#include "../base64.hpp" - datasketches::target_hll_type convert_tgt_type(const std::string& tgt_type_str) { if (tgt_type_str == "" || tgt_type_str == "HLL_4") return datasketches::HLL_4; if (tgt_type_str == "HLL_6") return datasketches::HLL_6; @@ -34,6 +32,7 @@ datasketches::target_hll_type convert_tgt_type(const std::string& tgt_type_str) const emscripten::val Uint8Array = emscripten::val::global("Uint8Array"); EMSCRIPTEN_BINDINGS(hll_sketch) { + emscripten::register_vector<double>("VectorDouble"); emscripten::function("getExceptionMessage", emscripten::optional_override([](intptr_t ptr) { return std::string(reinterpret_cast<std::exception*>(ptr)->what()); @@ -48,21 +47,16 @@ EMSCRIPTEN_BINDINGS(hll_sketch) { auto bytes = self.serialize_compact(); return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), bytes.data())); })) - .class_function("deserializeFromB64", emscripten::optional_override([](const std::string& b64) { - std::vector<char> bytes(b64_dec_len(b64.data(), b64.size())); - b64_decode(b64.data(), b64.size(), bytes.data()); - return new datasketches::hll_sketch(datasketches::hll_sketch::deserialize(bytes.data(), bytes.size())); - }), emscripten::allow_raw_pointers()) - .class_function("deserializeFromBytes", emscripten::optional_override([](const std::string& bytes) { - return new datasketches::hll_sketch(datasketches::hll_sketch::deserialize(bytes.data(), bytes.size())); - }), emscripten::allow_raw_pointers()) - .function("getEstimate", &datasketches::hll_sketch::get_estimate) - .function("getLowerBound", &datasketches::hll_sketch::get_lower_bound) - .function("getUpperBound", &datasketches::hll_sketch::get_upper_bound) - .function("toString", emscripten::optional_override([](const datasketches::hll_sketch& self) { - return self.to_string(); + .class_function("getEstimate", emscripten::optional_override([](const std::string& bytes) { + return datasketches::hll_sketch::deserialize(bytes.data(), bytes.size()).get_estimate(); + })) + .class_function("getEstimateAndBounds", emscripten::optional_override([](const std::string& bytes, uint8_t num_std_devs) { + const auto sketch = datasketches::hll_sketch::deserialize(bytes.data(), bytes.size()); + return std::vector<double>{sketch.get_estimate(), sketch.get_lower_bound(num_std_devs), sketch.get_upper_bound(num_std_devs)}; + })) + .class_function("toString", emscripten::optional_override([](const std::string& bytes) { + return datasketches::hll_sketch::deserialize(bytes.data(), bytes.size()).to_string(); })) - .class_function("getMaxSerializedSizeBytes", &datasketches::hll_sketch::get_max_updatable_serialization_bytes) ; emscripten::class_<datasketches::hll_union>("hll_union") @@ -75,29 +69,19 @@ EMSCRIPTEN_BINDINGS(hll_sketch) { .function("updateWithBytes", emscripten::optional_override([](datasketches::hll_union& self, const std::string& bytes) { self.update(datasketches::hll_sketch::deserialize(bytes.data(), bytes.size())); }), emscripten::allow_raw_pointers()) - .function("updateWithB64", emscripten::optional_override([](datasketches::hll_union& self, const std::string& b64) { - std::vector<char> bytes(b64_dec_len(b64.data(), b64.size())); - b64_decode(b64.data(), b64.size(), bytes.data()); - self.update(datasketches::hll_sketch::deserialize(bytes.data(), bytes.size())); - }), emscripten::allow_raw_pointers()) - .function("updateWithBuffer", emscripten::optional_override([](datasketches::hll_union& self, intptr_t bytes, size_t size) { - self.update(datasketches::hll_sketch::deserialize(reinterpret_cast<void*>(bytes), size)); - })) -// .function("getResultStream", emscripten::optional_override([](datasketches::hll_union& self, intptr_t bytes, size_t size, datasketches::target_hll_type tgt_type) { -// std::strstream stream(reinterpret_cast<char*>(bytes), size); -// self.get_result(tgt_type).serialize_compact(stream); -// return (int) stream.tellp(); -// })) .function("getResultAsUint8Array", emscripten::optional_override([](datasketches::hll_union& self, const std::string& tgt_type_str) { auto bytes = self.get_result(convert_tgt_type(tgt_type_str)).serialize_compact(); return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), bytes.data())); })) - .function("getResultB64", emscripten::optional_override([](datasketches::hll_union& self, const std::string& tgt_type_str) { - auto bytes = self.get_result(convert_tgt_type(tgt_type_str)).serialize_compact(); - std::vector<char> b64(b64_enc_len(bytes.size())); - b64_encode((const char*) bytes.data(), bytes.size(), b64.data()); - return std::string(b64.data(), b64.size()); - })) ; + emscripten::function("hllUnion", emscripten::optional_override([]( + const std::string& bytes1, const std::string& bytes2, uint8_t lg_k, const std::string& tgt_type_str + ) { + datasketches::hll_union u(lg_k); + u.update(datasketches::hll_sketch::deserialize(bytes1.data(), bytes1.size())); + u.update(datasketches::hll_sketch::deserialize(bytes2.data(), bytes2.size())); + const auto bytes = u.get_result(convert_tgt_type(tgt_type_str)).serialize_compact(); + return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), bytes.data())); + })); } diff --git a/hll/sqlx/hll_sketch_get_estimate.sqlx b/hll/sqlx/hll_sketch_get_estimate.sqlx index 2900485..6605806 100644 --- a/hll/sqlx/hll_sketch_get_estimate.sqlx +++ b/hll/sqlx/hll_sketch_get_estimate.sqlx @@ -24,6 +24,7 @@ RETURNS FLOAT64 LANGUAGE js OPTIONS ( library=["gs://$GCS_BUCKET/hll_sketch.js"], + js_parameter_encoding_mode='STANDARD', description = '''Returns a summary string that represents the state of the given sketch. Param sketch: the given sketch as BYTES. @@ -34,14 +35,9 @@ For more information: ''' ) AS R""" try { - var sketchObject = null; - try { - sketchObject = Module.hll_sketch.deserializeFromB64(sketch); - return sketchObject.getEstimate(); - } finally { - if (sketchObject != null) sketchObject.delete(); - } + return Module.hll_sketch.getEstimate(sketch); } catch (e) { + if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } """; diff --git a/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx b/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx index 4ba7e3a..cb69563 100644 --- a/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx +++ b/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx @@ -24,6 +24,7 @@ RETURNS STRUCT<estimate FLOAT64, lower_bound FLOAT64, upper_bound FLOAT64> LANGUAGE js OPTIONS ( library=["gs://$GCS_BUCKET/hll_sketch.js"], + js_parameter_encoding_mode='STANDARD', description = '''Gets cardinality estimate and bounds from given sketch. Param sketch: The given sketch to query as BYTES. @@ -37,18 +38,14 @@ For more information: ''' ) AS R""" try { - var sketchObject = null; - try { - sketchObject = Module.hll_sketch.deserializeFromB64(sketch); - return { - estimate: sketchObject.getEstimate(), - lower_bound: sketchObject.getLowerBound(num_std_devs), - upper_bound: sketchObject.getUpperBound(num_std_devs) - }; - } finally { - if (sketchObject != null) sketchObject.delete(); - } + const result = Module.hll_sketch.getEstimateAndBounds(sketch, Number(num_std_devs)); + return { + estimate: result.get(0), + lower_bound: result.get(1), + upper_bound: result.get(2) + }; } catch (e) { + if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } """; diff --git a/hll/sqlx/hll_sketch_to_string.sqlx b/hll/sqlx/hll_sketch_to_string.sqlx index dfc7c72..cd83b2a 100644 --- a/hll/sqlx/hll_sketch_to_string.sqlx +++ b/hll/sqlx/hll_sketch_to_string.sqlx @@ -24,6 +24,7 @@ RETURNS STRING LANGUAGE js OPTIONS ( library=["gs://$GCS_BUCKET/hll_sketch.js"], + js_parameter_encoding_mode='STANDARD', description = '''Returns a summary string that represents the state of the given sketch. Param sketch: the given sketch as BYTES. @@ -34,14 +35,9 @@ For more information: ''' ) AS R""" try { - var sketchObject = null; - try { - sketchObject = Module.hll_sketch.deserializeFromB64(sketch); - return sketchObject.toString(); - } finally { - if (sketchObject != null) sketchObject.delete(); - } + return Module.hll_sketch.toString(sketch); } catch (e) { + if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } """; diff --git a/hll/sqlx/hll_sketch_union_lgk_type.sqlx b/hll/sqlx/hll_sketch_union_lgk_type.sqlx index ba8c25f..5760630 100644 --- a/hll/sqlx/hll_sketch_union_lgk_type.sqlx +++ b/hll/sqlx/hll_sketch_union_lgk_type.sqlx @@ -24,6 +24,7 @@ RETURNS BYTES LANGUAGE js OPTIONS ( library=["gs://$GCS_BUCKET/hll_sketch.js"], + js_parameter_encoding_mode='STANDARD', description = '''Computes a sketch that represents the union of the two given sketches. Param sketchA: the first sketch as bytes. Param sketchB: the second sketch as bytes. @@ -35,18 +36,11 @@ For more information: - https://datasketches.apache.org/docs/HLL/HllSketches.html ''' ) AS R""" -const default_lg_k = 12; +const default_lg_k = Number(12); try { - var union = null; - try { - union = new Module.hll_union(lg_k ? lg_k : default_lg_k); - union.updateWithB64(sketchA) - union.updateWithB64(sketchB) - return union.getResultB64(tgt_type ? tgt_type : ""); - } finally { - if (union != null) union.delete(); - } + return Module.hllUnion(sketchA, sketchB, lg_k ? Number(lg_k) : default_lg_k, tgt_type ? tgt_type : ""); } catch (e) { + if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } """; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
