This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch kll
in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git

commit f7ea68336c035ee953765e8a0f196ec777d86afc
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Tue Aug 13 15:23:24 2024 -0700

    KLL sketch
---
 Makefile                                   |   5 +-
 kll_sketch.cpp                             |  65 +++++++++++++++++++
 kll_sketch_float_build.sqlx                | 101 +++++++++++++++++++++++++++++
 kll_sketch_float_get_quantile.sqlx         |  41 ++++++++++++
 kll_sketch_float_get_rank.sqlx             |  41 ++++++++++++
 kll_sketch_float_merge.sqlx                | 101 +++++++++++++++++++++++++++++
 Makefile => kll_sketch_float_to_tring.sqlx |  49 ++++++--------
 7 files changed, 374 insertions(+), 29 deletions(-)

diff --git a/Makefile b/Makefile
index bd6f65f..991417c 100644
--- a/Makefile
+++ b/Makefile
@@ -19,6 +19,7 @@ EMCC=emcc
 EMCFLAGS=-Idatasketches-cpp/common/include \
        -Idatasketches-cpp/theta/include \
        -Idatasketches-cpp/cpc/include \
+       -Idatasketches-cpp/kll/include \
        --no-entry \
        -sWASM_BIGINT=1 \
        -sEXPORTED_FUNCTIONS=[_malloc,_free] \
@@ -27,7 +28,9 @@ EMCFLAGS=-Idatasketches-cpp/common/include \
        -O3 \
        --bind
 
-all: theta_sketch.mjs theta_sketch.js theta_sketch.wasm cpc_sketch.mjs 
cpc_sketch.js cpc_sketch.wasm
+all: theta_sketch.mjs theta_sketch.js theta_sketch.wasm \
+     cpc_sketch.mjs cpc_sketch.js cpc_sketch.wasm \
+     kll_sketch.mjs kll_sketch.js kll_sketch.wasm
 
 %.mjs: %.cpp
        $(EMCC) $< $(EMCFLAGS) -sSINGLE_FILE=1 -o $@
diff --git a/kll_sketch.cpp b/kll_sketch.cpp
new file mode 100644
index 0000000..6bb74e0
--- /dev/null
+++ b/kll_sketch.cpp
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <strstream>
+#include <emscripten/bind.h>
+
+#include <kll_sketch.hpp>
+
+#include "base64.hpp"
+
+using kll_sketch_float = datasketches::kll_sketch<float>;
+
+const emscripten::val Uint8Array = emscripten::val::global("Uint8Array");
+
+// Embind registrations exposing kll_sketch_float and an exception-message helper to JavaScript.
+EMSCRIPTEN_BINDINGS(kll_sketch_float) {
+  // Turns the numeric handle of a caught C++ exception into its what() text.
+  emscripten::function("getExceptionMessage", emscripten::optional_override([](intptr_t handle) {
+    const auto* thrown = reinterpret_cast<std::exception*>(handle);
+    return std::string(thrown->what());
+  }));
+
+  emscripten::class_<kll_sketch_float>("kll_sketch_float")
+    // new kll_sketch_float(k): heap-allocates an empty sketch
+    .constructor(emscripten::optional_override([](uint16_t k) {
+      return new kll_sketch_float(k);
+    }))
+    .function("isEmpty", &kll_sketch_float::is_empty)
+    .function("update", emscripten::optional_override([](kll_sketch_float& self, float item) {
+      self.update(item);
+    }))
+    // merges a serialized sketch passed as a byte string
+    .function("mergeBytes", emscripten::optional_override([](kll_sketch_float& self, const std::string& image) {
+      self.merge(kll_sketch_float::deserialize(image.data(), image.size()));
+    }), emscripten::allow_raw_pointers())
+    // merges a serialized sketch located at a raw address with the given length
+    .function("mergeBuffer", emscripten::optional_override([](kll_sketch_float& self, intptr_t address, size_t length) {
+      void* raw = reinterpret_cast<void*>(address);
+      self.merge(kll_sketch_float::deserialize(raw, length));
+    }))
+    // serializes the sketch and hands the bytes to a new JavaScript Uint8Array
+    .function("serializeAsUint8Array", emscripten::optional_override([](const kll_sketch_float& self) {
+      auto image = self.serialize();
+      return Uint8Array.new_(emscripten::typed_memory_view(image.size(), image.data()));
+    }))
+    // decodes base64 text and returns a heap-allocated sketch built from the decoded bytes
+    .class_function("deserializeFromB64", emscripten::optional_override([](const std::string& b64) {
+      const auto encoded_size = b64.size();
+      std::vector<char> decoded(b64_dec_len(b64.data(), encoded_size));
+      b64_decode(b64.data(), encoded_size, decoded.data());
+      return new kll_sketch_float(kll_sketch_float::deserialize(decoded.data(), decoded.size()));
+    }), emscripten::allow_raw_pointers())
+    .function("getRank", &kll_sketch_float::get_rank)
+    .function("getQuantile", &kll_sketch_float::get_quantile)
+    .function("toString", emscripten::optional_override([](const kll_sketch_float& self) {
+      return std::string(self.to_string());
+    }))
+    ;
+}
diff --git a/kll_sketch_float_build.sqlx b/kll_sketch_float_build.sqlx
new file mode 100644
index 0000000..45acde9
--- /dev/null
+++ b/kll_sketch_float_build.sqlx
@@ -0,0 +1,101 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+CREATE OR REPLACE AGGREGATE FUNCTION 
`$BQ_PROJECT.$BQ_DATASET`.kll_sketch_float_build(value FLOAT64, k INT NOT 
AGGREGATE)
+RETURNS BYTES
+LANGUAGE js
+OPTIONS (
+  library=["gs://$GCS_BUCKET/kll_sketch.mjs"],
+  description = '''Creates a sketch that represents the distribution of the 
given column.
+Param value: the column of values.
+Param k: the sketch accuracy/size parameter as an integer in the range [8, 
65535].
+Returns a KLL Sketch, as bytes.
+For more details: https://datasketches.apache.org/docs/KLL/KLLSketch.html'''
+) AS R"""
+import ModuleFactory from "gs://$GCS_BUCKET/kll_sketch.mjs";
+var Module = await ModuleFactory();
+const default_k = Number(200);
+
+// UDAF interface
+export function initialState(k) {
+  try {
+    var state = {
+      k: k == null ? default_k : Number(k),
+    };
+    state.sketch = new Module.kll_sketch_float(state.k);
+    return state;
+  } catch (e) {
+    throw new Error(Module.getExceptionMessage(e));
+  }
+}
+
+export function aggregate(state, value) {
+  try {
+    if (state.sketch == null) { // for transition deserialize-aggregate
+      state.sketch = new Module.kll_sketch_float(state.k);
+    }
+    state.sketch.update(value);
+  } catch (e) {
+    throw new Error(Module.getExceptionMessage(e));
+  }
+}
+
+export function serialize(state) {
+  if (state.sketch == null) return state; // for transition 
deserialize-serialize
+  try {
+    // for prior transition deserialize-aggregate
+    // merge aggregated and serialized state
+    if (state.sketch != null && state.serialized != null) {
+      sketch.mergeBytes(state.serialized);
+    }
+    return {
+      k: state.k,
+      serialized: state.sketch.serializeAsUint8Array()
+    };
+  } catch (e) {
+    throw new Error(Module.getExceptionMessage(e));
+  } finally {
+    state.sketch.delete();
+  }
+}
+
+export function deserialize(serialized) {
+  return serialized;
+}
+
+export function merge(state, other_state) {
+  try {
+    if (state.sketch == null) {
+      state.sketch = new Module.kll_sketch_float(state.k);
+    }
+    if (state.serialized != null) {
+      state.sketch.mergeBytes(state.serialized);
+      state.serialized = null;
+    }
+    if (other_state.serialized != null) {
+      state.sketch.mergeBytes(other_state.serialized);
+      other_state.serialized = null;
+    }
+  } catch (e) {
+    throw new Error(Module.getExceptionMessage(e));
+  }
+}
+
+export function finalize(state) {
+  return serialize(state).serialized;
+}
+""";
diff --git a/kll_sketch_float_get_quantile.sqlx 
b/kll_sketch_float_get_quantile.sqlx
new file mode 100644
index 0000000..04c0a2c
--- /dev/null
+++ b/kll_sketch_float_get_quantile.sqlx
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+CREATE OR REPLACE FUNCTION 
`$BQ_PROJECT.$BQ_DATASET`.kll_sketch_float_get_quantile(sketch BYTES, rank 
FLOAT64, inclusive BOOL)
+RETURNS FLOAT64
+LANGUAGE js
+OPTIONS (
+  library=["gs://$GCS_BUCKET/kll_sketch.js"],
+  description = '''Returns a value from the sketch that is the best 
approximation to a value from the original stream with the given rank.
+Param sketch: the given sketch in serialized form.
+Param rank: rank of a value in the hypothetical sorted stream.
+Param inclusive: if true, the given rank is considered inclusive (includes 
weight of a value)
+Returns an approximate quantile associated with the given rank.
+For more details: https://datasketches.apache.org/docs/KLL/KLLSketch.html'''
+) AS R"""
+try {
+  var sketch = Module.kll_sketch_float.deserializeFromB64(sketch);
+  try {
+    if (sketch.isEmpty()) return null;
+    return sketch.getRank(rank, inclusive);
+  } finally {
+    sketch.delete();
+  }
+} catch (e) {
+  throw new Error(Module.getExceptionMessage(e));
+}
+""";
diff --git a/kll_sketch_float_get_rank.sqlx b/kll_sketch_float_get_rank.sqlx
new file mode 100644
index 0000000..796717e
--- /dev/null
+++ b/kll_sketch_float_get_rank.sqlx
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+CREATE OR REPLACE FUNCTION 
`$BQ_PROJECT.$BQ_DATASET`.kll_sketch_float_get_rank(sketch BYTES, value 
FLOAT64, inclusive BOOL)
+RETURNS FLOAT64
+LANGUAGE js
+OPTIONS (
+  library=["gs://$GCS_BUCKET/kll_sketch.js"],
+  description = '''Returns an approximation to the normalized rank of the 
given value from 0 to 1, inclusive.
+Param sketch: the given sketch in serialized form.
+Param value: value to be ranked.
+Param inclusive: if true the weight of the given value is included into the 
rank.
+Returns an approximate rank of the given value.
+For more details: https://datasketches.apache.org/docs/KLL/KLLSketch.html'''
+) AS R"""
+try {
+  var sketch = Module.kll_sketch_float.deserializeFromB64(sketch);
+  try {
+    if (sketch.isEmpty()) return null;
+    return sketch.getQuantile(value, inclusive);
+  } finally {
+    sketch.delete();
+  }
+} catch (e) {
+  throw new Error(Module.getExceptionMessage(e));
+}
+""";
diff --git a/kll_sketch_float_merge.sqlx b/kll_sketch_float_merge.sqlx
new file mode 100644
index 0000000..78f4da7
--- /dev/null
+++ b/kll_sketch_float_merge.sqlx
@@ -0,0 +1,101 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+CREATE OR REPLACE AGGREGATE FUNCTION 
`$BQ_PROJECT.$BQ_DATASET`.kll_sketch_float_merge(sketch BYTES, k INT NOT 
AGGREGATE)
+RETURNS BYTES
+LANGUAGE js
+OPTIONS (
+  library=["gs://$GCS_BUCKET/kll_sketch.mjs"],
+  description = '''Merges sketches from the given column.
+Param sketch: the column of serialized sketches to merge.
+Param k: the sketch accuracy/size parameter as an integer in the range [8, 
65535].
+Returns a serialized KLL sketch as bytes.
+For more details: https://datasketches.apache.org/docs/KLL/KLLSketch.html'''
+) AS R"""
+import ModuleFactory from "gs://$GCS_BUCKET/kll_sketch.mjs";
+var Module = await ModuleFactory();
+const default_k = Number(200);
+
+// UDAF interface
+export function initialState(k) {
+  try {
+    var state = {
+      k: k == null ? default_k : Number(k),
+    };
+    state.sketch = new Module.kll_sketch_float(state.k);
+    return state;
+  } catch (e) {
+    throw new Error(Module.getExceptionMessage(e));
+  }
+}
+
+export function aggregate(state, sketch) {
+  try {
+    if (state.sketch == null) { // for transition deserialize-aggregate
+      state.sketch = new Module.kll_sketch_float(state.k);
+    }
+    state.sketch.mergeBytes(sketch);
+  } catch (e) {
+    throw new Error(Module.getExceptionMessage(e));
+  }
+}
+
+export function serialize(state) {
+  if (state.sketch == null) return state; // for transition 
deserialize-serialize
+  try {
+    // for prior transition deserialize-aggregate
+    // merge aggregated and serialized state
+    if (state.sketch != null && state.serialized != null) {
+      sketch.mergeBytes(state.serialized);
+    }
+    return {
+      k: state.k,
+      serialized: state.sketch.serializeAsUint8Array()
+    };
+  } catch (e) {
+    throw new Error(Module.getExceptionMessage(e));
+  } finally {
+    state.sketch.delete();
+  }
+}
+
+export function deserialize(serialized) {
+  return serialized;
+}
+
+export function merge(state, other_state) {
+  try {
+    if (state.sketch == null) {
+      state.sketch = new Module.kll_sketch_float(state.k);
+    }
+    if (state.serialized != null) {
+      state.sketch.mergeBytes(state.serialized);
+      state.serialized = null;
+    }
+    if (other_state.serialized != null) {
+      state.sketch.mergeBytes(other_state.serialized);
+      other_state.serialized = null;
+    }
+  } catch (e) {
+    throw new Error(Module.getExceptionMessage(e));
+  }
+}
+
+export function finalize(state) {
+  return serialize(state).serialized;
+}
+""";
diff --git a/Makefile b/kll_sketch_float_to_tring.sqlx
similarity index 53%
copy from Makefile
copy to kll_sketch_float_to_tring.sqlx
index bd6f65f..d3c3384 100644
--- a/Makefile
+++ b/kll_sketch_float_to_tring.sqlx
@@ -15,31 +15,24 @@
 # specific language governing permissions and limitations
 # under the License.
 
-EMCC=emcc
-EMCFLAGS=-Idatasketches-cpp/common/include \
-       -Idatasketches-cpp/theta/include \
-       -Idatasketches-cpp/cpc/include \
-       --no-entry \
-       -sWASM_BIGINT=1 \
-       -sEXPORTED_FUNCTIONS=[_malloc,_free] \
-       -sENVIRONMENT=shell \
-       -sTOTAL_MEMORY=1024MB \
-       -O3 \
-       --bind
-
-all: theta_sketch.mjs theta_sketch.js theta_sketch.wasm cpc_sketch.mjs 
cpc_sketch.js cpc_sketch.wasm
-
-%.mjs: %.cpp
-       $(EMCC) $< $(EMCFLAGS) -sSINGLE_FILE=1 -o $@
-
-# this rule creates a non-es6 loadable library
-%.js: %.cpp
-       $(EMCC) $< $(EMCFLAGS) -sSINGLE_FILE=1 -o $@
-
-%.wasm: %.cpp
-       $(EMCC) $< $(EMCFLAGS) -sSTANDALONE_WASM=1 -o $@
-
-clean:
-       $(RM) *.mjs *.js *.wasm
-
-.PHONY: clean
+CREATE OR REPLACE FUNCTION 
`$BQ_PROJECT.$BQ_DATASET`.kll_sketch_float_to_string(base64 BYTES)
+RETURNS STRING
+LANGUAGE js
+OPTIONS (
+  library=["gs://$GCS_BUCKET/kll_sketch.js"],
+  description = '''Returns a summary string that represents the state of the 
given sketch.
+Param base64: the given sketch as base64 encoded bytes.
+Returns a string that represents the state of the given sketch.
+For more details: https://datasketches.apache.org/docs/KLL/KLLSketch.html'''
+) AS R"""
+try {
+  var sketch = Module.kll_sketch_float.deserializeFromB64(base64);
+  try {
+    return sketch.toString();
+  } finally {
+    sketch.delete();
+  }
+} catch (e) {
+  throw new Error(Module.getExceptionMessage(e));
+}
+""";


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to