This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push: new 53260de chore: add Scarf package to track usage (#35) 53260de is described below commit 53260de4e9536158a6aead6c2b63d405f5cf1f30 Author: Jia Yu <ji...@wherobots.com> AuthorDate: Sat Sep 6 22:57:35 2025 -0700 chore: add Scarf package to track usage (#35) --- python/sedonadb/python/sedonadb/_scarf.py | 58 +++++++++++++++++++++++++++ python/sedonadb/python/sedonadb/adbc.py | 4 ++ python/sedonadb/python/sedonadb/context.py | 4 ++ python/sedonadb/python/sedonadb/dbapi.py | 5 +++ rust/sedona/src/context.rs | 6 +++ rust/sedona/src/lib.rs | 1 + rust/sedona/src/scarf.rs | 63 ++++++++++++++++++++++++++++++ sedona-cli/src/main.rs | 3 ++ 8 files changed, 144 insertions(+) diff --git a/python/sedonadb/python/sedonadb/_scarf.py b/python/sedonadb/python/sedonadb/_scarf.py new file mode 100644 index 0000000..024945c --- /dev/null +++ b/python/sedonadb/python/sedonadb/_scarf.py @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Scarf analytics utility functions.""" + +import os +import platform +import threading +import urllib.request + + +def make_scarf_call(language: str) -> None: + """Make a call to Scarf for usage analytics. + + Args: + language: The language identifier (e.g., 'python', 'adbc', 'dbapi') + """ + + def _scarf_request(): + try: + # Check for user opt-out + if ( + os.environ.get("SCARF_NO_ANALYTICS") is not None + or os.environ.get("DO_NOT_TRACK") is not None + ): + return + + # Detect architecture and OS + arch = platform.machine().lower().replace(" ", "_") + os_name = platform.system().lower().replace(" ", "_") + + # Construct Scarf URL + scarf_url = ( + f"https://sedona.gateway.scarf.sh/sedona-db/{arch}/{os_name}/{language}" + ) + + # Make the request in a non-blocking way + urllib.request.urlopen(scarf_url, timeout=1) + except Exception: + # Silently ignore any errors - we don't want Scarf calls to break user code + pass + + # Run in a separate thread to avoid blocking + thread = threading.Thread(target=_scarf_request, daemon=True) + thread.start() diff --git a/python/sedonadb/python/sedonadb/adbc.py b/python/sedonadb/python/sedonadb/adbc.py index 0773c17..25176a5 100644 --- a/python/sedonadb/python/sedonadb/adbc.py +++ b/python/sedonadb/python/sedonadb/adbc.py @@ -21,6 +21,10 @@ from sedonadb import _lib def connect() -> adbc_driver_manager.AdbcDatabase: """Create a low level ADBC connection to Sedona.""" + # Make Scarf call for usage analytics + from ._scarf import make_scarf_call + + make_scarf_call("adbc") return adbc_driver_manager.AdbcDatabase( driver=_lib.__file__, entrypoint="AdbcSedonadbDriverInit" ) diff --git a/python/sedonadb/python/sedonadb/context.py b/python/sedonadb/python/sedonadb/context.py index 6531b86..b981057 100644 --- a/python/sedonadb/python/sedonadb/context.py +++ b/python/sedonadb/python/sedonadb/context.py @@ -149,6 +149,10 @@ class SedonaContext: def connect() -> SedonaContext: """Create a new [SedonaContext][sedonadb.context.SedonaContext]""" + # Make Scarf call for usage analytics + from ._scarf import make_scarf_call + + make_scarf_call("python") return SedonaContext() diff --git a/python/sedonadb/python/sedonadb/dbapi.py b/python/sedonadb/python/sedonadb/dbapi.py index cfa8a43..bd0489d 100644 --- a/python/sedonadb/python/sedonadb/dbapi.py +++ b/python/sedonadb/python/sedonadb/dbapi.py @@ -21,6 +21,11 @@ import sedonadb.adbc def connect(**kwargs) -> "Connection": """Connect to Sedona via ADBC.""" + # Make Scarf call for usage analytics + from ._scarf import make_scarf_call + + make_scarf_call("dbapi") + db = None conn = None diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs index 68b87bb..4096b04 100644 --- a/rust/sedona/src/context.rs +++ b/rust/sedona/src/context.rs @@ -67,6 +67,9 @@ pub struct SedonaContext { impl SedonaContext { /// Creates a new context with default options pub fn new() -> Self { + // Make Scarf call for usage analytics + crate::scarf::make_scarf_call("rust"); + // This will panic only if the default build settings are // incorrect which we test! Self::new_from_context(SessionContext::new()).unwrap() @@ -77,6 +80,9 @@ impl SedonaContext { /// Initializes a context from the current environment and registers access /// to the local file system. pub async fn new_local_interactive() -> Result<Self> { + // Make Scarf call for usage analytics + crate::scarf::make_scarf_call("rust"); + // These three objects enable configuring various elements of the runtime. // Eventually we probably want to have a common set of configuration parameters // exposed via the CLI/Python as arguments, via ADBC as connection options, diff --git a/rust/sedona/src/lib.rs b/rust/sedona/src/lib.rs index 52b5438..c31de9c 100644 --- a/rust/sedona/src/lib.rs +++ b/rust/sedona/src/lib.rs @@ -22,4 +22,5 @@ mod object_storage; pub mod random_geometry_provider; pub mod reader; pub mod record_batch_reader_provider; +pub mod scarf; pub mod show; diff --git a/rust/sedona/src/scarf.rs b/rust/sedona/src/scarf.rs new file mode 100644 index 0000000..201ce2f --- /dev/null +++ b/rust/sedona/src/scarf.rs @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Make a call to Scarf for usage analytics. +/// +/// # Arguments +/// +/// * `language` - The language identifier (e.g., "rust", "cli") +pub fn make_scarf_call(language: &str) { + let language = language.to_string(); + std::thread::spawn(move || { + let _ = scarf_request(&language); + }); +} + +fn scarf_request(language: &str) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { + // Check for user opt-out + if std::env::var("SCARF_NO_ANALYTICS").is_ok() || std::env::var("DO_NOT_TRACK").is_ok() { + return Ok(()); + } + + // Detect architecture and OS + let arch = std::env::consts::ARCH.to_lowercase().replace(' ', "_"); + let os = std::env::consts::OS.to_lowercase().replace(' ', "_"); + + // Construct Scarf URL + let scarf_url = format!("https://sedona.gateway.scarf.sh/sedona-db/{arch}/{os}/{language}"); + + // Make the request using std::net::TcpStream for a simple HTTP GET + if let Ok(url) = url::Url::parse(&scarf_url) { + if let Some(host) = url.host_str() { + let port = url.port().unwrap_or(443); + let path = url.path(); + + // Try to make a simple HTTP request + if let Ok(addr) = format!("{host}:{port}").parse::<std::net::SocketAddr>() { + if let Ok(mut stream) = + std::net::TcpStream::connect_timeout(&addr, std::time::Duration::from_secs(1)) + { + let request = + format!("GET {path} HTTP/1.1\r\nHost: {host}\r\nConnection: close\r\n\r\n"); + let _ = std::io::Write::write_all(&mut stream, request.as_bytes()); + } + } + } + } + + Ok(()) +} diff --git a/sedona-cli/src/main.rs b/sedona-cli/src/main.rs index bcfd477..0cd11aa 100644 --- a/sedona-cli/src/main.rs +++ b/sedona-cli/src/main.rs @@ -128,6 +128,9 @@ async fn main_inner() -> Result<()> { println!("Sedona CLI v{DATAFUSION_CLI_VERSION}"); } + // Make Scarf call for usage analytics + sedona::scarf::make_scarf_call("cli"); + if let Some(ref path) = args.data_path { let p = Path::new(path); env::set_current_dir(p).unwrap();