This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 23ba9acb3 [SEDONA-547] Use scarf to collect telemetry data (#1373)
23ba9acb3 is described below
commit 23ba9acb31a131993fc7b5f908774ef7a7d8668b
Author: Jia Yu <[email protected]>
AuthorDate: Fri Apr 26 11:49:24 2024 -0700
[SEDONA-547] Use scarf to collect telemetry data (#1373)
* Add scarf
* Fix linter
* Temporarily remove CI on R release version
* Update the path
* Revert "Temporarily remove CI on R release version"
This reverts commit 108051e52a9fe4e6016955c1a1fc9c88b815d198.
---
.github/workflows/java.yml | 1 +
.github/workflows/python.yml | 1 +
.github/workflows/r.yml | 1 +
R/R/dependencies.R | 3 +-
.../sedona/common/utils/TelemetryCollector.java | 67 ++++++++++++++++++++++
.../sedona/common/telemetry/TelemetryTest.java | 28 ++++-----
docs/asf/telemetry.md | 3 +
.../org/apache/sedona/flink/SedonaContext.java | 2 +
mkdocs.yml | 1 +
python/sedona/spark/SedonaContext.py | 2 +-
.../org/apache/sedona/spark/SedonaContext.scala | 10 ++++
.../sedona/sql/utils/SedonaSQLRegistrator.scala | 13 ++++-
12 files changed, 110 insertions(+), 22 deletions(-)
diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index 5c87ac265..3d780c7ae 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -26,6 +26,7 @@ on:
env:
MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
+ DO_NOT_TRACK: true
permissions:
contents: read
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 0ebba2e70..9e5546677 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -25,6 +25,7 @@ env:
JAI_CORE_VERSION: "1.1.3"
JAI_CODEC_VERSION: "1.1.3"
JAI_IMAGEIO_VERSION: "1.1"
+ DO_NOT_TRACK: true
permissions:
contents: read
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 3ffdda98b..199e03077 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -22,6 +22,7 @@ on:
env:
MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
+ DO_NOT_TRACK: true
jobs:
build:
diff --git a/R/R/dependencies.R b/R/R/dependencies.R
index 68cf4e498..b0c2741d1 100644
--- a/R/R/dependencies.R
+++ b/R/R/dependencies.R
@@ -60,7 +60,8 @@ sedona_initialize_spark_connection <- function(sc) {
sc,
"org.apache.sedona.sql.utils.SedonaSQLRegistrator",
"registerAll",
- spark_session(sc)
+ spark_session(sc),
+ "r"
)
# Instantiate all enum objects and store them immutably under
diff --git
a/common/src/main/java/org/apache/sedona/common/utils/TelemetryCollector.java
b/common/src/main/java/org/apache/sedona/common/utils/TelemetryCollector.java
new file mode 100644
index 000000000..8b17fdedc
--- /dev/null
+++
b/common/src/main/java/org/apache/sedona/common/utils/TelemetryCollector.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.utils;
+
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+
+public class TelemetryCollector {
+
+ private static final String BASE_URL =
"https://sedona.gateway.scarf.sh/packages/";
+
+ public static String send(String engineName, String language) {
+ HttpURLConnection conn = null;
+ String telemetrySubmitted = "";
+ try {
+ String arch =
URLEncoder.encode(System.getProperty("os.arch").replaceAll(" ", "_"), "UTF-8");
+ String os =
URLEncoder.encode(System.getProperty("os.name").replaceAll(" ", "_"), "UTF-8");
+ String jvm =
URLEncoder.encode(System.getProperty("java.version").replaceAll(" ", "_"),
"UTF-8");
+
+ // Construct URL
+ telemetrySubmitted = BASE_URL + language + "/" + engineName + "/"
+ arch + "/" + os + "/" + jvm;
+
+ // Check for user opt-out
+ if (System.getenv("SCARF_NO_ANALYTICS") != null &&
System.getenv("SCARF_NO_ANALYTICS").equals("true") ||
+ System.getenv("DO_NOT_TRACK") != null &&
System.getenv("DO_NOT_TRACK").equals("true") ||
+ System.getProperty("SCARF_NO_ANALYTICS") != null &&
System.getProperty("SCARF_NO_ANALYTICS").equals("true") ||
+ System.getProperty("DO_NOT_TRACK") != null &&
System.getProperty("DO_NOT_TRACK").equals("true")){
+ return telemetrySubmitted;
+ }
+
+ // Send GET request
+ URL url = new URL(telemetrySubmitted);
+ conn = (HttpURLConnection) url.openConnection();
+ conn.setRequestMethod("GET");
+ conn.connect();
+ int responseCode = conn.getResponseCode();
+ // Optionally check the response for successful execution
+ if (responseCode != 200) {
+ // Silent handling, no output or log
+ }
+ } catch (Exception e) {
+ // Silent catch block
+ } finally {
+ if (conn != null) {
+ conn.disconnect();
+ }
+ }
+ return telemetrySubmitted;
+ }
+}
diff --git
a/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
b/common/src/test/java/org/apache/sedona/common/telemetry/TelemetryTest.java
similarity index 54%
copy from
spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
copy to
common/src/test/java/org/apache/sedona/common/telemetry/TelemetryTest.java
index 6673054e2..4d518d946 100644
---
a/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
+++ b/common/src/test/java/org/apache/sedona/common/telemetry/TelemetryTest.java
@@ -16,24 +16,16 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.sedona.sql.utils
+package org.apache.sedona.common.telemetry;
-import org.apache.sedona.spark.SedonaContext
-import org.apache.sedona.sql.UDF.UdfRegistrator
-import org.apache.spark.sql.{SQLContext, SparkSession}
+import org.apache.sedona.common.utils.TelemetryCollector;
+import org.junit.Test;
-@deprecated("Use SedonaContext instead", "1.4.1")
-object SedonaSQLRegistrator {
- @deprecated("Use SedonaContext.create instead", "1.4.1")
- def registerAll(sqlContext: SQLContext): Unit = {
- SedonaContext.create(sqlContext.sparkSession)
- }
-
- @deprecated("Use SedonaContext.create instead", "1.4.1")
- def registerAll(sparkSession: SparkSession): Unit =
- SedonaContext.create(sparkSession)
-
- def dropAll(sparkSession: SparkSession): Unit = {
- UdfRegistrator.dropAll(sparkSession)
- }
+public class TelemetryTest
+{
+ @Test
+ public void testTelemetryCollector()
+ {
+ assert TelemetryCollector.send("test",
"java").contains("https://sedona.gateway.scarf.sh/packages/java/test");
+ }
}
diff --git a/docs/asf/telemetry.md b/docs/asf/telemetry.md
new file mode 100644
index 000000000..ef1214734
--- /dev/null
+++ b/docs/asf/telemetry.md
@@ -0,0 +1,3 @@
+Apache Sedona uses Scarf to collect anonymous usage data to help us understand
how the software is being used and how we can improve it. You can opt out of
telemetry collection by setting the environment variable `SCARF_NO_ANALYTICS`
or `DO_NOT_TRACK` to `true` on your local machine, or on the driver machine of
your cluster.
+
+Scarf fully supports the GDPR and is allowed by [the Apache Software
Foundation privacy policy](https://privacy.apache.org/faq/committers.html). The
privacy policy of Scarf is available at
[https://about.scarf.sh/privacy-policy](https://about.scarf.sh/privacy-policy).
diff --git a/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java
b/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java
index 01eda47f4..4d3511dc0 100644
--- a/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java
+++ b/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java
@@ -23,6 +23,7 @@ import
org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.sedona.common.geometryObjects.Circle;
import org.apache.sedona.common.geometrySerde.GeometrySerde;
import org.apache.sedona.common.geometrySerde.SpatialIndexSerde;
+import org.apache.sedona.common.utils.TelemetryCollector;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.GeometryCollection;
import org.locationtech.jts.geom.LineString;
@@ -46,6 +47,7 @@ public class SedonaContext
*/
public static StreamTableEnvironment create(StreamExecutionEnvironment
env, StreamTableEnvironment tblEnv)
{
+ TelemetryCollector.send("flink", "java");
GeometrySerde serializer = new GeometrySerde();
SpatialIndexSerde indexSerializer = new SpatialIndexSerde(serializer);
env.getConfig().registerTypeWithKryoSerializer(Point.class,
serializer);
diff --git a/mkdocs.yml b/mkdocs.yml
index 2b3896843..369d31dc2 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -123,6 +123,7 @@ nav:
- Thanks: https://www.apache.org/foundation/thanks.html" target="_blank
- Security: https://www.apache.org/security/" target="_blank
- Privacy:
https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank
+ - Telemetry: asf/telemetry.md
repo_url: https://github.com/apache/sedona
repo_name: apache/sedona
theme:
diff --git a/python/sedona/spark/SedonaContext.py
b/python/sedona/spark/SedonaContext.py
index 76be9576f..cda98a60f 100644
--- a/python/sedona/spark/SedonaContext.py
+++ b/python/sedona/spark/SedonaContext.py
@@ -35,7 +35,7 @@ class SedonaContext:
"""
spark.sql("SELECT 1 as geom").count()
PackageImporter.import_jvm_lib(spark._jvm)
- spark._jvm.SedonaContext.create(spark._jsparkSession)
+ spark._jvm.SedonaContext.create(spark._jsparkSession, "python")
return spark
@classmethod
diff --git
a/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala
b/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala
index d9339193e..6b262ed16 100644
--- a/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala
@@ -18,6 +18,7 @@
*/
package org.apache.sedona.spark
+import org.apache.sedona.common.utils.TelemetryCollector
import org.apache.sedona.core.serde.SedonaKryoRegistrator
import org.apache.sedona.sql.UDF.UdfRegistrator
import org.apache.sedona.sql.UDT.UdtRegistrator
@@ -26,8 +27,11 @@ import
org.apache.spark.sql.sedona_sql.optimization.SpatialFilterPushDownForGeoP
import org.apache.spark.sql.sedona_sql.strategy.join.JoinQueryDetector
import org.apache.spark.sql.{SQLContext, SparkSession}
+import scala.annotation.StaticAnnotation
import scala.util.Try
+class InternalApi(description: String = "This method is for internal use only
and may change without notice.") extends StaticAnnotation
+
object SedonaContext {
def create(sqlContext: SQLContext): SQLContext = {
create(sqlContext.sparkSession)
@@ -40,6 +44,12 @@ object SedonaContext {
* @return
*/
def create(sparkSession: SparkSession):SparkSession = {
+ create(sparkSession, "java")
+ }
+
+ @InternalApi
+ def create(sparkSession: SparkSession, language: String):SparkSession = {
+ TelemetryCollector.send("spark", language)
if
(!sparkSession.experimental.extraStrategies.exists(_.isInstanceOf[JoinQueryDetector]))
{
sparkSession.experimental.extraStrategies ++= Seq(new
JoinQueryDetector(sparkSession))
}
diff --git
a/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
b/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
index 6673054e2..91a712fed 100644
---
a/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
+++
b/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
@@ -26,12 +26,21 @@ import org.apache.spark.sql.{SQLContext, SparkSession}
object SedonaSQLRegistrator {
@deprecated("Use SedonaContext.create instead", "1.4.1")
def registerAll(sqlContext: SQLContext): Unit = {
- SedonaContext.create(sqlContext.sparkSession)
+ registerAll(sqlContext, "java")
}
@deprecated("Use SedonaContext.create instead", "1.4.1")
def registerAll(sparkSession: SparkSession): Unit =
- SedonaContext.create(sparkSession)
+ registerAll(sparkSession, "java")
+
+ @deprecated("Use SedonaContext.create instead", "1.4.1")
+ def registerAll(sqlContext: SQLContext, language: String): Unit = {
+ SedonaContext.create(sqlContext.sparkSession, language)
+ }
+
+ @deprecated("Use SedonaContext.create instead", "1.4.1")
+ def registerAll(sparkSession: SparkSession, language: String): Unit =
+ SedonaContext.create(sparkSession, language)
def dropAll(sparkSession: SparkSession): Unit = {
UdfRegistrator.dropAll(sparkSession)