This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 23ba9acb3 [SEDONA-547] Use scarf to collect telemetry data (#1373)
23ba9acb3 is described below

commit 23ba9acb31a131993fc7b5f908774ef7a7d8668b
Author: Jia Yu <[email protected]>
AuthorDate: Fri Apr 26 11:49:24 2024 -0700

    [SEDONA-547] Use scarf to collect telemetry data (#1373)
    
    * Add scarf
    
    * Fix linter
    
    * Temporarily remove CI on R release version
    
    * Update the path
    
    * Revert "Temporarily remove CI on R release version"
    
    This reverts commit 108051e52a9fe4e6016955c1a1fc9c88b815d198.
---
 .github/workflows/java.yml                         |  1 +
 .github/workflows/python.yml                       |  1 +
 .github/workflows/r.yml                            |  1 +
 R/R/dependencies.R                                 |  3 +-
 .../sedona/common/utils/TelemetryCollector.java    | 67 ++++++++++++++++++++++
 .../sedona/common/telemetry/TelemetryTest.java     | 28 ++++-----
 docs/asf/telemetry.md                              |  3 +
 .../org/apache/sedona/flink/SedonaContext.java     |  2 +
 mkdocs.yml                                         |  1 +
 python/sedona/spark/SedonaContext.py               |  2 +-
 .../org/apache/sedona/spark/SedonaContext.scala    | 10 ++++
 .../sedona/sql/utils/SedonaSQLRegistrator.scala    | 13 ++++-
 12 files changed, 110 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index 5c87ac265..3d780c7ae 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -26,6 +26,7 @@ on:
 
 env:
   MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
+  DO_NOT_TRACK: true
 
 permissions:
   contents: read
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 0ebba2e70..9e5546677 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -25,6 +25,7 @@ env:
   JAI_CORE_VERSION: "1.1.3"
   JAI_CODEC_VERSION: "1.1.3"
   JAI_IMAGEIO_VERSION: "1.1"
+  DO_NOT_TRACK: true
 
 permissions:
   contents: read
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 3ffdda98b..199e03077 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -22,6 +22,7 @@ on:
 
 env:
   MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
+  DO_NOT_TRACK: true
 
 jobs:
   build:
diff --git a/R/R/dependencies.R b/R/R/dependencies.R
index 68cf4e498..b0c2741d1 100644
--- a/R/R/dependencies.R
+++ b/R/R/dependencies.R
@@ -60,7 +60,8 @@ sedona_initialize_spark_connection <- function(sc) {
     sc,
     "org.apache.sedona.sql.utils.SedonaSQLRegistrator",
     "registerAll",
-    spark_session(sc)
+    spark_session(sc),
+    "r"
   )
 
   # Instantiate all enum objects and store them immutably under
diff --git 
a/common/src/main/java/org/apache/sedona/common/utils/TelemetryCollector.java 
b/common/src/main/java/org/apache/sedona/common/utils/TelemetryCollector.java
new file mode 100644
index 000000000..8b17fdedc
--- /dev/null
+++ 
b/common/src/main/java/org/apache/sedona/common/utils/TelemetryCollector.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.utils;
+
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+
+public class TelemetryCollector {
+
+    private static final String BASE_URL = 
"https://sedona.gateway.scarf.sh/packages/";
+
+    public static String send(String engineName, String language) {
+        HttpURLConnection conn = null;
+        String telemetrySubmitted = "";
+        try {
+            String arch = 
URLEncoder.encode(System.getProperty("os.arch").replaceAll(" ", "_"), "UTF-8");
+            String os = 
URLEncoder.encode(System.getProperty("os.name").replaceAll(" ", "_"), "UTF-8");
+            String jvm = 
URLEncoder.encode(System.getProperty("java.version").replaceAll(" ", "_"), 
"UTF-8");
+
+            // Construct URL
+            telemetrySubmitted = BASE_URL + language + "/" + engineName + "/" 
+ arch + "/" + os + "/" + jvm;
+
+            // Check for user opt-out
+            if (System.getenv("SCARF_NO_ANALYTICS") != null && 
System.getenv("SCARF_NO_ANALYTICS").equals("true") ||
+                    System.getenv("DO_NOT_TRACK") != null && 
System.getenv("DO_NOT_TRACK").equals("true") ||
+                    System.getProperty("SCARF_NO_ANALYTICS") != null && 
System.getProperty("SCARF_NO_ANALYTICS").equals("true") ||
+                    System.getProperty("DO_NOT_TRACK") != null && 
System.getProperty("DO_NOT_TRACK").equals("true")){
+                return telemetrySubmitted;
+            }
+
+            // Send GET request
+            URL url = new URL(telemetrySubmitted);
+            conn = (HttpURLConnection) url.openConnection();
+            conn.setRequestMethod("GET");
+            conn.connect();
+            int responseCode = conn.getResponseCode();
+            // Optionally check the response for successful execution
+            if (responseCode != 200) {
+                // Silent handling, no output or log
+            }
+        } catch (Exception e) {
+            // Silent catch block
+        } finally {
+            if (conn != null) {
+                conn.disconnect();
+            }
+        }
+        return telemetrySubmitted;
+    }
+}
diff --git 
a/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
 b/common/src/test/java/org/apache/sedona/common/telemetry/TelemetryTest.java
similarity index 54%
copy from 
spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
copy to 
common/src/test/java/org/apache/sedona/common/telemetry/TelemetryTest.java
index 6673054e2..4d518d946 100644
--- 
a/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
+++ b/common/src/test/java/org/apache/sedona/common/telemetry/TelemetryTest.java
@@ -16,24 +16,16 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.sedona.sql.utils
+package org.apache.sedona.common.telemetry;
 
-import org.apache.sedona.spark.SedonaContext
-import org.apache.sedona.sql.UDF.UdfRegistrator
-import org.apache.spark.sql.{SQLContext, SparkSession}
+import org.apache.sedona.common.utils.TelemetryCollector;
+import org.junit.Test;
 
-@deprecated("Use SedonaContext instead", "1.4.1")
-object SedonaSQLRegistrator {
-  @deprecated("Use SedonaContext.create instead", "1.4.1")
-  def registerAll(sqlContext: SQLContext): Unit = {
-    SedonaContext.create(sqlContext.sparkSession)
-  }
-
-  @deprecated("Use SedonaContext.create instead", "1.4.1")
-  def registerAll(sparkSession: SparkSession): Unit =
-    SedonaContext.create(sparkSession)
-
-  def dropAll(sparkSession: SparkSession): Unit = {
-    UdfRegistrator.dropAll(sparkSession)
-  }
+public class TelemetryTest
+{
+    @Test
+    public void testTelemetryCollector()
+    {
+        assert TelemetryCollector.send("test", 
"java").contains("https://sedona.gateway.scarf.sh/packages/java/test");
+    }
 }
diff --git a/docs/asf/telemetry.md b/docs/asf/telemetry.md
new file mode 100644
index 000000000..ef1214734
--- /dev/null
+++ b/docs/asf/telemetry.md
@@ -0,0 +1,3 @@
+Apache Sedona uses Scarf to collect anonymous usage data to help us understand 
how the software is being used and how we can improve it. You can opt out of 
telemetry collection by setting the environment variable `SCARF_NO_ANALYTICS` 
or `DO_NOT_TRACK` to `true` on your local machine, or the driver machine of 
your cluster.
+
+Scarf fully supports the GDPR and is allowed by [the Apache Software 
Foundation privacy policy](https://privacy.apache.org/faq/committers.html). The 
privacy policy of Scarf is available at 
[https://about.scarf.sh/privacy-policy](https://about.scarf.sh/privacy-policy).
diff --git a/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java 
b/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java
index 01eda47f4..4d3511dc0 100644
--- a/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java
+++ b/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java
@@ -23,6 +23,7 @@ import 
org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
 import org.apache.sedona.common.geometryObjects.Circle;
 import org.apache.sedona.common.geometrySerde.GeometrySerde;
 import org.apache.sedona.common.geometrySerde.SpatialIndexSerde;
+import org.apache.sedona.common.utils.TelemetryCollector;
 import org.locationtech.jts.geom.Envelope;
 import org.locationtech.jts.geom.GeometryCollection;
 import org.locationtech.jts.geom.LineString;
@@ -46,6 +47,7 @@ public class SedonaContext
      */
     public static StreamTableEnvironment create(StreamExecutionEnvironment 
env, StreamTableEnvironment tblEnv)
     {
+        TelemetryCollector.send("flink", "java");
         GeometrySerde serializer = new GeometrySerde();
         SpatialIndexSerde indexSerializer = new SpatialIndexSerde(serializer);
         env.getConfig().registerTypeWithKryoSerializer(Point.class, 
serializer);
diff --git a/mkdocs.yml b/mkdocs.yml
index 2b3896843..369d31dc2 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -123,6 +123,7 @@ nav:
       - Thanks: https://www.apache.org/foundation/thanks.html" target="_blank
       - Security: https://www.apache.org/security/" target="_blank
       - Privacy: 
https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank
+      - Telemetry: asf/telemetry.md
 repo_url: https://github.com/apache/sedona
 repo_name: apache/sedona
 theme:
diff --git a/python/sedona/spark/SedonaContext.py 
b/python/sedona/spark/SedonaContext.py
index 76be9576f..cda98a60f 100644
--- a/python/sedona/spark/SedonaContext.py
+++ b/python/sedona/spark/SedonaContext.py
@@ -35,7 +35,7 @@ class SedonaContext:
         """
         spark.sql("SELECT 1 as geom").count()
         PackageImporter.import_jvm_lib(spark._jvm)
-        spark._jvm.SedonaContext.create(spark._jsparkSession)
+        spark._jvm.SedonaContext.create(spark._jsparkSession, "python")
         return spark
 
     @classmethod
diff --git 
a/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala 
b/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala
index d9339193e..6b262ed16 100644
--- a/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala
@@ -18,6 +18,7 @@
  */
 package org.apache.sedona.spark
 
+import org.apache.sedona.common.utils.TelemetryCollector
 import org.apache.sedona.core.serde.SedonaKryoRegistrator
 import org.apache.sedona.sql.UDF.UdfRegistrator
 import org.apache.sedona.sql.UDT.UdtRegistrator
@@ -26,8 +27,11 @@ import 
org.apache.spark.sql.sedona_sql.optimization.SpatialFilterPushDownForGeoP
 import org.apache.spark.sql.sedona_sql.strategy.join.JoinQueryDetector
 import org.apache.spark.sql.{SQLContext, SparkSession}
 
+import scala.annotation.StaticAnnotation
 import scala.util.Try
 
+class InternalApi(description: String = "This method is for internal use only 
and may change without notice.") extends StaticAnnotation
+
 object SedonaContext {
   def create(sqlContext: SQLContext): SQLContext = {
     create(sqlContext.sparkSession)
@@ -40,6 +44,12 @@ object SedonaContext {
     * @return
     */
   def create(sparkSession: SparkSession):SparkSession = {
+    create(sparkSession, "java")
+  }
+
+  @InternalApi
+  def create(sparkSession: SparkSession, language: String):SparkSession = {
+    TelemetryCollector.send("spark", language)
     if 
(!sparkSession.experimental.extraStrategies.exists(_.isInstanceOf[JoinQueryDetector]))
 {
       sparkSession.experimental.extraStrategies ++= Seq(new 
JoinQueryDetector(sparkSession))
     }
diff --git 
a/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
 
b/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
index 6673054e2..91a712fed 100644
--- 
a/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
+++ 
b/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala
@@ -26,12 +26,21 @@ import org.apache.spark.sql.{SQLContext, SparkSession}
 object SedonaSQLRegistrator {
   @deprecated("Use SedonaContext.create instead", "1.4.1")
   def registerAll(sqlContext: SQLContext): Unit = {
-    SedonaContext.create(sqlContext.sparkSession)
+    registerAll(sqlContext, "java")
   }
 
   @deprecated("Use SedonaContext.create instead", "1.4.1")
   def registerAll(sparkSession: SparkSession): Unit =
-    SedonaContext.create(sparkSession)
+    registerAll(sparkSession, "java")
+
+  @deprecated("Use SedonaContext.create instead", "1.4.1")
+  def registerAll(sqlContext: SQLContext, language: String): Unit = {
+    SedonaContext.create(sqlContext.sparkSession, language)
+  }
+
+  @deprecated("Use SedonaContext.create instead", "1.4.1")
+  def registerAll(sparkSession: SparkSession, language: String): Unit =
+    SedonaContext.create(sparkSession, language)
 
   def dropAll(sparkSession: SparkSession): Unit = {
     UdfRegistrator.dropAll(sparkSession)

Reply via email to