jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/362159 )

Change subject: Rename project_class to project_family
......................................................................


Rename project_class to project_family

In host normalization, we used the project_class name.
This project_class name is now changed to project_family
(rethought while working on unique devices).
This patch updates the webrequest refinement functions to
now provide both project_class AND project_family, as part of
a plan to remove project_class at some future time.

Bug: T168874
Change-Id: If2a3a285c6a4194be9287dfca2b9dbaada99a916
---
M 
refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/NormalizedHostInfo.java
M 
refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java
M 
refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestWebrequest.java
M refinery-core/src/test/resources/normalize_host_test_data.csv
M 
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetHostPropertiesUDF.java
M 
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/HostNormalizerUDF.java
M 
refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGetHostPropertiesUDF.java
7 files changed, 36 insertions(+), 24 deletions(-)

Approvals:
  Joal: Looks good to me, approved
  jenkins-bot: Verified



diff --git 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/NormalizedHostInfo.java
 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/NormalizedHostInfo.java
index 06f5833..774dd91 100644
--- 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/NormalizedHostInfo.java
+++ 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/NormalizedHostInfo.java
@@ -13,24 +13,24 @@
      */
     public static final String EMPTY_NORM_HOST_VALUE  = "-";
 
-    private String projectClass;
+    private String projectFamily;
     private String project;
     private List<String> qualifiers;
     private String tld;
 
     public NormalizedHostInfo() {
-        projectClass = EMPTY_NORM_HOST_VALUE;
+        projectFamily = EMPTY_NORM_HOST_VALUE;
         project = EMPTY_NORM_HOST_VALUE;
         qualifiers = new ArrayList<>();
         tld = EMPTY_NORM_HOST_VALUE;
     }
 
-    public String getProjectClass() {
-        return projectClass;
+    public String getProjectFamily() {
+        return projectFamily;
     }
 
-    public void setProjectClass(String projectClass) {
-        this.projectClass = projectClass;
+    public void setProjectFamily(String projectFamily) {
+        this.projectFamily = projectFamily;
     }
 
     public String getProject() {
diff --git 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java
 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java
index e3667ee..d509872 100644
--- 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java
+++ 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java
@@ -233,14 +233,14 @@
      * Example: normalizeHost("en.m.zero.wikipedia.org")<br/>
      * Returns:<br/>
      * NormalizedHostInfo(
-     * "project_class":"wikipedia",
+     * "projectFamily":"wikipedia",
      * "project":"en",
      * "qualifiers":["m", "zero"],
      * "tld":"org",
      * )
      *
      * @param uriHost The url's host
-     * @return A NormalizedHostInfo object with project_class, project, 
qualifiers and tld values set.
+     * @return A NormalizedHostInfo object with projectFamily, project, 
qualifiers and tld values set.
      */
     public NormalizedHostInfo normalizeHost(String uriHost) {
 
@@ -272,8 +272,8 @@
             if (uriParts[uriParts.length - 1].matches("[0-9]+")) return result;
 
             if (uriParts.length > 1) {
-                // project_class and TLD normalization
-                result.setProjectClass(uriParts[uriParts.length - 2]);
+                // project_family and TLD normalization
+                result.setProjectFamily(uriParts[uriParts.length - 2]);
                 result.setTld(uriParts[uriParts.length - 1]);
             }
             // project normalization
diff --git 
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestWebrequest.java
 
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestWebrequest.java
index b3d1d66..05f89d3 100644
--- 
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestWebrequest.java
+++ 
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestWebrequest.java
@@ -126,7 +126,7 @@
         assertEquals(
                 test_description + " - Project Class",
                 expectedProjectClass,
-                webrequest_inst.normalizeHost(uriHost).getProjectClass()
+                webrequest_inst.normalizeHost(uriHost).getProjectFamily()
         );
         assertEquals(
                 test_description + " - Project",
@@ -155,7 +155,7 @@
         assertEquals(
                 "Null - Project Class",
                 NormalizedHostInfo.EMPTY_NORM_HOST_VALUE,
-                webrequest_inst.normalizeHost(testUriHost).getProjectClass()
+                webrequest_inst.normalizeHost(testUriHost).getProjectFamily()
         );
         assertEquals(
                 "Null - Project",
@@ -179,7 +179,7 @@
         assertEquals(
                "Empty - Project Class",
                 NormalizedHostInfo.EMPTY_NORM_HOST_VALUE,
-                webrequest_inst.normalizeHost(testUriHost).getProjectClass()
+                webrequest_inst.normalizeHost(testUriHost).getProjectFamily()
         );
         assertEquals(
                 "Empty - Project",
diff --git a/refinery-core/src/test/resources/normalize_host_test_data.csv 
b/refinery-core/src/test/resources/normalize_host_test_data.csv
index bae97da..da162ef 100644
--- a/refinery-core/src/test/resources/normalize_host_test_data.csv
+++ b/refinery-core/src/test/resources/normalize_host_test_data.csv
@@ -1,4 +1,4 @@
-test_description,expected_project_class,expected_project,expected_qualifiers,expected_tld,uri_host
+test_description,expected_project_family,expected_project,expected_qualifiers,expected_tld,uri_host
 IP,-,-,,-,192.168.0.1
 No Dot,-,-,,-,wikipedia
 One dot capital,wikipedia,-,,org,Wikipedia.org
diff --git 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetHostPropertiesUDF.java
 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetHostPropertiesUDF.java
index 87f1b5a..d52fdc3 100644
--- 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetHostPropertiesUDF.java
+++ 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GetHostPropertiesUDF.java
@@ -14,7 +14,6 @@
 import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
-import org.apache.log4j.Logger;
 import org.wikimedia.analytics.refinery.core.*;
 
 import java.util.*;
@@ -22,11 +21,13 @@
 /**
  * UDF that normalizes host (lower case, split) and returns a struct.
  * Records are processed one by one.<p/>
+ * NOTE: project_class is renamed to project_family - we currently provide 
both.
  * Example:<br/>
  * SELECT get_host_properties('en.m.zero.wikipedia.org') FROM test_table LIMIT 
1;<br/>
  * Returns:<br/>
  * {
  * "project_class":"wikipedia",
+ * "project_family":"wikipedia",
  * "project":"en",
  * "qualifiers":["m", "zero"],
  * "tld":"org",
@@ -39,7 +40,7 @@
 
 @UDFType(deterministic = true)
 @Description(name = "get_host_properties", value = "_FUNC_(uri_host) - "
-        + "Returns a map with project_class, project, qualifiers, tld keys and 
"
+        + "Returns a map with project_family, project, qualifiers, tld keys 
and "
         + "the appropriate values for each of them")
 public class GetHostPropertiesUDF extends GenericUDF {
     private Object[] result;
@@ -49,6 +50,7 @@
     private StringObjectInspector argumentOI;
 
     private int IDX_PROJECT_CLASS;
+    private int IDX_PROJECT_FAMILY;
     private int IDX_PROJECT;
     private int IDX_QUALIFIERS;
     private int IDX_TLD;
@@ -111,6 +113,10 @@
         
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
         IDX_PROJECT_CLASS=idx++;
 
+        fieldNames.add("project_family");
+        
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+        IDX_PROJECT_FAMILY=idx++;
+
         fieldNames.add("project");
         
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
         IDX_PROJECT=idx++;
@@ -168,11 +174,13 @@
 
         if (normHost == null) {
             result[IDX_PROJECT_CLASS] = 
NormalizedHostInfo.EMPTY_NORM_HOST_VALUE;
+            result[IDX_PROJECT_FAMILY] = 
NormalizedHostInfo.EMPTY_NORM_HOST_VALUE;
             result[IDX_PROJECT] = NormalizedHostInfo.EMPTY_NORM_HOST_VALUE;
             result[IDX_QUALIFIERS] = new ArrayList<String>();
             result[IDX_TLD] = NormalizedHostInfo.EMPTY_NORM_HOST_VALUE;
         } else {
-            result[IDX_PROJECT_CLASS] = normHost.getProjectClass();
+            result[IDX_PROJECT_CLASS] = normHost.getProjectFamily();
+            result[IDX_PROJECT_FAMILY] = normHost.getProjectFamily();
             result[IDX_PROJECT] = normHost.getProject();
             result[IDX_QUALIFIERS] = normHost.getQualifiers();
             result[IDX_TLD] = normHost.getTld();
diff --git 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/HostNormalizerUDF.java
 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/HostNormalizerUDF.java
index 975a7d7..fc91474 100644
--- 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/HostNormalizerUDF.java
+++ 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/HostNormalizerUDF.java
@@ -7,11 +7,13 @@
  * Deprecated - Use GetHostPropertiesUDF
  * UDF that normalizes host (lower case, split) and returns a struct.
  * Records are processed one by one.<p/>
+ * NOTE: project_class is renamed to project_family - we currently provide 
both.
  * Example:<br/>
  * SELECT normalize_host('en.m.zero.wikipedia.org') FROM test_table LIMIT 
1;<br/>
  * Returns:<br/>
  * {
  * "project_class":"wikipedia",
+ * "project_family":"wikipedia",
  * "project":"en",
  * "qualifiers":["m", "zero"],
  * "tld":"org",
@@ -25,6 +27,6 @@
 @Deprecated
 @UDFType(deterministic = true)
 @Description(name = "normalize_host", value = "_FUNC_(uri_host) - "
-        + "Returns a map with project_class, project, qualifiers, tld keys and 
"
+        + "Returns a map with project_family, project, qualifiers, tld keys 
and "
         + "the appropriate values for each of them")
 public class HostNormalizerUDF extends GetHostPropertiesUDF {}
\ No newline at end of file
diff --git 
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGetHostPropertiesUDF.java
 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGetHostPropertiesUDF.java
index f35701d..fe7c2b8 100644
--- 
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGetHostPropertiesUDF.java
+++ 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGetHostPropertiesUDF.java
@@ -67,7 +67,7 @@
     )
     public void testNormalizeHost(
             String test_description,
-            String expectedProjectClass,
+            String expectedProjectFamily,
             String expectedProject,
             String expectedQualifiers,
             String expectedTld,
@@ -81,12 +81,14 @@
         DeferredObject[] args = new DeferredObject[] { new 
DeferredJavaObject(uriHost) };
         Object[] res = (Object[]) getHostPropertiesUDF.evaluate(args);
 
-        assertEquals("Result array has wrong length", 4, res.length);
+        // Hacked for normalized_host.project_family change
+        assertEquals("Result array has wrong length", 5, res.length);
 
-        assertEquals(test_description + " - ProjectClass", 
expectedProjectClass, res[0]);
-        assertEquals(test_description + " - Project ", expectedProject, 
res[1]);
-        assertEquals(test_description + " - Qualifiers", expectedQualifiers, 
join((List<String>)res[2], ";"));
-        assertEquals(test_description + " - TLD", expectedTld, res[3]);
+        assertEquals(test_description + " - ProjectClass", 
expectedProjectFamily, res[0]);
+        assertEquals(test_description + " - ProjectFamily", 
expectedProjectFamily, res[1]);
+        assertEquals(test_description + " - Project ", expectedProject, 
res[2]);
+        assertEquals(test_description + " - Qualifiers", expectedQualifiers, 
join((List<String>)res[3], ";"));
+        assertEquals(test_description + " - TLD", expectedTld, res[4]);
 
         getHostPropertiesUDF.close();
     }

-- 
To view, visit https://gerrit.wikimedia.org/r/362159
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: If2a3a285c6a4194be9287dfca2b9dbaada99a916
Gerrit-PatchSet: 2
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: Joal <[email protected]>
Gerrit-Reviewer: Joal <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to