Madhuvishy has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/243990

Change subject: [WIP] Add refinery-camus module
......................................................................

[WIP] Add refinery-camus module

In order to add analytics-specific code without changing the existing
upstream Camus repo, we are adding a refinery-camus module for any custom code.

This patch includes the decoders, schema registry and the schema for importing
the Search team's Avro data from Kafka to HDFS.

Change-Id: Ib3e14cbf382e3b2942c724eaa811fdebdbdf3268
---
M pom.xml
A refinery-camus/pom.xml
A refinery-camus/src/main/avro/CirrusSearchRequestSet.avsc
A refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroBinaryMessageDecoder.java
A refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroJsonMessageDecoder.java
5 files changed, 330 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source refs/changes/90/243990/1

diff --git a/pom.xml b/pom.xml
index 95ba45b..f766fd7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -16,6 +16,7 @@
     <module>refinery-tools</module>
     <module>refinery-hive</module>
     <module>refinery-job</module>
+    <module>refinery-camus</module>
   </modules>
 
   <scm>
diff --git a/refinery-camus/pom.xml b/refinery-camus/pom.xml
new file mode 100644
index 0000000..eda09b2
--- /dev/null
+++ b/refinery-camus/pom.xml
@@ -0,0 +1,162 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <artifactId>refinery</artifactId>
+        <groupId>org.wikimedia.analytics.refinery</groupId>
+        <version>0.0.20-SNAPSHOT</version>
+    </parent>
+
+    <groupId>org.wikimedia.analytics.refinery.camus</groupId>
+    <artifactId>refinery-camus</artifactId>
+    <name>Wikimedia Analytics Refinery Camus</name>
+    <packaging>jar</packaging>
+    <version>0.0.20-SNAPSHOT</version>
+
+    <dependencies>
+
+        <dependency>
+            <groupId>com.linkedin.camus</groupId>
+            <artifactId>camus-api</artifactId>
+            <version>0.1.0-wmf4</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.linkedin.camus</groupId>
+            <artifactId>camus-schema-registry</artifactId>
+            <version>0.1.0-wmf4</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.linkedin.camus</groupId>
+            <artifactId>camus-etl-kafka</artifactId>
+            <version>0.1.0-wmf4</version>
+        </dependency>
+
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.scala-lang</groupId>
+            <artifactId>scala-library</artifactId>
+            <scope>provided</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.scalatest</groupId>
+            <artifactId>scalatest_2.10</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.avro</groupId>
+            <artifactId>avro</artifactId>
+            <version>1.7.7</version>
+        </dependency>
+
+    </dependencies>
+
+    <build>
+        <plugins>
+
+            <plugin>
+                <groupId>org.apache.avro</groupId>
+                <artifactId>avro-maven-plugin</artifactId>
+                <version>1.7.7</version>
+                <executions>
+                    <execution>
+                        <phase>generate-sources</phase>
+                        <goals>
+                            <goal>schema</goal>
+                        </goals>
+                        <configuration>
+                            <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory>
+                            <outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+
+            <plugin>
+                <groupId>org.scala-tools</groupId>
+                <artifactId>maven-scala-plugin</artifactId>
+                <version>2.15.2</version>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>compile</goal>
+                            <goal>testCompile</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+
+            <plugin>
+                <groupId>org.scalatest</groupId>
+                <artifactId>scalatest-maven-plugin</artifactId>
+                <version>1.0</version>
+                <configuration>
+                    <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
+                    <junitxml>.</junitxml>
+                    <filereports>WDF TestSuite.txt</filereports>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>test</id>
+                        <goals>
+                            <goal>test</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>2.0</version>
+                <configuration>
+                    <shadedArtifactAttached>false</shadedArtifactAttached>
+                    <filters>
+                        <filter>
+                            <artifact>*:*</artifact>
+                            <excludes>
+                                <exclude>META-INF/*.SF</exclude>
+                                <exclude>META-INF/*.DSA</exclude>
+                                <exclude>META-INF/*.RSA</exclude>
+                            </excludes>
+                        </filter>
+                    </filters>
+                </configuration>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <createDependencyReducedPom>false</createDependencyReducedPom>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.2</version>
+                <configuration>
+                    <source>${java.version}</source>
+                    <target>${java.version}</target>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+
+</project>
\ No newline at end of file
diff --git a/refinery-camus/src/main/avro/CirrusSearchRequestSet.avsc b/refinery-camus/src/main/avro/CirrusSearchRequestSet.avsc
new file mode 100644
index 0000000..73c28e5
--- /dev/null
+++ b/refinery-camus/src/main/avro/CirrusSearchRequestSet.avsc
@@ -0,0 +1,153 @@
+{
+  "type": "record",
+  "name": "CirrusSearchRequestSet",
+  "namespace": "org.wikimedia.mediawiki.search",
+  "doc": "A set of requests made by CirrusSearch to the elasticsearch user for a single php execution context",
+  "fields": [
+    {
+      "name": "wikiId",
+      "doc": "The wiki making this request, such as dewiki or enwiktionary",
+      "type": "string"
+    },
+    {
+      "name": "source",
+      "doc": "Where the request is coming from. Typically: web, api or cli",
+      "type": "string"
+    },
+    {
+      "name": "identity",
+      "doc": "A hash identifying the requestor. Includes the IP address and User Agent when available.",
+      "type": "string"
+    },
+    {
+      "name": "ip",
+      "doc": "The IP address (either ipv4 or ipv6) in string notation",
+      "type": [
+        "null",
+        "string"
+      ],
+      "default": null
+    },
+    {
+      "name": "userAgent",
+      "doc": "The HTTP User-Agent header, or null if not-applicable",
+      "type": [
+        "null",
+        "string"
+      ],
+      "default": null
+    },
+    {
+      "name": "backendUserTests",
+      "doc": "List of backend tests the requests are participating in",
+      "type": {
+        "type": "array",
+        "items": "string"
+      }
+    },
+    {
+      "name": "requests",
+      "doc": "A list of requests made between mediawiki and elasticsearch in a single execution context",
+      "type": {
+        "type": "array",
+        "items": {
+          "name": "CirrusSearchRequest",
+          "namespace": "org.wikimedia.mediawiki.search",
+          "doc": "An individual request made between mediawiki and elasticsearch",
+          "type": "record",
+          "fields": [
+            {
+              "name": "query",
+              "doc": "The actual search request",
+              "type": "string"
+            },
+            {
+              "name": "queryType",
+              "doc": "The general type of query performed, such as full_text, prefix, etc.",
+              "type": "string"
+            },
+            {
+              "name": "index",
+              "doc": "The list of indices the request was performed against",
+              "type": {
+                "type": "array",
+                "items": "string"
+              }
+            },
+            {
+              "name": "tookMs",
+              "doc": "The number of milliseconds between passing the query to the client library and getting the response back in the application",
+              "type": [
+                "null",
+                "int"
+              ],
+              "default": null
+            },
+            {
+              "name": "elasticTookMs",
+              "doc": "The number of milliseconds the query took, according to the elasticsearch response",
+              "type": [
+                "null",
+                "int"
+              ],
+              "default": null
+            },
+            {
+              "name": "limit",
+              "doc": "The maximum number of results requested by the application",
+              "type": [
+                "null",
+                "int"
+              ],
+              "default": null
+            },
+            {
+              "name": "hitsTotal",
+              "doc": "The approximate total number of documents matching the query",
+              "type": [
+                "null",
+                "int"
+              ],
+              "default": null
+            },
+            {
+              "name": "hitsReturned",
+              "doc": "The number of results returned to the application",
+              "type": [
+                "null",
+                "int"
+              ],
+              "default": null
+            },
+            {
+              "name": "hitsOffset",
+              "doc": "The offset of the query",
+              "type": [
+                "null",
+                "int"
+              ],
+              "default": null
+            },
+            {
+              "name": "namespaces",
+              "doc": "Each element is a mediawiki namespace id that was searched.",
+              "type": {
+                "type": "array",
+                "items": "int"
+              }
+            },
+            {
+              "name": "suggestion",
+              "doc": "The suggestion generated by elasticsearch, or null if not requested",
+              "type": [
+                "null",
+                "string"
+              ],
+              "default": null
+            }
+          ]
+        }
+      }
+    }
+  ]
+}
\ No newline at end of file
diff --git a/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroBinaryMessageDecoder.java b/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroBinaryMessageDecoder.java
new file mode 100644
index 0000000..401fdfe
--- /dev/null
+++ b/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroBinaryMessageDecoder.java
@@ -0,0 +1,7 @@
+package org.wikimedia.analytics.refinery.camus.coders;
+
+/**
+ * Empty placeholder class with no members yet (work in progress). Created by mviswanathan on 10/6/15.
+ */
+public class AvroBinaryMessageDecoder {
+}
diff --git a/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroJsonMessageDecoder.java b/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroJsonMessageDecoder.java
new file mode 100644
index 0000000..812d8b0
--- /dev/null
+++ b/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroJsonMessageDecoder.java
@@ -0,0 +1,7 @@
+package org.wikimedia.analytics.refinery.camus.coders;
+
+/**
+ * Empty placeholder class with no members yet (work in progress). Created by mviswanathan on 10/6/15.
+ */
+public class AvroJsonMessageDecoder {
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/243990
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib3e14cbf382e3b2942c724eaa811fdebdbdf3268
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: Madhuvishy <mviswanat...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to