Madhuvishy has uploaded a new change for review. https://gerrit.wikimedia.org/r/243990
Change subject: [WIP] Add refinery-camus module ...................................................................... [WIP] Add refinery-camus module In order to add analytics-specific code without changing the existing upstream camus repo, we are adding a refinery-camus module for any custom code. This patch includes the decoders, the schema registry, and the schema for importing the Search team's Avro data from Kafka to HDFS. Change-Id: Ib3e14cbf382e3b2942c724eaa811fdebdbdf3268 --- M pom.xml A refinery-camus/pom.xml A refinery-camus/src/main/avro/CirrusSearchRequestSet.avsc A refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroBinaryMessageDecoder.java A refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroJsonMessageDecoder.java 5 files changed, 330 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source refs/changes/90/243990/1 diff --git a/pom.xml b/pom.xml index 95ba45b..f766fd7 100644 --- a/pom.xml +++ b/pom.xml @@ -16,6 +16,7 @@ <module>refinery-tools</module> <module>refinery-hive</module> <module>refinery-job</module> + <module>refinery-camus</module> </modules> <scm> diff --git a/refinery-camus/pom.xml b/refinery-camus/pom.xml new file mode 100644 index 0000000..eda09b2 --- /dev/null +++ b/refinery-camus/pom.xml @@ -0,0 +1,162 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <artifactId>refinery</artifactId> + <groupId>org.wikimedia.analytics.refinery</groupId> + <version>0.0.20-SNAPSHOT</version> + </parent> + + <groupId>org.wikimedia.analytics.refinery.camus</groupId> + <artifactId>refinery-camus</artifactId> + <name>Wikimedia Analytics Refinery Camus</name> + <packaging>jar</packaging> + 
<version>0.0.20-SNAPSHOT</version> + + <dependencies> + + <dependency> + <groupId>com.linkedin.camus</groupId> + <artifactId>camus-api</artifactId> + <version>0.1.0-wmf4</version> + </dependency> + + <dependency> + <groupId>com.linkedin.camus</groupId> + <artifactId>camus-schema-registry</artifactId> + <version>0.1.0-wmf4</version> + </dependency> + + <dependency> + <groupId>com.linkedin.camus</groupId> + <artifactId>camus-etl-kafka</artifactId> + <version>0.1.0-wmf4</version> + </dependency> + + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>org.scala-lang</groupId> + <artifactId>scala-library</artifactId> + <scope>provided</scope> + </dependency> + + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.10</artifactId> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + <version>1.7.7</version> + </dependency> + + </dependencies> + + <build> + <plugins> + + <plugin> + <groupId>org.apache.avro</groupId> + <artifactId>avro-maven-plugin</artifactId> + <version>1.7.7</version> + <executions> + <execution> + <phase>generate-sources</phase> + <goals> + <goal>schema</goal> + </goals> + <configuration> + <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory> + <outputDirectory>${project.basedir}/src/main/java/</outputDirectory> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>org.scala-tools</groupId> + <artifactId>maven-scala-plugin</artifactId> + <version>2.15.2</version> + <executions> + <execution> + <goals> + <goal>compile</goal> + <goal>testCompile</goal> + </goals> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>org.scalatest</groupId> + <artifactId>scalatest-maven-plugin</artifactId> + <version>1.0</version> + <configuration> + <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory> + 
<junitxml>.</junitxml> + <filereports>WDF TestSuite.txt</filereports> + </configuration> + <executions> + <execution> + <id>test</id> + <goals> + <goal>test</goal> + </goals> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>2.0</version> + <configuration> + <shadedArtifactAttached>false</shadedArtifactAttached> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> + </configuration> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <createDependencyReducedPom>false</createDependencyReducedPom> + </configuration> + </execution> + </executions> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.2</version> + <configuration> + <source>${java.version}</source> + <target>${java.version}</target> + </configuration> + </plugin> + </plugins> + </build> + + +</project> \ No newline at end of file diff --git a/refinery-camus/src/main/avro/CirrusSearchRequestSet.avsc b/refinery-camus/src/main/avro/CirrusSearchRequestSet.avsc new file mode 100644 index 0000000..73c28e5 --- /dev/null +++ b/refinery-camus/src/main/avro/CirrusSearchRequestSet.avsc @@ -0,0 +1,153 @@ +{ + "type": "record", + "name": "CirrusSearchRequestSet", + "namespace": "org.wikimedia.mediawiki.search", + "doc": "A set of requests made by CirrusSearch to the elasticsearch user for a single php execution context", + "fields": [ + { + "name": "wikiId", + "doc": "The wiki making this request, such as dewiki or enwiktionary", + "type": "string" + }, + { + "name": "source", + "doc": "Where the request is coming from. 
Typically: web, api or cli", + "type": "string" + }, + { + "name": "identity", + "doc": "A hash identifying the requestor. Includes the IP address and User Agent when available.", + "type": "string" + }, + { + "name": "ip", + "doc": "The IP address (either ipv4 or ipv6) in string notation", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "userAgent", + "doc": "The HTTP User-Agent header, or null if not-applicable", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "backendUserTests", + "doc": "List of backend tests the requests are participating in", + "type": { + "type": "array", + "items": "string" + } + }, + { + "name": "requests", + "doc": "A list of requests made between mediawiki and elasticsearch in a single execution context", + "type": { + "type": "array", + "items": { + "name": "CirrusSearchRequest", + "namespace": "org.wikimedia.mediawiki.search", + "doc": "An individual request made between mediawiki and elasticsearch", + "type": "record", + "fields": [ + { + "name": "query", + "doc": "The actual search request", + "type": "string" + }, + { + "name": "queryType", + "doc": "The general type of query performed, such as full_text, prefix, etc.", + "type": "string" + }, + { + "name": "index", + "doc": "The list of indices the request was performed against", + "type": { + "type": "array", + "items": "string" + } + }, + { + "name": "tookMs", + "doc": "The number of milliseconds between passing the query to the client library and getting the response back in the application", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "elasticTookMs", + "doc": "The number of milliseconds the query took, according to the elasticsearch response", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "limit", + "doc": "The maximum number of results requested by the application", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "hitsTotal", + "doc": "The 
approximate total number of documents matching the query", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "hitsReturned", + "doc": "The number of results returned to the application", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "hitsOffset", + "doc": "The offset of the query", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "namespaces", + "doc": "Each element is a mediawiki namespace id that was searched.", + "type": { + "type": "array", + "items": "int" + } + }, + { + "name": "suggestion", + "doc": "The suggestion generated by elasticsearch, or null if not requested", + "type": [ + "null", + "string" + ], + "default": null + } + ] + } + } + } + ] +} \ No newline at end of file diff --git a/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroBinaryMessageDecoder.java b/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroBinaryMessageDecoder.java new file mode 100644 index 0000000..401fdfe --- /dev/null +++ b/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroBinaryMessageDecoder.java @@ -0,0 +1,7 @@ +package org.wikimedia.analytics.refinery.camus.coders; + +/** + * Created by mviswanathan on 10/6/15. + */ +public class AvroBinaryMessageDecoder { +} diff --git a/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroJsonMessageDecoder.java b/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroJsonMessageDecoder.java new file mode 100644 index 0000000..812d8b0 --- /dev/null +++ b/refinery-camus/src/main/java/org/wikimedia/analytics/refinery/camus/coders/AvroJsonMessageDecoder.java @@ -0,0 +1,7 @@ +package org.wikimedia.analytics.refinery.camus.coders; + +/** + * Created by mviswanathan on 10/6/15. 
+ */ +public class AvroJsonMessageDecoder { +} -- To view, visit https://gerrit.wikimedia.org/r/243990 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ib3e14cbf382e3b2942c724eaa811fdebdbdf3268 Gerrit-PatchSet: 1 Gerrit-Project: analytics/refinery/source Gerrit-Branch: master Gerrit-Owner: Madhuvishy <mviswanat...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits