Updated Branches: refs/heads/master 61f09e7e1 -> c57eb635e
DRILL-33 - Cleanups. Added some doc, changed build to do jar-with-dependencies. Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/c57eb635 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/c57eb635 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/c57eb635 Branch: refs/heads/master Commit: c57eb635e37d06b659139bdcc59073aa3f79ce89 Parents: f04a0fd Author: tdunning <[email protected]> Authored: Mon Feb 4 17:23:42 2013 -0800 Committer: tdunning <[email protected]> Committed: Mon Feb 4 17:23:42 2013 -0800 ---------------------------------------------------------------------- sandbox/prototype/contrib/synth-log/README.md | 20 +++++ sandbox/prototype/contrib/synth-log/pom.xml | 63 ++++++++++++--- .../src/main/java/org/apache/drill/synth/Main.java | 22 +++++- .../java/org/apache/drill/synth/WordGenerator.java | 7 +- .../synth-log/src/main/resources/log4j.properties | 11 +++ 5 files changed, 106 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/c57eb635/sandbox/prototype/contrib/synth-log/README.md ---------------------------------------------------------------------- diff --git a/sandbox/prototype/contrib/synth-log/README.md b/sandbox/prototype/contrib/synth-log/README.md index b29fde7..b4efc63 100644 --- a/sandbox/prototype/contrib/synth-log/README.md +++ b/sandbox/prototype/contrib/synth-log/README.md @@ -3,6 +3,26 @@ log-synth The basic idea here is to have a random log generator build fairly realistic log files for analysis. The analyses specified here are fairly typical use cases for trying to figure out where the load on a web-site is coming from. +How to Run It +============ + +Install Java 7, maven and get this software using git. + +On a mac, this can help get the right version of Java + + export JAVA_HOME=$(/usr/libexec/java_home) + +Then do this to build a jar file with all dependencies included + + mvn package + +Then use this to write one million log lines into the file "log" and to write the associated user database into the file "users". + + java -cp target/log-synth-0.1-SNAPSHOT-jar-with-dependencies.jar org.apache.drill.synth.Main 1M log users + +This program will produce a line of output on the standard output for each 10,000 lines of log produced. Each line will contain the number of log lines produced so far and the number of unique users in the user profile database. + + The Data Source ============== The data source here is a set of heavily biased random numbers to generate traffic sources, response times and queries. In order to give a realistic long-tail experience the data are generated using special random number generators available in the Mahout library. http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/c57eb635/sandbox/prototype/contrib/synth-log/pom.xml ---------------------------------------------------------------------- diff --git a/sandbox/prototype/contrib/synth-log/pom.xml b/sandbox/prototype/contrib/synth-log/pom.xml index 39aa680..a8c4e6c 100644 --- a/sandbox/prototype/contrib/synth-log/pom.xml +++ b/sandbox/prototype/contrib/synth-log/pom.xml @@ -9,16 +9,59 @@ <version>0.1-SNAPSHOT</version> <dependencies> - <dependency> - <groupId>org.apache.mahout</groupId> - <artifactId>mahout-math</artifactId> - <version>0.8-SNAPSHOT</version> - </dependency> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <version>4.8.2</version> - </dependency> + <dependency> + <groupId>org.apache.mahout</groupId> + <artifactId>mahout-math</artifactId> + <version>0.8-SNAPSHOT</version> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.8.2</version> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + <version>1.6.6</version> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + <version>1.6.6</version> + <scope>runtime</scope> + </dependency> </dependencies> + <build> + <plugins> + <plugin> + <artifactId>maven-assembly-plugin</artifactId> + <version>2.4</version> + <configuration> + <descriptorRefs> + <descriptorRef>jar-with-dependencies</descriptorRef> + </descriptorRefs> + </configuration> + <executions> + <execution> + <id>make-assembly</id> + <phase>package</phase> + <!-- bind to the packaging phase --> + <goals> + <goal>single</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.0</version> + <configuration> + <verbose>true</verbose> + <compilerVersion>1.7</compilerVersion> + </configuration> + </plugin> + </plugins> + </build> </project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/c57eb635/sandbox/prototype/contrib/synth-log/src/main/java/org/apache/drill/synth/Main.java ---------------------------------------------------------------------- diff --git a/sandbox/prototype/contrib/synth-log/src/main/java/org/apache/drill/synth/Main.java b/sandbox/prototype/contrib/synth-log/src/main/java/org/apache/drill/synth/Main.java index 9260d36..2641dd5 100644 --- a/sandbox/prototype/contrib/synth-log/src/main/java/org/apache/drill/synth/Main.java +++ b/sandbox/prototype/contrib/synth-log/src/main/java/org/apache/drill/synth/Main.java @@ -10,15 +10,31 @@ import java.io.IOException; /** * Create a query log with a specified number of log lines and an associated user profile database. - * + * <p/> * Command line args include number of log lines to generate, the name of the log file to generate and the * name of the file to store the user profile database in. - * + * <p/> * Log lines and user profile entries are single line JSON. */ public class Main { public static void main(String[] args) throws IOException { - int n = Integer.parseInt(args[0]); + + int n = Integer.parseInt(args[0].replaceAll("[KMG]?$", "")); + + switch (args[0].charAt(args[0].length() - 1)) { + case 'G': + n *= 1e9; + break; + case 'M': + n *= 1e6; + break; + case 'K': + n *= 1e3; + break; + default: + // no suffix leads here + break; + } LogGenerator lg = new LogGenerator(); BufferedWriter log = Files.newWriter(new File(args[1]), Charsets.UTF_8); http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/c57eb635/sandbox/prototype/contrib/synth-log/src/main/java/org/apache/drill/synth/WordGenerator.java ---------------------------------------------------------------------- diff --git a/sandbox/prototype/contrib/synth-log/src/main/java/org/apache/drill/synth/WordGenerator.java b/sandbox/prototype/contrib/synth-log/src/main/java/org/apache/drill/synth/WordGenerator.java index 3f27d16..f9a4d47 100644 --- a/sandbox/prototype/contrib/synth-log/src/main/java/org/apache/drill/synth/WordGenerator.java +++ b/sandbox/prototype/contrib/synth-log/src/main/java/org/apache/drill/synth/WordGenerator.java @@ -11,8 +11,10 @@ import org.slf4j.LoggerFactory; import java.io.BufferedReader; import java.io.IOException; +import java.net.URI; import java.net.URISyntaxException; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.Iterator; import java.util.List; @@ -66,13 +68,10 @@ public class WordGenerator { } try { - wordReader = Files.newBufferedReader(Paths.get(Resources.getResource(others).toURI()), Charsets.UTF_8); + wordReader = new BufferedReader(Resources.newReaderSupplier(Resources.getResource(others), Charsets.UTF_8).getInput()); } catch (IOException e) { log.error("Can't read resource \"{}\", will continue without realistic words", others); wordReader = null; - } catch (URISyntaxException e) { - log.error("Bad format for resource URI \"{}\", will continue without realistic words", others, e); - wordReader = null; } } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/c57eb635/sandbox/prototype/contrib/synth-log/src/main/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/sandbox/prototype/contrib/synth-log/src/main/resources/log4j.properties b/sandbox/prototype/contrib/synth-log/src/main/resources/log4j.properties new file mode 100644 index 0000000..972574a --- /dev/null +++ b/sandbox/prototype/contrib/synth-log/src/main/resources/log4j.properties @@ -0,0 +1,11 @@ +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=DEBUG, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n + +log4j.logger.org.apache=off
