http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/sandbox/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java
----------------------------------------------------------------------
diff --git 
a/sandbox/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java 
b/sandbox/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java
new file mode 100644
index 0000000..b983eb2
--- /dev/null
+++ 
b/sandbox/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.synth;
+
+import com.google.common.base.Function;
+import com.google.common.collect.*;
+import org.apache.commons.math3.distribution.NormalDistribution;
+import org.apache.mahout.math.stats.LogLikelihood;
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for {@link TermGenerator}: draws terms from generators backed by a shared
+ * {@link WordGenerator} and inspects the resulting frequency counts.
+ */
+public class TermGeneratorTest {
+
+    /** Word source shared by every generator created in this class. */
+    private static final WordGenerator WORDS = new WordGenerator("word-frequency-seed", "other-words");
+
+    /** Number of terms drawn from each generator in {@link #distinctVocabularies()}. */
+    private static final int VOCABULARY_SAMPLES = 50000;
+
+    /**
+     * Draws 10000 terms from one generator and checks the shape of the counts: every draw is
+     * tallied, some terms repeat, the rarest term was seen exactly once, and both the most
+     * frequent term and the word "the" were seen more than 300 times.
+     */
+    @Test
+    public void generateTerms() {
+        final int samples = 10000;
+        final Multiset<String> counts = sampleCounts(new TermGenerator(WORDS, 1, 0.8), samples);
+
+        assertEquals(samples, counts.size());
+        assertTrue("Should have some common words", counts.elementSet().size() < samples);
+
+        // One entry per distinct term: how many times that term was drawn.
+        List<Integer> k = Lists.newArrayList(Iterables.transform(counts.elementSet(), new Function<String, Integer>() {
+            @Override
+            public Integer apply(String s) {
+                return counts.count(s);
+            }
+        }));
+        assertEquals(1, Ordering.natural().leastOf(k, 1).get(0).intValue());
+        assertTrue(Ordering.natural().greatestOf(k, 1).get(0) > 300);
+        assertTrue(counts.count("the") > 300);
+    }
+
+    /**
+     * Samples two independently constructed generators and, for every term seen in the first
+     * sample, computes the standard-normal cumulative probability of the root log-likelihood
+     * ratio between that term's counts in the two samples. The sorted scores are printed (every
+     * tenth one, next to its relative rank) for manual inspection.
+     *
+     * NOTE(review): this test makes no assertions, so it can only fail by throwing. Confirm
+     * what distribution of scores is expected before adding a threshold.
+     */
+    @Test
+    public void distinctVocabularies() {
+        // The first generator is fully sampled before the second is constructed, as before.
+        final Multiset<String> k1 = sampleCounts(new TermGenerator(WORDS, 1, 0.8), VOCABULARY_SAMPLES);
+        final Multiset<String> k2 = sampleCounts(new TermGenerator(WORDS, 1, 0.8), VOCABULARY_SAMPLES);
+
+        final NormalDistribution normal = new NormalDistribution();
+        List<Double> scores = Ordering.natural().sortedCopy(Iterables.transform(k1.elementSet(),
+                new Function<String, Double>() {
+                    @Override
+                    public Double apply(String s) {
+                        // 2x2 table: (term, not term) x (first sample, second sample).
+                        return normal.cumulativeProbability(LogLikelihood.rootLogLikelihoodRatio(
+                                k1.count(s), VOCABULARY_SAMPLES - k1.count(s),
+                                k2.count(s), VOCABULARY_SAMPLES - k2.count(s)));
+                    }
+                }));
+        int n = scores.size();
+        int i = 0;
+        for (Double score : scores) {
+            if (i % 10 == 0) {
+                System.out.printf("%.6f\t%.6f\n", (double) i / n, score);
+            }
+
+            i++;
+        }
+    }
+
+    /**
+     * Draws {@code samples} terms from {@code generator} and tallies how often each one occurs.
+     *
+     * @param generator the generator to sample from
+     * @param samples   the number of terms to draw
+     * @return a multiset holding one occurrence per drawn term
+     */
+    private static Multiset<String> sampleCounts(TermGenerator generator, int samples) {
+        Multiset<String> counts = HashMultiset.create();
+        for (int i = 0; i < samples; i++) {
+            counts.add(generator.sample());
+        }
+        return counts;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/sandbox/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java
----------------------------------------------------------------------
diff --git 
a/sandbox/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java 
b/sandbox/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java
new file mode 100644
index 0000000..0bfdf06
--- /dev/null
+++ 
b/sandbox/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.synth;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Checks which indices of a {@link WordGenerator} yield plain words and which yield words
+ * that end in a numeric {@code -N} suffix.
+ */
+public class WordGeneratorTest {
+
+    /** Matches a whole word that ends in a hyphen followed by digits, e.g. {@code foo-12}. */
+    private static final String NUMBERED_WORD = ".*-[0-9]+";
+
+    /**
+     * The first 20000 words must not end in a numeric suffix, while the 1000 words starting at
+     * index 200000 must all end in one.
+     */
+    @Test
+    public void checkRealWords() {
+        WordGenerator words = new WordGenerator("word-frequency-seed", "other-words");
+        for (int i = 0; i < 20000; i++) {
+            String w = words.getString(i);
+            // String.matches() tests the entire string, so the pattern needs the leading ".*"
+            // to detect a suffix; a bare "-[0-9]+" would accept practically every word.
+            assertFalse("unexpected numeric suffix on word " + i + ": " + w, w.matches(NUMBERED_WORD));
+        }
+
+        for (int i = 0; i < 1000; i++) {
+            String w = words.getString(i + 200000);
+            assertTrue("expected numeric suffix on word " + (i + 200000) + ": " + w, w.matches(NUMBERED_WORD));
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/sandbox/web-frontend/drill.js
----------------------------------------------------------------------
diff --git a/sandbox/web-frontend/drill.js b/sandbox/web-frontend/drill.js
index 598ac39..bbc1b9d 100644
--- a/sandbox/web-frontend/drill.js
+++ b/sandbox/web-frontend/drill.js
@@ -1,18 +1,20 @@
-//Licensed to the Apache Software Foundation (ASF) under one
-//or more contributor license agreements.  See the NOTICE file
-//distributed with this work for additional information
-//regarding copyright ownership.  The ASF licenses this file
-//to you under the Apache License, Version 2.0 (the
-//"License"); you may not use this file except in compliance
-//with the License.  You may obtain a copy of the License at
-//
-//http://www.apache.org/licenses/LICENSE-2.0
-//
-//Unless required by applicable law or agreed to in writing, software
-//distributed under the License is distributed on an "AS IS" BASIS,
-//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-//See the License for the specific language governing permissions and
-//limitations under the License.
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 // Drill frontend settings
 var MAX_PROJECTS = 100;

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/sqlparser/pom.xml
----------------------------------------------------------------------
diff --git a/sqlparser/pom.xml b/sqlparser/pom.xml
index bdde6f4..db14848 100644
--- a/sqlparser/pom.xml
+++ b/sqlparser/pom.xml
@@ -1,20 +1,14 @@
 <?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more 
contributor 
+  license agreements. See the NOTICE file distributed with this work for 
additional 
+  information regarding copyright ownership. The ASF licenses this file to 
+  You under the Apache License, Version 2.0 (the "License"); you may not use 
+  this file except in compliance with the License. You may obtain a copy of 
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+  by applicable law or agreed to in writing, software distributed under the 
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+  OF ANY KIND, either express or implied. See the License for the specific 
+  language governing permissions and limitations under the License. -->
 <project
   xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
   xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
@@ -22,7 +16,7 @@
   <parent>
     <artifactId>drill-root</artifactId>
     <groupId>org.apache.drill</groupId>
-    <version>1.0.0-m1-SNAPSHOT</version>
+    <version>1.0.0-m1-incubating-SNAPSHOT</version>
   </parent>
   <artifactId>sqlparser</artifactId>
   <name>SQL Parser</name>
@@ -96,6 +90,23 @@
       <version>2.7.1</version>
     </dependency>
   </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <inherited>true</inherited>
+        <configuration>
+          <excludes>
+            <exclude>**/.buildpath</exclude>
+            <exclude>**/*.json</exclude>
+            <exclude>**/donuts-output-data.txt</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
   <profiles>
     <profile>
       <id>default-hadoop</id>

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/sqlparser/src/test/resources/logback.xml
----------------------------------------------------------------------
diff --git a/sqlparser/src/test/resources/logback.xml 
b/sqlparser/src/test/resources/logback.xml
index ff9e2ca..13808a6 100644
--- a/sqlparser/src/test/resources/logback.xml
+++ b/sqlparser/src/test/resources/logback.xml
@@ -1,4 +1,20 @@
 <?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <configuration>
 
   <appender name="SOCKET" 
class="de.huxhorn.lilith.logback.appender.ClassicMultiplexSocketAppender">

Reply via email to