http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/sandbox/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java ---------------------------------------------------------------------- diff --git a/sandbox/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java b/sandbox/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java new file mode 100644 index 0000000..b983eb2 --- /dev/null +++ b/sandbox/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.synth; + +import com.google.common.base.Function; +import com.google.common.collect.*; +import org.apache.commons.math3.distribution.NormalDistribution; +import org.apache.mahout.math.stats.LogLikelihood; +import org.junit.Test; + +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + /** Sampling tests for TermGenerator, driven by a shared WordGenerator. */ +public class TermGeneratorTest { + /* Word source shared by every generator built in this class. */ + private static final WordGenerator WORDS = new WordGenerator("word-frequency-seed", "other-words"); + /** Draws 10000 samples and checks the total count, that fewer than 10000 distinct terms appear, that the rarest term appears exactly once, and that both the most frequent term and "the" appear more than 300 times. */ + @Test + public void generateTerms() { + TermGenerator x = new TermGenerator(WORDS, 1, 0.8); + final Multiset<String> counts = HashMultiset.create(); + for (int i = 0; i < 10000; i++) { + counts.add(x.sample()); + } + + assertEquals(10000, counts.size()); + assertTrue("Should have some common words", counts.elementSet().size() < 10000); + List<Integer> k = Lists.newArrayList(Iterables.transform(counts.elementSet(), new Function<String, Integer>() { + public Integer apply(String s) { + return counts.count(s); + } + })); +// System.out.printf("%s\n", Ordering.natural().reverse().sortedCopy(k).subList(0, 30)); +// System.out.printf("%s\n", Iterables.transform(Iterables.filter(counts.elementSet(), new Predicate<String>() { +// public boolean apply(String s) { +// return counts.count(s) > 100; +// } +// }), new Function<String, String>() { +// public String apply(String s) { +// return s + ":" + counts.count(s); +// } +// })); + assertEquals(1, Ordering.natural().leastOf(k, 1).get(0).intValue()); + assertTrue(Ordering.natural().greatestOf(k, 1).get(0) > 300); + assertTrue(counts.count("the") > 300); + } + /** Draws 50000 samples from each of two generators constructed with identical arguments and prints, for every tenth entry of the sorted per-term scores, its rank fraction and its score. NOTE(review): contains no assertions, so it can only fail by throwing. */ + @Test + public void distinctVocabularies() { + TermGenerator x1 = new TermGenerator(WORDS, 1, 0.8); + final Multiset<String> k1 = HashMultiset.create(); + for (int i = 0; i < 50000; i++) { + k1.add(x1.sample()); + } + + TermGenerator x2 = new TermGenerator(WORDS, 1, 0.8); + final Multiset<String> k2 = HashMultiset.create(); + for (int i = 0; i < 50000; 
i++) { + k2.add(x2.sample()); + } + /* Score each distinct term of the first sample: normal CDF of the root log-likelihood ratio built from its count and the remaining draws in each 50000-draw sample; scores are then sorted in natural order. */ + final NormalDistribution normal = new NormalDistribution(); + List<Double> scores = Ordering.natural().sortedCopy(Iterables.transform(k1.elementSet(), + new Function<String, Double>() { + public Double apply(String s) { + return normal.cumulativeProbability(LogLikelihood.rootLogLikelihoodRatio(k1.count(s), 50000 - k1.count(s), k2.count(s), 50000 - k2.count(s))); + } + })); + int n = scores.size(); +// System.out.printf("%.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f", scores.get(0), scores.get((int) (0.05*n)), scores.get(n / 4), scores.get(n / 2), scores.get(3 * n / 4), scores.get((int) (0.95 * n)), scores.get(n - 1)); + int i = 0; + for (Double score : scores) { + if (i % 10 == 0) { + System.out.printf("%.6f\t%.6f\n", (double) i / n, score); + } + + i++; + } + } +}
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/sandbox/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java ---------------------------------------------------------------------- diff --git a/sandbox/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java b/sandbox/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java new file mode 100644 index 0000000..0bfdf06 --- /dev/null +++ b/sandbox/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.synth; + +import org.junit.Test; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + /** Checks which WordGenerator indexes yield plain words and which yield strings ending in "-<digits>". */ +public class WordGeneratorTest { + /** Indexes 0..19999 must not produce a string ending in "-<digits>"; indexes 200000..200999 must. */ + @Test + public void checkRealWords() { + WordGenerator words = new WordGenerator("word-frequency-seed", "other-words"); + for (int i = 0; i < 20000; i++) { + /* String.matches must match the entire string, so the leading ".*" is required to detect a "-<digits>" suffix; same pattern as the positive check below. */ + assertFalse(words.getString(i).matches(".*-[0-9]+")); + } + + for (int i = 0; i < 1000; i++) { + String w = words.getString(i + 200000); + assertTrue(w.matches(".*-[0-9]+")); + } + } + +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/sandbox/web-frontend/drill.js ---------------------------------------------------------------------- diff --git a/sandbox/web-frontend/drill.js b/sandbox/web-frontend/drill.js index 598ac39..bbc1b9d 100644 --- a/sandbox/web-frontend/drill.js +++ b/sandbox/web-frontend/drill.js @@ -1,18 +1,20 @@ -//Licensed to the Apache Software Foundation (ASF) under one -//or more contributor license agreements. See the NOTICE file -//distributed with this work for additional information -//regarding copyright ownership. The ASF licenses this file -//to you under the Apache License, Version 2.0 (the -//"License"); you may not use this file except in compliance -//with the License. You may obtain a copy of the License at -// -//http://www.apache.org/licenses/LICENSE-2.0 -// -//Unless required by applicable law or agreed to in writing, software -//distributed under the License is distributed on an "AS IS" BASIS, -//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -//See the License for the specific language governing permissions and -//limitations under the License. +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ // Drill frontend settings var MAX_PROJECTS = 100; http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/sqlparser/pom.xml ---------------------------------------------------------------------- diff --git a/sqlparser/pom.xml b/sqlparser/pom.xml index bdde6f4..db14848 100644 --- a/sqlparser/pom.xml +++ b/sqlparser/pom.xml @@ -1,20 +1,14 @@ <?xml version="1.0"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor + license agreements. See the NOTICE file distributed with this work for additional + information regarding copyright ownership. 
The ASF licenses this file to + You under the Apache License, Version 2.0 (the "License"); you may not use + this file except in compliance with the License. You may obtain a copy of + the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required + by applicable law or agreed to in writing, software distributed under the + License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS + OF ANY KIND, either express or implied. See the License for the specific + language governing permissions and limitations under the License. --> <project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> @@ -22,7 +16,7 @@ <parent> <artifactId>drill-root</artifactId> <groupId>org.apache.drill</groupId> - <version>1.0.0-m1-SNAPSHOT</version> + <version>1.0.0-m1-incubating-SNAPSHOT</version> </parent> <artifactId>sqlparser</artifactId> <name>SQL Parser</name> @@ -96,6 +90,23 @@ <version>2.7.1</version> </dependency> </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + <inherited>true</inherited> + <configuration> + <excludes> + <exclude>**/.buildpath</exclude> + <exclude>**/*.json</exclude> + <exclude>**/donuts-output-data.txt</exclude> + </excludes> + </configuration> + </plugin> + </plugins> + </build> <profiles> <profile> <id>default-hadoop</id> http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/sqlparser/src/test/resources/logback.xml ---------------------------------------------------------------------- diff --git a/sqlparser/src/test/resources/logback.xml b/sqlparser/src/test/resources/logback.xml index ff9e2ca..13808a6 100644 --- a/sqlparser/src/test/resources/logback.xml +++ b/sqlparser/src/test/resources/logback.xml @@ -1,4 +1,20 @@ <?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software 
Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <configuration> <appender name="SOCKET" class="de.huxhorn.lilith.logback.appender.ClassicMultiplexSocketAppender">
