http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/osgi/sesame-runtime-osgi/pom.xml ---------------------------------------------------------------------- diff --git a/osgi/sesame-runtime-osgi/pom.xml b/osgi/sesame-runtime-osgi/pom.xml new file mode 100644 index 0000000..db1a4bb --- /dev/null +++ b/osgi/sesame-runtime-osgi/pom.xml @@ -0,0 +1,116 @@ +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>mvm.rya</groupId> + <artifactId>rya.osgi</artifactId> + <version>3.0.4-SNAPSHOT</version> + </parent> + <artifactId>sesame-runtime-osgi</artifactId> + <version>2.6.4</version> + <packaging>pom</packaging> + <name>${project.groupId}.${project.artifactId}</name> + <dependencies> + <dependency> + <groupId>org.openrdf.sesame</groupId> + <artifactId>sesame-runtime-osgi</artifactId> + </dependency> + <dependency> + <groupId>biz.aQute</groupId> + <artifactId>bnd</artifactId> + <version>0.0.397</version> + </dependency> + </dependencies> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + <executions> + <execution> + <id>copy</id> + <phase>generate-resources</phase> + <goals> + <goal>copy</goal> + </goals> + <configuration> + <artifactItems> + <artifactItem> + <groupId>org.openrdf.sesame</groupId> + <artifactId>sesame-runtime-osgi</artifactId> + <version>${project.version}</version> + <outputDirectory>${project.build.directory}</outputDirectory> + <destFileName>sesame-runtime-osgi.jar</destFileName> + </artifactItem> + <artifactItem> + <groupId>biz.aQute</groupId> + <artifactId>bnd</artifactId> + <version>0.0.397</version> + <outputDirectory>${project.build.directory}</outputDirectory> + <destFileName>bnd.jar</destFileName> + </artifactItem> + </artifactItems> + <!-- other 
configurations here --> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>exec-maven-plugin</artifactId> + <version>1.1</version> + <executions> + <execution> + <phase>process-resources</phase> + <goals> + <goal>exec</goal> + </goals> + </execution> + </executions> + <configuration> + <executable>java</executable> + <arguments> + <argument>-jar</argument> + <argument>target/bnd.jar</argument> + <argument>build</argument> + <argument>openrdf-sesame-osgi.bnd</argument> + </arguments> + </configuration> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin + </artifactId> + <version>1.7</version> + <executions> + <execution> + <id>attach-artifacts</id> + <phase>package</phase> + <goals> + <goal>attach-artifact</goal> + </goals> + <configuration> + <artifacts> + <artifact> + <file>${project.build.directory}/sesame-runtime-osgi-${project.version}.jar</file> + </artifact> + </artifacts> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + <repositories> + <repository> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>true</enabled> + </snapshots> + <id>bndrepo</id> + <name>aQute BND Repo</name> + <url>http://www.aqute.biz/repo</url> + </repository> + </repositories> +</project>
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query-ext/pom.xml ---------------------------------------------------------------------- diff --git a/partition/common-query-ext/pom.xml b/partition/common-query-ext/pom.xml new file mode 100644 index 0000000..4fb0aee --- /dev/null +++ b/partition/common-query-ext/pom.xml @@ -0,0 +1,71 @@ +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <!--<parent>--> + <!--<groupId>sitestore</groupId>--> + <!--<artifactId>sitestore</artifactId>--> + <!--<version>2.0.0-SNAPSHOT</version>--> + <!--</parent>--> + + <parent> + <groupId>mvm.rya</groupId> + <artifactId>parent</artifactId> + <version>2.0.0-SNAPSHOT</version> + </parent> + + <groupId>sitestore.common</groupId> + <artifactId>common-query-ext</artifactId> + <name>common-query (${project.version})</name> + <version>1.0.0-SNAPSHOT</version> + <description>A set of filters and iterators for cloudbase queries</description> + + <properties> + <skipTests>true</skipTests> + </properties> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-source-plugin</artifactId> + <version>2.1.2</version> + <executions> + <execution> + <id>attach-sources</id> + <phase>install</phase> + <goals> + <goal>jar</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <version>2.7.2</version> + <configuration> + <skipTests>${skipTests}</skipTests> + </configuration> + </plugin> + </plugins> + </build> + <!--<scm>--> + <!--<connection>${scmLocation}/tto/ss/common/trunk/common-query</connection>--> + <!--</scm>--> + <dependencies> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + 
</dependency> + <dependency> + <groupId>sitestore.common</groupId> + <artifactId>common-query</artifactId> + <version>2.0.0-SNAPSHOT</version> + </dependency> + <dependency> + <groupId>cloudbase</groupId> + <artifactId>cloudbase-core</artifactId> + </dependency> + </dependencies> +</project> http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query-ext/src/main/java/ss/cloudbase/core/iterators/ext/EncodedSortedRangeIterator.java ---------------------------------------------------------------------- diff --git a/partition/common-query-ext/src/main/java/ss/cloudbase/core/iterators/ext/EncodedSortedRangeIterator.java b/partition/common-query-ext/src/main/java/ss/cloudbase/core/iterators/ext/EncodedSortedRangeIterator.java new file mode 100644 index 0000000..fb59102 --- /dev/null +++ b/partition/common-query-ext/src/main/java/ss/cloudbase/core/iterators/ext/EncodedSortedRangeIterator.java @@ -0,0 +1,44 @@ +package ss.cloudbase.core.iterators.ext; + +import cloudbase.core.data.Key; +import cloudbase.core.data.Value; +import cloudbase.core.iterators.IteratorEnvironment; +import cloudbase.core.iterators.SortedKeyValueIterator; +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.io.Text; +import ss.cloudbase.core.iterators.SortedRangeIterator; + +import java.io.IOException; +import java.util.Map; + +/** + * Class EncodedSortedRangeIterator + * Date: Aug 10, 2011 + * Time: 10:37:28 AM + */ +public class EncodedSortedRangeIterator extends SortedRangeIterator { + + @Override + public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException { + super.init(source, options, env); + if (options.containsKey(OPTION_LOWER_BOUND)) { + lower = new Text(decode(options.get(OPTION_LOWER_BOUND))); + } else { + lower = new Text("\u0000"); + } + + if (options.containsKey(OPTION_UPPER_BOUND)) { + upper = new Text(decode(options.get(OPTION_UPPER_BOUND))); + } else { + 
upper = new Text("\u0000"); + } + } + + public static String encode(String str) { /* charset pinned to UTF-8 so the encoded bound does not depend on the JVM default charset */ + return new String(Base64.encodeBase64(str.getBytes(java.nio.charset.Charset.forName("UTF-8"))), java.nio.charset.Charset.forName("UTF-8")); + } + + public static String decode(String str) { /* inverse of encode(); must use the same charset on the decoding side */ + return new String(Base64.decodeBase64(str.getBytes(java.nio.charset.Charset.forName("UTF-8"))), java.nio.charset.Charset.forName("UTF-8")); + } +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query-ext/src/test/java/GVDateFilterTest.java ---------------------------------------------------------------------- diff --git a/partition/common-query-ext/src/test/java/GVDateFilterTest.java b/partition/common-query-ext/src/test/java/GVDateFilterTest.java new file mode 100644 index 0000000..8ea5578 --- /dev/null +++ b/partition/common-query-ext/src/test/java/GVDateFilterTest.java @@ -0,0 +1,156 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Map.Entry; + +import org.apache.hadoop.io.Text; +import org.junit.Test; + +import ss.cloudbase.core.iterators.GMDenIntersectingIterator; +import ss.cloudbase.core.iterators.filter.general.GVDateFilter; + +import cloudbase.core.client.Connector; +import cloudbase.core.client.Scanner; +import cloudbase.core.client.TableNotFoundException; +import cloudbase.core.data.Key; +import cloudbase.core.data.Range; +import cloudbase.core.data.Value; +import cloudbase.core.iterators.FilteringIterator; +import cloudbase.core.security.Authorizations; + +/** + * + * @author rashah + */ +public class GVDateFilterTest +{ + + private Connector cellLevelConn; + private Connector serializedConn; + private static final String TABLE = "partition"; + private static final Authorizations AUTHS = new Authorizations("ALPHA,BETA,GAMMA".split(",")); + + + + protected Connector getSerializedConnector() + { + if (serializedConn == null) + { + serializedConn = SampleGVData.initConnector(); + SampleGVData.writeDenSerialized(serializedConn, SampleGVData.sampleData()); + } + 
return serializedConn; + } + + + + protected Scanner getSerializedScanner() + { + Connector c = getSerializedConnector(); + try + { + return c.createScanner(TABLE, AUTHS); + } + catch (TableNotFoundException e) + { + return null; + } + } + + protected Scanner setUpGVDFFilter(Scanner s, String timesta) + { + try + { + + s.setScanIterators(50, FilteringIterator.class.getName(), "gvdf"); + s.setScanIteratorOption("gvdf", "0", GVDateFilter.class.getName()); + s.setScanIteratorOption("gvdf", "0." + GVDateFilter.OPTIONInTimestamp, timesta); + + } + catch (IOException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return s; + } + + protected String checkSerialized(Scanner s) + { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (Entry<Key, Value> e : s) + { + + if (!first) + { + sb.append(","); + } + else + { + first = false; + } + + String colq = e.getKey().getColumnQualifier().toString(); + + sb.append(colq); + } + return sb.toString(); + } + + + @Test + public void testNoResults() + { + + Scanner s = setUpGVDFFilter(getSerializedScanner(), "2008-03-03T20:44:28.633Z"); + s.setRange(new Range()); + + assertTrue(checkSerialized(s).equals("")); + } + + + @Test + public void testOneResult() + { + + Scanner s = setUpGVDFFilter(getSerializedScanner(), "2011-03-03T20:44:28.633Z"); + s.setRange(new Range()); + + System.out.println(checkSerialized(s)); + + assertTrue(checkSerialized(s).equals("03")); + } + + @Test + public void testTwoResults() + { + + Scanner s = setUpGVDFFilter(getSerializedScanner(), "2009-03-03T20:44:28.633Z"); + s.setRange(new Range()); + + assertTrue(checkSerialized(s).equals("04,01")); + } + + @Test + public void testThreeResults() + { + + Scanner s = setUpGVDFFilter(getSerializedScanner(), "2010-03-01T20:44:28.633Z"); + s.setRange(new Range()); + + assertTrue(checkSerialized(s).equals("04,01,03")); + } + + @Test + public void testDummyTest() + { + assertTrue(true); + } + +} 
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query-ext/src/test/java/GVFrequencyFilterTest.java ---------------------------------------------------------------------- diff --git a/partition/common-query-ext/src/test/java/GVFrequencyFilterTest.java b/partition/common-query-ext/src/test/java/GVFrequencyFilterTest.java new file mode 100644 index 0000000..25c602a --- /dev/null +++ b/partition/common-query-ext/src/test/java/GVFrequencyFilterTest.java @@ -0,0 +1,144 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Map.Entry; + +import org.apache.hadoop.io.Text; +import org.junit.Test; + +import ss.cloudbase.core.iterators.GMDenIntersectingIterator; +import ss.cloudbase.core.iterators.filter.general.GVFrequencyFilter; + +import cloudbase.core.client.Connector; +import cloudbase.core.client.Scanner; +import cloudbase.core.client.TableNotFoundException; +import cloudbase.core.data.Key; +import cloudbase.core.data.Range; +import cloudbase.core.data.Value; +import cloudbase.core.iterators.FilteringIterator; +import cloudbase.core.security.Authorizations; + +/** + * + * @author rashah + */ +public class GVFrequencyFilterTest +{ + + private Connector cellLevelConn; + private Connector serializedConn; + private static final String TABLE = "partition"; + private static final Authorizations AUTHS = new Authorizations("ALPHA,BETA,GAMMA".split(",")); + + + + protected Connector getSerializedConnector() + { + if (serializedConn == null) + { + serializedConn = SampleGVData.initConnector(); + SampleGVData.writeDenSerialized(serializedConn, SampleGVData.sampleData()); + } + return serializedConn; + } + + + + protected Scanner getSerializedScanner() + { + Connector c = getSerializedConnector(); + try + { + return c.createScanner(TABLE, AUTHS); + } + catch (TableNotFoundException e) + { + return 
null; + } + } + + protected Scanner setUpGVDFFilter(Scanner s, String Frequency) + { + try + { + s.clearScanIterators(); + + s.setScanIterators(50, FilteringIterator.class.getName(), "gvff"); + s.setScanIteratorOption("gvff", "0", GVFrequencyFilter.class.getName()); + s.setScanIteratorOption("gvff", "0." + GVFrequencyFilter.OPTIONFrequency, Frequency); + + } + catch (IOException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return s; + } + + protected String checkSerialized(Scanner s) + { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (Entry<Key, Value> e : s) + { + + if (!first) + { + sb.append(","); + } + else + { + first = false; + } + + String colq = e.getKey().getColumnQualifier().toString(); + + //System.out.println(e.getKey()+"\t"+e.getValue()); + + sb.append(colq); + } + return sb.toString(); + } + + @Test + public void testNoMatch() + { + + Scanner s = setUpGVDFFilter(getSerializedScanner(), "2000000000"); + s.setRange(new Range()); + + assertTrue(checkSerialized(s).isEmpty()); + } + + @Test + public void testSingleMatch() + { + Scanner s = setUpGVDFFilter(getSerializedScanner(), "1500000000"); + s.setRange(new Range()); + + assertTrue(checkSerialized(s).equals("01")); + } + + + @Test + public void testDoubleMatch() + { + Scanner s = setUpGVDFFilter(getSerializedScanner(), "1200000000"); + s.setRange(new Range()); + + assertTrue(checkSerialized(s).equals("01,03")); + } + + @Test + public void testDummyTest() + { + assertTrue(true); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query-ext/src/test/java/IteratorTest.java ---------------------------------------------------------------------- diff --git a/partition/common-query-ext/src/test/java/IteratorTest.java b/partition/common-query-ext/src/test/java/IteratorTest.java new file mode 100644 index 0000000..1b5cf14 --- /dev/null +++ b/partition/common-query-ext/src/test/java/IteratorTest.java @@ -0,0 +1,554 @@ 
+import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.io.Text; +import org.junit.Test; + +import ss.cloudbase.core.iterators.CellLevelFilteringIterator; +import ss.cloudbase.core.iterators.CellLevelRecordIterator; +import ss.cloudbase.core.iterators.ConversionIterator; +import ss.cloudbase.core.iterators.GMDenIntersectingIterator; +import ss.cloudbase.core.iterators.SortedMinIterator; +import ss.cloudbase.core.iterators.SortedRangeIterator; +import ss.cloudbase.core.iterators.UniqueIterator; +import ss.cloudbase.core.iterators.filter.CBConverter; +import cloudbase.core.client.Connector; +import cloudbase.core.client.Scanner; +import cloudbase.core.client.TableNotFoundException; +import cloudbase.core.data.Key; +import cloudbase.core.data.PartialKey; +import cloudbase.core.data.Range; +import cloudbase.core.data.Value; +import cloudbase.core.security.Authorizations; + +public class IteratorTest { + private Connector cellLevelConn; + private Connector serializedConn; + + private static final String TABLE = "partition"; + private static final Authorizations AUTHS = new Authorizations("ALPHA,BETA,GAMMA".split(",")); + + public IteratorTest() { + + } + + protected Connector getCellLevelConnector() { + if (cellLevelConn == null) { + cellLevelConn = SampleData.initConnector(); + SampleData.writeDenCellLevel(cellLevelConn, SampleData.sampleData()); + } + return cellLevelConn; + } + + protected Connector getSerializedConnector() { + if (serializedConn == null) { + serializedConn = SampleData.initConnector(); + SampleData.writeDenSerialized(serializedConn, SampleData.sampleData()); + SampleData.writeDenProvenance(serializedConn); + SampleData.writeMinIndexes(serializedConn); + } + return serializedConn; + } + + protected Scanner getProvenanceScanner() { + Connector c = getSerializedConnector(); + try { + 
return c.createScanner("provenance", AUTHS); + } catch (TableNotFoundException e) { + return null; + } + } + + protected Scanner getCellLevelScanner() { + Connector c = getCellLevelConnector(); + try { + return c.createScanner(TABLE, AUTHS); + } catch (TableNotFoundException e) { + return null; + } + } + + protected Scanner getSerializedScanner() { + Connector c = getSerializedConnector(); + try { + return c.createScanner(TABLE, AUTHS); + } catch (TableNotFoundException e) { + return null; + } + } + + protected Scanner setUpIntersectingIterator(Scanner s, Text[] terms, boolean multiDoc) { + try { + s.setScanIterators(50, GMDenIntersectingIterator.class.getName(), "ii"); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + s.setScanIteratorOption("ii", GMDenIntersectingIterator.indexFamilyOptionName, "index"); + s.setScanIteratorOption("ii", GMDenIntersectingIterator.docFamilyOptionName, "event"); + s.setScanIteratorOption("ii", GMDenIntersectingIterator.OPTION_MULTI_DOC, "" + multiDoc); + s.setScanIteratorOption("ii", GMDenIntersectingIterator.columnFamiliesOptionName, GMDenIntersectingIterator.encodeColumns(terms)); + return s; + } + + protected String checkSerialized(Scanner s) { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (Entry<Key, Value> e: s) { + if (!first) { + sb.append(","); + } else { + first = false; + } + + String colq = e.getKey().getColumnQualifier().toString(); + + sb.append(colq); + } + return sb.toString(); + } + + protected String checkCellLevel(Scanner s) { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (Entry<Key, Value> e: s) { + String colq = e.getKey().getColumnQualifier().toString(); + int i = colq.indexOf("\u0000"); + if (i > -1) { + if (!first) { + sb.append(","); + } else { + first = false; + } + sb.append(colq.substring(0, i)); + sb.append("."); + sb.append(colq.substring(i + 1)); + sb.append("="); + sb.append(e.getValue().toString()); + } + } 
+ return sb.toString(); + } + + @Test + public void testSerializedSingleDuplicate() { + Text[] terms = new Text[] { + new Text("A"), + new Text("A") + }; + + String test = "01"; + Scanner s = setUpIntersectingIterator(getSerializedScanner(), terms, false); + s.setRange(new Range()); + assertTrue(test.equals(checkSerialized(s))); + } + + @Test + public void testCellLevelSingleDuplicate() { + Text[] terms = new Text[] { + new Text("A"), + new Text("A") + }; + String test = "01.field0=A,01.field1=B,01.field2=C,01.field3=D,01.field4=E"; + Scanner s = setUpIntersectingIterator(getCellLevelScanner(), terms, true); + s.setRange(new Range()); + assertTrue(test.equals(checkCellLevel(s))); + } + + @Test + public void testSerializedTwoTerms() { + Text[] terms = new Text[] { + new Text("C"), + new Text("D") + }; + // all the evens will come first + String test = "02,01,03"; + Scanner s = setUpIntersectingIterator(getSerializedScanner(), terms, false); + s.setRange(new Range()); + assertTrue(test.equals(checkSerialized(s))); + } + + @Test + public void testCellLevelTwoTerms() { + Text[] terms = new Text[] { + new Text("C"), + new Text("D") + }; + + String test = "02.field0=B,02.field1=C,02.field2=D,02.field3=E,02.field4=F," + + "01.field0=A,01.field1=B,01.field2=C,01.field3=D,01.field4=E," + + "03.field0=C,03.field1=D,03.field2=E,03.field3=F,03.field4=G"; + Scanner s = setUpIntersectingIterator(getCellLevelScanner(), terms, true); + s.setRange(new Range()); + assertTrue(test.equals(checkCellLevel(s))); + } + + @Test + public void testSerializedTwoTermsWithRange() { + Text[] terms = new Text[] { + new Text("C"), + new Text("D") + }; + + String test = "02"; + Scanner s = setUpIntersectingIterator(getSerializedScanner(), terms, false); + s.setRange(new Range(new Key(new Text("0")), true, new Key(new Text("1")), false)); + assertTrue(test.equals(checkSerialized(s))); + } + + @Test + public void testCellLevelTwoTermsWithRange() { + Text[] terms = new Text[] { + new Text("C"), + new 
Text("D") + }; + + String test = "02.field0=B,02.field1=C,02.field2=D,02.field3=E,02.field4=F"; + Scanner s = setUpIntersectingIterator(getCellLevelScanner(), terms, true); + s.setRange(new Range(new Key(new Text("0")), true, new Key(new Text("1")), false)); + assertTrue(test.equals(checkCellLevel(s))); + } + + @Test + public void testSerializedSingleRange() { + Text[] terms = new Text[] { + new Text(GMDenIntersectingIterator.getRangeTerm("index", "A", true, "B", true)), + new Text(GMDenIntersectingIterator.getRangeTerm("index", "A", true, "B", true)) + }; + + String test = "02,01"; + Scanner s = setUpIntersectingIterator(getSerializedScanner(), terms, false); + s.setRange(new Range()); + assertTrue(test.equals(checkSerialized(s))); + } + + @Test + public void testSerializedMultiRange() { + Text[] terms = new Text[] { + new Text(GMDenIntersectingIterator.getRangeTerm("index", "A", true, "B", true)), + new Text(GMDenIntersectingIterator.getRangeTerm("index", "B", true, "C", true)) + }; + + String test = "02,01"; + Scanner s = setUpIntersectingIterator(getSerializedScanner(), terms, false); + s.setRange(new Range()); + assertTrue(test.equals(checkSerialized(s))); + } + + @Test + public void testSerializedTermAndRange() { + Text[] terms = new Text[] { + new Text("B"), + new Text(GMDenIntersectingIterator.getRangeTerm("index", "A", true, "E", true)) + }; + + String test = "02,01"; + Scanner s = setUpIntersectingIterator(getSerializedScanner(), terms, false); + s.setRange(new Range()); + assertTrue(test.equals(checkSerialized(s))); + } + + protected Scanner setUpSortedRangeIterator(Scanner s, boolean multiDoc) { + try { + s.setScanIterators(50, SortedRangeIterator.class.getName(), "ri"); + s.setScanIteratorOption("ri", SortedRangeIterator.OPTION_COLF, "index"); + s.setScanIteratorOption("ri", SortedRangeIterator.OPTION_DOC_COLF, "event"); + s.setScanIteratorOption("ri", SortedRangeIterator.OPTION_LOWER_BOUND, "A"); + s.setScanIteratorOption("ri", 
SortedRangeIterator.OPTION_UPPER_BOUND, "C"); + s.setScanIteratorOption("ri", SortedRangeIterator.OPTION_START_INCLUSIVE, "true"); + s.setScanIteratorOption("ri", SortedRangeIterator.OPTION_END_INCLUSIVE, "true"); + s.setScanIteratorOption("ri", SortedRangeIterator.OPTION_MULTI_DOC, "" + multiDoc); + return s; + } catch (IOException e) { + e.printStackTrace(); + return null; + } + } + + @Test + public void testSerializedSortedRangeIterator() { + Scanner s = setUpSortedRangeIterator(getSerializedScanner(), false); + String test = "02,01,03"; + s.setRange(new Range()); + assertTrue(test.equals(checkSerialized(s))); + } + + @Test + public void testCellLevelSortedRangeIterator() { + Scanner s = setUpSortedRangeIterator(getCellLevelScanner(), true); + String test = "02.field0=B,02.field1=C,02.field2=D,02.field3=E,02.field4=F," + + "01.field0=A,01.field1=B,01.field2=C,01.field3=D,01.field4=E," + + "03.field0=C,03.field1=D,03.field2=E,03.field3=F,03.field4=G"; + s.setRange(new Range()); + assertTrue(test.equals(checkCellLevel(s))); + } + + @Test + public void testUniqueIterator() { + Scanner s = getProvenanceScanner(); + try { + s.setScanIterators(50, UniqueIterator.class.getName(), "skipper"); + Key start = new Key(new Text("sid1")); + s.setRange(new Range(start, start.followingKey(PartialKey.ROW))); + + int count = 0; + for (Entry<Key, Value> e: s) { + count++; + } + + assertEquals(3, count); /* JUnit argument order is (expected, actual) */ + } catch (IOException e) { + throw new AssertionError(e); /* fail instead of passing silently when the iterator cannot be configured */ + } + } + + protected Scanner setUpConversionIterator(Scanner s) { + String[] conversions = new String[] { + "field0 + 10", + "field1 - 10", + "field2 * 10", + "field3 / 10", + "field4 % 10" + }; + + try { + s.setScanIterators(50, ConversionIterator.class.getName(), "ci"); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + s.setScanIteratorOption("ci", ConversionIterator.OPTION_CONVERSIONS, ConversionIterator.encodeConversions(conversions)); + Key start = new Key(new Text("1"), new 
Text("event"), new Text("01")); + s.setRange(new Range(start, true, start.followingKey(PartialKey.ROW_COLFAM_COLQUAL), false)); + + return s; + } + + @Test + public void testConversionIteratorSerialized() { + Scanner s = getSerializedScanner(); + s = setUpConversionIterator(s); + + CBConverter c = new CBConverter(); + + boolean test = true; + Map<String, Double> expected = new HashMap<String, Double>(); + + expected.put("field0", 20.0); + expected.put("field1", 1.0); + expected.put("field2", 120.0); + expected.put("field3", 1.3); + expected.put("field4", 4.0); + + Map<String, String> record; + + for (Entry<Key, Value> e: s) { + record = c.toMap(e.getKey(), e.getValue()); + + for (Entry<String, String> pair: record.entrySet()) { + test = test && expected.get(pair.getKey()).equals(new Double(Double.parseDouble(record.get(pair.getKey())))); + } + } + + assertTrue(test); + } + + @Test + public void testConversionIteratorCellLevel() { + Scanner s = getCellLevelScanner(); + s = setUpConversionIterator(s); + s.setScanIteratorOption("ci", ConversionIterator.OPTION_MULTI_DOC, "true"); + + boolean test = true; + Map<String, Double> expected = new HashMap<String, Double>(); + + expected.put("field0", 20.0); + expected.put("field1", 1.0); + expected.put("field2", 120.0); + expected.put("field3", 1.3); + expected.put("field4", 4.0); + + for (Entry<Key, Value> e: s) { + String field = getField(e.getKey()); + if (field != null) { + test = test && expected.get(field).equals(new Double(Double.parseDouble(e.getValue().toString()))); + } + } + + assertTrue(test); + } + + protected String getField(Key key) { + String colq = key.getColumnQualifier().toString(); + int start = colq.indexOf("\u0000"); + if (start == -1) { + return null; + } + + int end = colq.indexOf("\u0000", start + 1); + if (end == -1) { + end = colq.length(); + } + + return colq.substring(start + 1, end); + } + + @Test + public void testCellLevelOGCFilter() { + Scanner s = getCellLevelScanner(); + 
s.fetchColumnFamily(new Text("event")); + + try { + s.setScanIterators(60, CellLevelFilteringIterator.class.getName(), "fi"); + } catch (IOException e) { + e.printStackTrace(); + } + + s.setScanIteratorOption("fi", CellLevelFilteringIterator.OPTION_FILTER, "<PropertyIsBetween><PropertyName>field0</PropertyName>" + + "<LowerBoundary><Literal>A</Literal></LowerBoundary>" + + "<UpperBoundary><Literal>C</Literal></UpperBoundary>" + + "</PropertyIsBetween>"); + + String test = "02.field0=B,02.field1=C,02.field2=D,02.field3=E,02.field4=F," + + "01.field0=A,01.field1=B,01.field2=C,01.field3=D,01.field4=E," + + "03.field0=C,03.field1=D,03.field2=E,03.field3=F,03.field4=G"; + assertTrue(test.equals(checkCellLevel(s))); + } + + @Test + public void testMultiLevelIterator() { + Scanner s = getCellLevelScanner(); + Text[] terms = new Text[] { + new Text("C"), + new Text("D") + }; + + s = setUpIntersectingIterator(s, terms, true); + + try { + s.setScanIterators(60, CellLevelFilteringIterator.class.getName(), "fi"); + } catch (IOException e) { + e.printStackTrace(); + } + + s.setScanIteratorOption("fi", CellLevelFilteringIterator.OPTION_FILTER, "<PropertyIsEqualTo><PropertyName>field0</PropertyName>" + + "<Literal>A</Literal>" + + "</PropertyIsEqualTo>"); + + String test = "01.field0=A,01.field1=B,01.field2=C,01.field3=D,01.field4=E"; + assertTrue(test.equals(checkCellLevel(s))); + } + + @Test + public void testMultiLevelIterator2() { + Scanner s = getCellLevelScanner(); + s = setUpSortedRangeIterator(s, true); + try { + s.setScanIterators(60, CellLevelFilteringIterator.class.getName(), "fi"); + } catch (IOException e) { + e.printStackTrace(); + } + s.setScanIteratorOption("fi", CellLevelFilteringIterator.OPTION_FILTER, "<PropertyIsEqualTo><PropertyName>field0</PropertyName>" + + "<Literal>A</Literal>" + + "</PropertyIsEqualTo>"); + + String test = "01.field0=A,01.field1=B,01.field2=C,01.field3=D,01.field4=E"; + assertTrue(test.equals(checkCellLevel(s))); + } + + @Test + public 
void testCellLevelRecordIterator() { + Scanner s = getCellLevelScanner(); + s = setUpSortedRangeIterator(s, true); + try { + s.setScanIterators(60, CellLevelRecordIterator.class.getName(), "recordItr"); + } catch (IOException e) { + e.printStackTrace(); + } + +// for (Entry<Key, Value> e: s) { +// String v = e.getValue().toString(); +// v = v.replaceAll("\\u0000", ","); +// v = v.replaceAll("\\uFFFD", "="); +// System.out.println(e.getKey() + "\t" + v); +// } + String test = "02,01,03"; + assertTrue(test.equals(checkSerialized(s))); + } + + @Test + public void testIntersectionWithoutDocLookup() { + Text[] terms = new Text[] { + new Text("C"), + new Text("D") + }; + // all the evens will come first + String test = "\u000002,\u000001,\u000003"; + Scanner s = setUpIntersectingIterator(getSerializedScanner(), terms, false); + s.setScanIteratorOption("ii", GMDenIntersectingIterator.OPTION_DOC_LOOKUP, "false"); + s.setRange(new Range()); + assertTrue(test.equals(checkSerialized(s))); + } + + @Test + public void testSimpleNot() { + Text[] terms = new Text[] { + new Text("B"), + new Text("F") + }; + + boolean[] nots = new boolean[] { + false, + true + }; + + String test="01"; + Scanner s = setUpIntersectingIterator(getSerializedScanner(), terms, false); + s.setScanIteratorOption("ii", GMDenIntersectingIterator.notFlagOptionName, GMDenIntersectingIterator.encodeBooleans(nots)); + s.setRange(new Range()); + + assertTrue(test.equals(checkSerialized(s))); + } + + @Test + public void testRangeNot() { + Text[] terms = new Text[] { + new Text("B"), + new Text(GMDenIntersectingIterator.getRangeTerm("index", "F", true, "H", true)) + }; + + boolean[] nots = new boolean[] { + false, + true + }; + + String test = "01"; + Scanner s = setUpIntersectingIterator(getSerializedScanner(), terms, false); + s.setScanIteratorOption("ii", GMDenIntersectingIterator.notFlagOptionName, GMDenIntersectingIterator.encodeBooleans(nots)); + s.setRange(new Range()); + + 
assertTrue(test.equals(checkSerialized(s))); + } + + @Test + public void testMinIteratorOnLastKeys() { + Scanner s = getSerializedScanner(); + try { + s.setScanIterators(50, SortedMinIterator.class.getName(), "min"); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + s.setScanIteratorOption("min", SortedMinIterator.OPTION_PREFIX, "z"); + s.setRange(new Range()); + + String test = "02,04,06,08,10,01,03,05,07,09"; + assertTrue(test.equals(checkSerialized(s))); + } +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query-ext/src/test/java/JTSFilterTest.java ---------------------------------------------------------------------- diff --git a/partition/common-query-ext/src/test/java/JTSFilterTest.java b/partition/common-query-ext/src/test/java/JTSFilterTest.java new file mode 100644 index 0000000..8224f64 --- /dev/null +++ b/partition/common-query-ext/src/test/java/JTSFilterTest.java @@ -0,0 +1,181 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. 
+ */ + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Map.Entry; + +import org.apache.hadoop.io.Text; +import org.junit.Test; + +import ss.cloudbase.core.iterators.GMDenIntersectingIterator; +import ss.cloudbase.core.iterators.filter.jts.JTSFilter; + +import cloudbase.core.client.Connector; +import cloudbase.core.client.Scanner; +import cloudbase.core.client.TableNotFoundException; +import cloudbase.core.data.Key; +import cloudbase.core.data.Range; +import cloudbase.core.data.Value; +import cloudbase.core.iterators.FilteringIterator; +import cloudbase.core.security.Authorizations; + +/** + * + * @author rashah + */ +public class JTSFilterTest +{ + + private Connector cellLevelConn; + private Connector serializedConn; + private static final String TABLE = "partition"; + private static final Authorizations AUTHS = new Authorizations("ALPHA,BETA,GAMMA".split(",")); + + + + protected Connector getSerializedConnector() + { + if (serializedConn == null) + { + serializedConn = SampleJTSData.initConnector(); + SampleJTSData.writeDenSerialized(serializedConn, SampleJTSData.sampleData()); + } + return serializedConn; + } + + + + protected Scanner getSerializedScanner() + { + Connector c = getSerializedConnector(); + try + { + return c.createScanner(TABLE, AUTHS); + } + catch (TableNotFoundException e) + { + return null; + } + } + + protected Scanner setUpJTSFilter(Scanner s, String latitude, String longitude, boolean change_name) + { + try + { + + s.setScanIterators(50, FilteringIterator.class.getName(), "gvdf"); + s.setScanIteratorOption("gvdf", "0", JTSFilter.class.getName()); + s.setScanIteratorOption("gvdf", "0." + JTSFilter.OPTIONCenterPointLat, latitude); + s.setScanIteratorOption("gvdf", "0." + JTSFilter.OPTIONCenterPointLon, longitude); + if (change_name) + s.setScanIteratorOption("gvdf", "0." 
+ JTSFilter.OPTIONGeometryKeyName, "beam-footprint"); + + + } + catch (IOException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return s; + } + + protected String checkSerialized(Scanner s) + { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (Entry<Key, Value> e : s) + { + + if (!first) + { + sb.append(","); + } + else + { + first = false; + } + + String colq = e.getKey().getColumnQualifier().toString(); + + sb.append(colq); + } + return sb.toString(); + } + + + @Test + public void testNoResults() + { + //London is in neither - 51°30'0.00"N 0° 7'0.00"W + String latitude = "51.5"; + String longitude = "0.11"; + + Scanner s = setUpJTSFilter(getSerializedScanner(), latitude, longitude, false); + s.setRange(new Range()); + +// System.out.println("{" + checkSerialized(s) + "}"); + assertTrue(checkSerialized(s).isEmpty()); + } + + + @Test + public void testOneResultAmerica() + { + //This is North America + //Points 39°44'21.00"N 104°59'3.00"W (Denver) are in the footprint + String latitude = "33"; + String longitude = "-93.0"; + + Scanner s = setUpJTSFilter(getSerializedScanner(), latitude, longitude, false); + s.setRange(new Range()); + + System.out.println("{" + checkSerialized(s) + "}"); + assertTrue(checkSerialized(s).equals("02")); + } + + + @Test + public void testOneResultAustralia() + { + //This is Australia + //Points like 22S 135E are in the beam + String latitude = "-9"; + String longitude = "100.0"; + + Scanner s = setUpJTSFilter(getSerializedScanner(), latitude, longitude, false); + s.setRange(new Range()); + + System.out.println("{" + checkSerialized(s) + "}"); + assertTrue(checkSerialized(s).equals("01")); + } + + @Test + public void testOneResultHawaii() + { + // -164 40 - somewhere near hawaii + + //This is Australia + //Points like 22S 135E are in the beam + String latitude = "40"; + String longitude = "-164.0"; + + Scanner s = setUpJTSFilter(getSerializedScanner(), latitude, longitude, true); + 
s.setRange(new Range()); + + System.out.println("{" + checkSerialized(s) + "}"); + assertTrue(checkSerialized(s).equals("03")); + } + + + @Test + public void testDummyTest() + { + assertTrue(true); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query-ext/src/test/java/OGCFilterTest.java ---------------------------------------------------------------------- diff --git a/partition/common-query-ext/src/test/java/OGCFilterTest.java b/partition/common-query-ext/src/test/java/OGCFilterTest.java new file mode 100644 index 0000000..fd54945 --- /dev/null +++ b/partition/common-query-ext/src/test/java/OGCFilterTest.java @@ -0,0 +1,163 @@ +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.io.Text; +import org.junit.Test; + +import cloudbase.core.data.Key; +import cloudbase.core.data.Value; +import ss.cloudbase.core.iterators.filter.ogc.OGCFilter; + +import static org.junit.Assert.*; + +public class OGCFilterTest { + private Key testKey = new Key(new Text("row"), new Text("colf"), new Text("colq")); + private Value testValue = new Value("uuid~event\uFFFDmy-event-hash-1\u0000date\uFFFD20100819\u0000time~dss\uFFFD212706.000\u0000frequency\uFFFD3.368248181443644E8\u0000latitude\uFFFD48.74571142707959\u0000longitude\uFFFD13.865561564126812\u0000altitude\uFFFD1047.0\u0000datetime\uFFFD2010-08-19T21:27:06.000Z\u0000test~key\uFFFD\u0000key\uFFFDa\uFFFDb".getBytes()); + + public OGCFilterTest() { + + } + + private OGCFilter getFilter(String filter) { + OGCFilter f = new OGCFilter(); + Map<String, String> options = new HashMap<String, String>(); + options.put(OGCFilter.OPTION_FILTER, filter); + f.init(options); + return f; + } + + @Test + public void testBBOX() { + OGCFilter f = getFilter("<BBOX><gml:Envelope>" + + "<gml:LowerCorner>13 48</gml:LowerCorner>" + + "<gml:UpperCorner>14 49</gml:UpperCorner>" + + "</gml:Envelope></BBOX>"); + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void 
testBetweenStr() { + OGCFilter f = getFilter("<PropertyIsBetween><PropertyName>datetime</PropertyName>" + + "<LowerBoundary><Literal>2010-08-19</Literal></LowerBoundary>" + + "<UpperBoundary><Literal>2010-08-20</Literal></UpperBoundary>" + + "</PropertyIsBetween>"); + + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testBetweenNum() { + OGCFilter f = getFilter("<PropertyIsBetween><PropertyName>frequency</PropertyName>" + + "<LowerBoundary><Literal>330000000</Literal></LowerBoundary>" + + "<UpperBoundary><Literal>340000000</Literal></UpperBoundary>" + + "</PropertyIsBetween>"); + + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testEqualStr() { + OGCFilter f = getFilter("<PropertyIsEqualTo><PropertyName>uuid~event</PropertyName><Literal>my-event-hash-1</Literal></PropertyIsEqualTo>"); + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testEqualNum() { + OGCFilter f = getFilter("<PropertyIsEqualTo><PropertyName>altitude</PropertyName><Literal>1047</Literal></PropertyIsEqualTo>"); + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testGreaterThanStr() { + OGCFilter f = getFilter("<PropertyIsGreaterThan><PropertyName>datetime</PropertyName><Literal>2010-08-15</Literal></PropertyIsGreaterThan>"); + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testGreaterThanNum() { + OGCFilter f = getFilter("<PropertyIsGreaterThan><PropertyName>altitude</PropertyName><Literal>1000</Literal></PropertyIsGreaterThan>"); + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testLessThanStr() { + OGCFilter f = getFilter("<PropertyIsLessThan><PropertyName>datetime</PropertyName><Literal>2010-08-20</Literal></PropertyIsLessThan>"); + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testLessThanNum() { + OGCFilter f = getFilter("<PropertyIsLessThan><PropertyName>altitude</PropertyName><Literal>1200</Literal></PropertyIsLessThan>"); + 
assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testLike() { + OGCFilter f = getFilter("<PropertyIsLike><PropertyName>uuid~event</PropertyName><Literal>*event*</Literal></PropertyIsLike>"); + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testNotEqualNum() { + OGCFilter f = getFilter("<PropertyIsNotEqualTo><PropertyName>altitude</PropertyName><Literal>1046</Literal></PropertyIsNotEqualTo>"); + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testNull() { + OGCFilter f = getFilter("<PropertyIsNull><PropertyName>test~key</PropertyName></PropertyIsNull>"); + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testNot() { + OGCFilter f = getFilter("<Not><PropertyIsEqualTo><PropertyName>altitude</PropertyName><Literal>1047</Literal></PropertyIsEqualTo></Not>"); + assertFalse(f.accept(testKey, testValue)); + } + + @Test + public void testAnd() { + OGCFilter f = getFilter("<And>" + + "<PropertyIsEqualTo><PropertyName>altitude</PropertyName><Literal>1047</Literal></PropertyIsEqualTo>" + + "<PropertyIsNull><PropertyName>test~key</PropertyName></PropertyIsNull>" + + "</And>"); + + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testOr() { + OGCFilter f = getFilter("<Or>" + + "<PropertyIsLike><PropertyName>uuid~event</PropertyName><Literal>*event*</Literal></PropertyIsLike>" + + "<PropertyIsNull><PropertyName>uuid~event</PropertyName></PropertyIsNull>" + + "</Or>"); + + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testNand() { + OGCFilter f = getFilter("<Not><And>" + + "<PropertyIsNull><PropertyName>uuid~event</PropertyName></PropertyIsNull>" + + "<PropertyIsNull><PropertyName>test~key</PropertyName></PropertyIsNull>" + + "</And></Not>"); + + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testNor() { + OGCFilter f = getFilter("<Not>" + + "<PropertyIsNull><PropertyName>uuid~event</PropertyName></PropertyIsNull>" + + 
"<PropertyIsNull><PropertyName>altitude</PropertyName></PropertyIsNull>" + + "</Not>"); + + assertTrue(f.accept(testKey, testValue)); + } + + @Test + public void testParse() { + OGCFilter f = getFilter("<PropertyIsEqualTo><PropertyName>key</PropertyName><Literal>a</Literal></PropertyIsEqualTo>"); + assertTrue(f.accept(testKey, testValue)); + } +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query-ext/src/test/java/SampleData.java ---------------------------------------------------------------------- diff --git a/partition/common-query-ext/src/test/java/SampleData.java b/partition/common-query-ext/src/test/java/SampleData.java new file mode 100644 index 0000000..071076b --- /dev/null +++ b/partition/common-query-ext/src/test/java/SampleData.java @@ -0,0 +1,228 @@ +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.io.Text; + +import cloudbase.core.client.BatchWriter; +import cloudbase.core.client.CBException; +import cloudbase.core.client.CBSecurityException; +import cloudbase.core.client.Connector; +import cloudbase.core.client.Instance; +import cloudbase.core.client.MultiTableBatchWriter; +import cloudbase.core.client.TableExistsException; +import cloudbase.core.client.TableNotFoundException; +import cloudbase.core.client.mock.MockInstance; +import cloudbase.core.data.Mutation; +import cloudbase.core.security.Authorizations; + + +public class SampleData { + public static int NUM_PARTITIONS = 2; + public static int NUM_SAMPLES = 10; + + public static Connector initConnector() { + Instance instance = new MockInstance(); + + try { + Connector connector = instance.getConnector("root", "password".getBytes()); + + // set up table + connector.tableOperations().create("partition"); + connector.tableOperations().create("provenance"); + + // set up root's auths + 
connector.securityOperations().changeUserAuthorizations("root", new Authorizations("ALPHA,BETA,GAMMA".split(","))); + + return connector; + } catch (CBException e) { + e.printStackTrace(); + } catch (CBSecurityException e) { + e.printStackTrace(); + } catch (TableExistsException e) { + e.printStackTrace(); + } + + return null; + } + + public static Collection<Map<String, String>> sampleData() { + List<Map<String, String>> list = new ArrayList<Map<String, String>>(); + Map<String, String> item; + + for (int i = 0; i < NUM_SAMPLES; i++) { + item = new HashMap<String, String>(); + for (int j = 0; j < 5; j++) { + item.put("field" + j , new String(new char[] {(char) ('A' + ((j + i) % 26))})); + } + list.add(item); + } + return list; + } + + public static void writeDenCellLevel(Connector connector, Collection<Map<String, String>> data) { + // write sample data + MultiTableBatchWriter mtbw = connector.createMultiTableBatchWriter(200000, 10000, 1); + try { + BatchWriter writer; + if (mtbw != null) { + writer = mtbw.getBatchWriter("partition"); + } else { + writer = connector.createBatchWriter("partition", 200000, 10000, 1); + } + int count = 0; + Mutation m; + for (Map<String, String> object: data) { + count++; + String id = (count < 10 ? 
"0" + count: "" + count); + Text partition = new Text("" + (count % NUM_PARTITIONS)); + + // write dummy record + m = new Mutation(partition); + m.put("event", id, ""); + writer.addMutation(m); + + for (Entry<String, String> entry: object.entrySet()) { + // write the event mutation + m = new Mutation(partition); + m.put("event", id + "\u0000" + entry.getKey(), entry.getValue()); + writer.addMutation(m); + + // write the general index mutation + m = new Mutation(partition); + m.put("index", entry.getValue() + "\u0000" + id, ""); + writer.addMutation(m); + + // write the specific index mutation + m = new Mutation(partition); + m.put("index", entry.getKey() + "//" + entry.getValue() + "\u0000" + id, ""); + writer.addMutation(m); + } + } + writer.close(); + } catch (CBException e) { + e.printStackTrace(); + } catch (CBSecurityException e) { + e.printStackTrace(); + } catch (TableNotFoundException e) { + e.printStackTrace(); + } + } + + public static void writeDenSerialized(Connector connector, Collection<Map<String, String>> data) { + // write sample data + MultiTableBatchWriter mtbw = connector.createMultiTableBatchWriter(200000, 10000, 1); + try { + BatchWriter writer; + if (mtbw != null) { + writer = mtbw.getBatchWriter("partition"); + } else { + writer = connector.createBatchWriter("partition", 200000, 10000, 1); + } + int count = 0; + Mutation m; + for (Map<String, String> object: data) { + count++; + String id = (count < 10 ? 
"0" + count: "" + count); + Text partition = new Text("" + (count % NUM_PARTITIONS)); + + StringBuilder value = new StringBuilder(); + boolean first = true; + for (Entry<String, String> entry: object.entrySet()) { + if (!first) { + value.append("\u0000"); + } else { + first = false; + } + value.append(entry.getKey()); + value.append("\uFFFD"); + value.append(entry.getValue()); + + // write the general index mutation + m = new Mutation(partition); + m.put("index", entry.getValue() + "\u0000" + id, ""); + writer.addMutation(m); + + // write the specific index mutation + m = new Mutation(partition); + m.put("index", entry.getKey() + "//" + entry.getValue() + "\u0000" + id, ""); + writer.addMutation(m); + } + + // write the event mutation + m = new Mutation(partition); + m.put("event", id, value.toString()); + writer.addMutation(m); + } + writer.close(); + } catch (CBException e) { + e.printStackTrace(); + } catch (CBSecurityException e) { + e.printStackTrace(); + } catch (TableNotFoundException e) { + e.printStackTrace(); + } + } + + public static void writeDenProvenance(Connector connector) { + // write sample data + MultiTableBatchWriter mtbw = connector.createMultiTableBatchWriter(200000, 10000, 1); + try { + BatchWriter writer; + if (mtbw != null) { + writer = mtbw.getBatchWriter("provenance"); + } else { + writer = connector.createBatchWriter("provenance", 200000, 10000, 1); + } + Mutation m; + for (int sid = 1; sid <= 2; sid++) { + for (int time = 1; time <= 3; time++) { + for (int uuid = 1; uuid <= (6 + 2 * time); uuid++) { + m = new Mutation(new Text("sid" + sid)); + m.put("time" + time, "uuid-" + Integer.toHexString(uuid), ""); + writer.addMutation(m); + } + } + } + writer.close(); + } catch (CBException e) { + e.printStackTrace(); + } catch (CBSecurityException e) { + e.printStackTrace(); + } catch (TableNotFoundException e) { + e.printStackTrace(); + } + } + + public static void writeMinIndexes(Connector connector) { + // write sample data + 
MultiTableBatchWriter mtbw = connector.createMultiTableBatchWriter(200000, 10000, 1); + try { + BatchWriter writer; + if (mtbw != null) { + writer = mtbw.getBatchWriter("partition"); + } else { + writer = connector.createBatchWriter("partition", 200000, 10000, 1); + } + Mutation m; + for (int i = 1; i <= NUM_SAMPLES; i++) { + m = new Mutation(new Text("" + (i % NUM_PARTITIONS))); + + String id = (i < 10 ? "0" + i: "" + i); + + m.put("index", "z_" + id + "_rdate\u0000" + id, ""); + writer.addMutation(m); + } + writer.close(); + } catch (CBException e) { + e.printStackTrace(); + } catch (CBSecurityException e) { + e.printStackTrace(); + } catch (TableNotFoundException e) { + e.printStackTrace(); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query-ext/src/test/java/SampleGVData.java ---------------------------------------------------------------------- diff --git a/partition/common-query-ext/src/test/java/SampleGVData.java b/partition/common-query-ext/src/test/java/SampleGVData.java new file mode 100644 index 0000000..d8168de --- /dev/null +++ b/partition/common-query-ext/src/test/java/SampleGVData.java @@ -0,0 +1,182 @@ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.io.Text; + +import cloudbase.core.client.BatchWriter; +import cloudbase.core.client.CBException; +import cloudbase.core.client.CBSecurityException; +import cloudbase.core.client.Connector; +import cloudbase.core.client.Instance; +import cloudbase.core.client.MultiTableBatchWriter; +import cloudbase.core.client.TableExistsException; +import cloudbase.core.client.TableNotFoundException; +import cloudbase.core.client.mock.MockInstance; +import cloudbase.core.data.Mutation; +import cloudbase.core.security.Authorizations; + +// For use in testing the Date Filter and Frequency Filter classes +public class SampleGVData +{ 
+ + public static int NUM_PARTITIONS = 2; + + + public static Connector initConnector() + { + Instance instance = new MockInstance(); + + try + { + Connector connector = instance.getConnector("root", "password".getBytes()); + + // set up table + connector.tableOperations().create("partition"); + + // set up root's auths + connector.securityOperations().changeUserAuthorizations("root", new Authorizations("ALPHA,BETA,GAMMA".split(","))); + + return connector; + } + catch (CBException e) + { + e.printStackTrace(); + } + catch (CBSecurityException e) + { + e.printStackTrace(); + } + catch (TableExistsException e) + { + e.printStackTrace(); + } + + return null; + } + + public static Collection<Map<String, String>> sampleData() + { + List<Map<String, String>> list = new ArrayList<Map<String, String>>(); + Map<String, String> item; + + item = new HashMap<String, String>(); + item.put("a", "a"); + item.put("b", "b"); + + //This one is like RB + item.put("date-start", "2009-01-01"); + item.put("date-end", "2011-02-24"); + item.put("date-update", "2011-02-24T00:00:00Z"); + item.put("frequency", "1250000000"); + item.put("bandwidth", "500000000"); + item.put("version", "1"); + list.add(item); + + item = new HashMap<String, String>(); + item.put("a", "a"); + item.put("b", "b"); + list.add(item); + + //This one is like GV + item = new HashMap<String, String>(); + item.put("a", "a"); + item.put("b", "b"); + item.put("date-start", "2010-01-01"); + item.put("date-update", "2010-01-23"); + item.put("frequency", "1150000000"); + item.put("bandwidth", "300000000"); + list.add(item); + + item = new HashMap<String, String>(); + item.put("a", "a"); + item.put("b", "b"); + item.put("date-start", "2009-01-01"); + item.put("date-end", "2011-02-24"); + item.put("date-update", "2008-01-23"); + list.add(item); + + item = new HashMap<String, String>(); + item.put("a", "a"); + item.put("b", "b"); + list.add(item); + + return list; + } + + + public static void writeDenSerialized(Connector 
connector, Collection<Map<String, String>> data) + { + // write sample data + MultiTableBatchWriter mtbw = connector.createMultiTableBatchWriter(200000, 10000, 1); + try + { + BatchWriter writer; + if (mtbw != null) + { + writer = mtbw.getBatchWriter("partition"); + } + else + { + writer = connector.createBatchWriter("partition", 200000, 10000, 1); + } + int count = 0; + Mutation m; + for (Map<String, String> object : data) + { + count++; + String id = (count < 10 ? "0" + count : "" + count); + Text partition = new Text("" + (count % NUM_PARTITIONS)); + + StringBuilder value = new StringBuilder(); + boolean first = true; + for (Entry<String, String> entry : object.entrySet()) + { + if (!first) + { + value.append("\u0000"); + } + else + { + first = false; + } + value.append(entry.getKey()); + value.append("\uFFFD"); + value.append(entry.getValue()); + + // write the general index mutation + m = new Mutation(partition); + m.put("index", entry.getValue() + "\u0000" + id, ""); + writer.addMutation(m); + + // write the specific index mutation + m = new Mutation(partition); + m.put("index", entry.getKey() + "//" + entry.getValue() + "\u0000" + id, ""); + writer.addMutation(m); + } + + // write the event mutation + m = new Mutation(partition); + m.put("event", id, value.toString()); + writer.addMutation(m); + } + writer.close(); + } + catch (CBException e) + { + e.printStackTrace(); + } + catch (CBSecurityException e) + { + e.printStackTrace(); + } + catch (TableNotFoundException e) + { + e.printStackTrace(); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query-ext/src/test/java/SampleJTSData.java ---------------------------------------------------------------------- diff --git a/partition/common-query-ext/src/test/java/SampleJTSData.java b/partition/common-query-ext/src/test/java/SampleJTSData.java new file mode 100644 index 0000000..41df658 --- /dev/null +++ b/partition/common-query-ext/src/test/java/SampleJTSData.java @@ 
-0,0 +1,171 @@ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.io.Text; + +import cloudbase.core.client.BatchWriter; +import cloudbase.core.client.CBException; +import cloudbase.core.client.CBSecurityException; +import cloudbase.core.client.Connector; +import cloudbase.core.client.Instance; +import cloudbase.core.client.MultiTableBatchWriter; +import cloudbase.core.client.TableExistsException; +import cloudbase.core.client.TableNotFoundException; +import cloudbase.core.client.mock.MockInstance; +import cloudbase.core.data.Mutation; +import cloudbase.core.security.Authorizations; + +// For use in testing the Date Filter and Frequency Filter classes +public class SampleJTSData +{ + + public static int NUM_PARTITIONS = 2; + + + public static Connector initConnector() + { + Instance instance = new MockInstance(); + + try + { + Connector connector = instance.getConnector("root", "password".getBytes()); + + // set up table + connector.tableOperations().create("partition"); + + // set up root's auths + connector.securityOperations().changeUserAuthorizations("root", new Authorizations("ALPHA,BETA,GAMMA".split(","))); + + return connector; + } + catch (CBException e) + { + e.printStackTrace(); + } + catch (CBSecurityException e) + { + e.printStackTrace(); + } + catch (TableExistsException e) + { + e.printStackTrace(); + } + + return null; + } + + public static Collection<Map<String, String>> sampleData() + { + List<Map<String, String>> list = new ArrayList<Map<String, String>>(); + Map<String, String> item; + + item = new HashMap<String, String>(); + item.put("geometry-contour", "SDO_GEOMETRY(2007, 8307, NULL, SDO_ELEM_INFO_ARRAY(1, 1003, 1), SDO_ORDINATE_ARRAY(91.985, -12.108, 94.657, -12.059, 98.486, -11.988, 101.385, -12.296, 102.911, -12.569, 103.93, -12.852, 105.005, -12.531, 106.37, -12.204, 108.446, -11.503, 109.585, -10.88, 
110.144, -10.207, 108.609, -9.573, 106.05, -8.535, 104.145, -7.606, 102.191, -7.522, 99.522, -7.691, 97.64, -7.606, 95.482, -7.947, 94.546, -8.084, 92.465, -8.605, 90.554, -9.366, 90.197, -10.436, 89.84, -11.729, 90.554, -12.175, 91.985, -12.108))"); + item.put("beam-name", "OPTUS D1 Ku-BAND NATIONAL A & B AUSTRALIA Downlink"); + list.add(item); + //This is Australia + //Points like 22S 135E are in the beam + + //This one is like GV + item = new HashMap<String, String>(); + item.put("beam-name", "AMC 1 Ku-BAND ZONAL NORTH AMERICA Down HV"); + item.put("geometry-contour", "SDO_GEOMETRY(2007, 8307, NULL, SDO_ELEM_INFO_ARRAY(1, 1003, 1), SDO_ORDINATE_ARRAY(-70.838, 39.967, -70.506, 40.331, -70.698, 41.679, -71.179, 42.401, -71.578, 42.38, -72.994, 42.924, -74.353, 43.242, -75.715, 43.26, -77.318, 42.981, -78.684, 42.774, -80.05, 42.491, -82.005, 42.517, -83.608, 42.312, -84.977, 41.805, -86.58, 41.525, -88.127, 41.02, -89.731, 40.741, -90.905, 41.582, -92.264, 41.9, -93.861, 42.147, -95.411, 41.341, -96.257, 40.076, -97.222, 38.737, -98.011, 37.17, -98.031, 35.593, -97.691, 34.312, -96.875, 33.25, -97.307, 31.904, -97.916, 30.561, -98.702, 29.295, -99.134, 27.949, -98.14, 26.884, -97.205, 25.821, -95.842, 25.803, -94.42, 25.784, -92.876, 26.064, -91.277, 26.043, -90.085, 26.553, -88.729, 26.01, -87.38, 24.941, -86.031, 23.797, -84.616, 23.253, -83.256, 23.01, -81.887, 23.517, -80.866, 24.555, -80.254, 26.124, -79.642, 27.693, -78.444, 28.728, -77.486, 29.542, -76.463, 30.805, -76.088, 32.377, -75.656, 33.723, -76.051, 35.305, -75.442, 36.649, -74.426, 37.386, -73.228, 38.422, -72.032, 39.232, -70.838, 39.967))"); + list.add(item); + //This is North America + //Points 39°44'21.00"N 104°59'3.00"W (Denver) are in the footprint + + item = new HashMap<String, String>(); + item.put("beam-name", "testa"); + item.put("beam-footprint", "MULTIPOLYGON (((-169.286 40.431, -164.971 39.992, -155.397 38.482, -146.566 36.233, -136.975 32.539, -128.124 27.742, -121.946 24.548, 
-116.849 21.339, -112.156 17.479, -109.391 14.206, -107.301 11.715, -105.274 9.477, -103.443 8.229, -102.108 7.7, -99.109 7.428, -96.681 7.745, -93.894 8.843, -89.917 11.687, -85.953 15.017, -81.148 17.266, -78.145 17.986, -75.582 17.887, -68.1 17.987, -64.696 18.493, -61.445 19.38, -60.094 20.288, -59.315 21.564, -57.026 26.51, -55.089 30.962, -53.59 33.657, -52.495 34.691, -50.468 36.204, -46.146 38.672, -41.684 40.663, -37.914 42.055, -33.806 43.082, -27.523 44.149, -21.645 44.96, -16.578 45.406, -13.807 45.771, -14.929 50.108, -16.186 53.919, -17.051 56.0, -18.388 58.824, -19.861 61.567, -21.807 64.188, -23.104 65.742, -25.28 67.904, -27.699 69.823, -28.955 70.728, -32.415 72.768, -34.968 73.998, -38.468 75.309, -48.292 73.025, -56.545 71.12, -64.023 70.474, -72.753 70.357, -78.41 70.827, -80.466 71.093, -82.412 71.876, -83.02 72.944, -83.175 74.04, -82.493 74.782, -82.412 75.552, -82.697 76.778, -84.041 78.398, -86.316 81.078, -104.098 80.819, -110.861 80.482, -115.73 80.17, -120.936 79.669, -125.84 79.176, -126.696 79.02, -134.316 77.732, -139.505 76.478, -144.823 74.826, -148.231 73.417, -151.517 71.687, -153.87 70.165, -154.536 69.672, -155.868 68.678, -156.482 68.098, -158.281 66.421, -159.716 64.804, -160.996 63.126, -161.878 61.786, -163.046 59.875, -164.369 57.254, -165.563 54.479, -166.73 51.089, -167.811 47.267, -168.581 44.041, -169.286 40.431)), ((-171.333 23.244, -171.523 18.894, -170.127 18.986, -161.559 18.555, -156.977 18.134, -153.574 18.116, -151.108 18.324, -149.947 18.45, -149.018 18.957, -148.515 19.822, -148.524 20.914, -149.018 21.766, -149.947 22.272, -152.185 23.054, -155.563 23.434, -158.075 23.75, -160.272 24.034, -162.184 24.008, -163.514 23.99, -164.595 23.976, -166.52 23.687, -169.159 23.18, -171.333 23.244)))"); + list.add(item); +// this point should be in there... 
+ // -164 40 - somewhere near hawaii + + item = new HashMap<String, String>(); + item.put("beam-name", "testb"); + item.put("beam-footprint", "POLYGON ((-140.153 34.772, -140.341 33.272, -137.024 33.026, -132.723 32.369, -130.947 31.916, -128.664 31.225, -125.293 29.612, -121.813 27.871, -118.699 25.892, -115.589 23.79, -112.593 21.875, -109.136 19.335, -106.939 16.701, -105.006 14.97, -104.195 14.407, -103.049 13.659, -100.363 12.717, -98.063 12.288, -94.299 11.612, -90.825 11.097, -87.997 11.584, -86.815 12.109, -86.163 12.893, -85.014 14.342, -83.804 15.788, -82.104 16.998, -80.413 17.269, -78.005 16.574, -76.181 16.531, -74.65 16.68, -73.552 17.392, -72.957 18.3, -72.917 19.651, -73.526 21.325, -74.913 23.018, -76.036 24.519, -76.159 26.428, -75.741 28.447, -74.257 30.072, -72.771 31.331, -70.517 34.328, -69.638 36.04, -68.624 39.467, -68.015 41.851, -67.607 43.501, -67.548 45.528, -67.586 47.308, -68.601 49.066, -69.868 50.07, -71.621 50.778, -73.285 50.888, -74.9 50.926, -76.994 50.975, -79.332 50.846, -81.066 50.887, -83.842 51.136, -86.569 51.016, -87.95 50.864, -90.831 50.563, -94.27 50.644, -98.068 50.733, -102.937 51.032, -106.455 51.484, -109.973 51.936, -114.119 52.402, -117.363 53.031, -119.899 53.276, -123.243 53.539, -127.017 54.427, -130.519 55.431, -133.643 56.058, -134.826 56.279, -135.354 55.029, -135.792 53.864, -136.168965072136 52.8279962761917, -136.169 52.828, -136.169497186166 52.8264970826432, -136.192 52.763, -136.556548517884 51.6453176911637, -136.703232746756 51.2152965828266, -136.781220290925 50.9919311116929, -136.793 50.959, -136.80274055379 50.9259886895048, -136.992 50.295, -137.200898649547 49.5808675274021, -137.202 49.581, -137.200962495599 49.5806459535167, -137.360714473458 49.0197683891632, -137.459 48.677, -137.462166719028 48.6649126473121, -137.471 48.634, -137.515105536699 48.4619710228524, -137.74710368039 47.5528216167105, -137.793718522461 47.3758260237407, -137.854 47.152, -137.977773277882 46.6610808974241, 
-138.044 46.403, -138.330834102374 45.1674736036557, -138.365 45.019, -138.38180854655 44.9421315900087, -138. 449801069917 44.6389849661384, -138.485 44.484, -138.497077239724 44.4262941289417, -138.536 44.25, -138.622787032392 43.8206200438395, -138.743816168807 43.232032787661, -138.981390224617 42.0843314825185, -138.989 42.048, -138.990605533614 42.0389442888447, -138.991 42.037, -138.997785044232 41.9994454595406, -139.004 41.969, -139.035645873997 41.7890661698517, -139.061212567475 41.6462082823816, -139.428 39.584, -139.673 38.073, -139.713116752585 37.8001474769807, -139.766 37.457, -139.764942047737 37.4567768906428, -139.898 36.573, -139.897723683259 36.5729429963606, -139.986 35.994, -140.04777653037 35.5462970502163, -140.094 35.232, -140.090797568766 35.2315144621917, -140.153 34.772))"); + list.add(item); + + + + //London is in niether - 51°30'0.00"N 0° 7'0.00"W + return list; + } + + + public static void writeDenSerialized(Connector connector, Collection<Map<String, String>> data) + { + // write sample data + MultiTableBatchWriter mtbw = connector.createMultiTableBatchWriter(200000, 10000, 1); + try + { + BatchWriter writer; + if (mtbw != null) + { + writer = mtbw.getBatchWriter("partition"); + } + else + { + writer = connector.createBatchWriter("partition", 200000, 10000, 1); + } + int count = 0; + Mutation m; + for (Map<String, String> object : data) + { + count++; + String id = (count < 10 ? 
"0" + count : "" + count); + Text partition = new Text("" + (count % NUM_PARTITIONS)); + + StringBuilder value = new StringBuilder(); + boolean first = true; + for (Entry<String, String> entry : object.entrySet()) + { + if (!first) + { + value.append("\u0000"); + } + else + { + first = false; + } + value.append(entry.getKey()); + value.append("\uFFFD"); + value.append(entry.getValue()); + + // write the general index mutation + m = new Mutation(partition); + m.put("index", entry.getValue() + "\u0000" + id, ""); + writer.addMutation(m); + + // write the specific index mutation + m = new Mutation(partition); + m.put("index", entry.getKey() + "//" + entry.getValue() + "\u0000" + id, ""); + writer.addMutation(m); + } + + // write the event mutation + m = new Mutation(partition); + m.put("event", id, value.toString()); + writer.addMutation(m); + } + writer.close(); + } + catch (CBException e) + { + e.printStackTrace(); + } + catch (CBSecurityException e) + { + e.printStackTrace(); + } + catch (TableNotFoundException e) + { + e.printStackTrace(); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query/pom.xml ---------------------------------------------------------------------- diff --git a/partition/common-query/pom.xml b/partition/common-query/pom.xml new file mode 100644 index 0000000..6db84bf --- /dev/null +++ b/partition/common-query/pom.xml @@ -0,0 +1,103 @@ +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <!--<parent>--> + <!--<groupId>sitestore</groupId>--> + <!--<artifactId>sitestore</artifactId>--> + <!--<version>2.0.0-SNAPSHOT</version>--> + <!--</parent>--> + + <parent> + <groupId>mvm.rya</groupId> + <artifactId>parent</artifactId> + <version>2.0.0-SNAPSHOT</version> + </parent> + + <groupId>sitestore.common</groupId> + 
<artifactId>common-query</artifactId> + <name>common-query (${project.version})</name> + <version>2.0.0-SNAPSHOT</version> + <description>A set of filters and iterators for cloudbase queries</description> + + <properties> + <skipTests>true</skipTests> + </properties> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-source-plugin</artifactId> + <version>2.1.2</version> + <executions> + <execution> + <id>attach-sources</id> + <phase>install</phase> + <goals> + <goal>jar</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <version>2.7.2</version> + <configuration> + <skipTests>${skipTests}</skipTests> + </configuration> + </plugin> + </plugins> + </build> + <!--<scm>--> + <!--<connection>${scmLocation}/tto/ss/common/trunk/common-query</connection>--> + <!--</scm>--> + <dependencies> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + </dependency> + <dependency> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + <version>1.2.14</version> + </dependency> + <dependency> + <groupId>cloudbase</groupId> + <artifactId>cloudbase-core</artifactId> + </dependency> + <dependency> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + <version>1.0.4</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + </dependency> + <dependency> + <groupId>cloudbase</groupId> + <artifactId>cloudbase-start</artifactId> + </dependency> + <dependency> + <groupId>commons-codec</groupId> + <artifactId>commons-codec</artifactId> + <version>1.3</version> + </dependency> + <dependency> + <groupId>org.apache.thrift</groupId> + <artifactId>thrift</artifactId> + </dependency> + <dependency> + <groupId>com.vividsolutions</groupId> + <artifactId>jts</artifactId> + <version>1.11</version> + </dependency> + 
<dependency> + <groupId>xerces</groupId> + <artifactId>xercesImpl</artifactId> + <version>2.8.1</version> + </dependency> + </dependencies> +</project> http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query/src/main/java/ss/cloudbase/core/iterators/CellLevelFilteringIterator.java ---------------------------------------------------------------------- diff --git a/partition/common-query/src/main/java/ss/cloudbase/core/iterators/CellLevelFilteringIterator.java b/partition/common-query/src/main/java/ss/cloudbase/core/iterators/CellLevelFilteringIterator.java new file mode 100644 index 0000000..e0126fa --- /dev/null +++ b/partition/common-query/src/main/java/ss/cloudbase/core/iterators/CellLevelFilteringIterator.java @@ -0,0 +1,163 @@
package ss.cloudbase.core.iterators;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.Text;

import ss.cloudbase.core.iterators.filter.ogc.OGCFilter;
import cloudbase.core.data.ByteSequence;
import cloudbase.core.data.Key;
import cloudbase.core.data.Range;
import cloudbase.core.data.Value;
import cloudbase.core.iterators.IteratorEnvironment;
import cloudbase.core.iterators.SortedKeyValueIterator;
import cloudbase.core.iterators.WrappingIterator;

/**
 * Passes through only the cells whose whole record satisfies an {@link OGCFilter}.
 *
 * <p>A cell's column qualifier is read as {@code docId \u0000 field [fieldEnd ...]}.
 * For every new document id seen on the source, a second iterator ({@link #checkSource})
 * scans all cells of that document into a field-to-value map, the filter is evaluated
 * on the map, and the verdict is cached by document id so later cells of the same
 * document are not re-evaluated.
 */
public class CellLevelFilteringIterator extends WrappingIterator {
    private static final Collection<ByteSequence> EMPTY_SET = Collections.emptySet();

    /** The OGC Filter string **/
    public static final String OPTION_FILTER = "filter";

    /** The character or characters that defines the end of the field in the column qualifier. Defaults to '@' **/
    public static final String OPTION_FIELD_END = "fieldEnd";

    /** Independent iterator used to read a complete record without moving the main source. */
    protected SortedKeyValueIterator<Key, Value> checkSource;

    /**
     * Filter verdict per document id.
     * NOTE(review): the key is the document id only (no row or column family) — confirm
     * that document ids are unique across rows.
     */
    protected Map<String, Boolean> cache = new HashMap<String, Boolean>();

    protected OGCFilter filter;

    protected String fieldEnd = "@";

    public CellLevelFilteringIterator() {}

    /**
     * Copy constructor used by {@link #deepCopy(IteratorEnvironment)}.
     *
     * @param other iterator to copy; its sources are deep-copied, its cache is shared
     * @param env   environment handed to the sources' {@code deepCopy}
     */
    public CellLevelFilteringIterator(CellLevelFilteringIterator other, IteratorEnvironment env) {
        setSource(other.getSource().deepCopy(env));
        checkSource = other.checkSource.deepCopy(env);
        cache = other.cache;
        // The filter is only ever created in init(), which is not run on a copy; without
        // this assignment findTop() dereferences a null filter on every deep copy.
        // NOTE(review): the filter instance is shared — assumes OGCFilter.accept keeps no
        // per-call state; confirm.
        filter = other.filter;
        fieldEnd = other.fieldEnd;
    }

    @Override
    public CellLevelFilteringIterator deepCopy(IteratorEnvironment env) {
        return new CellLevelFilteringIterator(this, env);
    }

    /**
     * Sets up the record-checking source and the filter.
     *
     * <p>When the source is a {@code GMDenIntersectingIterator} or a
     * {@code SortedRangeIterator}, a copy of its {@code docSource} is used for record
     * scans; otherwise a copy of the source itself. The filter is initialised from
     * {@code options}; {@link #OPTION_FIELD_END}, when present, overrides the field
     * terminator.
     */
    @Override
    public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
        super.init(source, options, env);
        if (source instanceof GMDenIntersectingIterator) {
            checkSource = ((GMDenIntersectingIterator) source).docSource.deepCopy(env);
        } else if (source instanceof SortedRangeIterator) {
            checkSource = ((SortedRangeIterator) source).docSource.deepCopy(env);
        } else {
            checkSource = source.deepCopy(env);
        }
        filter = new OGCFilter();
        filter.init(options);

        if (options.containsKey(OPTION_FIELD_END)) {
            fieldEnd = options.get(OPTION_FIELD_END);
        }
    }

    @Override
    public void next() throws IOException {
        getSource().next();
        findTop();
    }

    /** Returns the column qualifier up to (not including) the first {@code \u0000}, or all of it if there is none. */
    protected String getDocId(Key key) {
        String colq = key.getColumnQualifier().toString();
        int i = colq.indexOf("\u0000");
        if (i == -1) {
            i = colq.length();
        }
        return colq.substring(0, i);
    }

    /** First key of the record: same row and column family, qualifier {@code docId \u0000}. */
    protected Key getRecordStartKey(Key key, String docId) {
        return new Key(key.getRow(), key.getColumnFamily(), new Text(docId + "\u0000"));
    }

    /** Last key of the record: same row and column family, qualifier {@code docId \u0000 \uFFFD}. */
    protected Key getRecordEndKey(Key key, String docId) {
        return new Key(key.getRow(), key.getColumnFamily(), new Text(docId + "\u0000\uFFFD"));
    }

    /**
     * Extracts the field name from the column qualifier: the text after the first
     * {@code \u0000} up to the next occurrence of {@link #fieldEnd} (or the end).
     *
     * @return the field name, or {@code null} when the qualifier has no {@code \u0000}
     */
    protected String getField(Key key, Value value) {
        String colq = key.getColumnQualifier().toString();
        int i = colq.indexOf("\u0000");
        if (i == -1) {
            return null;
        }

        int j = colq.indexOf(fieldEnd, i + 1);
        if (j == -1) {
            j = colq.length();
        }

        return colq.substring(i + 1, j);
    }

    /** Returns the cell value as a string. */
    protected String getValue(Key key, Value value) {
        return value.toString();
    }

    /**
     * Advances the source until its top cell belongs to a document accepted by the
     * filter, or the source is exhausted.
     */
    protected void findTop() throws IOException {
        Map<String, String> record = new HashMap<String, String>();

        while (getSource().hasTop()) {
            Key top = getSource().getTopKey();
            String docId = getDocId(top);
            boolean goodKey;

            // if the document is in the cache, then we have already scanned it
            if (cache.containsKey(docId)) {
                goodKey = cache.get(docId);
            } else {
                // we need to scan the whole record into a map and evaluate the filter

                // seek the check source to the beginning of the record
                Range range = new Range(
                    getRecordStartKey(top, docId),
                    true,
                    getRecordEndKey(top, docId),
                    true
                );

                checkSource.seek(range, EMPTY_SET, false);

                // read in the record to the map
                record.clear();
                while (checkSource.hasTop()) {
                    String field = getField(checkSource.getTopKey(), checkSource.getTopValue());
                    if (field != null) {
                        record.put(field, getValue(checkSource.getTopKey(), checkSource.getTopValue()));
                    }
                    checkSource.next();
                }

                // evaluate the filter
                goodKey = filter.accept(record);

                // cache the result so that we don't do this for every cell
                cache.put(docId, goodKey);
            }

            if (goodKey) {
                return;
            }
            getSource().next();
        }
    }

    @Override
    public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
        getSource().seek(range, columnFamilies, inclusive);
        findTop();
    }
}
 http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/partition/common-query/src/main/java/ss/cloudbase/core/iterators/CellLevelRecordIterator.java
---------------------------------------------------------------------- diff --git a/partition/common-query/src/main/java/ss/cloudbase/core/iterators/CellLevelRecordIterator.java b/partition/common-query/src/main/java/ss/cloudbase/core/iterators/CellLevelRecordIterator.java new file mode 100644 index 0000000..1f59882 --- /dev/null +++ b/partition/common-query/src/main/java/ss/cloudbase/core/iterators/CellLevelRecordIterator.java @@ -0,0 +1,144 @@
package ss.cloudbase.core.iterators;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.Text;

import ss.cloudbase.core.iterators.filter.CBConverter;

import cloudbase.core.data.Key;
import cloudbase.core.data.Range;
import cloudbase.core.data.Value;
import cloudbase.core.iterators.IteratorEnvironment;
import cloudbase.core.iterators.SkippingIterator;
import cloudbase.core.iterators.SortedKeyValueIterator;

/**
 * Collapses the consecutive cells of one document into a single key/value pair.
 *
 * <p>A cell's column qualifier is read as {@code docId \u0000 field [fieldEnd ...]}.
 * All cells sharing row, column family and document id are gathered into a
 * field-to-value map (repeated fields are joined with {@link #multipleDelimiter}),
 * which the {@link CBConverter} turns into the top value. The top key carries the
 * bare document id as its column qualifier.
 */
public class CellLevelRecordIterator extends SkippingIterator {
    /** Option naming the text that terminates the field inside the column qualifier. */
    public static final String OPTION_FIELD_END = "fieldEnd";
    /** Option naming the text placed between multiple values of the same field. */
    public static final String OPTION_MULTIPLE_DELIMITER = "multipleDelimiter";

    protected String multipleDelimiter = ",";

    protected Key topKey;
    protected Value topValue;
    protected String fieldEnd = "@";
    protected String docId = null;
    protected CBConverter converter = new CBConverter();

    /**
     * Creates an independent iterator over a deep copy of the source that carries the
     * same configuration as this one.
     */
    @Override
    public SortedKeyValueIterator<Key, Value> deepCopy(IteratorEnvironment env) {
        CellLevelRecordIterator itr = new CellLevelRecordIterator();
        itr.setSource(this.getSource().deepCopy(env));
        itr.fieldEnd = this.fieldEnd;
        // init() is not run on the copy, so every option it applied must be carried
        // over by hand; otherwise the copy falls back to the defaults.
        itr.multipleDelimiter = this.multipleDelimiter;
        // NOTE(review): the configured converter is shared rather than rebuilt —
        // assumes CBConverter holds configuration only; confirm.
        itr.converter = this.converter;
        return itr;
    }

    /**
     * Initialises the converter from {@code options} and applies
     * {@link #OPTION_FIELD_END} and {@link #OPTION_MULTIPLE_DELIMITER} when present.
     */
    @Override
    public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
        super.init(source, options, env);
        converter.init(options);
        if (options.containsKey(OPTION_FIELD_END)) {
            fieldEnd = options.get(OPTION_FIELD_END);
        }

        if (options.containsKey(OPTION_MULTIPLE_DELIMITER)) {
            multipleDelimiter = options.get(OPTION_MULTIPLE_DELIMITER);
        }
    }

    @Override
    public void next() throws IOException {
        consume();
    }

    /**
     * A record already assembled by {@link #consume()} counts as a top even after the
     * underlying source has been read to its end.
     */
    @Override
    public boolean hasTop() {
        return getSource().hasTop() || topKey != null || topValue != null;
    }

    @Override
    public Key getTopKey() {
        return topKey;
    }

    @Override
    public Value getTopValue() {
        return topValue;
    }

    /** Returns the column qualifier up to (not including) the first {@code \u0000}, or all of it if there is none. */
    protected String getDocId(Key key) {
        String colq = key.getColumnQualifier().toString();
        int i = colq.indexOf("\u0000");
        if (i == -1) {
            i = colq.length();
        }
        return colq.substring(0, i);
    }

    /** Builds the record key: the given key with its column qualifier replaced by the document id. */
    protected Key buildTopKey(Key key, String docId) {
        return new Key(key.getRow(), key.getColumnFamily(), new Text(docId), key.getColumnVisibility(), key.getTimestamp());
    }

    /**
     * Extracts the field name from the column qualifier: the text after the first
     * {@code \u0000} up to the next occurrence of {@link #fieldEnd} (or the end).
     *
     * @return the field name, or {@code null} when the qualifier has no {@code \u0000}
     */
    protected String getField(Key key, Value value) {
        String colq = key.getColumnQualifier().toString();
        int i = colq.indexOf("\u0000");
        if (i == -1) {
            return null;
        }

        int j = colq.indexOf(fieldEnd, i + 1);
        if (j == -1) {
            j = colq.length();
        }

        return colq.substring(i + 1, j);
    }

    /** Returns the cell value as a string. */
    protected String getValue(Key key, Value value) {
        return value.toString();
    }

    /** First key of the record: same row and column family, qualifier {@code docId}. */
    protected Key getRecordStartKey(Key key, String docId) {
        return new Key(key.getRow(), key.getColumnFamily(), new Text(docId));
    }

    /** Last key of the record: same row and column family, qualifier {@code docId \u0000 \uFFFD}. */
    protected Key getRecordEndKey(Key key, String docId) {
        return new Key(key.getRow(), key.getColumnFamily(), new Text(docId + "\u0000\uFFFD"));
    }

    /**
     * Reads every cell of the document at the source's current position and publishes
     * the assembled record as the new top; clears the top when the source is exhausted.
     */
    @Override
    protected void consume() throws IOException {
        if (!getSource().hasTop()) {
            topKey = null;
            topValue = null;
            return;
        }

        // build the top key
        Key first = getSource().getTopKey();
        docId = getDocId(first);
        topKey = buildTopKey(first, docId);

        Range range = new Range(
            getRecordStartKey(first, docId),
            true,
            getRecordEndKey(first, docId),
            true
        );

        Map<String, String> record = new HashMap<String, String>();
        while (getSource().hasTop() && range.contains(getSource().getTopKey())) {
            String field = getField(getSource().getTopKey(), getSource().getTopValue());
            if (field != null) {
                String cell = getValue(getSource().getTopKey(), getSource().getTopValue());
                String existing = record.get(field);
                // a repeated field accumulates its values, separated by the delimiter
                record.put(field, existing == null ? cell : existing + multipleDelimiter + cell);
            }
            getSource().next();
        }

        topValue = converter.toValue(record);
    }
}
