http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/dep/CloudbaseStorage.java ---------------------------------------------------------------------- diff --git a/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/dep/CloudbaseStorage.java b/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/dep/CloudbaseStorage.java deleted file mode 100644 index bcdd8cc..0000000 --- a/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/dep/CloudbaseStorage.java +++ /dev/null @@ -1,299 +0,0 @@ -//package mvm.rya.cloudbase.pig.dep; -// -//import cloudbase.core.CBConstants; -//import cloudbase.core.client.mapreduce.CloudbaseInputFormat; -//import cloudbase.core.data.*; -//import cloudbase.core.security.Authorizations; -//import org.apache.commons.codec.binary.Base64; -//import org.apache.hadoop.conf.Configuration; -//import org.apache.hadoop.fs.Path; -//import org.apache.hadoop.io.Text; -//import org.apache.hadoop.io.Writable; -//import org.apache.hadoop.io.WritableComparable; -//import org.apache.hadoop.mapreduce.*; -//import org.apache.pig.LoadFunc; -//import org.apache.pig.OrderedLoadFunc; -//import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit; -//import org.apache.pig.data.DataByteArray; -//import org.apache.pig.data.Tuple; -//import org.apache.pig.data.TupleFactory; -// -//import java.io.*; -//import java.math.BigInteger; -//import java.util.*; -// -///** -// */ -//@Deprecated -//public class CloudbaseStorage extends LoadFunc -// implements OrderedLoadFunc -//{ -// -// protected String user; -// protected String password; -// protected Authorizations auths; -// protected String zk; -// protected String instanceName; -// protected String startRow; -// protected String endRow; -// protected Collection<Range> ranges; -// protected RecordReader reader; -// -// public CloudbaseStorage(String startRow, String endRow, String instanceName, String zk, String user, String password) { -// auths = CBConstants.NO_AUTHS; -// this.startRow = startRow; -// this.endRow = endRow; -// this.instanceName = instanceName; -// this.zk = zk; -// this.user = user; -// this.password = password; -// } -// -// protected void addRange(Range range) { -// if(ranges == null) { -// ranges = new ArrayList<Range>(); -// } -// ranges.add(range); -// } -// -// @Override -// public void setLocation(String tableName, Job job) throws IOException { -// try { -// Configuration conf = job.getConfiguration(); -// //TODO: ? 
-// conf.setBoolean("mapred.reduce.tasks.speculative.execution", false); -// conf.setBoolean("mapred.map.tasks.speculative.execution", false); -// conf.set("io.sort.mb", "256"); -// conf.setBoolean("mapred.compress.map.output", true); -// conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"); -// -// if (!conf.getBoolean(INSTANCE_HAS_BEEN_SET, false)) -// CloudbaseInputFormat.setZooKeeperInstance(job, instanceName, zk); -// if (!conf.getBoolean(INPUT_INFO_HAS_BEEN_SET, false)) -// CloudbaseInputFormat.setInputInfo(job, user, password.getBytes(), tableName, auths); -// System.out.println(tableName); -// conf.set(TABLE_NAME, tableName); -// if (ranges == null) { -// addRange(new Range(new Text(startRow), new Text(endRow))); -// } -//// List<Range> ranges = getRanges(job); -//// ranges.add(range); -//// System.out.println(ranges); -//// CloudbaseInputFormat.setRanges(job, ranges); -// CloudbaseInputFormat.setRanges(job, ranges); -//// CloudbaseInputFormat.fetchColumns(job, Collections.singleton(new Pair<Text, Text>())); -// } catch (IllegalStateException e) { -// throw new IOException(e); -// } -// } -// -// private static final String PREFIX = CloudbaseInputFormat.class.getSimpleName(); -// private static final String RANGES = PREFIX + ".ranges"; -// private static final String INSTANCE_HAS_BEEN_SET = PREFIX + ".instanceConfigured"; -// private static final String INPUT_INFO_HAS_BEEN_SET = PREFIX + ".configured"; -// private static final String TABLE_NAME = PREFIX + ".tablename"; -// -// protected static List<Range> getRanges(JobContext job) throws IOException { -// ArrayList<Range> ranges = new ArrayList<Range>(); -// for (String rangeString : job.getConfiguration().getStringCollection(RANGES)) { -// ByteArrayInputStream bais = new ByteArrayInputStream(Base64.decodeBase64(rangeString.getBytes())); -// Range range = new Range(); -// range.readFields(new DataInputStream(bais)); -// ranges.add(range); -// } -// return ranges; -// } -// -// @Override -// public String relativeToAbsolutePath(String location, Path curDir) throws IOException { -// return location; -// } -// -// @Override -// public InputFormat getInputFormat() throws IOException { -// -//// CloudbaseInputFormat format = new CloudbaseInputFormat() { -//// @Override -//// public List<InputSplit> getSplits(JobContext job) throws IOException { -//// try { -//// List<InputSplit> splits = super.getSplits(job); -//// List<InputSplit> outsplits = new ArrayList<InputSplit>(); -//// for (InputSplit inputSplit : splits) { -//// RangeInputSplit ris = (RangeInputSplit) inputSplit; -//// ByteArrayOutputStream bais = new ByteArrayOutputStream(); -//// DataOutputStream out = new DataOutputStream(bais); -//// ris.write(out); -//// out.close(); -//// MyRangeInputSplit rangeInputSplit = new MyRangeInputSplit(); -//// DataInputStream in = new DataInputStream(new ByteArrayInputStream(bais.toByteArray())); -//// rangeInputSplit.readFields(in); -//// in.close(); -//// String[] locations = inputSplit.getLocations(); -//// String[] newlocs = new String[locations.length]; -//// int i = 0; -//// for (String loc : locations) { -//// java.net.InetAddress inetAdd = java.net.InetAddress.getByName(loc); -//// newlocs[i] = inetAdd.getHostName(); -//// i++; -//// } -//// rangeInputSplit.locations = newlocs; -//// outsplits.add(rangeInputSplit); -//// } -//// return outsplits; -//// } catch (Exception e) { -//// throw new IOException(e); -//// } -//// } -//// }; -//// return format; -// -// return new CloudbaseInputFormat(); -// 
-// } -// -// @Override -// public void prepareToRead(RecordReader recordReader, PigSplit pigSplit) throws IOException { -// this.reader = recordReader; -// } -// -// @Override -// public Tuple getNext() throws IOException { -// try { -// if (reader.nextKeyValue()) { -// Key key = (Key) reader.getCurrentKey(); -// Value value = (Value) reader.getCurrentValue(); -// -// Text row = key.getRow(); -// Text cf = key.getColumnFamily(); -// Text cq = key.getColumnQualifier(); -// byte[] val_bytes = value.get(); -// Tuple tuple = TupleFactory.getInstance().newTuple(4); -// tuple.set(0, row); -// tuple.set(1, cf); -// tuple.set(2, cq); -// tuple.set(3, new DataByteArray(val_bytes)); -// return tuple; -// } -// } catch (Exception e) { -// throw new IOException(e); -// } -// return null; -// } -// -// @Override -// public WritableComparable<?> getSplitComparable(InputSplit inputSplit) throws IOException { -// //cannot get access to the range directly -// CloudbaseInputFormat.RangeInputSplit rangeInputSplit = (CloudbaseInputFormat.RangeInputSplit) inputSplit; -// ByteArrayOutputStream baos = new ByteArrayOutputStream(); -// DataOutputStream out = new DataOutputStream(baos); -// rangeInputSplit.write(out); -// out.close(); -// DataInputStream stream = new DataInputStream(new ByteArrayInputStream(baos.toByteArray())); -// Range range = new Range(); -// range.readFields(stream); -// stream.close(); -// return range; -// } -// -// public static class MyRangeInputSplit extends CloudbaseInputFormat.RangeInputSplit -// implements Writable { -// -// private static byte[] extractBytes(ByteSequence seq, int numBytes) { -// byte bytes[] = new byte[numBytes + 1]; -// bytes[0] = 0; -// for (int i = 0; i < numBytes; i++) -// if (i >= seq.length()) -// bytes[i + 1] = 0; -// else -// bytes[i + 1] = seq.byteAt(i); -// -// return bytes; -// } -// -// public static float getProgress(ByteSequence start, ByteSequence end, ByteSequence position) { -// int maxDepth = Math.min(Math.max(end.length(), start.length()), position.length()); -// BigInteger startBI = new BigInteger(extractBytes(start, maxDepth)); -// BigInteger endBI = new BigInteger(extractBytes(end, maxDepth)); -// BigInteger positionBI = new BigInteger(extractBytes(position, maxDepth)); -// return (float) (positionBI.subtract(startBI).doubleValue() / endBI.subtract(startBI).doubleValue()); -// } -// -// public float getProgress(Key currentKey) { -// if (currentKey == null) -// return 0.0F; -// if (range.getStartKey() != null && range.getEndKey() != null) { -// if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW) != 0) -// return getProgress(range.getStartKey().getRowData(), range.getEndKey().getRowData(), currentKey.getRowData()); -// if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM) != 0) -// return getProgress(range.getStartKey().getColumnFamilyData(), range.getEndKey().getColumnFamilyData(), currentKey.getColumnFamilyData()); -// if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM_COLQUAL) != 0) -// return getProgress(range.getStartKey().getColumnQualifierData(), range.getEndKey().getColumnQualifierData(), currentKey.getColumnQualifierData()); -// } -// return 0.0F; -// } -// -// /** -// * @deprecated Method getLength is deprecated -// */ -// -// public long getLength() -// throws IOException { -// Text startRow = range.isInfiniteStartKey() ? new Text(new byte[]{ -// -128 -// }) : range.getStartKey().getRow(); -// Text stopRow = range.isInfiniteStopKey() ? 
new Text(new byte[]{ -// 127 -// }) : range.getEndKey().getRow(); -// int maxCommon = Math.min(7, Math.min(startRow.getLength(), stopRow.getLength())); -// long diff = 0L; -// byte start[] = startRow.getBytes(); -// byte stop[] = stopRow.getBytes(); -// for (int i = 0; i < maxCommon; i++) { -// diff |= 255 & (start[i] ^ stop[i]); -// diff <<= 8; -// } -// -// if (startRow.getLength() != stopRow.getLength()) -// diff |= 255L; -// return diff + 1L; -// } -// -// public String[] getLocations() -// throws IOException { -// return locations; -// } -// -// public void readFields(DataInput in) -// throws IOException { -// range.readFields(in); -// int numLocs = in.readInt(); -// locations = new String[numLocs]; -// for (int i = 0; i < numLocs; i++) -// locations[i] = in.readUTF(); -// -// } -// -// public void write(DataOutput out) -// throws IOException { -// range.write(out); -// out.writeInt(locations.length); -// for (int i = 0; i < locations.length; i++) -// out.writeUTF(locations[i]); -// -// } -// -// public Range range; -// public String locations[]; -// -// -// public MyRangeInputSplit() { -// range = new Range(); -// locations = new String[0]; -// } -// -// MyRangeInputSplit(String table, Range range, String locations[]) { -// this.range = range; -// this.locations = locations; -// } -// } -//}
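Before moving to the next file: the deleted getRanges helper above round-trips Range objects through the job Configuration as Base64-encoded Writable bytes, and the commented-out getSplitComparable rehydrates a Range the same way from a RangeInputSplit's serialized form. For readers without access to the (non-public) Cloudbase classes, here is a minimal, self-contained sketch of that serialization pattern. The class name is illustrative, and Hadoop's Text stands in for Range — an assumption that works because both implement Writable:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.commons.codec.binary.Base64;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    public class WritableBase64RoundTrip {

        // Serialize a Writable to a Base64 string, the form in which
        // ranges end up stored as strings in the job Configuration.
        static String encode(Writable w) throws IOException {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            w.write(new DataOutputStream(baos));
            return new String(Base64.encodeBase64(baos.toByteArray()));
        }

        // Decode one entry back into a Writable, mirroring the loop in
        // the deleted getRanges(JobContext) helper.
        static void decode(String encoded, Writable target) throws IOException {
            ByteArrayInputStream bais =
                    new ByteArrayInputStream(Base64.decodeBase64(encoded.getBytes()));
            target.readFields(new DataInputStream(bais));
        }

        public static void main(String[] args) throws IOException {
            Text original = new Text("startRow");
            Text copy = new Text();
            decode(encode(original), copy);
            System.out.println(original.equals(copy));   // prints: true
        }
    }

The same round-trip works for any Writable, which is why the deleted code could freely copy splits and ranges through byte-array streams.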
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/dep/StatementPatternStorage.java ---------------------------------------------------------------------- diff --git a/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/dep/StatementPatternStorage.java b/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/dep/StatementPatternStorage.java deleted file mode 100644 index 6cc40bd..0000000 --- a/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/dep/StatementPatternStorage.java +++ /dev/null @@ -1,178 +0,0 @@ -//package mvm.rya.cloudbase.pig.dep; -// -//import cloudbase.core.client.ZooKeeperInstance; -//import cloudbase.core.data.Key; -//import cloudbase.core.data.Range; -//import com.google.common.io.ByteArrayDataInput; -//import com.google.common.io.ByteStreams; -//import mvm.mmrts.api.RdfCloudTripleStoreConstants; -//import mvm.mmrts.api.RdfCloudTripleStoreUtils; -//import mvm.rya.cloudbase.CloudbaseRdfDAO; -//import mvm.rya.cloudbase.query.DefineTripleQueryRangeFactory; -//import mvm.mmrts.rdftriplestore.inference.InferenceEngine; -//import org.apache.hadoop.conf.Configuration; -//import org.apache.hadoop.mapreduce.Job; -//import org.apache.pig.data.Tuple; -//import org.apache.pig.data.TupleFactory; -//import org.openrdf.model.Resource; -//import org.openrdf.model.Statement; -//import org.openrdf.model.URI; -//import org.openrdf.model.Value; -//import org.openrdf.model.vocabulary.RDF; -//import org.openrdf.query.MalformedQueryException; -//import org.openrdf.query.algebra.StatementPattern; -//import org.openrdf.query.algebra.Var; -//import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; -//import org.openrdf.query.parser.ParsedQuery; -//import org.openrdf.query.parser.QueryParser; -//import org.openrdf.query.parser.sparql.SPARQLParser; -// -//import java.io.IOException; -//import java.util.Collection; -//import java.util.Map; -//import java.util.Set; -// -//import static mvm.mmrts.api.RdfCloudTripleStoreConstants.*; -// -///** -// */ -//@Deprecated -//public class StatementPatternStorage extends CloudbaseStorage { -// protected RdfCloudTripleStoreConstants.TABLE_LAYOUT layout; -// protected String subject; -// protected String predicate; -// protected String object; -// private Value object_value; -// private Value predicate_value; -// private Value subject_value; -// -// DefineTripleQueryRangeFactory queryRangeFactory = new DefineTripleQueryRangeFactory(); -// -// -// public StatementPatternStorage(String subject, String predicate, String object, String instanceName, String zk, String user, String password) { -// super(null, null, instanceName, zk, user, password); -// this.subject = (subject != null && subject.length() > 0) ? subject : "?s"; -// this.predicate = (predicate != null && predicate.length() > 0) ? predicate : "?p"; -// this.object = (object != null && object.length() > 0) ? object : "?o"; -// } -// -// private Value getValue(Var subjectVar) { -// return subjectVar.hasValue() ? subjectVar.getValue() : null; -// } -// -// @Override -// public void setLocation(String tablePrefix, Job job) throws IOException { -// addStatementPatternRange(subject, predicate, object); -// addInferredRanges(tablePrefix, job); -//// range = entry.getValue(); -//// layout = entry.getKey(); -// if (layout == null) -// throw new IllegalArgumentException("Range and/or layout is null. 
Check the query"); -// String tableName = RdfCloudTripleStoreUtils.layoutPrefixToTable(layout, tablePrefix); -// super.setLocation(tableName, job); -// } -// -// protected void addInferredRanges(String tablePrefix, Job job) throws IOException { -// //inference engine -// CloudbaseRdfDAO rdfDAO = new CloudbaseRdfDAO(); -// rdfDAO.setConf(job.getConfiguration()); -// rdfDAO.setSpoTable(tablePrefix + TBL_SPO_SUFFIX); -// rdfDAO.setPoTable(tablePrefix + TBL_PO_SUFFIX); -// rdfDAO.setOspTable(tablePrefix + TBL_OSP_SUFFIX); -// rdfDAO.setNamespaceTable(tablePrefix + TBL_NS_SUFFIX); -// try { -// rdfDAO.setConnector(new ZooKeeperInstance(instanceName, zk).getConnector(user, password.getBytes())); -// } catch (Exception e) { -// throw new IOException(e); -// } -// rdfDAO.init(); -// InferenceEngine inferenceEngine = new InferenceEngine(); -// inferenceEngine.setConf(job.getConfiguration()); -// inferenceEngine.setRyaDAO(rdfDAO); -// inferenceEngine.init(); -// //is it subclassof or subpropertyof -// if(RDF.TYPE.equals(predicate_value)) { -// //try subclassof -// Collection<URI> parents = inferenceEngine.findParents(inferenceEngine.getSubClassOfGraph(), (URI) object_value); -// if (parents != null && parents.size() > 0) { -// //subclassof relationships found -// //don't add self, that will happen anyway later -// //add all relationships -// for(URI parent : parents) { -// Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Range> temp = -// queryRangeFactory.defineRange(subject_value, predicate_value, parent, new Configuration()); -// Range range = temp.getValue(); -// System.out.println(range); -// addRange(range); -// } -// } -// } else if(predicate_value != null) { -// //subpropertyof check -// Set<URI> parents = inferenceEngine.findParents(inferenceEngine.getSubPropertyOfGraph(), (URI) predicate_value); -// for(URI parent : parents) { -// Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Range> temp = -// queryRangeFactory.defineRange(subject_value, parent, object_value, new Configuration()); -// Range range = temp.getValue(); -// System.out.println(range); -// addRange(range); -// } -// } -// inferenceEngine.destroy(); -// rdfDAO.destroy(); -// } -// -// protected void addStatementPatternRange(String subj, String pred, String obj) throws IOException { -// String sparql = "select * where {\n" + -// subj + " " + pred + " " + obj + ".\n" + -// "}"; -// System.out.println(sparql); -// QueryParser parser = new SPARQLParser(); -// ParsedQuery parsedQuery = null; -// try { -// parsedQuery = parser.parseQuery(sparql, null); -// } catch (MalformedQueryException e) { -// throw new IOException(e); -// } -// parsedQuery.getTupleExpr().visitChildren(new QueryModelVisitorBase<IOException>() { -// @Override -// public void meet(StatementPattern node) throws IOException { -// Var subjectVar = node.getSubjectVar(); -// Var predicateVar = node.getPredicateVar(); -// Var objectVar = node.getObjectVar(); -// subject_value = getValue(subjectVar); -// predicate_value = getValue(predicateVar); -// object_value = getValue(objectVar); -// System.out.println(subject_value + " " + predicate_value + " " + object_value); -// Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Range> temp = -// queryRangeFactory.defineRange((Resource) subject_value, (URI) predicate_value, object_value, new Configuration()); -// layout = temp.getKey(); -// Range range = temp.getValue(); -// addRange(range); -// System.out.println(range); -// } -// }); -// } -// -// @Override -// public Tuple getNext() throws IOException { -// try { -// 
if (reader.nextKeyValue()) { -// Key key = (Key) reader.getCurrentKey(); -// cloudbase.core.data.Value value = (cloudbase.core.data.Value) reader.getCurrentValue(); -// ByteArrayDataInput input = ByteStreams.newDataInput(key.getRow().getBytes()); -// Statement statement = RdfCloudTripleStoreUtils.translateStatementFromRow(input, -// key.getColumnFamily(), layout, RdfCloudTripleStoreConstants.VALUE_FACTORY); -// -// Tuple tuple = TupleFactory.getInstance().newTuple(4); -// tuple.set(0, statement.getSubject().stringValue()); -// tuple.set(1, statement.getPredicate().stringValue()); -// tuple.set(2, statement.getObject().stringValue()); -// tuple.set(3, (statement.getContext() != null) ? (statement.getContext().stringValue()) : (null)); -// return tuple; -// } -// } catch (Exception e) { -// throw new IOException(e); -// } -// return null; -// } -//} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/optimizer/SimilarVarJoinOptimizer.java ---------------------------------------------------------------------- diff --git a/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/optimizer/SimilarVarJoinOptimizer.java b/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/optimizer/SimilarVarJoinOptimizer.java deleted file mode 100644 index 3e14fdc..0000000 --- a/pig/cloudbase.pig/src/main/java/mvm/rya/cloudbase/pig/optimizer/SimilarVarJoinOptimizer.java +++ /dev/null @@ -1,189 +0,0 @@ -package mvm.rya.cloudbase.pig.optimizer; - -import org.openrdf.query.BindingSet; -import org.openrdf.query.Dataset; -import org.openrdf.query.algebra.*; -import org.openrdf.query.algebra.evaluation.QueryOptimizer; -import org.openrdf.query.algebra.evaluation.impl.EvaluationStatistics; -import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; -import org.openrdf.query.algebra.helpers.StatementPatternCollector; - -import java.util.*; - -/** - * A query optimizer that re-orders nested Joins according to cardinality, preferring joins that have similar variables. - * - */ -public class SimilarVarJoinOptimizer implements QueryOptimizer { - - protected final EvaluationStatistics statistics; - - public SimilarVarJoinOptimizer() { - this(new EvaluationStatistics()); - } - - public SimilarVarJoinOptimizer(EvaluationStatistics statistics) { - this.statistics = statistics; - } - - /** - * Applies generally applicable optimizations: path expressions are sorted - * from more to less specific. 
- * - * @param tupleExpr - */ - public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) { - tupleExpr.visit(new JoinVisitor()); - } - - protected class JoinVisitor extends QueryModelVisitorBase<RuntimeException> { - - Set<String> boundVars = new HashSet<String>(); - - @Override - public void meet(LeftJoin leftJoin) { - leftJoin.getLeftArg().visit(this); - - Set<String> origBoundVars = boundVars; - try { - boundVars = new HashSet<String>(boundVars); - boundVars.addAll(leftJoin.getLeftArg().getBindingNames()); - - leftJoin.getRightArg().visit(this); - } finally { - boundVars = origBoundVars; - } - } - - @Override - public void meet(Join node) { - Set<String> origBoundVars = boundVars; - try { - boundVars = new HashSet<String>(boundVars); - - // Recursively get the join arguments - List<TupleExpr> joinArgs = getJoinArgs(node, new ArrayList<TupleExpr>()); - - // Build maps of cardinalities and vars per tuple expression - Map<TupleExpr, Double> cardinalityMap = new HashMap<TupleExpr, Double>(); - - for (TupleExpr tupleExpr : joinArgs) { - double cardinality = statistics.getCardinality(tupleExpr); - cardinalityMap.put(tupleExpr, cardinality); - } - - // Reorder the (recursive) join arguments to a more optimal sequence - List<TupleExpr> orderedJoinArgs = new ArrayList<TupleExpr>(joinArgs.size()); - TupleExpr last = null; - while (!joinArgs.isEmpty()) { - TupleExpr tupleExpr = selectNextTupleExpr(joinArgs, cardinalityMap, last); - if (tupleExpr == null) { - break; - } - - joinArgs.remove(tupleExpr); - orderedJoinArgs.add(tupleExpr); - last = tupleExpr; - - // Recursively optimize join arguments - tupleExpr.visit(this); - - boundVars.addAll(tupleExpr.getBindingNames()); - } - - // Build new join hierarchy - // Note: generated hierarchy is right-recursive to help the - // IterativeEvaluationOptimizer to factor out the left-most join - // argument - int i = 0; - TupleExpr replacement = orderedJoinArgs.get(i); - for (i++; i < orderedJoinArgs.size(); i++) { - replacement = new Join(replacement, orderedJoinArgs.get(i)); - } - - // Replace old join hierarchy - node.replaceWith(replacement); - } finally { - boundVars = origBoundVars; - } - } - - protected <L extends List<TupleExpr>> L getJoinArgs(TupleExpr tupleExpr, L joinArgs) { - if (tupleExpr instanceof Join) { - Join join = (Join) tupleExpr; - getJoinArgs(join.getLeftArg(), joinArgs); - getJoinArgs(join.getRightArg(), joinArgs); - } else { - joinArgs.add(tupleExpr); - } - - return joinArgs; - } - - protected List<Var> getStatementPatternVars(TupleExpr tupleExpr) { - if(tupleExpr == null) - return null; - List<StatementPattern> stPatterns = StatementPatternCollector.process(tupleExpr); - List<Var> varList = new ArrayList<Var>(stPatterns.size() * 4); - for (StatementPattern sp : stPatterns) { - sp.getVars(varList); - } - return varList; - } - - protected <M extends Map<Var, Integer>> M getVarFreqMap(List<Var> varList, M varFreqMap) { - for (Var var : varList) { - Integer freq = varFreqMap.get(var); - freq = (freq == null) ? 1 : freq + 1; - varFreqMap.put(var, freq); - } - return varFreqMap; - } - - /** - * Selects from a list of tuple expressions the next tuple expression that - * should be evaluated. This method selects the tuple expression with - * highest number of bound variables, preferring variables that have been - * bound in other tuple expressions over variables with a fixed value. 
- */ - protected TupleExpr selectNextTupleExpr(List<TupleExpr> expressions, - Map<TupleExpr, Double> cardinalityMap, - TupleExpr last) { - double lowestCardinality = Double.MAX_VALUE; - TupleExpr result = expressions.get(0); - expressions = getExprsWithSameVars(expressions, last); - - for (TupleExpr tupleExpr : expressions) { - // Calculate a score for this tuple expression - double cardinality = cardinalityMap.get(tupleExpr); - - if (cardinality < lowestCardinality) { - // More specific path expression found - lowestCardinality = cardinality; - result = tupleExpr; - } - } - - return result; - } - - protected List<TupleExpr> getExprsWithSameVars(List<TupleExpr> expressions, TupleExpr last) { - if(last == null) - return expressions; - List<TupleExpr> retExprs = new ArrayList<TupleExpr>(); - for(TupleExpr tupleExpr : expressions) { - List<Var> statementPatternVars = getStatementPatternVars(tupleExpr); - List<Var> lastVars = getStatementPatternVars(last); - statementPatternVars.retainAll(lastVars); - if(statementPatternVars.size() > 0) { - retExprs.add(tupleExpr); - } - } - if(retExprs.size() == 0) { - return expressions; - } - return retExprs; - } - - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/pig/cloudbase.pig/src/main/java/org/apache/hadoop/mapred/PreferLocalMapTaskSelector.java ---------------------------------------------------------------------- diff --git a/pig/cloudbase.pig/src/main/java/org/apache/hadoop/mapred/PreferLocalMapTaskSelector.java b/pig/cloudbase.pig/src/main/java/org/apache/hadoop/mapred/PreferLocalMapTaskSelector.java deleted file mode 100644 index 9966704..0000000 --- a/pig/cloudbase.pig/src/main/java/org/apache/hadoop/mapred/PreferLocalMapTaskSelector.java +++ /dev/null @@ -1,39 +0,0 @@ -//package org.apache.hadoop.mapred; -// -//import org.apache.hadoop.net.Node; -// -//import java.io.IOException; -//import java.util.Arrays; -// -///** -// */ -//public class PreferLocalMapTaskSelector extends DefaultTaskSelector { -// -// @Override -// public Task obtainNewMapTask(TaskTrackerStatus taskTracker, JobInProgress job) throws IOException { -// return this.obtainNewLocalMapTask(taskTracker, job); -// } -// -// public Task obtainNewLocalMapTask(TaskTrackerStatus taskTracker, JobInProgress job) -// throws IOException { -// ClusterStatus clusterStatus = taskTrackerManager.getClusterStatus(); -// int numTaskTrackers = clusterStatus.getTaskTrackers(); -// System.out.println(taskTracker.getHost()); -// for (TaskInProgress tip : job.maps) { -// String[] splitLocations = tip.getSplitLocations(); -// System.out.println(Arrays.toString(splitLocations)); -// for (String loc : splitLocations) { -// Node node = job.jobtracker.getNode(loc); -// System.out.println(node); -// if(!taskTracker.getHost().equals(loc)) { -// return null; -// } -// } -// } -// -// Node node = job.jobtracker.getNode(taskTracker.getHost()); -// System.out.println(node); -// Task task = job.obtainNewLocalMapTask(taskTracker, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); -// return task; -// } -//} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/CloudbaseInputFormatMain.java ---------------------------------------------------------------------- diff --git a/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/CloudbaseInputFormatMain.java b/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/CloudbaseInputFormatMain.java deleted file mode 100644 index a2a3e45..0000000 
--- a/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/CloudbaseInputFormatMain.java +++ /dev/null @@ -1,50 +0,0 @@ -//package mvm.mmrts.cloudbase.pig; -// -//import cloudbase.core.CBConstants; -//import cloudbase.core.client.mapreduce.CloudbaseInputFormat; -//import cloudbase.core.data.Range; -//import mvm.mmrts.api.RdfCloudTripleStoreConstants; -//import mvm.rya.cloudbase.query.DefineTripleQueryRangeFactory; -//import org.apache.hadoop.conf.Configuration; -//import org.apache.hadoop.mapreduce.InputSplit; -//import org.apache.hadoop.mapreduce.JobContext; -//import org.openrdf.model.ValueFactory; -//import org.openrdf.model.impl.ValueFactoryImpl; -// -//import java.util.Collections; -//import java.util.List; -//import java.util.Map; -// -///** -// * Created by IntelliJ IDEA. -// * User: RoshanP -// * Date: 4/5/12 -// * Time: 4:52 PM -// * To change this template use File | Settings | File Templates. -// */ -//public class CloudbaseInputFormatMain { -// public static void main(String[] args) { -// try { -// ValueFactory vf = new ValueFactoryImpl(); -// CloudbaseInputFormat format = new CloudbaseInputFormat(); -// Configuration configuration = new Configuration(); -// JobContext context = new JobContext(configuration, null); -// CloudbaseInputFormat.setZooKeeperInstance(context, "stratus", "stratus13:2181"); -// CloudbaseInputFormat.setInputInfo(context, "root", "password".getBytes(), "l_po", CBConstants.NO_AUTHS); -// DefineTripleQueryRangeFactory queryRangeFactory = new DefineTripleQueryRangeFactory(); -// Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT,Range> entry = -// queryRangeFactory.defineRange(null, vf.createURI("urn:lubm:rdfts#takesCourse"), null, context.getConfiguration()); -// CloudbaseInputFormat.setRanges(context, Collections.singleton(entry.getValue())); -// List<InputSplit> splits = format.getSplits(context); -// for (InputSplit inputSplit : splits) { -// String[] locations = inputSplit.getLocations(); -// for (String loc : locations) { -// java.net.InetAddress inetAdd = java.net.InetAddress.getByName(loc); -// System.out.println("Hostname is: " + inetAdd.getHostName()); -// } -// } -// } catch (Exception e) { -// e.printStackTrace(); -// } -// } -//} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/CloudbaseStorageTest.java ---------------------------------------------------------------------- diff --git a/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/CloudbaseStorageTest.java b/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/CloudbaseStorageTest.java deleted file mode 100644 index 5cb0940..0000000 --- a/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/CloudbaseStorageTest.java +++ /dev/null @@ -1,250 +0,0 @@ -package mvm.rya.cloudbase.pig; - -import cloudbase.core.CBConstants; -import cloudbase.core.client.BatchWriter; -import cloudbase.core.client.Connector; -import cloudbase.core.client.admin.SecurityOperations; -import cloudbase.core.client.mock.MockInstance; -import cloudbase.core.data.Mutation; -import cloudbase.core.data.Value; -import cloudbase.core.security.Authorizations; -import cloudbase.core.security.ColumnVisibility; -import cloudbase.core.security.TablePermission; -import junit.framework.TestCase; -import mvm.rya.cloudbase.pig.CloudbaseStorage; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.*; -import org.apache.pig.data.Tuple; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * 
Created by IntelliJ IDEA. - * Date: 4/20/12 - * Time: 10:17 AM - * To change this template use File | Settings | File Templates. - */ -public class CloudbaseStorageTest extends TestCase { - - private String user = "user"; - private String pwd = "pwd"; - private String instance = "myinstance"; - private String table = "testTable"; - private Authorizations auths = CBConstants.NO_AUTHS; - private Connector connector; - - @Override - public void setUp() throws Exception { - super.setUp(); - connector = new MockInstance(instance).getConnector(user, pwd.getBytes()); - connector.tableOperations().create(table); - SecurityOperations secOps = connector.securityOperations(); - secOps.createUser(user, pwd.getBytes(), auths); - secOps.grantTablePermission(user, table, TablePermission.READ); - secOps.grantTablePermission(user, table, TablePermission.WRITE); - } - - public void testSimpleOutput() throws Exception { - BatchWriter batchWriter = connector.createBatchWriter(table, 10l, 10l, 2); - Mutation row = new Mutation("row"); - row.put("cf", "cq", new Value(new byte[0])); - batchWriter.addMutation(row); - batchWriter.flush(); - batchWriter.close(); - - String location = "cloudbase://" + table + "?instance=" + instance + "&user=" + user + "&password=" + pwd + "&range=a|z&mock=true"; - CloudbaseStorage storage = createCloudbaseStorage(location); - int count = 0; - while (true) { - Tuple next = storage.getNext(); - if (next == null) - break; - assertEquals(6, next.size()); - count++; - } - assertEquals(1, count); - } - - public void testRange() throws Exception { - BatchWriter batchWriter = connector.createBatchWriter(table, 10l, 10l, 2); - Mutation row = new Mutation("a"); - row.put("cf", "cq", new Value(new byte[0])); - batchWriter.addMutation(row); - row = new Mutation("b"); - row.put("cf", "cq", new Value(new byte[0])); - batchWriter.addMutation(row); - row = new Mutation("d"); - row.put("cf", "cq", new Value(new byte[0])); - batchWriter.addMutation(row); - batchWriter.flush(); - batchWriter.close(); - - String location = "cloudbase://" + table + "?instance=" + instance + "&user=" + user + "&password=" + pwd + "&range=a|c&mock=true"; - CloudbaseStorage storage = createCloudbaseStorage(location); - int count = 0; - while (true) { - Tuple next = storage.getNext(); - if (next == null) - break; - assertEquals(6, next.size()); - count++; - } - assertEquals(2, count); - } - - public void testMultipleRanges() throws Exception { - BatchWriter batchWriter = connector.createBatchWriter(table, 10l, 10l, 2); - Mutation row = new Mutation("a"); - row.put("cf", "cq", new Value(new byte[0])); - batchWriter.addMutation(row); - row = new Mutation("b"); - row.put("cf", "cq", new Value(new byte[0])); - batchWriter.addMutation(row); - row = new Mutation("d"); - row.put("cf", "cq", new Value(new byte[0])); - batchWriter.addMutation(row); - batchWriter.flush(); - batchWriter.close(); - - String location = "cloudbase://" + table + "?instance=" + instance + "&user=" + user + "&password=" + pwd + "&range=a|c&range=d|e&mock=true"; - List<CloudbaseStorage> storages = createCloudbaseStorages(location); - assertEquals(2, storages.size()); - CloudbaseStorage storage = storages.get(0); - int count = 0; - while (true) { - Tuple next = storage.getNext(); - if (next == null) - break; - assertEquals(6, next.size()); - count++; - } - assertEquals(2, count); - storage = storages.get(1); - count = 0; - while (true) { - Tuple next = storage.getNext(); - if (next == null) - break; - assertEquals(6, next.size()); - count++; - } - 
assertEquals(1, count); - } - - public void testColumns() throws Exception { - BatchWriter batchWriter = connector.createBatchWriter(table, 10l, 10l, 2); - Mutation row = new Mutation("a"); - row.put("cf1", "cq", new Value(new byte[0])); - row.put("cf2", "cq", new Value(new byte[0])); - row.put("cf3", "cq1", new Value(new byte[0])); - row.put("cf3", "cq2", new Value(new byte[0])); - batchWriter.addMutation(row); - batchWriter.flush(); - batchWriter.close(); - - String location = "cloudbase://" + table + "?instance=" + instance + "&user=" + user + "&password=" + pwd + "&range=a|c&columns=cf1,cf3|cq1&mock=true"; - CloudbaseStorage storage = createCloudbaseStorage(location); - int count = 0; - while (true) { - Tuple next = storage.getNext(); - if (next == null) - break; - assertEquals(6, next.size()); - count++; - } - assertEquals(2, count); - } - - public void testWholeRowRange() throws Exception { - BatchWriter batchWriter = connector.createBatchWriter(table, 10l, 10l, 2); - Mutation row = new Mutation("a"); - row.put("cf1", "cq", new Value(new byte[0])); - row.put("cf2", "cq", new Value(new byte[0])); - row.put("cf3", "cq1", new Value(new byte[0])); - row.put("cf3", "cq2", new Value(new byte[0])); - batchWriter.addMutation(row); - batchWriter.flush(); - batchWriter.close(); - - String location = "cloudbase://" + table + "?instance=" + instance + "&user=" + user + "&password=" + pwd + "&range=a&mock=true"; - CloudbaseStorage storage = createCloudbaseStorage(location); - int count = 0; - while (true) { - Tuple next = storage.getNext(); - if (next == null) - break; - assertEquals(6, next.size()); - count++; - } - assertEquals(4, count); - } - - public void testAuths() throws Exception { - BatchWriter batchWriter = connector.createBatchWriter(table, 10l, 10l, 2); - Mutation row = new Mutation("a"); - row.put("cf1", "cq1", new ColumnVisibility("A"), new Value(new byte[0])); - row.put("cf2", "cq2", new Value(new byte[0])); - batchWriter.addMutation(row); - batchWriter.flush(); - batchWriter.close(); - - String location = "cloudbase://" + table + "?instance=" + instance + "&user=" + user + "&password=" + pwd + "&range=a|c&mock=true"; - CloudbaseStorage storage = createCloudbaseStorage(location); - int count = 0; - while (true) { - Tuple next = storage.getNext(); - if (next == null) - break; - assertEquals(6, next.size()); - count++; - } - assertEquals(1, count); - - location = "cloudbase://" + table + "?instance=" + instance + "&user=" + user + "&password=" + pwd + "&range=a|c&auths=A&mock=true"; - storage = createCloudbaseStorage(location); - count = 0; - while (true) { - Tuple next = storage.getNext(); - if (next == null) - break; - assertEquals(6, next.size()); - count++; - } - assertEquals(2, count); - } - - protected CloudbaseStorage createCloudbaseStorage(String location) throws IOException, InterruptedException { - List<CloudbaseStorage> cloudbaseStorages = createCloudbaseStorages(location); - if (cloudbaseStorages.size() > 0) { - return cloudbaseStorages.get(0); - } - return null; - } - - protected List<CloudbaseStorage> createCloudbaseStorages(String location) throws IOException, InterruptedException { - List<CloudbaseStorage> cloudbaseStorages = new ArrayList<CloudbaseStorage>(); - CloudbaseStorage storage = new CloudbaseStorage(); - InputFormat inputFormat = storage.getInputFormat(); - Job job = new Job(new Configuration()); - storage.setLocation(location, job); - List<InputSplit> splits = inputFormat.getSplits(job); - assertNotNull(splits); - - for (InputSplit inputSplit : splits) { - 
storage = new CloudbaseStorage(); - job = new Job(new Configuration()); - storage.setLocation(location, job); - TaskAttemptContext taskAttemptContext = new TaskAttemptContext(job.getConfiguration(), - new TaskAttemptID("jtid", 0, false, 0, 0)); - RecordReader recordReader = inputFormat.createRecordReader(inputSplit, - taskAttemptContext); - recordReader.initialize(inputSplit, taskAttemptContext); - - storage.prepareToRead(recordReader, null); - cloudbaseStorages.add(storage); - } - return cloudbaseStorages; - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/SparqlQueryPigEngineTest.java ---------------------------------------------------------------------- diff --git a/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/SparqlQueryPigEngineTest.java b/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/SparqlQueryPigEngineTest.java deleted file mode 100644 index f3cdd5f..0000000 --- a/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/SparqlQueryPigEngineTest.java +++ /dev/null @@ -1,55 +0,0 @@ -package mvm.rya.cloudbase.pig; - -import junit.framework.TestCase; -import mvm.rya.cloudbase.pig.SparqlQueryPigEngine; -import mvm.rya.cloudbase.pig.SparqlToPigTransformVisitor; -import org.apache.pig.ExecType; - -/** - * Created by IntelliJ IDEA. - * Date: 4/23/12 - * Time: 10:14 AM - * To change this template use File | Settings | File Templates. - */ -public class SparqlQueryPigEngineTest extends TestCase { - - private SparqlQueryPigEngine engine; - - @Override - public void setUp() throws Exception { - super.setUp(); - SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); - visitor.setTablePrefix("l_"); - visitor.setInstance("stratus"); - visitor.setZk("stratus13:2181"); - visitor.setUser("root"); - visitor.setPassword("password"); - - engine = new SparqlQueryPigEngine(); - engine.setSparqlToPigTransformVisitor(visitor); - engine.setExecType(ExecType.LOCAL); - engine.setInference(false); - engine.setStats(false); - engine.init(); - } - - @Override - public void tearDown() throws Exception { - super.tearDown(); - engine.destroy(); - } - - public void testStatementPattern() throws Exception { - String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + - " PREFIX ub: <urn:lubm:rdfts#>\n" + - "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" + - " SELECT * WHERE\n" + - " {\n" + - "\t<http://www.Department0.University0.edu> ?p ?o\n" + - " }\n" + - ""; - -// engine.runQuery(query, "/temp/testSP"); - assertNotNull(engine.generatePigScript(query)); - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/SparqlToPigTransformVisitorTest.java ---------------------------------------------------------------------- diff --git a/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/SparqlToPigTransformVisitorTest.java b/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/SparqlToPigTransformVisitorTest.java deleted file mode 100644 index fc5abb8..0000000 --- a/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/SparqlToPigTransformVisitorTest.java +++ /dev/null @@ -1,367 +0,0 @@ -package mvm.rya.cloudbase.pig; - -import junit.framework.TestCase; -import mvm.rya.cloudbase.pig.optimizer.SimilarVarJoinOptimizer; -import mvm.rya.cloudbase.pig.SparqlToPigTransformVisitor; -import org.openrdf.query.algebra.QueryRoot; -import org.openrdf.query.algebra.TupleExpr; -import org.openrdf.query.parser.ParsedQuery; 
-import org.openrdf.query.parser.QueryParser; -import org.openrdf.query.parser.sparql.SPARQLParser; - -/** - * Created by IntelliJ IDEA. - * Date: 4/12/12 - * Time: 10:18 AM - * To change this template use File | Settings | File Templates. - */ -public class SparqlToPigTransformVisitorTest extends TestCase { - - public void testStatementPattern() throws Exception { - String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + - " PREFIX ub: <urn:lubm:rdfts#>\n" + - "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" + - " SELECT * WHERE\n" + - " {\n" + - "\t?x rdf:type ub:UndergraduateStudent\n" + - " }\n" + - ""; - QueryParser parser = new SPARQLParser(); - ParsedQuery parsedQuery = parser.parseQuery(query, null); - -// System.out.println(parsedQuery); - - SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); - visitor.setTablePrefix("l_"); - visitor.setInstance("stratus"); - visitor.setZk("stratus13:2181"); - visitor.setUser("root"); - visitor.setPassword("password"); - visitor.meet(new QueryRoot(parsedQuery.getTupleExpr())); - System.out.println(visitor.getPigScript()); - } - - public void testStatementPatternContext() throws Exception { - String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + - " PREFIX ub: <urn:lubm:rdfts#>\n" + - "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" + - " SELECT * WHERE\n" + - " {\n" + - " GRAPH ub:g1 {\n" + - "\t?x rdf:type ub:UndergraduateStudent\n" + - " }\n" + - " }\n" + - ""; - QueryParser parser = new SPARQLParser(); - ParsedQuery parsedQuery = parser.parseQuery(query, null); - -// System.out.println(parsedQuery); - - SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); - visitor.setTablePrefix("l_"); - visitor.setInstance("stratus"); - visitor.setZk("stratus13:2181"); - visitor.setUser("root"); - visitor.setPassword("password"); - visitor.meet(new QueryRoot(parsedQuery.getTupleExpr())); -// System.out.println(visitor.getPigScript()); - } - - public void testStatementPatternContextVar() throws Exception { - String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + - " PREFIX ub: <urn:lubm:rdfts#>\n" + - "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" + - " SELECT * WHERE\n" + - " {\n" + - " GRAPH ?g {\n" + - "\t?x rdf:type ub:UndergraduateStudent\n" + - " }\n" + - " ?x ub:pred ?g." 
+ - " }\n" + - ""; - QueryParser parser = new SPARQLParser(); - ParsedQuery parsedQuery = parser.parseQuery(query, null); - -// System.out.println(parsedQuery); - - SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); - visitor.setTablePrefix("l_"); - visitor.setInstance("stratus"); - visitor.setZk("stratus13:2181"); - visitor.setUser("root"); - visitor.setPassword("password"); - visitor.meet(new QueryRoot(parsedQuery.getTupleExpr())); -// System.out.println(visitor.getPigScript()); - } - - public void testJoin() throws Exception { - String query = "select * where {\n" + - "?subj <urn:lubm:rdfts#name> 'Department0'.\n" + - "?subj <urn:lubm:rdfts#subOrganizationOf> <http://www.University0.edu>.\n" + - "}"; -// System.out.println(query); - QueryParser parser = new SPARQLParser(); - ParsedQuery parsedQuery = parser.parseQuery(query, null); - -// System.out.println(parsedQuery); - - SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); - visitor.setTablePrefix("l_"); - visitor.setInstance("stratus"); - visitor.setZk("stratus13:2181"); - visitor.setUser("root"); - visitor.setPassword("password"); - visitor.meet(new QueryRoot(parsedQuery.getTupleExpr())); -// System.out.println(visitor.getPigScript()); - } - - public void testMutliReturnJoin() throws Exception { - String query = "select * where {\n" + - "?subj <urn:lubm:rdfts#name> 'Department0'.\n" + - "?subj <urn:lubm:rdfts#subOrganizationOf> ?suborg.\n" + - "}"; - QueryParser parser = new SPARQLParser(); - ParsedQuery parsedQuery = parser.parseQuery(query, null); - -// System.out.println(parsedQuery); - - SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); - visitor.setTablePrefix("l_"); - visitor.setInstance("stratus"); - visitor.setZk("stratus13:2181"); - visitor.setUser("root"); - visitor.setPassword("password"); - visitor.meet(new QueryRoot(parsedQuery.getTupleExpr())); -// System.out.println(visitor.getPigScript()); - } - - public void testMutlipleJoins() throws Exception { - String query = "select * where {\n" + - "?subj <urn:lubm:rdfts#name> 'Department0'.\n" + - "?subj <urn:lubm:rdfts#subOrganizationOf> <http://www.University0.edu>.\n" + - "?subj <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <urn:lubm:rdfts#Department>.\n" + - "}"; -// System.out.println(query); - QueryParser parser = new SPARQLParser(); - ParsedQuery parsedQuery = parser.parseQuery(query, null); - -// System.out.println(parsedQuery); - - SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); - visitor.setTablePrefix("l_"); - visitor.setInstance("stratus"); - visitor.setZk("stratus13:2181"); - visitor.setUser("root"); - visitor.setPassword("password"); - visitor.meet(new QueryRoot(parsedQuery.getTupleExpr())); -// System.out.println(visitor.getPigScript()); - } - - public void testCross() throws Exception { - String query = "select * where {\n" + - "?subj0 <urn:lubm:rdfts#name> 'Department0'.\n" + - "?subj1 <urn:lubm:rdfts#name> 'Department1'.\n" + - "?subj0 <urn:lubm:rdfts#subOrganizationOf> <http://www.University0.edu>.\n" + - "?subj1 <urn:lubm:rdfts#subOrganizationOf> <http://www.University0.edu>.\n" + - "}"; -// System.out.println(query); - QueryParser parser = new SPARQLParser(); - ParsedQuery parsedQuery = parser.parseQuery(query, null); - QueryRoot tupleExpr = new QueryRoot(parsedQuery.getTupleExpr()); - - SimilarVarJoinOptimizer similarVarJoinOptimizer = new SimilarVarJoinOptimizer(); - similarVarJoinOptimizer.optimize(tupleExpr, null, null); - -// System.out.println(tupleExpr); 
- - SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); - visitor.setTablePrefix("l_"); - visitor.setInstance("stratus"); - visitor.setZk("stratus13:2181"); - visitor.setUser("root"); - visitor.setPassword("password"); - visitor.meet(tupleExpr); -// System.out.println(visitor.getPigScript()); - } - - public void testLimit() throws Exception { - String query = "select * where {\n" + - "?subj <urn:lubm:rdfts#name> 'Department0'.\n" + - "?subj <urn:lubm:rdfts#subOrganizationOf> ?suborg.\n" + - "} limit 100"; -// System.out.println(query); - QueryParser parser = new SPARQLParser(); - ParsedQuery parsedQuery = parser.parseQuery(query, null); - -// System.out.println(parsedQuery); - - SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); - visitor.setTablePrefix("l_"); - visitor.setInstance("stratus"); - visitor.setZk("stratus13:2181"); - visitor.setUser("root"); - visitor.setPassword("password"); - visitor.meet(new QueryRoot(parsedQuery.getTupleExpr())); -// System.out.println(visitor.getPigScript()); - } - - public void testHardQuery() throws Exception { -// String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + -// " PREFIX ub: <urn:lubm:rdfts#>\n" + -// " SELECT * WHERE\n" + -// " {\n" + -// " ?y rdf:type ub:University .\n" + -// " ?z ub:subOrganizationOf ?y .\n" + -// " ?z rdf:type ub:Department .\n" + -// " ?x ub:memberOf ?z .\n" + -// " ?x ub:undergraduateDegreeFrom ?y .\n" + -// " ?x rdf:type ub:GraduateStudent .\n" + -// " }\n" + -// "limit 100"; - String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + - " PREFIX ub: <urn:lubm:rdfts#>\n" + - " SELECT * WHERE\n" + - " {\n" + - "\t?x ub:advisor ?y.\n" + - "\t?y ub:teacherOf ?z.\n" + - "\t?x ub:takesCourse ?z.\n" + - "\t?x rdf:type ub:Student.\n" + - "\t?y rdf:type ub:Faculty.\n" + - "\t?z rdf:type ub:Course.\n" + - " }\n" + - "limit 100"; -// System.out.println(query); - QueryParser parser = new SPARQLParser(); - ParsedQuery parsedQuery = parser.parseQuery(query, null); - - TupleExpr tupleExpr = parsedQuery.getTupleExpr(); - -// CloudbaseRdfEvalStatsDAO rdfEvalStatsDAO = new CloudbaseRdfEvalStatsDAO(); -// rdfEvalStatsDAO.setConnector(new ZooKeeperInstance("stratus", "stratus13:2181").getConnector("root", "password".getBytes())); -// rdfEvalStatsDAO.setEvalTable("l_eval"); -// RdfCloudTripleStoreEvaluationStatistics stats = new RdfCloudTripleStoreEvaluationStatistics(new Configuration(), rdfEvalStatsDAO); -// (new SimilarVarJoinOptimizer(stats)).optimize(tupleExpr, null, null); - -// System.out.println(tupleExpr); - - SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); - visitor.setTablePrefix("l_"); - visitor.setInstance("stratus"); - visitor.setZk("stratus13:2181"); - visitor.setUser("root"); - visitor.setPassword("password"); - visitor.meet(new QueryRoot(tupleExpr)); -// System.out.println(visitor.getPigScript()); - } - - public void testFixedStatementPatternInferenceQuery() throws Exception { - String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + - " PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" + - " PREFIX ub: <urn:lubm:rdfts#>\n" + - " SELECT * WHERE\n" + - " {\n" + - " ?y ub:memberOf <http://www.Department3.University10.edu>.\n" + - " {?y rdf:type ub:Professor.}\n" + - " UNION \n" + - " {?y rdf:type ub:GraduateStudent.}\n" + - " }"; -// System.out.println(query); - QueryParser parser = new SPARQLParser(); - ParsedQuery parsedQuery = parser.parseQuery(query, null); - - TupleExpr tupleExpr 
= parsedQuery.getTupleExpr(); - -// Configuration conf = new Configuration(); -// Connector connector = new ZooKeeperInstance("stratus", "stratus13:2181").getConnector("root", "password".getBytes()); -// -// InferenceEngine inferenceEngine = new InferenceEngine(); -// CloudbaseRdfDAO rdfDAO = new CloudbaseRdfDAO(); -// rdfDAO.setConf(conf); -// rdfDAO.setConnector(connector); -// rdfDAO.setNamespaceTable("l_ns"); -// rdfDAO.setSpoTable("l_spo"); -// rdfDAO.setPoTable("l_po"); -// rdfDAO.setOspTable("l_osp"); -// rdfDAO.init(); -// -// inferenceEngine.setRyaDAO(rdfDAO); -// inferenceEngine.setConf(conf); -// inferenceEngine.init(); -// -// tupleExpr.visit(new TransitivePropertyVisitor(conf, inferenceEngine)); -// tupleExpr.visit(new SymmetricPropertyVisitor(conf, inferenceEngine)); -// tupleExpr.visit(new InverseOfVisitor(conf, inferenceEngine)); -// tupleExpr.visit(new SubPropertyOfVisitor(conf, inferenceEngine)); -// tupleExpr.visit(new SubClassOfVisitor(conf, inferenceEngine)); -// -// CloudbaseRdfEvalStatsDAO rdfEvalStatsDAO = new CloudbaseRdfEvalStatsDAO(); -// rdfEvalStatsDAO.setConnector(connector); -// rdfEvalStatsDAO.setEvalTable("l_eval"); -// RdfCloudTripleStoreEvaluationStatistics stats = new RdfCloudTripleStoreEvaluationStatistics(conf, rdfEvalStatsDAO); -// (new QueryJoinOptimizer(stats)).optimize(tupleExpr, null, null); - -// System.out.println(tupleExpr); - - SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); - visitor.setTablePrefix("l_"); - visitor.setInstance("stratus"); - visitor.setZk("stratus13:2181"); - visitor.setUser("root"); - visitor.setPassword("password"); - visitor.meet(new QueryRoot(tupleExpr)); -// System.out.println(visitor.getPigScript()); - } - -// public void testInverseOf() throws Exception { -// String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + -// " PREFIX ub: <urn:lubm:rdfts#>\n" + -// " SELECT * WHERE\n" + -// " {\n" + -// " ?x rdf:type ub:Person .\n" + -// " <http://www.University0.edu> ub:hasAlumnus ?x .\n" + -// " } "; -// System.out.println(query); -// QueryParser parser = new SPARQLParser(); -// ParsedQuery parsedQuery = parser.parseQuery(query, null); -// TupleExpr tupleExpr = parsedQuery.getTupleExpr(); -// -// Configuration conf = new Configuration(); -// Connector connector = new ZooKeeperInstance("stratus", "stratus13:2181").getConnector("root", "password".getBytes()); -// -// InferenceEngine inferenceEngine = new InferenceEngine(); -// CloudbaseRdfDAO rdfDAO = new CloudbaseRdfDAO(); -// rdfDAO.setConf(conf); -// rdfDAO.setConnector(connector); -// rdfDAO.setNamespaceTable("l_ns"); -// rdfDAO.setSpoTable("l_spo"); -// rdfDAO.setPoTable("l_po"); -// rdfDAO.setOspTable("l_osp"); -// rdfDAO.init(); -// -// inferenceEngine.setRyaDAO(rdfDAO); -// inferenceEngine.setConf(conf); -// inferenceEngine.init(); -// -// tupleExpr.visit(new TransitivePropertyVisitor(conf, inferenceEngine)); -// tupleExpr.visit(new SymmetricPropertyVisitor(conf, inferenceEngine)); -// tupleExpr.visit(new InverseOfVisitor(conf, inferenceEngine)); -// -// CloudbaseRdfEvalStatsDAO rdfEvalStatsDAO = new CloudbaseRdfEvalStatsDAO(); -// rdfEvalStatsDAO.setConnector(connector); -// rdfEvalStatsDAO.setEvalTable("l_eval"); -// RdfCloudTripleStoreEvaluationStatistics stats = new RdfCloudTripleStoreEvaluationStatistics(conf, rdfEvalStatsDAO); -// (new QueryJoinOptimizer(stats)).optimize(tupleExpr, null, null); -// -// -// System.out.println(tupleExpr); -// -// SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor(); 
-// visitor.setTablePrefix("l_"); -// visitor.setInstance("stratus"); -// visitor.setZk("stratus13:2181"); -// visitor.setUser("root"); -// visitor.setPassword("password"); -// visitor.meet(new QueryRoot(parsedQuery.getTupleExpr())); -// System.out.println(visitor.getPigScript()); -// } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/StatementPatternStorageTest.java ---------------------------------------------------------------------- diff --git a/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/StatementPatternStorageTest.java b/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/StatementPatternStorageTest.java deleted file mode 100644 index de88138..0000000 --- a/pig/cloudbase.pig/src/test/java/mvm/rya/cloudbase/pig/StatementPatternStorageTest.java +++ /dev/null @@ -1,148 +0,0 @@ -package mvm.rya.cloudbase.pig; - -import cloudbase.core.CBConstants; -import cloudbase.core.client.Connector; -import cloudbase.core.client.admin.SecurityOperations; -import cloudbase.core.client.mock.MockInstance; -import cloudbase.core.security.Authorizations; -import cloudbase.core.security.TablePermission; -import junit.framework.TestCase; -import mvm.rya.api.RdfCloudTripleStoreConstants; -import mvm.rya.api.resolver.RdfToRyaConversions; -import mvm.rya.cloudbase.CloudbaseRdfConfiguration; -import mvm.rya.cloudbase.CloudbaseRyaDAO; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.*; -import org.apache.pig.data.Tuple; -import org.openrdf.model.ValueFactory; -import org.openrdf.model.impl.ContextStatementImpl; -import org.openrdf.model.impl.StatementImpl; -import org.openrdf.model.impl.ValueFactoryImpl; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * Created by IntelliJ IDEA. - * Date: 4/20/12 - * Time: 5:14 PM - * To change this template use File | Settings | File Templates. 
- */ -public class StatementPatternStorageTest extends TestCase { - - private String user = "user"; - private String pwd = "pwd"; - private String instance = "myinstance"; - private String tablePrefix = "t_"; - private Authorizations auths = CBConstants.NO_AUTHS; - private Connector connector; - private CloudbaseRyaDAO ryaDAO; - private ValueFactory vf = new ValueFactoryImpl(); - private String namespace = "urn:test#"; - private CloudbaseRdfConfiguration conf; - - @Override - public void setUp() throws Exception { - super.setUp(); - connector = new MockInstance(instance).getConnector(user, pwd.getBytes()); - connector.tableOperations().create(tablePrefix + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX); - connector.tableOperations().create(tablePrefix + RdfCloudTripleStoreConstants.TBL_PO_SUFFIX); - connector.tableOperations().create(tablePrefix + RdfCloudTripleStoreConstants.TBL_OSP_SUFFIX); - connector.tableOperations().create(tablePrefix + RdfCloudTripleStoreConstants.TBL_NS_SUFFIX); - SecurityOperations secOps = connector.securityOperations(); - secOps.createUser(user, pwd.getBytes(), auths); - secOps.grantTablePermission(user, tablePrefix + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX, TablePermission.READ); - secOps.grantTablePermission(user, tablePrefix + RdfCloudTripleStoreConstants.TBL_PO_SUFFIX, TablePermission.READ); - secOps.grantTablePermission(user, tablePrefix + RdfCloudTripleStoreConstants.TBL_OSP_SUFFIX, TablePermission.READ); - secOps.grantTablePermission(user, tablePrefix + RdfCloudTripleStoreConstants.TBL_NS_SUFFIX, TablePermission.READ); - - conf = new CloudbaseRdfConfiguration(); - ryaDAO = new CloudbaseRyaDAO(); - ryaDAO.setConnector(connector); - conf.setTablePrefix(tablePrefix); - ryaDAO.setConf(conf); - ryaDAO.init(); - } - - public void testSimplePredicateRange() throws Exception { - ryaDAO.add(RdfToRyaConversions.convertStatement(new StatementImpl(vf.createURI(namespace, "a"), vf.createURI(namespace, "p"), vf.createLiteral("l")))); - ryaDAO.add(RdfToRyaConversions.convertStatement(new StatementImpl(vf.createURI(namespace, "b"), vf.createURI(namespace, "p"), vf.createLiteral("l")))); - ryaDAO.add(RdfToRyaConversions.convertStatement(new StatementImpl(vf.createURI(namespace, "c"), vf.createURI(namespace, "n"), vf.createLiteral("l")))); - - - int count = 0; - List<StatementPatternStorage> storages = createStorages("cloudbase://" + tablePrefix + "?instance=" + instance + "&user=" + user + "&password=" + pwd + "&predicate=<" + namespace + "p>&mock=true"); - for (StatementPatternStorage storage : storages) { - while (true) { - Tuple next = storage.getNext(); - if (next == null) { - break; - } - count++; - } - } - assertEquals(2, count); - ryaDAO.destroy(); - } - - public void testContext() throws Exception { - ryaDAO.add(RdfToRyaConversions.convertStatement(new StatementImpl(vf.createURI(namespace, "a"), vf.createURI(namespace, "p"), vf.createLiteral("l1")))); - ryaDAO.add(RdfToRyaConversions.convertStatement(new ContextStatementImpl(vf.createURI(namespace, "a"), vf.createURI(namespace, "p"), vf.createLiteral("l2"), vf.createURI(namespace, "g1")))); - - - int count = 0; - List<StatementPatternStorage> storages = createStorages("cloudbase://" + tablePrefix + "?instance=" + instance + "&user=" + user + "&password=" + pwd + "&predicate=<" + namespace + "p>&mock=true"); - for (StatementPatternStorage storage : storages) { - while (true) { - Tuple next = storage.getNext(); - if (next == null) { - break; - } - count++; - } - } - assertEquals(2, count); - - count = 0; - 
storages = createStorages("cloudbase://" + tablePrefix + "?instance=" + instance + "&user=" + user + "&password=" + pwd + "&predicate=<" + namespace + "p>&context=<"+namespace+"g1>&mock=true"); - for (StatementPatternStorage storage : storages) { - while (true) { - Tuple next = storage.getNext(); - if (next == null) { - break; - } - count++; - } - } - assertEquals(1, count); - - ryaDAO.destroy(); - } - - protected List<StatementPatternStorage> createStorages(String location) throws IOException, InterruptedException { - List<StatementPatternStorage> storages = new ArrayList<StatementPatternStorage>(); - StatementPatternStorage storage = new StatementPatternStorage(); - InputFormat inputFormat = storage.getInputFormat(); - Job job = new Job(new Configuration()); - storage.setLocation(location, job); - List<InputSplit> splits = inputFormat.getSplits(job); - assertNotNull(splits); - - for (InputSplit inputSplit : splits) { - storage = new StatementPatternStorage(); - job = new Job(new Configuration()); - storage.setLocation(location, job); - TaskAttemptContext taskAttemptContext = new TaskAttemptContext(job.getConfiguration(), - new TaskAttemptID("jtid", 0, false, 0, 0)); - RecordReader recordReader = inputFormat.createRecordReader(inputSplit, - taskAttemptContext); - recordReader.initialize(inputSplit, taskAttemptContext); - - storage.prepareToRead(recordReader, null); - storages.add(storage); - } - return storages; - } - -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/pig/pom.xml ---------------------------------------------------------------------- diff --git a/pig/pom.xml b/pig/pom.xml index e766717..2df2d1c 100644 --- a/pig/pom.xml +++ b/pig/pom.xml @@ -1,15 +1,37 @@ <?xml version="1.0" encoding="utf-8"?> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> - <groupId>mvm.rya</groupId> - <artifactId>parent</artifactId> + <groupId>org.apache.rya</groupId> + <artifactId>rya-project</artifactId> <version>3.2.10-SNAPSHOT</version> </parent> + <artifactId>rya.pig</artifactId> + <name>Apache Rya Pig Projects</name> + <packaging>pom</packaging> - <name>${project.groupId}.${project.artifactId}</name> + <modules> <module>accumulo.pig</module> </modules>
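As an aside on the test fixtures above: every storage test builds a location URI of the form cloudbase://<table>?instance=...&user=...&password=...&range=a|c&columns=cf1,cf3|cq1&mock=true. The parsing side of that convention is not part of this diff, so the following is only an illustrative sketch — a hypothetical helper assuming the simple key=value query-string form the tests use (repeated keys such as the double range= in testMultipleRanges are left aside):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class LocationUriSketch {
        public static void main(String[] args) {
            String location = "cloudbase://testTable?instance=myinstance&user=user"
                    + "&password=pwd&range=a|c&columns=cf1,cf3|cq1&mock=true";

            // Split "cloudbase://<table>?<query>" into table name and query string.
            String rest = location.substring("cloudbase://".length());
            int q = rest.indexOf('?');
            String table = (q < 0) ? rest : rest.substring(0, q);

            // Collect key=value pairs; a real loader would need a multimap,
            // since the tests pass range= more than once.
            Map<String, String> params = new LinkedHashMap<String, String>();
            if (q >= 0) {
                for (String pair : rest.substring(q + 1).split("&")) {
                    int eq = pair.indexOf('=');
                    params.put(pair.substring(0, eq), pair.substring(eq + 1));
                }
            }

            System.out.println("table = " + table);                 // testTable
            System.out.println("range = " + params.get("range"));   // a|c
            System.out.println("mock  = " + params.get("mock"));    // true
        }
    }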
