Github user meiercaleb commented on a diff in the pull request: https://github.com/apache/incubator-rya/pull/251#discussion_r153561179 --- Diff: extras/rya.pcj.fluo/pcj.fluo.app/src/main/java/org/apache/rya/indexing/pcj/fluo/app/util/BindingHashShardingFunction.java --- @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.indexing.pcj.fluo.app.util; + +import static org.apache.rya.indexing.pcj.fluo.app.IncrementalUpdateConstants.NODEID_BS_DELIM; +import static org.apache.rya.indexing.pcj.storage.accumulo.BindingSetStringConverter.TYPE_DELIM; + +import org.apache.fluo.api.data.Bytes; +import org.apache.fluo.api.data.Bytes.BytesBuilder; +import org.apache.rya.api.domain.RyaType; +import org.apache.rya.api.resolver.RdfToRyaConversions; +import org.apache.rya.indexing.pcj.storage.accumulo.BindingSetStringConverter; +import org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder; +import org.apache.rya.indexing.pcj.storage.accumulo.VisibilityBindingSet; +import org.openrdf.model.Value; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.common.hash.Hashing; + +/** + * This class adds and removes a hash to and from the rowId for sharding purposes. + * + */ +public class BindingHashShardingFunction { + + private static final BindingSetStringConverter BS_CONVERTER = new BindingSetStringConverter(); + private static final int HASH_LEN = 4; + + /** + * Generates a sharded rowId. 
+ * + * @param nodeId - Node Id with type and UUID + * @param varOrder - VarOrder used to order BindingSet values + * @param bs - BindingSet with partially formed query values + * @return - serialized Bytes rowId for storing BindingSet results in Fluo + */ + public static Bytes addShard(String nodeId, VariableOrder varOrder, VisibilityBindingSet bs) { + String[] rowPrefixAndId = nodeId.split("_"); + Preconditions.checkArgument(rowPrefixAndId.length == 2); + String prefix = rowPrefixAndId[0]; + String id = rowPrefixAndId[1]; + + String firstBindingString = ""; + Bytes rowSuffix = Bytes.of(id); + if (varOrder.getVariableOrders().size() > 0) { + VariableOrder first = new VariableOrder(varOrder.getVariableOrders().get(0)); + firstBindingString = BS_CONVERTER.convert(bs, first); + rowSuffix = RowKeyUtil.makeRowKey(id, varOrder, bs); + } + + BytesBuilder builder = Bytes.builder(); + builder.append(Bytes.of(prefix + ":")); + builder.append(genHash(Bytes.of(id + NODEID_BS_DELIM + firstBindingString))); + builder.append(":"); + builder.append(rowSuffix); + return builder.toBytes(); + } + + /** + * Generates a sharded rowId. + * + * @param nodeId - Node Id with type and UUID + * @param firstBsVal - String representation of the first BsValue + * @return - serialized Bytes prefix for scanning rows + */ + public static Bytes getShardedScanPrefix(String nodeId, Value firstBsVal) { + Preconditions.checkNotNull(firstBsVal); + + final RyaType ryaValue = RdfToRyaConversions.convertValue(firstBsVal); + final String bindingString = ryaValue.getData() + TYPE_DELIM + ryaValue.getDataType(); + + return getShardedScanPrefix(nodeId, bindingString); + } + + /** + * Generates a sharded rowId from the indicated nodeId and bindingString. + * + * @param nodeId - NodeId with type and UUID + * @param bindingString - String representation of BindingSet values, as formed by {@link BindingSetStringConverter} + * . --- End diff -- Done.
---