Simple nested loop join implementation
Project: http://git-wip-us.apache.org/repos/asf/metamodel/repo Commit: http://git-wip-us.apache.org/repos/asf/metamodel/commit/ee2b9167 Tree: http://git-wip-us.apache.org/repos/asf/metamodel/tree/ee2b9167 Diff: http://git-wip-us.apache.org/repos/asf/metamodel/diff/ee2b9167 Branch: refs/heads/master Commit: ee2b91671d8cb6b35046aabba8426724831b7205 Parents: ef5ac06 Author: JoÌrg Unbehauen <jo...@unbehauen.net> Authored: Tue May 3 12:34:03 2016 +0200 Committer: JoÌrg Unbehauen <jo...@unbehauen.net> Committed: Fri Jul 21 23:25:38 2017 +0200 ---------------------------------------------------------------------- .../java/org/apache/metamodel/JoinHelper.java | 133 +++++++++++++++++++ .../org/apache/metamodel/MetaModelHelper.java | 79 +++-------- 2 files changed, 152 insertions(+), 60 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/metamodel/blob/ee2b9167/core/src/main/java/org/apache/metamodel/JoinHelper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/metamodel/JoinHelper.java b/core/src/main/java/org/apache/metamodel/JoinHelper.java new file mode 100644 index 0000000..c8cdfa7 --- /dev/null +++ b/core/src/main/java/org/apache/metamodel/JoinHelper.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.metamodel; + +import com.google.common.collect.Lists; +import org.apache.metamodel.data.*; +import org.apache.metamodel.query.FilterItem; +import org.apache.metamodel.query.SelectItem; + +import java.util.*; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +/** + * Join Execution and related methods. + */ +public abstract class JoinHelper { + + + /** + * Executes a simple nested loop join. The innerLoopDs will be copied in an in-memory dataset. + * + * @param outerLoopDs + * @param innerLoopDs + * @param filters + * @return + */ + public static InMemoryDataSet nestedLoopJoin( DataSet innerLoopDs, DataSet outerLoopDs, Collection<FilterItem> filters){ + + List<Row> innerRows = innerLoopDs.toRows(); + + + List<SelectItem> innerSelItems = Lists.newArrayList(innerLoopDs.getSelectItems()); + List<SelectItem> outerSelItems = Lists.newArrayList(outerLoopDs.getSelectItems()); + List<SelectItem> allItems = Lists.newArrayList(innerSelItems); + allItems.addAll(outerSelItems); + + + Set<FilterItem> filterAll = applicableFilters(filters, allItems); + + + DataSetHeader jointHeader = joinHeader(outerLoopDs, innerLoopDs); + + List<Row> resultRows = Lists.newArrayList(); + for(Row outerRow: outerLoopDs){ + for(Row innerRow: innerRows){ + Row joinedRow = joinRow(outerRow,innerRow,jointHeader); + if(filterAll.isEmpty()|| filterAll.stream().allMatch(fi -> fi.accept(joinedRow))){ + resultRows.add(joinedRow); + } + } + } + + + + return new InMemoryDataSet(jointHeader,resultRows); + } + + + public static Set<FilterItem> applicableFilters(Collection<FilterItem> filters, Collection<SelectItem> selectItemList) { + + Set<SelectItem> items = new HashSet<>(selectItemList); + + return filters.stream().filter( fi -> { + Collection<SelectItem> fiSelectItems = Lists.newArrayList(fi.getSelectItem()); + Object operand = fi.getOperand(); + if(operand instanceof SelectItem){ + fiSelectItems.add((SelectItem) operand); + } + + return items.containsAll(fiSelectItems); + + }).collect(Collectors.toSet()); + } + + + + + + /** + * joins two datasetheader. + * @param ds1 the headers for the left + * @param ds2 the tright headers + * @return + */ + public static DataSetHeader joinHeader(DataSet ds1, DataSet ds2){ + List<SelectItem> joinedSelectItems = Lists.newArrayList(ds1.getSelectItems()); + joinedSelectItems.addAll(Lists.newArrayList(ds2.getSelectItems())); + return new CachingDataSetHeader(joinedSelectItems); + + + } + + /** + * Joins two rows into one. + * + * Consider parameter ordering to maintain backwards compatbility + * + * @param row1 the tuples, that will be on the left + * @param row2 the tuples, that will be on the right + * @param jointHeader + * @return + */ + public static Row joinRow(Row row1, Row row2, DataSetHeader jointHeader){ + Object[] joinedRow = new Object[row1.getValues().length + row2.getValues().length]; + + System.arraycopy(row1.getValues(),0,joinedRow,0,row1.getValues().length); + System.arraycopy(row2.getValues(),0,joinedRow,row1.getValues().length,row2.getValues().length); + + + return new DefaultRow(jointHeader,joinedRow); + + + } + + +} http://git-wip-us.apache.org/repos/asf/metamodel/blob/ee2b9167/core/src/main/java/org/apache/metamodel/MetaModelHelper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/metamodel/MetaModelHelper.java b/core/src/main/java/org/apache/metamodel/MetaModelHelper.java index 09d47bc..c788633 100644 --- a/core/src/main/java/org/apache/metamodel/MetaModelHelper.java +++ b/core/src/main/java/org/apache/metamodel/MetaModelHelper.java @@ -18,17 +18,10 @@ */ package org.apache.metamodel; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.Map.Entry; +import com.google.common.collect.Lists; import org.apache.metamodel.data.CachingDataSetHeader; import org.apache.metamodel.data.DataSet; import org.apache.metamodel.data.DataSetHeader; @@ -46,6 +39,7 @@ import org.apache.metamodel.data.SubSelectionDataSet; import org.apache.metamodel.query.FilterItem; import org.apache.metamodel.query.FromItem; import org.apache.metamodel.query.GroupByItem; +import org.apache.metamodel.query.OperatorType; import org.apache.metamodel.query.OrderByItem; import org.apache.metamodel.query.Query; import org.apache.metamodel.query.ScalarFunction; @@ -176,71 +170,36 @@ public final class MetaModelHelper { public static DataSet getCarthesianProduct(DataSet... fromDataSets) { return getCarthesianProduct(fromDataSets, new FilterItem[0]); } + + + public static DataSet getCarthesianProduct(DataSet[] fromDataSets, Iterable<FilterItem> whereItems) { + assert(fromDataSets.length>0); // First check if carthesian product is even nescesary if (fromDataSets.length == 1) { return getFiltered(fromDataSets[0], whereItems); } + // do a nested loop join, no matter what + Iterator<DataSet> dsIter = Lists.newArrayList(fromDataSets).iterator(); - List<SelectItem> selectItems = new ArrayList<SelectItem>(); - for (DataSet dataSet : fromDataSets) { - for (int i = 0; i < dataSet.getSelectItems().length; i++) { - SelectItem item = dataSet.getSelectItems()[i]; - selectItems.add(item); - } - } + DataSet joined = dsIter.next(); - int selectItemOffset = 0; - List<Object[]> data = new ArrayList<Object[]>(); - for (int fromDataSetIndex = 0; fromDataSetIndex < fromDataSets.length; fromDataSetIndex++) { - DataSet fromDataSet = fromDataSets[fromDataSetIndex]; - SelectItem[] fromSelectItems = fromDataSet.getSelectItems(); - if (fromDataSetIndex == 0) { - while (fromDataSet.next()) { - Object[] values = fromDataSet.getRow().getValues(); - Object[] row = new Object[selectItems.size()]; - System.arraycopy(values, 0, row, selectItemOffset, values.length); - data.add(row); - } - fromDataSet.close(); - } else { - List<Object[]> fromDataRows = new ArrayList<Object[]>(); - while (fromDataSet.next()) { - fromDataRows.add(fromDataSet.getRow().getValues()); - } - fromDataSet.close(); - for (int i = 0; i < data.size(); i = i + fromDataRows.size()) { - Object[] originalRow = data.get(i); - data.remove(i); - for (int j = 0; j < fromDataRows.size(); j++) { - Object[] newRow = fromDataRows.get(j); - System.arraycopy(newRow, 0, originalRow, selectItemOffset, newRow.length); - data.add(i + j, originalRow.clone()); - } - } - } - selectItemOffset += fromSelectItems.length; - } + while(dsIter.hasNext()){ + joined = JoinHelper.nestedLoopJoin( + dsIter.next(), + joined, + Lists.newArrayList(whereItems)); - if (data.isEmpty()) { - return new EmptyDataSet(selectItems); } - final DataSetHeader header = new CachingDataSetHeader(selectItems); - final List<Row> rows = new ArrayList<Row>(data.size()); - for (Object[] objects : data) { - rows.add(new DefaultRow(header, objects, null)); - } + return joined; + - DataSet result = new InMemoryDataSet(header, rows); - if (whereItems != null) { - DataSet filteredResult = getFiltered(result, whereItems); - result = filteredResult; - } - return result; } + + public static DataSet getCarthesianProduct(DataSet[] fromDataSets, FilterItem... filterItems) { return getCarthesianProduct(fromDataSets, Arrays.asList(filterItems)); }