[
https://issues.apache.org/jira/browse/FLINK-2105?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14647527#comment-14647527
]
ASF GitHub Bot commented on FLINK-2105:
---------------------------------------
Github user fhueske commented on a diff in the pull request:
https://github.com/apache/flink/pull/907#discussion_r35863699
--- Diff:
flink-runtime/src/main/java/org/apache/flink/runtime/operators/sort/AbstractMergeInnerJoinIterator.java
---
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.operators.sort;
+
+import org.apache.flink.api.common.functions.FlatJoinFunction;
+import org.apache.flink.api.common.typeutils.TypeComparator;
+import org.apache.flink.api.common.typeutils.TypePairComparator;
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.apache.flink.runtime.io.disk.iomanager.IOManager;
+import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
+import org.apache.flink.runtime.memorymanager.MemoryAllocationException;
+import org.apache.flink.runtime.memorymanager.MemoryManager;
+import org.apache.flink.util.Collector;
+import org.apache.flink.util.MutableObjectIterator;
+
+import java.util.Iterator;
+
+/**
+ * An implementation of the {@link
org.apache.flink.runtime.operators.util.JoinTaskIterator} that realizes the
+ * matching through a sort-merge join strategy.
+ */
+public abstract class AbstractMergeInnerJoinIterator<T1, T2, O> extends
AbstractMergeIterator<T1, T2, O> {
+
+ public AbstractMergeInnerJoinIterator(
+ MutableObjectIterator<T1> input1,
MutableObjectIterator<T2> input2,
+ TypeSerializer<T1> serializer1, TypeComparator<T1>
comparator1,
+ TypeSerializer<T2> serializer2, TypeComparator<T2>
comparator2,
+ TypePairComparator<T1, T2> pairComparator,
+ MemoryManager memoryManager,
+ IOManager ioManager,
+ int numMemoryPages,
+ AbstractInvokable parentTask)
+ throws MemoryAllocationException {
+ super(input1, input2, serializer1, comparator1, serializer2,
comparator2, pairComparator, memoryManager, ioManager, numMemoryPages,
parentTask);
+ }
+
+ /**
+ * Calls the <code>JoinFunction#match()</code> method for all two
key-value pairs that share the same key and come
+ * from different inputs. The output of the <code>match()</code> method
is forwarded.
+ * <p/>
+ * This method first zig-zags between the two sorted inputs in order to
find a common
+ * key, and then calls the match stub with the cross product of the
values.
+ *
+ * @throws Exception Forwards all exceptions from the user code and the
I/O system.
+ * @see
org.apache.flink.runtime.operators.util.JoinTaskIterator#callWithNextKey(org.apache.flink.api.common.functions.FlatJoinFunction,
org.apache.flink.util.Collector)
+ */
+ @Override
+ public boolean callWithNextKey(final FlatJoinFunction<T1, T2, O>
matchFunction, final Collector<O> collector)
+ throws Exception {
+ if (!this.iterator1.nextKey() || !this.iterator2.nextKey()) {
+ // consume all remaining keys (hack to prevent
remaining inputs during iterations, lets get rid of this soon)
+ while (this.iterator1.nextKey()) ;
+ while (this.iterator2.nextKey()) ;
+
+ return false;
+ }
+
+ final TypePairComparator<T1, T2> comparator =
this.pairComparator;
+ comparator.setReference(this.iterator1.getCurrent());
+ T2 current2 = this.iterator2.getCurrent();
+
+ // zig zag
+ while (true) {
+ // determine the relation between the (possibly
composite) keys
+ final int comp =
comparator.compareToReference(current2);
+
+ if (comp == 0) {
+ break;
+ }
+
+ if (comp < 0) {
+ if (!this.iterator2.nextKey()) {
+ return false;
+ }
+ current2 = this.iterator2.getCurrent();
+ } else {
+ if (!this.iterator1.nextKey()) {
+ return false;
+ }
+
comparator.setReference(this.iterator1.getCurrent());
+ }
+ }
+
+ // here, we have a common key! call the match function with the
cross product of the
+ // values
+ final Iterator<T1> values1 = this.iterator1.getValues();
+ final Iterator<T2> values2 = this.iterator2.getValues();
+
+ crossMatchingGroup(values1, values2, matchFunction, collector);
--- End diff --
matchFunction -> joinFunction
> Implement Sort-Merge Outer Join algorithm
> -----------------------------------------
>
> Key: FLINK-2105
> URL: https://issues.apache.org/jira/browse/FLINK-2105
> Project: Flink
> Issue Type: Sub-task
> Components: Local Runtime
> Reporter: Fabian Hueske
> Assignee: Ricky Pogalz
> Priority: Minor
> Fix For: pre-apache
>
>
> Flink does not natively support outer joins at the moment.
> This issue proposes to implement a sort-merge outer join algorithm that can
> cover left, right, and full outer joins.
> The implementation can be based on the regular sort-merge join iterator
> ({{ReusingMergeMatchIterator}} and {{NonReusingMergeMatchIterator}}, see also
> {{MatchDriver}} class)
> The Reusing and NonReusing variants differ in whether object instances are
> reused or new objects are created. I would start with the NonReusing variant
> which is safer from a user's point of view and should also be easier to
> implement.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)