[ 
https://issues.apache.org/jira/browse/TAJO-1415?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14501230#comment-14501230
 ] 

ASF GitHub Bot commented on TAJO-1415:
--------------------------------------

Github user jihoonson commented on a diff in the pull request:

    https://github.com/apache/tajo/pull/454#discussion_r28642811
  
    --- Diff: 
tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/WindowAggExec.java
 ---
    @@ -347,6 +350,372 @@ private void finalizeWindow() {
         }
       }
     
    +
    +  private void evaluateNonframableWindowFunction(int functionIndex) {
    +    for (int i = 0; i < accumulatedInTuples.size(); i++) {
    +      Tuple inTuple = accumulatedInTuples.get(i);
    +      Tuple outTuple = evaluatedTuples.get(i);
    +
    +      functions[functionIndex].merge(contexts[functionIndex], inTuple);
    +
    +      if (windowFuncFlags[functionIndex]) {
    +        Datum result = 
functions[functionIndex].terminate(contexts[functionIndex]);
    +        outTuple.put(nonFunctionColumnNum + functionIndex, result);
    +      }
    +    }
    +
    +    if (aggFuncFlags[functionIndex]) {
    +      for (int i = 0; i < evaluatedTuples.size(); i++) {
    +        Datum result = 
functions[functionIndex].terminate(contexts[functionIndex]);
    +        Tuple outTuple = evaluatedTuples.get(i);
    +        outTuple.put(nonFunctionColumnNum + functionIndex, result);
    +      }
    +    }
    +  }
    +
    +  private void evaluateFramableWindowFunction(int functionIndex, 
LogicalWindowSpec.LogicalWindowFrame windowFrame,
    +                                              BaseTupleComparator comp) {
    +    LogicalWindowSpec.LogicalWindowFrame.WindowFrameType windowFrameType = 
windowFrame.getFrameType();
    +    WindowSpec.WindowFrameUnit windowFrameUnit = 
windowFrame.getFrameUnit();
    +    int windowFrameStartOffset = windowFrame.getStartBound().getNumber();
    +    int windowFrameEndOffset = windowFrame.getEndBound().getNumber();
    +
    +    String funcName = functions[functionIndex].getName();
    +    int sameStartRange = 0; int sameEndRange = 0;
    +    int frameStart = 0, frameEnd = accumulatedInTuples.size() - 1;
    +    int actualStart = 0;
    +
    +    for (int i = 0; i < accumulatedInTuples.size(); i++) {
    +      switch(windowFrameType) {
    +        case TO_CURRENT_ROW:
    +          frameEnd = i + windowFrameEndOffset; break;
    +        case FROM_CURRENT_ROW:
    +          frameStart = i + windowFrameStartOffset; break;
    +        case SLIDING_WINDOW:
    +          frameStart = i + windowFrameStartOffset;
    +          frameEnd = i + windowFrameEndOffset;
    +          break;
    +      }
    +
    +      // RANGE window frame SHOULD include all the rows that has the same 
order by value with the current row
    +      //   if comp == null, there is no order by and window frame is set 
as the entire partition
    +      if (comp != null && windowFrameUnit == 
WindowSpec.WindowFrameUnit.RANGE) {
    +        // move frame end point to the last row of the same order by value
    +        if (sameEndRange == 0) {
    +          sameEndRange = numOfTuplesWithTheSameKeyValue(frameEnd, comp, 
true);
    +        }
    +        sameEndRange --;
    +        frameEnd += sameEndRange;
    +
    +        // move frame start point to the first row of the same order by 
value
    +        if (sameStartRange == 0) {
    +          sameStartRange = numOfTuplesWithTheSameKeyValue(frameStart, 
comp, true);
    +          actualStart = frameStart;
    +        }
    +        sameStartRange --;
    +        frameStart = actualStart;
    +      }
    +      // As the number of built-in window functions that support window 
frame is small,
    +      // special treatment for each function seems to be reasonable
    +      Tuple inTuple = getFunctionSpecificInput(funcName, frameStart, 
frameEnd);
    +      Datum result = NullDatum.get();
    +
    +      if (inTuple != null) {
    +        functions[functionIndex].merge(contexts[functionIndex], inTuple);
    +        result = 
functions[functionIndex].terminate(contexts[functionIndex]);
    +      }
    +      Tuple outTuple = evaluatedTuples.get(i);
    +      outTuple.put(nonFunctionColumnNum + functionIndex, result);
    +    }
    +  }
    +
    +  // it returns the number of Tuples with the same order by key value
    +  // if only one Tuple for the given order by key exists, it returns 1
    +  private int numOfTuplesWithTheSameKeyValue(int startOffset, 
BaseTupleComparator comp, boolean isForward) {
    +    int numberOfTuples = 0;
    +
    +    Tuple inTuple = accumulatedInTuples.get(startOffset);
    +    if (isForward) {
    +      do {
    +        numberOfTuples++;
    +        startOffset++;
    +      } while (startOffset < accumulatedInTuples.size() && 
comp.compare(accumulatedInTuples.get(startOffset), inTuple) == 0);
    +    } else {      // backward direction
    +      do {
    +        numberOfTuples++;
    +        startOffset--;
    +      } while (startOffset >= 0 && 
comp.compare(accumulatedInTuples.get(startOffset), inTuple) == 0);
    +    }
    +
    +    return numberOfTuples;
    +  }
    +
    +  private Tuple getFunctionSpecificInput(String funcName, int dataStart, 
int dataEnd) {
    --- End diff --
    
    ```WindowAggFunc``` is designed to abstract all kinds of window functions, 
and each function inherited ```WindowAggFunc``` does some its own work. Even 
though this code makes the implementation of ```FirstValue``` and 
```LastValue``` simpler, I think that this is opposed to our design. In 
addition, I think that this kind of function-specific routine makes the 
implementation of ```WindowAggExec``` complex. 
    Do you have any reasons?


> Window frame support
> --------------------
>
>                 Key: TAJO-1415
>                 URL: https://issues.apache.org/jira/browse/TAJO-1415
>             Project: Tajo
>          Issue Type: Sub-task
>          Components: distributed query plan, parser, physical operator, 
> planner/optimizer
>            Reporter: Keuntae Park
>            Assignee: Keuntae Park
>             Fix For: window function
>
>
> We can define frame clause in window definition like
> {code}
> [ RANGE | ROWS ] frame_start
> [ RANGE | ROWS ] BETWEEN frame_start AND frame_end
> {code}
> , where frame_start and frame_end can be one of 
> {code}
> UNBOUNDED PRECEDING
> value PRECEDING
> CURRENT ROW
> value FOLLOWING
> UNBOUNDED FOLLOWING
> {code}
> According to the window functions description of 
> PostgreSQL(http://www.postgresql.org/docs/9.4/static/functions-window.html),
> there are two types of window functions based on window frame support.
> 1) row_number, rank, dense_rank, percent_rank, cume_dist, tile, lag and lead:
> these functions only work within window partition, which means window frame 
> has no effect on these functions.
> 2) first_value, last_value, nth_value, and aggregation function as as window 
> function: these functions should work with rows within window frame.
> Currently, Tajo parser recognize the window frame grammar but windowAggExec 
> does not use that information. 
> It works as if window frame is set as "RANGE BETWEEN UNBOUND PROCEEDING AND 
> UNBOUNDED FOLLOWING", which is different from the default window frame 
> setting of most DBMSs "RANGE BETWEEN UNBOUND PROCEEDING AND CURRENT ROW".
> Following should be done:
> 1) Applying correct default window frame for first_value, last_value, 
> nth_value, and aggregation functions .
> 2) Supporting various window frame expressions.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to