Repository: tajo Updated Branches: refs/heads/master d2a4f9b3d -> 14a1e536c
TAJO-923: Add VAR_SAMP and VAR_POP window functions. Closes #458 Signed-off-by: Jihoon Son <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/14a1e536 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/14a1e536 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/14a1e536 Branch: refs/heads/master Commit: 14a1e536c2848dbb9e1a714076d833a7ff1f5640 Parents: d2a4f9b Author: Dongjoon Hyun <[email protected]> Authored: Fri Apr 17 23:55:57 2015 +0900 Committer: Jihoon Son <[email protected]> Committed: Fri Apr 17 23:57:28 2015 +0900 ---------------------------------------------------------------------- CHANGES | 3 + .../tajo/engine/function/builtin/StdDev.java | 94 -------------- .../tajo/engine/function/builtin/StdDevPop.java | 10 +- .../engine/function/builtin/StdDevSamp.java | 8 +- .../tajo/engine/function/builtin/VarPop.java | 42 +++++++ .../engine/function/builtin/VarPopDouble.java | 39 ++++++ .../engine/function/builtin/VarPopFloat.java | 39 ++++++ .../tajo/engine/function/builtin/VarPopInt.java | 39 ++++++ .../engine/function/builtin/VarPopLong.java | 39 ++++++ .../tajo/engine/function/builtin/VarSamp.java | 40 ++++++ .../engine/function/builtin/VarSampDouble.java | 39 ++++++ .../engine/function/builtin/VarSampFloat.java | 39 ++++++ .../engine/function/builtin/VarSampInt.java | 39 ++++++ .../engine/function/builtin/VarSampLong.java | 39 ++++++ .../tajo/engine/function/builtin/Variance.java | 94 ++++++++++++++ tajo-core/src/main/proto/InternalTypes.proto | 2 +- .../engine/function/TestBuiltinFunctions.java | 123 +++++++++++++++++++ 17 files changed, 624 insertions(+), 104 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index b04358c..4899ed2 100644 --- a/CHANGES +++ b/CHANGES @@ -9,6 +9,9 @@ Release 0.11.0 - unreleased TAJO-921: Add STDDEV_SAMP and STDDEV_POP window functions. (Keuntae Park) + TAJO-923: Add VAR_SAMP and VAR_POP window functions. + (Contributed by Dongjoon Hyun, Committed by jihoon) + TAJO-1135: Implement queryable virtual table for cluster information. (jihun) http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDev.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDev.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDev.java deleted file mode 100644 index e736167..0000000 --- a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDev.java +++ /dev/null @@ -1,94 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tajo.engine.function.builtin; - -import org.apache.tajo.catalog.CatalogUtil; -import org.apache.tajo.catalog.Column; -import org.apache.tajo.common.TajoDataTypes.DataType; -import org.apache.tajo.common.TajoDataTypes.Type; -import org.apache.tajo.datum.Datum; -import org.apache.tajo.datum.NullDatum; -import org.apache.tajo.datum.ProtobufDatum; -import org.apache.tajo.plan.function.AggFunction; -import org.apache.tajo.plan.function.FunctionContext; -import org.apache.tajo.storage.Tuple; - -import static org.apache.tajo.InternalTypes.StdDevProto; - -public abstract class StdDev extends AggFunction<Datum> { - - public StdDev(Column[] definedArgs) { - super(definedArgs); - } - - public StdDevContext newContext() { - return new StdDevContext(); - } - - @Override - public void eval(FunctionContext ctx, Tuple params) { - StdDevContext StdDevCtx = (StdDevContext) ctx; - Datum datum = params.get(0); - if (datum.isNotNull()) { - double delta = datum.asFloat8() - StdDevCtx.avg; - StdDevCtx.count++; - StdDevCtx.avg += delta/StdDevCtx.count; - StdDevCtx.squareSumOfDiff += delta * (datum.asFloat8() - StdDevCtx.avg); - } - } - - @Override - public void merge(FunctionContext ctx, Tuple part) { - StdDevContext StdDevCtx = (StdDevContext) ctx; - Datum d = part.get(0); - if (d instanceof NullDatum) { - return; - } - ProtobufDatum datum = (ProtobufDatum) d; - StdDevProto proto = (StdDevProto) datum.get(); - double delta = proto.getAvg() - StdDevCtx.avg; - StdDevCtx.avg += delta * proto.getCount() / (StdDevCtx.count + proto.getCount()); - StdDevCtx.squareSumOfDiff += proto.getSquareSumOfDiff() + delta * delta * StdDevCtx.count * proto.getCount() / (StdDevCtx.count + proto.getCount()); - StdDevCtx.count += proto.getCount(); - } - - @Override - public Datum getPartialResult(FunctionContext ctx) { - StdDevContext StdDevCtx = (StdDevContext) ctx; - if (StdDevCtx.count == 0) { - return NullDatum.get(); - } - StdDevProto.Builder builder = StdDevProto.newBuilder(); - builder.setSquareSumOfDiff(StdDevCtx.squareSumOfDiff); - builder.setAvg(StdDevCtx.avg); - builder.setCount(StdDevCtx.count); - return new ProtobufDatum(builder.build()); - } - - @Override - public DataType getPartialResultType() { - return CatalogUtil.newDataType(Type.PROTOBUF, StdDevProto.class.getName()); - } - - protected static class StdDevContext implements FunctionContext { - double squareSumOfDiff = 0.0; - double avg = 0.0; - long count = 0; - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevPop.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevPop.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevPop.java index 3403df1..3a5a017 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevPop.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevPop.java @@ -24,19 +24,19 @@ import org.apache.tajo.datum.DatumFactory; import org.apache.tajo.datum.NullDatum; import org.apache.tajo.plan.function.FunctionContext; -public abstract class StdDevPop extends StdDev { +public abstract class StdDevPop extends Variance { public StdDevPop(Column[] definedArgs) { super(definedArgs); } @Override public Datum terminate(FunctionContext ctx) { - StdDevContext StdDevCtx = (StdDevContext) ctx; - if (StdDevCtx.count == 0) { + VarianceContext varianceCtx = (VarianceContext) ctx; + if (varianceCtx.count == 0) { return NullDatum.get(); - } else if (StdDevCtx.count == 1) { + } else if (varianceCtx.count == 1) { return DatumFactory.createFloat8(0); } - return DatumFactory.createFloat8(Math.sqrt(StdDevCtx.squareSumOfDiff / StdDevCtx.count)); + return DatumFactory.createFloat8(Math.sqrt(varianceCtx.squareSumOfDiff / varianceCtx.count)); } } http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevSamp.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevSamp.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevSamp.java index 60f83a4..f4d56f5 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevSamp.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevSamp.java @@ -24,18 +24,18 @@ import org.apache.tajo.datum.DatumFactory; import org.apache.tajo.datum.NullDatum; import org.apache.tajo.plan.function.FunctionContext; -public abstract class StdDevSamp extends StdDev { +public abstract class StdDevSamp extends Variance { public StdDevSamp(Column[] definedArgs) { super(definedArgs); } @Override public Datum terminate(FunctionContext ctx) { - StdDevContext StdDevCtx = (StdDevContext) ctx; - if (StdDevCtx.count <= 1) { + VarianceContext varianceCtx = (VarianceContext) ctx; + if (varianceCtx.count <= 1) { return NullDatum.get(); } - return DatumFactory.createFloat8(Math.sqrt(StdDevCtx.squareSumOfDiff / (StdDevCtx.count - 1))); + return DatumFactory.createFloat8(Math.sqrt(varianceCtx.squareSumOfDiff / (varianceCtx.count - 1))); } } http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPop.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPop.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPop.java new file mode 100644 index 0000000..2420302 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPop.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.builtin; + +import org.apache.tajo.catalog.Column; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.plan.function.FunctionContext; + +public abstract class VarPop extends Variance { + public VarPop(Column[] definedArgs) { + super(definedArgs); + } + + @Override + public Datum terminate(FunctionContext ctx) { + VarianceContext varianceCtx = (VarianceContext) ctx; + if (varianceCtx.count == 0) { + return NullDatum.get(); + } else if (varianceCtx.count == 1) { + return DatumFactory.createFloat8(0); + } + return DatumFactory.createFloat8(varianceCtx.squareSumOfDiff / varianceCtx.count); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopDouble.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopDouble.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopDouble.java new file mode 100644 index 0000000..1f6d733 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopDouble.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.builtin; + +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; + +@Description( + functionName = "VAR_POP", + description = "The variance of a set of numbers.", + example = "> SELECT VAR_POP(expr);", + returnType = Type.FLOAT8, + paramTypes = {@ParamTypes(paramTypes = {Type.FLOAT8})} +) +public class VarPopDouble extends VarPop { + public VarPopDouble() { + super(new Column[] { + new Column("expr", Type.FLOAT8) + }); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopFloat.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopFloat.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopFloat.java new file mode 100644 index 0000000..6351dfe --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopFloat.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.builtin; + +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; + +@Description( + functionName = "VAR_POP", + description = "The variance of a set of numbers.", + example = "> SELECT VAR_POP(expr);", + returnType = Type.FLOAT8, + paramTypes = {@ParamTypes(paramTypes = {Type.FLOAT4})} +) +public class VarPopFloat extends VarPop { + public VarPopFloat() { + super(new Column[] { + new Column("expr", Type.FLOAT4) + }); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopInt.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopInt.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopInt.java new file mode 100644 index 0000000..35ec4f3 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopInt.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.builtin; + +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; + +@Description( + functionName = "VAR_POP", + description = "The variance of a set of numbers.", + example = "> SELECT VAR_POP(expr);", + returnType = Type.FLOAT8, + paramTypes = {@ParamTypes(paramTypes = {Type.INT4})} +) +public class VarPopInt extends VarPop { + public VarPopInt() { + super(new Column[] { + new Column("expr", Type.INT4) + }); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopLong.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopLong.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopLong.java new file mode 100644 index 0000000..14d8b55 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopLong.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.builtin; + +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; + +@Description( + functionName = "VAR_POP", + description = "The variance of a set of numbers.", + example = "> SELECT VAR_POP(expr);", + returnType = Type.FLOAT8, + paramTypes = {@ParamTypes(paramTypes = {Type.INT8})} +) +public class VarPopLong extends VarPop { + public VarPopLong() { + super(new Column[] { + new Column("expr", Type.INT8) + }); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSamp.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSamp.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSamp.java new file mode 100644 index 0000000..2f2ab51 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSamp.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.builtin; + +import org.apache.tajo.catalog.Column; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.plan.function.FunctionContext; + +public abstract class VarSamp extends Variance { + public VarSamp(Column[] definedArgs) { + super(definedArgs); + } + + @Override + public Datum terminate(FunctionContext ctx) { + VarianceContext varianceCtx = (VarianceContext) ctx; + if (varianceCtx.count <= 1) { + return NullDatum.get(); + } + return DatumFactory.createFloat8(varianceCtx.squareSumOfDiff / (varianceCtx.count - 1)); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampDouble.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampDouble.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampDouble.java new file mode 100644 index 0000000..de4ee7f --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampDouble.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.builtin; + +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; + +@Description( + functionName = "VAR_SAMP", + description = "The unbiased sample variance of a set of numbers.", + example = "> SELECT VAR_SAMP(expr);", + returnType = Type.FLOAT8, + paramTypes = {@ParamTypes(paramTypes = {Type.FLOAT8})} +) +public class VarSampDouble extends VarSamp { + public VarSampDouble() { + super(new Column[] { + new Column("expr", Type.FLOAT8) + }); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampFloat.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampFloat.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampFloat.java new file mode 100644 index 0000000..014affc --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampFloat.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.builtin; + +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; + +@Description( + functionName = "VAR_SAMP", + description = "The unbiased sample variance of a set of numbers.", + example = "> SELECT VAR_SAMP(expr);", + returnType = Type.FLOAT8, + paramTypes = {@ParamTypes(paramTypes = {Type.FLOAT4})} +) +public class VarSampFloat extends VarSamp { + public VarSampFloat() { + super(new Column[] { + new Column("expr", Type.FLOAT4) + }); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampInt.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampInt.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampInt.java new file mode 100644 index 0000000..1e50b9c --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampInt.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.builtin; + +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; + +@Description( + functionName = "VAR_SAMP", + description = "The unbiased sample variance of a set of numbers.", + example = "> SELECT VAR_SAMP(expr);", + returnType = Type.FLOAT8, + paramTypes = {@ParamTypes(paramTypes = {Type.INT4})} +) +public class VarSampInt extends VarSamp { + public VarSampInt() { + super(new Column[] { + new Column("expr", Type.INT4) + }); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampLong.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampLong.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampLong.java new file mode 100644 index 0000000..a0a5a58 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampLong.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.builtin; + +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; + +@Description( + functionName = "VAR_SAMP", + description = "The unbiased sample variance of a set of numbers.", + example = "> SELECT VAR_SAMP(expr);", + returnType = Type.FLOAT8, + paramTypes = {@ParamTypes(paramTypes = {Type.INT8})} +) +public class VarSampLong extends VarSamp { + public VarSampLong() { + super(new Column[] { + new Column("expr", Type.INT8) + }); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/Variance.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/Variance.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/Variance.java new file mode 100644 index 0000000..3d925d1 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/Variance.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.builtin; + +import org.apache.tajo.catalog.CatalogUtil; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes.DataType; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.datum.ProtobufDatum; +import org.apache.tajo.plan.function.AggFunction; +import org.apache.tajo.plan.function.FunctionContext; +import org.apache.tajo.storage.Tuple; + +import static org.apache.tajo.InternalTypes.VarianceProto; + +public abstract class Variance extends AggFunction<Datum> { + + public Variance(Column[] definedArgs) { + super(definedArgs); + } + + public VarianceContext newContext() { + return new VarianceContext(); + } + + @Override + public void eval(FunctionContext ctx, Tuple params) { + VarianceContext varianceCtx = (VarianceContext) ctx; + Datum datum = params.get(0); + if (datum.isNotNull()) { + double delta = datum.asFloat8() - varianceCtx.avg; + varianceCtx.count++; + varianceCtx.avg += delta/varianceCtx.count; + varianceCtx.squareSumOfDiff += delta * (datum.asFloat8() - varianceCtx.avg); + } + } + + @Override + public void merge(FunctionContext ctx, Tuple part) { + VarianceContext varianceCtx = (VarianceContext) ctx; + Datum d = part.get(0); + if (d instanceof NullDatum) { + return; + } + ProtobufDatum datum = (ProtobufDatum) d; + VarianceProto proto = (VarianceProto) datum.get(); + double delta = proto.getAvg() - varianceCtx.avg; + varianceCtx.avg += delta * proto.getCount() / (varianceCtx.count + proto.getCount()); + varianceCtx.squareSumOfDiff += proto.getSquareSumOfDiff() + delta * delta * varianceCtx.count * proto.getCount() / (varianceCtx.count + proto.getCount()); + varianceCtx.count += proto.getCount(); + } + + @Override + public Datum getPartialResult(FunctionContext ctx) { + VarianceContext varianceCtx = (VarianceContext) ctx; + if (varianceCtx.count == 0) { + return NullDatum.get(); + } + VarianceProto.Builder builder = VarianceProto.newBuilder(); + builder.setSquareSumOfDiff(varianceCtx.squareSumOfDiff); + builder.setAvg(varianceCtx.avg); + builder.setCount(varianceCtx.count); + return new ProtobufDatum(builder.build()); + } + + @Override + public DataType getPartialResultType() { + return CatalogUtil.newDataType(Type.PROTOBUF, VarianceProto.class.getName()); + } + + protected static class VarianceContext implements FunctionContext { + double squareSumOfDiff = 0.0; + double avg = 0.0; + long count = 0; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/proto/InternalTypes.proto ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/proto/InternalTypes.proto b/tajo-core/src/main/proto/InternalTypes.proto index 7108991..13dd107 100644 --- a/tajo-core/src/main/proto/InternalTypes.proto +++ b/tajo-core/src/main/proto/InternalTypes.proto @@ -31,7 +31,7 @@ message AvgDoubleProto { required int64 count = 2; } -message StdDevProto { +message VarianceProto { required double squareSumOfDiff = 1; required double avg = 2; required int64 count = 3; http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java index d9d9b77..6bec3ad 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java @@ -597,6 +597,129 @@ public class TestBuiltinFunctions extends QueryTestCaseBase { } + @Test + public void testVarSamp() throws Exception { + KeyValueSet tableOptions = new KeyValueSet(); + tableOptions.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER); + tableOptions.set(StorageConstants.TEXT_NULL, "\\\\N"); + + Schema schema = new Schema(); + schema.addColumn("id", TajoDataTypes.Type.INT4); + schema.addColumn("value_int", TajoDataTypes.Type.INT4); + schema.addColumn("value_long", TajoDataTypes.Type.INT8); + schema.addColumn("value_float", TajoDataTypes.Type.FLOAT4); + schema.addColumn("value_double", TajoDataTypes.Type.FLOAT8); + String[] data = new String[]{ + "1|\\N|-111|1.2|-50.5", + "2|1|\\N|\\N|52.5", + "3|2|-333|2.8|\\N" }; + TajoTestingCluster.createTable("table11", schema, tableOptions, data, 1); + + try { + ResultSet res = executeString("select var_samp(value_int) as vs_int, var_samp(value_long) as vs_long, var_samp(value_float) as vs_float, var_samp(value_double) as vs_double from table11"); + String ascExpected = "vs_int,vs_long,vs_float,vs_double\n" + + "-------------------------------\n" + + "0.5,24642.0,1.279999847412114,5304.5\n"; + + assertEquals(ascExpected, resultSetToString(res)); + res.close(); + } finally { + executeString("DROP TABLE table11 PURGE"); + } + } + + @Test + public void testVarSampWithFewNumbers() throws Exception { + KeyValueSet tableOptions = new KeyValueSet(); + tableOptions.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER); + tableOptions.set(StorageConstants.TEXT_NULL, "\\\\N"); + + Schema schema = new Schema(); + schema.addColumn("id", TajoDataTypes.Type.INT4); + schema.addColumn("value_int", TajoDataTypes.Type.INT4); + schema.addColumn("value_long", TajoDataTypes.Type.INT8); + schema.addColumn("value_float", TajoDataTypes.Type.FLOAT4); + schema.addColumn("value_double", TajoDataTypes.Type.FLOAT8); + String[] data = new String[]{ + "1|\\N|\\N|\\N|-50.5", + "2|1|\\N|\\N|\\N", + "3|\\N|\\N|\\N|\\N" }; + TajoTestingCluster.createTable("table11", schema, tableOptions, data, 1); + + try { + ResultSet res = executeString("select var_samp(value_int) as vsamp_int, var_samp(value_long) as vsamp_long, var_samp(value_float) as vsamp_float, var_samp(value_double) as vsamp_double from table11"); + String ascExpected = "vsamp_int,vsamp_long,vsamp_float,vsamp_double\n" + + "-------------------------------\n" + + "null,null,null,null\n"; + + assertEquals(ascExpected, resultSetToString(res)); + res.close(); + } finally { + executeString("DROP TABLE table11 PURGE"); + } + } + + @Test + public void testVarPop() throws Exception { + KeyValueSet tableOptions = new KeyValueSet(); + tableOptions.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER); + tableOptions.set(StorageConstants.TEXT_NULL, "\\\\N"); + + Schema schema = new Schema(); + schema.addColumn("id", TajoDataTypes.Type.INT4); + schema.addColumn("value_int", TajoDataTypes.Type.INT4); + schema.addColumn("value_long", TajoDataTypes.Type.INT8); + schema.addColumn("value_float", TajoDataTypes.Type.FLOAT4); + schema.addColumn("value_double", TajoDataTypes.Type.FLOAT8); + String[] data = new String[]{ + "1|\\N|-111|1.2|-50.5", + "2|1|\\N|\\N|52.5", + "3|2|-333|2.8|\\N" }; + TajoTestingCluster.createTable("table11", schema, tableOptions, data, 1); + + try { + ResultSet res = executeString("select var_pop(value_int) as vpop_int, var_pop(value_long) as vpop_long, var_pop(value_float) as vpop_float, var_pop(value_double) as vpop_double from table11"); + String ascExpected = "vpop_int,vpop_long,vpop_float,vpop_double\n" + + "-------------------------------\n" + + "0.25,12321.0,0.639999923706057,2652.25\n"; + + assertEquals(ascExpected, resultSetToString(res)); + res.close(); + } finally { + executeString("DROP TABLE table11 PURGE"); + } + } + + @Test + public void testVarPopWithFewNumbers() throws Exception { + KeyValueSet tableOptions = new KeyValueSet(); + tableOptions.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER); + tableOptions.set(StorageConstants.TEXT_NULL, "\\\\N"); + + Schema schema = new Schema(); + schema.addColumn("id", TajoDataTypes.Type.INT4); + schema.addColumn("value_int", TajoDataTypes.Type.INT4); + schema.addColumn("value_long", TajoDataTypes.Type.INT8); + schema.addColumn("value_float", TajoDataTypes.Type.FLOAT4); + schema.addColumn("value_double", TajoDataTypes.Type.FLOAT8); + String[] data = new String[]{ + "1|\\N|\\N|\\N|-50.5", + "2|1|\\N|\\N|\\N", + "3|\\N|\\N|\\N|\\N" }; + TajoTestingCluster.createTable("table11", schema, tableOptions, data, 1); + + try { + ResultSet res = executeString("select var_pop(value_int) as vpop_int, var_pop(value_long) as vpop_long, var_pop(value_float) as vpop_float, var_pop(value_double) as vpop_double from table11"); + String ascExpected = "vpop_int,vpop_long,vpop_float,vpop_double\n" + + "-------------------------------\n" + + "0.0,null,null,0.0\n"; + + assertEquals(ascExpected, resultSetToString(res)); + res.close(); + } finally { + executeString("DROP TABLE table11 PURGE"); + } + } // @Test // public void testRandom() throws Exception {
