Repository: incubator-hivemall
Updated Branches:
  refs/heads/master 85f8e173a -> 389a9e331


Close #43: Added rownum() UDF


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/389a9e33
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/389a9e33
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/389a9e33

Branch: refs/heads/master
Commit: 389a9e331a1fdb2884d7b184b8ed63e510d9acc0
Parents: 85f8e17
Author: myui <yuin...@gmail.com>
Authored: Fri Feb 10 20:49:06 2017 +0900
Committer: myui <yuin...@gmail.com>
Committed: Fri Feb 10 20:49:06 2017 +0900

----------------------------------------------------------------------
 .github/PULL_REQUEST_TEMPLATE                   |  6 +-
 .../hivemall/tools/mapred/RowNumberUDF.java     | 69 ++++++++++++++++++++
 resources/ddl/define-all-as-permanent.hive      |  3 +
 resources/ddl/define-all.hive                   |  3 +
 4 files changed, 80 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/389a9e33/.github/PULL_REQUEST_TEMPLATE
----------------------------------------------------------------------
diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE
index 04f3004..361d613 100644
--- a/.github/PULL_REQUEST_TEMPLATE
+++ b/.github/PULL_REQUEST_TEMPLATE
@@ -6,10 +6,14 @@
 
 [Bug Fix | Improvement | Feature | Documentation | Hot Fix | Refactoring]
 
-### What is the Jira issue?
+## What is the Jira issue?
 
 (Put link here and add [HIVEMALL-*Jira number*] in PR title, e.g., [HIVEMALL-533])
 
 ## How was this patch tested?
 
 (Please explain how this patch was tested. e.g., unit tests, integration tests, manual tests)
+
+## How to use this feature?
+
+(Please remove this section if not needed)

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/389a9e33/core/src/main/java/hivemall/tools/mapred/RowNumberUDF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/tools/mapred/RowNumberUDF.java b/core/src/main/java/hivemall/tools/mapred/RowNumberUDF.java
new file mode 100644
index 0000000..59f64ba
--- /dev/null
+++ b/core/src/main/java/hivemall/tools/mapred/RowNumberUDF.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.mapred;
+
+import hivemall.utils.hadoop.HadoopUtils;
+
+import javax.annotation.Nonnull;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.io.LongWritable;
+
+@Description(name = "rownum", value = "_FUNC_() - Returns a generated row number in long",
+        extended = "returns sprintf(`%d%04d`,sequence,taskId) as long")
+@UDFType(deterministic = false, stateful = true)
+public final class RowNumberUDF extends UDF {
+
+    private long sequence;
+    private int taskId;
+    @Nonnull
+    private final LongWritable result;
+
+    public RowNumberUDF() {
+        this.sequence = 0L;
+        this.taskId = -1;
+        this.result = new LongWritable(Double.doubleToLongBits(Double.NaN));
+    }
+
+    @Nonnull
+    public LongWritable evaluate() throws HiveException {
+        if (taskId == -1) {
+            this.taskId = HadoopUtils.getTaskId() + 1;
+            if (taskId > 9999) {
+                throw new HiveException("TaskId out of range `" + taskId
+                        + "`. rownum() supports 9999 tasks at max");
+            }
+        }
+        sequence++;
+
+        String rowid = String.format("%d%04d", sequence, taskId);
+        final long l;
+        try {
+            l = Long.parseLong(rowid);
+        } catch (NumberFormatException e) {
+            throw new HiveException("failed to parse `" + rowid + "` as long", e);
+        }
+
+        result.set(l);
+        return result;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/389a9e33/resources/ddl/define-all-as-permanent.hive
----------------------------------------------------------------------
diff --git a/resources/ddl/define-all-as-permanent.hive b/resources/ddl/define-all-as-permanent.hive
index 33a15a1..f9f9fd8 100644
--- a/resources/ddl/define-all-as-permanent.hive
+++ b/resources/ddl/define-all-as-permanent.hive
@@ -477,6 +477,9 @@ CREATE FUNCTION jobid as 'hivemall.tools.mapred.JobIdUDF' USING JAR '${hivemall_
 DROP FUNCTION IF EXISTS rowid;
 CREATE FUNCTION rowid as 'hivemall.tools.mapred.RowIdUDF' USING JAR '${hivemall_jar}';
 
+DROP FUNCTION IF EXISTS rownum;
+CREATE FUNCTION rownum as 'hivemall.tools.mapred.RowNumberUDF' USING JAR '${hivemall_jar}';
+
 DROP FUNCTION IF EXISTS distcache_gets;
 CREATE FUNCTION distcache_gets as 'hivemall.tools.mapred.DistributedCacheLookupUDF' USING JAR '${hivemall_jar}';
 

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/389a9e33/resources/ddl/define-all.hive
----------------------------------------------------------------------
diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive
index 9cc348f..6e0f911 100644
--- a/resources/ddl/define-all.hive
+++ b/resources/ddl/define-all.hive
@@ -473,6 +473,9 @@ create temporary function jobid as 'hivemall.tools.mapred.JobIdUDF';
 drop temporary function if exists rowid;
 create temporary function rowid as 'hivemall.tools.mapred.RowIdUDF';
 
+drop temporary function if exists rownum;
+create temporary function rownum as 'hivemall.tools.mapred.RowNumberUDF';
+
 drop temporary function if exists distcache_gets;
 create temporary function distcache_gets as 'hivemall.tools.mapred.DistributedCacheLookupUDF';
 

Reply via email to