Merge #91: [HIVEMALL-122] Added tokenize_cn UDF
Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/ec6d945f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/ec6d945f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/ec6d945f
Branch: refs/heads/master
Commit: ec6d945fe48d64cd5175a2ad0156b8bd387c6138
Parents: 9876d06 efc3a6d
Author: Takuya Kitazawa <[email protected]>
Authored: Sat Jul 1 06:13:00 2017 -0700
Committer: Takuya Kitazawa <[email protected]>
Committed: Sat Jul 1 06:13:00 2017 -0700
----------------------------------------------------------------------
 docs/gitbook/SUMMARY.md                         |   2 +-
 docs/gitbook/misc/tokenizer.md                  |  27 +++-
 nlp/pom.xml                                     |  13 ++
 .../java/hivemall/nlp/tokenizer/SmartcnUDF.java | 138 +++++++++++++++++++
 .../hivemall/nlp/tokenizer/KuromojiUDFTest.java |  12 +-
 .../hivemall/nlp/tokenizer/SmartcnUDFTest.java  |  85 ++++++++++++
 resources/ddl/define-additional.hive            |   3 +
 resources/ddl/define-udfs.td.hql                |   1 +
 8 files changed, 271 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ec6d945f/docs/gitbook/SUMMARY.md
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ec6d945f/resources/ddl/define-udfs.td.hql
----------------------------------------------------------------------
