ITO Ryuichi created HIVEMALL-119: ------------------------------------ Summary: Fail to use xgboost on Hive Key: HIVEMALL-119 URL: https://issues.apache.org/jira/browse/HIVEMALL-119 Project: Hivemall Issue Type: Bug Environment: Head of https://github.com/amaya382/incubator-hivemall/tree/cross-compiling
On Docker, xgboost native built locally (Linux) Reporter: ITO Ryuichi Although this error was observed on [this branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), it probably occurs on the head of master too. It seems that labels aren't set properly. ```sh make xgboost-native-local mvn package -Dmaven.test.skip=true docker-compose -f resources/docker/docker-compose.yml build docker-compose -f resources/docker/docker-compose.yml up -d && docker attach hivemall ``` On Docker: ```sh bin/prepare_iris.sh hive ``` On Hive: ```sql -- add jar /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar; -- source /opt/hivemall/resources/ddl/define-all.hive; add jar /opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar; source /opt/hivemall/resources/ddl/define-additional.hive; set hivevar:f0_min=4.3; set hivevar:f0_max=7.9; set hivevar:f1_min=2.0; set hivevar:f1_max=4.4; set hivevar:f2_min=1.0; set hivevar:f2_max=6.9; set hivevar:f3_min=0.1; set hivevar:f3_max=2.5; use iris; create or replace view iris_scaled as select rowid, label, add_bias(array( concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max})) )) as features from iris_raw; -- select * from iris_scaled limit 3; -- 1 Iris-setosa ["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"] -- 2 Iris-setosa ["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"] -- 3 Iris-setosa ["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"] select train_xgboost_classifier(features, case when label = 'Iris-setosa' then 1.0 else 0.0 end) from iris_scaled; -- got exception ``` ``` Failed with exception java.io.IOException:java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to java.lang.String [20:51:33] 
dmlc-core/include/dmlc/logging.h:235: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty Check failed: (info.labels.size()) != (0) label set cannot be empty at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] 
src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) Time taken: 3.375 seconds ``` -- This message was sent by Atlassian JIRA (v6.4.14#64029)