[ https://issues.apache.org/jira/browse/HIVEMALL-119?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Makoto Yui updated HIVEMALL-119: -------------------------------- Description: This error arises on [this branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), probably on head of master too. It seems that labels aren't set properly. {code} make xgboost-native-local mvn package -Dmaven.test.skip=true docker-compose -f resources/docker/docker-compose.yml build docker-compose -f resources/docker/docker-compose.yml up -d && docker attach hivemall {code} On docker {code} bin/prepare_iris.sh hive {code} On hive {code:sql} -- add jar /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar; -- source /opt/hivemall/resources/ddl/define-all.hive; add jar /opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar; source /opt/hivemall/resources/ddl/define-additional.hive; set hivevar:f0_min=4.3; set hivevar:f0_max=7.9; set hivevar:f1_min=2.0; set hivevar:f1_max=4.4; set hivevar:f2_min=1.0; set hivevar:f2_max=6.9; set hivevar:f3_min=0.1; set hivevar:f3_max=2.5; use iris; create or replace view iris_scaled as select rowid, label, add_bias(array( concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max})) )) as features from iris_raw; -- select * from iris_scaled limit 3; -- 1 Iris-setosa ["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"] -- 2 Iris-setosa ["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"] -- 3 Iris-setosa ["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"] select train_xgboost_classifier(features, case when label = 'Iris-setosa' then 1.0 else 0.0 end) from iris_scaled; -- got exception {code} {code} Failed with exception java.io.IOException:java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to java.lang.String [20:51:33] 
dmlc-core/include/dmlc/logging.h:235: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty Check failed: (info.labels.size()) != (0) label set cannot be empty at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] 
src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) Time taken: 3.375 seconds {code} was: This error arises on [this branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), probably on head of master too. It seems that labels aren't set properly. 
{code:bash} make xgboost-native-local mvn package -Dmaven.test.skip=true docker-compose -f resources/docker/docker-compose.yml build docker-compose -f resources/docker/docker-compose.yml up -d && docker attach hivemall {code} On docker {code:bash} bin/prepare_iris.sh hive {code} On hive {code:sql} -- add jar /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar; -- source /opt/hivemall/resources/ddl/define-all.hive; add jar /opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar; source /opt/hivemall/resources/ddl/define-additional.hive; set hivevar:f0_min=4.3; set hivevar:f0_max=7.9; set hivevar:f1_min=2.0; set hivevar:f1_max=4.4; set hivevar:f2_min=1.0; set hivevar:f2_max=6.9; set hivevar:f3_min=0.1; set hivevar:f3_max=2.5; use iris; create or replace view iris_scaled as select rowid, label, add_bias(array( concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max})) )) as features from iris_raw; -- select * from iris_scaled limit 3; -- 1 Iris-setosa ["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"] -- 2 Iris-setosa ["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"] -- 3 Iris-setosa ["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"] select train_xgboost_classifier(features, case when label = 'Iris-setosa' then 1.0 else 0.0 end) from iris_scaled; -- got exception {code} {code} Failed with exception java.io.IOException:java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to java.lang.String [20:51:33] dmlc-core/include/dmlc/logging.h:235: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] src/objective/regression_obj.cc:89: 
Check failed: (info.labels.size()) != (0) label set cannot be empty Check failed: (info.labels.size()) != (0) label set cannot be empty at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) at 
org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) Time taken: 3.375 seconds {code} > Fail to use xgboost on Hive > --------------------------- > > Key: HIVEMALL-119 > URL: https://issues.apache.org/jira/browse/HIVEMALL-119 > Project: Hivemall > Issue Type: Bug > Environment: Head of > https://github.com/amaya382/incubator-hivemall/tree/cross-compiling > On docker, xgboost native built on local (Linux) > Reporter: ITO Ryuichi > Assignee: Takeshi Yamamuro > Labels: xgboost > > This error arises on [this > branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), > probably on head of master too. 
> It seems that labels aren't set properly. > {code} > make xgboost-native-local > mvn package -Dmaven.test.skip=true > docker-compose -f resources/docker/docker-compose.yml build > docker-compose -f resources/docker/docker-compose.yml up -d && docker attach > hivemall > {code} > On docker > {code} > bin/prepare_iris.sh > hive > {code} > On hive > {code:sql} > -- add jar > /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar; > -- source /opt/hivemall/resources/ddl/define-all.hive; > add jar > /opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar; > source /opt/hivemall/resources/ddl/define-additional.hive; > set hivevar:f0_min=4.3; > set hivevar:f0_max=7.9; > set hivevar:f1_min=2.0; > set hivevar:f1_max=4.4; > set hivevar:f2_min=1.0; > set hivevar:f2_max=6.9; > set hivevar:f3_min=0.1; > set hivevar:f3_max=2.5; > use iris; > create or replace view iris_scaled > as > select > rowid, > label, > add_bias(array( > concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), > concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), > concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), > concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max})) > )) as features > from > iris_raw; > -- select * from iris_scaled limit 3; > -- 1 Iris-setosa > ["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"] > -- 2 Iris-setosa > ["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"] > -- 3 Iris-setosa > ["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"] > select train_xgboost_classifier(features, case when label = 'Iris-setosa' > then 1.0 else 0.0 end) from iris_scaled; -- got exception > {code} > {code} > Failed with exception java.io.IOException:java.lang.ClassCastException: > org.apache.hadoop.io.Text cannot be cast to java.lang.String > [20:51:33] dmlc-core/include/dmlc/logging.h:235: [20:51:33] > src/objective/regression_obj.cc:89: Check failed: 
(info.labels.size()) != (0) > label set cannot be empty > org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] > src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) > label set cannot be empty > Check failed: (info.labels.size()) != (0) label set cannot be empty > at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) > at > org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) > at > org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) > at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) > at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at org.apache.hadoop.util.RunJar.run(RunJar.java:221) > at org.apache.hadoop.util.RunJar.main(RunJar.java:136) > org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] > src/objective/regression_obj.cc:89: 
Check failed: (info.labels.size()) != (0) > label set cannot be empty > at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) > at > org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) > at > org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) > at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) > at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at org.apache.hadoop.util.RunJar.run(RunJar.java:221) > at org.apache.hadoop.util.RunJar.main(RunJar.java:136) > Time taken: 3.375 seconds > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)