[ 
https://issues.apache.org/jira/browse/HIVEMALL-119?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Makoto Yui updated HIVEMALL-119:
--------------------------------
    Description: 
This error arises on [this 
branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), 
and probably on the head of master too.
It seems that the labels aren't set properly.

{code}
make xgboost-native-local
mvn package -Dmaven.test.skip=true
docker-compose -f resources/docker/docker-compose.yml build
docker-compose -f resources/docker/docker-compose.yml up -d && docker attach 
hivemall
{code}

On docker
{code}
bin/prepare_iris.sh
hive
{code}

On hive
{code:sql}
-- add jar /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar;
-- source /opt/hivemall/resources/ddl/define-all.hive;
add jar 
/opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar;
source /opt/hivemall/resources/ddl/define-additional.hive;

set hivevar:f0_min=4.3;
set hivevar:f0_max=7.9;
set hivevar:f1_min=2.0;
set hivevar:f1_max=4.4;
set hivevar:f2_min=1.0;
set hivevar:f2_max=6.9;
set hivevar:f3_min=0.1;
set hivevar:f3_max=2.5;

use iris;
create or replace view iris_scaled
as
select
  rowid, 
  label,
  add_bias(array(
     concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), 
     concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), 
     concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), 
     concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max}))
  )) as features
from 
  iris_raw;

-- select * from iris_scaled limit 3;
-- 1       Iris-setosa     
["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"]
-- 2       Iris-setosa     
["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"]
-- 3       Iris-setosa     
["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"]

select train_xgboost_classifier(features, case when label = 'Iris-setosa' then 
1.0 else 0.0 end) from iris_scaled; -- got exception
{code}

{code}
Failed with exception java.io.IOException:java.lang.ClassCastException: 
org.apache.hadoop.io.Text cannot be cast to java.lang.String
[20:51:33] dmlc-core/include/dmlc/logging.h:235: [20:51:33] 
src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) 
label set cannot be empty
org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] 
src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) 
label set cannot be empty
Check failed: (info.labels.size()) != (0) label set cannot be empty
        at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313)
        at 
org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
        at 
org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552)
        at 
org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535)
        at 
org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191)
        at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233)
        at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278)
        at 
org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
        at 
org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
        at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
        at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] 
src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) 
label set cannot be empty
        at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313)
        at 
org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
        at 
org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552)
        at 
org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535)
        at 
org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191)
        at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233)
        at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278)
        at 
org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
        at 
org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
        at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
        at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Time taken: 3.375 seconds
{code}

  was:
This error arises on [this 
branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), 
and probably on the head of master too.
It seems that the labels aren't set properly.

{code:bash}
make xgboost-native-local
mvn package -Dmaven.test.skip=true
docker-compose -f resources/docker/docker-compose.yml build
docker-compose -f resources/docker/docker-compose.yml up -d && docker attach 
hivemall
{code}

On docker
{code:bash}
bin/prepare_iris.sh
hive
{code}

On hive
{code:sql}
-- add jar /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar;
-- source /opt/hivemall/resources/ddl/define-all.hive;
add jar 
/opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar;
source /opt/hivemall/resources/ddl/define-additional.hive;

set hivevar:f0_min=4.3;
set hivevar:f0_max=7.9;
set hivevar:f1_min=2.0;
set hivevar:f1_max=4.4;
set hivevar:f2_min=1.0;
set hivevar:f2_max=6.9;
set hivevar:f3_min=0.1;
set hivevar:f3_max=2.5;

use iris;
create or replace view iris_scaled
as
select
  rowid, 
  label,
  add_bias(array(
     concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), 
     concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), 
     concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), 
     concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max}))
  )) as features
from 
  iris_raw;

-- select * from iris_scaled limit 3;
-- 1       Iris-setosa     
["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"]
-- 2       Iris-setosa     
["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"]
-- 3       Iris-setosa     
["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"]

select train_xgboost_classifier(features, case when label = 'Iris-setosa' then 
1.0 else 0.0 end) from iris_scaled; -- got exception
{code}

{code}
Failed with exception java.io.IOException:java.lang.ClassCastException: 
org.apache.hadoop.io.Text cannot be cast to java.lang.String
[20:51:33] dmlc-core/include/dmlc/logging.h:235: [20:51:33] 
src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) 
label set cannot be empty
org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] 
src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) 
label set cannot be empty
Check failed: (info.labels.size()) != (0) label set cannot be empty
        at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313)
        at 
org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
        at 
org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552)
        at 
org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535)
        at 
org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191)
        at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233)
        at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278)
        at 
org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
        at 
org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
        at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
        at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] 
src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) 
label set cannot be empty
        at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313)
        at 
org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
        at 
org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552)
        at 
org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535)
        at 
org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191)
        at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233)
        at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278)
        at 
org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
        at 
org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
        at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
        at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Time taken: 3.375 seconds
{code}


> Fail to use xgboost on Hive
> ---------------------------
>
>                 Key: HIVEMALL-119
>                 URL: https://issues.apache.org/jira/browse/HIVEMALL-119
>             Project: Hivemall
>          Issue Type: Bug
>         Environment: Head of 
> https://github.com/amaya382/incubator-hivemall/tree/cross-compiling
> On docker, xgboost native built on local (Linux)
>            Reporter: ITO Ryuichi
>            Assignee: Takeshi Yamamuro
>              Labels: xgboost
>
> This error arises on [this 
> branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), 
> and probably on the head of master too.
> It seems that the labels aren't set properly.
> {code}
> make xgboost-native-local
> mvn package -Dmaven.test.skip=true
> docker-compose -f resources/docker/docker-compose.yml build
> docker-compose -f resources/docker/docker-compose.yml up -d && docker attach 
> hivemall
> {code}
> On docker
> {code}
> bin/prepare_iris.sh
> hive
> {code}
> On hive
> {code:sql}
> -- add jar 
> /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar;
> -- source /opt/hivemall/resources/ddl/define-all.hive;
> add jar 
> /opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar;
> source /opt/hivemall/resources/ddl/define-additional.hive;
> set hivevar:f0_min=4.3;
> set hivevar:f0_max=7.9;
> set hivevar:f1_min=2.0;
> set hivevar:f1_max=4.4;
> set hivevar:f2_min=1.0;
> set hivevar:f2_max=6.9;
> set hivevar:f3_min=0.1;
> set hivevar:f3_max=2.5;
> use iris;
> create or replace view iris_scaled
> as
> select
>   rowid, 
>   label,
>   add_bias(array(
>      concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), 
>      concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), 
>      concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), 
>      concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max}))
>   )) as features
> from 
>   iris_raw;
> -- select * from iris_scaled limit 3;
> -- 1       Iris-setosa     
> ["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"]
> -- 2       Iris-setosa     
> ["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"]
> -- 3       Iris-setosa     
> ["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"]
> select train_xgboost_classifier(features, case when label = 'Iris-setosa' 
> then 1.0 else 0.0 end) from iris_scaled; -- got exception
> {code}
> {code}
> Failed with exception java.io.IOException:java.lang.ClassCastException: 
> org.apache.hadoop.io.Text cannot be cast to java.lang.String
> [20:51:33] dmlc-core/include/dmlc/logging.h:235: [20:51:33] 
> src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) 
> label set cannot be empty
> org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] 
> src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) 
> label set cannot be empty
> Check failed: (info.labels.size()) != (0) label set cannot be empty
>         at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313)
>         at 
> org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191)
>         at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233)
>         at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278)
>         at 
> org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
>         at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
>         at 
> org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
>         at 
> org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
>         at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
>         at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>         at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:606)
>         at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
>         at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
> org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] 
> src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) 
> label set cannot be empty
>         at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313)
>         at 
> org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191)
>         at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233)
>         at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278)
>         at 
> org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
>         at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
>         at 
> org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
>         at 
> org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
>         at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
>         at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>         at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:606)
>         at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
>         at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
> Time taken: 3.375 seconds
> {code}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to