http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/regression/e2006_arow.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/regression/e2006_arow.md b/docs/gitbook/regression/e2006_arow.md index a3b60eb..a02dfa8 100644 --- a/docs/gitbook/regression/e2006_arow.md +++ b/docs/gitbook/regression/e2006_arow.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf ---
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/regression/e2006_dataset.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/regression/e2006_dataset.md b/docs/gitbook/regression/e2006_dataset.md index 329fb39..001eda2 100644 --- a/docs/gitbook/regression/e2006_dataset.md +++ b/docs/gitbook/regression/e2006_dataset.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf Prerequisite http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/regression/kddcup12tr2.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/regression/kddcup12tr2.md b/docs/gitbook/regression/kddcup12tr2.md index e69de29..2959148 100644 --- a/docs/gitbook/regression/kddcup12tr2.md +++ b/docs/gitbook/regression/kddcup12tr2.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/regression/kddcup12tr2_adagrad.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/regression/kddcup12tr2_adagrad.md b/docs/gitbook/regression/kddcup12tr2_adagrad.md index e6c8eb4..f6c7675 100644 --- a/docs/gitbook/regression/kddcup12tr2_adagrad.md +++ b/docs/gitbook/regression/kddcup12tr2_adagrad.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + _Note adagrad/adadelta is supported from hivemall v0.3b2 or later (or in the master branch)._ # Preparation http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/regression/kddcup12tr2_dataset.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/regression/kddcup12tr2_dataset.md b/docs/gitbook/regression/kddcup12tr2_dataset.md index 8713e99..15bfbfd 100644 --- a/docs/gitbook/regression/kddcup12tr2_dataset.md +++ b/docs/gitbook/regression/kddcup12tr2_dataset.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + The task is predicting the click through rate (CTR) of advertisement, meaning that we are to predict the probability of each ad being clicked. 
http://www.kddcup2012.org/c/kddcup2012-track2 http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/regression/kddcup12tr2_lr.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/regression/kddcup12tr2_lr.md b/docs/gitbook/regression/kddcup12tr2_lr.md index 0ff9b97..6db07ab 100644 --- a/docs/gitbook/regression/kddcup12tr2_lr.md +++ b/docs/gitbook/regression/kddcup12tr2_lr.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + The task is predicting the click through rate (CTR) of advertisement, meaning that we are to predict the probability of each ad being clicked. 
http://www.kddcup2012.org/c/kddcup2012-track2 http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/regression/kddcup12tr2_lr_amplify.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/regression/kddcup12tr2_lr_amplify.md b/docs/gitbook/regression/kddcup12tr2_lr_amplify.md index 4df124e..55b8caf 100644 --- a/docs/gitbook/regression/kddcup12tr2_lr_amplify.md +++ b/docs/gitbook/regression/kddcup12tr2_lr_amplify.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + This article explains *amplify* technique that is useful for improving prediction score. Iterations are mandatory in machine learning (e.g., in [stochastic gradient descent](http://en.wikipedia.org/wiki/Stochastic_gradient_descent)) to get good prediction models. However, MapReduce is known to be not suited for iterative algorithms because IN/OUT of each MapReduce job is through HDFS. 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/tips/README.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/tips/README.md b/docs/gitbook/tips/README.md index e69de29..2959148 100644 --- a/docs/gitbook/tips/README.md +++ b/docs/gitbook/tips/README.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/tips/addbias.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/tips/addbias.md b/docs/gitbook/tips/addbias.md index 2b11d51..dfa4bfc 100644 --- a/docs/gitbook/tips/addbias.md +++ b/docs/gitbook/tips/addbias.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + A trainer learns the function f(x)=y, or weights _W_, of the following form to predict a label y where x is a feature vector. _y=f(x)=Wx_ http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/tips/emr.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/tips/emr.md b/docs/gitbook/tips/emr.md index 4521635..030a594 100644 --- a/docs/gitbook/tips/emr.md +++ b/docs/gitbook/tips/emr.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + ## Prerequisite Learn how to use Hive with Elastic MapReduce (EMR). 
http://docs.aws.amazon.com/ElasticMapReduce/latest/DeveloperGuide/emr-hive.html http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/tips/ensemble_learning.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/tips/ensemble_learning.md b/docs/gitbook/tips/ensemble_learning.md index 6daaf1a..9288f84 100644 --- a/docs/gitbook/tips/ensemble_learning.md +++ b/docs/gitbook/tips/ensemble_learning.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + This example explains how to run ensemble learning in Hivemall. Two heads are better than one? Let's verify it by ensemble learning. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/tips/general_tips.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/tips/general_tips.md b/docs/gitbook/tips/general_tips.md index e69de29..2959148 100644 --- a/docs/gitbook/tips/general_tips.md +++ b/docs/gitbook/tips/general_tips.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/tips/hadoop_tuning.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/tips/hadoop_tuning.md b/docs/gitbook/tips/hadoop_tuning.md index a6c1854..7125508 100644 --- a/docs/gitbook/tips/hadoop_tuning.md +++ b/docs/gitbook/tips/hadoop_tuning.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + # Prerequisites Please refer to the following guides for Hadoop tuning: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/tips/mixserver.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/tips/mixserver.md b/docs/gitbook/tips/mixserver.md index 631557c..bd58279 100644 --- a/docs/gitbook/tips/mixserver.md +++ b/docs/gitbook/tips/mixserver.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + In this page, we will explain how to use model mixing on Hivemall. The model mixing is useful for a better prediction performance and faster convergence in training classifiers. <!-- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/tips/rand_amplify.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/tips/rand_amplify.md b/docs/gitbook/tips/rand_amplify.md index 4df124e..55b8caf 100644 --- a/docs/gitbook/tips/rand_amplify.md +++ b/docs/gitbook/tips/rand_amplify.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + This article explains *amplify* technique that is useful for improving prediction score. Iterations are mandatory in machine learning (e.g., in [stochastic gradient descent](http://en.wikipedia.org/wiki/Stochastic_gradient_descent)) to get good prediction models. However, MapReduce is known to be not suited for iterative algorithms because IN/OUT of each MapReduce job is through HDFS. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/tips/rowid.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/tips/rowid.md b/docs/gitbook/tips/rowid.md index c43aa74..2b24401 100644 --- a/docs/gitbook/tips/rowid.md +++ b/docs/gitbook/tips/rowid.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + ```sql CREATE TABLE xxx AS http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/tips/rt_prediction.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/tips/rt_prediction.md b/docs/gitbook/tips/rt_prediction.md index 3ac4fb6..c342378 100644 --- a/docs/gitbook/tips/rt_prediction.md +++ b/docs/gitbook/tips/rt_prediction.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + Hivemall provides a batch learning scheme that builds prediction models on Apache Hive. The learning process itself is a batch process; however, an online/real-time prediction can be achieved by carrying a prediction on a transactional relational DBMS. 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/troubleshooting/README.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/troubleshooting/README.md b/docs/gitbook/troubleshooting/README.md index e69de29..2959148 100644 --- a/docs/gitbook/troubleshooting/README.md +++ b/docs/gitbook/troubleshooting/README.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/troubleshooting/asterisk.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/troubleshooting/asterisk.md b/docs/gitbook/troubleshooting/asterisk.md index 49e2f71..621ab3f 100644 --- a/docs/gitbook/troubleshooting/asterisk.md +++ b/docs/gitbook/troubleshooting/asterisk.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + See [HIVE-4181](https://issues.apache.org/jira/browse/HIVE-4181) that asterisk argument without table alias for UDTF is not working. It has been fixed as part of Hive v0.12 release. A possible workaround is to use asterisk with a table alias, or to specify names of arguments explicitly. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/troubleshooting/mapjoin_classcastex.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/troubleshooting/mapjoin_classcastex.md b/docs/gitbook/troubleshooting/mapjoin_classcastex.md index c48919a..28e7709 100644 --- a/docs/gitbook/troubleshooting/mapjoin_classcastex.md +++ b/docs/gitbook/troubleshooting/mapjoin_classcastex.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + Map-side join on Tez causes [ClassCastException](http://markmail.org/message/7cwbgupnhah6ggkv) when a serialized table contains array column(s). [Workaround] Try setting _hive.mapjoin.optimized.hashtable_ off as follows: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/troubleshooting/mapjoin_task_error.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/troubleshooting/mapjoin_task_error.md b/docs/gitbook/troubleshooting/mapjoin_task_error.md index 02aff2f..78b4e32 100644 --- a/docs/gitbook/troubleshooting/mapjoin_task_error.md +++ b/docs/gitbook/troubleshooting/mapjoin_task_error.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + From Hive 0.11.0, **hive.auto.convert.join** is [enabled by default](https://issues.apache.org/jira/browse/HIVE-3297). When using complex queries using views, the auto conversion sometimes throws SemanticException, cannot serialize object. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/troubleshooting/num_mappers.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/troubleshooting/num_mappers.md b/docs/gitbook/troubleshooting/num_mappers.md index be01f2a..c1820db 100644 --- a/docs/gitbook/troubleshooting/num_mappers.md +++ b/docs/gitbook/troubleshooting/num_mappers.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + The default _hive.input.format_ is set to _org.apache.hadoop.hive.ql.io.CombineHiveInputFormat_. This configuration could yield fewer mappers than the split size (i.e., # blocks in HDFS) of the input table. 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/troubleshooting/oom.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/troubleshooting/oom.md b/docs/gitbook/troubleshooting/oom.md index 643d09a..50bee25 100644 --- a/docs/gitbook/troubleshooting/oom.md +++ b/docs/gitbook/troubleshooting/oom.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + # OOM in mappers In a certain setting, the default input split size is too large for Hivemall. Due to that, OutOfMemoryError could happen on mappers in the middle of training. 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index f36a36c..6616f47 100644 --- a/pom.xml +++ b/pom.xml @@ -391,17 +391,22 @@ <currentYear>${build.year}</currentYear> <copyrightOwner>${project.organization.name}</copyrightOwner> </properties> + <useDefaultExcludes>false</useDefaultExcludes> <includes> <include>src/main/**/*.java</include> <include>src/test/**/*.java</include> <include>src/main/**/*.scala</include> <include>src/test/**/*.scala</include> <include>bin/*.sh</include> + <include>docs/gitbook/**/*.md</include> </includes> <encoding>UTF-8</encoding> <headerDefinitions> <headerDefinition>${main.basedir}/resources/header-definition.xml</headerDefinition> </headerDefinitions> + <mapping> + <md>XML_STYLE</md> + </mapping> </configuration> <executions> <execution> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/resources/header-definition.xml ---------------------------------------------------------------------- diff --git a/resources/header-definition.xml b/resources/header-definition.xml index 057ad52..232b1d7 100644 --- a/resources/header-definition.xml +++ b/resources/header-definition.xml @@ -11,4 +11,16 @@ <isMultiline>true</isMultiline> <padLines>false</padLines> </javadoc_style> + <xml_style> + <firstLine><![CDATA[<!--]]></firstLine> + <beforeEachLine> </beforeEachLine> + <endLine><![CDATA[--> + ]]></endLine> + <skipLine><![CDATA[^<\?xml.*>$]]></skipLine> + <firstLineDetectionPattern><![CDATA[(\s|\t)*<!--.*$]]></firstLineDetectionPattern> + <lastLineDetectionPattern><![CDATA[.*-->(\s|\t)*$]]></lastLineDetectionPattern> + <allowBlankLines>false</allowBlankLines> + <isMultiline>true</isMultiline> + <padLines>true</padLines> + </xml_style> </additionalHeaders>
