Repository: incubator-hivemall
Updated Branches:
  refs/heads/master 7ea6bfd52 -> 494960324


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
----------------------------------------------------------------------
diff --git 
a/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java 
b/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
new file mode 100644
index 0000000..0d58b3f
--- /dev/null
+++ 
b/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
@@ -0,0 +1,264 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.docs;
+
+import static hivemall.docs.utils.MarkdownUtils.asCodeBlock;
+import static hivemall.docs.utils.MarkdownUtils.asInlineCode;
+import static hivemall.docs.utils.MarkdownUtils.asListElement;
+import static hivemall.docs.utils.MarkdownUtils.indent;
+import static org.apache.commons.lang.StringEscapeUtils.escapeHtml;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.annotation.Nonnull;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.maven.execution.MavenSession;
+import org.apache.maven.plugin.AbstractMojo;
+import org.apache.maven.plugin.MojoExecutionException;
+import org.apache.maven.plugins.annotations.LifecyclePhase;
+import org.apache.maven.plugins.annotations.Mojo;
+import org.apache.maven.plugins.annotations.Parameter;
+import org.apache.maven.plugins.annotations.ResolutionScope;
+import org.reflections.Reflections;
+
+/**
+ * Generate a list of UDFs for documentation.
+ *
+ * @see <a href="https://hivemall.incubator.apache.org/userguide/misc/generic_funcs.html">List of generic functions</a>
+ * @see <a href="https://hivemall.incubator.apache.org/userguide/misc/funcs.html">List of functions</a>
+ */
+@Mojo(name = "generate-funcs-list", defaultPhase = 
LifecyclePhase.PROCESS_CLASSES,
+        requiresDependencyResolution = ResolutionScope.COMPILE_PLUS_RUNTIME,
+        configurator = "include-project-dependencies")
+public class FuncsListGeneratorMojo extends AbstractMojo {
+
+    @Parameter(defaultValue = "${basedir}", readonly = true)
+    private File basedir;
+
+    @Parameter(defaultValue = "${session}", readonly = true)
+    private MavenSession session;
+
+    @Parameter(defaultValue = "docs/gitbook/misc/generic_funcs.md")
+    private String pathToGenericFuncs;
+
+    @Parameter(defaultValue = "docs/gitbook/misc/funcs.md")
+    private String pathToFuncs;
+
+    /**
+     * Section headers of the generic-functions page, each mapped to the packages whose functions
+     * are listed beneath it. A {@link LinkedHashMap} is used so that the headers are emitted in
+     * the order in which they are registered below.
+     */
+    private static final Map<String, List<String>> genericFuncsHeaders =
+            new LinkedHashMap<>();
+    static {
+        final Map<String, List<String>> headers = genericFuncsHeaders;
+
+        headers.put("# Array", Arrays.asList("hivemall.tools.array", "hivemall.tools.list"));
+        headers.put("# Bitset", Collections.singletonList("hivemall.tools.bits"));
+        headers.put("# Compression", Collections.singletonList("hivemall.tools.compress"));
+        headers.put("# Datetime", Collections.singletonList("hivemall.tools.datetime"));
+        headers.put("# JSON", Collections.singletonList("hivemall.tools.json"));
+        headers.put("# Map", Collections.singletonList("hivemall.tools.map"));
+        headers.put("# MapReduce", Collections.singletonList("hivemall.tools.mapred"));
+        headers.put("# Math", Collections.singletonList("hivemall.tools.math"));
+        headers.put("# Matrix", Collections.singletonList("hivemall.tools.matrix"));
+        headers.put("# Sanity Checks", Collections.singletonList("hivemall.tools.sanity"));
+        headers.put("# Text processing", Collections.singletonList("hivemall.tools.text"));
+        headers.put("# Timeseries", Collections.singletonList("hivemall.tools.timeseries"));
+        headers.put("# Vector", Collections.singletonList("hivemall.tools.vector"));
+        headers.put("# Others", Collections.singletonList("hivemall.tools"));
+    }
+
+    /**
+     * Section headers of the machine-learning functions page, each mapped to the packages whose
+     * functions are listed beneath it. Headers mapped to an empty list are emitted as a bare
+     * heading. A {@link LinkedHashMap} is used so that the headers are emitted in the order in
+     * which they are registered below.
+     */
+    private static final Map<String, List<String>> funcsHeaders =
+            new LinkedHashMap<>();
+    static {
+        final Map<String, List<String>> headers = funcsHeaders;
+
+        headers.put("# Regression", Collections.singletonList("hivemall.regression"));
+
+        headers.put("# Classification", Collections.<String>emptyList());
+        headers.put("## Binary classification", Collections.singletonList("hivemall.classifier"));
+        headers.put("## Multiclass classification",
+            Collections.singletonList("hivemall.classifier.multiclass"));
+
+        headers.put("# Matrix factorization", Collections.singletonList("hivemall.mf"));
+        headers.put("# Factorization machines", Collections.singletonList("hivemall.fm"));
+        headers.put("# Recommendation", Collections.singletonList("hivemall.recommend"));
+        headers.put("# Anomaly detection", Collections.singletonList("hivemall.anomaly"));
+        headers.put("# Topic modeling", Collections.singletonList("hivemall.topicmodel"));
+
+        headers.put("# Preprocessing", Collections.singletonList("hivemall.ftvec"));
+        headers.put("## Data amplification", Collections.singletonList("hivemall.ftvec.amplify"));
+        headers.put("## Feature binning", Collections.singletonList("hivemall.ftvec.binning"));
+        headers.put("## Feature format conversion",
+            Collections.singletonList("hivemall.ftvec.conv"));
+        headers.put("## Feature hashing", Collections.singletonList("hivemall.ftvec.hashing"));
+        // NOTE(review): "paring" looks like a typo for "pairing"; kept as-is because this text
+        // is emitted into the generated page -- confirm before changing.
+        headers.put("## Feature paring", Collections.singletonList("hivemall.ftvec.pairing"));
+        headers.put("## Ranking", Collections.singletonList("hivemall.ftvec.ranking"));
+        headers.put("## Feature scaling", Collections.singletonList("hivemall.ftvec.scaling"));
+        headers.put("## Feature selection",
+            Collections.singletonList("hivemall.ftvec.selection"));
+        headers.put("## Feature transformation and vectorization",
+            Collections.singletonList("hivemall.ftvec.trans"));
+
+        headers.put("# Geospatial functions", Collections.singletonList("hivemall.geospatial"));
+        headers.put("# Distance measures", Collections.singletonList("hivemall.knn.distance"));
+        headers.put("# Locality-sensitive hashing", Collections.singletonList("hivemall.knn.lsh"));
+        headers.put("# Similarity measures",
+            Collections.singletonList("hivemall.knn.similarity"));
+        headers.put("# Evaluation", Collections.singletonList("hivemall.evaluation"));
+        headers.put("# Sketching", Arrays.asList("hivemall.sketch.hll", "hivemall.sketch.bloom"));
+
+        headers.put("# Ensemble learning", Collections.singletonList("hivemall.ensemble"));
+        headers.put("## Bagging", Collections.singletonList("hivemall.ensemble.bagging"));
+
+        headers.put("# Decision trees and RandomForest",
+            Arrays.asList("hivemall.smile.classification", "hivemall.smile.regression",
+                "hivemall.smile.tools"));
+        headers.put("# XGBoost",
+            Arrays.asList("hivemall.xgboost.classification", "hivemall.xgboost.regression",
+                "hivemall.xgboost.tools"));
+        headers.put("# Others",
+            Arrays.asList("hivemall", "hivemall.dataset", "hivemall.ftvec.text"));
+    }
+
+    /**
+     * Generates the generic-functions page and the functions page. Nothing is done unless this
+     * Mojo runs for the reactor root project, so the pages are written once per build.
+     *
+     * @throws MojoExecutionException if either page cannot be generated
+     */
+    @Override
+    public void execute() throws MojoExecutionException {
+        // output only once across the projects
+        if (isReactorRootProject()) {
+            final String genericFuncsPreface =
+                    "This page describes a list of useful Hivemall generic functions. See also a [list of machine-learning-related functions](./funcs.md).";
+            generate(new File(basedir, pathToGenericFuncs), genericFuncsPreface,
+                genericFuncsHeaders);
+
+            final String funcsPreface =
+                    "This page describes a list of Hivemall functions. See also a [list of generic Hivemall functions](./generic_funcs.md) for more general-purpose functions such as array and map UDFs.";
+            generate(new File(basedir, pathToFuncs), funcsPreface, funcsHeaders);
+        }
+    }
+
+    private boolean isReactorRootProject() {
+        return 
session.getExecutionRootDirectory().equalsIgnoreCase(basedir.toString());
+    }
+
+    private void generate(@Nonnull File outputFile, @Nonnull String preface,
+            @Nonnull Map<String, List<String>> headers) throws 
MojoExecutionException {
+        Reflections reflections = new Reflections("hivemall");
+        Set<Class<?>> annotatedClasses = 
reflections.getTypesAnnotatedWith(Description.class);
+
+        StringBuilder sb = new StringBuilder();
+        Map<String, Set<String>> packages = new HashMap<>();
+
+        Pattern func = Pattern.compile("_FUNC_(\\(.*?\\))(.*)", 
Pattern.DOTALL);
+
+        for (Class<?> annotatedClass : annotatedClasses) {
+            Deprecated deprecated = 
annotatedClass.getAnnotation(Deprecated.class);
+            if (deprecated != null) {
+                continue;
+            }
+
+            Description description = 
annotatedClass.getAnnotation(Description.class);
+
+            String value = description.value().replaceAll("\n", " ");
+            Matcher matcher = func.matcher(value);
+            if (matcher.find()) {
+                value = asInlineCode(description.name() + matcher.group(1))
+                        + escapeHtml(matcher.group(2));
+            }
+            sb.append(asListElement(value));
+
+            StringBuilder sbExtended = new StringBuilder();
+            if (!description.extended().isEmpty()) {
+                sbExtended.append(description.extended());
+                sb.append("\n");
+            }
+
+            String extended = sbExtended.toString();
+            if (extended.isEmpty()) {
+                sb.append("\n");
+            } else {
+                if (extended.toLowerCase().contains("select")) { // extended 
description contains SQL statements
+                    sb.append(indent(asCodeBlock(extended, "sql")));
+                } else {
+                    sb.append(indent(asCodeBlock(extended)));
+                }
+            }
+
+            String packageName = annotatedClass.getPackage().getName();
+            if (!packages.containsKey(packageName)) {
+                Set<String> set = new TreeSet<>();
+                packages.put(packageName, set);
+            }
+            Set<String> List = packages.get(packageName);
+            List.add(sb.toString());
+
+            sb.setLength(0);
+        }
+
+        try (PrintWriter writer = new PrintWriter(outputFile)) {
+            // license header
+            writer.println("<!--");
+            try {
+                File licenseFile = new File(basedir, 
"resources/license-header.txt");
+                FileReader fileReader = new FileReader(licenseFile);
+
+                try (BufferedReader bufferedReader = new 
BufferedReader(fileReader)) {
+                    String line;
+                    while ((line = bufferedReader.readLine()) != null) {
+                        writer.println(indent(line));
+                    }
+                }
+            } catch (IOException e) {
+                throw new MojoExecutionException("Failed to read license 
file");
+            }
+            writer.println("-->\n");
+
+            writer.println(preface);
+
+            writer.println("\n<!-- toc -->\n");
+
+            for (Map.Entry<String, List<String>> e : headers.entrySet()) {
+                writer.println(e.getKey() + "\n");
+                List<String> packageNames = e.getValue();
+                for (String packageName : packageNames) {
+                    if (!packages.containsKey(packageName)) {
+                        writer.close();
+                        throw new MojoExecutionException(
+                            "Failed to find package in the classpath: " + 
packageName);
+                    }
+                    for (String desc : packages.get(packageName)) {
+                        writer.println(desc);
+                    }
+                }
+            }
+
+            writer.flush();
+        } catch (FileNotFoundException e) {
+            throw new MojoExecutionException("Output file is not found");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/tools/hivemall-docs/src/main/java/hivemall/docs/IncludeProjectDependenciesComponentConfigurator.java
----------------------------------------------------------------------
diff --git 
a/tools/hivemall-docs/src/main/java/hivemall/docs/IncludeProjectDependenciesComponentConfigurator.java
 
b/tools/hivemall-docs/src/main/java/hivemall/docs/IncludeProjectDependenciesComponentConfigurator.java
new file mode 100644
index 0000000..29774f0
--- /dev/null
+++ 
b/tools/hivemall-docs/src/main/java/hivemall/docs/IncludeProjectDependenciesComponentConfigurator.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.docs;
+
+import java.io.File;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.annotation.Nonnull;
+
+import org.codehaus.classworlds.ClassRealm;
+import org.codehaus.plexus.component.annotations.Component;
+import 
org.codehaus.plexus.component.configurator.AbstractComponentConfigurator;
+import 
org.codehaus.plexus.component.configurator.ComponentConfigurationException;
+import org.codehaus.plexus.component.configurator.ConfigurationListener;
+import 
org.codehaus.plexus.component.configurator.converters.composite.ObjectWithFieldsConverter;
+import 
org.codehaus.plexus.component.configurator.converters.special.ClassRealmConverter;
+import 
org.codehaus.plexus.component.configurator.expression.ExpressionEvaluationException;
+import 
org.codehaus.plexus.component.configurator.expression.ExpressionEvaluator;
+import org.codehaus.plexus.configuration.PlexusConfiguration;
+
+/**
+ * A custom ComponentConfigurator which adds the project's runtime classpath elements to the MOJO
+ * classloader.
+ */
+@SuppressWarnings("deprecation")
+@Component(role = 
org.codehaus.plexus.component.configurator.ComponentConfigurator.class,
+        hint = "include-project-dependencies")
+public class IncludeProjectDependenciesComponentConfigurator extends 
AbstractComponentConfigurator {
+
+    /**
+     * Adds the project's runtime classpath elements to the given realm, registers a
+     * {@link ClassRealmConverter} for that realm, and then applies the configuration to the
+     * component's fields using the realm's class loader.
+     *
+     * @param component object to configure
+     * @param configuration configuration to apply to the component
+     * @param expressionEvaluator evaluator used to resolve expressions
+     * @param containerRealm realm that receives the project's runtime classpath elements
+     * @param listener listener handed to the field converter
+     * @throws ComponentConfigurationException if the runtime classpath cannot be resolved or
+     *         the configuration cannot be applied
+     */
+    public void configureComponent(final Object component,
+            final PlexusConfiguration configuration,
+            final ExpressionEvaluator expressionEvaluator, final ClassRealm containerRealm,
+            final ConfigurationListener listener) throws ComponentConfigurationException {
+        addProjectDependenciesToClassRealm(expressionEvaluator, containerRealm);
+
+        converterLookup.registerConverter(new ClassRealmConverter(containerRealm));
+
+        final ClassLoader realmClassLoader = containerRealm.getClassLoader();
+        new ObjectWithFieldsConverter().processConfiguration(converterLookup, component,
+            realmClassLoader, configuration, expressionEvaluator, listener);
+    }
+
+    /**
+     * Evaluates {@code ${project.runtimeClasspathElements}} and adds every resulting element to
+     * the given realm as a URL.
+     *
+     * @param expressionEvaluator evaluator used to resolve the classpath expression
+     * @param containerRealm realm that receives the classpath URLs
+     * @throws ComponentConfigurationException if the expression cannot be evaluated or an
+     *         element cannot be converted to a URL
+     */
+    @SuppressWarnings("unchecked")
+    private static void addProjectDependenciesToClassRealm(
+            final ExpressionEvaluator expressionEvaluator, final ClassRealm containerRealm)
+            throws ComponentConfigurationException {
+        final Object evaluated;
+        try {
+            evaluated = expressionEvaluator.evaluate("${project.runtimeClasspathElements}");
+        } catch (ExpressionEvaluationException e) {
+            throw new ComponentConfigurationException(
+                "There was a problem evaluating: ${project.runtimeClasspathElements}", e);
+        }
+
+        // unchecked: the evaluated expression is treated as a list of classpath entries
+        final List<String> classpathElements = (List<String>) evaluated;
+
+        // Add the project dependencies to the ClassRealm
+        for (URL url : buildURLs(classpathElements)) {
+            containerRealm.addConstituent(url);
+        }
+    }
+
+    /**
+     * Converts each classpath element to a file URL, preserving the order of the input list.
+     *
+     * @param runtimeClasspathElements file system paths of the classpath elements
+     * @return one URL per element, in input order
+     * @throws ComponentConfigurationException if an element cannot be turned into a URL
+     */
+    @Nonnull
+    private static URL[] buildURLs(@Nonnull final List<String> runtimeClasspathElements)
+            throws ComponentConfigurationException {
+        // Add the projects classes and dependencies
+        final URL[] urls = new URL[runtimeClasspathElements.size()];
+        int next = 0;
+        for (String element : runtimeClasspathElements) {
+            final URL url;
+            try {
+                url = new File(element).toURI().toURL();
+            } catch (MalformedURLException e) {
+                throw new ComponentConfigurationException(
+                    "Unable to access project dependency: " + element, e);
+            }
+            urls[next++] = url;
+        }
+        return urls;
+    }
+
+}

Reply via email to