[37/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
http://git-wip-us.apache.org/repos/asf/hbase/blob/59d03410/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java -- diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java new file mode 100644 index 000..b64271e --- /dev/null +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java @@ -0,0 +1,793 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.mapreduce; + +import static java.lang.String.format; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.TableNotEnabledException; +import org.apache.hadoop.hbase.TableNotFoundException; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionLocator; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Base64; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions; +import org.apache.hadoop.hbase.shaded.com.google.common.base.Splitter; +import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists; + +/** + * Tool to import data from a TSV file. + * + * This tool is rather simplistic - it doesn't do any quoting or + * escaping, but is useful for many data loads. + * + * @see ImportTsv#usage(String) + */ +@InterfaceAudience.Public +public class ImportTsv extends Configured implements Tool { + + protected static final Log LOG = LogFactory.getLog(ImportTsv.class); + + final static String NAME = "importtsv"; + + public final static String MAPPER_CONF_KEY = "importtsv.mapper.class"; + public final static String BULK_OUTPUT_CONF_KEY = "importtsv.bulk.output"; + public final static String TIMESTAMP_CONF_KEY = "importtsv.timestamp"; + public final static String JOB_NAME_CONF_KEY = "mapreduce.job.name"; + // TODO: the rest of these configs are used exclusively by TsvImporterMapper. + // Move them out of the tool and let the mapper handle its own validation. + public final static String DRY_RUN_CONF_KEY = "importtsv.dry.run"; + // If true, bad lines are logged to stderr. Default: false. + public final static String LOG_BAD_LINES_CONF_KEY = "importtsv.log.bad.lines"; + public final static String SKIP_LINES_CONF_KEY = "importtsv.skip.bad.lines"; + public final static String SKIP_EMPTY_COLUMNS = "importtsv.skip.empty.columns"; + public final static String COLUMNS_CONF_KEY = "importtsv.columns"; + public final static String SEPARATOR_CONF_KEY = "importtsv.separator"; + public final static String
[37/41] hbase git commit: HBASE-18640 Move mapreduce out of hbase-server into separate module.
http://git-wip-us.apache.org/repos/asf/hbase/blob/664b6be0/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java -- diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java new file mode 100644 index 000..b64271e --- /dev/null +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java @@ -0,0 +1,793 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.mapreduce; + +import static java.lang.String.format; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.TableNotEnabledException; +import org.apache.hadoop.hbase.TableNotFoundException; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionLocator; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Base64; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions; +import org.apache.hadoop.hbase.shaded.com.google.common.base.Splitter; +import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists; + +/** + * Tool to import data from a TSV file. + * + * This tool is rather simplistic - it doesn't do any quoting or + * escaping, but is useful for many data loads. + * + * @see ImportTsv#usage(String) + */ +@InterfaceAudience.Public +public class ImportTsv extends Configured implements Tool { + + protected static final Log LOG = LogFactory.getLog(ImportTsv.class); + + final static String NAME = "importtsv"; + + public final static String MAPPER_CONF_KEY = "importtsv.mapper.class"; + public final static String BULK_OUTPUT_CONF_KEY = "importtsv.bulk.output"; + public final static String TIMESTAMP_CONF_KEY = "importtsv.timestamp"; + public final static String JOB_NAME_CONF_KEY = "mapreduce.job.name"; + // TODO: the rest of these configs are used exclusively by TsvImporterMapper. + // Move them out of the tool and let the mapper handle its own validation. + public final static String DRY_RUN_CONF_KEY = "importtsv.dry.run"; + // If true, bad lines are logged to stderr. Default: false. + public final static String LOG_BAD_LINES_CONF_KEY = "importtsv.log.bad.lines"; + public final static String SKIP_LINES_CONF_KEY = "importtsv.skip.bad.lines"; + public final static String SKIP_EMPTY_COLUMNS = "importtsv.skip.empty.columns"; + public final static String COLUMNS_CONF_KEY = "importtsv.columns"; + public final static String SEPARATOR_CONF_KEY = "importtsv.separator"; + public final static String