[
https://issues.apache.org/jira/browse/TAJO-1131?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14211837#comment-14211837
]
ASF GitHub Bot commented on TAJO-1131:
--------------------------------------
Github user babokim commented on a diff in the pull request:
https://github.com/apache/tajo/pull/232#discussion_r20343234
--- Diff:
tajo-storage/src/main/java/org/apache/tajo/storage/hbase/HBaseScanner.java ---
@@ -0,0 +1,445 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.storage.hbase;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.*;
+import org.apache.hadoop.hbase.filter.*;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.catalog.Schema;
+import org.apache.tajo.catalog.TableMeta;
+import org.apache.tajo.catalog.proto.CatalogProtos.StoreType;
+import org.apache.tajo.catalog.statistics.ColumnStats;
+import org.apache.tajo.catalog.statistics.TableStats;
+import org.apache.tajo.conf.TajoConf;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.datum.TextDatum;
+import org.apache.tajo.storage.Scanner;
+import org.apache.tajo.storage.StorageManager;
+import org.apache.tajo.storage.Tuple;
+import org.apache.tajo.storage.VTuple;
+import org.apache.tajo.storage.fragment.Fragment;
+import org.apache.tajo.util.BytesUtils;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+public class HBaseScanner implements Scanner {
+ private static final Log LOG = LogFactory.getLog(HBaseScanner.class);
+ private static final int DEFAULT_FETCH_SIZE = 1000;
+ private static final int MAX_LIST_SIZE = 100;
+
+ protected boolean inited = false;
+ private TajoConf conf;
+ private Schema schema;
+ private TableMeta meta;
+ private HBaseFragment fragment;
+ private Scan scan;
+ private HTableInterface htable;
+ private Configuration hbaseConf;
+ private Column[] targets;
+ private TableStats tableStats;
+ private ResultScanner scanner;
+ private AtomicBoolean finished = new AtomicBoolean(false);
+ private float progress = 0.0f;
+ private int scanFetchSize;
+ private Result[] scanResults;
+ private int scanResultIndex = -1;
+ private Column[] schemaColumns;
+
+ private ColumnMapping columnMapping;
+ private int[] targetIndexes;
+
+ private int numRows = 0;
+
+ private byte[][][] mappingColumnFamilies;
+ private boolean[] isRowKeyMappings;
+ private boolean[] isBinaryColumns;
+ private boolean[] isColumnKeys;
+ private boolean[] isColumnValues;
+
+ private int[] rowKeyFieldIndexes;
+ private char rowKeyDelimiter;
+
+ public HBaseScanner (Configuration conf, Schema schema, TableMeta meta,
Fragment fragment) throws IOException {
+ this.conf = (TajoConf)conf;
+ this.schema = schema;
+ this.meta = meta;
+ this.fragment = (HBaseFragment)fragment;
+ this.tableStats = new TableStats();
+ }
+
+ @Override
+ public void init() throws IOException {
+ inited = true;
+ schemaColumns = schema.toArray();
+ if (fragment != null) {
+ tableStats.setNumBytes(0);
+ tableStats.setNumBlocks(1);
+ }
+ if (schema != null) {
+ for(Column eachColumn: schema.getColumns()) {
+ ColumnStats columnStats = new ColumnStats(eachColumn);
+ tableStats.addColumnStat(columnStats);
+ }
+ }
+
+ scanFetchSize =
Integer.parseInt(meta.getOption("hbase.scanner.fetch,size", "" +
DEFAULT_FETCH_SIZE));
--- End diff --
I will change the constant value. Also, all constants for HBase storage
are placed in the HBaseStorageConstants class.
> Supports Inserting or Creating table into the HBase mapped table.
> -----------------------------------------------------------------
>
> Key: TAJO-1131
> URL: https://issues.apache.org/jira/browse/TAJO-1131
> Project: Tajo
> Issue Type: Sub-task
> Reporter: Hyoungjun Kim
> Assignee: Hyoungjun Kim
> Priority: Minor
>
> Tajo should support inserting into or creating an HBase mapped table.
> HBase supports bulk uploading. To use this tool, the query result should
> be range partitioned and sorted by region split range.
> See the following HBase reference.
> http://hbase.apache.org/book/arch.bulk.load.html
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)