[
https://issues.apache.org/jira/browse/HBASE-4944?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13162253#comment-13162253
]
Andrew Purtell commented on HBASE-4944:
---------------------------------------
From JIRA: "Cannot attach file HBASE-4944.patch: Unknown server error (500)."
The patch is pretty small, so here it is:
{code}
Index: src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/Store.java	(revision 1210044)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/Store.java	(working copy)
@@ -50,6 +50,7 @@
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.io.hfile.InvalidHFileException;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.regionserver.StoreScanner.ScanType;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
@@ -123,6 +124,7 @@
private final String storeNameStr;
private CompactionProgress progress;
private final int compactionKVMax;
+ private final boolean verifyBulkLoads;
// not private for testing
/* package */ScanInfo scanInfo;
@@ -222,6 +224,9 @@
= conf.getLong("hbase.hstore.compaction.max.size", Long.MAX_VALUE);
this.compactionKVMax = conf.getInt("hbase.hstore.compaction.kv.max", 10);
+ this.verifyBulkLoads = conf.getBoolean("hbase.hstore.bulkload.verify",
+ true);
+
if (Store.closeCheckInterval == 0) {
Store.closeCheckInterval = conf.getInt(
"hbase.hstore.close.check.interval", 10*1000*1000 /* 10 MB */);
@@ -355,8 +360,8 @@
}
/**
- * This throws a WrongRegionException if the bulkHFile does not fit in this
- * region.
+ * This throws a WrongRegionException if the HFile does not fit in this
+ * region, or an InvalidHFileException if the HFile is not valid.
*
*/
void assertBulkLoadHFileOk(Path srcPath) throws IOException {
@@ -386,6 +391,34 @@
"Bulk load file " + srcPath.toString() + " does not fit inside region "
+ this.region);
}
+
+ if (verifyBulkLoads) {
+ KeyValue pkv = null;
+ HFileScanner scanner = reader.getScanner(false, false, false);
+ scanner.seekTo();
+ do {
+ KeyValue kv = scanner.getKeyValue();
+ if (pkv != null) {
+ if (Bytes.compareTo(pkv.getBuffer(), pkv.getRowOffset(),
+ pkv.getRowLength(), kv.getBuffer(), kv.getRowOffset(),
+ kv.getRowLength()) > 0) {
+ throw new InvalidHFileException("Previous row is greater then"
+ + " current row: path=" + srcPath + " previous="
+ + Bytes.toStringBinary(pkv.getKey()) + " current="
+ + Bytes.toStringBinary(kv.getKey()));
+ }
+ if (Bytes.compareTo(pkv.getBuffer(), pkv.getFamilyOffset(),
+ pkv.getFamilyLength(), kv.getBuffer(), kv.getFamilyOffset(),
+ kv.getFamilyLength()) != 0) {
+ throw new InvalidHFileException("Previous key had different"
+ + " family compared to current key: path=" + srcPath
+ + " previous=" + Bytes.toStringBinary(pkv.getKey())
+ + " current=" + Bytes.toStringBinary(kv.getKey()));
+ }
+ }
+ pkv = kv;
+ } while (scanner.next());
+ }
} finally {
if (reader != null) reader.close();
}
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/InvalidHFileException.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/InvalidHFileException.java	(revision 0)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/InvalidHFileException.java	(revision 0)
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+
+/**
+ * Thrown when an invalid HFile format is detected
+ */
+public class InvalidHFileException extends IOException {
+ private static final long serialVersionUID = 4660352028739861249L;
+
+ /** constructor */
+ public InvalidHFileException() {
+ super();
+ }
+
+ /**
+ * Constructor
+ * @param s message
+ */
+ public InvalidHFileException(String s) {
+ super(s);
+ }
+}
\ No newline at end of file
{code}
> Optionally verify bulk loaded HFiles
> ------------------------------------
>
> Key: HBASE-4944
> URL: https://issues.apache.org/jira/browse/HBASE-4944
> Project: HBase
> Issue Type: Improvement
> Components: regionserver
> Affects Versions: 0.92.0, 0.94.0, 0.90.5
> Reporter: Andrew Purtell
> Priority: Minor
>
> We rely on users to produce properly formatted HFiles for bulk import.
> Attached patch adds an optional code path, toggled by a configuration
> property, that verifies the HFile under consideration for import is properly
> sorted. The default maintains the current behavior, which does not scan the
> file for correctness.
> Patch is against trunk but can apply against all active branches.
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators:
https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira