This is an automated email from the ASF dual-hosted git repository.
cshannon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push:
new 4b1c1b17eb Validate file metadata in MetadataConstraints (#3506)
4b1c1b17eb is described below
commit 4b1c1b17ebc41ae5f356ed991341e521b6defadf
Author: Christopher L. Shannon <[email protected]>
AuthorDate: Sun Jul 2 13:43:24 2023 -0400
Validate file metadata in MetadataConstraints (#3506)
MetadataConstraints will now validate mutations for updates of columns
that store file metadata. The StoredTabletFile validate method is used to
parse the metadata and validate the format.
---
.../accumulo/core/metadata/StoredTabletFile.java | 7 ++
.../server/constraints/MetadataConstraints.java | 25 +++-
.../constraints/MetadataConstraintsTest.java | 127 +++++++++++++++++----
3 files changed, 135 insertions(+), 24 deletions(-)
diff --git
a/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
b/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
index 614368ebcb..6a9dd94d97 100644
--- a/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
@@ -114,6 +114,13 @@ public class StoredTabletFile extends
AbstractTabletFile<StoredTabletFile> {
return metadataEntry;
}
+ /**
+ * Validates that the provided metadata string for the StoredTabletFile is
valid.
+ */
+ public static void validate(String metadataEntry) {
+ ReferencedTabletFile.parsePath(new Path(URI.create(metadataEntry)));
+ }
+
public static StoredTabletFile of(final String metadataEntry) {
return new StoredTabletFile(metadataEntry);
}
diff --git
a/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java
b/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java
index 3b709366b9..921639bda8 100644
---
a/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java
+++
b/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java
@@ -37,6 +37,7 @@ import org.apache.accumulo.core.fate.zookeeper.ZooCache;
import org.apache.accumulo.core.fate.zookeeper.ZooUtil;
import org.apache.accumulo.core.lock.ServiceLock;
import org.apache.accumulo.core.metadata.MetadataTable;
+import org.apache.accumulo.core.metadata.StoredTabletFile;
import org.apache.accumulo.core.metadata.schema.DataFileValue;
import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.BulkFileColumnFamily;
import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ChoppedColumnFamily;
@@ -128,6 +129,19 @@ public class MetadataConstraints implements Constraint {
return lst;
}
+ /*
+ * Validates the data file metadata is valid for a StoredDataFile.
+ */
+ private static ArrayList<Short> validateDataFilePath(ArrayList<Short>
violations,
+ String metadata) {
+ try {
+ StoredTabletFile.validate(metadata);
+ } catch (RuntimeException e) {
+ violations = addViolation(violations, 9);
+ }
+ return violations;
+ }
+
@Override
public List<Short> check(Environment env, Mutation mutation) {
final ServerContext context = ((SystemEnvironment) env).getServerContext();
@@ -202,6 +216,9 @@ public class MetadataConstraints implements Constraint {
}
if (columnFamily.equals(DataFileColumnFamily.NAME)) {
+ violations =
+ validateDataFilePath(violations, new
String(columnUpdate.getColumnQualifier(), UTF_8));
+
try {
DataFileValue dfv = new DataFileValue(columnUpdate.getValue());
@@ -212,9 +229,13 @@ public class MetadataConstraints implements Constraint {
violations = addViolation(violations, 1);
}
} else if (columnFamily.equals(ScanFileColumnFamily.NAME)) {
-
+ violations =
+ validateDataFilePath(violations, new
String(columnUpdate.getColumnQualifier(), UTF_8));
} else if (columnFamily.equals(BulkFileColumnFamily.NAME)) {
if (!columnUpdate.isDeleted() && !checkedBulk) {
+ violations = validateDataFilePath(violations,
+ new String(columnUpdate.getColumnQualifier(), UTF_8));
+
// splits, which also write the time reference, are allowed to write
this reference even
// when
// the transaction is not running because the other half of the
tablet is holding a
@@ -344,6 +365,8 @@ public class MetadataConstraints implements Constraint {
return "Lock not held in zookeeper by writer";
case 8:
return "Bulk load transaction no longer running";
+ case 9:
+ return "Invalid data file metadata format";
}
return null;
}
diff --git
a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
index f798f29418..be66aa1d66 100644
---
a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
+++
b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
@@ -31,6 +31,7 @@ import org.apache.accumulo.core.metadata.schema.DataFileValue;
import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.BulkFileColumnFamily;
import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.CurrentLocationColumnFamily;
import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
+import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily;
import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily;
import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily;
import org.apache.accumulo.server.ServerContext;
@@ -153,8 +154,9 @@ public class MetadataConstraintsTest {
// inactive txid
m = new Mutation(new Text("0;foo"));
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new
Value("12345"));
- m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+ new Value("12345"));
+ m.put(DataFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
new DataFileValue(1, 1).encodeAsValue());
violations = mc.check(createEnv(), m);
assertNotNull(violations);
@@ -163,8 +165,9 @@ public class MetadataConstraintsTest {
// txid that throws exception
m = new Mutation(new Text("0;foo"));
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("9"));
- m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+ new Value("9"));
+ m.put(DataFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
new DataFileValue(1, 1).encodeAsValue());
violations = mc.check(createEnv(), m);
assertNotNull(violations);
@@ -173,15 +176,17 @@ public class MetadataConstraintsTest {
// active txid w/ file
m = new Mutation(new Text("0;foo"));
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
- m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+ new Value("5"));
+ m.put(DataFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
new DataFileValue(1, 1).encodeAsValue());
violations = mc.check(createEnv(), m);
assertNull(violations);
// active txid w/o file
m = new Mutation(new Text("0;foo"));
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+ new Value("5"));
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
@@ -189,11 +194,13 @@ public class MetadataConstraintsTest {
// two active txids w/ files
m = new Mutation(new Text("0;foo"));
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
- m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+ new Value("5"));
+ m.put(DataFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
new DataFileValue(1, 1).encodeAsValue());
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile2"), new Value("7"));
- m.put(DataFileColumnFamily.NAME, new Text("/someFile2"),
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile2"),
+ new Value("7"));
+ m.put(DataFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile2"),
new DataFileValue(1, 1).encodeAsValue());
violations = mc.check(createEnv(), m);
assertNotNull(violations);
@@ -202,21 +209,25 @@ public class MetadataConstraintsTest {
// two files w/ one active txid
m = new Mutation(new Text("0;foo"));
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
- m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+ new Value("5"));
+ m.put(DataFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
new DataFileValue(1, 1).encodeAsValue());
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile2"), new Value("5"));
- m.put(DataFileColumnFamily.NAME, new Text("/someFile2"),
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile2"),
+ new Value("5"));
+ m.put(DataFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile2"),
new DataFileValue(1, 1).encodeAsValue());
violations = mc.check(createEnv(), m);
assertNull(violations);
// two loaded w/ one active txid and one file
m = new Mutation(new Text("0;foo"));
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
- m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+ new Value("5"));
+ m.put(DataFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
new DataFileValue(1, 1).encodeAsValue());
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile2"), new Value("5"));
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile2"),
+ new Value("5"));
violations = mc.check(createEnv(), m);
assertNotNull(violations);
assertEquals(1, violations.size());
@@ -224,38 +235,108 @@ public class MetadataConstraintsTest {
// active txid, mutation that looks like split
m = new Mutation(new Text("0;foo"));
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+ new Value("5"));
ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value("/t1"));
violations = mc.check(createEnv(), m);
assertNull(violations);
// inactive txid, mutation that looks like split
m = new Mutation(new Text("0;foo"));
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new
Value("12345"));
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+ new Value("12345"));
ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value("/t1"));
violations = mc.check(createEnv(), m);
assertNull(violations);
// active txid, mutation that looks like a load
m = new Mutation(new Text("0;foo"));
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+ new Value("5"));
m.put(CurrentLocationColumnFamily.NAME, new Text("789"), new
Value("127.0.0.1:9997"));
violations = mc.check(createEnv(), m);
assertNull(violations);
// inactive txid, mutation that looks like a load
m = new Mutation(new Text("0;foo"));
- m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new
Value("12345"));
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+ new Value("12345"));
m.put(CurrentLocationColumnFamily.NAME, new Text("789"), new
Value("127.0.0.1:9997"));
violations = mc.check(createEnv(), m);
assertNull(violations);
// deleting a load flag
m = new Mutation(new Text("0;foo"));
- m.putDelete(BulkFileColumnFamily.NAME, new Text("/someFile"));
+ m.putDelete(BulkFileColumnFamily.NAME,
+ new Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"));
+ violations = mc.check(createEnv(), m);
+ assertNull(violations);
+
+ // Missing beginning of path
+ m = new Mutation(new Text("0;foo"));
+ m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
+ violations = mc.check(createEnv(), m);
+ assertNotNull(violations);
+ assertEquals(2, violations.size());
+ assertEquals(Short.valueOf((short) 9), violations.get(0));
+ assertNotNull(mc.getViolationDescription(violations.get(0)));
+ // No DataFileColumnFamily included
+ assertEquals(Short.valueOf((short) 8), violations.get(1));
+
+ // Missing tables directory in path
+ m = new Mutation(new Text("0;foo"));
+ m.put(BulkFileColumnFamily.NAME, new
Text("hdfs://nn1/a/accumulo/2b/t-001/C00.rf"),
+ new Value("5"));
+ violations = mc.check(createEnv(), m);
+ assertNotNull(violations);
+ assertEquals(2, violations.size());
+ assertEquals(Short.valueOf((short) 9), violations.get(0));
+ // No DataFileColumnFamily included
+ assertEquals(Short.valueOf((short) 8), violations.get(1));
+
+ }
+
+ @Test
+ public void testDataFileCheck() {
+ testFileMetadataValidation(DataFileColumnFamily.NAME, new DataFileValue(1,
1).encodeAsValue());
+ }
+
+ @Test
+ public void testScanFileCheck() {
+ testFileMetadataValidation(ScanFileColumnFamily.NAME, new Value());
+ }
+
+ private void testFileMetadataValidation(Text columnFamily, Value value) {
+ MetadataConstraints mc = new TestMetadataConstraints();
+ Mutation m;
+ List<Short> violations;
+
+ // Missing beginning of path
+ m = new Mutation(new Text("0;foo"));
+ m.put(columnFamily, new Text("/someFile"), value);
+ violations = mc.check(createEnv(), m);
+ assertNotNull(violations);
+ assertEquals(1, violations.size());
+ assertEquals(Short.valueOf((short) 9), violations.get(0));
+ assertNotNull(mc.getViolationDescription(violations.get(0)));
+
+ // Missing tables directory in path
+ m = new Mutation(new Text("0;foo"));
+ m.put(columnFamily, new Text("hdfs://nn1/a/accumulo/2b/t-001/C00.rf"),
+ new DataFileValue(1, 1).encodeAsValue());
+ violations = mc.check(createEnv(), m);
+ assertNotNull(violations);
+ assertEquals(1, violations.size());
+ assertEquals(Short.valueOf((short) 9), violations.get(0));
+
+ // Should pass validation
+ m = new Mutation(new Text("0;foo"));
+ m.put(columnFamily, new
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/C00.rf"),
+ new DataFileValue(1, 1).encodeAsValue());
violations = mc.check(createEnv(), m);
assertNull(violations);
+ assertNotNull(mc.getViolationDescription((short) 9));
}
}