This is an automated email from the ASF dual-hosted git repository.

cshannon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 4b1c1b17eb Validate file metadata in MetadataConstraints (#3506)
4b1c1b17eb is described below

commit 4b1c1b17ebc41ae5f356ed991341e521b6defadf
Author: Christopher L. Shannon <[email protected]>
AuthorDate: Sun Jul 2 13:43:24 2023 -0400

    Validate file metadata in MetadataConstraints (#3506)
    
    MetadataConstraints will now validate mutations for updates of columns
    that store file metadata. The StoredTabletFile validate method is used to
    parse the metadata and validate the format.
---
 .../accumulo/core/metadata/StoredTabletFile.java   |   7 ++
 .../server/constraints/MetadataConstraints.java    |  25 +++-
 .../constraints/MetadataConstraintsTest.java       | 127 +++++++++++++++++----
 3 files changed, 135 insertions(+), 24 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java 
b/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
index 614368ebcb..6a9dd94d97 100644
--- a/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
@@ -114,6 +114,13 @@ public class StoredTabletFile extends 
AbstractTabletFile<StoredTabletFile> {
     return metadataEntry;
   }
 
+  /**
+   * Validates that the provided metadata string for the StoredTabletFile is 
valid.
+   */
+  public static void validate(String metadataEntry) {
+    ReferencedTabletFile.parsePath(new Path(URI.create(metadataEntry)));
+  }
+
   public static StoredTabletFile of(final String metadataEntry) {
     return new StoredTabletFile(metadataEntry);
   }
diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java
 
b/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java
index 3b709366b9..921639bda8 100644
--- 
a/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java
@@ -37,6 +37,7 @@ import org.apache.accumulo.core.fate.zookeeper.ZooCache;
 import org.apache.accumulo.core.fate.zookeeper.ZooUtil;
 import org.apache.accumulo.core.lock.ServiceLock;
 import org.apache.accumulo.core.metadata.MetadataTable;
+import org.apache.accumulo.core.metadata.StoredTabletFile;
 import org.apache.accumulo.core.metadata.schema.DataFileValue;
 import 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.BulkFileColumnFamily;
 import 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ChoppedColumnFamily;
@@ -128,6 +129,19 @@ public class MetadataConstraints implements Constraint {
     return lst;
   }
 
+  /*
+   * Validates the data file metadata is valid for a StoredDataFile.
+   */
+  private static ArrayList<Short> validateDataFilePath(ArrayList<Short> 
violations,
+      String metadata) {
+    try {
+      StoredTabletFile.validate(metadata);
+    } catch (RuntimeException e) {
+      violations = addViolation(violations, 9);
+    }
+    return violations;
+  }
+
   @Override
   public List<Short> check(Environment env, Mutation mutation) {
     final ServerContext context = ((SystemEnvironment) env).getServerContext();
@@ -202,6 +216,9 @@ public class MetadataConstraints implements Constraint {
       }
 
       if (columnFamily.equals(DataFileColumnFamily.NAME)) {
+        violations =
+            validateDataFilePath(violations, new 
String(columnUpdate.getColumnQualifier(), UTF_8));
+
         try {
           DataFileValue dfv = new DataFileValue(columnUpdate.getValue());
 
@@ -212,9 +229,13 @@ public class MetadataConstraints implements Constraint {
           violations = addViolation(violations, 1);
         }
       } else if (columnFamily.equals(ScanFileColumnFamily.NAME)) {
-
+        violations =
+            validateDataFilePath(violations, new 
String(columnUpdate.getColumnQualifier(), UTF_8));
       } else if (columnFamily.equals(BulkFileColumnFamily.NAME)) {
         if (!columnUpdate.isDeleted() && !checkedBulk) {
+          violations = validateDataFilePath(violations,
+              new String(columnUpdate.getColumnQualifier(), UTF_8));
+
           // splits, which also write the time reference, are allowed to write 
this reference even
           // when
           // the transaction is not running because the other half of the 
tablet is holding a
@@ -344,6 +365,8 @@ public class MetadataConstraints implements Constraint {
         return "Lock not held in zookeeper by writer";
       case 8:
         return "Bulk load transaction no longer running";
+      case 9:
+        return "Invalid data file metadata format";
     }
     return null;
   }
diff --git 
a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
 
b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
index f798f29418..be66aa1d66 100644
--- 
a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
+++ 
b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
@@ -31,6 +31,7 @@ import org.apache.accumulo.core.metadata.schema.DataFileValue;
 import 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.BulkFileColumnFamily;
 import 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.CurrentLocationColumnFamily;
 import 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
+import 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily;
 import 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily;
 import 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily;
 import org.apache.accumulo.server.ServerContext;
@@ -153,8 +154,9 @@ public class MetadataConstraintsTest {
 
     // inactive txid
     m = new Mutation(new Text("0;foo"));
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new 
Value("12345"));
-    m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+        new Value("12345"));
+    m.put(DataFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
         new DataFileValue(1, 1).encodeAsValue());
     violations = mc.check(createEnv(), m);
     assertNotNull(violations);
@@ -163,8 +165,9 @@ public class MetadataConstraintsTest {
 
     // txid that throws exception
     m = new Mutation(new Text("0;foo"));
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("9"));
-    m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+        new Value("9"));
+    m.put(DataFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
         new DataFileValue(1, 1).encodeAsValue());
     violations = mc.check(createEnv(), m);
     assertNotNull(violations);
@@ -173,15 +176,17 @@ public class MetadataConstraintsTest {
 
     // active txid w/ file
     m = new Mutation(new Text("0;foo"));
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
-    m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+        new Value("5"));
+    m.put(DataFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
         new DataFileValue(1, 1).encodeAsValue());
     violations = mc.check(createEnv(), m);
     assertNull(violations);
 
     // active txid w/o file
     m = new Mutation(new Text("0;foo"));
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+        new Value("5"));
     violations = mc.check(createEnv(), m);
     assertNotNull(violations);
     assertEquals(1, violations.size());
@@ -189,11 +194,13 @@ public class MetadataConstraintsTest {
 
     // two active txids w/ files
     m = new Mutation(new Text("0;foo"));
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
-    m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+        new Value("5"));
+    m.put(DataFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
         new DataFileValue(1, 1).encodeAsValue());
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile2"), new Value("7"));
-    m.put(DataFileColumnFamily.NAME, new Text("/someFile2"),
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile2"),
+        new Value("7"));
+    m.put(DataFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile2"),
         new DataFileValue(1, 1).encodeAsValue());
     violations = mc.check(createEnv(), m);
     assertNotNull(violations);
@@ -202,21 +209,25 @@ public class MetadataConstraintsTest {
 
     // two files w/ one active txid
     m = new Mutation(new Text("0;foo"));
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
-    m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+        new Value("5"));
+    m.put(DataFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
         new DataFileValue(1, 1).encodeAsValue());
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile2"), new Value("5"));
-    m.put(DataFileColumnFamily.NAME, new Text("/someFile2"),
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile2"),
+        new Value("5"));
+    m.put(DataFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile2"),
         new DataFileValue(1, 1).encodeAsValue());
     violations = mc.check(createEnv(), m);
     assertNull(violations);
 
     // two loaded w/ one active txid and one file
     m = new Mutation(new Text("0;foo"));
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
-    m.put(DataFileColumnFamily.NAME, new Text("/someFile"),
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+        new Value("5"));
+    m.put(DataFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
         new DataFileValue(1, 1).encodeAsValue());
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile2"), new Value("5"));
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile2"),
+        new Value("5"));
     violations = mc.check(createEnv(), m);
     assertNotNull(violations);
     assertEquals(1, violations.size());
@@ -224,38 +235,108 @@ public class MetadataConstraintsTest {
 
     // active txid, mutation that looks like split
     m = new Mutation(new Text("0;foo"));
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+        new Value("5"));
     ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value("/t1"));
     violations = mc.check(createEnv(), m);
     assertNull(violations);
 
     // inactive txid, mutation that looks like split
     m = new Mutation(new Text("0;foo"));
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new 
Value("12345"));
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+        new Value("12345"));
     ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value("/t1"));
     violations = mc.check(createEnv(), m);
     assertNull(violations);
 
     // active txid, mutation that looks like a load
     m = new Mutation(new Text("0;foo"));
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+        new Value("5"));
     m.put(CurrentLocationColumnFamily.NAME, new Text("789"), new 
Value("127.0.0.1:9997"));
     violations = mc.check(createEnv(), m);
     assertNull(violations);
 
     // inactive txid, mutation that looks like a load
     m = new Mutation(new Text("0;foo"));
-    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new 
Value("12345"));
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"),
+        new Value("12345"));
     m.put(CurrentLocationColumnFamily.NAME, new Text("789"), new 
Value("127.0.0.1:9997"));
     violations = mc.check(createEnv(), m);
     assertNull(violations);
 
     // deleting a load flag
     m = new Mutation(new Text("0;foo"));
-    m.putDelete(BulkFileColumnFamily.NAME, new Text("/someFile"));
+    m.putDelete(BulkFileColumnFamily.NAME,
+        new Text("hdfs://nn1/a/accumulo/tables/2b/t-001/someFile"));
+    violations = mc.check(createEnv(), m);
+    assertNull(violations);
+
+    // Missing beginning of path
+    m = new Mutation(new Text("0;foo"));
+    m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5"));
+    violations = mc.check(createEnv(), m);
+    assertNotNull(violations);
+    assertEquals(2, violations.size());
+    assertEquals(Short.valueOf((short) 9), violations.get(0));
+    assertNotNull(mc.getViolationDescription(violations.get(0)));
+    // No DataFileColumnFamily included
+    assertEquals(Short.valueOf((short) 8), violations.get(1));
+
+    // Missing tables directory in path
+    m = new Mutation(new Text("0;foo"));
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://nn1/a/accumulo/2b/t-001/C00.rf"),
+        new Value("5"));
+    violations = mc.check(createEnv(), m);
+    assertNotNull(violations);
+    assertEquals(2, violations.size());
+    assertEquals(Short.valueOf((short) 9), violations.get(0));
+    // No DataFileColumnFamily included
+    assertEquals(Short.valueOf((short) 8), violations.get(1));
+
+  }
+
+  @Test
+  public void testDataFileCheck() {
+    testFileMetadataValidation(DataFileColumnFamily.NAME, new DataFileValue(1, 
1).encodeAsValue());
+  }
+
+  @Test
+  public void testScanFileCheck() {
+    testFileMetadataValidation(ScanFileColumnFamily.NAME, new Value());
+  }
+
+  private void testFileMetadataValidation(Text columnFamily, Value value) {
+    MetadataConstraints mc = new TestMetadataConstraints();
+    Mutation m;
+    List<Short> violations;
+
+    // Missing beginning of path
+    m = new Mutation(new Text("0;foo"));
+    m.put(columnFamily, new Text("/someFile"), value);
+    violations = mc.check(createEnv(), m);
+    assertNotNull(violations);
+    assertEquals(1, violations.size());
+    assertEquals(Short.valueOf((short) 9), violations.get(0));
+    assertNotNull(mc.getViolationDescription(violations.get(0)));
+
+    // Missing tables directory in path
+    m = new Mutation(new Text("0;foo"));
+    m.put(columnFamily, new Text("hdfs://nn1/a/accumulo/2b/t-001/C00.rf"),
+        new DataFileValue(1, 1).encodeAsValue());
+    violations = mc.check(createEnv(), m);
+    assertNotNull(violations);
+    assertEquals(1, violations.size());
+    assertEquals(Short.valueOf((short) 9), violations.get(0));
+
+    // Should pass validation
+    m = new Mutation(new Text("0;foo"));
+    m.put(columnFamily, new 
Text("hdfs://nn1/a/accumulo/tables/2b/t-001/C00.rf"),
+        new DataFileValue(1, 1).encodeAsValue());
     violations = mc.check(createEnv(), m);
     assertNull(violations);
 
+    assertNotNull(mc.getViolationDescription((short) 9));
   }
 
 }

Reply via email to