DomGarguilo commented on code in PR #3435:
URL: https://github.com/apache/accumulo/pull/3435#discussion_r1210556193


##########
test/src/main/java/org/apache/accumulo/test/functional/FileNormalizationIT.java:
##########
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.test.functional;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.Accumulo;
+import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.admin.CompactionConfig;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.TableId;
+import org.apache.accumulo.core.dataImpl.KeyExtent;
+import org.apache.accumulo.core.metadata.MetadataTable;
+import 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
+import org.apache.accumulo.core.security.TablePermission;
+import org.apache.accumulo.harness.SharedMiniClusterBase;
+import org.apache.accumulo.test.TestIngest;
+import org.apache.accumulo.test.VerifyIngest;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * These test check that Accumulo handles misnormalized files in the metadata 
table correctly. If
+ * Accumulo code reads a misnormalized file from the metadata table, 
normalizes it, and then tries
+ * to update the metadata table then the key will not match. The mismatch 
could result in duplicate
+ * entries.
+ */
+public class FileNormalizationIT extends SharedMiniClusterBase {
+
+  private static final Logger log = 
LoggerFactory.getLogger(FileNormalizationIT.class);
+
+  @BeforeAll
+  public static void setup() throws Exception {
+    SharedMiniClusterBase.startMiniCluster();
+  }
+
+  @AfterAll
+  public static void teardown() {
+    SharedMiniClusterBase.stopMiniCluster();
+  }
+
+  @Test
+  public void testSplits() throws Exception {
+    try (AccumuloClient client = 
Accumulo.newClient().from(getClientProps()).build()) {
+      String table = getUniqueNames(1)[0];
+
+      client.tableOperations().create(table);
+      VerifyIngest.VerifyParams params =
+          new VerifyIngest.VerifyParams(getClientProps(), table, 100_000);
+      TestIngest.ingest(client, params);
+
+      client.tableOperations().flush(table, null, null, true);
+
+      misnormalizeFiles(client, table);
+
+      var splits = TestIngest.getSplitPoints(params.startRow, params.startRow 
+ params.rows, 2);
+      assertEquals(1, splits.size());
+
+      client.tableOperations().addSplits(table, splits);
+
+      HashSet<String> paths = new HashSet<>();
+
+      // uncomment verify and test will hang here because tablet server dies 
w/ uncaught exception
+      // in AssignmentHandler
+      //
+      // VerifyIngest.verifyIngest(client, params);
+
+      try (var scanner = createMetadataFileScanner(client, table)) {
+        scanner.forEach((k, v) -> {
+          var row = k.getRowData().toString();
+          var qual = k.getColumnQualifierData().toString();
+          var path = new Path(qual).toString();
+          var rowPath = row + "+" + path;
+
+          log.debug("split test, inspecting {} {} {}", row, qual, v);
+
+          assertFalse(paths.contains(rowPath),
+              "Tablet " + row + " has duplicate normalized path " + path);
+          paths.add(rowPath);
+        });
+      }
+
+      VerifyIngest.verifyIngest(client, params);
+    }
+  }
+
+  @Test
+  public void testCompaction() throws Exception {
+    try (AccumuloClient client = 
Accumulo.newClient().from(getClientProps()).build()) {
+      String table = getUniqueNames(1)[0];
+
+      client.tableOperations().create(table);
+      VerifyIngest.VerifyParams params =
+          new VerifyIngest.VerifyParams(getClientProps(), table, 100_000);
+      TestIngest.ingest(client, params);
+
+      client.tableOperations().flush(table, null, null, true);
+
+      misnormalizeFiles(client, table);
+
+      client.tableOperations().compact(table, new 
CompactionConfig().setWait(true));
+
+      try (var scanner = createMetadataFileScanner(client, table)) {
+        Set<String> filenames = new HashSet<>();
+
+        scanner.forEach((k, v) -> {
+          var path = new Path(k.getColumnQualifierData().toString());
+          assertFalse(filenames.contains(path.getName()));
+          assertTrue(path.getName().startsWith("A"));
+          filenames.add(path.getName());
+        });
+
+        assertEquals(1, filenames.size());
+
+        VerifyIngest.verifyIngest(client, params);
+      }
+    }
+  }
+
+  @Test
+  public void testMerge() throws Exception {
+    try (AccumuloClient client = 
Accumulo.newClient().from(getClientProps()).build()) {
+      String table = getUniqueNames(1)[0];
+
+      // disable compactions
+      Map<String,String> props = Map.of(Property.TABLE_MAJC_RATIO.getKey(), 
"10");
+      client.tableOperations().create(table, new 
NewTableConfiguration().setProperties(props));
+      VerifyIngest.VerifyParams params =
+          new VerifyIngest.VerifyParams(getClientProps(), table, 100_000);
+      TestIngest.ingest(client, params);
+
+      client.tableOperations().flush(table, null, null, true);
+
+      var splits = TestIngest.getSplitPoints(params.startRow, params.startRow 
+ params.rows, 4);
+      assertEquals(3, splits.size());
+
+      client.tableOperations().addSplits(table, splits);
+
+      // create a unique file per tablet
+      client.tableOperations().compact(table, new 
CompactionConfig().setWait(true));
+
+      misnormalizeFiles(client, table);
+
+      Set<String> filesBeforeMerge = new HashSet<>();
+      try (var scanner = createMetadataFileScanner(client, table)) {
+        scanner.forEach((k, v) -> {
+          var qual = k.getColumnQualifierData().toString();
+          assertTrue(qual.contains("//tables//"));
+          filesBeforeMerge.add(qual);
+        });
+      }
+
+      // expect 4 files. one for each tablet
+      assertEquals(4, filesBeforeMerge.size());
+
+      client.tableOperations().merge(table, null, null);
+
+      Set<String> filesAfterMerge = new HashSet<>();
+      try (var scanner = createMetadataFileScanner(client, table)) {
+        scanner.forEach((k, v) -> {
+          // should only see the default tablet
+          assertTrue(k.getRow().toString().endsWith("<"));
+          filesAfterMerge.add(k.getColumnQualifierData().toString());
+        });
+      }
+
+      assertEquals(0, client.tableOperations().listSplits(table).size());
+      assertEquals(filesBeforeMerge, filesAfterMerge);
+
+      VerifyIngest.verifyIngest(client, params);
+    }
+  }
+
+  private Scanner createMetadataFileScanner(AccumuloClient client, String 
table) throws Exception {
+    var scanner = client.createScanner(MetadataTable.NAME);
+    var tableId = TableId.of(client.tableOperations().tableIdMap().get(table));
+    var range = new KeyExtent(tableId, null, null).toMetaRange();
+    scanner.setRange(range);
+    scanner.fetchColumnFamily(DataFileColumnFamily.NAME);
+    return scanner;
+  }
+
+  private Set<String> misnormalizeFiles(AccumuloClient client, String table) 
throws Exception {

Review Comment:
   It doesn't look like the return value is ever used. Can this method be 
changed to `void`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to