This is an automated email from the ASF dual-hosted git repository.
rong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new 736cdf7a56a Pipe: Add a tool for validating and repairing
isGeneratedByPipe mark in tsfile resources (#15934) (#15945)
736cdf7a56a is described below
commit 736cdf7a56a0fbe02ceedc8e85f56306b8b2cc77
Author: Steve Yurong Su <[email protected]>
AuthorDate: Wed Jul 16 10:29:03 2025 +0800
Pipe: Add a tool for validating and repairing isGeneratedByPipe mark in
tsfile resources (#15934) (#15945)
---
.../tools/tsfile/mark-is-generated-by-pipe.bat | 59 +++++
.../tools/tsfile/mark-is-generated-by-pipe.sh | 51 ++++
...GeneratedByPipeMarkValidationAndRepairTool.java | 263 +++++++++++++++++++++
3 files changed, 373 insertions(+)
diff --git
a/iotdb-core/datanode/src/assembly/resources/tools/tsfile/mark-is-generated-by-pipe.bat
b/iotdb-core/datanode/src/assembly/resources/tools/tsfile/mark-is-generated-by-pipe.bat
new file mode 100644
index 00000000000..7d2b867bba9
--- /dev/null
+++
b/iotdb-core/datanode/src/assembly/resources/tools/tsfile/mark-is-generated-by-pipe.bat
@@ -0,0 +1,59 @@
+@REM
+@REM Licensed to the Apache Software Foundation (ASF) under one
+@REM or more contributor license agreements. See the NOTICE file
+@REM distributed with this work for additional information
+@REM regarding copyright ownership. The ASF licenses this file
+@REM to you under the Apache License, Version 2.0 (the
+@REM "License"); you may not use this file except in compliance
+@REM with the License. You may obtain a copy of the License at
+@REM
+@REM http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing,
+@REM software distributed under the License is distributed on an
+@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@REM KIND, either express or implied. See the License for the
+@REM specific language governing permissions and limitations
+@REM under the License.
+@REM
+
+@echo off
+echo ````````````````````````````````````````````````````````````````````````
+echo Starting Validating the isGeneratedByPipe Mark in TsFile Resources
+echo ````````````````````````````````````````````````````````````````````````
+
+if "%OS%" == "Windows_NT" setlocal
+
+@REM Default IOTDB_HOME to the directory two levels above this script.
+pushd %~dp0..\..
+if NOT DEFINED IOTDB_HOME set IOTDB_HOME=%CD%
+popd
+
+if NOT DEFINED MAIN_CLASS set MAIN_CLASS=org.apache.iotdb.db.tools.validate.TsFileResourceIsGeneratedByPipeMarkValidationAndRepairTool
+if NOT DEFINED JAVA_HOME goto :err
+
+@REM -----------------------------------------------------------------------------
+@REM ***** CLASSPATH library setting *****
+@REM Ensure that any user defined CLASSPATH variables are not used on startup.
+@REM The quotes enclose the whole assignment so that they are not stored in the
+@REM value; the value is quoted exactly once, where it is handed to java below.
+set "CLASSPATH=%IOTDB_HOME%\lib\*"
+
+@REM -----------------------------------------------------------------------------
+@REM Run the tool, forwarding every argument given to this script.
+"%JAVA_HOME%\bin\java" -cp "%CLASSPATH%" %MAIN_CLASS% %*
+
+goto finally
+
+:err
+echo JAVA_HOME environment variable must be set!
+pause
+
+@REM -----------------------------------------------------------------------------
+:finally
+
+ENDLOCAL
\ No newline at end of file
diff --git
a/iotdb-core/datanode/src/assembly/resources/tools/tsfile/mark-is-generated-by-pipe.sh
b/iotdb-core/datanode/src/assembly/resources/tools/tsfile/mark-is-generated-by-pipe.sh
new file mode 100644
index 00000000000..daabae671f3
--- /dev/null
+++
b/iotdb-core/datanode/src/assembly/resources/tools/tsfile/mark-is-generated-by-pipe.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+echo ------------------------------------------------------------------------------------
+echo Starting Validating the isGeneratedByPipe Mark in TsFile Resources
+echo ------------------------------------------------------------------------------------
+
+source "$(dirname "$0")/../../sbin/iotdb-common.sh"
+#get_iotdb_include and checkAllVariables is in iotdb-common.sh
+VARS=$(get_iotdb_include "$*")
+checkAllVariables
+export IOTDB_HOME="${IOTDB_HOME}/.."
+eval set -- "$VARS"
+
+# Prefer the java binary under JAVA_HOME; fall back to the one on PATH.
+if [ -n "$JAVA_HOME" ]; then
+    for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
+        if [ -x "$java" ]; then
+            JAVA="$java"
+            break
+        fi
+    done
+else
+    JAVA=java
+fi
+
+# Join every jar under lib with ':'.
+# The directory part is quoted so that an IOTDB_HOME containing spaces is not
+# word-split, and the separator is only added between entries so that the
+# classpath does not start with an empty entry.
+CLASSPATH=""
+for f in "${IOTDB_HOME}"/lib/*.jar; do
+    CLASSPATH="${CLASSPATH:+${CLASSPATH}:}$f"
+done
+
+MAIN_CLASS=org.apache.iotdb.db.tools.validate.TsFileResourceIsGeneratedByPipeMarkValidationAndRepairTool
+
+# Forward the remaining arguments to the tool and propagate its exit status.
+"$JAVA" -cp "$CLASSPATH" "$MAIN_CLASS" "$@"
+exit $?
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/validate/TsFileResourceIsGeneratedByPipeMarkValidationAndRepairTool.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/validate/TsFileResourceIsGeneratedByPipeMarkValidationAndRepairTool.java
new file mode 100644
index 00000000000..e14f7e81d05
--- /dev/null
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/validate/TsFileResourceIsGeneratedByPipeMarkValidationAndRepairTool.java
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.tools.validate;
+
+import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResource;
+
+import org.apache.tsfile.common.constant.TsFileConstant;
+import org.slf4j.Logger;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.ConcurrentSkipListSet;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+
+public class TsFileResourceIsGeneratedByPipeMarkValidationAndRepairTool {
+
+ private static final Logger LOGGER =
+ org.slf4j.LoggerFactory.getLogger(
+ TsFileResourceIsGeneratedByPipeMarkValidationAndRepairTool.class);
+ // Help text logged by parseCommandLineArgs when the arguments are missing or not understood.
+ private static final String USAGE =
+ "Usage: --expected true|false --dirs <dir1> <dir2> ...\n"
+ + " --expected: whether the TsFileResource is expected to be
generated by pipe\n"
+ + " --dirs: list of data directories to validate and repair";
+ // Parsed arguments: the directories given after --dirs and the value given after --expected.
+ private static final Set<File> dataDirs = new ConcurrentSkipListSet<>();
+ private static final AtomicBoolean expectedMark = new AtomicBoolean(true);
+ // Timestamp taken when this class is initialized; printStatistics logs the time elapsed since.
+ private static final AtomicLong runtime = new
AtomicLong(System.currentTimeMillis());
+ // Totals over all partitions; atomic because main processes partitions with a parallel stream.
+ private static final AtomicInteger totalTsFileNum = new AtomicInteger(0);
+ private static final AtomicInteger toRepairTsFileNum = new AtomicInteger(0);
+
+ /**
+  * Entry point of the tool.
+  *
+  * <p>Parses the arguments, collects the leaf directories below every data directory, validates
+  * (and repairs) each of them through a parallel stream and finally logs the statistics.
+  *
+  * <p>Usage: {@code --expected true|false --dirs <dir1> <dir2> ...}
+  *
+  * @param args the command line arguments
+  * @throws IOException declared by the signature
+  */
+ public static void main(String[] args) throws IOException {
+   parseCommandLineArgs(args);
+
+   findAllPartitionDirs()
+       .parallelStream()
+       .forEach(leafDir -> validateAndRepairTsFileResourcesInPartition(leafDir));
+
+   printStatistics();
+ }
+
+ /**
+  * Parses the command line into {@code expectedMark} and {@code dataDirs}, then logs the
+  * effective settings.
+  *
+  * <p>The usage text is logged and the JVM exits with status 1 when no argument is given, when
+  * {@code --help} or {@code -h} is present, when {@code --expected} or {@code --dirs} is missing,
+  * when the value of {@code --expected} is neither {@code true} nor {@code false} (ignoring
+  * case), when an unknown argument is met, or when no directory follows {@code --dirs}.
+  *
+  * @param args the raw command line arguments
+  */
+ private static void parseCommandLineArgs(final String[] args) {
+   // A list view is enough for the membership checks below.
+   final List<String> argList = Arrays.asList(args);
+   if (args.length == 0
+       || argList.contains("--help")
+       || argList.contains("-h")
+       || !(argList.contains("--expected") && argList.contains("--dirs"))) {
+     LOGGER.info(USAGE);
+     System.exit(1);
+   }
+
+   for (int i = 0; i < args.length; i++) {
+     if ("--expected".equals(args[i]) && i + 1 < args.length) {
+       final String value = args[++i];
+       // Boolean.parseBoolean maps every string other than "true" to false, so a typo would
+       // silently make the tool rewrite all resources to false. Reject such values instead.
+       if (!"true".equalsIgnoreCase(value) && !"false".equalsIgnoreCase(value)) {
+         LOGGER.info("Invalid value for --expected: {}", value);
+         LOGGER.info(USAGE);
+         System.exit(1);
+       }
+       expectedMark.set(Boolean.parseBoolean(value));
+     } else if ("--dirs".equals(args[i]) && i + 1 < args.length) {
+       i++;
+       // Everything up to the next "--option" is a data directory.
+       while (i < args.length && !args[i].startsWith("--")) {
+         dataDirs.add(new File(args[i++]));
+       }
+       // Step back so that the loop increment lands on the next option.
+       i--;
+     } else {
+       LOGGER.info("Unknown argument: {}", args[i]);
+       LOGGER.info(USAGE);
+       // Exit if an unknown argument is encountered
+       System.exit(1);
+     }
+   }
+
+   if (dataDirs.isEmpty()) {
+     LOGGER.info("No data directories provided. Please specify with --dirs <dir1> <dir2> ...");
+     System.exit(1);
+   }
+
+   LOGGER.info("------------------------------------------------------");
+   LOGGER.info("Expected mark: {}", expectedMark.get());
+   LOGGER.info("Data directories: ");
+   for (File dir : dataDirs) {
+     LOGGER.info(" {}", dir.getAbsolutePath());
+   }
+   LOGGER.info("------------------------------------------------------");
+ }
+
+ /**
+  * Collects the leaf directories found below every configured data directory.
+  *
+  * @return the leaf directories of all entries of {@code dataDirs} that exist and are directories
+  */
+ private static List<File> findAllPartitionDirs() {
+   final List<File> collected = new ArrayList<>();
+   for (final File candidate : dataDirs) {
+     // Entries that are missing or are not directories contribute nothing.
+     if (!candidate.exists() || !candidate.isDirectory()) {
+       continue;
+     }
+     collected.addAll(findLeafDirectories(candidate));
+   }
+   return collected;
+ }
+
+ /**
+  * Recursively collects the directories below {@code dir} that contain no sub-directory.
+  *
+  * <p>{@code dir} itself is returned when it has no sub-directory, which includes the cases where
+  * it is empty or where {@link File#listFiles()} returns {@code null}.
+  *
+  * @param dir the directory to start from
+  * @return the leaf directories below {@code dir}, or a list holding only {@code dir}
+  */
+ public static List<File> findLeafDirectories(File dir) {
+   final File[] children = dir.listFiles();
+   final List<File> leaves = new ArrayList<>();
+
+   if (children != null) {
+     for (final File child : children) {
+       if (child.isDirectory()) {
+         leaves.addAll(findLeafDirectories(child));
+       }
+     }
+   }
+
+   // No sub-directory was found, so dir is a leaf itself.
+   if (leaves.isEmpty()) {
+     leaves.add(dir);
+   }
+   return leaves;
+ }
+
+ /**
+  * Validates every TsFileResource found directly in {@code partitionDir}, repairs those whose
+  * mark differs from the expected one, and adds the per-directory counts to the global counters.
+  *
+  * <p>Failures are logged and never propagated: a failure on one resource does not stop the
+  * others, and a failure while loading leaves the counts gathered so far.
+  *
+  * @param partitionDir the directory whose resources are processed
+  */
+ private static void validateAndRepairTsFileResourcesInPartition(final File partitionDir) {
+   int total = 0;
+   int toRepair = 0;
+
+   try {
+     final List<TsFileResource> loaded =
+         loadAllTsFileResources(Collections.singletonList(partitionDir));
+     total += loaded.size();
+
+     for (final TsFileResource tsFileResource : loaded) {
+       try {
+         final boolean needsRepair = validateAndRepairSingleTsFileResource(tsFileResource);
+         if (needsRepair) {
+           toRepair++;
+         }
+       } catch (final Exception e) {
+         // Continue processing other resources even if one fails
+         LOGGER.warn(
+             "Error validating or repairing resource {}: {}",
+             tsFileResource.getTsFile().getAbsolutePath(),
+             e.getMessage(),
+             e);
+       }
+     }
+   } catch (final Exception e) {
+     LOGGER.warn(
+         "Error loading resources from partition {}: {}",
+         partitionDir.getAbsolutePath(),
+         e.getMessage(),
+         e);
+   }
+
+   totalTsFileNum.addAndGet(total);
+   toRepairTsFileNum.addAndGet(toRepair);
+   LOGGER.info(
+       "TimePartition {} has {} total resources, {} to repair resources. Process completed.",
+       partitionDir,
+       total,
+       toRepair);
+ }
+
+ /**
+  * Loads the TsFileResource of every TsFile located directly in the given directories.
+  *
+  * <p>Only regular files whose path ends with {@code TsFileConstant.TSFILE_SUFFIX} are
+  * considered. A TsFile without a companion resource file is skipped with an info log. A
+  * resource that cannot be loaded is skipped with a warning, so that one unreadable resource
+  * file does not prevent the remaining files of the directory from being processed.
+  *
+  * @param timePartitionDirs the directories to scan (not recursively)
+  * @return the resources that were loaded successfully
+  * @throws IOException if the content of one of the directories cannot be listed
+  */
+ private static List<TsFileResource> loadAllTsFileResources(List<File> timePartitionDirs)
+     throws IOException {
+   final List<TsFileResource> resources = new ArrayList<>();
+
+   for (final File timePartitionDir : timePartitionDirs) {
+     final File[] files = timePartitionDir.listFiles();
+     if (files == null) {
+       // listFiles() returns null for a non-directory or on an I/O error; report which
+       // directory failed instead of raising a NullPointerException without a message.
+       throw new IOException(
+           "Failed to list files in directory " + timePartitionDir.getAbsolutePath());
+     }
+
+     for (final File tsfile : files) {
+       final String filePath = tsfile.getAbsolutePath();
+       if (!filePath.endsWith(TsFileConstant.TSFILE_SUFFIX) || !tsfile.isFile()) {
+         continue;
+       }
+
+       if (!new File(filePath + TsFileResource.RESOURCE_SUFFIX).exists()) {
+         LOGGER.info("{} is skipped because resource file is not exist.", filePath);
+         continue;
+       }
+
+       try {
+         final TsFileResource resource = new TsFileResource(tsfile);
+         resource.deserialize();
+         resource.close();
+         resources.add(resource);
+       } catch (final Exception e) {
+         // Keep going: the other files of this directory can still be validated.
+         LOGGER.warn(
+             "{} is skipped because its resource file cannot be loaded: {}",
+             filePath,
+             e.getMessage(),
+             e);
+       }
+     }
+   }
+
+   return resources;
+ }
+
+ /**
+  * Validates a single TsFileResource and, when its isGeneratedByPipe mark differs from the
+  * expected one, tries to repair it.
+  *
+  * <p>A failed repair is logged together with its stack trace and is not propagated; the method
+  * still returns {@code true} in that case because the resource did need a repair.
+  *
+  * @param resource the TsFileResource to validate and repair
+  * @return true if the resource needs to be repaired and false if it is valid
+  */
+ private static boolean validateAndRepairSingleTsFileResource(TsFileResource resource) {
+   if (resource.isGeneratedByPipe() == expectedMark.get()) {
+     // The resource is valid, no need to repair
+     return false;
+   }
+
+   LOGGER.info(
+       "Repairing TsFileResource: {}, expected mark: {}, actual mark: {}",
+       resource.getTsFile().getAbsolutePath(),
+       expectedMark.get(),
+       resource.isGeneratedByPipe());
+
+   try {
+     repairSingleTsFileResource(resource);
+
+     LOGGER.info(
+         "Marked TsFileResource as {} in resource: {}",
+         expectedMark.get(),
+         resource.getTsFile().getAbsolutePath());
+   } catch (final Exception e) {
+     // Pass the exception as the last argument so that the stack trace is logged, like the
+     // other warnings of this tool do.
+     LOGGER.warn(
+         "ERROR: Failed to repair TsFileResource: {}, error: {}",
+         resource.getTsFile().getAbsolutePath(),
+         e.getMessage(),
+         e);
+   }
+
+   return true;
+ }
+
+ private static void repairSingleTsFileResource(TsFileResource resource)
throws IOException {
+ resource.setGeneratedByPipe(expectedMark.get());
+ resource.serialize();
+ }
+
+ private static void printStatistics() {
+ LOGGER.info("------------------------------------------------------");
+ LOGGER.info("Validation and repair completed. Statistics:");
+ LOGGER.info(
+ "Total time taken: {} ms, total TsFile resources: {}, repaired TsFile
resources: {}",
+ System.currentTimeMillis() - runtime.get(),
+ totalTsFileNum.get(),
+ toRepairTsFileNum.get());
+ }
+}