This is an automated email from the ASF dual-hosted git repository. xiangweiwei pushed a commit to branch rewriteTool in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 929f0c4e3bc5e5ab62039a634b659c5a813f2995 Author: Alima777 <[email protected]> AuthorDate: Wed May 25 16:24:21 2022 +0800 add rewrite tool --- .../org/apache/iotdb/db/qp/sql/SqlLexer.tokens | 253 ++++++++++++++ distribution/pom.xml | 5 +- distribution/src/assembly/rewriteBadFile.xml | 49 +++ pom.xml | 1 + rewriteBadFile/pom.xml | 62 ++++ .../resources/sbin/rewrite-badFile-tool.sh | 48 +++ .../src/assembly/resources/sbin/validate-tsfile.sh | 48 +++ rewriteBadFile/src/assembly/rewriteBadFile.xml | 40 +++ .../java/org/apache/iotdb/RewriteBadFileTool.java | 310 +++++++++++++++++ .../org/apache/iotdb/TsFileValidationTool.java | 379 +++++++++++++++++++++ .../iotdb/tsfile/file/metadata/ChunkMetadata.java | 2 +- 11 files changed, 1194 insertions(+), 3 deletions(-) diff --git a/antlr/src/main/antlr4/org/apache/iotdb/db/qp/sql/SqlLexer.tokens b/antlr/src/main/antlr4/org/apache/iotdb/db/qp/sql/SqlLexer.tokens new file mode 100644 index 0000000000..bc327191f9 --- /dev/null +++ b/antlr/src/main/antlr4/org/apache/iotdb/db/qp/sql/SqlLexer.tokens @@ -0,0 +1,253 @@ +WS=1 +ADD=2 +AFTER=3 +ALIAS=4 +ALIGN=5 +ALIGNED=6 +ALL=7 +ALTER=8 +ANY=9 +APPEND=10 +AS=11 +ASC=12 +ATTRIBUTES=13 +AUTOREGISTER=14 +BEFORE=15 +BEGIN=16 +BOUNDARY=17 +BY=18 +CACHE=19 +CHILD=20 +CLEAR=21 +COMPRESSION=22 +COMPRESSOR=23 +CONCAT=24 +CONFIGURATION=25 +CONTINUOUS=26 +COUNT=27 +CONTAIN=28 +CQ=29 +CQS=30 +CREATE=31 +DATATYPE=32 +DEBUG=33 +DELETE=34 +DESC=35 +DESCRIBE=36 +DEVICE=37 +DEVICES=38 +DISABLE=39 +DROP=40 +ENCODING=41 +END=42 +EVERY=43 +EXPLAIN=44 +FILL=45 +FLUSH=46 +FOR=47 +FROM=48 +FULL=49 +FUNCTION=50 +FUNCTIONS=51 +GLOBAL=52 +GRANT=53 +GROUP=54 +INDEX=55 +INFO=56 +INSERT=57 +INTO=58 +KILL=59 +LABEL=60 +LAST=61 +LATEST=62 +LEVEL=63 +LIKE=64 +LIMIT=65 +LINEAR=66 +LINK=67 +LIST=68 +LOAD=69 +LOCK=70 +MERGE=71 +METADATA=72 +NODES=73 +NOW=74 +OF=75 +OFF=76 +OFFSET=77 +ON=78 +ORDER=79 +PARTITION=80 +PASSWORD=81 +PATHS=82 +PIPE=83 +PIPES=84 +PIPESERVER=85 +PIPESINK=86 +PIPESINKS=87 +PIPESINKTYPE=88 
+PREVIOUS=89 +PREVIOUSUNTILLAST=90 +PRIVILEGES=91 +PROCESSLIST=92 +PROPERTY=93 +PRUNE=94 +QUERIES=95 +QUERY=96 +READONLY=97 +REGEXP=98 +REMOVE=99 +RENAME=100 +RESAMPLE=101 +RESOURCE=102 +REVOKE=103 +ROLE=104 +ROOT=105 +SCHEMA=106 +SELECT=107 +SET=108 +SETTLE=109 +SGLEVEL=110 +SHOW=111 +SLIMIT=112 +SOFFSET=113 +STORAGE=114 +START=115 +STOP=116 +SYSTEM=117 +TAGS=118 +TASK=119 +TEMPLATE=120 +TEMPLATES=121 +TIME=122 +TIMESERIES=123 +TIMESTAMP=124 +TO=125 +TOLERANCE=126 +TOP=127 +TRACING=128 +TRIGGER=129 +TRIGGERS=130 +TTL=131 +UNLINK=132 +UNLOAD=133 +UNSET=134 +UPDATE=135 +UPSERT=136 +USER=137 +USING=138 +VALUES=139 +VERIFY=140 +VERSION=141 +WATERMARK_EMBEDDING=142 +WHERE=143 +WITH=144 +WITHOUT=145 +WRITABLE=146 +DATATYPE_VALUE=147 +BOOLEAN=148 +DOUBLE=149 +FLOAT=150 +INT32=151 +INT64=152 +TEXT=153 +ENCODING_VALUE=154 +DICTIONARY=155 +DIFF=156 +GORILLA=157 +PLAIN=158 +REGULAR=159 +RLE=160 +TS_2DIFF=161 +ZIGZAG=162 +FREQ=163 +COMPRESSOR_VALUE=164 +GZIP=165 +LZ4=166 +SNAPPY=167 +UNCOMPRESSED=168 +PRIVILEGE_VALUE=169 +SET_STORAGE_GROUP=170 +DELETE_STORAGE_GROUP=171 +CREATE_TIMESERIES=172 +INSERT_TIMESERIES=173 +READ_TIMESERIES=174 +DELETE_TIMESERIES=175 +CREATE_USER=176 +DELETE_USER=177 +MODIFY_PASSWORD=178 +LIST_USER=179 +GRANT_USER_PRIVILEGE=180 +REVOKE_USER_PRIVILEGE=181 +GRANT_USER_ROLE=182 +REVOKE_USER_ROLE=183 +CREATE_ROLE=184 +DELETE_ROLE=185 +LIST_ROLE=186 +GRANT_ROLE_PRIVILEGE=187 +REVOKE_ROLE_PRIVILEGE=188 +CREATE_FUNCTION=189 +DROP_FUNCTION=190 +CREATE_TRIGGER=191 +DROP_TRIGGER=192 +START_TRIGGER=193 +STOP_TRIGGER=194 +CREATE_CONTINUOUS_QUERY=195 +DROP_CONTINUOUS_QUERY=196 +SCHEMA_REPLICATION_FACTOR=197 +DATA_REPLICATION_FACTOR=198 +TIME_PARTITION_INTERVAL=199 +MINUS=200 +PLUS=201 +DIV=202 +MOD=203 +OPERATOR_DEQ=204 +OPERATOR_SEQ=205 +OPERATOR_GT=206 +OPERATOR_GTE=207 +OPERATOR_LT=208 +OPERATOR_LTE=209 +OPERATOR_NEQ=210 +OPERATOR_IN=211 +OPERATOR_AND=212 +OPERATOR_OR=213 +OPERATOR_NOT=214 +OPERATOR_CONTAINS=215 +DOT=216 +COMMA=217 +SEMI=218 +STAR=219 
+DOUBLE_STAR=220 +LR_BRACKET=221 +RR_BRACKET=222 +LS_BRACKET=223 +RS_BRACKET=224 +STRING_LITERAL=225 +DURATION_LITERAL=226 +DATETIME_LITERAL=227 +INTEGER_LITERAL=228 +EXPONENT_NUM_PART=229 +BOOLEAN_LITERAL=230 +NULL_LITERAL=231 +NAN_LITERAL=232 +ID=233 +QUOTED_ID=234 +'-'=200 +'+'=201 +'/'=202 +'%'=203 +'=='=204 +'='=205 +'>'=206 +'>='=207 +'<'=208 +'<='=209 +'.'=216 +','=217 +';'=218 +'*'=219 +'**'=220 +'('=221 +')'=222 +'['=223 +']'=224 diff --git a/distribution/pom.xml b/distribution/pom.xml index 67bb6467c7..66c7816e6c 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -52,6 +52,7 @@ <descriptor>src/assembly/cli.xml</descriptor> <descriptor>src/assembly/grafana.xml</descriptor> <descriptor>src/assembly/client-cpp.xml</descriptor> + <descriptor>src/assembly/rewriteBadFile.xml</descriptor> </descriptors> <finalName>apache-iotdb-${project.version}</finalName> </configuration> @@ -59,8 +60,8 @@ </executions> </plugin> <!-- - Create SHA512 checksum files for the release artifacts. - --> +Create SHA512 checksum files for the release artifacts. +--> <plugin> <groupId>net.nicoulaj.maven.plugins</groupId> <artifactId>checksum-maven-plugin</artifactId> diff --git a/distribution/src/assembly/rewriteBadFile.xml b/distribution/src/assembly/rewriteBadFile.xml new file mode 100644 index 0000000000..faddb91ee1 --- /dev/null +++ b/distribution/src/assembly/rewriteBadFile.xml @@ -0,0 +1,49 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +--> +<assembly> + <id>cli-bin</id> + <formats> + <format>dir</format> + <format>zip</format> + </formats> + <baseDirectory>apache-iotdb-${project.version}-rewriteBadFile-bin</baseDirectory> + <dependencySets> + <dependencySet> + <includes> + <include>*:iotdb-rewriteBadFile:zip</include> + </includes> + <outputDirectory>${file.separator}</outputDirectory> + <outputFileNameMapping>${artifact.artifactId}.${artifact.extension}</outputFileNameMapping> + <unpack>true</unpack> + </dependencySet> + </dependencySets> + <fileSets> + <fileSet> + <outputDirectory>sbin</outputDirectory> + <directory>${maven.multiModuleProjectDirectory}/rewriteBadFile/src/assembly/resources/sbin</directory> + <fileMode>0755</fileMode> + </fileSet> + </fileSets> + <componentDescriptors> + <componentDescriptor>common-files.xml</componentDescriptor> + </componentDescriptors> +</assembly> diff --git a/pom.xml b/pom.xml index 5318a29530..7e0242c8b6 100644 --- a/pom.xml +++ b/pom.xml @@ -104,6 +104,7 @@ <module>client-py</module> <module>compile-tools</module> <module>client-cpp</module> + <module>rewriteBadFile</module> </modules> <!-- Properties Management --> <properties> diff --git a/rewriteBadFile/pom.xml b/rewriteBadFile/pom.xml new file mode 100644 index 0000000000..7953a5e9e5 --- /dev/null +++ b/rewriteBadFile/pom.xml @@ -0,0 +1,62 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + 
<artifactId>iotdb-parent</artifactId> + <groupId>org.apache.iotdb</groupId> + <version>0.12.6-SNAPSHOT</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>rewriteBadFile</artifactId> + <dependencies> + <dependency> + <groupId>org.apache.iotdb</groupId> + <artifactId>iotdb-server</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.iotdb</groupId> + <artifactId>tsfile</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.iotdb</groupId> + <artifactId>iotdb-session</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-assembly-plugin</artifactId> + <version>${maven.assembly.version}</version> + <executions> + <!-- Package binaries--> + <execution> + <id>client-assembly</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + <configuration> + <descriptors> + <descriptor>src/assembly/rewriteBadFile.xml</descriptor> + </descriptors> + <appendAssemblyId>false</appendAssemblyId> + <archive> + <manifest> + <addDefaultImplementationEntries>true</addDefaultImplementationEntries> + <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries> + </manifest> + </archive> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + <properties> + <maven.compiler.source>11</maven.compiler.source> + <maven.compiler.target>11</maven.compiler.target> + </properties> +</project> diff --git a/rewriteBadFile/src/assembly/resources/sbin/rewrite-badFile-tool.sh b/rewriteBadFile/src/assembly/resources/sbin/rewrite-badFile-tool.sh new file mode 100644 index 0000000000..4f0d45c931 --- /dev/null +++ b/rewriteBadFile/src/assembly/resources/sbin/rewrite-badFile-tool.sh @@ -0,0 +1,48 @@ +#!/bin/sh +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor 
license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +echo --------------------- +echo Starting Rewriting the bad TsFiles +echo --------------------- + +if [ -z "${IOTDB_HOME}" ]; then + export IOTDB_HOME="$(cd "`dirname "$0"`"/..; pwd)" +fi + +if [ -n "$JAVA_HOME" ]; then + for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do + if [ -x "$java" ]; then + JAVA="$java" + break + fi + done +else + JAVA=java +fi + +CLASSPATH="" +for f in ${IOTDB_HOME}/lib/*.jar; do + CLASSPATH=${CLASSPATH}":"$f +done + +MAIN_CLASS=org.apache.iotdb.RewriteBadFileTool + +"$JAVA" -cp "$CLASSPATH" "$MAIN_CLASS" "$@" +exit $? diff --git a/rewriteBadFile/src/assembly/resources/sbin/validate-tsfile.sh b/rewriteBadFile/src/assembly/resources/sbin/validate-tsfile.sh new file mode 100644 index 0000000000..db50b91400 --- /dev/null +++ b/rewriteBadFile/src/assembly/resources/sbin/validate-tsfile.sh @@ -0,0 +1,48 @@ +#!/bin/sh +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +echo --------------------- +echo Starting Validating the TsFile +echo --------------------- + +if [ -z "${IOTDB_HOME}" ]; then + export IOTDB_HOME="$(cd "`dirname "$0"`"/..; pwd)" +fi + +if [ -n "$JAVA_HOME" ]; then + for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do + if [ -x "$java" ]; then + JAVA="$java" + break + fi + done +else + JAVA=java +fi + +CLASSPATH="" +for f in ${IOTDB_HOME}/lib/*.jar; do + CLASSPATH=${CLASSPATH}":"$f +done + +MAIN_CLASS=org.apache.iotdb.TsFileValidationTool + +"$JAVA" -cp "$CLASSPATH" "$MAIN_CLASS" "$@" +exit $? \ No newline at end of file diff --git a/rewriteBadFile/src/assembly/rewriteBadFile.xml b/rewriteBadFile/src/assembly/rewriteBadFile.xml new file mode 100644 index 0000000000..dfbd548135 --- /dev/null +++ b/rewriteBadFile/src/assembly/rewriteBadFile.xml @@ -0,0 +1,40 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. + +--> +<assembly> + <id>rewriteBadFile</id> + <formats> + <format>dir</format> + <format>zip</format> + </formats> + <includeBaseDirectory>false</includeBaseDirectory> + <dependencySets> + <dependencySet> + <outputDirectory>lib</outputDirectory> + </dependencySet> + </dependencySets> + <fileSets> + <fileSet> + <directory>src/assembly/resources</directory> + <outputDirectory>${file.separator}</outputDirectory> + </fileSet> + </fileSets> +</assembly> diff --git a/rewriteBadFile/src/main/java/org/apache/iotdb/RewriteBadFileTool.java b/rewriteBadFile/src/main/java/org/apache/iotdb/RewriteBadFileTool.java new file mode 100644 index 0000000000..9a75e529c6 --- /dev/null +++ b/rewriteBadFile/src/main/java/org/apache/iotdb/RewriteBadFileTool.java @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.iotdb;
+
+import org.apache.iotdb.db.engine.modification.Deletion;
+import org.apache.iotdb.db.engine.modification.Modification;
+import org.apache.iotdb.db.engine.modification.ModificationFile;
+import org.apache.iotdb.db.engine.storagegroup.TsFileResource;
+import org.apache.iotdb.db.exception.metadata.IllegalPathException;
+import org.apache.iotdb.db.metadata.PartialPath;
+import org.apache.iotdb.rpc.IoTDBConnectionException;
+import org.apache.iotdb.rpc.StatementExecutionException;
+import org.apache.iotdb.session.Session;
+import org.apache.iotdb.tsfile.common.conf.TSFileConfig;
+import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
+import org.apache.iotdb.tsfile.encoding.decoder.Decoder;
+import org.apache.iotdb.tsfile.file.MetaMarker;
+import org.apache.iotdb.tsfile.file.header.ChunkGroupHeader;
+import org.apache.iotdb.tsfile.file.header.ChunkHeader;
+import org.apache.iotdb.tsfile.file.header.PageHeader;
+import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.iotdb.tsfile.fileSystem.FSFactoryProducer;
+import org.apache.iotdb.tsfile.fileSystem.fsFactory.FSFactory;
+import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
+import org.apache.iotdb.tsfile.read.common.BatchData;
+import org.apache.iotdb.tsfile.read.common.TimeRange;
+import org.apache.iotdb.tsfile.read.reader.page.PageReader;
+import org.apache.iotdb.tsfile.write.record.Tablet;
+import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Command line tool that works on the report written by {@code TsFileValidationTool}.
+ *
+ * <p>Every report line starting with {@code "-- Find the bad file "} names one bad TsFile. In
+ * "move" mode the TsFile, its resource file and (if present) its mods file are moved below the
+ * backup directory. In "rewrite" mode the previously moved TsFiles are read chunk by chunk and
+ * their data points are inserted again through a {@link Session}.
+ */
+public class RewriteBadFileTool {
+  // backup data dir path
+  private static String backUpDirPath = "backup";
+  // validation file path
+  private static String validationFilePath = "TsFile_validation_view.txt";
+  // output file path
+  private static String outputLogFilePath = "TsFile_rewrite_view.txt";
+  // whether moving the data files
+  private static boolean moveFile = true;
+
+  private static final String HOST_IP = "localhost";
+  private static final String RPC_PORT = "6667";
+  private static final String USER = "root";
+  private static final String PASSWORD = "root";
+
+  // prefix of the report lines that name a bad file
+  private static final String BAD_FILE_PREFIX = "-- Find the bad file ";
+  // path fragment that separates the data dir prefix from the part kept below the backup dir
+  private static final String SEQUENCE_DIR = "sequence";
+
+  private static final String USAGE =
+      "Param incorrect, -m=[need moving data file or not] -b=[path of backUp directory] "
+          + "-v=[path of validation file] -o=[path of output file].";
+
+  private static final FSFactory fsFactory = FSFactoryProducer.getFSFactory();
+
+  // stays null until main() opens the output log
+  private static PrintWriter pw;
+
+  /**
+   * Entry point. Expected arguments (all four, in any order): -b=[path of backUp directory]
+   * -v=[path of validation file] -o=[path of output log] -m=[whether to move the files].
+   *
+   * <p>The process exits with status 1 when the arguments are invalid or when moving/rewriting is
+   * aborted by an I/O or connection error.
+   *
+   * @param args command line arguments
+   * @throws IOException if the output log cannot be opened
+   */
+  public static void main(String[] args) throws IOException {
+    if (!checkArgs(args)) {
+      System.exit(1);
+    }
+    boolean succeeded = false;
+    pw = new PrintWriter(new FileWriter(outputLogFilePath));
+    try {
+      if (moveFile) {
+        moveBadFileToBackUp();
+      } else {
+        rewriteAllBadFiles();
+      }
+      succeeded = true;
+    } catch (IoTDBConnectionException | IOException e) {
+      e.printStackTrace();
+      // keep a trace of the abort in the output log as well, not only on stderr
+      printBoth("---- Meet error, abort: " + e.getMessage());
+    } finally {
+      pw.close();
+    }
+    if (!succeeded) {
+      // let the calling script see the failure instead of a silent exit status 0
+      System.exit(1);
+    }
+  }
+
+  /**
+   * Moves every bad file listed in the validation report, together with its resource file and its
+   * mods file (if one exists), below the backup directory.
+   *
+   * <p>Report entries whose path does not contain "sequence", or whose target directory cannot be
+   * created, are reported and skipped.
+   *
+   * @throws IOException if the validation report cannot be read
+   */
+  public static void moveBadFileToBackUp() throws IOException {
+    printBoth("Start moving bad files to backup dir.");
+    try (BufferedReader bufferedReader = new BufferedReader(new FileReader(validationFilePath))) {
+      String line;
+      while ((line = bufferedReader.readLine()) != null) {
+        if (!line.startsWith(BAD_FILE_PREFIX)) {
+          continue;
+        }
+        String badFilePath = line.substring(BAD_FILE_PREFIX.length());
+        String targetFilePath = toBackUpPath(badFilePath);
+        if (targetFilePath == null) {
+          printBoth("---- Meet error in moving, " + badFilePath + " is not a sequence file path.");
+          continue;
+        }
+        File targetFile = new File(targetFilePath);
+        File targetDir = targetFile.getParentFile();
+        // the target usually lies several levels below the backup dir, so mkdir() is not enough
+        if (!targetDir.exists() && !targetDir.mkdirs()) {
+          printBoth("---- Meet error in moving, can not create dir " + targetDir.getPath());
+          continue;
+        }
+        // move tsfile
+        fsFactory.moveFile(new File(badFilePath), targetFile);
+        // move resource file
+        fsFactory.moveFile(
+            new File(badFilePath + TsFileResource.RESOURCE_SUFFIX),
+            new File(targetFilePath + TsFileResource.RESOURCE_SUFFIX));
+        // move mods file
+        File modsFile = new File(badFilePath + ModificationFile.FILE_SUFFIX);
+        if (modsFile.exists()) {
+          fsFactory.moveFile(modsFile, new File(targetFilePath + ModificationFile.FILE_SUFFIX));
+        }
+      }
+    }
+    printBoth("Finish moving all bad files to backup dir.");
+  }
+
+  /**
+   * Rewrites every bad file listed in the validation report from its location below the backup
+   * directory. A failure on one file is reported and does not stop the remaining files.
+   *
+   * @throws IOException if the validation report cannot be read
+   * @throws IoTDBConnectionException if the session cannot be opened or closed
+   */
+  private static void rewriteAllBadFiles() throws IOException, IoTDBConnectionException {
+    printBoth("Start rewriting bad files to iotdb.");
+    Session session = new Session(HOST_IP, RPC_PORT, USER, PASSWORD);
+    session.open(false);
+    try (BufferedReader bufferedReader = new BufferedReader(new FileReader(validationFilePath))) {
+      String line;
+      while ((line = bufferedReader.readLine()) != null) {
+        if (!line.startsWith(BAD_FILE_PREFIX)) {
+          continue;
+        }
+        String badFilePath = line.substring(BAD_FILE_PREFIX.length());
+        String targetFilePath = toBackUpPath(badFilePath);
+        if (targetFilePath == null) {
+          printBoth(
+              "---- Meet error in rewriting, " + badFilePath + " is not a sequence file path.");
+          continue;
+        }
+        try {
+          if (new File(targetFilePath).exists()) {
+            rewriteWrongTsFile(targetFilePath, session);
+          } else {
+            printBoth("---- Meet error in rewriting, " + targetFilePath + " does not exist.");
+          }
+        } catch (Exception e) {
+          // one unreadable file must not stop the remaining files from being rewritten
+          e.printStackTrace();
+          printBoth("---- Meet error in rewriting " + targetFilePath + ", " + e.getMessage());
+        }
+      }
+    } finally {
+      // close the session even when reading the report fails
+      session.close();
+    }
+    printBoth("Finish rewriting all bad files to iotdb.");
+  }
+
+  /**
+   * Maps the path of a bad file to its location below the backup directory: everything after the
+   * first occurrence of "sequence" in the path is appended to {@code <backup dir>/sequence}.
+   *
+   * @param badFilePath path of the bad file as printed in the validation report
+   * @return the path below the backup directory, or null if {@code badFilePath} does not contain
+   *     "sequence"
+   */
+  private static String toBackUpPath(String badFilePath) {
+    int index = badFilePath.indexOf(SEQUENCE_DIR);
+    if (index < 0) {
+      return null;
+    }
+    // take the whole remainder; split("sequence")[1] would cut the path at a second occurrence
+    return backUpDirPath
+        + File.separator
+        + SEQUENCE_DIR
+        + badFilePath.substring(index + SEQUENCE_DIR.length());
+  }
+
+  /**
+   * Reads the given TsFile sequentially and inserts the data points of every page through the
+   * given session, one tablet per page. Time ranges recorded in the file's mods file (if one
+   * exists) are passed to the page reader as delete intervals.
+   *
+   * @param filename path of the TsFile to rewrite
+   * @param session an opened session used for the inserts
+   * @throws IoTDBConnectionException if an insert fails because of the connection
+   * @throws StatementExecutionException if an insert is rejected
+   * @throws IOException if the TsFile cannot be read or contains an unexpected marker
+   * @throws IllegalPathException if a device/measurement pair does not form a legal path
+   */
+  public static void rewriteWrongTsFile(String filename, Session session)
+      throws IoTDBConnectionException, StatementExecutionException, IOException,
+          IllegalPathException {
+    // read mods file
+    List<Modification> modifications = null;
+    String modsFilePath = filename + ModificationFile.FILE_SUFFIX;
+    if (fsFactory.getFile(modsFilePath).exists()) {
+      modifications = (List<Modification>) new ModificationFile(modsFilePath).getModifications();
+    }
+
+    try (TsFileSequenceReader reader = new TsFileSequenceReader(filename)) {
+      // Sequential reading of one ChunkGroup now follows this order:
+      // first the CHUNK_GROUP_HEADER, then SeriesChunks (headers and data) in one ChunkGroup
+      // Because we do not know how many chunks a ChunkGroup may have, we should read one byte (the
+      // marker) ahead and judge accordingly.
+      reader.position((long) TSFileConfig.MAGIC_STRING.getBytes().length + 1);
+      byte marker;
+      String curDevice = null;
+      while ((marker = reader.readMarker()) != MetaMarker.SEPARATOR) {
+        switch (marker) {
+          case MetaMarker.CHUNK_HEADER:
+          case MetaMarker.ONLY_ONE_PAGE_CHUNK_HEADER:
+            rewriteChunk(reader, marker, curDevice, modifications, session);
+            break;
+          case MetaMarker.CHUNK_GROUP_HEADER:
+            ChunkGroupHeader chunkGroupHeader = reader.readChunkGroupHeader();
+            curDevice = chunkGroupHeader.getDeviceID();
+            break;
+          case MetaMarker.OPERATION_INDEX_RANGE:
+            reader.readPlanIndex();
+            reader.getMinPlanIndex();
+            reader.getMaxPlanIndex();
+            break;
+          default:
+            MetaMarker.handleUnexpectedMarker(marker);
+        }
+      }
+    }
+  }
+
+  /**
+   * Reads the chunk whose marker has just been consumed and inserts its pages one by one.
+   *
+   * @param reader reader positioned right after the chunk marker
+   * @param marker the chunk marker that was read
+   * @param curDevice device of the enclosing chunk group
+   * @param modifications content of the mods file, or null if there is none
+   * @param session an opened session used for the inserts
+   */
+  private static void rewriteChunk(
+      TsFileSequenceReader reader,
+      byte marker,
+      String curDevice,
+      List<Modification> modifications,
+      Session session)
+      throws IoTDBConnectionException, StatementExecutionException, IOException,
+          IllegalPathException {
+    // the marker byte has already been read, so the chunk header starts one byte earlier
+    long chunkHeaderOffset = reader.position() - 1;
+    ChunkHeader header = reader.readChunkHeader(marker);
+    Decoder defaultTimeDecoder =
+        Decoder.getDecoderByType(
+            TSEncoding.valueOf(TSFileDescriptor.getInstance().getConfig().getTimeEncoder()),
+            TSDataType.INT64);
+    Decoder valueDecoder = Decoder.getDecoderByType(header.getEncodingType(), header.getDataType());
+    // 1. construct MeasurementSchema from chunkHeader
+    String measurement = header.getMeasurementID();
+    MeasurementSchema measurementSchema =
+        new MeasurementSchema(
+            measurement,
+            header.getDataType(),
+            header.getEncodingType(),
+            header.getCompressionType());
+    // 2. record data point of each measurement
+    int dataSize = header.getDataSize();
+    while (dataSize > 0) {
+      valueDecoder.reset();
+      PageHeader pageHeader =
+          reader.readPageHeader(
+              header.getDataType(), header.getChunkType() == MetaMarker.CHUNK_HEADER);
+      ByteBuffer pageData = reader.readPage(pageHeader, header.getCompressionType());
+      PageReader pageReader =
+          new PageReader(pageData, header.getDataType(), valueDecoder, defaultTimeDecoder, null);
+      // read delete time range from old modification file
+      List<TimeRange> deleteIntervalList =
+          getOldSortedDeleteIntervals(
+              curDevice, measurementSchema, chunkHeaderOffset, modifications);
+      pageReader.setDeleteIntervalList(deleteIntervalList);
+      BatchData batchData = pageReader.getAllSatisfiedPageData();
+      int maxRow;
+      if (header.getChunkType() == MetaMarker.CHUNK_HEADER) {
+        maxRow = (int) pageHeader.getNumOfValues();
+      } else {
+        maxRow = batchData.length();
+      }
+      Tablet tablet = new Tablet(curDevice, Collections.singletonList(measurementSchema), maxRow);
+      int rowIndex = 0;
+      while (batchData.hasCurrent()) {
+        tablet.addTimestamp(rowIndex, batchData.currentTime());
+        tablet.addValue(measurement, rowIndex, batchData.currentValue());
+        batchData.next();
+        rowIndex++;
+      }
+      tablet.rowSize = rowIndex;
+      if (rowIndex > 0) {
+        // a page whose points were all deleted has nothing to insert
+        session.insertTablet(tablet);
+      }
+      dataSize -= pageHeader.getSerializedPageSize();
+    }
+  }
+
+  /**
+   * Parses the command line arguments into the static settings of this tool.
+   *
+   * <p>Exactly four arguments of the form {@code -x=value} are expected. The value is everything
+   * after the first '=', so a path may itself contain '='.
+   *
+   * @param args command line arguments
+   * @return true if all arguments were understood, false (after printing the usage) otherwise
+   */
+  private static boolean checkArgs(String[] args) {
+    if (args.length != 4) {
+      System.out.println(USAGE);
+      return false;
+    }
+    for (String arg : args) {
+      int separatorIndex = arg.indexOf('=');
+      if (separatorIndex < 0 || separatorIndex == arg.length() - 1) {
+        // "-b" or "-b=" carries no value
+        System.out.println(USAGE);
+        return false;
+      }
+      String value = arg.substring(separatorIndex + 1);
+      if (arg.startsWith("-b")) {
+        backUpDirPath = value;
+      } else if (arg.startsWith("-v")) {
+        validationFilePath = value;
+      } else if (arg.startsWith("-o")) {
+        outputLogFilePath = value;
+      } else if (arg.startsWith("-m")) {
+        moveFile = Boolean.parseBoolean(value);
+      } else {
+        System.out.println(USAGE);
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /** Prints the message to stdout and, once main() has opened it, to the output log. */
+  private static void printBoth(String msg) {
+    System.out.println(msg);
+    if (pw != null) {
+      pw.println(msg);
+    }
+  }
+
+  /**
+   * Collects the delete intervals that apply to one chunk: deletions whose path matches {@code
+   * deviceId.measurementId} and whose file offset is greater than the chunk header offset.
+   *
+   * @param deviceId device of the chunk
+   * @param schema schema of the chunk's measurement
+   * @param chunkHeaderOffset offset of the chunk header in the TsFile
+   * @param modifications content of the mods file, may be null
+   * @return the delete intervals kept by a fresh {@link ChunkMetadata}, or null if {@code
+   *     modifications} is null or empty
+   * @throws IllegalPathException if {@code deviceId.measurementId} is not a legal path
+   */
+  private static List<TimeRange> getOldSortedDeleteIntervals(
+      String deviceId,
+      MeasurementSchema schema,
+      long chunkHeaderOffset,
+      List<Modification> modifications)
+      throws IllegalPathException {
+    if (modifications == null || modifications.isEmpty()) {
+      return null;
+    }
+    // the chunk path is the same for every modification, so build it once
+    PartialPath chunkPath = new PartialPath(deviceId + "." + schema.getMeasurementId());
+    ChunkMetadata chunkMetadata = new ChunkMetadata();
+    Iterator<Modification> modsIterator = modifications.listIterator();
+    while (modsIterator.hasNext()) {
+      Modification modification = modsIterator.next();
+      if (!(modification instanceof Deletion)) {
+        continue;
+      }
+      Deletion currentDeletion = (Deletion) modification;
+      // if deletion path match the chunkPath, then add the deletion to the list
+      if (currentDeletion.getPath().matchFullPath(chunkPath)
+          && currentDeletion.getFileOffset() > chunkHeaderOffset) {
+        chunkMetadata.insertIntoSortedDeletions(
+            currentDeletion.getStartTime(), currentDeletion.getEndTime());
+      }
+    }
+    return chunkMetadata.getDeleteIntervalList();
+  }
+}
diff --git a/rewriteBadFile/src/main/java/org/apache/iotdb/TsFileValidationTool.java b/rewriteBadFile/src/main/java/org/apache/iotdb/TsFileValidationTool.java
new file mode 100644
index 0000000000..863c3c41ef
--- /dev/null
+++ b/rewriteBadFile/src/main/java/org/apache/iotdb/TsFileValidationTool.java
@@ -0,0 +1,379 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.iotdb; + +import org.apache.iotdb.db.engine.storagegroup.TsFileResource; +import org.apache.iotdb.tsfile.common.conf.TSFileConfig; +import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor; +import org.apache.iotdb.tsfile.encoding.decoder.Decoder; +import org.apache.iotdb.tsfile.file.MetaMarker; +import org.apache.iotdb.tsfile.file.header.ChunkGroupHeader; +import org.apache.iotdb.tsfile.file.header.ChunkHeader; +import org.apache.iotdb.tsfile.file.header.PageHeader; +import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; +import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding; +import org.apache.iotdb.tsfile.read.TsFileSequenceReader; +import org.apache.iotdb.tsfile.read.common.BatchData; +import org.apache.iotdb.tsfile.read.reader.page.PageReader; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.apache.iotdb.tsfile.common.constant.TsFileConstant.TSFILE_SUFFIX; + +/** + * This tool can be used to check the correctness of tsfile and point out errors in specific + * timeseries or devices. 
The types of errors include the following: + * + * <p>Device overlap between files + * + * <p>Timeseries overlap between files + * + * <p>Timeseries overlap between chunks + * + * <p>Timeseries overlap between pages + * + * <p>Timeseries overlap within one page + */ +public class TsFileValidationTool { + // print detail type of overlap or not + private static boolean printDetails = false; + + // print to local file or not + private static boolean printToFile = false; + + private static String outFilePath = "TsFile_validation_view.txt"; + + private static PrintWriter pw = null; + + private static final Logger logger = LoggerFactory.getLogger(TsFileValidationTool.class); + private static final List<File> seqDataDirList = new ArrayList<>(); + private static final List<File> fileList = new ArrayList<>(); + private static int badFileNum = 0; + + /** + * The form of param is: [path of data dir or tsfile] [-pd = print details or not] [-f = path of + * outFile]. Eg: xxx/iotdb/data/data1 xxx/xxx.tsfile -pd=true -f=xxx/TsFile_validation_view.txt + * + * <p>The first parameter is required, the others are optional. 
+ */ + public static void main(String[] args) throws IOException { + if (!checkArgs(args)) { + System.exit(1); + } + if (printToFile) { + pw = new PrintWriter(new FileWriter(outFilePath)); + } + printBoth("Start checking seq files ..."); + + // check tsfile, which will only check for correctness inside a single tsfile + for (File f : fileList) { + findUncorrectFiles(Collections.singletonList(f)); + } + + // check tsfiles in data dir, which will check for correctness inside one single tsfile and + // between files + for (File seqDataDir : seqDataDirList) { + // get sg data dirs + if (!checkIsDirectory(seqDataDir)) { + continue; + } + File[] sgDirs = seqDataDir.listFiles(); + for (File sgDir : Objects.requireNonNull(sgDirs)) { + if (!checkIsDirectory(sgDir)) { + continue; + } + printBoth("- Check files in storage group: " + sgDir.getAbsolutePath()); + // get data region dirs + File[] dataRegionDirs = sgDir.listFiles(); + for (File dataRegionDir : Objects.requireNonNull(dataRegionDirs)) { + if (!checkIsDirectory(dataRegionDir)) { + continue; + } + // get time partition dir + File[] timePartitionDirs = dataRegionDir.listFiles(); + for (File timePartitionDir : Objects.requireNonNull(timePartitionDirs)) { + if (!checkIsDirectory(timePartitionDir)) { + continue; + } + // get all seq files under the time partition dir + List<File> tsFiles = + Arrays.asList( + Objects.requireNonNull( + timePartitionDir.listFiles( + file -> file.getName().endsWith(TSFILE_SUFFIX)))); + // sort the seq files with timestamp + tsFiles.sort( + (f1, f2) -> + Long.compareUnsigned( + Long.parseLong(f1.getName().split("-")[0]), + Long.parseLong(f2.getName().split("-")[0]))); + findUncorrectFiles(tsFiles); + } + } + } + } + printBoth("Finish checking successfully, totally find " + badFileNum + " bad files."); + if (printToFile) { + pw.close(); + } + } + + private static void findUncorrectFiles(List<File> tsFiles) { + // measurementID -> [lastTime, endTimeInLastFile] + Map<String, long[]> 
measurementLastTime = new HashMap<>(); + // deviceID -> endTime, the endTime of device in the last seq file + Map<String, Long> deviceEndTime = new HashMap<>(); + + for (File tsFile : tsFiles) { + try { + TsFileResource resource = new TsFileResource(tsFile); + if (!new File(tsFile.getAbsolutePath() + TsFileResource.RESOURCE_SUFFIX).exists()) { + // resource file does not exist, tsfile may not be flushed yet + logger.warn( + "{} does not exist ,skip it.", + tsFile.getAbsolutePath() + TsFileResource.RESOURCE_SUFFIX); + continue; + } else { + resource.deserialize(); + } + boolean isBadFile = false; + try (TsFileSequenceReader reader = new TsFileSequenceReader(tsFile.getAbsolutePath())) { + // deviceID -> has checked overlap or not + Map<String, Boolean> hasCheckedDeviceOverlap = new HashMap<>(); + reader.position((long) TSFileConfig.MAGIC_STRING.getBytes().length + 1); + byte marker; + String deviceID = ""; + Map<String, boolean[]> hasMeasurementPrintedDetails = new HashMap<>(); + // measurementId -> lastChunkEndTime in current file + Map<String, Long> lashChunkEndTime = new HashMap<>(); + + // start reading data points in sequence + while ((marker = reader.readMarker()) != MetaMarker.SEPARATOR) { + switch (marker) { + case MetaMarker.CHUNK_HEADER: + case MetaMarker.ONLY_ONE_PAGE_CHUNK_HEADER: + ChunkHeader header = reader.readChunkHeader(marker); + if (header.getDataSize() == 0) { + // empty value chunk + break; + } + long currentChunkEndTime = Long.MIN_VALUE; + String measurementID = deviceID + "." 
+ header.getMeasurementID(); + hasMeasurementPrintedDetails.computeIfAbsent(measurementID, k -> new boolean[4]); + measurementLastTime.computeIfAbsent( + measurementID, + k -> { + long[] arr = new long[2]; + Arrays.fill(arr, Long.MIN_VALUE); + return arr; + }); + Decoder defaultTimeDecoder = + Decoder.getDecoderByType( + TSEncoding.valueOf( + TSFileDescriptor.getInstance().getConfig().getTimeEncoder()), + TSDataType.INT64); + Decoder valueDecoder = + Decoder.getDecoderByType(header.getEncodingType(), header.getDataType()); + int dataSize = header.getDataSize(); + long lastPageEndTime = Long.MIN_VALUE; + while (dataSize > 0) { + valueDecoder.reset(); + PageHeader pageHeader = + reader.readPageHeader( + header.getDataType(), + (header.getChunkType() & 0x3F) == MetaMarker.CHUNK_HEADER); + ByteBuffer pageData = reader.readPage(pageHeader, header.getCompressionType()); + long currentPageEndTime = Long.MIN_VALUE; + // NonAligned Chunk + PageReader pageReader = + new PageReader( + pageData, header.getDataType(), valueDecoder, defaultTimeDecoder, null); + BatchData batchData = pageReader.getAllSatisfiedPageData(); + while (batchData.hasCurrent()) { + long timestamp = batchData.currentTime(); + if (timestamp <= measurementLastTime.get(measurementID)[0]) { + // find bad file + if (!isBadFile) { + printBoth("-- Find the bad file " + tsFile.getAbsolutePath()); + isBadFile = true; + badFileNum++; + } + if (printDetails) { + if (timestamp <= measurementLastTime.get(measurementID)[1]) { + if (!hasMeasurementPrintedDetails.get(measurementID)[0]) { + printBoth( + "-------- Timeseries " + measurementID + " overlap between files"); + hasMeasurementPrintedDetails.get(measurementID)[0] = true; + } + } else if (timestamp + <= lashChunkEndTime.getOrDefault(measurementID, Long.MIN_VALUE)) { + if (!hasMeasurementPrintedDetails.get(measurementID)[1]) { + printBoth( + "-------- Timeseries " + measurementID + " overlap between chunks"); + hasMeasurementPrintedDetails.get(measurementID)[1] = 
true; + } + } else if (timestamp <= lastPageEndTime) { + if (!hasMeasurementPrintedDetails.get(measurementID)[2]) { + printBoth( + "-------- Timeseries " + measurementID + " overlap between pages"); + hasMeasurementPrintedDetails.get(measurementID)[2] = true; + } + } else { + if (!hasMeasurementPrintedDetails.get(measurementID)[3]) { + printBoth( + "-------- Timeseries " + + measurementID + + " overlap within one page"); + hasMeasurementPrintedDetails.get(measurementID)[3] = true; + } + } + } + } else { + measurementLastTime.get(measurementID)[0] = timestamp; + currentPageEndTime = timestamp; + currentChunkEndTime = timestamp; + } + batchData.next(); + } + dataSize -= pageHeader.getSerializedPageSize(); + lastPageEndTime = Math.max(lastPageEndTime, currentPageEndTime); + } + lashChunkEndTime.put( + measurementID, + Math.max( + lashChunkEndTime.getOrDefault(measurementID, Long.MIN_VALUE), + currentChunkEndTime)); + break; + case MetaMarker.CHUNK_GROUP_HEADER: + if (!deviceID.equals("")) { + // record the end time of last device in current file + if (resource.getEndTime(deviceID) + > deviceEndTime.getOrDefault(deviceID, Long.MIN_VALUE)) { + deviceEndTime.put(deviceID, resource.getEndTime(deviceID)); + } + } + ChunkGroupHeader chunkGroupHeader = reader.readChunkGroupHeader(); + deviceID = chunkGroupHeader.getDeviceID(); + if (!hasCheckedDeviceOverlap.getOrDefault(deviceID, false) + && resource.getStartTime(deviceID) + <= deviceEndTime.getOrDefault(deviceID, Long.MIN_VALUE)) { + // find bad file + if (!isBadFile) { + printBoth("-- Find the bad file " + tsFile.getAbsolutePath()); + isBadFile = true; + badFileNum++; + } + if (printDetails) { + printBoth("---- Device " + deviceID + " overlap between files"); + } + } + hasCheckedDeviceOverlap.put(deviceID, true); + break; + case MetaMarker.OPERATION_INDEX_RANGE: + reader.readPlanIndex(); + break; + default: + MetaMarker.handleUnexpectedMarker(marker); + } + } + + // record the end time of each timeseries in current file + 
for (Map.Entry<String, Long> entry : lashChunkEndTime.entrySet()) { + Long endTime = Math.max(measurementLastTime.get(entry.getKey())[1], entry.getValue()); + measurementLastTime.get(entry.getKey())[1] = endTime; + } + } + } catch (Throwable e) { + logger.error("Meet errors in reading file {} , skip it.", tsFile.getAbsolutePath(), e); + printBoth("-- Meet errors in reading file " + tsFile.getAbsolutePath()); + } + } + } + + public static boolean checkArgs(String[] args) { + if (args.length < 1) { + System.out.println( + "Please input correct param, which is [path of data dir] [-pd = print details or not] [-f = path of outFile]. Eg: xxx/iotdb/data/data -pd=true -f=xxx/TsFile_validation_view.txt"); + return false; + } else { + for (String arg : args) { + if (arg.startsWith("-pd")) { + printDetails = Boolean.parseBoolean(arg.split("=")[1]); + } else if (arg.startsWith("-f")) { + printToFile = true; + outFilePath = arg.split("=")[1]; + } else { + File f = new File(arg); + if (f.isDirectory() + && Objects.requireNonNull( + f.list( + (dir, name) -> + (name.equals("sequence") || name.equals("unsequence")))) + .length + == 2) { + File seqDataDir = new File(f, "sequence"); + seqDataDirList.add(seqDataDir); + } else if (arg.endsWith(TSFILE_SUFFIX) && f.isFile()) { + fileList.add(f); + } else { + System.out.println(arg + " is not a correct data directory or tsfile of IOTDB."); + return false; + } + } + } + if (seqDataDirList.size() == 0 && fileList.size() == 0) { + System.out.println( + "Please input correct param, which is [path of data dir] [-pd = print details or not] [-f = path of outFile]. 
Eg: xxx/iotdb/data/data -pd=true -f=xxx/TsFile_validation_view.txt"); + return false; + } + return true; + } + } + + private static boolean checkIsDirectory(File dir) { + boolean res = true; + if (!dir.isDirectory()) { + logger.error("{} is not a directory or does not exist, skip it.", dir.getAbsolutePath()); + res = false; + } + return res; + } + + private static void printBoth(String msg) { + System.out.println(msg); + if (printToFile) { + pw.println(msg); + } + } +} diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java index 5a0bca21da..a7ef49bdf9 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java @@ -73,7 +73,7 @@ public class ChunkMetadata { // used for ChunkCache, Eg:"root.sg1/0/0" private String tsFilePrefixPath; - private ChunkMetadata() {} + public ChunkMetadata() {} /** * constructor of ChunkMetaData.
