This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch 36.0.0
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/36.0.0 by this push:
     new 1f219e451ed add dump-segment tool mode to dump v10 segment metadata (#18901) (#18914)
1f219e451ed is described below
commit 1f219e451ed53bd486d29b7361a821331174cdff
Author: Clint Wylie <[email protected]>
AuthorDate: Wed Jan 14 10:24:07 2026 -0800
add dump-segment tool mode to dump v10 segment metadata (#18901) (#18914)
---
examples/bin/dump-segment | 32 ++++++++++
.../druid/tools/_common/common.runtime.properties | 2 +
examples/conf/druid/tools/_common/log4j2.xml | 26 ++++++++
examples/conf/druid/tools/dump-segment/jvm.config | 4 ++
.../java/org/apache/druid/cli/DumpSegment.java | 31 +++++++++-
.../java/org/apache/druid/cli/DumpSegmentTest.java | 71 ++++++++++++++++++++++
6 files changed, 165 insertions(+), 1 deletion(-)
diff --git a/examples/bin/dump-segment b/examples/bin/dump-segment
new file mode 100644
index 00000000000..04c0072fef8
--- /dev/null
+++ b/examples/bin/dump-segment
@@ -0,0 +1,32 @@
+#!/bin/bash -eu
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+PWD="$(pwd)"
+WHEREAMI="$(dirname "$0")"
+WHATAMI="dump-segment"
+CONFDIR="$WHEREAMI/../conf/druid/tools/"
+MAIN_CLASS="org.apache.druid.cli.Main tools dump-segment"
+
+cd "$WHEREAMI/.."
+
+CLASS_PATH="$CONFDIR"/"$WHATAMI":"$CONFDIR"/_common:"$CONFDIR"/../_common:"$WHEREAMI/../lib/*"
+
+exec "$WHEREAMI"/run-java \
+ `cat "$CONFDIR"/"$WHATAMI"/jvm.config | xargs` \
+ -cp $CLASS_PATH $MAIN_CLASS `echo "${@:1}"`
\ No newline at end of file
diff --git a/examples/conf/druid/tools/_common/common.runtime.properties b/examples/conf/druid/tools/_common/common.runtime.properties
new file mode 100644
index 00000000000..6b0665c2c93
--- /dev/null
+++ b/examples/conf/druid/tools/_common/common.runtime.properties
@@ -0,0 +1,2 @@
+## common tool extensions
+druid.extensions.loadList=["druid-datasketches"]
diff --git a/examples/conf/druid/tools/_common/log4j2.xml b/examples/conf/druid/tools/_common/log4j2.xml
new file mode 100644
index 00000000000..756094c9ca1
--- /dev/null
+++ b/examples/conf/druid/tools/_common/log4j2.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+-->
+<Configuration status="WARN">
+ <Loggers>
+ <Root level="OFF">
+      <!-- logging is disabled so tools can have their output piped to other stuff -->
+ </Root>
+ </Loggers>
+</Configuration>
diff --git a/examples/conf/druid/tools/dump-segment/jvm.config b/examples/conf/druid/tools/dump-segment/jvm.config
new file mode 100644
index 00000000000..cb2403c5f3b
--- /dev/null
+++ b/examples/conf/druid/tools/dump-segment/jvm.config
@@ -0,0 +1,4 @@
+-server
+-XX:+ExitOnOutOfMemoryError
+-Duser.timezone=UTC
+-Dfile.encoding=UTF-8
diff --git a/services/src/main/java/org/apache/druid/cli/DumpSegment.java b/services/src/main/java/org/apache/druid/cli/DumpSegment.java
index fb8620df4fc..1986fb1ad9a 100644
--- a/services/src/main/java/org/apache/druid/cli/DumpSegment.java
+++ b/services/src/main/java/org/apache/druid/cli/DumpSegment.java
@@ -88,6 +88,7 @@ import org.apache.druid.segment.data.ConciseBitmapSerdeFactory;
import org.apache.druid.segment.data.FixedIndexed;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
+import org.apache.druid.segment.file.SegmentFileMapperV10;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex;
import org.apache.druid.segment.nested.CompressedNestedDataComplexColumn;
@@ -125,7 +126,8 @@ public class DumpSegment extends GuiceRunnable
ROWS,
METADATA,
BITMAPS,
- NESTED
+ NESTED,
+ METADATA_V10
}
public DumpSegment()
@@ -194,6 +196,11 @@ public class DumpSegment extends GuiceRunnable
throw new IAE("Not a valid dump type: %s", dumpTypeString);
}
+ if (dumpType == DumpType.METADATA_V10) {
+ dumpV10Metadata(injector, directory, outputFileName);
+ return;
+ }
+
try (final QueryableIndex index = indexIO.loadIndex(new File(directory))) {
switch (dumpType) {
case ROWS:
@@ -690,6 +697,28 @@ public class DumpSegment extends GuiceRunnable
outputFileName
);
}
+ @VisibleForTesting
+  public static void dumpV10Metadata(Injector injector, String segmentFile, String output)
+  {
+    final ObjectMapper objectMapper = injector.getInstance(Key.get(ObjectMapper.class, Json.class));
+    try (SegmentFileMapperV10 fileMapperV10 = SegmentFileMapperV10.create(new File(segmentFile), objectMapper)) {
+      withOutputStream(
+          (Function<OutputStream, Object>) outStream -> {
+            try {
+              objectMapper.writeValue(outStream, fileMapperV10.getSegmentFileMetadata());
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ return null;
+ },
+ output
+ );
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
@VisibleForTesting
public static List<String> getColumnsToInclude(final QueryableIndex index,
List<String> columns)
diff --git a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java
index 47b0ae9534f..186d109b1f5 100644
--- a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java
+++ b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java
@@ -28,6 +28,7 @@ import com.google.inject.name.Names;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
+import org.apache.druid.data.input.ResourceInputSource;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.guice.BuiltInTypesModule;
@@ -51,6 +52,8 @@ import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.DefaultColumnFormatConfig;
+import org.apache.druid.segment.IndexBuilder;
+import org.apache.druid.segment.IndexIO;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.Segment;
@@ -59,6 +62,8 @@ import org.apache.druid.segment.TestIndex;
import org.apache.druid.segment.column.BaseColumnHolder;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.file.SegmentFileMetadata;
+import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.After;
@@ -273,6 +278,72 @@ public class DumpSegmentTest extends InitializedNullHandlingTest
Assert.assertEquals(-1, (int) injector.getInstance(Key.get(Integer.class,
Names.named("tlsServicePort"))));
}
+ @Test
+ public void testDumpV10Metadata() throws IOException
+ {
+ Injector injector = Mockito.mock(Injector.class);
+ ObjectMapper mapper = TestHelper.makeJsonMapper();
+ mapper.registerModules(BuiltInTypesModule.getJacksonModulesList());
+ mapper.setInjectableValues(
+ new InjectableValues.Std()
+          .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE)
+          .addValue(ObjectMapper.class.getName(), mapper)
+          .addValue(DefaultColumnFormatConfig.class, new DefaultColumnFormatConfig(null, null, null))
+    );
+    Mockito.when(injector.getInstance(Key.get(ObjectMapper.class, Json.class))).thenReturn(mapper);
+    Mockito.when(injector.getInstance(DefaultColumnFormatConfig.class)).thenReturn(new DefaultColumnFormatConfig(null, null, null));
+
+ File f = buildV10Segment();
+
+ File outputFile = tempFolder.newFile();
+ DumpSegment.dumpV10Metadata(
+ injector,
+ f.getPath() + "/" + IndexIO.V10_FILE_NAME,
+ outputFile.getPath()
+ );
+ final byte[] fileBytes = Files.readAllBytes(outputFile.toPath());
+    SegmentFileMetadata dumped = mapper.readValue(fileBytes, SegmentFileMetadata.class);
+ Assert.assertNotNull(dumped);
+ Assert.assertEquals(1, dumped.getContainers().size());
+ Assert.assertEquals(2, dumped.getColumnDescriptors().size());
+ Assert.assertEquals(12, dumped.getFiles().size());
+ }
+
+
+ private File buildV10Segment() throws IOException
+ {
+ final File segmentDir = tempFolder.newFolder();
+ IndexBuilder bob = IndexBuilder.create()
+ .useV10()
+ .tmpDir(segmentDir)
+ .schema(
+ IncrementalIndexSchema.builder()
+
.withTimestampSpec(
+ new
TimestampSpec(
+
"timestamp",
+ null,
+ null
+ )
+ )
+
.withDimensionsSpec(
+
DimensionsSpec.builder()
+
.useSchemaDiscovery(true)
+
.build()
+ )
+
.withQueryGranularity(Granularities.NONE)
+ .withRollup(false)
+
.withMinTimestamp(0)
+ .build()
+ )
+ .inputSource(ResourceInputSource.of(
+ getClass().getClassLoader(),
+ "nested-test-data.json"
+ ))
+
.inputFormat(TestIndex.DEFAULT_JSON_INPUT_FORMAT)
+ .inputTmpDir(tempFolder.newFolder());
+ return bob.buildMMappedIndexFile();
+ }
+
public static List<Segment> createSegments(
TemporaryFolder tempFolder,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]