This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch 36.0.0
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/36.0.0 by this push:
     new 1f219e451ed add dump-segment tool mode to dump v10 segment metadata (#18901) (#18914)
1f219e451ed is described below

commit 1f219e451ed53bd486d29b7361a821331174cdff
Author: Clint Wylie <[email protected]>
AuthorDate: Wed Jan 14 10:24:07 2026 -0800

    add dump-segment tool mode to dump v10 segment metadata (#18901) (#18914)
---
 examples/bin/dump-segment                          | 32 ++++++++++
 .../druid/tools/_common/common.runtime.properties  |  2 +
 examples/conf/druid/tools/_common/log4j2.xml       | 26 ++++++++
 examples/conf/druid/tools/dump-segment/jvm.config  |  4 ++
 .../java/org/apache/druid/cli/DumpSegment.java     | 31 +++++++++-
 .../java/org/apache/druid/cli/DumpSegmentTest.java | 71 ++++++++++++++++++++++
 6 files changed, 165 insertions(+), 1 deletion(-)

diff --git a/examples/bin/dump-segment b/examples/bin/dump-segment
new file mode 100644
index 00000000000..04c0072fef8
--- /dev/null
+++ b/examples/bin/dump-segment
@@ -0,0 +1,32 @@
+#!/bin/bash -eu
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+PWD="$(pwd)"
+WHEREAMI="$(dirname "$0")"
+WHATAMI="dump-segment"
+CONFDIR="$WHEREAMI/../conf/druid/tools/"
+MAIN_CLASS="org.apache.druid.cli.Main tools dump-segment"
+
+cd "$WHEREAMI/.."
+
+CLASS_PATH="$CONFDIR"/"$WHATAMI":"$CONFDIR"/_common:"$CONFDIR"/../_common:"$WHEREAMI/../lib/*"
+
+exec "$WHEREAMI"/run-java \
+    `cat "$CONFDIR"/"$WHATAMI"/jvm.config | xargs` \
+    -cp  $CLASS_PATH $MAIN_CLASS `echo "${@:1}"`
\ No newline at end of file
diff --git a/examples/conf/druid/tools/_common/common.runtime.properties b/examples/conf/druid/tools/_common/common.runtime.properties
new file mode 100644
index 00000000000..6b0665c2c93
--- /dev/null
+++ b/examples/conf/druid/tools/_common/common.runtime.properties
@@ -0,0 +1,2 @@
+## common tool extensions
+druid.extensions.loadList=["druid-datasketches"]
diff --git a/examples/conf/druid/tools/_common/log4j2.xml b/examples/conf/druid/tools/_common/log4j2.xml
new file mode 100644
index 00000000000..756094c9ca1
--- /dev/null
+++ b/examples/conf/druid/tools/_common/log4j2.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+-->
+<Configuration status="WARN">
+  <Loggers>
+    <Root level="OFF">
+      <!-- logging is disabled so tools can have their output piped to other stuff  -->
+    </Root>
+  </Loggers>
+</Configuration>
diff --git a/examples/conf/druid/tools/dump-segment/jvm.config b/examples/conf/druid/tools/dump-segment/jvm.config
new file mode 100644
index 00000000000..cb2403c5f3b
--- /dev/null
+++ b/examples/conf/druid/tools/dump-segment/jvm.config
@@ -0,0 +1,4 @@
+-server
+-XX:+ExitOnOutOfMemoryError
+-Duser.timezone=UTC
+-Dfile.encoding=UTF-8
diff --git a/services/src/main/java/org/apache/druid/cli/DumpSegment.java b/services/src/main/java/org/apache/druid/cli/DumpSegment.java
index fb8620df4fc..1986fb1ad9a 100644
--- a/services/src/main/java/org/apache/druid/cli/DumpSegment.java
+++ b/services/src/main/java/org/apache/druid/cli/DumpSegment.java
@@ -88,6 +88,7 @@ import org.apache.druid.segment.data.ConciseBitmapSerdeFactory;
 import org.apache.druid.segment.data.FixedIndexed;
 import org.apache.druid.segment.data.Indexed;
 import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
+import org.apache.druid.segment.file.SegmentFileMapperV10;
 import org.apache.druid.segment.filter.Filters;
 import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex;
 import org.apache.druid.segment.nested.CompressedNestedDataComplexColumn;
@@ -125,7 +126,8 @@ public class DumpSegment extends GuiceRunnable
     ROWS,
     METADATA,
     BITMAPS,
-    NESTED
+    NESTED,
+    METADATA_V10
   }
 
   public DumpSegment()
@@ -194,6 +196,11 @@ public class DumpSegment extends GuiceRunnable
       throw new IAE("Not a valid dump type: %s", dumpTypeString);
     }
 
+    if (dumpType == DumpType.METADATA_V10) {
+      dumpV10Metadata(injector, directory, outputFileName);
+      return;
+    }
+
     try (final QueryableIndex index = indexIO.loadIndex(new File(directory))) {
       switch (dumpType) {
         case ROWS:
@@ -690,6 +697,28 @@ public class DumpSegment extends GuiceRunnable
         outputFileName
     );
   }
+  @VisibleForTesting
+  public static void dumpV10Metadata(Injector injector, String segmentFile, String output)
+  {
+    final ObjectMapper objectMapper = injector.getInstance(Key.get(ObjectMapper.class, Json.class));
+    try (SegmentFileMapperV10 fileMapperV10 = SegmentFileMapperV10.create(new File(segmentFile), objectMapper)) {
+      withOutputStream(
+          (Function<OutputStream, Object>) outStream -> {
+            try {
+              objectMapper.writeValue(outStream, fileMapperV10.getSegmentFileMetadata());
+            }
+            }
+            catch (IOException e) {
+              throw new RuntimeException(e);
+            }
+            return null;
+          },
+          output
+      );
+    }
+    catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
 
   @VisibleForTesting
  public static List<String> getColumnsToInclude(final QueryableIndex index, List<String> columns)
diff --git a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java
index 47b0ae9534f..186d109b1f5 100644
--- a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java
+++ b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java
@@ -28,6 +28,7 @@ import com.google.inject.name.Names;
 import org.apache.druid.collections.bitmap.BitmapFactory;
 import org.apache.druid.collections.bitmap.ImmutableBitmap;
 import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
+import org.apache.druid.data.input.ResourceInputSource;
 import org.apache.druid.data.input.impl.DimensionsSpec;
 import org.apache.druid.data.input.impl.TimestampSpec;
 import org.apache.druid.guice.BuiltInTypesModule;
@@ -51,6 +52,8 @@ import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.query.aggregation.CountAggregatorFactory;
 import org.apache.druid.query.expression.TestExprMacroTable;
 import org.apache.druid.segment.DefaultColumnFormatConfig;
+import org.apache.druid.segment.IndexBuilder;
+import org.apache.druid.segment.IndexIO;
 import org.apache.druid.segment.IndexSpec;
 import org.apache.druid.segment.QueryableIndex;
 import org.apache.druid.segment.Segment;
@@ -59,6 +62,8 @@ import org.apache.druid.segment.TestIndex;
 import org.apache.druid.segment.column.BaseColumnHolder;
 import org.apache.druid.segment.column.ColumnConfig;
 import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.file.SegmentFileMetadata;
+import org.apache.druid.segment.incremental.IncrementalIndexSchema;
 import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex;
 import org.apache.druid.testing.InitializedNullHandlingTest;
 import org.junit.After;
@@ -273,6 +278,72 @@ public class DumpSegmentTest extends InitializedNullHandlingTest
     Assert.assertEquals(-1, (int) injector.getInstance(Key.get(Integer.class, Names.named("tlsServicePort"))));
   }
 
+  @Test
+  public void testDumpV10Metadata() throws IOException
+  {
+    Injector injector = Mockito.mock(Injector.class);
+    ObjectMapper mapper = TestHelper.makeJsonMapper();
+    mapper.registerModules(BuiltInTypesModule.getJacksonModulesList());
+    mapper.setInjectableValues(
+        new InjectableValues.Std()
+            .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE)
+            .addValue(ObjectMapper.class.getName(), mapper)
+            .addValue(DefaultColumnFormatConfig.class, new DefaultColumnFormatConfig(null, null, null))
+    );
+    Mockito.when(injector.getInstance(Key.get(ObjectMapper.class, Json.class))).thenReturn(mapper);
+    Mockito.when(injector.getInstance(DefaultColumnFormatConfig.class)).thenReturn(new DefaultColumnFormatConfig(null, null, null));
+
+    File f = buildV10Segment();
+
+    File outputFile = tempFolder.newFile();
+    DumpSegment.dumpV10Metadata(
+        injector,
+        f.getPath() + "/" + IndexIO.V10_FILE_NAME,
+        outputFile.getPath()
+    );
+    final byte[] fileBytes = Files.readAllBytes(outputFile.toPath());
+    SegmentFileMetadata dumped = mapper.readValue(fileBytes, SegmentFileMetadata.class);
+    Assert.assertNotNull(dumped);
+    Assert.assertEquals(1, dumped.getContainers().size());
+    Assert.assertEquals(2, dumped.getColumnDescriptors().size());
+    Assert.assertEquals(12, dumped.getFiles().size());
+  }
+
+
+  private File buildV10Segment() throws IOException
+  {
+    final File segmentDir = tempFolder.newFolder();
+    IndexBuilder bob = IndexBuilder.create()
+                                   .useV10()
+                                   .tmpDir(segmentDir)
+                                   .schema(
+                                       IncrementalIndexSchema.builder()
+                                                             .withTimestampSpec(
+                                                                 new TimestampSpec(
+                                                                     "timestamp",
+                                                                     null,
+                                                                     null
+                                                                 )
+                                                             )
+                                                             .withDimensionsSpec(
+                                                                 DimensionsSpec.builder()
+                                                                               .useSchemaDiscovery(true)
+                                                                               .build()
+                                                             )
+                                                             .withQueryGranularity(Granularities.NONE)
+                                                             .withRollup(false)
+                                                             .withMinTimestamp(0)
+                                                             .build()
+                                   )
+                                   .inputSource(ResourceInputSource.of(
+                                       getClass().getClassLoader(),
+                                       "nested-test-data.json"
+                                   ))
+                                   .inputFormat(TestIndex.DEFAULT_JSON_INPUT_FORMAT)
+                                   .inputTmpDir(tempFolder.newFolder());
+    return bob.buildMMappedIndexFile();
+  }
+
 
   public static List<Segment> createSegments(
       TemporaryFolder tempFolder,

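A minimal usage sketch for the new mode (not part of the commit): it assumes the tool keeps its existing --directory, --dumpType, and --out options and that the METADATA_V10 enum is selected with the case-insensitive string "metadata_v10". Per DumpSegmentTest above, --directory for this mode points at the v10 segment file inside the segment directory; the paths below are placeholders.

    bin/dump-segment \
      --directory /path/to/segment/<v10-segment-file> \
      --dumpType metadata_v10 \
      --out /tmp/segment-metadata-v10.json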

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
