This is an automated email from the ASF dual-hosted git repository.

abhishekrb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 27a8fc30477 minor: change maxStringLength default (#19198)
27a8fc30477 is described below

commit 27a8fc30477713dff82a2daeeb476a3615fa0d88
Author: Jay Kanakiya <[email protected]>
AuthorDate: Mon Mar 23 14:56:22 2026 -0700

    minor: change maxStringLength default (#19198)
    
    Follow up to #19146
    
    
    Updated the default value for druid.indexing.formats.maxStringLength from 
0 to null. This change also includes the corresponding documentation update.
---
 docs/configuration/index.md                        |  2 +-
 docs/ingestion/ingestion-spec.md                   |  2 +-
 .../data/input/impl/StringDimensionSchema.java     | 29 ++++++++++++++++------
 .../org/apache/druid/guice/BuiltInTypesModule.java |  8 +++---
 .../druid/segment/DefaultColumnFormatConfig.java   |  4 +--
 .../druid/segment/StringDimensionHandler.java      |  6 +++--
 .../druid/segment/StringDimensionIndexer.java      | 13 +++++++---
 .../data/input/impl/StringDimensionSchemaTest.java | 13 +++++++++-
 .../apache/druid/guice/BuiltInTypesModuleTest.java |  6 ++---
 9 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/docs/configuration/index.md b/docs/configuration/index.md
index 853ec3878a2..c31cf0747cb 100644
--- a/docs/configuration/index.md
+++ b/docs/configuration/index.md
@@ -1424,7 +1424,7 @@ Additional Peon configs include:
 |`druid.indexer.task.storeEmptyColumns`|Boolean value for whether or not to 
store empty columns during ingestion. When set to true, Druid stores every 
column specified in the 
[`dimensionsSpec`](../ingestion/ingestion-spec.md#dimensionsspec). If you use 
the string-based schemaless ingestion and don't specify any dimensions to 
ingest, you must also set 
[`includeAllDimensions`](../ingestion/ingestion-spec.md#dimensionsspec) for 
Druid to store empty columns.<br/><br/>If you set `storeEmptyCo [...]
 |`druid.indexer.task.tmpStorageBytesPerTask`|Maximum number of bytes per task 
to be used to store temporary files on disk. This config is generally intended 
for internal usage. Attempts to set it are very likely to be overwritten by the 
TaskRunner that executes the task, so be sure of what you expect to happen 
before directly adjusting this configuration parameter. The config is 
documented here primarily to provide an understanding of what it means if/when 
someone sees that it has been s [...]
 |`druid.indexer.server.maxChatRequests`|Maximum number of concurrent requests 
served by a task's chat handler. Set to 0 to disable limiting.|0|
-|`druid.indexing.formats.maxStringLength`|Maximum number of characters to 
store per string dimension value. Longer values are truncated during ingestion. 
Does not apply to multi-value string dimensions. Set to 0 to disable. Can be 
overridden per-dimension using `maxStringLength` in the [dimension 
object](../ingestion/ingestion-spec.md#dimension-objects).|0 (no truncation)|
+|`druid.indexing.formats.maxStringLength`|Maximum number of characters to 
store per string dimension value. Longer values are truncated during ingestion. 
Does not apply to multi-value string dimensions. Can be overridden 
per-dimension using `maxStringLength` in the [dimension 
object](../ingestion/ingestion-spec.md#dimension-objects). Value must be >= 
0.|`null` (no truncation)|
 
 If the Peon is running in remote mode, there must be an Overlord up and 
running. Peons in remote mode can set the following configurations:
 
diff --git a/docs/ingestion/ingestion-spec.md b/docs/ingestion/ingestion-spec.md
index 72ec6d793d3..b2a0e41f48d 100644
--- a/docs/ingestion/ingestion-spec.md
+++ b/docs/ingestion/ingestion-spec.md
@@ -243,7 +243,7 @@ Dimension objects can have the following components:
 | name | The name of the dimension. This will be used as the field name to 
read from input records, as well as the column name stored in generated 
segments.<br /><br />Note that you can use a [`transformSpec`](#transformspec) 
if you want to rename columns during ingestion time. | none (required) |
 | createBitmapIndex | For `string` typed dimensions, whether or not bitmap 
indexes should be created for the column in generated segments. Creating a 
bitmap index requires more storage, but speeds up certain kinds of filtering 
(especially equality and prefix filtering). Only supported for `string` typed 
dimensions. | `true` |
 | multiValueHandling | For `string` typed dimensions, specifies the type of 
handling for [multi-value fields](../querying/multi-value-dimensions.md). 
Possible values are `array` (ingest string arrays as-is), `sorted_array` (sort 
string arrays during ingestion), and `sorted_set` (sort and de-duplicate string 
arrays during ingestion). This parameter is ignored for types other than 
`string`. | `sorted_array` |
-| maxStringLength | For `string` typed dimensions, the maximum number of 
characters to store per value. Longer values are truncated during ingestion. 
Does not apply to multi-value string dimensions. Set to 0 to disable. Overrides 
the global 
[`druid.indexing.formats.maxStringLength`](../configuration/index.md#additional-peon-configuration)
 property. | `0` (no truncation) |
+| maxStringLength | For `string` typed dimensions, the maximum number of 
characters to store per value. Longer values are truncated during ingestion. 
Does not apply to multi-value string dimensions. Overrides the global 
[`druid.indexing.formats.maxStringLength`](../configuration/index.md#additional-peon-configuration)
 property. Value must be >= 0. | `null` (no truncation) |
 
 #### Inclusions and exclusions
 
diff --git 
a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
 
b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
index ab00952e867..018d9ca5c35 100644
--- 
a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
+++ 
b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.druid.error.DruidException;
 import org.apache.druid.guice.BuiltInTypesModule;
 import org.apache.druid.segment.DimensionHandler;
 import org.apache.druid.segment.StringDimensionHandler;
@@ -34,12 +35,25 @@ public class StringDimensionSchema extends DimensionSchema
 {
   private static final boolean DEFAULT_CREATE_BITMAP_INDEX = true;
 
-  public static int getDefaultMaxStringLength()
+  @Nullable
+  public static Integer getDefaultMaxStringLength()
   {
     return BuiltInTypesModule.getMaxStringLength();
   }
 
-  private final int maxStringLength;
+  @Nullable
+  private static Integer validateMaxStringLength(String name, @Nullable 
Integer maxStringLength)
+  {
+    if (maxStringLength != null && maxStringLength < 0) {
+      throw DruidException.forPersona(DruidException.Persona.USER)
+                          .ofCategory(DruidException.Category.INVALID_INPUT)
+                          .build("maxStringLength for column [%s] must be >= 
0, got [%s]", name, maxStringLength);
+    }
+    return maxStringLength != null ? maxStringLength : 
getDefaultMaxStringLength();
+  }
+
+  @Nullable
+  private final Integer maxStringLength;
 
   @JsonCreator
   public static StringDimensionSchema create(String name)
@@ -56,7 +70,7 @@ public class StringDimensionSchema extends DimensionSchema
   )
   {
     super(name, multiValueHandling, createBitmapIndex == null ? 
DEFAULT_CREATE_BITMAP_INDEX : createBitmapIndex);
-    this.maxStringLength = maxStringLength != null && maxStringLength > 0 ? 
maxStringLength : getDefaultMaxStringLength();
+    this.maxStringLength = validateMaxStringLength(name, maxStringLength);
   }
 
   public StringDimensionSchema(
@@ -65,17 +79,18 @@ public class StringDimensionSchema extends DimensionSchema
       Boolean createBitmapIndex
   )
   {
-    this(name, multiValueHandling, createBitmapIndex, 
getDefaultMaxStringLength());
+    this(name, multiValueHandling, createBitmapIndex, null);
   }
 
   public StringDimensionSchema(String name)
   {
-    this(name, null, DEFAULT_CREATE_BITMAP_INDEX, getDefaultMaxStringLength());
+    this(name, null, DEFAULT_CREATE_BITMAP_INDEX, null);
   }
 
   @JsonProperty
-  @JsonInclude(JsonInclude.Include.NON_DEFAULT)
-  public int getMaxStringLength()
+  @JsonInclude(JsonInclude.Include.NON_NULL)
+  @Nullable
+  public Integer getMaxStringLength()
   {
     return maxStringLength;
   }
diff --git 
a/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java 
b/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java
index e260a4bd8b6..5205c6ba311 100644
--- a/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java
+++ b/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java
@@ -53,7 +53,8 @@ public class BuiltInTypesModule implements DruidModule
    */
   private static DimensionSchema.MultiValueHandling STRING_MV_MODE = 
DimensionSchema.MultiValueHandling.SORTED_ARRAY;
   private static IndexSpec DEFAULT_INDEX_SPEC = IndexSpec.builder().build();
-  private static int MAX_STRING_LENGTH = 0;
+  @Nullable
+  private static Integer MAX_STRING_LENGTH = null;
 
   /**
    * @return the configured string multi value handling mode from the system 
config if set; otherwise, returns
@@ -138,12 +139,13 @@ public class BuiltInTypesModule implements DruidModule
   }
 
   @VisibleForTesting
-  public static void setMaxStringLength(int maxStringLength)
+  public static void setMaxStringLength(@Nullable Integer maxStringLength)
   {
     MAX_STRING_LENGTH = maxStringLength;
   }
 
-  public static int getMaxStringLength()
+  @Nullable
+  public static Integer getMaxStringLength()
   {
     return MAX_STRING_LENGTH;
   }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java
 
b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java
index 19b875b5f6c..56118b02686 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java
@@ -71,12 +71,12 @@ public class DefaultColumnFormatConfig
   @Nullable
   private static Integer validateMaxStringLength(@Nullable Integer 
maxStringLength)
   {
-    if (maxStringLength != null && maxStringLength <= 0) {
+    if (maxStringLength != null && maxStringLength < 0) {
       throw DruidException.forPersona(DruidException.Persona.OPERATOR)
                           .ofCategory(DruidException.Category.INVALID_INPUT)
                           .build(
                               "Invalid value[%s] specified for 
'druid.indexing.formats.maxStringLength'."
-                              + " Value must be a positive integer.",
+                              + " Value must be a non-negative integer.",
                               maxStringLength
                           );
     }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java 
b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
index d2b41ab7a4b..0d23fe24aa7 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
@@ -32,6 +32,7 @@ import 
org.apache.druid.segment.selector.settable.SettableColumnValueSelector;
 import 
org.apache.druid.segment.selector.settable.SettableDimensionValueSelector;
 import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
 
+import javax.annotation.Nullable;
 import java.io.File;
 import java.util.Collections;
 import java.util.Comparator;
@@ -104,7 +105,8 @@ public class StringDimensionHandler implements 
DimensionHandler<Integer, int[],
   private final MultiValueHandling multiValueHandling;
   private final boolean hasBitmapIndexes;
   private final boolean hasSpatialIndexes;
-  private final int maxStringLength;
+  @Nullable
+  private final Integer maxStringLength;
 
   public StringDimensionHandler(
       String dimensionName,
@@ -121,7 +123,7 @@ public class StringDimensionHandler implements 
DimensionHandler<Integer, int[],
       MultiValueHandling multiValueHandling,
       boolean hasBitmapIndexes,
       boolean hasSpatialIndexes,
-      int maxStringLength
+      @Nullable Integer maxStringLength
   )
   {
     this.dimensionName = dimensionName;
diff --git 
a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java 
b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index d41fe6fea98..88f60ee8042 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -58,7 +58,8 @@ public class StringDimensionIndexer extends 
DictionaryEncodedColumnIndexer<int[]
   private final MultiValueHandling multiValueHandling;
   private final boolean hasBitmapIndexes;
   private final boolean hasSpatialIndexes;
-  private final int maxStringLength;
+  @Nullable
+  private final Integer maxStringLength;
   private volatile boolean hasMultipleValues = false;
 
   public StringDimensionIndexer(
@@ -74,7 +75,7 @@ public class StringDimensionIndexer extends 
DictionaryEncodedColumnIndexer<int[]
       @Nullable MultiValueHandling multiValueHandling,
       boolean hasBitmapIndexes,
       boolean hasSpatialIndexes,
-      int maxStringLength
+      @Nullable Integer maxStringLength
   )
   {
     super(new StringDimensionDictionary());
@@ -84,9 +85,13 @@ public class StringDimensionIndexer extends 
DictionaryEncodedColumnIndexer<int[]
     this.maxStringLength = maxStringLength;
   }
 
-  private String truncateIfNeeded(String value)
+  /**
+   * Truncates the value to the first {@link #maxStringLength} characters if 
configured, otherwise returns it as-is.
+   */
+  @Nullable
+  private String truncateIfNeeded(@Nullable String value)
   {
-    if (maxStringLength > 0 && value != null && value.length() > 
maxStringLength) {
+    if (maxStringLength != null && value != null && value.length() > 
maxStringLength) {
       return value.substring(0, maxStringLength);
     }
     return value;
diff --git 
a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java
 
b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java
index 3354ac8b82a..dbee07bddb8 100644
--- 
a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java
+++ 
b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.AnnotationIntrospector;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling;
+import org.apache.druid.error.DruidException;
 import org.apache.druid.guice.DruidSecondaryModule;
 import org.apache.druid.guice.GuiceAnnotationIntrospector;
 import org.junit.Assert;
@@ -59,6 +60,16 @@ public class StringDimensionSchemaTest
                         + "}";
     final StringDimensionSchema schema = (StringDimensionSchema) 
jsonMapper.readValue(json, DimensionSchema.class);
     Assert.assertEquals(new StringDimensionSchema("dim", 
MultiValueHandling.SORTED_SET, false), schema);
-    Assert.assertEquals(200, schema.getMaxStringLength());
+    Assert.assertEquals(Integer.valueOf(200), schema.getMaxStringLength());
+  }
+
+  @Test
+  public void testInvalidMaxStringLength()
+  {
+    final Exception exception = Assert.assertThrows(
+        DruidException.class,
+        () -> new StringDimensionSchema("dim", null, true, -1)
+    );
+    Assert.assertTrue(exception.getMessage().contains("maxStringLength for 
column [dim] must be >= 0"));
   }
 }
diff --git 
a/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java 
b/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java
index 189a8a2bdf3..4fdcad50e87 100644
--- 
a/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java
+++ 
b/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java
@@ -60,7 +60,7 @@ public class BuiltInTypesModuleTest
   public void teardownEach()
   {
     BuiltInTypesModule.setIndexSpecDefaults(IndexSpec.builder().build());
-    BuiltInTypesModule.setMaxStringLength(0);
+    BuiltInTypesModule.setMaxStringLength(null);
   }
 
   @AfterClass
@@ -75,7 +75,7 @@ public class BuiltInTypesModuleTest
       );
     }
     BuiltInTypesModule.setIndexSpecDefaults(IndexSpec.builder().build());
-    BuiltInTypesModule.setMaxStringLength(0);
+    BuiltInTypesModule.setMaxStringLength(null);
   }
 
   @Test
@@ -98,7 +98,7 @@ public class BuiltInTypesModuleTest
         BuiltInTypesModule.getStringMultiValueHandlingMode()
     );
 
-    Assertions.assertEquals(0, BuiltInTypesModule.getMaxStringLength());
+    Assertions.assertNull(BuiltInTypesModule.getMaxStringLength());
   }
 
   @Test


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to