kfaraz commented on a change in pull request #11848:
URL: https://github.com/apache/druid/pull/11848#discussion_r741665781



##########
File path: 
core/src/main/java/org/apache/druid/indexer/partitions/MultiDimensionPartitionsSpec.java
##########
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.indexer.partitions;
+
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.apache.druid.indexer.Checks;
+import org.apache.druid.indexer.Property;
+
+import javax.annotation.Nullable;
+import javax.validation.constraints.NotNull;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Partition a segment by multiple dimensions.
+ */
+public class MultiDimensionPartitionsSpec implements 
DimensionBasedPartitionsSpec
+{
+  public static final String NAME = "multi_dim";
+
+  private static final String PARITION_DIMENSIONS = "partitionDimensions";
+
+  private static final String FORCE_GUARANTEED_ROLLUP_COMPATIBLE = "";
+
+  private final Integer targetRowsPerSegment;
+  private final Integer maxRowsPerSegment;
+  private final List<String> partitionDimensions;
+  private final boolean assumeGrouped;
+
+  // Value of this field is derived from targetRows and maxRows
+  private final int resolvedMaxRowPerSegment;
+
+  @JsonCreator
+  public MultiDimensionPartitionsSpec(
+      @JsonProperty(TARGET_ROWS_PER_SEGMENT) @Nullable Integer 
targetRowsPerSegment,
+      @JsonProperty(MAX_ROWS_PER_SEGMENT) @Nullable Integer maxRowsPerSegment,
+      @JsonProperty(PARITION_DIMENSIONS) List<String> partitionDimensions,
+      @JsonProperty("assumeGrouped") boolean assumeGrouped,  // false by 
default
+
+      // Deprecated properties preserved for backward compatibility:
+      @Deprecated @JsonProperty(TARGET_PARTITION_SIZE) @Nullable
+          Integer targetPartitionSize,  // prefer targetRowsPerSegment
+      @Deprecated @JsonProperty(MAX_PARTITION_SIZE) @Nullable
+          Integer maxPartitionSize  // prefer maxRowsPerSegment
+  )
+  {
+    Preconditions.checkArgument(partitionDimensions != null, 
"partitionDimensions must be specified");
+    this.partitionDimensions = partitionDimensions;
+    this.assumeGrouped = assumeGrouped;
+
+    Integer adjustedTargetRowsPerSegment = 
PartitionsSpec.resolveHistoricalNullIfNeeded(targetRowsPerSegment);
+    Integer adjustedMaxRowsPerSegment = 
PartitionsSpec.resolveHistoricalNullIfNeeded(maxRowsPerSegment);
+    Integer adjustedTargetPartitionSize = 
PartitionsSpec.resolveHistoricalNullIfNeeded(targetPartitionSize);
+    Integer adjustedMaxPartitionSize = 
PartitionsSpec.resolveHistoricalNullIfNeeded(maxPartitionSize);
+
+    Property<Integer> target = Checks.checkAtMostOneNotNull(
+        DimensionBasedPartitionsSpec.TARGET_ROWS_PER_SEGMENT,
+        adjustedTargetRowsPerSegment,
+        DimensionBasedPartitionsSpec.TARGET_PARTITION_SIZE,
+        adjustedTargetPartitionSize
+    );
+
+    Property<Integer> max = Checks.checkAtMostOneNotNull(

Review comment:
       Removed the deprecated fields (`targetPartitionSize`, `maxPartitionSize`) from `MultiDimensionPartitionsSpec`.

##########
File path: 
core/src/main/java/org/apache/druid/indexer/partitions/SingleDimensionPartitionsSpec.java
##########
@@ -21,83 +21,52 @@
 
 
 import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonValue;
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import org.apache.druid.indexer.Checks;
-import org.apache.druid.indexer.Property;
 
 import javax.annotation.Nullable;
-import javax.validation.constraints.NotNull;
 import java.util.Collections;
-import java.util.List;
-import java.util.Objects;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * Partition a segment by a single dimension.
  */
-public class SingleDimensionPartitionsSpec implements 
DimensionBasedPartitionsSpec
+public class SingleDimensionPartitionsSpec extends MultiDimensionPartitionsSpec
 {
   public static final String NAME = "single_dim";
   static final String OLD_NAME = "dimension";  // for backward compatibility
 
-  private static final String PARITION_DIMENSION = "partitionDimension";
-  private static final String MAX_PARTITION_SIZE = "maxPartitionSize";
+  private static final String PARTITION_DIMENSION = "partitionDimension";
+
   private static final String FORCE_GUARANTEED_ROLLUP_COMPATIBLE = "";
 
-  private final Integer targetRowsPerSegment;
-  private final Integer maxRowsPerSegment;
   private final String partitionDimension;
-  private final boolean assumeGrouped;
-
-  // Values for these fields are derived from the one above:
-  private final int resolvedMaxRowPerSegment;
 
   @JsonCreator
   public SingleDimensionPartitionsSpec(

Review comment:
       Done. Thanks for the suggestion!

##########
File path: 
core/src/main/java/org/apache/druid/timeline/partition/MultiDimensionShardSpec.java
##########
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.timeline.partition;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Range;
+import com.google.common.collect.RangeSet;
+import org.apache.druid.data.input.InputRow;
+import org.apache.druid.data.input.StringTuple;
+import org.apache.druid.java.util.common.ISE;
+
+import javax.annotation.Nullable;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * {@link ShardSpec} for range partitioning based on multiple dimensions
+ */
+public class MultiDimensionShardSpec implements ShardSpec
+{
+  public static final int UNKNOWN_NUM_CORE_PARTITIONS = -1;
+
+  private final List<String> dimensions;
+  @Nullable
+  private final StringTuple start;
+  @Nullable
+  private final StringTuple end;
+  private final int partitionNum;
+  private final int numCorePartitions;
+
+  private final String firstDimStart;
+  private final String firstDimEnd;
+
+  /**
+   * @param dimensions   partition dimensions
+   * @param start        inclusive start of this range
+   * @param end          exclusive end of this range
+   * @param partitionNum unique ID for this shard
+   */
+  @JsonCreator
+  public MultiDimensionShardSpec(
+      @JsonProperty("dimensions") List<String> dimensions,
+      @JsonProperty("start") @Nullable StringTuple start,
+      @JsonProperty("end") @Nullable StringTuple end,
+      @JsonProperty("partitionNum") int partitionNum,
+      @JsonProperty("numCorePartitions") @Nullable Integer numCorePartitions 
// nullable for backward compatibility
+  )
+  {
+    Preconditions.checkArgument(partitionNum >= 0, "partitionNum >= 0");
+    Preconditions.checkArgument(
+        dimensions != null && !dimensions.isEmpty(),
+        "dimensions should be non-null and non-empty"
+    );
+
+    this.dimensions = dimensions;
+    this.start = start;
+    this.end = end;
+    this.partitionNum = partitionNum;
+    this.numCorePartitions = numCorePartitions == null ? 
UNKNOWN_NUM_CORE_PARTITIONS : numCorePartitions;
+    this.firstDimStart = getFirstValueOrNull(start);
+    this.firstDimEnd = getFirstValueOrNull(end);
+  }
+
+  @JsonProperty("dimensions")
+  public List<String> getDimensions()
+  {
+    return dimensions;
+  }
+
+  @Nullable
+  @JsonProperty("start")
+  public StringTuple getStart()
+  {
+    return start;
+  }
+
+  @Nullable
+  @JsonProperty("end")
+  public StringTuple getEnd()
+  {
+    return end;
+  }
+
+  @Override
+  @JsonProperty("partitionNum")
+  public int getPartitionNum()
+  {
+    return partitionNum;
+  }
+
+  @Override
+  @JsonProperty
+  public int getNumCorePartitions()
+  {
+    return numCorePartitions;
+  }
+
+  @Override
+  public ShardSpecLookup getLookup(final List<? extends ShardSpec> shardSpecs)
+  {
+    return createLookup(shardSpecs);
+  }
+
+  private static ShardSpecLookup createLookup(List<? extends ShardSpec> 
shardSpecs)
+  {
+    return (long timestamp, InputRow row) -> {
+      for (ShardSpec spec : shardSpecs) {
+        if (((MultiDimensionShardSpec) spec).isInChunk(row)) {
+          return spec;
+        }
+      }
+      throw new ISE("row[%s] doesn't fit in any shard[%s]", row, shardSpecs);
+    };
+  }
+
+  @Override
+  public List<String> getDomainDimensions()
+  {
+    return Collections.unmodifiableList(dimensions);
+  }
+
+  private Range<String> getFirstDimRange()
+  {
+    Range<String> range;
+    if (firstDimStart == null && firstDimEnd == null) {
+      range = Range.all();
+    } else if (firstDimStart == null) {
+      range = Range.atMost(firstDimEnd);
+    } else if (firstDimEnd == null) {
+      range = Range.atLeast(firstDimStart);
+    } else {
+      range = Range.closed(firstDimStart, firstDimEnd);
+    }
+    return range;
+  }
+
+  @Override
+  public boolean possibleInDomain(Map<String, RangeSet<String>> domain)
+  {
+    RangeSet<String> rangeSet = domain.get(dimensions.get(0));
+    if (rangeSet == null) {
+      return true;
+    }
+    return !rangeSet.subRangeSet(getFirstDimRange()).isEmpty();
+  }
+
+  @Override
+  public <T> PartitionChunk<T> createChunk(T obj)
+  {
+    if (numCorePartitions == UNKNOWN_NUM_CORE_PARTITIONS) {
+      return new StringPartitionChunk<>(start, end, partitionNum, obj);
+    } else {
+      return new NumberedPartitionChunk<>(partitionNum, numCorePartitions, 
obj);
+    }
+  }
+
+  @VisibleForTesting

Review comment:
       Fixed.

##########
File path: 
indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java
##########
@@ -54,12 +55,16 @@
 import java.util.Map;
 import java.util.stream.Collectors;
 
+import static org.apache.druid.timeline.partition.ShardSpecTestUtils.tupleOf;

Review comment:
       Replaced with `StringTuple.create()` in this test and other places.

##########
File path: 
core/src/main/java/org/apache/druid/indexer/partitions/MultiDimensionPartitionsSpec.java
##########
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.indexer.partitions;
+
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.apache.druid.indexer.Checks;
+import org.apache.druid.indexer.Property;
+
+import javax.annotation.Nullable;
+import javax.validation.constraints.NotNull;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Partition a segment by multiple dimensions.
+ */
+public class MultiDimensionPartitionsSpec implements 
DimensionBasedPartitionsSpec
+{
+  public static final String NAME = "multi_dim";
+
+  private static final String PARITION_DIMENSIONS = "partitionDimensions";
+
+  private static final String FORCE_GUARANTEED_ROLLUP_COMPATIBLE = "";
+
+  private final Integer targetRowsPerSegment;
+  private final Integer maxRowsPerSegment;
+  private final List<String> partitionDimensions;
+  private final boolean assumeGrouped;
+
+  // Value of this field is derived from targetRows and maxRows
+  private final int resolvedMaxRowPerSegment;
+
+  @JsonCreator
+  public MultiDimensionPartitionsSpec(
+      @JsonProperty(TARGET_ROWS_PER_SEGMENT) @Nullable Integer 
targetRowsPerSegment,
+      @JsonProperty(MAX_ROWS_PER_SEGMENT) @Nullable Integer maxRowsPerSegment,
+      @JsonProperty(PARITION_DIMENSIONS) List<String> partitionDimensions,
+      @JsonProperty("assumeGrouped") boolean assumeGrouped,  // false by 
default
+
+      // Deprecated properties preserved for backward compatibility:
+      @Deprecated @JsonProperty(TARGET_PARTITION_SIZE) @Nullable

Review comment:
       Removed.

##########
File path: 
indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/ArrayOfStringTuplesSerDe.java
##########
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.indexing.common.task.batch.parallel.distribution;
+
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
+import org.apache.datasketches.memory.internal.UnsafeUtil;
+import org.apache.druid.data.input.StringTuple;
+
+/**
+ * Serde for {@link StringTuple}.
+ *
+ * Implementation similar to {@link ArrayOfStringsSerDe}.
+ */
+public class ArrayOfStringTuplesSerDe extends ArrayOfItemsSerDe<StringTuple>
+{
+  private static final ArrayOfStringsSerDe STRINGS_SERDE = new 
ArrayOfStringsSerDe();
+
+  @Override
+  public byte[] serializeToByteArray(StringTuple[] items)
+  {
+    int length = 0;
+    final byte[][] itemsBytes = new byte[items.length][];
+    for (int i = 0; i < items.length; i++) {
+      itemsBytes[i] = STRINGS_SERDE.serializeToByteArray(items[i].toArray());
+      length += itemsBytes[i].length + Integer.BYTES;
+      length += items[i].size() + Integer.BYTES;

Review comment:
       Fixed. Thanks!

##########
File path: 
indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java
##########
@@ -92,20 +92,20 @@ public PartialRangeSegmentGenerateTask(
     this.intervalToPartitions = intervalToPartitions;
   }
 
-  private static String getPartitionDimension(ParallelIndexIngestionSpec 
ingestionSpec)
+  private static List<String> 
getPartitionDimensions(ParallelIndexIngestionSpec ingestionSpec)
   {
     PartitionsSpec partitionsSpec = 
ingestionSpec.getTuningConfig().getPartitionsSpec();
     Preconditions.checkArgument(
-        partitionsSpec instanceof SingleDimensionPartitionsSpec,
+        partitionsSpec instanceof MultiDimensionPartitionsSpec,
         "%s partitionsSpec required",

Review comment:
       Done.

##########
File path: 
core/src/main/java/org/apache/druid/indexer/partitions/MultiDimensionPartitionsSpec.java
##########
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.indexer.partitions;
+
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
+import org.apache.druid.indexer.Property;
+
+import javax.annotation.Nullable;
+import javax.validation.constraints.NotNull;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Range partition a segment by multiple dimensions.
+ */
+public class MultiDimensionPartitionsSpec implements 
DimensionBasedPartitionsSpec
+{
+  public static final String NAME = "multi_dim";
+
+  private final Integer targetRowsPerSegment;
+  private final Integer maxRowsPerSegment;
+  private final List<String> partitionDimensions;
+  private final boolean assumeGrouped;

Review comment:
       Added javadoc at the class level.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to