jihoonson commented on a change in pull request #11597:
URL: https://github.com/apache/druid/pull/11597#discussion_r725439798



##########
File path: 
sql/src/test/java/org/apache/druid/sql/calcite/schema/SegmentsTableBenchamrkBase.java
##########
@@ -0,0 +1,374 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.sql.calcite.schema;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.druid.client.BrokerSegmentWatcherConfig;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.data.input.impl.DimensionSchema;
+import org.apache.druid.data.input.impl.DimensionsSpec;
+import org.apache.druid.data.input.impl.StringDimensionSchema;
+import org.apache.druid.discovery.DruidLeaderClient;
+import org.apache.druid.discovery.DruidNodeDiscoveryProvider;
+import org.apache.druid.discovery.NodeRole;
+import org.apache.druid.jackson.DefaultObjectMapper;
+import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.java.util.common.NonnullPair;
+import org.apache.druid.java.util.common.granularity.Granularity;
+import org.apache.druid.java.util.common.guava.Sequence;
+import org.apache.druid.java.util.common.guava.Sequences;
+import org.apache.druid.java.util.common.io.Closer;
+import org.apache.druid.query.QueryRunnerFactoryConglomerate;
+import org.apache.druid.query.QuerySegmentWalker;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
+import org.apache.druid.query.metadata.metadata.SegmentAnalysis;
+import org.apache.druid.segment.incremental.IncrementalIndexSchema;
+import org.apache.druid.segment.loading.SegmentLoader;
+import org.apache.druid.server.QueryLifecycleFactory;
+import org.apache.druid.server.QueryStackTests;
+import org.apache.druid.server.SegmentManager;
+import org.apache.druid.server.security.AuthTestUtils;
+import org.apache.druid.server.security.AuthenticationResult;
+import org.apache.druid.server.security.AuthorizerMapper;
+import org.apache.druid.sql.calcite.planner.Calcites;
+import org.apache.druid.sql.calcite.planner.PlannerConfig;
+import org.apache.druid.sql.calcite.planner.PlannerFactory;
+import org.apache.druid.sql.calcite.planner.SegmentsTableConfig;
+import org.apache.druid.sql.calcite.util.CalciteTests;
+import org.apache.druid.sql.calcite.util.CalciteTests.FakeDruidNodeDiscoveryProvider;
+import org.apache.druid.sql.calcite.util.CalciteTests.FakeHttpClient;
+import org.apache.druid.sql.calcite.util.CalciteTests.FakeServerInventoryView;
+import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
+import org.apache.druid.sql.calcite.util.TestServerInventoryView;
+import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.timeline.SegmentId;
+import org.apache.druid.timeline.SegmentWithOvershadowedStatus;
+import org.apache.druid.timeline.partition.NumberedShardSpec;
+import org.easymock.EasyMock;
+import org.joda.time.Interval;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+
+public abstract class SegmentsTableBenchamrkBase

Review comment:
       Thanks. I renamed it to `SegmentsTableQueryTestSuite`.

##########
File path: 
sql/src/test/java/org/apache/druid/sql/calcite/schema/SegmentsTableBenchmarkQueryTest.java
##########
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.sql.calcite.schema;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.calcite.sql.parser.SqlParseException;
+import org.apache.calcite.tools.RelConversionException;
+import org.apache.calcite.tools.ValidationException;
+import org.apache.druid.common.guava.SettableSupplier;
+import org.apache.druid.sql.calcite.planner.DruidPlanner;
+import org.apache.druid.sql.calcite.planner.PlannerConfig;
+import org.apache.druid.sql.calcite.planner.PlannerResult;
+import org.apache.druid.sql.calcite.planner.SegmentsTableConfig;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.IOException;
+import java.util.List;
+
+@RunWith(Parameterized.class)
+public class SegmentsTableBenchmarkQueryTest extends SegmentsTableBenchamrkBase

Review comment:
       Hmm, my intention was that this test verifies the results of the queries 
used for the segments table benchmark. Is `SegmentsTableBenchmarkQueriesTest` 
better?

##########
File path: 
sql/src/main/java/org/apache/druid/sql/calcite/schema/ObjectStringCache.java
##########
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.sql.calcite.schema;
+
+import org.apache.druid.common.guava.SettableSupplier;
+
+import javax.annotation.Nullable;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.Function;
+
+/**
+ * A simple cache to hold string representation of the given keys.
+ * This cache can be used to avoid repetitive calls of expensive {@link Object#toString()}
+ * or JSON serializations.
+ *
+ * This cache uses a simple caching policy that keeps only the first N entries.
+ * The cache size is limited by the {@link #cacheSize} constructor parameter.
+ *
+ * @param <K> cache key type
+ *
+ * @see SegmentsTableRow#toObjectArray
+ */
+class ObjectStringCache<K>

Review comment:
       Good question. I didn't use Caffeine at first because 1) the 
strategy of caching the first N entries will likely work well for the access 
pattern of segments table columns in most cases, and 2) I wasn't sure about the 
overhead of Caffeine's concurrent access support. For 1), I still think 
caching the first N entries would probably be OK in most cases, but perhaps 
Caffeine's Window TinyLfu could be better in some edge cases. For 2), I ran the 
benchmark again with a Caffeine-based cache with 1MB of cache size and didn't 
see any difference in the benchmark results, as seen below (the numbers are 
slightly larger than the ones in the PR description, but I think it's my 
computer, as I got similarly slightly larger numbers with the original 
map-based cache). So I switched the cache to using Caffeine instead. 
   
   ```
   Benchmark                             (availableSegmentsInterval)  
(forceHashBasedMerge)  (numSegmentsPerInterval)  (publishedSegmentsInterval)  
(segmentGranularity)  (sql)  Mode  Cnt     Score    Error  Units
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
       true                        10               2021-01-02/P3Y              
     DAY      0  avgt   10    18.890 ±  1.291  ms/op
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
       true                        10               2021-01-02/P3Y              
     DAY      1  avgt   10    25.546 ±  0.764  ms/op
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
       true                       100               2021-01-02/P3Y              
     DAY      0  avgt   10   201.993 ±  2.534  ms/op
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
       true                       100               2021-01-02/P3Y              
     DAY      1  avgt   10   208.421 ±  2.847  ms/op
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
       true                      1000               2021-01-02/P3Y              
     DAY      0  avgt   10  2195.274 ± 70.780  ms/op
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
       true                      1000               2021-01-02/P3Y              
     DAY      1  avgt   10  2743.012 ± 88.182  ms/op
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
      false                        10               2021-01-02/P3Y              
     DAY      0  avgt   10    15.990 ±  0.355  ms/op
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
      false                        10               2021-01-02/P3Y              
     DAY      1  avgt   10    22.791 ±  0.625  ms/op
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
      false                       100               2021-01-02/P3Y              
     DAY      0  avgt   10   135.999 ±  1.466  ms/op
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
      false                       100               2021-01-02/P3Y              
     DAY      1  avgt   10   145.376 ±  1.334  ms/op
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
      false                      1000               2021-01-02/P3Y              
     DAY      0  avgt   10  1254.524 ± 39.122  ms/op
   SegmentsTableBenchmark.segmentsTable               2021-01-01/P3Y            
      false                      1000               2021-01-02/P3Y              
     DAY      1  avgt   10  1891.175 ± 25.529  ms/op
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to