zstan commented on a change in pull request #9751: URL: https://github.com/apache/ignite/pull/9751#discussion_r791615695
########## File path: modules/benchmarks/src/main/java/org/apache/ignite/internal/benchmarks/jmh/sql/JmhSqlBenchmark.java ##########
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.benchmarks.jmh.sql;
+
+import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.IgniteDataStreamer;
+import org.apache.ignite.IgniteSystemProperties;
+import org.apache.ignite.Ignition;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.cache.query.SqlFieldsQuery;
+import org.apache.ignite.cache.query.annotations.QuerySqlField;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
+import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmark simple SQL queries.
+ */
+@State(Scope.Benchmark)
+@Fork(1)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@Warmup(iterations = 3, time = 5)
+@Measurement(iterations = 3, time = 5)
+public class JmhSqlBenchmark {
+    /** Count of server nodes. */
+    private static final int SRV_NODES_CNT = 3;
+
+    /** Keys count. */
+    private static final int KEYS_CNT = 100000;
+
+    /** Size of batch. */
+    private static final int BATCH_SIZE = 1000;
+
+    /** Partitions count. */
+    private static final int PARTS_CNT = 1024;
+
+    /** IP finder shared across nodes. */
+    private static final TcpDiscoveryVmIpFinder IP_FINDER = new TcpDiscoveryVmIpFinder(true);
+
+    /** Query engine. */
+    @Param({"H2", "CALCITE"})
+    private String engine;
+
+    /** Ignite client. */
+    private Ignite client;
+
+    /** Servers. */
+    private final Ignite[] servers = new Ignite[SRV_NODES_CNT];
+
+    /** Cache. */
+    private IgniteCache<Integer, Item> cache;
+
+    /**
+     * Create Ignite configuration.
+     *
+     * @param igniteInstanceName Ignite instance name.
+     * @return Configuration.
+     */
+    private IgniteConfiguration configuration(String igniteInstanceName) {
+        IgniteConfiguration cfg = new IgniteConfiguration();
+
+        cfg.setIgniteInstanceName(igniteInstanceName);
+        cfg.setLocalHost("127.0.0.1");
+        cfg.setDiscoverySpi(new TcpDiscoverySpi().setIpFinder(IP_FINDER));
+
+        return cfg;
+    }
+
+    /**
+     * Initiate Ignite and caches.
+     */
+    @Setup(Level.Trial)
+    public void setup() {
+        if ("CALCITE".equals(engine))
+            System.setProperty(IgniteSystemProperties.IGNITE_EXPERIMENTAL_SQL_ENGINE, "true");
+        else
+            System.clearProperty(IgniteSystemProperties.IGNITE_EXPERIMENTAL_SQL_ENGINE);
+
+        for (int i = 0; i < SRV_NODES_CNT; i++)
+            servers[i] = Ignition.start(configuration("server" + i));
+
+        client = Ignition.start(configuration("client").setClientMode(true));
+
+        cache = client.getOrCreateCache(new CacheConfiguration<Integer, Item>("CACHE")
+            .setIndexedTypes(Integer.class, Item.class)
+            .setAffinity(new RendezvousAffinityFunction(false, PARTS_CNT))
+        );
+
+        try (IgniteDataStreamer<Integer, Item> ds = client.dataStreamer("CACHE")) {
+            for (int i = 0; i < KEYS_CNT; i++)
+                ds.addData(i, new Item(i));
+        }
+    }
+
+    /**
+     * Stop Ignite instance.
+     */
+    @TearDown
+    public void tearDown() {
+        client.close();
+
+        for (Ignite ignite : servers)
+            ignite.close();
+    }
+
+    /**
+     * Query unique value (full scan).
+     */
+    @Benchmark
+    public void querySimpleUnique() {
+        int key = ThreadLocalRandom.current().nextInt(KEYS_CNT);
+
+        List<?> res = executeSql("SELECT name FROM Item WHERE fld=?", key);
+
+        assert res.size() == 1;
+    }
+
+    /**
+     * Query unique value (indexed).
+     */
+    @Benchmark
+    public void querySimpleUniqueIndexed() {
+        int key = ThreadLocalRandom.current().nextInt(KEYS_CNT);
+
+        List<?> res = executeSql("SELECT name FROM Item WHERE fldIdx=?", key);
+
+        assert res.size() == 1;
+    }
+
+    /**
+     * Query batch (full scan).
+     */
+    @Benchmark
+    public void querySimpleBatch() {
+        int key = ThreadLocalRandom.current().nextInt(KEYS_CNT);
+
+        List<?> res = executeSql("SELECT name FROM Item WHERE fldBatch=?", key / BATCH_SIZE);
+
+        assert res.size() == BATCH_SIZE;
+    }
+
+    /**
+     * Query batch (indexed).
+     */
+    @Benchmark
+    public void querySimpleBatchIndexed() {
+        int key = ThreadLocalRandom.current().nextInt(KEYS_CNT);
+
+        List<?> res = executeSql("SELECT name FROM Item WHERE fldIdxBatch=?", key / BATCH_SIZE);
+
+        assert res.size() == BATCH_SIZE;
+    }
+
+    /**
+     * Query with group by and aggregate.
+     */
+    @Benchmark
+    public void queryGroupBy() {
+        List<?> res = executeSql("SELECT fldBatch, AVG(fld) FROM Item GROUP BY fldBatch");
+
+        assert res.size() == KEYS_CNT / BATCH_SIZE;
+    }
+
+    /**
+     * Query with indexed field group by and aggregate.
+     */
+    @Benchmark
+    public void queryGroupByIndexed() {
+        List<?> res = executeSql("SELECT fldIdxBatch, AVG(fld) FROM Item GROUP BY fldIdxBatch");
+
+        assert res.size() == KEYS_CNT / BATCH_SIZE;
+    }
+
+    /**
+     * Query with sorting (full set).
+     */
+    @Benchmark
+    public void queryOrderByFull() {
+        List<?> res = executeSql("SELECT name, fld FROM Item ORDER BY fld DESC");
+
+        assert res.size() == KEYS_CNT;
+    }
+
+    /**
+     * Query with sorting (batch).
+     */
+    @Benchmark
+    public void queryOrderByBatch() {
+        int key = ThreadLocalRandom.current().nextInt(KEYS_CNT);

Review comment:
   Instead of this random implementation it would be clearer to use a pre-generated array of keys from a pseudo-random generator; such a utility already exists in one of the utility classes, but I can't find it :) The count of operations must be predictable.
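A minimal sketch of what the comment above seems to suggest: generate the keys once with a fixed seed and cycle through them, so every run (and every engine) executes exactly the same, predictable sequence of operations. This is not part of the patch; the class and names below (`PreGeneratedKeys`, the `10_000` sequence length, seed `42`) are purely illustrative, and the sketch assumes a single benchmark thread, as in the current `@State(Scope.Benchmark)` setup.

```java
import java.util.Random;

/**
 * Hypothetical helper: a fixed-seed, pre-generated key sequence that benchmark
 * methods could use instead of ThreadLocalRandom, making the executed workload
 * reproducible between runs.
 */
final class PreGeneratedKeys {
    private final int[] keys;
    private int idx;

    PreGeneratedKeys(int cnt, int bound, long seed) {
        Random rnd = new Random(seed); // fixed seed => reproducible sequence

        keys = new int[cnt];

        for (int i = 0; i < cnt; i++)
            keys[i] = rnd.nextInt(bound);
    }

    /** Next key in the pre-generated sequence (wraps around when exhausted). */
    int next() {
        int k = keys[idx];

        idx = (idx + 1) % keys.length;

        return k;
    }
}

// Possible usage inside JmhSqlBenchmark (names are illustrative):
//     private final PreGeneratedKeys keys = new PreGeneratedKeys(10_000, KEYS_CNT, 42);
//     ...
//     int key = keys.next(); // instead of ThreadLocalRandom.current().nextInt(KEYS_CNT)
```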
########## File path: modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/ExecutionServiceImpl.java ##########
@@ -555,9 +565,16 @@ private QueryPlan prepareFragment(BaseQueryContext ctx, String jsonFragment) {
                 fragment.serialized(),
                 ectx.topologyVersion(),
                 fragmentDesc,
-                qry.parameters());
+                fragmentsPerNode.get(nodeId).intValue(),
+                qry.parameters(),
+                parametersMarshalled
+            );
 
             messageService().send(nodeId, req);
+
+            // Avoid double marshaling.

Review comment:
   I don't understand the comment and what is being avoided here, can you explain, please?

########## File path: modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/ExecutionServiceImpl.java ##########
@@ -534,7 +537,12 @@ private QueryPlan prepareFragment(BaseQueryContext ctx, String jsonFragment) {
         qry.run(ectx, plan, node);
 
-        // start remote execution
+        Map<UUID, Long> fragmentsPerNode = fragments.stream()
+            .filter(f -> f != F.first(fragments))

Review comment:
```suggestion
            .skip(1)
```

########## File path: modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/rel/ScanNode.java ##########
@@ -58,8 +60,20 @@ public ScanNode(ExecutionContext<Row> ctx, RelDataType rowType, Iterable<Row> sr
         requested = rowsCnt;
 
-        if (!inLoop)
-            context().execute(this::push, this::onError);
+        if (!inLoop) {
+            if (firstReq) {

Review comment:
   This looks like a stopgap to me: what you win here you can lose on real concurrent processing.

########## File path: modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/message/QueryStartRequest.java ##########
@@ -54,14 +57,24 @@
     /** */
     @SuppressWarnings("AssignmentOrReturnOfFieldWithMutableType")
-    public QueryStartRequest(UUID qryId, String schema, String root, AffinityTopologyVersion ver,
-        FragmentDescription fragmentDesc, Object[] params) {
+    public QueryStartRequest(
+        UUID qryId,
+        String schema,
+        String root,
+        AffinityTopologyVersion ver,
+        FragmentDescription fragmentDesc,
+        int totalFragmentsCnt,
+        Object[] params,
+        byte[] paramsBytes
+    ) {
         this.qryId = qryId;
         this.schema = schema;
         this.root = root;
         this.ver = ver;
         this.fragmentDesc = fragmentDesc;
+        this.totalFragmentsCnt = totalFragmentsCnt;
         this.params = params;
+        this.paramsBytes = paramsBytes;

Review comment:
   If I understand correctly, this optimization gets rid of serializing the same data multiple times? If so, the code has become harder to understand.

########## File path: modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/metadata/ColocationGroup.java ##########
@@ -47,12 +49,7 @@
 /** */
 public class ColocationGroup implements MarshalableMessage {
     /** */
-    private static final int SYNTHETIC_PARTITIONS_COUNT =
-        IgniteSystemProperties.getInteger("IGNITE_CALCITE_SYNTHETIC_PARTITIONS_COUNT", 512);
-
-    /** */
-    @GridDirectCollection(Long.class)
-    private List<Long> sourceIds;
+    private long[] sourceIds;

Review comment:
   GridIntList, maybe?

########## File path: modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/metadata/ColocationGroup.java ##########
@@ -62,9 +59,8 @@
     @GridDirectTransient
     private List<List<UUID>> assignments;
 
-    /** */
-    @GridDirectCollection(Message.class)
-    private List<UUIDCollectionMessage> assignments0;
+    /** Marshalled assignments. */
+    private int[] assignments0;

Review comment:
   You already have a local `List<List<UUID>> assignments0`, please change this naming.
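On the `QueryStartRequest` question above: the apparent intent of carrying both `params` and `paramsBytes` is that the coordinator serializes the query parameters once and then reuses the same byte array in every per-node start request, instead of re-marshalling identical data for each recipient. Below is a standalone sketch of that idea only; it uses plain Java serialization as a stand-in for Ignite's marshaller, and the `sendStartRequests(...)`/`send(...)` helpers are hypothetical, not the actual `ExecutionServiceImpl` code.

```java
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.List;
import java.util.UUID;

public class MarshalOnceExample {
    /** Serializes the query parameters a single time. */
    static byte[] marshalParams(Object[] params) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();

        try (ObjectOutputStream out = new ObjectOutputStream(bos)) {
            out.writeObject(params);
        }

        return bos.toByteArray();
    }

    /** Builds one request per node, sharing the already-marshalled parameter bytes. */
    static void sendStartRequests(List<UUID> nodeIds, Object[] params) throws IOException {
        byte[] paramsBytes = marshalParams(params); // marshalled once, not once per node

        for (UUID nodeId : nodeIds)
            send(nodeId, paramsBytes); // each request reuses the same byte[]
    }

    /** Stand-in for the real per-node send (e.g. messageService().send(nodeId, req)). */
    static void send(UUID nodeId, byte[] paramsBytes) {
        System.out.printf("sending %d bytes of params to %s%n", paramsBytes.length, nodeId);
    }
}
```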
########## File path: modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/rel/ScanNode.java ##########
@@ -58,8 +60,20 @@ public ScanNode(ExecutionContext<Row> ctx, RelDataType rowType, Iterable<Row> sr
         requested = rowsCnt;
 
-        if (!inLoop)
-            context().execute(this::push, this::onError);
+        if (!inLoop) {
+            if (firstReq) {

Review comment:
   I'm not debating context switching, we are on the same side there; I'm talking about overall concurrent throughput here.
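For readers following the `ScanNode` discussion, here is a heavily simplified, hypothetical illustration of the pattern being debated (it is a guess at the intent of the `firstReq` flag, not the actual patch): serve the first demand inline on the caller's thread to avoid a dispatch/context switch, and push later demands onto the execution pool. The win is lower latency for the first batch; the concern raised above is that inline work occupies the calling thread and can reduce overall throughput when many scans run concurrently. A single requesting thread is assumed, as in the execution-node model.

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class InlineFirstRequestExample {
    private final ExecutorService exec = Executors.newFixedThreadPool(4);

    /** Whether the next demand is the first one (single requesting thread assumed). */
    private boolean firstReq = true;

    /** Serves the first demand inline, offloads the rest to the pool. */
    void request(Runnable pushRows) {
        if (firstReq) {
            firstReq = false;

            pushRows.run();        // no task submission / context switch for the first batch
        }
        else
            exec.submit(pushRows); // later batches go through the executor as before
    }
}
```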
