[ https://issues.apache.org/jira/browse/PHOENIX-4666?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16446182#comment-16446182 ]
ASF GitHub Bot commented on PHOENIX-4666: ----------------------------------------- Github user maryannxue commented on a diff in the pull request: https://github.com/apache/phoenix/pull/298#discussion_r183126183 --- Diff: phoenix-core/src/it/java/org/apache/phoenix/end2end/join/HashJoinPersistentCacheIT.java --- @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.phoenix.end2end.join; + +import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES; +import static org.junit.Assert.assertEquals; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.util.Properties; + +import org.apache.phoenix.end2end.join.HashJoinCacheIT.InvalidateHashCache; +import org.apache.phoenix.util.PropertiesUtil; +import org.apache.phoenix.util.SchemaUtil; +import org.apache.phoenix.util.TestUtil; +import org.junit.Test; + +public class HashJoinPersistentCacheIT extends BaseJoinIT { + + @Override + protected String getTableName(Connection conn, String virtualName) throws Exception { + String realName = super.getTableName(conn, virtualName); + TestUtil.addCoprocessor(conn, SchemaUtil.normalizeFullTableName(realName), InvalidateHashCache.class); + return realName; + } + + @Test + public void testPersistentCache() throws Exception { + Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); + Connection conn = DriverManager.getConnection(getUrl(), props); + + createTestTable(getUrl(), "CREATE TABLE IF NOT EXISTS states (state CHAR(2) NOT NULL, name VARCHAR NOT NULL CONSTRAINT my_pk PRIMARY KEY (state, name))"); + createTestTable(getUrl(), "CREATE TABLE IF NOT EXISTS cities (state CHAR(2) NOT NULL, city VARCHAR NOT NULL, population BIGINT CONSTRAINT my_pk PRIMARY KEY (state, city))"); + + conn.prepareStatement("UPSERT INTO states VALUES ('CA', 'California')").executeUpdate(); + conn.prepareStatement("UPSERT INTO states VALUES ('AZ', 'Arizona')").executeUpdate(); + conn.prepareStatement("UPSERT INTO cities VALUES ('CA', 'San Francisco', 50000)").executeUpdate(); + conn.prepareStatement("UPSERT INTO cities VALUES ('CA', 'Sacramento', 3000)").executeUpdate(); + conn.prepareStatement("UPSERT INTO cities VALUES ('AZ', 'Phoenix', 20000)").executeUpdate(); + conn.commit(); + + /* First, run query without using the persistent cache. 
This should return + * different results after an UPSERT takes place. + */ + ResultSet rs = conn.prepareStatement("SELECT SUM(population) FROM states s JOIN cities c ON c.state = s.state").executeQuery(); + rs.next(); + int population1 = rs.getInt(1); + + conn.prepareStatement("UPSERT INTO cities VALUES ('CA', 'Mt View', 1500)").executeUpdate(); + conn.commit(); + rs = conn.prepareStatement("SELECT SUM(population) FROM states s JOIN cities c ON c.state = s.state").executeQuery(); + rs.next(); + int population2 = rs.getInt(1); + + assertEquals(73000, population1); + assertEquals(74500, population2); + + /* Second, run query using the persistent cache. This should return the + * same results after an UPSERT takes place. + */ + rs = conn.prepareStatement("SELECT /*+ USE_PERSISTENT_CACHE */ SUM(population) FROM states s JOIN cities c ON c.state = s.state").executeQuery(); + rs.next(); + int population3 = rs.getInt(1); + + conn.prepareStatement("UPSERT INTO cities VALUES ('CA', 'Palo Alto', 2000)").executeUpdate(); + conn.commit(); + rs = conn.prepareStatement("SELECT /*+ USE_PERSISTENT_CACHE */ SUM(population) FROM states s JOIN cities c ON c.state = s.state").executeQuery(); + rs.next(); + int population4 = rs.getInt(1); + rs = conn.prepareStatement("SELECT SUM(population) FROM states s JOIN cities c ON c.state = s.state").executeQuery(); + rs.next(); + int population5 = rs.getInt(1); + + assertEquals(74500, population3); + assertEquals(74500, population4); + assertEquals(76500, population5); + } +} --- End diff -- Nice test. Since the persistent cache is supposed to be reusable across different queries, can we have another test in which the same cache is used for different queries? 
> Add a subquery cache that persists beyond the life of a query > ------------------------------------------------------------- > > Key: PHOENIX-4666 > URL: https://issues.apache.org/jira/browse/PHOENIX-4666 > Project: Phoenix > Issue Type: Improvement > Reporter: Marcell Ortutay > Assignee: Marcell Ortutay > Priority: Major > > The user list thread for additional context is here: > [https://lists.apache.org/thread.html/e62a6f5d79bdf7cd238ea79aed8886816d21224d12b0f1fe9b6bb075@%3Cuser.phoenix.apache.org%3E] > ---- > A Phoenix query may contain expensive subqueries, and moreover those > expensive subqueries may be used across multiple different queries. While > whole result caching is possible at the application level, it is not possible > to cache subresults in the application. This can cause bad performance for > queries in which the subquery is the most expensive part of the query, and > the application is powerless to do anything at the query level. It would be > good if Phoenix provided a way to cache subquery results, as it would provide > a significant performance gain. > An illustrative example: > SELECT * FROM table1 JOIN (SELECT id_1 FROM large_table WHERE x = 10) > expensive_result ON table1.id_1 = expensive_result.id_1 AND table1.id_1 = > {id} > In this case, the subquery "expensive_result" is expensive to compute, but it > doesn't change between queries. The rest of the query does because of the > {id} parameter. This means the application can't cache it, but it would be > good if there was a way to cache expensive_result. > Note that there is currently a coprocessor based "server cache", but the data > in this "cache" is not persisted across queries. It is deleted after a TTL > expires (30sec by default), or when the query completes. > This issue is fairly high priority for us at 23andMe and we'd be happy to > provide a patch with some guidance from Phoenix maintainers. 
We are currently > putting together a design document for a solution, and we'll post it to this > Jira ticket for review in a few days. -- This message was sent by Atlassian JIRA (v7.6.3#76005)