Hi,
first let me say that I may have the wrong expectations. I'm migrating a
project using python and sqlite to Java and H2. Inserts, especially
concurrent ones, are a lot faster. But now some of my code takes multiple
times as before, specifically those parts that do extensive querying on the
database.
This is an example query:
SELECT
avg(count)
FROM (
(SELECT
SOME_ID,
cast(count(SOME_ID) AS FLOAT) AS count
FROM TESTTABLE
WHERE SOME_ID = 0
GROUP BY SOME_ID
, truncate(time)));
The idea of the query is to calculate the average number of entries per day.
I created a test database with 400k entries which takes up about 160MB. On
a new database the query takes more than a second (700ms from the shell).
I repeated the same steps with an sqlite database (in Java) and got a
database that's 50MB big and on which the query takes less than 100ms,
usually about 50ms.
How can this be explained and, ideally, optimized? I'm quite happy with the
rest of my code, but the calculation of statistics (which involves a number
of database accesses) now takes four times as long.
The output of explain analyze:
PLAN
SELECT
AVG(COUNT)
FROM (
SELECT
SOME_ID,
CAST(COUNT(SOME_ID) AS DOUBLE) AS COUNT
FROM PUBLIC.TESTTABLE
WHERE SOME_ID = 0
GROUP BY SOME_ID, TRUNCATE(TIME)
) _0
/* SELECT
SOME_ID,
CAST(COUNT(SOME_ID) AS DOUBLE) AS COUNT
FROM PUBLIC.TESTTABLE
/++ PUBLIC.INDEX1: SOME_ID = 0 ++/
/++ scanCount: 40001 ++/
WHERE SOME_ID = 0
GROUP BY SOME_ID, TRUNCATE(TIME)
*/
/* scanCount: 367 */
/*
reads: 10278
*/
(1 row, 695 ms)
and of the profiler:
Profiler: top 3 stack trace(s) of of 1787 ms of 104 thread dumps:
46/109 (42%):
at org.h2.store.fs.FileNio.read(FilePathNio.java:74)
at org.h2.mvstore.DataUtils.readFully(DataUtils.java:421)
at org.h2.mvstore.FileStore.readFully(FileStore.java:98)
at org.h2.mvstore.Page.read(Page.java:190)
at org.h2.mvstore.MVStore.readPage(MVStore.java:1952)
at org.h2.mvstore.MVMap.readPage(MVMap.java:741)
at org.h2.mvstore.Page.getChildPage(Page.java:217)
at org.h2.mvstore.MVMap.binarySearch(MVMap.java:473)
at org.h2.mvstore.MVMap.binarySearch(MVMap.java:474)
at org.h2.mvstore.MVMap.get(MVMap.java:455)
at org.h2.mvstore.db.TransactionStore$TransactionMap.getValue(
TransactionStore.java:1203)
at org.h2.mvstore.db.TransactionStore$TransactionMap.get(TransactionStore.
java:1180)
at org.h2.mvstore.db.TransactionStore$TransactionMap.get(TransactionStore.
java:1148)
at org.h2.mvstore.db.MVPrimaryIndex.getRow(MVPrimaryIndex.java:216)
at org.h2.mvstore.db.MVTable.getRow(MVTable.java:466)
at org.h2.mvstore.db.MVSecondaryIndex$MVStoreCursor.get(MVSecondaryIndex.
java:489)
at org.h2.index.IndexCursor.get(IndexCursor.java:288)
at org.h2.table.TableFilter.getValue(TableFilter.java:1087)
at org.h2.expression.ExpressionColumn.getValue(ExpressionColumn.java:186)
at org.h2.expression.Function.getValueWithArgs(Function.java:1188)
at org.h2.expression.Function.getValue(Function.java:591)
at org.h2.command.dml.Select.queryGroup(Select.java:324)
at org.h2.command.dml.Select.queryWithoutCache(Select.java:620)
at org.h2.command.dml.Query.queryWithoutCacheLazyCheck(Query.java:114)
at org.h2.command.dml.Query.query(Query.java:371)
at org.h2.command.dml.Query.query(Query.java:333)
at org.h2.index.ViewIndex.find(ViewIndex.java:291)
at org.h2.index.ViewIndex.find(ViewIndex.java:161)
at org.h2.index.BaseIndex.find(BaseIndex.java:128)
at org.h2.index.IndexCursor.find(IndexCursor.java:169)
at org.h2.table.TableFilter.next(TableFilter.java:468)
at org.h2.command.dml.Select.queryGroup(Select.java:311)
at org.h2.command.dml.Select.queryWithoutCache(Select.java:620)
at org.h2.command.dml.Query.queryWithoutCacheLazyCheck(Query.java:114)
at org.h2.command.dml.Query.query(Query.java:371)
at org.h2.command.dml.Query.query(Query.java:333)
at org.h2.command.CommandContainer.query(CommandContainer.java:113)
at org.h2.command.Command.executeQuery(Command.java:201)
at org.h2.jdbc.JdbcStatement.executeQuery(JdbcStatement.java:81)
at org.nzbhydra.TestPerformance.executeTimedQueryH2(TestPerformance.java:68)
at org.nzbhydra.TestPerformance.testH2(TestPerformance.java:59)
at org.nzbhydra.TestPerformance.main(TestPerformance.java:37)
7/109 (6%):
at org.h2.store.fs.FileNio.read(FilePathNio.java:74)
at org.h2.mvstore.DataUtils.readFully(DataUtils.java:421)
at org.h2.mvstore.FileStore.readFully(FileStore.java:98)
at org.h2.mvstore.Page$PageChildren.read(Page.java:1043)
at org.h2.mvstore.MVStore.readPageChunkReferences(MVStore.java:1387)
at org.h2.mvstore.MVStore.collectReferencedChunks(MVStore.java:1332)
at org.h2.mvstore.MVStore.collectReferencedChunks(MVStore.java:1337)
at org.h2.mvstore.MVStore.collectReferencedChunks(MVStore.java:1317)
at org.h2.mvstore.MVStore.freeUnusedChunks(MVStore.java:1276)
at org.h2.mvstore.MVStore.storeNowTry(MVStore.java:1063)
at org.h2.mvstore.MVStore.storeNow(MVStore.java:1050)
at org.h2.mvstore.MVStore.commitAndSave(MVStore.java:1039)
at org.h2.mvstore.MVStore.writeInBackground(MVStore.java:2489)
at org.h2.mvstore.MVStore$BackgroundWriterThread.run(MVStore.java:2720)
4/109 (3%):
at org.h2.mvstore.Page.binarySearch(Page.java:344)
at org.h2.mvstore.MVMap.binarySearch(MVMap.java:466)
at org.h2.mvstore.MVMap.binarySearch(MVMap.java:474)
at org.h2.mvstore.MVMap.get(MVMap.java:455)
at org.h2.mvstore.db.TransactionStore$TransactionMap.getValue(
TransactionStore.java:1203)
at org.h2.mvstore.db.TransactionStore$TransactionMap.get(TransactionStore.
java:1180)
at org.h2.mvstore.db.TransactionStore$TransactionMap.get(TransactionStore.
java:1148)
at org.h2.mvstore.db.TransactionStore$TransactionMap.containsKey(
TransactionStore.java:1168)
at org.h2.mvstore.db.TransactionStore$TransactionMap$1.fetchNext(
TransactionStore.java:1415)
at org.h2.mvstore.db.TransactionStore$TransactionMap$1.next(TransactionStore
.java:1430)
at org.h2.mvstore.db.MVSecondaryIndex$MVStoreCursor.next(MVSecondaryIndex.
java:507)
at org.h2.index.IndexCursor.next(IndexCursor.java:305)
at org.h2.table.TableFilter.next(TableFilter.java:499)
at org.h2.command.dml.Select.queryGroup(Select.java:311)
at org.h2.command.dml.Select.queryWithoutCache(Select.java:620)
at org.h2.command.dml.Query.queryWithoutCacheLazyCheck(Query.java:114)
at org.h2.command.dml.Query.query(Query.java:371)
at org.h2.command.dml.Query.query(Query.java:333)
at org.h2.index.ViewIndex.find(ViewIndex.java:291)
at org.h2.index.ViewIndex.find(ViewIndex.java:161)
at org.h2.index.BaseIndex.find(BaseIndex.java:128)
at org.h2.index.IndexCursor.find(IndexCursor.java:169)
at org.h2.table.TableFilter.next(TableFilter.java:468)
at org.h2.command.dml.Select.queryGroup(Select.java:311)
at org.h2.command.dml.Select.queryWithoutCache(Select.java:620)
at org.h2.command.dml.Query.queryWithoutCacheLazyCheck(Query.java:114)
at org.h2.command.dml.Query.query(Query.java:371)
at org.h2.command.dml.Query.query(Query.java:333)
at org.h2.command.CommandContainer.query(CommandContainer.java:113)
at org.h2.command.Command.executeQuery(Command.java:201)
at org.h2.jdbc.JdbcStatement.executeQuery(JdbcStatement.java:81)
at org.nzbhydra.TestPerformance.executeTimedQueryH2(TestPerformance.java:68)
at org.nzbhydra.TestPerformance.testH2(TestPerformance.java:59)
at org.nzbhydra.TestPerformance.main(TestPerformance.java:37)
summary:
53%: org.h2.store.fs
18%: org.h2.mvstore
5%: org.h2.value
5%: org.h2.mvstore.cache
5%: org.h2.util
5%: org.h2.expression
I understand that reading the whole database file takes some time, but why
then is sqlite so much faster?
and this is the table:
CREATE TABLE TESTTABLE
(
ID INTEGER PRIMARY KEY NOT NULL,
ERROR VARCHAR(4000),
RESULT VARCHAR(255),
SOME_ID INTEGER,
TIME TIMESTAMP
);
CREATE INDEX INDEX1 ON TESTTABLE (SOME_ID);
CREATE INDEX INDEX2 ON TESTTABLE (TIME);
I'm also wondering why the database file size increases by 50MB on my first
query, but that's not that important.
I've attached a class to reproduce this.
Thanks so much.
PS: If this is a double post, sorry, my browser isn't handling google's GUI
well.
--
You received this message because you are subscribed to the Google Groups "H2
Database" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/h2-database.
For more options, visit https://groups.google.com/d/optout.
package org.nzbhydra;
import org.h2.util.Profiler;
import java.io.File;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.file.Files;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
public class TestPerformance {
private static Random random = new Random();
private static final String CREATE_TABLE = "CREATE TABLE TESTTABLE\n" +
"(\n" +
" ID INTEGER PRIMARY KEY NOT NULL,\n" +
" ERROR VARCHAR(4000),\n" +
" RESULT VARCHAR(255),\n" +
" SOME_ID INTEGER,\n" +
" TIME TIMESTAMP\n" +
");\n" +
"CREATE INDEX INDEX1 ON TESTTABLE (SOME_ID);\n" +
"CREATE INDEX INDEX2 ON TESTTABLE (TIME);";
public static void main(String[] args) throws Exception {
//First query on fresh database takes about 1200ms
//Size of new database is 160MB, size of database on which query was executed is 179MB
testH2();
//First query on frsh sqlite database usually takes less than 100ms
//Size of new database is 50MB
//testSqlite();
}
private static void testH2() throws Exception {
getOrCreateTestH2Database();
Class.forName("org.h2.Driver");
String url = "jdbc:h2:file:./copyh2";
Connection conn = DriverManager.getConnection(url, "SA", "");
Statement statement = conn.createStatement();
//Warmup
statement.executeQuery("SELECT count(*) FROM TESTTABLE");
Profiler prof = new Profiler();
prof.startCollecting();
executeTimedQueryH2(statement);
prof.stopCollecting();
statement.close();
conn.close();
System.out.println(prof.getTop(3));
}
private static void executeTimedQueryH2(Statement statement) throws SQLException {
long before = System.currentTimeMillis();
statement.executeQuery("SELECT\n" +
" SOME_ID,\n" +
" avg(count)\n" +
"FROM (\n" +
" (SELECT\n" +
" SOME_ID,\n" +
" count(SOME_ID) AS count\n" +
" FROM TESTTABLE\n" +
" WHERE SOME_ID = " + random.nextInt(9) +
" GROUP BY SOME_ID\n" +
" , truncate(time)))\n" +
"GROUP BY SOME_ID");
System.out.println("H2 querying took: " + (System.currentTimeMillis() - before));
}
private static void getOrCreateTestH2Database() throws ClassNotFoundException, SQLException, IOException {
File newFile = new File("newh2.mv.db");
if (!newFile.exists()) {
System.out.println("Creating new test database");
Connection conn;
Class.forName("org.h2.Driver");
conn = DriverManager.getConnection("jdbc:h2:file:./newh2", "SA", "");
Statement statement = conn.createStatement();
statement.executeUpdate(CREATE_TABLE);
long before = System.currentTimeMillis();
int count = 1;
Random random = new Random();
StringBuilder sql;
for (int i = 0; i < 10; i++) {
for (int j = 0; j < 20; j++) {
sql = new StringBuilder();
sql.append("insert into TESTTABLE values ");
List<String> values = new ArrayList<>();
for (int k = 0; k < 2000; k++) {
String date = "DATEADD('SECOND', " + ThreadLocalRandom.current().nextLong(1474265503, 1505801503) + ", DATE '1970-01-01')";
values.add("(" + count++ + ", '" + new BigInteger(130, random).toString(32) + "', '" + new BigInteger(130, random).toString(32) + "'," + i + "," + date + ")");
}
sql.append(join(values)).append(";");
statement.executeUpdate(sql.toString());
}
}
statement.close();
conn.close();
System.out.println("Creating test data took: " + (System.currentTimeMillis() - before));
}
//Use a copy so we're starting off a fresh database
File copy = new File("copyh2.mv.db");
copy.delete();
Files.copy(newFile.toPath(), copy.toPath());
}
private static String join(List<String> parts) {
StringBuilder builder = new StringBuilder();
for (int i = 0; i < parts.size() - 1; i++) {
String part = parts.get(i);
builder.append(part).append(", ");
}
return builder.append(parts.get(parts.size() - 1)).toString();
}
/*
private static void testSqlite() throws Exception {
createTestSqliteDatabase();
Class.forName("org.sqlite.JDBC");
Connection conn = DriverManager.getConnection("jdbc:sqlite:copysqlite.db", "", "");
Statement statement = conn.createStatement();
//Warmup
statement.executeQuery("SELECT count(*) FROM TESTTABLE");
executeTimedQuerySqlite(statement);
}
private static void executeTimedQuerySqlite(Statement statement) throws SQLException {
long before = System.currentTimeMillis();
statement.executeQuery("SELECT\n" +
" SOME_ID,\n" +
" avg(count)\n" +
"FROM (\n" +
" (SELECT\n" +
" SOME_ID,\n" +
" count(SOME_ID) AS count\n" +
" FROM TESTTABLE\n" +
" WHERE SOME_ID = " + random.nextInt(9) +
" GROUP BY SOME_ID\n" +
" , date(time)))\n" +
"GROUP BY SOME_ID");
System.out.println("SQLite querying took: " + (System.currentTimeMillis() - before));
}
private static void createTestSqliteDatabase() throws ClassNotFoundException, SQLException, IOException {
File newFile = new File("newsqlite.db");
File copy = new File("copysqlite.db");
if (!newFile.exists()) {
System.out.println("Creating new test database");
Connection conn;
Class.forName("org.sqlite.JDBC");
conn = DriverManager.getConnection("jdbc:sqlite:newsqlite.db", "", "");
Statement statement = conn.createStatement();
statement.executeUpdate(CREATE_TABLE);
long before = System.currentTimeMillis();
int count = 1;
Random random = new Random();
StringBuilder sql;
for (int i = 0; i < 10; i++) {
for (int j = 0; j < 20; j++) {
sql = new StringBuilder();
sql.append("insert into TESTTABLE values ");
List<String> values = new ArrayList<>();
for (int k = 0; k < 2000; k++) {
String date = "DATETIME(" + ThreadLocalRandom.current().nextLong(1474265503, 1505801503) + ", 'unixepoch')";
String randomString = new BigInteger(130, random).toString(32);
values.add("(" + count++ + ", '" + randomString + "', '" + randomString + "'," + i + "," + date + ")");
}
sql.append(join(values)).append(";");
statement.executeUpdate(sql.toString());
}
}
System.out.println("Creating test data took: " + (System.currentTimeMillis() - before));
}
copy.delete();
Files.copy(newFile.toPath(), copy.toPath());
}
*/
}