Hey Paul, Can you try this change on your large-file-count working copies? I believe this should fix the performance problems you were seeing.
Cheers, -g On Mon, Jun 7, 2010 at 20:47, <gst...@apache.org> wrote: > Author: gstein > Date: Tue Jun 8 00:47:22 2010 > New Revision: 952493 > > URL: http://svn.apache.org/viewvc?rev=952493&view=rev > Log: > The query that we used to fetch all children in BASE_NODE and WORKING_NODE > used a UNION between two SELECT statements. The idea was to have SQLite > remove all duplicates for us in a single query. Unfortunately, this caused > SQLite to create an ephemeral (temporary) table and place the results of > each query into that table. It created an index to remove dupliates. Then > it returned the values in that ephemeral table. For large numbers of > nodes, the construction of the table and its index becomes very costly. > > This change rebuilds gather_children() in wc_db.c to do the duplicate > removal manually using a hash table. It does some simple scanning straight > into an array when it knows duplicates cannot exist (one of BASE or > WORKING is empty). > > The performance problem of svn_wc__db_read_children() was first observed > in issue #3499. The actual performance improvement is untested so far, but > I'm assuming pburba can pick up this change and try in his scenario. > > * subversion/libsvn_wc/wc_db.c: > (count_children): new helper to count the number of children of a given > PARENT_RELPATH within a specific table. > (add_children_to_hash): new helper to scan children, placing their names > into a hash table as keys (and the mapped values). > (union_children): new helper to scan both BASE_NODE and WORKING_NODE and > manually create a union of the resulting names using a hash table. > (single_table_children): new helper to return the children from a single > table. > (gather_children): rebuilt in terms of the above helpers > > * subversion/libsvn_wc/wc-queries.sql: > (STMT_COUNT_BASE_NODE_CHILDREN, STMT_COUNT_WORKING_NODE_CHILDREN): new > statements to count the number of children for a given parent_relpath > (STMT_SELECT_WORKING_CHILDREN): removed > (STMT_SELECT_WORKING_NODE_CHILDREN): like STMT_SELECT_BASE_NODE_CHILDREN, > this scan all children in WORKING_NODE for a given parent_relpath > > Modified: > subversion/trunk/subversion/libsvn_wc/wc-queries.sql > subversion/trunk/subversion/libsvn_wc/wc_db.c > > Modified: subversion/trunk/subversion/libsvn_wc/wc-queries.sql > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc-queries.sql?rev=952493&r1=952492&r2=952493&view=diff > ============================================================================== > --- subversion/trunk/subversion/libsvn_wc/wc-queries.sql (original) > +++ subversion/trunk/subversion/libsvn_wc/wc-queries.sql Tue Jun 8 00:47:22 > 2010 > @@ -87,16 +87,21 @@ INSERT OR IGNORE INTO WORKING_NODE ( > wc_id, local_relpath, parent_relpath, presence, kind) > VALUES (?1, ?2, ?3, 'incomplete', 'unknown'); > > +-- STMT_COUNT_BASE_NODE_CHILDREN > +SELECT COUNT(*) FROM BASE_NODE > +WHERE wc_id = ?1 AND parent_relpath = ?2; > + > +-- STMT_COUNT_WORKING_NODE_CHILDREN > +SELECT COUNT(*) FROM WORKING_NODE > +WHERE wc_id = ?1 AND parent_relpath = ?2; > + > -- STMT_SELECT_BASE_NODE_CHILDREN > select local_relpath from base_node > where wc_id = ?1 and parent_relpath = ?2; > > --- STMT_SELECT_WORKING_CHILDREN > -select local_relpath from base_node > -where wc_id = ?1 and parent_relpath = ?2 > -union > -select local_relpath from working_node > -where wc_id = ?1 and parent_relpath = ?2; > +-- STMT_SELECT_WORKING_NODE_CHILDREN > +SELECT local_relpath FROM WORKING_NODE > +WHERE wc_id = ?1 AND parent_relpath = ?2; > > -- STMT_SELECT_WORKING_IS_FILE > select kind == 'file' from working_node > > Modified: subversion/trunk/subversion/libsvn_wc/wc_db.c > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc_db.c?rev=952493&r1=952492&r2=952493&view=diff > ============================================================================== > --- subversion/trunk/subversion/libsvn_wc/wc_db.c (original) > +++ subversion/trunk/subversion/libsvn_wc/wc_db.c Tue Jun 8 00:47:22 2010 > @@ -813,40 +813,97 @@ insert_working_node(void *baton, > } > > > -/* */ > static svn_error_t * > -gather_children(const apr_array_header_t **children, > - svn_boolean_t base_only, > - svn_wc__db_t *db, > - const char *local_abspath, > - apr_pool_t *result_pool, > - apr_pool_t *scratch_pool) > +count_children(int *count, > + int stmt_idx, > + svn_sqlite__db_t *sdb, > + apr_int64_t wc_id, > + const char *parent_relpath) > +{ > + svn_sqlite__stmt_t *stmt; > + > + SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx)); > + SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath)); > + SVN_ERR(svn_sqlite__step_row(stmt)); > + *count = svn_sqlite__column_int(stmt, 0); > + return svn_error_return(svn_sqlite__reset(stmt)); > +} > + > + > +/* Each name is allocated in RESULT_POOL and stored into CHILDREN as a key > + pointed to the same name. */ > +static svn_error_t * > +add_children_to_hash(apr_hash_t *children, > + int stmt_idx, > + svn_sqlite__db_t *sdb, > + apr_int64_t wc_id, > + const char *parent_relpath, > + apr_pool_t *result_pool) > { > - svn_wc__db_pdh_t *pdh; > - const char *local_relpath; > svn_sqlite__stmt_t *stmt; > - apr_array_header_t *child_names; > svn_boolean_t have_row; > > - SVN_ERR_ASSERT(svn_dirent_is_absolute(local_abspath)); > + SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx)); > + SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath)); > + SVN_ERR(svn_sqlite__step(&have_row, stmt)); > + while (have_row) > + { > + const char *child_relpath = svn_sqlite__column_text(stmt, 0, NULL); > + const char *name = svn_relpath_basename(child_relpath, result_pool); > > - SVN_ERR(svn_wc__db_pdh_parse_local_abspath(&pdh, &local_relpath, db, > - local_abspath, svn_sqlite__mode_readonly, > - scratch_pool, scratch_pool)); > - VERIFY_USABLE_PDH(pdh); > + apr_hash_set(children, name, APR_HASH_KEY_STRING, name); > > - SVN_ERR(svn_sqlite__get_statement(&stmt, pdh->wcroot->sdb, > - base_only > - ? STMT_SELECT_BASE_NODE_CHILDREN > - : STMT_SELECT_WORKING_CHILDREN)); > - SVN_ERR(svn_sqlite__bindf(stmt, "is", pdh->wcroot->wc_id, local_relpath)); > + SVN_ERR(svn_sqlite__step(&have_row, stmt)); > + } > + > + return svn_sqlite__reset(stmt); > +} > + > + > +static svn_error_t * > +union_children(const apr_array_header_t **children, > + svn_sqlite__db_t *sdb, > + apr_int64_t wc_id, > + const char *parent_relpath, > + apr_pool_t *result_pool, > + apr_pool_t *scratch_pool) > +{ > + /* ### it would be nice to pre-size this hash table. */ > + apr_hash_t *names = apr_hash_make(scratch_pool); > + apr_array_header_t *names_array; > + > + /* All of the names get allocated in RESULT_POOL. */ > + SVN_ERR(add_children_to_hash(names, STMT_SELECT_BASE_NODE_CHILDREN, > + sdb, wc_id, parent_relpath, result_pool)); > + SVN_ERR(add_children_to_hash(names, STMT_SELECT_WORKING_NODE_CHILDREN, > + sdb, wc_id, parent_relpath, result_pool)); > + > + SVN_ERR(svn_hash_keys(&names_array, names, result_pool)); > + *children = names_array; > + > + return SVN_NO_ERROR; > +} > + > + > +static svn_error_t * > +single_table_children(const apr_array_header_t **children, > + int stmt_idx, > + int start_size, > + svn_sqlite__db_t *sdb, > + apr_int64_t wc_id, > + const char *parent_relpath, > + apr_pool_t *result_pool) > +{ > + svn_sqlite__stmt_t *stmt; > + apr_array_header_t *child_names; > + svn_boolean_t have_row; > + > + SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx)); > + SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath)); > > /* ### should test the node to ensure it is a directory */ > > - /* ### 10 is based on Subversion's average of 8.5 files per versioned > - ### directory in its repository. maybe use a different value? or > - ### count rows first? */ > - child_names = apr_array_make(result_pool, 10, sizeof(const char *)); > + child_names = apr_array_make(result_pool, start_size, sizeof(const char > *)); > > SVN_ERR(svn_sqlite__step(&have_row, stmt)); > while (have_row) > @@ -866,6 +923,76 @@ gather_children(const apr_array_header_t > > > /* */ > +static svn_error_t * > +gather_children(const apr_array_header_t **children, > + svn_boolean_t base_only, > + svn_wc__db_t *db, > + const char *local_abspath, > + apr_pool_t *result_pool, > + apr_pool_t *scratch_pool) > +{ > + svn_wc__db_pdh_t *pdh; > + const char *local_relpath; > + int base_count; > + int working_count; > + > + SVN_ERR_ASSERT(svn_dirent_is_absolute(local_abspath)); > + > + SVN_ERR(svn_wc__db_pdh_parse_local_abspath(&pdh, &local_relpath, db, > + local_abspath, > + svn_sqlite__mode_readonly, > + scratch_pool, scratch_pool)); > + VERIFY_USABLE_PDH(pdh); > + > + if (base_only) > + { > + /* 10 is based on Subversion's average of 8.7 files per versioned > + directory in its repository. > + > + ### note "files". should redo count with subdirs included */ > + return svn_error_return(single_table_children( > + children, STMT_SELECT_BASE_NODE_CHILDREN, > + 10 /* start_size */, > + pdh->wcroot->sdb, pdh->wcroot->wc_id, > + local_relpath, result_pool)); > + } > + > + SVN_ERR(count_children(&base_count, STMT_COUNT_BASE_NODE_CHILDREN, > + pdh->wcroot->sdb, pdh->wcroot->wc_id, > local_relpath)); > + SVN_ERR(count_children(&working_count, STMT_COUNT_WORKING_NODE_CHILDREN, > + pdh->wcroot->sdb, pdh->wcroot->wc_id, > local_relpath)); > + > + if (base_count == 0) > + { > + if (working_count == 0) > + { > + *children = apr_array_make(result_pool, 0, sizeof(const char *)); > + return SVN_NO_ERROR; > + } > + > + return svn_error_return(single_table_children( > + children, STMT_SELECT_WORKING_NODE_CHILDREN, > + working_count, > + pdh->wcroot->sdb, pdh->wcroot->wc_id, > + local_relpath, result_pool)); > + } > + if (working_count == 0) > + return svn_error_return(single_table_children( > + children, STMT_SELECT_BASE_NODE_CHILDREN, > + base_count, > + pdh->wcroot->sdb, pdh->wcroot->wc_id, > + local_relpath, result_pool)); > + > + /* ### it would be nice to pass BASE_COUNT and WORKING_COUNT, but there is > + ### nothing union_children() can do with those. */ > + return svn_error_return(union_children(children, > + pdh->wcroot->sdb, > pdh->wcroot->wc_id, > + local_relpath, > + result_pool, scratch_pool)); > +} > + > + > +/* */ > static void > flush_entries(const svn_wc__db_pdh_t *pdh) > { > > >