[[[ Simpler/faster LCS algorithm in libsvn_diff by elimination of idx. * subversion/libsvn_diff/lcs.c (svn_diff__snake_t): Removed idx parameter (always 0) (svn_diff__lcs_t): Removed idx variable (always 0) , let d have either sign, and moved the origo of fp by d. New version no longer chooses the shorter file as the "original", and can thus give different LCS's depending on the order of the input files even when the files have different lengths. Speed is ~15% faster for core lcs algorithm. ]]]
The old version of the LCS algorithm started at k=0 and ended at k=d, where d is the length difference of the two files. d was required to be positive. It is more efficient to start at k=(-)d and end at 0 (since ending the loops at 0 allows for faster comparisons), and this also makes it easy to let d have either sign, which makes the idx variable unnecessary, further increasing speed by eliminating a parameter to the frequently called function svn_diff__snake_t. The sign of d is handled in the initial values of the loops (which have minimal speed impact). The new version will more often give different (but equally good) results depending on the order of the input files. I estimated the speed difference by diffing all of the first 100 revisions of merge.c in libsvn_client against each other (when diffing only against the previous revision, the core LCS algorithm usually takes a minor part of the total running time, so this is not suitable for gauging the LCS algorithm speed). Morten
Index: subversion/libsvn_diff/lcs.c =================================================================== --- subversion/libsvn_diff/lcs.c (revision 1104615) +++ subversion/libsvn_diff/lcs.c (working copy) @@ -50,7 +50,6 @@ static APR_INLINE void svn_diff__snake(apr_off_t k, svn_diff__snake_t *fp, - int idx, svn_diff__lcs_t **freelist, apr_pool_t *pool) { @@ -117,8 +116,8 @@ lcs = apr_palloc(pool, sizeof(*lcs)); } - lcs->position[idx] = start_position[0]; - lcs->position[abs(1 - idx)] = start_position[1]; + lcs->position[0] = start_position[0]; + lcs->position[1] = start_position[1]; lcs->length = position[1]->offset - start_position[1]->offset; lcs->next = previous_lcs; lcs->refcount = 1; @@ -192,7 +191,6 @@ apr_off_t suffix_lines, apr_pool_t *pool) { - int idx; apr_off_t length[2]; svn_diff__snake_t *fp; apr_off_t d; @@ -234,22 +232,21 @@ /* Calculate length of both sequences to be compared */ length[0] = position_list1->offset - position_list1->next->offset + 1; length[1] = position_list2->offset - position_list2->next->offset + 1; - idx = length[0] > length[1] ? 1 : 0; /* strikerXXX: here we allocate the furthest point array, which is * strikerXXX: sized M + N + 3 (!) */ fp = apr_pcalloc(pool, sizeof(*fp) * (apr_size_t)(length[0] + length[1] + 3)); - fp += length[idx] + 1; + fp += length[1] + 1; - sentinel_position[idx].next = position_list1->next; - position_list1->next = &sentinel_position[idx]; - sentinel_position[idx].offset = position_list1->offset + 1; + sentinel_position[0].next = position_list1->next; + position_list1->next = &sentinel_position[0]; + sentinel_position[0].offset = position_list1->offset + 1; - sentinel_position[abs(1 - idx)].next = position_list2->next; - position_list2->next = &sentinel_position[abs(1 - idx)]; - sentinel_position[abs(1 - idx)].offset = position_list2->offset + 1; + sentinel_position[1].next = position_list2->next; + position_list2->next = &sentinel_position[1]; + sentinel_position[1].offset = position_list2->offset + 1; /* These are never dereferenced, only compared by value, so we * can safely fake these up and the void* cast is OK. @@ -257,45 +254,45 @@ sentinel_position[0].node = (void*)&sentinel_position[0]; sentinel_position[1].node = (void*)&sentinel_position[1]; - d = length[abs(1 - idx)] - length[idx]; + d = length[0] - length[1]; /* k = -1 will be the first to be used to get previous * position information from, make sure it holds sane * data */ - fp[-1].position[0] = sentinel_position[0].next; - fp[-1].position[1] = &sentinel_position[1]; + fp[d - 1].position[0] = sentinel_position[0].next; + fp[d - 1].position[1] = &sentinel_position[1]; p = 0; do { /* Forward */ - for (k = -p; k < d; k++) + for (k = (d < 0 ? d : 0) - p; k < 0; k++) { - svn_diff__snake(k, fp, idx, &lcs_freelist, pool); + svn_diff__snake(k, fp, &lcs_freelist, pool); } - for (k = d + p; k >= d; k--) + for (k = (d > 0 ? d : 0) + p; k >= 0; k--) { - svn_diff__snake(k, fp, idx, &lcs_freelist, pool); + svn_diff__snake(k, fp, &lcs_freelist, pool); } p++; } - while (fp[d].position[1] != &sentinel_position[1]); + while (fp[0].position[1] != &sentinel_position[1]); if (suffix_lines) - lcs->next = prepend_lcs(fp[d].lcs, suffix_lines, + lcs->next = prepend_lcs(fp[0].lcs, suffix_lines, lcs->position[0]->offset - suffix_lines, lcs->position[1]->offset - suffix_lines, pool); else - lcs->next = fp[d].lcs; + lcs->next = fp[0].lcs; lcs = svn_diff__lcs_reverse(lcs); - position_list1->next = sentinel_position[idx].next; - position_list2->next = sentinel_position[abs(1 - idx)].next; + position_list1->next = sentinel_position[0].next; + position_list2->next = sentinel_position[1].next; if (prefix_lines) return prepend_lcs(lcs, prefix_lines, 1, 1, pool);