Author: stefan2
Date: Fri Apr 5 16:49:05 2013
New Revision: 1465038
URL: http://svn.apache.org/r1465038
Log:
On the fsfs-format7 branch: Use containers to store the change lists
in packed rev files. This gives a ~10x compression for this data.
This patch does not yet support reading packed change lists directly;
they are only read passively as part of block-read.
* subversion/libsvn_fs_fs/index.h
(SVN_FS_FS__ITEM_TYPE_CHANGES_CONT): declare new item type
* subversion/libsvn_fs_fs/cached_data.c
(block_read_changes_container): new reader function
(block_read): allow for containers and handle changes containers
* subversion/libsvn_fs_fs/pack.c
(auto_pad_block): factor out from ...
(copy_items_from_temp): ... this
(write_changes_container,
get_block_left,
write_changes_containers): new functions to create changes containers
(write_l2p_index): fix a pointer reference
(pack_range): call the new packing function
Modified:
subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/cached_data.c
subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/index.h
subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/pack.c
Modified: subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/cached_data.c
URL:
http://svn.apache.org/viewvc/subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/cached_data.c?rev=1465038&r1=1465037&r2=1465038&view=diff
==============================================================================
--- subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/cached_data.c
(original)
+++ subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/cached_data.c Fri
Apr 5 16:49:05 2013
@@ -33,6 +33,7 @@
#include "pack.h"
#include "temp_serializer.h"
#include "index.h"
+#include "changes.h"
#include "../libsvn_fs/fs-loader.h"
@@ -2425,6 +2426,34 @@ block_read_changes(apr_array_header_t **
}
static svn_error_t *
+block_read_changes_container(apr_array_header_t **changes,
+ svn_fs_t *fs,
+ apr_file_t *file,
+ svn_stream_t *file_stream,
+ svn_fs_fs__p2l_entry_t* entry,
+ apr_uint32_t sub_item,
+ svn_boolean_t must_read,
+ apr_pool_t *pool)
+{
+ svn_fs_fs__changes_t *container;
+ svn_stream_t *stream;
+ if (!must_read)
+ return SVN_NO_ERROR;
+
+ SVN_ERR(auto_select_stream(&stream, fs, file, file_stream, entry, pool));
+
+ /* read changes from revision file */
+
+ SVN_ERR(svn_fs_fs__read_changes_container(&container, stream, pool, pool));
+
+ /* extract requested data */
+
+ SVN_ERR(svn_fs_fs__changes_get_list(changes, container, sub_item, pool));
+
+ return SVN_NO_ERROR;
+}
+
+static svn_error_t *
block_read_noderev(node_revision_t **noderev_p,
svn_fs_t *fs,
apr_file_t *file,
@@ -2518,9 +2547,6 @@ block_read(void **result,
if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
continue;
- /* we don't support containers, yet */
- SVN_ERR_ASSERT(entry->item_count == 1);
-
is_result = result && entry->item_count
&& entry->items[0].revision == revision
&& entry->items[0].number == item_index;
@@ -2566,6 +2592,14 @@ block_read(void **result,
entry, is_result, pool));
break;
+ case SVN_FS_FS__ITEM_TYPE_CHANGES_CONT:
+ SVN_ERR(block_read_changes_container
+ ((apr_array_header_t **)&item,
+ fs, revision_file, stream,
+ entry, sub_item, is_result,
+ pool));
+ break;
+
default:
break;
}
Modified: subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/index.h
URL:
http://svn.apache.org/viewvc/subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/index.h?rev=1465038&r1=1465037&r2=1465038&view=diff
==============================================================================
--- subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/index.h (original)
+++ subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/index.h Fri Apr 5
16:49:05 2013
@@ -47,6 +47,8 @@
#define SVN_FS_FS__ITEM_TYPE_ANY_REP 7 /* item is any representation.
Only used in pre-format7. */
+#define SVN_FS_FS__ITEM_TYPE_CHANGES_CONT 8 /* item is a changes container */
+
/* (user visible) entry in the phys-to-log index. It describes a section
* of some packed / non-packed rev file as containing a specific item.
* There must be no overlapping / conflicting entries.
Modified: subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/pack.c
URL:
http://svn.apache.org/viewvc/subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/pack.c?rev=1465038&r1=1465037&r2=1465038&view=diff
==============================================================================
--- subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/pack.c (original)
+++ subversion/branches/fsfs-format7/subversion/libsvn_fs_fs/pack.c Fri Apr 5
16:49:05 2013
@@ -35,6 +35,7 @@
#include "index.h"
#include "low_level.h"
#include "cached_data.h"
+#include "changes.h"
#include "../libsvn_fs/fs-loader.h"
@@ -859,6 +860,48 @@ sort_reps(pack_context_t *context)
}
}
+/* To prevent items from overlapping a block boundary, we will usually
+ * put them into the next block and top up the old one with NUL bytes.
+ * Pad CONTEXT's pack file to the end of the current block, if that padding
+ * is short enough. Use POOL for allocations.
+ */
+static svn_error_t *
+auto_pad_block(pack_context_t *context,
+ apr_pool_t *pool)
+{
+ fs_fs_data_t *ffd = context->fs->fsap_data;
+
+ /* This is the maximum number of bytes "wasted" that way per block.
+ * Larger items will cross the block boundaries. */
+ const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);
+
+ /* Is wasted space small enough to align the current item to the next
+ * block? */
+ apr_off_t padding
+ = ffd->block_size - (context->pack_offset % ffd->block_size);
+
+ if (padding < max_padding)
+ {
+ /* Yes. Top up with NUL bytes and don't forget to create
+ * a P2L index entry marking this section as unused. */
+ svn_fs_fs__p2l_entry_t null_entry;
+ svn_fs_fs__id_part_t rev_item = { 0, SVN_FS_FS__ITEM_INDEX_UNUSED };
+
+ null_entry.offset = context->pack_offset;
+ null_entry.size = padding;
+ null_entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED;
+ null_entry.item_count = 1;
+ null_entry.items = &rev_item;
+
+ SVN_ERR(write_null_bytes(context->pack_file, padding, pool));
+ SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry
+ (context->proto_p2l_index, &null_entry, pool));
+ context->pack_offset += padding;
+ }
+
+ return SVN_NO_ERROR;
+}
+
/* Copy (append) the items identified by svn_fs_fs__p2l_entry_t * elements
* in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE.
* Use POOL for temporary allocations.
@@ -873,12 +916,6 @@ copy_items_from_temp(pack_context_t *con
apr_pool_t *iterpool = svn_pool_create(pool);
int i;
- /* To prevent items from overlapping a block boundary, we will usually
- * put them into the next block and top up the old one with NUL bytes.
- * This is the maximum number of bytes "wasted" that way per block.
- * Larger items will cross the block boundaries. */
- const apr_off_t alignment_limit = MAX(ffd->block_size / 50, 512);
-
/* copy all items in strict order */
for (i = 0; i < entries->nelts; ++i)
{
@@ -898,32 +935,9 @@ copy_items_from_temp(pack_context_t *con
safe_size += 80;
/* still enough space in current block? */
+ /* If not, small sections at the end of a block should be padded. */
if (in_block_offset + safe_size > ffd->block_size)
- {
- /* No. Is wasted space small enough to align the current item
- * to the next block? */
- apr_off_t bytes_to_alignment = ffd->block_size - in_block_offset;
- if (bytes_to_alignment < alignment_limit)
- {
- /* Yes. To up with NUL bytes and don't forget to create
- * an P2L index entry marking this section as unused. */
- svn_fs_fs__p2l_entry_t null_entry;
- svn_fs_fs__id_part_t rev_item
- = { 0, SVN_FS_FS__ITEM_INDEX_UNUSED };
-
- null_entry.offset = context->pack_offset;
- null_entry.size = bytes_to_alignment;
- null_entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED;
- null_entry.item_count = 1;
- null_entry.items = &rev_item;
-
- SVN_ERR(write_null_bytes(context->pack_file,
- bytes_to_alignment, iterpool));
- SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry
- (context->proto_p2l_index, &null_entry, iterpool));
- context->pack_offset += bytes_to_alignment;
- }
- }
+ SVN_ERR(auto_pad_block(context, iterpool));
/* select the item in the source file and copy it into the target
* pack file */
@@ -945,6 +959,133 @@ copy_items_from_temp(pack_context_t *con
return SVN_NO_ERROR;
}
+/* Finalize CONTAINER and write it to CONTEXT's pack file.
+ * Append a P2L entry containing the given SUB_ITEMS to NEW_ENTRIES.
+ * Use POOL for temporary allocations.
+ */
+static svn_error_t *
+write_changes_container(pack_context_t *context,
+ svn_fs_fs__changes_t *container,
+ apr_array_header_t *sub_items,
+ apr_array_header_t *new_entries,
+ apr_pool_t *pool)
+{
+ apr_off_t offset = 0;
+ svn_fs_fs__p2l_entry_t container_entry;
+
+ svn_stream_t *pack_stream
+ = svn_stream_from_aprfile2(context->pack_file, TRUE, pool);
+
+ SVN_ERR(svn_fs_fs__write_changes_container(pack_stream,
+ container,
+ pool));
+ SVN_ERR(svn_io_file_seek(context->pack_file, SEEK_CUR, &offset, pool));
+
+ container_entry.offset = context->pack_offset;
+ container_entry.size = offset - container_entry.offset;
+ container_entry.type = SVN_FS_FS__ITEM_TYPE_CHANGES_CONT;
+ container_entry.item_count = sub_items->nelts;
+ container_entry.items = (svn_fs_fs__id_part_t *)sub_items->elts;
+
+ context->pack_offset = offset;
+ APR_ARRAY_PUSH(new_entries, svn_fs_fs__p2l_entry_t *)
+ = copy_p2l_entry(&container_entry, context->info_pool);
+
+ SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry
+ (context->proto_p2l_index, &container_entry, pool));
+
+ return SVN_NO_ERROR;
+}
+
+/* Return the remaining unused bytes in the current block in CONTEXT's
+ * pack file.
+ */
+static apr_ssize_t
+get_block_left(pack_context_t *context)
+{
+ fs_fs_data_t *ffd = context->fs->fsap_data;
+ return ffd->block_size - (context->pack_offset % ffd->block_size);
+}
+
+/* Read the change lists identified by svn_fs_fs__p2l_entry_t * elements
+ * in ENTRIES from TEMP_FILE, last entry first, aggregate them and write
+ * them into CONTEXT->PACK_FILE. Use POOL for temporary allocations.
+ */
+static svn_error_t *
+write_changes_containers(pack_context_t *context,
+ apr_array_header_t *entries,
+ apr_file_t *temp_file,
+ apr_pool_t *pool)
+{
+ fs_fs_data_t *ffd = context->fs->fsap_data;
+ apr_pool_t *iterpool = svn_pool_create(pool);
+ apr_pool_t *container_pool = svn_pool_create(pool);
+ int i;
+
+ apr_ssize_t block_left = get_block_left(context);
+ svn_fs_fs__changes_t *container
+ = svn_fs_fs__changes_create(1000, container_pool);
+ apr_array_header_t *sub_items
+ = apr_array_make(pool, 64, sizeof(svn_fs_fs__id_part_t));
+ apr_array_header_t *new_entries
+ = apr_array_make(context->info_pool, 16, entries->elt_size);
+ svn_stream_t *temp_stream
+ = svn_stream_from_aprfile2(temp_file, TRUE, pool);
+
+ /* parse and aggregate all change lists, last entry first */
+ for (i = entries->nelts-1; i >= 0; --i)
+ {
+ apr_array_header_t *changes;
+ apr_size_t list_index;
+ svn_fs_fs__p2l_entry_t *entry
+ = APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t *);
+
+ if (block_left < entry->size)
+ block_left = get_block_left(context)
+ - svn_fs_fs__changes_estimate_size(container);
+
+ if ((block_left < entry->size) && sub_items->elts)
+ {
+ SVN_ERR(write_changes_container(context, container, sub_items,
+ new_entries, iterpool));
+
+ apr_array_clear(sub_items);
+ svn_pool_clear(container_pool);
+ container = svn_fs_fs__changes_create(1000, container_pool);
+ block_left = get_block_left(context);
+ }
+
+ /* still enough space in current block? */
+ if (block_left < entry->size)
+ {
+ SVN_ERR(auto_pad_block(context, iterpool));
+ block_left = get_block_left(context);
+ }
+
+ /* select the change list in the source file, parse it and add it to
+ * the container */
+ SVN_ERR(svn_io_file_seek(temp_file, SEEK_SET, &entry->offset,
+ iterpool));
+ SVN_ERR(svn_fs_fs__read_changes(&changes, temp_stream, iterpool));
+ SVN_ERR(svn_fs_fs__changes_append_list(&list_index, container, changes));
+ SVN_ERR_ASSERT(list_index == sub_items->nelts);
+
+ APR_ARRAY_PUSH(sub_items, svn_fs_fs__id_part_t) = entry->items[0];
+
+ svn_pool_clear(iterpool);
+ }
+
+ if (sub_items->elts)
+ SVN_ERR(write_changes_container(context, container, sub_items,
+ new_entries, iterpool));
+
+ *entries = *new_entries;
+ svn_pool_destroy(iterpool);
+ svn_pool_destroy(container_pool);
+
+ return SVN_NO_ERROR;
+}
+
/* Append all entries of svn_fs_fs__p2l_entry_t * array TO_APPEND to
* svn_fs_fs__p2l_entry_t * array DEST.
*/
@@ -1024,7 +1165,7 @@ write_l2p_index(pack_context_t *context,
{
SVN_ERR_ASSERT(entry->items[0].revision <= entry->items[1].revision);
++entry->items;
- svn__priority_queue_push(queue, entry);
+ svn__priority_queue_push(queue, &entry);
}
/* keep memory usage in check */
@@ -1147,8 +1288,8 @@ pack_range(pack_context_t *context,
sort_reps(context);
/* phase 4: copy bucket data to pack file. Write P2L index. */
- SVN_ERR(copy_items_from_temp(context, context->changes,
- context->changes_file, revpool));
+ SVN_ERR(write_changes_containers(context, context->changes,
+ context->changes_file, revpool));
svn_pool_clear(revpool);
SVN_ERR(copy_items_from_temp(context, context->file_props,
context->file_props_file, revpool));