Hi Richard
I've added a new patch. See some in-line comments. That said, I made a
couple of more changes to my patch, re-using the free_blocks for doing
all allocations (i.e. not using h->endblocks at all).
On 2018-07-25 07:32 AM, Richard W.M. Jones wrote:
On Mon, Jul 23, 2018 at 02:38:18PM -0400, Shreyas Khare wrote:
Hello Richard
As discussed in the IRC channel, when merging a moderately large reg
file (~35MB) to a hiv file (~118 MB); hivex generates a huge hiv
file (~580 MB). These changes address that by creating a list of
unallocated blocks and reassigning unused blocks. I used
https://github.com/msuhanov/regf/blob/master/Windows%20registry%20file%20format%20specification.md
as a reference for the structure of the hiv file (in addition to the
source code itself)
Attaching the patch file.
Just as a general comment, for the patch to go upstream at all it will
need to be reformatted so it fits with the existing code style.
[...]
Will do.
/* Allocate a single block, first allocating an hbin (page) at the end
* of the current file if necessary. NB. To keep the implementation
* simple and more likely to be correct, we do not reuse existing free
This comment is left in place, but it's now obviously wrong.
Fixed.
I'm a bit confused why we need a separate free list. Can't we just
use the existing bitmap?
I tried to use the existing bitmap, but wasn't successful initially with
the 20 byte hbin headers on every page.
Rich.
--
NOTICE OF CONFIDENTIALITY: At Rapid7, the privacy of our customers,
partners, and employees is paramount. If you received this email in error,
please notify the sender and delete it from your inbox right away. Learn
how Rapid7 handles privacy at rapid7.com/privacy-policy
<https://www.rapid7.com/privacy-policy/>. To opt-out of Rapid7 marketing
emails, please click here
<https://information.rapid7.com/manage-subscription.html> or email
[email protected] <mailto:mailto:[email protected]>.
>From 6b62153af7ccdfed4a1c720457a6b9987f349c6c Mon Sep 17 00:00:00 2001
From: Shreyas Khare <[email protected]>
Date: Mon, 23 Jul 2018 14:28:29 -0400
Subject: [PATCH] Re-allocating unused blocks before assigning new page.
---
lib/hivex-internal.h | 99 ++++++++++++++++++++++++++++++++++++++++++++
lib/write.c | 56 +++++++++++--------------
2 files changed, 123 insertions(+), 32 deletions(-)
diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h
index a22e732..9f4ab96 100644
--- a/lib/hivex-internal.h
+++ b/lib/hivex-internal.h
@@ -45,6 +45,13 @@ typedef enum {
nr_recode_types,
} recode_type;
+typedef struct unallocated_block {
+ size_t offset;
+ size_t block_size;
+ struct unallocated_block *next;
+} unallocated_block;
+
+
struct hive_h {
char *filename;
int fd;
@@ -52,6 +59,7 @@ struct hive_h {
int msglvl; /* 1 = verbose, 2 or 3 = debug */
int writable;
int unsafe;
+ unallocated_block *free_blocks;
/* Registry file, memory mapped if read-only, or malloc'd if writing. */
union {
@@ -343,4 +351,95 @@ extern int _hivex_get_values (hive_h *h, hive_node_h node, hive_value_h **values
#define HIVEX_MAX_VALUE_LEN 8000000
#define HIVEX_MAX_ALLOCATION 1000000
+/*
+ * Get the first available unused block from free_blocks, using first-fit algorithm.
+ * NB: Attempted to use the best-fit algorithm. This does not work too well because
+ * allocating a block in the smallest possible available block causes fragmentation.
+ */
+static size_t
+get_unused_block(hive_h *h, size_t seglen)
+{
+ unallocated_block *before = NULL, *found=NULL;
+ for (unallocated_block *current = h->free_blocks; current != NULL ; current = current->next) {
+ if (current->block_size >= seglen) {
+ found = current;
+ break;
+ }
+ before = current;
+ }
+ if (found) {
+ size_t offset = found->offset;
+ size_t unused = found->block_size - seglen;
+ if (unused > 0) {
+ // Need to write unused memory to hive (to preserve hiv integrity)
+ struct ntreg_hbin_block *blockhdr = (struct ntreg_hbin_block *) ((char *) h->addr + found->offset + seglen);
+ blockhdr->seg_len = htole32 ((int32_t) unused);
+ // Lets move and resize the found block
+ found->offset = found->offset+seglen;
+ found->block_size = unused;
+ } else {
+ //Full buffer was used
+ if (before) {
+ before->next = found->next;
+ } else {
+ h->free_blocks = found->next;
+ }
+ free(found);
+ }
+ return offset;
+ } else return -1;
+}
+
+/*
+ * Add freed block to free_blocks. Block size is determined by size at the actual offset.
+ * This list is ordered by offset. If the free'd block is preceded or succeded
+ * by another free block, they are merged.
+ */
+static void
+set_unused_block(hive_h *h, size_t offset)
+{
+ size_t seg_len = block_len (h, offset, NULL);
+ unallocated_block *current = malloc(sizeof(unallocated_block));
+ current->offset = offset;
+ current->block_size = seg_len;
+ current->next = NULL;
+
+ unallocated_block *before = NULL, *after = NULL;
+ // There are no free blocks
+ if (h->free_blocks == NULL) {
+ h->free_blocks = current;
+ return;
+ }
+
+ // Find the first block _after_ this one
+ for (after = h->free_blocks; after != NULL; after = after->next) {
+ if (after->offset > current->offset) {
+ // Can we merge with the next block?
+ if (current->offset + current->block_size == after->offset) {
+ current->block_size += after->block_size;
+ current->next = after->next;
+ free(after);
+ } else {
+ current->next = after;
+ }
+ break;
+ }
+ before = after;
+ }
+ // This was the block before this one
+ if (before) {
+ // Can we merge with the previous block?
+ if (before->offset + before->block_size == current->offset) {
+ before->block_size += current->block_size;
+ before->next = current->next;
+ free(current); //discard current block, lets reuse before block
+ } else {
+ before->next = current;
+ }
+ } else {
+ h->free_blocks = current;
+ }
+}
+
+
#endif /* HIVEX_INTERNAL_H_ */
diff --git a/lib/write.c b/lib/write.c
index 70105c9..49fdd5b 100644
--- a/lib/write.c
+++ b/lib/write.c
@@ -127,9 +127,7 @@ allocate_page (hive_h *h, size_t allocation_hint)
}
/* Allocate a single block, first allocating an hbin (page) at the end
- * of the current file if necessary. NB. To keep the implementation
- * simple and more likely to be correct, we do not reuse existing free
- * blocks.
+ * of the current file if necessary.
*
* seg_len is the size of the block (this INCLUDES the block header).
* The header of the block is initialized to -seg_len (negative to
@@ -148,6 +146,9 @@ allocate_page (hive_h *h, size_t allocation_hint)
static size_t
allocate_block (hive_h *h, size_t seg_len, const char id[2])
{
+ struct ntreg_hbin_block *blockhdr;
+ size_t offset;
+
CHECK_WRITABLE (0);
if (seg_len < 4) {
@@ -170,19 +171,27 @@ allocate_block (hive_h *h, size_t seg_len, const char id[2])
*/
seg_len = (seg_len + 7) & ~7;
- /* Allocate a new page if necessary. */
- if (h->endblocks == 0 || h->endblocks + seg_len > h->endpages) {
- size_t newendblocks = allocate_page (h, seg_len);
- if (newendblocks == 0)
- return 0;
- h->endblocks = newendblocks;
+ // Attempt to find a free block before getting a new block
+ offset = get_unused_block(h, seg_len);
+ if (offset != -1) {
+ DEBUG(2, "Re-use offset found at 0x%zx, size %zu", offset, seg_len);
+ } else {
+ offset = allocate_page(h, seg_len);
+ if (offset == 0) return 0;
+ ssize_t endblock = offset + seg_len;
+ DEBUG(2, "new block at 0x%zx, size %zu", offset, seg_len);
+ ssize_t rem = h->endpages - endblock;
+ if (rem > 0) {
+ DEBUG (2, "marking remainder of page free"
+ " starting at 0x%zx, size %zd", endblock, rem);
+ assert (rem >= 4);
+ blockhdr = (struct ntreg_hbin_block *) ((char *) h->addr + endblock);
+ blockhdr->seg_len = htole32 ((int32_t) rem);
+ set_unused_block(h, endblock);
+ }
}
- size_t offset = h->endblocks;
-
- DEBUG (2, "new block at 0x%zx, size %zu", offset, seg_len);
-
- struct ntreg_hbin_block *blockhdr =
+ blockhdr =
(struct ntreg_hbin_block *) ((char *) h->addr + offset);
memset (blockhdr, 0, seg_len);
@@ -194,24 +203,6 @@ allocate_block (hive_h *h, size_t seg_len, const char id[2])
}
BITMAP_SET (h->bitmap, offset);
-
- h->endblocks += seg_len;
-
- /* If there is space after the last block in the last page, then we
- * have to put a dummy free block header here to mark the rest of
- * the page as free.
- */
- ssize_t rem = h->endpages - h->endblocks;
- if (rem > 0) {
- DEBUG (2, "marking remainder of page free"
- " starting at 0x%zx, size %zd", h->endblocks, rem);
-
- assert (rem >= 4);
-
- blockhdr = (struct ntreg_hbin_block *) ((char *) h->addr + h->endblocks);
- blockhdr->seg_len = htole32 ((int32_t) rem);
- }
-
return offset;
}
@@ -236,6 +227,7 @@ mark_block_unused (hive_h *h, size_t offset)
size_t seg_len = block_len (h, offset, NULL);
blockhdr->seg_len = htole32 (seg_len);
+ set_unused_block(h, offset);
BITMAP_CLR (h->bitmap, offset);
}
--
2.17.0
_______________________________________________
Libguestfs mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/libguestfs