Signed-off-by: Benoit Canet <ben...@irqsave.net> --- block/qcow2-dedup.c | 312 +++++++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.h | 13 +++ 2 files changed, 325 insertions(+)
diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c index e4e6108..a7c7202 100644 --- a/block/qcow2-dedup.c +++ b/block/qcow2-dedup.c @@ -29,6 +29,8 @@ #include "qemu-common.h" #include "qcow2.h" +#define HASH_LENGTH 32 + /** * Read some data from the QCOW2 file * @@ -148,3 +150,313 @@ int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs, fail: return ret; } + +/* + * Build a QCowHashNode structure + * + * @hash: the given hash + * @physical_offset: the cluster offset in the QCOW2 file + * @first_offset: the first logical cluster offset written + * @ret: the build QCowHashNode + */ +static QCowHashNode *qcow2_dedup_build_qcow_hash_node(uint8_t *hash, + uint64_t physical_offset, + uint64_t first_offset) +{ + QCowHashNode *data; + + data = g_new0(QCowHashNode, 1); + data->hash = hash; + data->offset = physical_offset; + data->first_logical_offset = first_offset; + + return data; +} + +/* + * Compute the hash of a given cluster + * + * @data: a buffer containing the cluster data + * @ret: a HASH_LENGTH long dynamically allocated array containing the hash + */ +static uint8_t *qcow2_compute_cluster_hash(BlockDriverState *bs, + uint8_t *data) +{ + return NULL; +} + +/* Try to find the offset of a given cluster if it's duplicated + * Exceptionally we cast return value to int64_t to use as error code. + * + * @data: a buffer containing the cluster + * @skip_cluster_nr: the number of cluster to skip in the buffer + * @hash: if hash is provided it's used else it's computed + * @ret: QCowHashNode of the duplicated cluster or NULL + */ +static QCowHashNode *qcow2_build_hash_lookup_offset(BlockDriverState *bs, + uint8_t *data, + int skip_cluster_nr, + uint8_t **hash) +{ + BDRVQcowState *s = bs->opaque; + QCowHashNode *hash_node; + if (!*hash) { + /* no hash has been provided compute it and store it for caller usage */ + *hash = qcow2_compute_cluster_hash(bs, + data + skip_cluster_nr * + s->cluster_size); + } + hash_node = g_tree_lookup(s->dedup_tree_by_hash, *hash); + if (hash_node) { + return hash_node; + } + + /* cluster not duplicated */ + hash_node = qcow2_dedup_build_qcow_hash_node(*hash, + QCOW_FLAG_EMPTY, + QCOW_FLAG_EMPTY); + g_tree_insert(s->dedup_tree_by_hash, *hash, hash_node); + + return NULL; +} + +/* + * Helper used to build a QCowHashElement + * + * @hash: the hash to use + * @ret: a newly allocated QCowHashElement pointing to the given hash + */ +static QCowHashElement *qcow2_build_dedup_hash(uint8_t *hash) +{ + QCowHashElement *dedup_hash; + dedup_hash = g_new0(QCowHashElement, 1); + dedup_hash->hash = hash; + return dedup_hash; +} + +/* + * Helper used to link a deduplicated cluster in the l2 + * + * @logical_cluster_offset: the cluster offset seen by the guest (in sectors) + * @physical_cluster_offset: the cluster offset in the QCOW2 file (in sectors) + * @overwrite: true if we are overwriting an l2 table + * @ret: + */ +static int qcow2_dedup_link_l2(BlockDriverState *bs, + uint64_t logical_cluster_offset, + uint64_t physical_cluster_offset, + bool overwrite) +{ + QCowL2Meta m; + /* function correctness regarding copy on write ? */ + m.offset = logical_cluster_offset << 9; + m.alloc_offset = physical_cluster_offset << 9; + m.nb_clusters = 1; /* we are linking only one cluster in l2 */ + m.cluster_offset = 0; + m.n_start = 0; + m.nb_available = 0; + m.oflag_copied = false; + m.overwrite = overwrite; + return qcow2_alloc_cluster_link_l2(bs, &m); +} + +/* This function tries to deduplicate a given cluster. + * + * @sector_num: the logical sector number we are trying to deduplicate + * @precomputed_hash: Used instead of computing the hash if provided + * @data: the buffer in which to look for a duplicated cluster + * @skip_clusters_nr: the number of cluster that must be skipped in data + * @non_duplicated_dedup_hash: returned if the cluster is not deduplicated + * @ret: ret < 0 on error, 1 on deduplication else 0 + */ +static int qcow2_dedup_cluster(BlockDriverState *bs, + uint64_t sector_num, + uint8_t **precomputed_hash, + uint8_t *data, + int skip_clusters_nr, + QCowHashElement **non_duplicated_dedup_hash) +{ + BDRVQcowState *s = bs->opaque; + QCowHashNode *hash_node; + int64_t physical_cluster_offset; + uint64_t first_logical_offset; + uint64_t logical_cluster_offset; + uint64_t existing_physical_cluster_offset; + int ret; + int pnum = s->cluster_sectors; + *non_duplicated_dedup_hash = NULL; + + /* look if the cluster is duplicated */ + hash_node = qcow2_build_hash_lookup_offset(bs, + data, + skip_clusters_nr, + precomputed_hash); + + /* round the logical cluster offset to cluster boundaries */ + logical_cluster_offset = sector_num & ~(s->cluster_sectors - 1); + ret = qcow2_get_cluster_offset(bs, logical_cluster_offset << 9, + &pnum, &existing_physical_cluster_offset); + if (ret < 0) { + goto exit; + } + + if (!hash_node) { + /* no duplicated cluster found, store the hash for later usage */ + *non_duplicated_dedup_hash = qcow2_build_dedup_hash(*precomputed_hash); + return 0; + } else { + /* duplicated cluster found */ + physical_cluster_offset = hash_node->offset; + first_logical_offset = hash_node->first_logical_offset; + + if (existing_physical_cluster_offset != physical_cluster_offset << 9) { + + ret = update_cluster_refcount(bs, + physical_cluster_offset / + s->cluster_sectors, + 1, + false); + if (ret < 0) { + goto exit; + } + + ret = qcow2_dedup_link_l2(bs, logical_cluster_offset, + physical_cluster_offset, + false); + if (ret < 0) { + goto exit; + } + + /* if refcount was one remove the QCOW_FLAG_FIRST flag */ + if (first_logical_offset & QCOW_FLAG_FIRST) { + first_logical_offset &= ~QCOW_FLAG_FIRST; + ret = qcow2_dedup_link_l2(bs, first_logical_offset, + physical_cluster_offset, + true); + if (ret < 0) { + goto exit; + } + hash_node->first_logical_offset = first_logical_offset; + } + } + } + + ret = 1; +exit: + g_free(*precomputed_hash); + *precomputed_hash = NULL; + return ret; +} + +/* + * Deduplicate all the cluster that can be deduplicated. + * + * Next it compute the number of non deduplicable sectors to come while storing + * the hashes of these sectors in a linked list for later usage. + * Then it compute the first duplicated cluster hash that come after non + * deduplicable cluster, this hash will be used at next call of the function + * + * @u: where to store the list of undedupable hashes + * @sector_num: the first sector to deduplicate (in sectors) + * @data: the buffer containing the data to deduplicate + * @data_nr: the size of the buffer in sectors + * @skip_cluster_nr: the number of cluster to skip at the begining of data + * @next_non_dedupable_sectors_nr: result containing the number of non + * deduplicable sectors to come + * next_call_first_hash: hash saved between the function call + * + * + * + */ +int qcow2_dedup(BlockDriverState *bs, + UndedupableHashes *u, + uint64_t sector_num, + uint8_t *data, + int data_nr, + int *skip_clusters_nr, + int *next_non_dedupable_sectors_nr, + uint8_t **next_call_first_hash) +{ + BDRVQcowState *s = bs->opaque; + int ret; + int deduped_clusters_nr = 0; + int left_to_test_clusters_nr; + int begining_index; + uint8_t *hash = NULL; + QCowHashElement *non_duplicated_dedup_hash = NULL; + + /* should already be zero when entering this function */ + assert(*next_non_dedupable_sectors_nr == 0); + + begining_index = sector_num & (s->cluster_sectors - 1); + + left_to_test_clusters_nr = (data_nr / s->cluster_sectors) - + *skip_clusters_nr; + + /* Deduplicate all that can be */ + while (left_to_test_clusters_nr-- && + (ret = qcow2_dedup_cluster(bs, + sector_num, + next_call_first_hash, + data, + (*skip_clusters_nr)++, + &non_duplicated_dedup_hash)) == 1) { + sector_num += s->cluster_sectors; + deduped_clusters_nr++; + } + + if (ret < 0) { + *next_call_first_hash = NULL; + goto exit; + } + + /* We deduped everything till the end */ + if (!non_duplicated_dedup_hash) { + *next_call_first_hash = NULL; + *next_non_dedupable_sectors_nr = 0; + goto exit; + } + + /* We consumed the precomputed hash */ + *next_call_first_hash = NULL; + + /* remember that the last sector we tested in non deduplicable */ + *next_non_dedupable_sectors_nr += s->cluster_sectors; + + /* Memorize the hash of the first non duplicated cluster. + * we will store it before writing the cluster to disk. + */ + QTAILQ_INSERT_TAIL(&u->undedupable_hashes, non_duplicated_dedup_hash, next); + + /* Count how many non duplicated sector can be written and memorize the + * hashes for later. + * We make sure we pass hash == NULL to force computation of the hash. + */ + hash = NULL; + while (left_to_test_clusters_nr-- > 0 && + !qcow2_build_hash_lookup_offset(bs, + data, + *skip_clusters_nr, + &hash)) { + *next_non_dedupable_sectors_nr += s->cluster_sectors; + non_duplicated_dedup_hash = qcow2_build_dedup_hash(hash); + QTAILQ_INSERT_TAIL(&u->undedupable_hashes, + non_duplicated_dedup_hash, next); + hash = NULL; + (*skip_clusters_nr)++; + } + + /* We find a duplicated cluster before stopping iterating store it for + * next call. + */ + if (hash && non_duplicated_dedup_hash && + non_duplicated_dedup_hash->hash != hash) { + *next_call_first_hash = hash; + } + +exit: + if (!deduped_clusters_nr) { + return 0; + } + return deduped_clusters_nr * s->cluster_sectors - begining_index; +} diff --git a/block/qcow2.h b/block/qcow2.h index ccb24ad..5c18425 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -58,6 +58,11 @@ #define DEFAULT_CLUSTER_SIZE 65536 +/* indicate that the hash structure is empty and miss offset */ +#define QCOW_FLAG_EMPTY (1LL << 62) +/* indicate that the cluster for this hash has QCOW_OFLAG_COPIED on disk */ +#define QCOW_FLAG_FIRST (1LL << 63) + /* deduplication node */ typedef struct { uint8_t *hash; /* 32 bytes hash of a given cluster */ @@ -372,5 +377,13 @@ int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs, int sectors_nr, uint8_t **dedup_cluster_data, int *dedup_cluster_data_nr); +int qcow2_dedup(BlockDriverState *bs, + UndedupableHashes *u, + uint64_t sector_num, + uint8_t *data, + int data_nr, + int *skip_clusters_nr, + int *next_non_dedupable_sectors_nr, + uint8_t **next_call_first_hash); #endif -- 1.7.10.4