On Wed, Feb 06, 2013 at 01:31:41PM +0100, Benoît Canet wrote: > @@ -291,7 +297,11 @@ static int > qcow2_clear_l2_copied_flag_if_needed(BlockDriverState *bs, > /* remember that we dont't need to clear QCOW_OFLAG_COPIED again */ > hash_node->first_logical_sect &= first_logical_sect; > > - return 0; > + /* clear the QCOW_FLAG_FIRST flag from disk */ > + return qcow2_dedup_read_write_hash(bs, &hash_node->hash, > + &hash_node->first_logical_sect, > + hash_node->physical_sect, > + true);
The order of metadata updates needs to be thought through, especially what happens on power failure partway through. I don't understand the dedup code well enough to decide whether this call is safe. Hopefully towards the end of this patch series I'll be able to think through this. > } > > /* This function deduplicate a cluster > @@ -553,3 +563,316 @@ exit: > > return deduped_clusters_nr * s->cluster_sectors - begining_index; > } > + > + > +/* Create a deduplication table hash block, write it's offset to disk and > + * reference it in the RAM deduplication table > + * > + * sync this to disk and get the dedup cluster cache entry > + * > + * @index: index in the RAM deduplication table > + * @ret: offset on success, negative on error > + */ > +static uint64_t qcow2_create_block(BlockDriverState *bs, > + int32_t index) qcow2_create_block() is a vague name. qcow2_create_dedup_block()? > +{ > + BDRVQcowState *s = bs->opaque; > + int64_t offset; > + uint64_t data64; > + int ret = 0; > + > + /* allocate a new dedup table hash block */ > + offset = qcow2_alloc_clusters(bs, s->hash_block_size); > + > + if (offset < 0) { > + return offset; > + } > + > + ret = qcow2_cache_flush(bs, s->refcount_block_cache); > + if (ret < 0) { > + goto free_fail; > + } The hash block needs to be initialized and on disk before it gets linked into the dedup L1 table. Otherwise the L1 table entry would point to a hash table with undefined values. > +static inline bool qcow2_has_dedup_block(BlockDriverState *bs, > + uint32_t index) > +{ > + BDRVQcowState *s = bs->opaque; > + return s->dedup_table[index] == 0 ? false : true; Zero is false and non-zeor is true, so the following is equivalent: return s->dedup_table[index]; > +} > + > +static inline void qcow2_write_hash_to_block_and_dirty(BlockDriverState *bs, > + uint8_t *block, > + QCowHash *hash, > + int offset, > + uint64_t > *logical_sect) Nothing gets written to disk here, so "write" shouldn't be in the name. How about calling it qcow2_set_hash_block_entry()? > +{ > + BDRVQcowState *s = bs->opaque; > + uint64_t first; > + first = cpu_to_be64(*logical_sect); > + memcpy(block + offset, hash->data, HASH_LENGTH); > + memcpy(block + offset + HASH_LENGTH, &first, 8); > + qcow2_cache_entry_mark_dirty(s->dedup_cluster_cache, block); > +} > + > +static inline uint64_t qcow2_read_hash_from_block(uint8_t *block, > + QCowHash *hash, > + int offset) > +{ > + uint64_t first; > + memcpy(hash->data, block + offset, HASH_LENGTH); > + memcpy(&first, block + offset + HASH_LENGTH, 8); > + return be64_to_cpu(first); > +} > + > +/* Read/write a given hash and cluster_sect from/to the dedup table > + * > + * This function doesn't flush the dedup cache to disk > + * > + * @hash: the hash to read or store > + * @first_logical_sect: logical sector of the QCOW_FLAG_OCOPIED cluster Why mention QCOW_FLAG_OCOPIED here? Not sure if this holds true after the first logical sector contents are changed. Probably best not to mention QCOW_FLAG_OCOPIED. > + * @physical_sect: sector of the cluster in QCOW2 file (in > sectors) > + * @write: true to write, false to read > + * @ret: 0 on succes, errno on error s/succes/success/ s/errno/-errno/ > +/* This function store a hash information to disk and RAM > + * > + * @hash: the QCowHash to process > + * @logical_sect: the logical sector of the cluster seen by the guest > + * @physical_sect: the physical sector of the stored cluster > + * @ret: 0 on success, negative on error > + */ > +static int qcow2_store_hash(BlockDriverState *bs, > + QCowHash *hash, > + uint64_t logical_sect, > + uint64_t physical_sect) > +{ > + BDRVQcowState *s = bs->opaque; > + QCowHashNode *hash_node; > + > + hash_node = g_tree_lookup(s->dedup_tree_by_hash, hash); > + > + /* no hash node found for this hash */ > + if (!hash_node) { > + return 0; > + } > + > + /* the hash node information are already completed */ > + if (!is_hash_node_empty(hash_node)) { > + return 0; > + } > + > + /* Remember that this QCowHashNoderepresent the first occurence of the s/QCowHashNoderepresent/QCowHashNode represents/ s/occurence/occurrence/ > + * cluste so we will be able to clear QCOW_OFLAG_COPIED from the L2 table s/cluste/cluster/