On 11.02.19 г. 15:23 ч., Qu Wenruo wrote:
> 
> 
> On 2019/2/11 下午8:55, Nikolay Borisov wrote:
>>
>>
>> On 11.02.19 г. 7:16 ч., Qu Wenruo wrote:
>>> Current delayed ref interface has several problems:
>>> - Longer and longer parameter lists
>>>   bytenr
>>>   num_bytes
>>>   parent
>>>   ---------- so far so good
>>>   ref_root
>>>   owner
>>>   offset
>>>   ---------- I don't feel good now
>>>
>>> - Different interpretation for the same parameter
>>>   Above @owner for data ref is inode number (u64),
>>>   while for tree ref, it's level (int).
>>>
>>>   They are even in different size range.
>>>   For level we only need 0~8, while for ino it's
>>>   BTRFS_FIRST_FREE_OBJECTID~BTRFS_LAST_FREE_OBJECTID.
>>>
>>>   And @offset doesn't even make sense for tree ref.
>>>
>>>   Such parameter reuse may look clever as a hidden union, but it
>>>   destroys code readability.
>>>
>>> To solve both problems, we introduce a new structure, btrfs_ref:
>>>
>>> - Structure instead of long parameter list
>>>   This makes later expansion easier, and better documented.
>>>
>>> - Use btrfs_ref::type to distinguish data and tree ref
>>>
>>> - Use proper union to store data/tree ref specific structures.
>>>
>>> - Use separate functions to fill data/tree ref data, with a common generic
>>>   function to fill common bytenr/num_bytes members.
>>>
>>> All parameters will find their place in btrfs_ref, and an extra member,
>>> @real_root, inspired by ref-verify code, is newly introduced for later
>>> qgroup code, to record which tree triggered this extent modification.
>>>
>>> This patch doesn't touch any code, but provides the basis for incoming
>>> refactors.
>>>
>>> Signed-off-by: Qu Wenruo <w...@suse.com>
>>> ---
>>>  fs/btrfs/delayed-ref.h | 116 +++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 116 insertions(+)
>>>
>>> diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
>>> index d2af974f68a1..24addc5163bc 100644
>>> --- a/fs/btrfs/delayed-ref.h
>>> +++ b/fs/btrfs/delayed-ref.h
>>> @@ -187,6 +187,90 @@ struct btrfs_delayed_ref_root {
>>>     u64 qgroup_to_skip;
>>>  };
>>>  
>>> +enum btrfs_ref_type {
>>> +   BTRFS_REF_NOT_SET,
>>> +   BTRFS_REF_DATA,
>>> +   BTRFS_REF_METADATA,
>>> +   BTRFS_REF_LAST,
>>> +};
>>> +
>>> +struct btrfs_data_ref {
>>> +   /* For EXTENT_DATA_REF */
>>> +
>>> +   /* Root who refers to this data extent */
>> nit: s/who/which/
>>> +   u64 ref_root;
>>> +
>>> +   /* Inode who refers to this data extent */
>> nit: DITTO
>>> +   u64 ino;
>>> +
>>> +   /*
>>> +    * file_offset - extent_offset
>>> +    *
>>> +    * file_offset is the key.offset of the EXTENT_DATA key.
>>> +    * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
>>> +    */
>>
>> This needs rewording since it's rather cryptic now.
> 
> It's cryptic due to the EXTENT_ITEM design from the very beginning.
> I'm all ears to improve this description.
> 
>> Looking at the dev
>> docs and the description for 'offset' field in btrfs_file_extent_item I
>> can sort of deduce that this field will only be different from zero if
>> this reference is for an extent which is shared between 2 snapshots.
> 
> Don't forget reflink and data CoW.
> 
> Like this:
> 
>       item 6 key (257 EXTENT_DATA 0) itemoff 15813 itemsize 53
>               generation 6 type 1 (regular)
>               extent data disk byte 13631488 nr 1048576
>               extent data offset 0 nr 4096 ram 1048576
>       item 7 key (257 EXTENT_DATA 4096) itemoff 15760 itemsize 53
>               generation 7 type 1 (regular)
>               extent data disk byte 14680064 nr 4096
>               extent data offset 0 nr 4096 ram 4096
>       item 8 key (257 EXTENT_DATA 8192) itemoff 15707 itemsize 53
>               generation 6 type 1 (regular)
>               extent data disk byte 13631488 nr 1048576
>               extent data offset 8192 nr 1040384 ram 1048576
> 
> The EXTENT_DATA items at offsets 0 and 8K originally come from one larger
> extent; the EXTENT_DATA item at offset 4K is a newly written one.

Okay, this makes sense. However, if we take item 8 being inserted, then
according to the comments the 'offset' member for this data ref will be
0, since 8k (from key.offset) - 8k (from btrfs_file_extent_offset) = 0.
Why is that? Shouldn't the offset here be 8k rather than 0?

> 
> But the current design makes EXTENT_ITEM inline data backref pretty clean:
> 
>         item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16230 itemsize 53
>                 refs 2 gen 6 flags DATA
>                 extent data backref root FS_TREE objectid 257 offset 0
> count 2
> 
> No need for an extra inline data backref, just increase the original
> count from 1 to 2.
>>
>> So if file foo is shared between two snapshots, has 1 extent and in
>> snapshot2 this extent is partially changed then I'd expect extent_offset
>> to point to the start in the original (unchanged extent), correct?
> 
> As long as there is some new EXTENT_DATA pointing to the original
> unchanged extent, then yes, the 'offset' will change.
> 
> Just like the EXTENT_DATA at 8K offset above.
> 
>>
>>> +   u64 offset;
>>> +};
>>> +
>>> +struct btrfs_tree_ref {
>>> +   /*
>>> +    * Level of this tree block
>>> +    *
>>> +    * Shared for skinny (TREE_BLOCK_REF) and normal tree ref.
>>
>> This sentence is also not very clear? You mean this level applies to
>> tree block refs (irrespective of whether they are shared or normal tree
>> block refs)?
> 
> This is for any keyed or inlined tree ref which uses skinny metadata
> (level stored in key.offset, the common case now) or a non-skinny
> EXTENT_ITEM which uses btrfs_tree_block_info like:
> 
>       item 7 key (30507008 EXTENT_ITEM 16384) itemoff 15956 itemsize 51
>               refs 1 gen 4 flags TREE_BLOCK
>               tree block key (0 UNKNOWN.0 0) level 0 <<< here.
>               tree block backref root UUID_TREE
> 
> 
> It's possible for the extent tree to not include the above cases, like
> the following case:
>         item 1 key (12648448 EXTENT_ITEM 16384) itemoff 16235 itemsize 24
>                 refs 9 gen 7 flags TREE_BLOCK
>         item 2 key (12648448 SHARED_BLOCK_REF 4481024) itemoff 3461
> itemsize 0
>                 shared block backref
> 
> So I'm not sure how to describe such case clearly.
> 
> Thanks,
> Qu
> 
>>
>>> +    */
>>> +   int level;
>>> +
>>> +   /*
>>> +    * Root who refers to this tree block.
>>
>> nit:s/who/which
>>
>>> +    *
>>> +    * For TREE_BLOCK_REF (skinny metadata, either inline or keyed)
>>> +    */
>>> +   u64 root;
>>> +
>>> +   /* For non-skinny metadata, no special member needed */
>>> +};
>>> +
>>> +struct btrfs_ref {
>>> +   enum btrfs_ref_type type;
>>> +   int action;
>>> +
>>> +   /*
>>> +    * Only use parent pointers as backref (SHARED_BLOCK_REF or
>>> +    * SHARED_DATA_REF) for this extent and its children.
>>> +    * Set for reloc trees.
>>> +    */
>>> +   bool only_backreferences:1;
>>> +
>>> +   /*
>>> +    * Whether this extent should go through qgroup record.
>>> +    *
>>> +    * Normally false, but for certain case like delayed subtree scan,
>>> +    * setting this flag can hugely reduce qgroup overhead.
>>> +    */
>>> +   bool skip_qgroup:1;
>>> +
>>> +   /*
>>> +    * Optional. To which root this modification is for.
>>> +    * Mostly used for qgroup optimization.
>>> +    *
>>> +    * When unset, data/tree ref init code will populate it.
>>> +    * In certain case, we're modifying reference for a different root.
>>> +    * E.g. Cow fs tree blocks for balance.
>>> +    * In that case, tree_ref::root will be fs tree, but we're doing this
>>> +    * for reloc tree, then we should set @real_root to reloc tree.
>>> +    */
>>> +   u64 real_root;
>>> +   u64 bytenr;
>>> +   u64 len;
>>> +
>>> +   /* Bytenr of the parent tree block */
>>> +   u64 parent;
>>> +   union {
>>> +           struct btrfs_data_ref data_ref;
>>> +           struct btrfs_tree_ref tree_ref;
>>> +   };
>>> +};
>>> +
>>>  extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
>>>  extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
>>>  extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
>>> @@ -195,6 +279,38 @@ extern struct kmem_cache 
>>> *btrfs_delayed_extent_op_cachep;
>>>  int __init btrfs_delayed_ref_init(void);
>>>  void __cold btrfs_delayed_ref_exit(void);
>>>  
>>> +static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
>>> +                           int action, u64 bytenr, u64 len, u64 parent)
>>> +{
>>> +   generic_ref->action = action;
>>> +   generic_ref->bytenr = bytenr;
>>> +   generic_ref->len = len;
>>> +   generic_ref->parent = parent;
>>> +}
>>> +
>>> +static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref,
>>> +                           int level, u64 root)
>>> +{
>>> +   /* If @real_root not set, use @root as fallback */
>>> +   if (!generic_ref->real_root)
>>> +           generic_ref->real_root = root;
>>> +   generic_ref->tree_ref.level = level;
>>> +   generic_ref->tree_ref.root = root;
>>> +   generic_ref->type = BTRFS_REF_METADATA;
>>> +}
>>> +
>>> +static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
>>> +                           u64 ref_root, u64 ino, u64 offset)
>>> +{
>>> +   /* If @real_root not set, use @ref_root as fallback */
>>> +   if (!generic_ref->real_root)
>>> +           generic_ref->real_root = ref_root;
>>> +   generic_ref->data_ref.ref_root = ref_root;
>>> +   generic_ref->data_ref.ino = ino;
>>> +   generic_ref->data_ref.offset = offset;
>>> +   generic_ref->type = BTRFS_REF_DATA;
>>> +}
>>> +
>>>  static inline struct btrfs_delayed_extent_op *
>>>  btrfs_alloc_delayed_extent_op(void)
>>>  {
>>>
> 

Reply via email to