Re: [RFC v2 04/83] NOVA inode definition.
On Wed, Mar 14, 2018 at 10:06 PM, Darrick J. Wong wrote: > On Sat, Mar 10, 2018 at 10:17:45AM -0800, Andiry Xu wrote: >> From: Andiry Xu >> >> inode.h defines the non-volatile and volatile NOVA inode data structures. >> >> The non-volatile NOVA inode (nova_inode) is aligned to 128 bytes and contains >> file/directory metadata information. The most important fields >> are log_head and log_tail. log_head points to the start of >> the log, and log_tail points to the end of the latest committed >> log entry. NOVA make updates to the inode by appending >> to the log tail and update the log_tail pointer atomically. >> >> The volatile NOVA inode (nova_inode_info) contains necessary >> information to limit access to the non-volatile NOVA inode during runtime. >> It has a radix tree to map file offset or filenames to the corresponding >> log entries. >> >> Signed-off-by: Andiry Xu >> --- >> fs/nova/inode.h | 187 >> >> 1 file changed, 187 insertions(+) >> create mode 100644 fs/nova/inode.h >> >> diff --git a/fs/nova/inode.h b/fs/nova/inode.h >> new file mode 100644 >> index 000..f9187e3 >> --- /dev/null >> +++ b/fs/nova/inode.h >> @@ -0,0 +1,187 @@ >> +#ifndef __INODE_H >> +#define __INODE_H >> + >> +struct nova_inode_info_header; >> +struct nova_inode; >> + >> +#include "super.h" >> + >> +enum nova_new_inode_type { >> + TYPE_CREATE = 0, >> + TYPE_MKNOD, >> + TYPE_SYMLINK, >> + TYPE_MKDIR >> +}; >> + >> + >> +/* >> + * Structure of an inode in PMEM >> + * Keep the inode size to within 120 bytes: We use the last eight bytes >> + * as inode table tail pointer. > > I would've expected a > BUILD_BUG_ON(NOVA_INODE_SIZE - sizeof(struct nova_inode) == 8); > or something to enforce this. > Thanks, will do. > (Or just equate inode number with byte offset? I looked ahead at the > directory entries and they seem to be 64-bit...) > > I guess I'm being lazy and doing a on-disk-format-only review. 
:) > >> + */ >> +struct nova_inode { >> + >> + /* first 40 bytes */ >> + u8 i_rsvd; /* reserved. used to be checksum */ > > Magic number? > OK. >> + u8 valid; /* Is this inode valid? */ >> + u8 deleted; /* Is this inode deleted? */ > > Would i_mode == 0 cover these? > Deleted flag comes from NOVA-Fortis code. I will check if i_mode can cover it. >> + u8 i_blk_type; /* data block size this inode uses */ > > I would've thought these would just be bits of i_flags? > > Also, if I have a 1G blocksize file and free space fragments to the > point that there's > 1G of free space but none of it contiguous, I guess > I can expect ENOSPC? > Yes, but 1G blocksize has not been tested. >> + __le32 i_flags; /* Inode flags */ >> + __le64 i_size; /* Size of data in bytes */ >> + __le32 i_ctime; /* Inode modification time */ >> + __le32 i_mtime; /* Inode b-tree Modification time */ >> + __le32 i_atime; /* Access time */ > > Same y2038 grumble from the previous patch. > Will fix. >> + __le16 i_mode; /* File mode */ >> + __le16 i_links_count; /* Links count */ >> + >> + __le64 i_xattr; /* Extended attribute block */ >> + >> + /* second 40 bytes */ >> + __le32 i_uid; /* Owner Uid */ >> + __le32 i_gid; /* Group Id */ >> + __le32 i_generation;/* File version (for NFS) */ >> + __le32 i_create_time; /* Create time */ >> + __le64 nova_ino;/* nova inode number */ >> + >> + __le64 log_head;/* Log head pointer */ >> + __le64 log_tail;/* Log tail pointer */ >> + >> + /* last 40 bytes */ >> + __le64 create_epoch_id; /* Transaction ID when create */ >> + __le64 delete_epoch_id; /* Transaction ID when deleted */ >> + >> + struct { >> + __le32 rdev; /* major/minor # */ >> + } dev; /* device inode */ >> + >> + __le32 csum;/* CRC32 checksum */ >> + /* Leave 8 bytes for inode table tail pointer */ >> +} __attribute((__packed__)); >> + >> +/* >> + * NOVA-specific inode state kept in DRAM >> + */ >> +struct nova_inode_info_header { >> + /* For files, tree holds a map from file offsets to >> + * write 
log entries. >> + * >> + * For directories, tree holds a map from a hash of the file name to >> + * dentry log entry. >> + */ >> + struct radix_tree_root tree; >> + struct rw_semaphore i_sem; /* Protect log and tree */ >> + unsigned short i_mode; /* Dir or file? */ >> + unsigned int i_flags; >> + unsigned long log_pages;/* Num of log pages */ >> + unsigned long i_size; >> + unsigned long i_blocks; >> + unsigned long ino; >> + unsigned long pi_addr; >> + unsigned long valid_entries;/* For thorough GC */ >> + unsigned long num_entries; /* For thorough GC
Re: [RFC v2 04/83] NOVA inode definition.
On Sat, Mar 10, 2018 at 10:17:45AM -0800, Andiry Xu wrote: > From: Andiry Xu > > inode.h defines the non-volatile and volatile NOVA inode data structures. > > The non-volatile NOVA inode (nova_inode) is aligned to 128 bytes and contains > file/directory metadata information. The most important fields > are log_head and log_tail. log_head points to the start of > the log, and log_tail points to the end of the latest committed > log entry. NOVA make updates to the inode by appending > to the log tail and update the log_tail pointer atomically. > > The volatile NOVA inode (nova_inode_info) contains necessary > information to limit access to the non-volatile NOVA inode during runtime. > It has a radix tree to map file offset or filenames to the corresponding > log entries. > > Signed-off-by: Andiry Xu > --- > fs/nova/inode.h | 187 > > 1 file changed, 187 insertions(+) > create mode 100644 fs/nova/inode.h > > diff --git a/fs/nova/inode.h b/fs/nova/inode.h > new file mode 100644 > index 000..f9187e3 > --- /dev/null > +++ b/fs/nova/inode.h > @@ -0,0 +1,187 @@ > +#ifndef __INODE_H > +#define __INODE_H > + > +struct nova_inode_info_header; > +struct nova_inode; > + > +#include "super.h" > + > +enum nova_new_inode_type { > + TYPE_CREATE = 0, > + TYPE_MKNOD, > + TYPE_SYMLINK, > + TYPE_MKDIR > +}; > + > + > +/* > + * Structure of an inode in PMEM > + * Keep the inode size to within 120 bytes: We use the last eight bytes > + * as inode table tail pointer. I would've expected a BUILD_BUG_ON(NOVA_INODE_SIZE - sizeof(struct nova_inode) == 8); or something to enforce this. (Or just equate inode number with byte offset? I looked ahead at the directory entries and they seem to be 64-bit...) I guess I'm being lazy and doing a on-disk-format-only review. :) > + */ > +struct nova_inode { > + > + /* first 40 bytes */ > + u8 i_rsvd; /* reserved. used to be checksum */ Magic number? > + u8 valid; /* Is this inode valid? */ > + u8 deleted; /* Is this inode deleted? 
*/ Would i_mode == 0 cover these? > + u8 i_blk_type; /* data block size this inode uses */ I would've thought these would just be bits of i_flags? Also, if I have a 1G blocksize file and free space fragments to the point that there's > 1G of free space but none of it contiguous, I guess I can expect ENOSPC? > + __le32 i_flags; /* Inode flags */ > + __le64 i_size; /* Size of data in bytes */ > + __le32 i_ctime; /* Inode modification time */ > + __le32 i_mtime; /* Inode b-tree Modification time */ > + __le32 i_atime; /* Access time */ Same y2038 grumble from the previous patch. > + __le16 i_mode; /* File mode */ > + __le16 i_links_count; /* Links count */ > + > + __le64 i_xattr; /* Extended attribute block */ > + > + /* second 40 bytes */ > + __le32 i_uid; /* Owner Uid */ > + __le32 i_gid; /* Group Id */ > + __le32 i_generation;/* File version (for NFS) */ > + __le32 i_create_time; /* Create time */ > + __le64 nova_ino;/* nova inode number */ > + > + __le64 log_head;/* Log head pointer */ > + __le64 log_tail;/* Log tail pointer */ > + > + /* last 40 bytes */ > + __le64 create_epoch_id; /* Transaction ID when create */ > + __le64 delete_epoch_id; /* Transaction ID when deleted */ > + > + struct { > + __le32 rdev; /* major/minor # */ > + } dev; /* device inode */ > + > + __le32 csum;/* CRC32 checksum */ > + /* Leave 8 bytes for inode table tail pointer */ > +} __attribute((__packed__)); > + > +/* > + * NOVA-specific inode state kept in DRAM > + */ > +struct nova_inode_info_header { > + /* For files, tree holds a map from file offsets to > + * write log entries. > + * > + * For directories, tree holds a map from a hash of the file name to > + * dentry log entry. > + */ > + struct radix_tree_root tree; > + struct rw_semaphore i_sem; /* Protect log and tree */ > + unsigned short i_mode; /* Dir or file? 
*/ > + unsigned int i_flags; > + unsigned long log_pages;/* Num of log pages */ > + unsigned long i_size; > + unsigned long i_blocks; > + unsigned long ino; > + unsigned long pi_addr; > + unsigned long valid_entries;/* For thorough GC */ > + unsigned long num_entries; /* For thorough GC */ > + u64 last_setattr; /* Last setattr entry */ > + u64 last_link_change; /* Last link change entry */ > + u64 last_dentry;/* Last updated dentry */ > + u64 trans_id; /* Transaction ID */ > + u64 log_head; /* Log head pointer */ > + u64 log_tail;
[RFC v2 04/83] NOVA inode definition.
From: Andiry Xu inode.h defines the non-volatile and volatile NOVA inode data structures. The non-volatile NOVA inode (nova_inode) is aligned to 128 bytes and contains file/directory metadata information. The most important fields are log_head and log_tail. log_head points to the start of the log, and log_tail points to the end of the latest committed log entry. NOVA makes updates to the inode by appending to the log tail and updating the log_tail pointer atomically. The volatile NOVA inode (nova_inode_info) contains necessary information to limit access to the non-volatile NOVA inode during runtime. It has a radix tree to map file offsets or filenames to the corresponding log entries. Signed-off-by: Andiry Xu --- fs/nova/inode.h | 187 1 file changed, 187 insertions(+) create mode 100644 fs/nova/inode.h diff --git a/fs/nova/inode.h b/fs/nova/inode.h new file mode 100644 index 000..f9187e3 --- /dev/null +++ b/fs/nova/inode.h @@ -0,0 +1,187 @@ +#ifndef __INODE_H +#define __INODE_H + +struct nova_inode_info_header; +struct nova_inode; + +#include "super.h" + +enum nova_new_inode_type { + TYPE_CREATE = 0, + TYPE_MKNOD, + TYPE_SYMLINK, + TYPE_MKDIR +}; + + +/* + * Structure of an inode in PMEM + * Keep the inode size to within 120 bytes: We use the last eight bytes + * as inode table tail pointer. + */ +struct nova_inode { + + /* first 40 bytes */ + u8 i_rsvd; /* reserved. used to be checksum */ + u8 valid; /* Is this inode valid? */ + u8 deleted; /* Is this inode deleted? 
*/ + u8 i_blk_type; /* data block size this inode uses */ + __le32 i_flags; /* Inode flags */ + __le64 i_size; /* Size of data in bytes */ + __le32 i_ctime; /* Inode modification time */ + __le32 i_mtime; /* Inode b-tree Modification time */ + __le32 i_atime; /* Access time */ + __le16 i_mode; /* File mode */ + __le16 i_links_count; /* Links count */ + + __le64 i_xattr; /* Extended attribute block */ + + /* second 40 bytes */ + __le32 i_uid; /* Owner Uid */ + __le32 i_gid; /* Group Id */ + __le32 i_generation;/* File version (for NFS) */ + __le32 i_create_time; /* Create time */ + __le64 nova_ino;/* nova inode number */ + + __le64 log_head;/* Log head pointer */ + __le64 log_tail;/* Log tail pointer */ + + /* last 40 bytes */ + __le64 create_epoch_id; /* Transaction ID when create */ + __le64 delete_epoch_id; /* Transaction ID when deleted */ + + struct { + __le32 rdev; /* major/minor # */ + } dev; /* device inode */ + + __le32 csum;/* CRC32 checksum */ + + /* Leave 8 bytes for inode table tail pointer */ +} __attribute((__packed__)); + +/* + * NOVA-specific inode state kept in DRAM + */ +struct nova_inode_info_header { + /* For files, tree holds a map from file offsets to +* write log entries. +* +* For directories, tree holds a map from a hash of the file name to +* dentry log entry. +*/ + struct radix_tree_root tree; + struct rw_semaphore i_sem; /* Protect log and tree */ + unsigned short i_mode; /* Dir or file? 
*/ + unsigned int i_flags; + unsigned long log_pages;/* Num of log pages */ + unsigned long i_size; + unsigned long i_blocks; + unsigned long ino; + unsigned long pi_addr; + unsigned long valid_entries;/* For thorough GC */ + unsigned long num_entries; /* For thorough GC */ + u64 last_setattr; /* Last setattr entry */ + u64 last_link_change; /* Last link change entry */ + u64 last_dentry;/* Last updated dentry */ + u64 trans_id; /* Transaction ID */ + u64 log_head; /* Log head pointer */ + u64 log_tail; /* Log tail pointer */ + u8 i_blk_type; +}; + +/* + * DRAM state for inodes + */ +struct nova_inode_info { + struct nova_inode_info_header header; + struct inode vfs_inode; +}; + + +static inline struct nova_inode_info *NOVA_I(struct inode *inode) +{ + return container_of(inode, struct nova_inode_info, vfs_inode); +} + +static inline void sih_lock(struct nova_inode_info_header *header) +{ + down_write(&header->i_sem); +} + +static inline void sih_unlock(struct nova_inode_info_header *header) +{ + up_write(&header->i_sem); +} + +static inline void sih_lock_shared(struct nova_inode_info_header *header) +{ + down_read(&header->i_sem); +} + +static inline void sih_unlock_shared(struct nova_inode_info_header *header) +{ + up_read(&header->i_sem