Re: [PATCH 1/1] f2fs: merge equivalent flags F2FS_GET_BLOCK_[READ|DIO]

2017-08-08 Thread Sun Qiuyang




On 2017/8/8 18:27, sunqiuyang wrote:

From: Qiuyang Sun 

Currently, the two flags F2FS_GET_BLOCK_[READ|DIO] are totally equivalent
and can be used interchangably in all scenarios they are involved in. This
patch deletes F2FS_GET_BLOCK_READ and uses F2FS_GET_BLOCK_DIO instead.


Seems weird from readability point of view, so how about keeping
F2FS_GET_BLOCK_READ alive but sharing defined value with F2FS_GET_BLOCK_DIO?

enum {
F2FS_GET_BLOCK_DIO,
F2FS_GET_BLOCK_READ = F2FS_GET_BLOCK_DIO,
F2FS_GET_BLOCK_FIEMAP,
...
}

Thanks,
How about renaming both of them F2FS_GET_BLOCK_DEFAULT? Actually neither 
READ nor DIO is explicitly referenced in f2fs_map_blocks(); it is just 
the "default" case. We have no reason to limit the use of this flag to 
"read" or "direct IO" only.


Thanks,




Signed-off-by: Qiuyang Sun 
---
 fs/f2fs/data.c | 2 +-
 fs/f2fs/f2fs.h | 1 -
 fs/f2fs/file.c | 4 ++--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c43262d..e0a59bf 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1244,7 +1244,7 @@ static int f2fs_mpage_readpages(struct address_space 
*mapping,
map.m_len = last_block - block_in_file;

if (f2fs_map_blocks(inode, , 0,
-   F2FS_GET_BLOCK_READ))
+   F2FS_GET_BLOCK_DIO))
goto set_error_page;
}
 got_it:
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cea329f..0593ca7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -514,7 +514,6 @@ struct f2fs_map_blocks {
 };

 /* for flag in get_data_block */
-#define F2FS_GET_BLOCK_READ0
 #define F2FS_GET_BLOCK_DIO 1
 #define F2FS_GET_BLOCK_FIEMAP  2
 #define F2FS_GET_BLOCK_BMAP3
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index e2b33b8..8271cb5 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2074,7 +2074,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 */
while (map.m_lblk < pg_end) {
map.m_len = pg_end - map.m_lblk;
-   err = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_READ);
+   err = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_DIO);
if (err)
goto out;

@@ -2116,7 +2116,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,

 do_map:
map.m_len = pg_end - map.m_lblk;
-   err = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_READ);
+   err = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_DIO);
if (err)
goto clear_out;





.





Re: [PATCH 1/1] f2fs: merge equivalent flags F2FS_GET_BLOCK_[READ|DIO]

2017-08-08 Thread Sun Qiuyang




On 2017/8/8 18:27, sunqiuyang wrote:

From: Qiuyang Sun 

Currently, the two flags F2FS_GET_BLOCK_[READ|DIO] are totally equivalent
and can be used interchangably in all scenarios they are involved in. This
patch deletes F2FS_GET_BLOCK_READ and uses F2FS_GET_BLOCK_DIO instead.


Seems weird from readability point of view, so how about keeping
F2FS_GET_BLOCK_READ alive but sharing defined value with F2FS_GET_BLOCK_DIO?

enum {
F2FS_GET_BLOCK_DIO,
F2FS_GET_BLOCK_READ = F2FS_GET_BLOCK_DIO,
F2FS_GET_BLOCK_FIEMAP,
...
}

Thanks,
How about renaming both of them F2FS_GET_BLOCK_DEFAULT? Actually neither 
READ nor DIO is explicitly referenced in f2fs_map_blocks(); it is just 
the "default" case. We have no reason to limit the use of this flag to 
"read" or "direct IO" only.


Thanks,




Signed-off-by: Qiuyang Sun 
---
 fs/f2fs/data.c | 2 +-
 fs/f2fs/f2fs.h | 1 -
 fs/f2fs/file.c | 4 ++--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c43262d..e0a59bf 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1244,7 +1244,7 @@ static int f2fs_mpage_readpages(struct address_space 
*mapping,
map.m_len = last_block - block_in_file;

if (f2fs_map_blocks(inode, , 0,
-   F2FS_GET_BLOCK_READ))
+   F2FS_GET_BLOCK_DIO))
goto set_error_page;
}
 got_it:
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cea329f..0593ca7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -514,7 +514,6 @@ struct f2fs_map_blocks {
 };

 /* for flag in get_data_block */
-#define F2FS_GET_BLOCK_READ0
 #define F2FS_GET_BLOCK_DIO 1
 #define F2FS_GET_BLOCK_FIEMAP  2
 #define F2FS_GET_BLOCK_BMAP3
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index e2b33b8..8271cb5 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2074,7 +2074,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 */
while (map.m_lblk < pg_end) {
map.m_len = pg_end - map.m_lblk;
-   err = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_READ);
+   err = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_DIO);
if (err)
goto out;

@@ -2116,7 +2116,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,

 do_map:
map.m_len = pg_end - map.m_lblk;
-   err = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_READ);
+   err = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_DIO);
if (err)
goto clear_out;





.





Re: [PATCH v8 1/1] f2fs: dax: implement direct access

2017-07-24 Thread Sun Qiuyang

Hi Jaegeuk,

Below is the error message I got from this testcase:
---
write (Invalid argument) len 1024 dio [dax to nondax | both nodax]
read (Bad address) len [4096 | 16777216 | 67108864] dio dax to nondax
---
The write error is expected, as F2FS does not support unaligned direct 
IO (1024 B).


The read error is more complex. In the test script, when we mmap the src 
file (dax), the flags (VM_MIXEDMAP | VM_HUGEPAGE) are added into
vma->vm_flags. Later on, when we write to the dest file (nondax) and 
then read from it by direct IO, we will fail to get pages from this 
"special" vma.


Functions involved:
f2fs_direct_IO
blockdev_direct_IO
__blockdev_direct_IO
do_direct_IO
dio_get_page
dio_refill_pages
iov_iter_get_pages
get_user_pages_unlocked
__get_user_pages_fast
__get_user_pages_unlocked
__get_user_pages_locked
__get_user_pages
follow_page_mask
follow_p4d_mask
follow_pud_mask
follow_pmd_mask
follow_page_pte
vm_normal_page
follow_pfn_pte

In my test environment HAVE_PTE_SPECIAL is true, and vm_normal_page() 
returns NULL due to VM_MIXEDMAP in vm_flags. Then follow_page_pte() 
continue to call follow_pfn_pte(), which returns -EFAULT. This is how we 
get a "bad address" error finally.


This error also occurs in EXT4-DAX for similar reasons.

Thanks,



Hi Qiuyang,

This fails xfstests/generic/413.

Thanks,

On 07/20, sunqiuyang wrote:

From: Qiuyang Sun 

This patch implements Direct Access (DAX) in F2FS, including:
 - a mount option to choose whether to enable DAX or not
 - read/write and mmap of regular files in the DAX way
 - zero-out of unaligned partial blocks in the DAX way
 - garbage collection of DAX files, by mapping both old and new physical
   addresses of a data page into memory and copy data between them directly
 - incompatibility of DAX with inline data, atomic or volatile write,
   collapse|insert_range, etc.

Signed-off-by: Qiuyang Sun 
---
Changelog v7 -> v8:
 - Introduce the macro f2fs_dax_file() to judge if a file is DAX for cases
   when CONFIG_FS_DAX is set or not
 - Return -ENOTSUPP when an operation does not support DAX
 - In f2fs_iomap_begin(), convert the inline data of an inode (if any)
   before mapping blocks
 - Minor cleanups
---
 Documentation/filesystems/f2fs.txt |   2 +
 fs/f2fs/data.c | 132 +-
 fs/f2fs/f2fs.h |  15 +++
 fs/f2fs/file.c | 183 -
 fs/f2fs/gc.c   | 103 -
 fs/f2fs/inline.c   |   3 +
 fs/f2fs/inode.c|   8 +-
 fs/f2fs/namei.c|   5 +
 fs/f2fs/super.c|  15 +++
 9 files changed, 454 insertions(+), 12 deletions(-)

diff --git a/Documentation/filesystems/f2fs.txt 
b/Documentation/filesystems/f2fs.txt
index 273ccb2..c86c421 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -164,6 +164,8 @@ io_bits=%u Set the bit size of write IO 
requests. It should be set
with "mode=lfs".
 usrquota   Enable plain user disk quota accounting.
 grpquota   Enable plain group disk quota accounting.
+daxUse direct access (no page cache). See
+   Documentation/filesystems/dax.txt.

 

 DEBUGFS ENTRIES
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 87c1f41..4eb4b76 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -910,6 +910,15 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
err = -EIO;
goto sync_out;
}
+   /*
+* If newly allocated blocks are to be zeroed out later,
+* a single f2fs_map_blocks must not contain both old
+* and new blocks at the same time.
+*/
+   if (flag == F2FS_GET_BLOCK_ZERO
+   && (map->m_flags & F2FS_MAP_MAPPED)
+   && !(map->m_flags & F2FS_MAP_NEW))
+   goto sync_out;
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (blkaddr == NULL_ADDR) {
prealloc++;
@@ -938,6 +947,8 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
blkaddr != NEW_ADDR)
goto sync_out;
}
+   } else if (flag == F2FS_GET_BLOCK_ZERO && map->m_flags & F2FS_MAP_NEW) {
+   goto sync_out;
}

if (flag == F2FS_GET_BLOCK_PRE_AIO)
@@ -996,6 +1007,12 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks 

Re: [PATCH v8 1/1] f2fs: dax: implement direct access

2017-07-24 Thread Sun Qiuyang

Hi Jaegeuk,

Below is the error message I got from this testcase:
---
write (Invalid argument) len 1024 dio [dax to nondax | both nodax]
read (Bad address) len [4096 | 16777216 | 67108864] dio dax to nondax
---
The write error is expected, as F2FS does not support unaligned direct 
IO (1024 B).


The read error is more complex. In the test script, when we mmap the src 
file (dax), the flags (VM_MIXEDMAP | VM_HUGEPAGE) are added into
vma->vm_flags. Later on, when we write to the dest file (nondax) and 
then read from it by direct IO, we will fail to get pages from this 
"special" vma.


Functions involved:
f2fs_direct_IO
blockdev_direct_IO
__blockdev_direct_IO
do_direct_IO
dio_get_page
dio_refill_pages
iov_iter_get_pages
get_user_pages_unlocked
__get_user_pages_fast
__get_user_pages_unlocked
__get_user_pages_locked
__get_user_pages
follow_page_mask
follow_p4d_mask
follow_pud_mask
follow_pmd_mask
follow_page_pte
vm_normal_page
follow_pfn_pte

In my test environment HAVE_PTE_SPECIAL is true, and vm_normal_page() 
returns NULL due to VM_MIXEDMAP in vm_flags. Then follow_page_pte() 
continue to call follow_pfn_pte(), which returns -EFAULT. This is how we 
get a "bad address" error finally.


This error also occurs in EXT4-DAX for similar reasons.

Thanks,



Hi Qiuyang,

This fails xfstests/generic/413.

Thanks,

On 07/20, sunqiuyang wrote:

From: Qiuyang Sun 

This patch implements Direct Access (DAX) in F2FS, including:
 - a mount option to choose whether to enable DAX or not
 - read/write and mmap of regular files in the DAX way
 - zero-out of unaligned partial blocks in the DAX way
 - garbage collection of DAX files, by mapping both old and new physical
   addresses of a data page into memory and copy data between them directly
 - incompatibility of DAX with inline data, atomic or volatile write,
   collapse|insert_range, etc.

Signed-off-by: Qiuyang Sun 
---
Changelog v7 -> v8:
 - Introduce the macro f2fs_dax_file() to judge if a file is DAX for cases
   when CONFIG_FS_DAX is set or not
 - Return -ENOTSUPP when an operation does not support DAX
 - In f2fs_iomap_begin(), convert the inline data of an inode (if any)
   before mapping blocks
 - Minor cleanups
---
 Documentation/filesystems/f2fs.txt |   2 +
 fs/f2fs/data.c | 132 +-
 fs/f2fs/f2fs.h |  15 +++
 fs/f2fs/file.c | 183 -
 fs/f2fs/gc.c   | 103 -
 fs/f2fs/inline.c   |   3 +
 fs/f2fs/inode.c|   8 +-
 fs/f2fs/namei.c|   5 +
 fs/f2fs/super.c|  15 +++
 9 files changed, 454 insertions(+), 12 deletions(-)

diff --git a/Documentation/filesystems/f2fs.txt 
b/Documentation/filesystems/f2fs.txt
index 273ccb2..c86c421 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -164,6 +164,8 @@ io_bits=%u Set the bit size of write IO 
requests. It should be set
with "mode=lfs".
 usrquota   Enable plain user disk quota accounting.
 grpquota   Enable plain group disk quota accounting.
+daxUse direct access (no page cache). See
+   Documentation/filesystems/dax.txt.

 

 DEBUGFS ENTRIES
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 87c1f41..4eb4b76 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -910,6 +910,15 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
err = -EIO;
goto sync_out;
}
+   /*
+* If newly allocated blocks are to be zeroed out later,
+* a single f2fs_map_blocks must not contain both old
+* and new blocks at the same time.
+*/
+   if (flag == F2FS_GET_BLOCK_ZERO
+   && (map->m_flags & F2FS_MAP_MAPPED)
+   && !(map->m_flags & F2FS_MAP_NEW))
+   goto sync_out;
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (blkaddr == NULL_ADDR) {
prealloc++;
@@ -938,6 +947,8 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
blkaddr != NEW_ADDR)
goto sync_out;
}
+   } else if (flag == F2FS_GET_BLOCK_ZERO && map->m_flags & F2FS_MAP_NEW) {
+   goto sync_out;
}

if (flag == F2FS_GET_BLOCK_PRE_AIO)
@@ -996,6 +1007,12 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
goto next_dnode;

 sync_out:
+   

Re: [PATCH v7 1/1] f2fs: dax: implement direct access

2017-07-18 Thread Sun Qiuyang

Hi Jaegeuk,

Thank you for the edits and comments. I will accept them, except that 
f2fs_iomap_end() does not need to return any error. See below for details.



Hi Qiuyang,

On 07/18, sunqiuyang wrote:

From: Qiuyang Sun 

This patch implements Direct Access (DAX) in F2FS, including:
- a mount option to choose whether to enable DAX or not
- read/write and mmap of regular files in the DAX way
- zero-out of unaligned partial blocks in the DAX way
- garbage collection of DAX files, by mapping both old and new physical
  addresses of a data page into memory and copy data between them directly
- incompatibility of DAX with inline data, atomic or volatile write, etc.

Signed-off-by: Qiuyang Sun 
---
Changlog v6 -> v7:
- Document the mount option "dax" for this feature in f2fs.txt
- Minor cleanup in dax_move_data_page()

---
 Documentation/filesystems/f2fs.txt |   2 +
 fs/f2fs/data.c | 132 +++--
 fs/f2fs/f2fs.h |   9 ++
 fs/f2fs/file.c | 192 -
 fs/f2fs/gc.c   | 105 +++-
 fs/f2fs/inline.c   |   4 +
 fs/f2fs/inode.c|   8 +-
 fs/f2fs/namei.c|   5 +
 fs/f2fs/super.c|  15 +++
 9 files changed, 459 insertions(+), 13 deletions(-)

diff --git a/Documentation/filesystems/f2fs.txt 
b/Documentation/filesystems/f2fs.txt
index 273ccb2..c86c421 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -164,6 +164,8 @@ io_bits=%u Set the bit size of write IO 
requests. It should be set
with "mode=lfs".
 usrquota   Enable plain user disk quota accounting.
 grpquota   Enable plain group disk quota accounting.
+daxUse direct access (no page cache). See
+   Documentation/filesystems/dax.txt.

 

 DEBUGFS ENTRIES
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 87c1f41..26b908a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -910,6 +910,15 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
err = -EIO;
goto sync_out;
}
+   /*
+* If newly allocated blocks are to be zeroed out later,
+* a single f2fs_map_blocks must not contain both old
+* and new blocks at the same time.
+*/
+   if (flag == F2FS_GET_BLOCK_ZERO
+   && (map->m_flags & F2FS_MAP_MAPPED)
+   && !(map->m_flags & F2FS_MAP_NEW))
+   goto sync_out;
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (blkaddr == NULL_ADDR) {
prealloc++;
@@ -938,7 +947,8 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
blkaddr != NEW_ADDR)
goto sync_out;
}
-   }
+   } else if (flag == F2FS_GET_BLOCK_ZERO && map->m_flags & F2FS_MAP_NEW)


} else if () {
}


+   goto sync_out;

if (flag == F2FS_GET_BLOCK_PRE_AIO)
goto skip;
@@ -996,6 +1006,12 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
goto next_dnode;

 sync_out:
+   if (flag == F2FS_GET_BLOCK_ZERO && map->m_flags & F2FS_MAP_NEW) {
+   clean_bdev_aliases(inode->i_sb->s_bdev,
+   map->m_pblk, map->m_len);
+   err = sb_issue_zeroout(inode->i_sb, map->m_pblk,
+   map->m_len, GFP_NOFS);
+   }
f2fs_put_dnode();
 unlock_out:
if (create) {
@@ -1808,16 +1824,19 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,
return 0;
 }

-static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+static void f2fs_write_failed(struct address_space *mapping, loff_t to,
+   bool lock)
 {
struct inode *inode = mapping->host;
loff_t i_size = i_size_read(inode);

if (to > i_size) {
-   down_write(_I(inode)->i_mmap_sem);
+   if (lock)
+   down_write(_I(inode)->i_mmap_sem);
truncate_pagecache(inode, i_size);
truncate_blocks(inode, i_size, true);
-   up_write(_I(inode)->i_mmap_sem);
+   if (lock)
+   up_write(_I(inode)->i_mmap_sem);
}
 }

@@ -2000,7 +2019,7 @@ static int f2fs_write_begin(struct file 

Re: [PATCH v7 1/1] f2fs: dax: implement direct access

2017-07-18 Thread Sun Qiuyang

Hi Jaegeuk,

Thank you for the edits and comments. I will accept them, except that 
f2fs_iomap_end() does not need to return any error. See below for details.



Hi Qiuyang,

On 07/18, sunqiuyang wrote:

From: Qiuyang Sun 

This patch implements Direct Access (DAX) in F2FS, including:
- a mount option to choose whether to enable DAX or not
- read/write and mmap of regular files in the DAX way
- zero-out of unaligned partial blocks in the DAX way
- garbage collection of DAX files, by mapping both old and new physical
  addresses of a data page into memory and copy data between them directly
- incompatibility of DAX with inline data, atomic or volatile write, etc.

Signed-off-by: Qiuyang Sun 
---
Changlog v6 -> v7:
- Document the mount option "dax" for this feature in f2fs.txt
- Minor cleanup in dax_move_data_page()

---
 Documentation/filesystems/f2fs.txt |   2 +
 fs/f2fs/data.c | 132 +++--
 fs/f2fs/f2fs.h |   9 ++
 fs/f2fs/file.c | 192 -
 fs/f2fs/gc.c   | 105 +++-
 fs/f2fs/inline.c   |   4 +
 fs/f2fs/inode.c|   8 +-
 fs/f2fs/namei.c|   5 +
 fs/f2fs/super.c|  15 +++
 9 files changed, 459 insertions(+), 13 deletions(-)

diff --git a/Documentation/filesystems/f2fs.txt 
b/Documentation/filesystems/f2fs.txt
index 273ccb2..c86c421 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -164,6 +164,8 @@ io_bits=%u Set the bit size of write IO 
requests. It should be set
with "mode=lfs".
 usrquota   Enable plain user disk quota accounting.
 grpquota   Enable plain group disk quota accounting.
+daxUse direct access (no page cache). See
+   Documentation/filesystems/dax.txt.

 

 DEBUGFS ENTRIES
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 87c1f41..26b908a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -910,6 +910,15 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
err = -EIO;
goto sync_out;
}
+   /*
+* If newly allocated blocks are to be zeroed out later,
+* a single f2fs_map_blocks must not contain both old
+* and new blocks at the same time.
+*/
+   if (flag == F2FS_GET_BLOCK_ZERO
+   && (map->m_flags & F2FS_MAP_MAPPED)
+   && !(map->m_flags & F2FS_MAP_NEW))
+   goto sync_out;
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (blkaddr == NULL_ADDR) {
prealloc++;
@@ -938,7 +947,8 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
blkaddr != NEW_ADDR)
goto sync_out;
}
-   }
+   } else if (flag == F2FS_GET_BLOCK_ZERO && map->m_flags & F2FS_MAP_NEW)


} else if () {
}


+   goto sync_out;

if (flag == F2FS_GET_BLOCK_PRE_AIO)
goto skip;
@@ -996,6 +1006,12 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
goto next_dnode;

 sync_out:
+   if (flag == F2FS_GET_BLOCK_ZERO && map->m_flags & F2FS_MAP_NEW) {
+   clean_bdev_aliases(inode->i_sb->s_bdev,
+   map->m_pblk, map->m_len);
+   err = sb_issue_zeroout(inode->i_sb, map->m_pblk,
+   map->m_len, GFP_NOFS);
+   }
f2fs_put_dnode();
 unlock_out:
if (create) {
@@ -1808,16 +1824,19 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,
return 0;
 }

-static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+static void f2fs_write_failed(struct address_space *mapping, loff_t to,
+   bool lock)
 {
struct inode *inode = mapping->host;
loff_t i_size = i_size_read(inode);

if (to > i_size) {
-   down_write(_I(inode)->i_mmap_sem);
+   if (lock)
+   down_write(_I(inode)->i_mmap_sem);
truncate_pagecache(inode, i_size);
truncate_blocks(inode, i_size, true);
-   up_write(_I(inode)->i_mmap_sem);
+   if (lock)
+   up_write(_I(inode)->i_mmap_sem);
}
 }

@@ -2000,7 +2019,7 @@ static int f2fs_write_begin(struct file *file, struct 
address_space *mapping,

 fail:
  

Re: [f2fs-dev] [PATCH v6 1/1] f2fs: dax: implement direct access

2017-07-17 Thread Sun Qiuyang



On 2017/7/12 17:06, sunqiuyang wrote:

From: Qiuyang Sun 

This patch implements Direct Access (DAX) in F2FS, including:
- a mount option to choose whether to enable DAX or not
- read/write and mmap of regular files in the DAX way
- zero-out of unaligned partial blocks in the DAX way
- garbage collection of DAX files, by mapping both old and new physical
  addresses of a data page into memory and copy data between them directly
- incompatibility of DAX with inline data, atomic or volatile write, etc.

Signed-off-by: Qiuyang Sun 
---
Changlog v5 -> v6:
- In f2fs_map_blocks(), optimize the separation of new allocated and old
  mapped blocks for the flag F2FS_GET_BLOCK_ZERO, and check the return
  value of zeroout;
- In f2fs_iomap_begin(), cover the truncation of failed allocation with the
  rwsemaphore i_mmap_sem when necessary;
- Optimize the order of exception handling in dax_move_data_page().

---
 fs/f2fs/data.c   | 132 --
 fs/f2fs/f2fs.h   |   9 +++
 fs/f2fs/file.c   | 192 ++-
 fs/f2fs/gc.c | 105 --
 fs/f2fs/inline.c |   4 ++
 fs/f2fs/inode.c  |   8 ++-
 fs/f2fs/namei.c  |   5 ++
 fs/f2fs/super.c  |  15 +
 8 files changed, 457 insertions(+), 13 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 87c1f41..26b908a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -910,6 +910,15 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
err = -EIO;
goto sync_out;
}
+   /*
+* If newly allocated blocks are to be zeroed out later,
+* a single f2fs_map_blocks must not contain both old
+* and new blocks at the same time.
+*/
+   if (flag == F2FS_GET_BLOCK_ZERO
+   && (map->m_flags & F2FS_MAP_MAPPED)
+   && !(map->m_flags & F2FS_MAP_NEW))
+   goto sync_out;
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (blkaddr == NULL_ADDR) {
prealloc++;
@@ -938,7 +947,8 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
blkaddr != NEW_ADDR)
goto sync_out;
}
-   }
+   } else if (flag == F2FS_GET_BLOCK_ZERO && map->m_flags & F2FS_MAP_NEW)
+   goto sync_out;

if (flag == F2FS_GET_BLOCK_PRE_AIO)
goto skip;
@@ -996,6 +1006,12 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
goto next_dnode;

 sync_out:
+   if (flag == F2FS_GET_BLOCK_ZERO && map->m_flags & F2FS_MAP_NEW) {
+   clean_bdev_aliases(inode->i_sb->s_bdev,
+   map->m_pblk, map->m_len);
+   err = sb_issue_zeroout(inode->i_sb, map->m_pblk,
+   map->m_len, GFP_NOFS);
+   }
f2fs_put_dnode();
 unlock_out:
if (create) {
@@ -1808,16 +1824,19 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,
return 0;
 }

-static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+static void f2fs_write_failed(struct address_space *mapping, loff_t to,
+   bool lock)
 {
struct inode *inode = mapping->host;
loff_t i_size = i_size_read(inode);

if (to > i_size) {
-   down_write(_I(inode)->i_mmap_sem);
+   if (lock)
+   down_write(_I(inode)->i_mmap_sem);
truncate_pagecache(inode, i_size);
truncate_blocks(inode, i_size, true);
-   up_write(_I(inode)->i_mmap_sem);
+   if (lock)
+   up_write(_I(inode)->i_mmap_sem);
}
 }

@@ -2000,7 +2019,7 @@ static int f2fs_write_begin(struct file *file, struct 
address_space *mapping,

 fail:
f2fs_put_page(page, 1);
-   f2fs_write_failed(mapping, pos + len);
+   f2fs_write_failed(mapping, pos + len, true);
return err;
 }

@@ -2077,7 +2096,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct 
iov_iter *iter)
if (err > 0)
set_inode_flag(inode, FI_UPDATE_WRITE);
else if (err < 0)
-   f2fs_write_failed(mapping, offset + count);
+   f2fs_write_failed(mapping, offset + count, true);
}

trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
@@ -2274,3 +2293,104 @@ int f2fs_migrate_page(struct address_space *mapping,
.migratepage= f2fs_migrate_page,
 #endif
 };
+
+#ifdef 

Re: [f2fs-dev] [PATCH v6 1/1] f2fs: dax: implement direct access

2017-07-17 Thread Sun Qiuyang



On 2017/7/12 17:06, sunqiuyang wrote:

From: Qiuyang Sun 

This patch implements Direct Access (DAX) in F2FS, including:
- a mount option to choose whether to enable DAX or not
- read/write and mmap of regular files in the DAX way
- zero-out of unaligned partial blocks in the DAX way
- garbage collection of DAX files, by mapping both old and new physical
  addresses of a data page into memory and copy data between them directly
- incompatibility of DAX with inline data, atomic or volatile write, etc.

Signed-off-by: Qiuyang Sun 
---
Changlog v5 -> v6:
- In f2fs_map_blocks(), optimize the separation of new allocated and old
  mapped blocks for the flag F2FS_GET_BLOCK_ZERO, and check the return
  value of zeroout;
- In f2fs_iomap_begin(), cover the truncation of failed allocation with the
  rwsemaphore i_mmap_sem when necessary;
- Optimize the order of exception handling in dax_move_data_page().

---
 fs/f2fs/data.c   | 132 --
 fs/f2fs/f2fs.h   |   9 +++
 fs/f2fs/file.c   | 192 ++-
 fs/f2fs/gc.c | 105 --
 fs/f2fs/inline.c |   4 ++
 fs/f2fs/inode.c  |   8 ++-
 fs/f2fs/namei.c  |   5 ++
 fs/f2fs/super.c  |  15 +
 8 files changed, 457 insertions(+), 13 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 87c1f41..26b908a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -910,6 +910,15 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
err = -EIO;
goto sync_out;
}
+   /*
+* If newly allocated blocks are to be zeroed out later,
+* a single f2fs_map_blocks must not contain both old
+* and new blocks at the same time.
+*/
+   if (flag == F2FS_GET_BLOCK_ZERO
+   && (map->m_flags & F2FS_MAP_MAPPED)
+   && !(map->m_flags & F2FS_MAP_NEW))
+   goto sync_out;
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (blkaddr == NULL_ADDR) {
prealloc++;
@@ -938,7 +947,8 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
blkaddr != NEW_ADDR)
goto sync_out;
}
-   }
+   } else if (flag == F2FS_GET_BLOCK_ZERO && map->m_flags & F2FS_MAP_NEW)
+   goto sync_out;

if (flag == F2FS_GET_BLOCK_PRE_AIO)
goto skip;
@@ -996,6 +1006,12 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
goto next_dnode;

 sync_out:
+   if (flag == F2FS_GET_BLOCK_ZERO && map->m_flags & F2FS_MAP_NEW) {
+   clean_bdev_aliases(inode->i_sb->s_bdev,
+   map->m_pblk, map->m_len);
+   err = sb_issue_zeroout(inode->i_sb, map->m_pblk,
+   map->m_len, GFP_NOFS);
+   }
f2fs_put_dnode();
 unlock_out:
if (create) {
@@ -1808,16 +1824,19 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,
return 0;
 }

-static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+static void f2fs_write_failed(struct address_space *mapping, loff_t to,
+   bool lock)
 {
struct inode *inode = mapping->host;
loff_t i_size = i_size_read(inode);

if (to > i_size) {
-   down_write(_I(inode)->i_mmap_sem);
+   if (lock)
+   down_write(_I(inode)->i_mmap_sem);
truncate_pagecache(inode, i_size);
truncate_blocks(inode, i_size, true);
-   up_write(_I(inode)->i_mmap_sem);
+   if (lock)
+   up_write(_I(inode)->i_mmap_sem);
}
 }

@@ -2000,7 +2019,7 @@ static int f2fs_write_begin(struct file *file, struct 
address_space *mapping,

 fail:
f2fs_put_page(page, 1);
-   f2fs_write_failed(mapping, pos + len);
+   f2fs_write_failed(mapping, pos + len, true);
return err;
 }

@@ -2077,7 +2096,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct 
iov_iter *iter)
if (err > 0)
set_inode_flag(inode, FI_UPDATE_WRITE);
else if (err < 0)
-   f2fs_write_failed(mapping, offset + count);
+   f2fs_write_failed(mapping, offset + count, true);
}

trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
@@ -2274,3 +2293,104 @@ int f2fs_migrate_page(struct address_space *mapping,
.migratepage= f2fs_migrate_page,
 #endif
 };
+
+#ifdef CONFIG_FS_DAX
+#include 
+#include 
+
+static 

Re: [PATCH v4 1/1] f2fs: dax: implement direct access

2017-06-22 Thread Sun Qiuyang

Hi Chao,

Thanks for pointing it out. See below for how to fix this issue.



Hi Qiuyang

As I tested with pmem, this patch will corrupt f2fs image with generic/051
of fstest suit.

Could you please take a look at this issue?

Thanks,

On 2017/6/15 16:56, sunqiuyang wrote:

From: Qiuyang Sun 

This patch implements Direct Access (DAX) in F2FS.

Signed-off-by: Qiuyang Sun 
---

Changelog v3 -> v4:


  In f2fs_iomap_begin():
- For the write branch, if f2fs_map_blocks() returns error (probably due to
  ENOSPC), the allocated blocks beyond original_i_size are truncated.
- For the read branch, use F2FS_GET_BLOCK_FIEMAP instead of READ for
  f2fs_map_blocks(), so that contiguous unwritten blocks can be treated in
  a batch. Accordingly, judge F2FS_MAP_UNWRITTEN before F2FS_MAP_MAPPED for
  iomap->type.

- Add a call of f2fs_update_time() in f2fs_iomap_end().


- In f2fs_move_file_range() and f2fs_ioc_defragment(), return -EINVAL for
  DAX files, as the current implementation uses page cache.
- Call f2fs_bug_on() in f2fs_ioc_commit_atomic_write() and
  f2fs_ioc_(release|abort)_volatile_write() when the inode is DAX, which
  should not happen.


- Optimize the logic in dax_move_data_page().


- Enable setting the S_DAX flag for an inode in f2fs_set_inode_flags().

The v4 patch is at f2fs-dev-test.

---
 fs/f2fs/data.c   | 100 +
 fs/f2fs/f2fs.h   |   8 +++
 fs/f2fs/file.c   | 192 ++-
 fs/f2fs/gc.c | 104 --
 fs/f2fs/inline.c |   4 ++
 fs/f2fs/inode.c  |   8 ++-
 fs/f2fs/namei.c  |   5 ++
 fs/f2fs/super.c  |  15 +
 8 files changed, 429 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7d3af48..58efce0 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2257,3 +2257,103 @@ int f2fs_migrate_page(struct address_space *mapping,
.migratepage= f2fs_migrate_page,
 #endif
 };
+
+#ifdef CONFIG_FS_DAX
+#include 
+#include 
+
+static int f2fs_iomap_begin(struct inode *inode, loff_t offset,
+   loff_t length, unsigned int flags, struct iomap *iomap)
+{
+   struct block_device *bdev;
+   unsigned long first_block = F2FS_BYTES_TO_BLK(offset);
+   unsigned long last_block = F2FS_BYTES_TO_BLK(offset + length - 1);
+   struct f2fs_map_blocks map;
+   int ret;
+
+   if (WARN_ON_ONCE(f2fs_has_inline_data(inode)))
+   return -ERANGE;
+
+   map.m_lblk = first_block;
+   map.m_len = last_block - first_block + 1;
+   map.m_next_pgofs = NULL;
+
+   if (!(flags & IOMAP_WRITE))
+   ret = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_FIEMAP);
+   else {
+   /* i_size should be kept here and changed later in f2fs_iomap_end */
+   loff_t original_i_size = i_size_read(inode);
+
+   ret = f2fs_map_blocks(inode, , 1, F2FS_GET_BLOCK_PRE_DIO);


The flag F2FS_GET_BLOCK_PRE_DIO will allow allocating new blocks in 
batch whose physical addresses are not contiguous, and thus user data 
could be written to incorrect block addresses afterwards, even 
overwriting metadata blocks and causing FS inconsistency.


The test "generic/051" can be passed by using F2FS_GET_BLOCK_FIEMAP 
instead in the write branch here.



+   if (i_size_read(inode) > original_i_size) {
+   f2fs_i_size_write(inode, original_i_size);
+   if (ret) {
+   truncate_pagecache(inode, original_i_size);
+   truncate_blocks(inode, original_i_size, true);
+   }
+   }
+   }
+
+   if (ret)
+   return ret;
+
+   iomap->flags = 0;
+   bdev = inode->i_sb->s_bdev;
+   iomap->bdev = bdev;
+   if (blk_queue_dax(bdev->bd_queue))
+   iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+   else
+   iomap->dax_dev = NULL;
+   iomap->offset = F2FS_BLK_TO_BYTES((u64)first_block);
+
+   if (map.m_len == 0) {
+   iomap->type = IOMAP_HOLE;
+   iomap->blkno = IOMAP_NULL_BLOCK;
+   iomap->length = F2FS_BLKSIZE;
+   } else {
+   if (map.m_flags & F2FS_MAP_UNWRITTEN) {
+   iomap->type = IOMAP_UNWRITTEN;
+   } else if (map.m_flags & F2FS_MAP_MAPPED) {
+   iomap->type = IOMAP_MAPPED;
+   } else {
+   WARN_ON_ONCE(1);
+   return -EIO;
+   }
+   iomap->blkno =
+   (sector_t)map.m_pblk << F2FS_LOG_SECTORS_PER_BLOCK;
+   iomap->length = F2FS_BLK_TO_BYTES((u64)map.m_len);
+   }
+
+   if (map.m_flags & F2FS_MAP_NEW)
+   iomap->flags |= IOMAP_F_NEW;
+   return 0;
+}
+
+static int f2fs_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+   ssize_t written, 

Re: [PATCH v4 1/1] f2fs: dax: implement direct access

2017-06-22 Thread Sun Qiuyang

Hi Chao,

Thanks for pointing it out. See below for how to fix this issue.



Hi Qiuyang

As I tested with pmem, this patch will corrupt f2fs image with generic/051
of fstest suit.

Could you please take a look at this issue?

Thanks,

On 2017/6/15 16:56, sunqiuyang wrote:

From: Qiuyang Sun 

This patch implements Direct Access (DAX) in F2FS.

Signed-off-by: Qiuyang Sun 
---

Changelog v3 -> v4:


  In f2fs_iomap_begin():
- For the write branch, if f2fs_map_blocks() returns error (probably due to
  ENOSPC), the allocated blocks beyond original_i_size are truncated.
- For the read branch, use F2FS_GET_BLOCK_FIEMAP instead of READ for
  f2fs_map_blocks(), so that contiguous unwritten blocks can be treated in
  a batch. Accordingly, judge F2FS_MAP_UNWRITTEN before F2FS_MAP_MAPPED for
  iomap->type.

- Add a call of f2fs_update_time() in f2fs_iomap_end().


- In f2fs_move_file_range() and f2fs_ioc_defragment(), return -EINVAL for
  DAX files, as the current implementation uses page cache.
- Call f2fs_bug_on() in f2fs_ioc_commit_atomic_write() and
  f2fs_ioc_(release|abort)_volatile_write() when the inode is DAX, which
  should not happen.


- Optimize the logic in dax_move_data_page().


- Enable setting the S_DAX flag for an inode in f2fs_set_inode_flags().

The v4 patch is at f2fs-dev-test.

---
 fs/f2fs/data.c   | 100 +
 fs/f2fs/f2fs.h   |   8 +++
 fs/f2fs/file.c   | 192 ++-
 fs/f2fs/gc.c | 104 --
 fs/f2fs/inline.c |   4 ++
 fs/f2fs/inode.c  |   8 ++-
 fs/f2fs/namei.c  |   5 ++
 fs/f2fs/super.c  |  15 +
 8 files changed, 429 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7d3af48..58efce0 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2257,3 +2257,103 @@ int f2fs_migrate_page(struct address_space *mapping,
.migratepage= f2fs_migrate_page,
 #endif
 };
+
+#ifdef CONFIG_FS_DAX
+#include 
+#include 
+
+static int f2fs_iomap_begin(struct inode *inode, loff_t offset,
+   loff_t length, unsigned int flags, struct iomap *iomap)
+{
+   struct block_device *bdev;
+   unsigned long first_block = F2FS_BYTES_TO_BLK(offset);
+   unsigned long last_block = F2FS_BYTES_TO_BLK(offset + length - 1);
+   struct f2fs_map_blocks map;
+   int ret;
+
+   if (WARN_ON_ONCE(f2fs_has_inline_data(inode)))
+   return -ERANGE;
+
+   map.m_lblk = first_block;
+   map.m_len = last_block - first_block + 1;
+   map.m_next_pgofs = NULL;
+
+   if (!(flags & IOMAP_WRITE))
+   ret = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_FIEMAP);
+   else {
+   /* i_size should be kept here and changed later in f2fs_iomap_end */
+   loff_t original_i_size = i_size_read(inode);
+
+   ret = f2fs_map_blocks(inode, , 1, F2FS_GET_BLOCK_PRE_DIO);


The flag F2FS_GET_BLOCK_PRE_DIO will allow allocating new blocks in 
batch whose physical addresses are not contiguous, and thus user data 
could be written to incorrect block addresses afterwards, even 
overwriting metadata blocks and causing FS inconsistency.


The test "generic/051" can be passed by using F2FS_GET_BLOCK_FIEMAP 
instead in the write branch here.



+   if (i_size_read(inode) > original_i_size) {
+   f2fs_i_size_write(inode, original_i_size);
+   if (ret) {
+   truncate_pagecache(inode, original_i_size);
+   truncate_blocks(inode, original_i_size, true);
+   }
+   }
+   }
+
+   if (ret)
+   return ret;
+
+   iomap->flags = 0;
+   bdev = inode->i_sb->s_bdev;
+   iomap->bdev = bdev;
+   if (blk_queue_dax(bdev->bd_queue))
+   iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+   else
+   iomap->dax_dev = NULL;
+   iomap->offset = F2FS_BLK_TO_BYTES((u64)first_block);
+
+   if (map.m_len == 0) {
+   iomap->type = IOMAP_HOLE;
+   iomap->blkno = IOMAP_NULL_BLOCK;
+   iomap->length = F2FS_BLKSIZE;
+   } else {
+   if (map.m_flags & F2FS_MAP_UNWRITTEN) {
+   iomap->type = IOMAP_UNWRITTEN;
+   } else if (map.m_flags & F2FS_MAP_MAPPED) {
+   iomap->type = IOMAP_MAPPED;
+   } else {
+   WARN_ON_ONCE(1);
+   return -EIO;
+   }
+   iomap->blkno =
+   (sector_t)map.m_pblk << F2FS_LOG_SECTORS_PER_BLOCK;
+   iomap->length = F2FS_BLK_TO_BYTES((u64)map.m_len);
+   }
+
+   if (map.m_flags & F2FS_MAP_NEW)
+   iomap->flags |= IOMAP_F_NEW;
+   return 0;
+}
+
+static int f2fs_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+   ssize_t written, unsigned int flags, struct iomap *iomap)
+{
+   

Re: [f2fs-dev] [PATCH v3] f2fs: dax: implement direct access

2017-06-08 Thread Sun Qiuyang

Hi Chao,
See my comments below.


Hi Qiuyang,

On 2017/6/7 17:29, sunqiuyang wrote:

From: Qiuyang Sun 

This is a new version of PATCH v2 2/2 with the following minor changes:
- In dax_move_data_page(), the call of allocate_data_block() is changed
  according to the new definition of this function in f2fs-dev, and the
  usage of wio_mutex is removed;
- put_dax() is added in f2fs_iomap_end().

Signed-off-by: Qiuyang Sun 
---
 fs/f2fs/data.c   |  93 ++
 fs/f2fs/f2fs.h   |   8 +++
 fs/f2fs/file.c   | 194 ++-
 fs/f2fs/gc.c |  93 --
 fs/f2fs/inline.c |   4 ++
 fs/f2fs/namei.c  |   6 ++
 fs/f2fs/super.c  |  15 +
 7 files changed, 407 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7d3af48..2285a10 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2257,3 +2257,96 @@ int f2fs_migrate_page(struct address_space *mapping,
.migratepage= f2fs_migrate_page,
 #endif
 };
+
+#ifdef CONFIG_FS_DAX
+#include 
+#include 
+
+static int f2fs_iomap_begin(struct inode *inode, loff_t offset,
+   loff_t length, unsigned int flags, struct iomap *iomap)
+{
+   struct block_device *bdev;
+   unsigned long first_block = F2FS_BYTES_TO_BLK(offset);
+   unsigned long last_block = F2FS_BYTES_TO_BLK(offset + length - 1);
+   struct f2fs_map_blocks map;
+   int ret;
+   loff_t original_i_size = i_size_read(inode);
+
+   if (WARN_ON_ONCE(f2fs_has_inline_data(inode)))
+   return -ERANGE;
+
+   map.m_lblk = first_block;
+   map.m_len = last_block - first_block + 1;
+   map.m_next_pgofs = NULL;
+
+   if (!(flags & IOMAP_WRITE))
+   ret = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_READ);
+   else {
+   ret = f2fs_map_blocks(inode, , 1, F2FS_GET_BLOCK_PRE_DIO);
+   /* i_size should be kept here and changed later in f2fs_iomap_end */
+   if (i_size_read(inode) != original_i_size)
+   f2fs_i_size_write(inode, original_i_size);


If we allocated partial blocks in f2fs_map_blocks, then failed to
allocate left ones due to ENOSPC or ENOMEM..., it needs to do the
truncation according to original i_size.


+   }
+
+   if (ret)
+   return ret;
+
+   iomap->flags = 0;
+   bdev = inode->i_sb->s_bdev;
+   iomap->bdev = bdev;
+   if (blk_queue_dax(bdev->bd_queue))
+   iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+   else
+   iomap->dax_dev = NULL;
+   iomap->offset = F2FS_BLK_TO_BYTES((u64)first_block);
+
+   if (map.m_len == 0) {
+   iomap->type = IOMAP_HOLE;
+   iomap->blkno = IOMAP_NULL_BLOCK;
+   iomap->length = F2FS_BLKSIZE;
+   } else {
+   if (map.m_flags & F2FS_MAP_MAPPED) {
+   iomap->type = IOMAP_MAPPED;
+   } else if (map.m_flags & F2FS_MAP_UNWRITTEN) {
+   iomap->type = IOMAP_UNWRITTEN;


For read path, if blkaddr loaded in dnode is NEW_ADDR, we will set both
F2FS_MAP_MAPPED and F2FS_MAP_UNWRITTEN flag in m_flags. With above
condition we will set IOMAP_MAPPED instead of IOMAP_UNWRITTEN which may
result in incorrectly using of map.m_pblk. So how about reverse above
judgment condition to correct it?


For the read path of f2fs_map_blocks(), if blkaddr == NEW_ADDR, then it 
will goto sync_out before setting the F2FS_MAP_UNWRITTEN flag. Thus, 
this flag would never be set in read or write paths here, so I suggest 
simply removing the judgment about UNWRITTEN.


Thanks,




+   } else {
+   WARN_ON_ONCE(1);
+   return -EIO;
+   }
+   iomap->blkno =
+   (sector_t)map.m_pblk << F2FS_LOG_SECTORS_PER_BLOCK;
+   iomap->length = F2FS_BLK_TO_BYTES((u64)map.m_len);
+   }
+
+   if (map.m_flags & F2FS_MAP_NEW)
+   iomap->flags |= IOMAP_F_NEW;
+   return 0;
+}
+
+static int f2fs_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+   ssize_t written, unsigned int flags, struct iomap *iomap)
+{
+   put_dax(iomap->dax_dev);


Why should we use dax_get_by_host & put_dax here?


+   if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
+   return 0;
+
+   if (offset + written > i_size_read(inode))
+   f2fs_i_size_write(inode, offset + written);
+
+   if (iomap->offset + iomap->length >
+   ALIGN(i_size_read(inode), F2FS_BLKSIZE)) {
+   block_t written_blk = F2FS_BYTES_TO_BLK(offset + written);
+   block_t end_blk = F2FS_BYTES_TO_BLK(offset + length);
+
+   if (written_blk < end_blk)
+   f2fs_write_failed(inode->i_mapping, offset + length);
+   }


f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);


+
+  

Re: [f2fs-dev] [PATCH v3] f2fs: dax: implement direct access

2017-06-08 Thread Sun Qiuyang

Hi Chao,
See my comments below.


Hi Qiuyang,

On 2017/6/7 17:29, sunqiuyang wrote:

From: Qiuyang Sun 

This is a new version of PATCH v2 2/2 with the following minor changes:
- In dax_move_data_page(), the call of allocate_data_block() is changed
  according to the new definition of this function in f2fs-dev, and the
  usage of wio_mutex is removed;
- put_dax() is added in f2fs_iomap_end().

Signed-off-by: Qiuyang Sun 
---
 fs/f2fs/data.c   |  93 ++
 fs/f2fs/f2fs.h   |   8 +++
 fs/f2fs/file.c   | 194 ++-
 fs/f2fs/gc.c |  93 --
 fs/f2fs/inline.c |   4 ++
 fs/f2fs/namei.c  |   6 ++
 fs/f2fs/super.c  |  15 +
 7 files changed, 407 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7d3af48..2285a10 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2257,3 +2257,96 @@ int f2fs_migrate_page(struct address_space *mapping,
.migratepage= f2fs_migrate_page,
 #endif
 };
+
+#ifdef CONFIG_FS_DAX
+#include 
+#include 
+
+static int f2fs_iomap_begin(struct inode *inode, loff_t offset,
+   loff_t length, unsigned int flags, struct iomap *iomap)
+{
+   struct block_device *bdev;
+   unsigned long first_block = F2FS_BYTES_TO_BLK(offset);
+   unsigned long last_block = F2FS_BYTES_TO_BLK(offset + length - 1);
+   struct f2fs_map_blocks map;
+   int ret;
+   loff_t original_i_size = i_size_read(inode);
+
+   if (WARN_ON_ONCE(f2fs_has_inline_data(inode)))
+   return -ERANGE;
+
+   map.m_lblk = first_block;
+   map.m_len = last_block - first_block + 1;
+   map.m_next_pgofs = NULL;
+
+   if (!(flags & IOMAP_WRITE))
+   ret = f2fs_map_blocks(inode, , 0, F2FS_GET_BLOCK_READ);
+   else {
+   ret = f2fs_map_blocks(inode, , 1, F2FS_GET_BLOCK_PRE_DIO);
+   /* i_size should be kept here and changed later in f2fs_iomap_end */
+   if (i_size_read(inode) != original_i_size)
+   f2fs_i_size_write(inode, original_i_size);


If we allocated partial blocks in f2fs_map_blocks, then failed to
allocate left ones due to ENOSPC or ENOMEM..., it needs to do the
truncation according to original i_size.


+   }
+
+   if (ret)
+   return ret;
+
+   iomap->flags = 0;
+   bdev = inode->i_sb->s_bdev;
+   iomap->bdev = bdev;
+   if (blk_queue_dax(bdev->bd_queue))
+   iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+   else
+   iomap->dax_dev = NULL;
+   iomap->offset = F2FS_BLK_TO_BYTES((u64)first_block);
+
+   if (map.m_len == 0) {
+   iomap->type = IOMAP_HOLE;
+   iomap->blkno = IOMAP_NULL_BLOCK;
+   iomap->length = F2FS_BLKSIZE;
+   } else {
+   if (map.m_flags & F2FS_MAP_MAPPED) {
+   iomap->type = IOMAP_MAPPED;
+   } else if (map.m_flags & F2FS_MAP_UNWRITTEN) {
+   iomap->type = IOMAP_UNWRITTEN;


For read path, if blkaddr loaded in dnode is NEW_ADDR, we will set both
F2FS_MAP_MAPPED and F2FS_MAP_UNWRITTEN flag in m_flags. With above
condition we will set IOMAP_MAPPED instead of IOMAP_UNWRITTEN which may
result in incorrectly using of map.m_pblk. So how about reverse above
judgment condition to correct it?


For the read path of f2fs_map_blocks(), if blkaddr == NEW_ADDR, then it 
will goto sync_out before setting the F2FS_MAP_UNWRITTEN flag. Thus, 
this flag would never be set in read or write paths here, so I suggest 
simply removing the judgment about UNWRITTEN.


Thanks,




+   } else {
+   WARN_ON_ONCE(1);
+   return -EIO;
+   }
+   iomap->blkno =
+   (sector_t)map.m_pblk << F2FS_LOG_SECTORS_PER_BLOCK;
+   iomap->length = F2FS_BLK_TO_BYTES((u64)map.m_len);
+   }
+
+   if (map.m_flags & F2FS_MAP_NEW)
+   iomap->flags |= IOMAP_F_NEW;
+   return 0;
+}
+
+static int f2fs_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+   ssize_t written, unsigned int flags, struct iomap *iomap)
+{
+   put_dax(iomap->dax_dev);


Why should we use dax_get_by_host & put_dax here?


+   if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
+   return 0;
+
+   if (offset + written > i_size_read(inode))
+   f2fs_i_size_write(inode, offset + written);
+
+   if (iomap->offset + iomap->length >
+   ALIGN(i_size_read(inode), F2FS_BLKSIZE)) {
+   block_t written_blk = F2FS_BYTES_TO_BLK(offset + written);
+   block_t end_blk = F2FS_BYTES_TO_BLK(offset + length);
+
+   if (written_blk < end_blk)
+   f2fs_write_failed(inode->i_mapping, offset + length);
+   }


f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);


+
+   return 0;
+}
+
+struct iomap_ops