[PATCH v2] workqueue: Use IS_ERR and PTR_ERR instead of PTR_ERR_OR_ZERO.

2020-04-28 Thread Sean Fu
Replace inline function PTR_ERR_OR_ZERO with IS_ERR and PTR_ERR to
remove redundant parameter definitions and checks.
Reduce code size.
Before:
   textdata bss dec hex filename
  475105979 840   54329d439 kernel/workqueue.o
After:
   textdata bss dec hex filename
  474745979 840   54293d415 kernel/workqueue.o

Signed-off-by: Sean Fu 
---
Changes in v2:
- make commit message more clear.

 kernel/workqueue.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 891ccad5f271..ddf0537dce14 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4197,7 +4197,6 @@ static int wq_clamp_max_active(int max_active, unsigned 
int flags,
 static int init_rescuer(struct workqueue_struct *wq)
 {
struct worker *rescuer;
-   int ret;

if (!(wq->flags & WQ_MEM_RECLAIM))
return 0;
@@ -4208,10 +4207,9 @@ static int init_rescuer(struct workqueue_struct *wq)

rescuer->rescue_wq = wq;
rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
-   ret = PTR_ERR_OR_ZERO(rescuer->task);
-   if (ret) {
+   if (IS_ERR(rescuer->task)) {
kfree(rescuer);
-   return ret;
+   return PTR_ERR(rescuer->task);
}

wq->rescuer = rescuer;
--
2.16.4



[PATCH] workqueue: Use IS_ERR and PTR_ERR instead of PTR_ERR_OR_ZERO to remove redundant definitions and checks.

2020-04-28 Thread Sean Fu
Reduce code size.
Before:
   textdata bss dec hex filename
  475105979 840   54329d439 kernel/workqueue.o
After:
   textdata bss dec hex filename
  474745979 840   54293d415 kernel/workqueue.o

Signed-off-by: Sean Fu 
---
 kernel/workqueue.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 891ccad5f271..ddf0537dce14 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4197,7 +4197,6 @@ static int wq_clamp_max_active(int max_active, unsigned 
int flags,
 static int init_rescuer(struct workqueue_struct *wq)
 {
struct worker *rescuer;
-   int ret;
 
if (!(wq->flags & WQ_MEM_RECLAIM))
return 0;
@@ -4208,10 +4207,9 @@ static int init_rescuer(struct workqueue_struct *wq)
 
rescuer->rescue_wq = wq;
rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
-   ret = PTR_ERR_OR_ZERO(rescuer->task);
-   if (ret) {
+   if (IS_ERR(rescuer->task)) {
kfree(rescuer);
-   return ret;
+   return PTR_ERR(rescuer->task);
}
 
wq->rescuer = rescuer;
-- 
2.16.4



Re: [PATCH] xfs: Use kmem_zalloc for bp->b_pages.

2019-03-09 Thread Sean Fu
On Sat, Mar 09, 2019 at 09:32:30AM -0800, Darrick J. Wong wrote:
> On Sat, Mar 09, 2019 at 11:36:36PM +0800, Sean Fu wrote:
> > Change the allocation of bp->b_pages to use kmem_zalloc instead of
> > kmem_alloc.
> > Remove unnecessary memset for bp->b_pages.
> >
> > This reduces text size by 42 bytes.
> > Before:
> >textdata bss dec hex filename
> >   23335 588   8   239315d7b ./fs/xfs/xfs_buf.o
> > After:
> >textdata bss dec hex filename
> >   23293 588   8   238895d51 ./fs/xfs/xfs_buf.o
> >
> > Signed-off-by: Sean Fu 
> > ---
> >  fs/xfs/xfs_buf.c | 3 +--
> >  1 file changed, 1 insertion(+), 2 deletions(-)
> >
> > diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> > index 4f5f2ff3f70f..be4f740b97c1 100644
> > --- a/fs/xfs/xfs_buf.c
> > +++ b/fs/xfs/xfs_buf.c
> > @@ -289,12 +289,11 @@ _xfs_buf_get_pages(
> > if (page_count <= XB_PAGES) {
> > bp->b_pages = bp->b_page_array;
> > } else {
> > -   bp->b_pages = kmem_alloc(sizeof(struct page *) *
> > +   bp->b_pages = kmem_zalloc(sizeof(struct page *) *
> >  page_count, KM_NOFS);
> > if (bp->b_pages == NULL)
> > return -ENOMEM;
> > }
> > -   memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
>
> Does this leave b_pages uninitialized in the page_count <= XB_PAGES
> case?
bp is allocated by kmem_zone_zalloc, But i will take a deep look at 
xfs_buf_associate_memory.
>
> --D
>
> > }
> > return 0;
> >  }
> > --
> > 2.16.4
> >


[PATCH] xfs: Use kmem_zalloc for bp->b_pages.

2019-03-09 Thread Sean Fu
Change the allocation of bp->b_pages to use kmem_zalloc instead of
kmem_alloc.
Remove unnecessary memset for bp->b_pages.

This reduces text size by 42 bytes.
Before:
   textdata bss dec hex filename
  23335 588   8   239315d7b ./fs/xfs/xfs_buf.o
After:
   textdata bss dec hex filename
  23293 588   8   238895d51 ./fs/xfs/xfs_buf.o

Signed-off-by: Sean Fu 
---
 fs/xfs/xfs_buf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4f5f2ff3f70f..be4f740b97c1 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -289,12 +289,11 @@ _xfs_buf_get_pages(
if (page_count <= XB_PAGES) {
bp->b_pages = bp->b_page_array;
} else {
-   bp->b_pages = kmem_alloc(sizeof(struct page *) *
+   bp->b_pages = kmem_zalloc(sizeof(struct page *) *
 page_count, KM_NOFS);
if (bp->b_pages == NULL)
return -ENOMEM;
}
-   memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
}
return 0;
 }
-- 
2.16.4



Re: [PATCH] md/bcache: Remove NULL check.

2018-08-13 Thread Sean Fu
On Mon, Aug 13, 2018 at 06:00:11PM +0800, Coly Li wrote:
> On 2018/8/13 5:38 PM, Sean Fu wrote:
> > Remove unnessesary NULL check before kmem_cache_destroy() in
> > drivers/md/bcache/request.c
> > 
> > Signed-off-by: Sean Fu 
> 
> Hi Sean,
> 
> A same change is posted in my previous checkpatch fixes series. You may
> find it from,
> https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/?h=for-next
> 
> The identical patch is,
> https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/commit/?h=for-next=26b39d0732955ecb839d0afe8d6211d70f213384
>
Thank you for your reply.

Sean Fu
> Thanks.
> 
> Coly Li
> 
> > ---
> >  drivers/md/bcache/request.c | 3 +--
> >  1 file changed, 1 insertion(+), 2 deletions(-)
> > 
> > diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
> > index ae67f5f..0ddee35 100644
> > --- a/drivers/md/bcache/request.c
> > +++ b/drivers/md/bcache/request.c
> > @@ -1306,8 +1306,7 @@ void bch_flash_dev_request_init(struct bcache_device 
> > *d)
> >  
> >  void bch_request_exit(void)
> >  {
> > -   if (bch_search_cache)
> > -   kmem_cache_destroy(bch_search_cache);
> > +   kmem_cache_destroy(bch_search_cache);
> >  }
> >  
> >  int __init bch_request_init(void)
> > 
> 


Re: [PATCH] md/bcache: Remove NULL check.

2018-08-13 Thread Sean Fu
On Mon, Aug 13, 2018 at 06:00:11PM +0800, Coly Li wrote:
> On 2018/8/13 5:38 PM, Sean Fu wrote:
> > Remove unnessesary NULL check before kmem_cache_destroy() in
> > drivers/md/bcache/request.c
> > 
> > Signed-off-by: Sean Fu 
> 
> Hi Sean,
> 
> A same change is posted in my previous checkpatch fixes series. You may
> find it from,
> https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/?h=for-next
> 
> The identical patch is,
> https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/commit/?h=for-next=26b39d0732955ecb839d0afe8d6211d70f213384
>
Thank you for your reply.

Sean Fu
> Thanks.
> 
> Coly Li
> 
> > ---
> >  drivers/md/bcache/request.c | 3 +--
> >  1 file changed, 1 insertion(+), 2 deletions(-)
> > 
> > diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
> > index ae67f5f..0ddee35 100644
> > --- a/drivers/md/bcache/request.c
> > +++ b/drivers/md/bcache/request.c
> > @@ -1306,8 +1306,7 @@ void bch_flash_dev_request_init(struct bcache_device 
> > *d)
> >  
> >  void bch_request_exit(void)
> >  {
> > -   if (bch_search_cache)
> > -   kmem_cache_destroy(bch_search_cache);
> > +   kmem_cache_destroy(bch_search_cache);
> >  }
> >  
> >  int __init bch_request_init(void)
> > 
> 


[PATCH] md/bcache: Remove NULL check.

2018-08-13 Thread Sean Fu
Remove unnessesary NULL check before kmem_cache_destroy() in
drivers/md/bcache/request.c

Signed-off-by: Sean Fu 
---
 drivers/md/bcache/request.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index ae67f5f..0ddee35 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1306,8 +1306,7 @@ void bch_flash_dev_request_init(struct bcache_device *d)
 
 void bch_request_exit(void)
 {
-   if (bch_search_cache)
-   kmem_cache_destroy(bch_search_cache);
+   kmem_cache_destroy(bch_search_cache);
 }
 
 int __init bch_request_init(void)
-- 
2.6.2



[PATCH] md/bcache: Remove NULL check.

2018-08-13 Thread Sean Fu
Remove unnessesary NULL check before kmem_cache_destroy() in
drivers/md/bcache/request.c

Signed-off-by: Sean Fu 
---
 drivers/md/bcache/request.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index ae67f5f..0ddee35 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1306,8 +1306,7 @@ void bch_flash_dev_request_init(struct bcache_device *d)
 
 void bch_request_exit(void)
 {
-   if (bch_search_cache)
-   kmem_cache_destroy(bch_search_cache);
+   kmem_cache_destroy(bch_search_cache);
 }
 
 int __init bch_request_init(void)
-- 
2.6.2



Re: [PATCH] fs/buffer.c: Optimize grow_buffer function.

2018-08-08 Thread Sean Fu
On Sat, Jul 21, 2018 at 07:21:16PM +0100, Al Viro wrote:
> On Sun, Jul 22, 2018 at 01:30:17AM +0800, Sean Fu wrote:
> > Use PAGE_SHIFT and i_blkbits of bd_inode directly to avoid ugly sizebits
> > calculation.
> > Remove ugly sizebits calculation.
> > Remove unnecessary sizebits parameter of grow_dev_page.
> > 
> > Reduces code size:
> > 
> > Before:
> > 
> > sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
> >textdata bss dec hex filename
> >   340371510  16   355638aeb fs/buffer.o
> > 
> > After:
> > 
> > sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
> >textdata bss dec hex filename
> >   340211510  16   355478adb fs/buffer.o
> 
> First of all, 16 bytes is pretty much noise.  What's more, the "remove ugly
> sizebits calculation" part really needs an explanation of the reasons why
> it's safe.  You assume that size == 1<bd_inode->i_blkbits; where's
> the proof that it always holds?

Got it, Thanks
This size can be page size(4096) besides block size 
(1<bd_inode->i_blkbits).

I have another question about __getblk_slow function in fs/buffer.c
Actually __find_get_block already was invoked before entering __getblk_slow.
Can we move __find_get_block after grow_buffers in for loop of __getblk_slow?

for (;;) {
...
ret = grow_buffers(bdev, block, size, gfp);
...
bh = __find_get_block(bdev, block, size);
...
}


Re: [PATCH] fs/buffer.c: Optimize grow_buffer function.

2018-08-08 Thread Sean Fu
On Sat, Jul 21, 2018 at 07:21:16PM +0100, Al Viro wrote:
> On Sun, Jul 22, 2018 at 01:30:17AM +0800, Sean Fu wrote:
> > Use PAGE_SHIFT and i_blkbits of bd_inode directly to avoid ugly sizebits
> > calculation.
> > Remove ugly sizebits calculation.
> > Remove unnecessary sizebits parameter of grow_dev_page.
> > 
> > Reduces code size:
> > 
> > Before:
> > 
> > sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
> >textdata bss dec hex filename
> >   340371510  16   355638aeb fs/buffer.o
> > 
> > After:
> > 
> > sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
> >textdata bss dec hex filename
> >   340211510  16   355478adb fs/buffer.o
> 
> First of all, 16 bytes is pretty much noise.  What's more, the "remove ugly
> sizebits calculation" part really needs an explanation of the reasons why
> it's safe.  You assume that size == 1<bd_inode->i_blkbits; where's
> the proof that it always holds?

Got it, Thanks
This size can be page size(4096) besides block size 
(1<bd_inode->i_blkbits).

I have another question about __getblk_slow function in fs/buffer.c
Actually __find_get_block already was invoked before entering __getblk_slow.
Can we move __find_get_block after grow_buffers in for loop of __getblk_slow?

for (;;) {
...
ret = grow_buffers(bdev, block, size, gfp);
...
bh = __find_get_block(bdev, block, size);
...
}


Re: [PATCH] fs/buffer.c: Optimize grow_buffer function.

2018-07-24 Thread Sean Fu
On Sat, Jul 21, 2018 at 07:21:16PM +0100, Al Viro wrote:
> On Sun, Jul 22, 2018 at 01:30:17AM +0800, Sean Fu wrote:
> > Use PAGE_SHIFT and i_blkbits of bd_inode directly to avoid ugly sizebits
> > calculation.
> > Remove ugly sizebits calculation.
> > Remove unnecessary sizebits parameter of grow_dev_page.
> > 
> > Reduces code size:
> > 
> > Before:
> > 
> > sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
> >textdata bss dec hex filename
> >   340371510  16   355638aeb fs/buffer.o
> > 
> > After:
> > 
> > sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
> >textdata bss dec hex filename
> >   340211510  16   355478adb fs/buffer.o
> 
> First of all, 16 bytes is pretty much noise.  What's more, the "remove ugly
> sizebits calculation" part really needs an explanation of the reasons why
> it's safe.  You assume that size == 1<bd_inode->i_blkbits; where's
> the proof that it always holds?
Thank your for taking a look and apologize for the noise.

I will take a deep look at it and especially history commits regarding with 
this.

BestRegards
Sean


Re: [PATCH] fs/buffer.c: Optimize grow_buffer function.

2018-07-24 Thread Sean Fu
On Sat, Jul 21, 2018 at 07:21:16PM +0100, Al Viro wrote:
> On Sun, Jul 22, 2018 at 01:30:17AM +0800, Sean Fu wrote:
> > Use PAGE_SHIFT and i_blkbits of bd_inode directly to avoid ugly sizebits
> > calculation.
> > Remove ugly sizebits calculation.
> > Remove unnecessary sizebits parameter of grow_dev_page.
> > 
> > Reduces code size:
> > 
> > Before:
> > 
> > sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
> >textdata bss dec hex filename
> >   340371510  16   355638aeb fs/buffer.o
> > 
> > After:
> > 
> > sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
> >textdata bss dec hex filename
> >   340211510  16   355478adb fs/buffer.o
> 
> First of all, 16 bytes is pretty much noise.  What's more, the "remove ugly
> sizebits calculation" part really needs an explanation of the reasons why
> it's safe.  You assume that size == 1<bd_inode->i_blkbits; where's
> the proof that it always holds?
Thank your for taking a look and apologize for the noise.

I will take a deep look at it and especially history commits regarding with 
this.

BestRegards
Sean


[PATCH] fs/buffer.c: Optimize grow_buffer function.

2018-07-21 Thread Sean Fu
Use PAGE_SHIFT and i_blkbits of bd_inode directly to avoid ugly sizebits
calculation.
Remove ugly sizebits calculation.
Remove unnecessary sizebits parameter of grow_dev_page.

Reduces code size:

Before:

sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
   textdata bss dec hex filename
  340371510  16   355638aeb fs/buffer.o

After:

sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
   textdata bss dec hex filename
  340211510  16   355478adb fs/buffer.o

Signed-off-by: Sean Fu 
---
 fs/buffer.c | 20 +++-
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 249b83f..143ad78 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -918,7 +918,7 @@ init_page_buffers(struct page *page, struct block_device 
*bdev,
  */
 static int
 grow_dev_page(struct block_device *bdev, sector_t block,
- pgoff_t index, int size, int sizebits, gfp_t gfp)
+ pgoff_t index, int size, gfp_t gfp)
 {
struct inode *inode = bdev->bd_inode;
struct page *page;
@@ -945,7 +945,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
bh = page_buffers(page);
if (bh->b_size == size) {
end_block = init_page_buffers(page, bdev,
-   (sector_t)index << sizebits,
+   (sector_t)index << (PAGE_SHIFT 
- inode->i_blkbits),
size);
goto done;
}
@@ -965,7 +965,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 */
spin_lock(>i_mapping->private_lock);
link_dev_buffers(page, bh);
-   end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
+   end_block = init_page_buffers(page, bdev,
+   (sector_t)index << (PAGE_SHIFT - inode->i_blkbits),
size);
spin_unlock(>i_mapping->private_lock);
 done:
@@ -984,20 +985,13 @@ static int
 grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
 {
pgoff_t index;
-   int sizebits;
-
-   sizebits = -1;
-   do {
-   sizebits++;
-   } while ((size << sizebits) < PAGE_SIZE);
-
-   index = block >> sizebits;
+   index = block >> (PAGE_SHIFT - bdev->bd_inode->i_blkbits);
 
/*
 * Check for a block which wants to lie outside our maximum possible
 * pagecache index.  (this comparison is done using sector_t types).
 */
-   if (unlikely(index != block >> sizebits)) {
+   if (unlikely(index != block >> (PAGE_SHIFT - 
bdev->bd_inode->i_blkbits))) {
printk(KERN_ERR "%s: requested out-of-range block %llu for "
"device %pg\n",
__func__, (unsigned long long)block,
@@ -1006,7 +1000,7 @@ grow_buffers(struct block_device *bdev, sector_t block, 
int size, gfp_t gfp)
}
 
/* Create a page with the proper size buffers.. */
-   return grow_dev_page(bdev, block, index, size, sizebits, gfp);
+   return grow_dev_page(bdev, block, index, size, gfp);
 }
 
 static struct buffer_head *
-- 
2.6.2



[PATCH] fs/buffer.c: Optimize grow_buffer function.

2018-07-21 Thread Sean Fu
Use PAGE_SHIFT and i_blkbits of bd_inode directly to avoid ugly sizebits
calculation.
Remove ugly sizebits calculation.
Remove unnecessary sizebits parameter of grow_dev_page.

Reduces code size:

Before:

sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
   textdata bss dec hex filename
  340371510  16   355638aeb fs/buffer.o

After:

sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
   textdata bss dec hex filename
  340211510  16   355478adb fs/buffer.o

Signed-off-by: Sean Fu 
---
 fs/buffer.c | 20 +++-
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 249b83f..143ad78 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -918,7 +918,7 @@ init_page_buffers(struct page *page, struct block_device 
*bdev,
  */
 static int
 grow_dev_page(struct block_device *bdev, sector_t block,
- pgoff_t index, int size, int sizebits, gfp_t gfp)
+ pgoff_t index, int size, gfp_t gfp)
 {
struct inode *inode = bdev->bd_inode;
struct page *page;
@@ -945,7 +945,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
bh = page_buffers(page);
if (bh->b_size == size) {
end_block = init_page_buffers(page, bdev,
-   (sector_t)index << sizebits,
+   (sector_t)index << (PAGE_SHIFT 
- inode->i_blkbits),
size);
goto done;
}
@@ -965,7 +965,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 */
spin_lock(>i_mapping->private_lock);
link_dev_buffers(page, bh);
-   end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
+   end_block = init_page_buffers(page, bdev,
+   (sector_t)index << (PAGE_SHIFT - inode->i_blkbits),
size);
spin_unlock(>i_mapping->private_lock);
 done:
@@ -984,20 +985,13 @@ static int
 grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
 {
pgoff_t index;
-   int sizebits;
-
-   sizebits = -1;
-   do {
-   sizebits++;
-   } while ((size << sizebits) < PAGE_SIZE);
-
-   index = block >> sizebits;
+   index = block >> (PAGE_SHIFT - bdev->bd_inode->i_blkbits);
 
/*
 * Check for a block which wants to lie outside our maximum possible
 * pagecache index.  (this comparison is done using sector_t types).
 */
-   if (unlikely(index != block >> sizebits)) {
+   if (unlikely(index != block >> (PAGE_SHIFT - 
bdev->bd_inode->i_blkbits))) {
printk(KERN_ERR "%s: requested out-of-range block %llu for "
"device %pg\n",
__func__, (unsigned long long)block,
@@ -1006,7 +1000,7 @@ grow_buffers(struct block_device *bdev, sector_t block, 
int size, gfp_t gfp)
}
 
/* Create a page with the proper size buffers.. */
-   return grow_dev_page(bdev, block, index, size, sizebits, gfp);
+   return grow_dev_page(bdev, block, index, size, gfp);
 }
 
 static struct buffer_head *
-- 
2.6.2



[PATCH] ext4: Remove unnecessary NULL checks in ext4.

2018-02-06 Thread Sean Fu
NULL check is done in kmem_cache_destroy. So remove NULL checks in ext4.

Signed-off-by: Sean Fu <fxinr...@gmail.com>
---
 fs/ext4/extents_status.c | 3 +--
 fs/ext4/mballoc.c| 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 763ef18..c4e6fb1 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -162,8 +162,7 @@ int __init ext4_init_es(void)
 
 void ext4_exit_es(void)
 {
-   if (ext4_es_cachep)
-   kmem_cache_destroy(ext4_es_cachep);
+   kmem_cache_destroy(ext4_es_cachep);
 }
 
 void ext4_es_init_tree(struct ext4_es_tree *tree)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d9f8b90a..4fd1349 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2549,8 +2549,7 @@ static void ext4_groupinfo_destroy_slabs(void)
int i;
 
for (i = 0; i < NR_GRPINFO_CACHES; i++) {
-   if (ext4_groupinfo_caches[i])
-   kmem_cache_destroy(ext4_groupinfo_caches[i]);
+   kmem_cache_destroy(ext4_groupinfo_caches[i]);
ext4_groupinfo_caches[i] = NULL;
}
 }
-- 
2.6.2



[PATCH] ext4: Remove unnecessary NULL checks in ext4.

2018-02-06 Thread Sean Fu
NULL check is done in kmem_cache_destroy. So remove NULL checks in ext4.

Signed-off-by: Sean Fu 
---
 fs/ext4/extents_status.c | 3 +--
 fs/ext4/mballoc.c| 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 763ef18..c4e6fb1 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -162,8 +162,7 @@ int __init ext4_init_es(void)
 
 void ext4_exit_es(void)
 {
-   if (ext4_es_cachep)
-   kmem_cache_destroy(ext4_es_cachep);
+   kmem_cache_destroy(ext4_es_cachep);
 }
 
 void ext4_es_init_tree(struct ext4_es_tree *tree)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d9f8b90a..4fd1349 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2549,8 +2549,7 @@ static void ext4_groupinfo_destroy_slabs(void)
int i;
 
for (i = 0; i < NR_GRPINFO_CACHES; i++) {
-   if (ext4_groupinfo_caches[i])
-   kmem_cache_destroy(ext4_groupinfo_caches[i]);
+   kmem_cache_destroy(ext4_groupinfo_caches[i]);
ext4_groupinfo_caches[i] = NULL;
}
 }
-- 
2.6.2



Re: [PATCH] ext4: Remove repeated test in ext4_file_read_iter.

2018-01-16 Thread Sean Fu
On Wed, Jan 10, 2018 at 03:02:55PM -0500, Theodore Ts'o wrote:
> On Wed, Jan 10, 2018 at 10:01:49PM +0800, Sean Fu wrote:
> >
> > Correct, IOCB_NOWAIT read with zero count can return -EAGAIN, But I
> > think that it is reasonable. while it got lock, zero would be returned
> > in this case.
> 
> Returning -EAGAIN and 0 are not the same thing.  Specifically
> returning 0 means "end of file".  Hence, it is NOT reasonable.
> 
I know that two returnning value mean different things.
once the inode is not under lock contention, read again with zero count
would return zero.
> See the man page for read(2) or the relevant Single Unix Specification
> standard:
> 
> RETURN VALUE
>  On success, the number of bytes read is returned (zero indicates
>  end of file)
> 
> Changing the behavior of the system in which will break userspace is
> never acceptable, and even more so given that this is just a "cleanup
> patch".
>
I agree with you at this point.
Thanks for your reply.

>   - Ted
> 


Re: [PATCH] ext4: Remove repeated test in ext4_file_read_iter.

2018-01-16 Thread Sean Fu
On Wed, Jan 10, 2018 at 03:02:55PM -0500, Theodore Ts'o wrote:
> On Wed, Jan 10, 2018 at 10:01:49PM +0800, Sean Fu wrote:
> >
> > Correct, IOCB_NOWAIT read with zero count can return -EAGAIN, But I
> > think that it is reasonable. while it got lock, zero would be returned
> > in this case.
> 
> Returning -EAGAIN and 0 are not the same thing.  Specifically
> returning 0 means "end of file".  Hence, it is NOT reasonable.
> 
I know that two returnning value mean different things.
once the inode is not under lock contention, read again with zero count
would return zero.
> See the man page for read(2) or the relevant Single Unix Specification
> standard:
> 
> RETURN VALUE
>  On success, the number of bytes read is returned (zero indicates
>  end of file)
> 
> Changing the behavior of the system in which will break userspace is
> never acceptable, and even more so given that this is just a "cleanup
> patch".
>
I agree with you at this point.
Thanks for your reply.

>   - Ted
> 


Re: [PATCH] ext4: Remove repeated test in ext4_file_read_iter.

2018-01-10 Thread Sean Fu
On Wed, Jan 03, 2018 at 02:08:05AM +, Al Viro wrote:
> On Wed, Dec 27, 2017 at 04:19:58PM +0800, Sean Fu wrote:
> > generic_file_read_iter has done the count test.
> > So ext4_file_read_iter don't need to test the count repeatedly.
> 
> Huh?  You do realize that generic_file_read_iter() is not the
> only variant possible there, right?
>
Yes, I know that generic_file_read_iter() is not the only variant possible.
The other possible dax_iomap_rw() with zero count would return zero.
> static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
> {
> struct inode *inode = file_inode(iocb->ki_filp);
> ssize_t ret;
> 
> if (!inode_trylock_shared(inode)) {
> if (iocb->ki_flags & IOCB_NOWAIT)
> return -EAGAIN;
> inode_lock_shared(inode);
> }
> 
> ... and now IOCB_NOWAIT read with zero count can fail with -EAGAIN.
>
Correct, IOCB_NOWAIT read with zero count can return -EAGAIN, But I
think that it is reasonable. while it got lock, zero would be returned
in this case.


Re: [PATCH] ext4: Remove repeated test in ext4_file_read_iter.

2018-01-10 Thread Sean Fu
On Wed, Jan 03, 2018 at 02:08:05AM +, Al Viro wrote:
> On Wed, Dec 27, 2017 at 04:19:58PM +0800, Sean Fu wrote:
> > generic_file_read_iter has done the count test.
> > So ext4_file_read_iter don't need to test the count repeatedly.
> 
> Huh?  You do realize that generic_file_read_iter() is not the
> only variant possible there, right?
>
Yes, I know that generic_file_read_iter() is not the only variant possible.
The other possible dax_iomap_rw() with zero count would return zero.
> static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
> {
> struct inode *inode = file_inode(iocb->ki_filp);
> ssize_t ret;
> 
> if (!inode_trylock_shared(inode)) {
> if (iocb->ki_flags & IOCB_NOWAIT)
> return -EAGAIN;
> inode_lock_shared(inode);
> }
> 
> ... and now IOCB_NOWAIT read with zero count can fail with -EAGAIN.
>
Correct, IOCB_NOWAIT read with zero count can return -EAGAIN, But I
think that it is reasonable. while it got lock, zero would be returned
in this case.


Re: [PATCH 3/3] md: bitmap: Support circular buffer list.

2018-01-01 Thread Sean Fu
On Tue, Jan 02, 2018 at 02:54:49PM +0800, Sean Fu wrote:
> Modify write_page free_buffers and read_page to support circular buffer
> list.
> 
> Signed-off-by: Sean Fu <fxinr...@gmail.com>
> ---
>  drivers/md/md-bitmap.c | 36 +++-
>  1 file changed, 19 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
> index 239c7bb..b8412c2 100644
> --- a/drivers/md/md-bitmap.c
> +++ b/drivers/md/md-bitmap.c
> @@ -286,7 +286,7 @@ static void bitmap_file_kick(struct bitmap *bitmap);
>   */
>  static void write_page(struct bitmap *bitmap, struct page *page, int wait)
>  {
> - struct buffer_head *bh;
> + struct buffer_head *bh, *head;
>  
>   if (bitmap->storage.file == NULL) {
>   switch (write_sb_page(bitmap, page, wait)) {
> @@ -295,15 +295,16 @@ static void write_page(struct bitmap *bitmap, struct 
> page *page, int wait)
>   }
>   } else {
>  
> - bh = page_buffers(page);
> + bh = head = page_buffers(page);
>  
> - while (bh && bh->b_blocknr) {
> - atomic_inc(>pending_writes);
> - set_buffer_locked(bh);
> - set_buffer_mapped(bh);
> - submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
> - bh = bh->b_this_page;
> - }
> + do {
> + if (bh && bh->b_blocknr) {
> + atomic_inc(>pending_writes);
> + set_buffer_locked(bh);
> + set_buffer_mapped(bh);
> + submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
> + }
> + } while ((bh = bh->b_this_page) != head);
>  
>   if (wait)
>   wait_event(bitmap->write_wait,
> @@ -333,17 +334,18 @@ __clear_page_buffers(struct page *page)
>  }
>  static void free_buffers(struct page *page)
>  {
> - struct buffer_head *bh;
> + struct buffer_head *bh, *head;
>  
>   if (!PagePrivate(page))
>   return;
>  
> - bh = page_buffers(page);
> - while (bh) {
> + bh = head = page_buffers(page);
> + do {
>   struct buffer_head *next = bh->b_this_page;
>   free_buffer_head(bh);
>   bh = next;
> - }
> + } while (bh != head);
> +
>   __clear_page_buffers(page);
>   put_page(page);
>  }
> @@ -362,20 +364,20 @@ static int read_page(struct file *file, unsigned long 
> index,
>  {
>   int ret = 0;
>   struct inode *inode = file_inode(file);
> - struct buffer_head *bh;
> + struct buffer_head *bh, *head;
>   sector_t block;
>  
>   pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
>(unsigned long long)index << PAGE_SHIFT);
>  
> - bh = alloc_page_buffers(page, 1<i_blkbits, false);
> + bh = head = alloc_page_buffers(page, 1<i_blkbits, false);
>   if (!bh) {
>   ret = -ENOMEM;
>   goto out;
>   }
>   attach_page_buffers(page, bh);
>   block = index << (PAGE_SHIFT - inode->i_blkbits);
> - while (bh) {
> + do {
>   if (count == 0)
>   bh->b_blocknr = 0;
>   else {
> @@ -400,7 +402,7 @@ static int read_page(struct file *file, unsigned long 
> index,
>   }
>   block++;
>   bh = bh->b_this_page;
> - }
> + } while (bh != head);
>   page->index = index;
>  
>   wait_event(bitmap->write_wait,
> -- 
> 2.6.2
>
Before
sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
   textdata bss dec hex filename
  336931466  16   351758967 fs/buffer.o
sean@linux-zmni:~/sda5/source/linus_repo/linux> size drivers/md/md-bitmap.o
   textdata bss dec hex filename
  281492168   0   30317766d drivers/md/md-bitmap.o
sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/ntfs/mft.o 
   textdata bss dec hex filename
   2133  36   02169 879 fs/ntfs/mft.o
sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/ntfs/aops.o
   textdata bss dec hex filename
   6125 168   062931895 fs/ntfs/aops.o
sean@linux-zmni:~/sda5/source/linus_repo/linux> size vmlinux
   textdata bss dec hex filename
114802605730762 1646084 1885710611fbc92 vmlinux
sean@linux-zmni:~/sda5/source/linus_repo/linux> size ./arch/x86/boot/bzImage
size: ./arch/x86/boot/bzI

Re: [PATCH 3/3] md: bitmap: Support circular buffer list.

2018-01-01 Thread Sean Fu
On Tue, Jan 02, 2018 at 02:54:49PM +0800, Sean Fu wrote:
> Modify write_page free_buffers and read_page to support circular buffer
> list.
> 
> Signed-off-by: Sean Fu 
> ---
>  drivers/md/md-bitmap.c | 36 +++-
>  1 file changed, 19 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
> index 239c7bb..b8412c2 100644
> --- a/drivers/md/md-bitmap.c
> +++ b/drivers/md/md-bitmap.c
> @@ -286,7 +286,7 @@ static void bitmap_file_kick(struct bitmap *bitmap);
>   */
>  static void write_page(struct bitmap *bitmap, struct page *page, int wait)
>  {
> - struct buffer_head *bh;
> + struct buffer_head *bh, *head;
>  
>   if (bitmap->storage.file == NULL) {
>   switch (write_sb_page(bitmap, page, wait)) {
> @@ -295,15 +295,16 @@ static void write_page(struct bitmap *bitmap, struct 
> page *page, int wait)
>   }
>   } else {
>  
> - bh = page_buffers(page);
> + bh = head = page_buffers(page);
>  
> - while (bh && bh->b_blocknr) {
> - atomic_inc(>pending_writes);
> - set_buffer_locked(bh);
> - set_buffer_mapped(bh);
> - submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
> - bh = bh->b_this_page;
> - }
> + do {
> + if (bh && bh->b_blocknr) {
> + atomic_inc(>pending_writes);
> + set_buffer_locked(bh);
> + set_buffer_mapped(bh);
> + submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
> + }
> + } while ((bh = bh->b_this_page) != head);
>  
>   if (wait)
>   wait_event(bitmap->write_wait,
> @@ -333,17 +334,18 @@ __clear_page_buffers(struct page *page)
>  }
>  static void free_buffers(struct page *page)
>  {
> - struct buffer_head *bh;
> + struct buffer_head *bh, *head;
>  
>   if (!PagePrivate(page))
>   return;
>  
> - bh = page_buffers(page);
> - while (bh) {
> + bh = head = page_buffers(page);
> + do {
>   struct buffer_head *next = bh->b_this_page;
>   free_buffer_head(bh);
>   bh = next;
> - }
> + } while (bh != head);
> +
>   __clear_page_buffers(page);
>   put_page(page);
>  }
> @@ -362,20 +364,20 @@ static int read_page(struct file *file, unsigned long 
> index,
>  {
>   int ret = 0;
>   struct inode *inode = file_inode(file);
> - struct buffer_head *bh;
> + struct buffer_head *bh, *head;
>   sector_t block;
>  
>   pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
>(unsigned long long)index << PAGE_SHIFT);
>  
> - bh = alloc_page_buffers(page, 1<i_blkbits, false);
> + bh = head = alloc_page_buffers(page, 1<i_blkbits, false);
>   if (!bh) {
>   ret = -ENOMEM;
>   goto out;
>   }
>   attach_page_buffers(page, bh);
>   block = index << (PAGE_SHIFT - inode->i_blkbits);
> - while (bh) {
> + do {
>   if (count == 0)
>   bh->b_blocknr = 0;
>   else {
> @@ -400,7 +402,7 @@ static int read_page(struct file *file, unsigned long 
> index,
>   }
>   block++;
>   bh = bh->b_this_page;
> - }
> + } while (bh != head);
>   page->index = index;
>  
>   wait_event(bitmap->write_wait,
> -- 
> 2.6.2
>
Before
sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/buffer.o
   textdata bss dec hex filename
  336931466  16   351758967 fs/buffer.o
sean@linux-zmni:~/sda5/source/linus_repo/linux> size drivers/md/md-bitmap.o
   textdata bss dec hex filename
  281492168   0   30317766d drivers/md/md-bitmap.o
sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/ntfs/mft.o 
   textdata bss dec hex filename
   2133  36   02169 879 fs/ntfs/mft.o
sean@linux-zmni:~/sda5/source/linus_repo/linux> size fs/ntfs/aops.o
   textdata bss dec hex filename
   6125 168   062931895 fs/ntfs/aops.o
sean@linux-zmni:~/sda5/source/linus_repo/linux> size vmlinux
   textdata bss dec hex filename
114802605730762 1646084 1885710611fbc92 vmlinux
sean@linux-zmni:~/sda5/source/linus_repo/linux> size ./arch/x86/boot/bzImage
size: ./arch/x86/boot/bzImage: Warning: Ignoring secti

[PATCH 3/3] md: bitmap: Support circular buffer list.

2018-01-01 Thread Sean Fu
Modify write_page free_buffers and read_page to support circular buffer
list.

Signed-off-by: Sean Fu <fxinr...@gmail.com>
---
 drivers/md/md-bitmap.c | 36 +++-
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 239c7bb..b8412c2 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -286,7 +286,7 @@ static void bitmap_file_kick(struct bitmap *bitmap);
  */
 static void write_page(struct bitmap *bitmap, struct page *page, int wait)
 {
-   struct buffer_head *bh;
+   struct buffer_head *bh, *head;
 
if (bitmap->storage.file == NULL) {
switch (write_sb_page(bitmap, page, wait)) {
@@ -295,15 +295,16 @@ static void write_page(struct bitmap *bitmap, struct page 
*page, int wait)
}
} else {
 
-   bh = page_buffers(page);
+   bh = head = page_buffers(page);
 
-   while (bh && bh->b_blocknr) {
-   atomic_inc(>pending_writes);
-   set_buffer_locked(bh);
-   set_buffer_mapped(bh);
-   submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
-   bh = bh->b_this_page;
-   }
+   do {
+   if (bh && bh->b_blocknr) {
+   atomic_inc(>pending_writes);
+   set_buffer_locked(bh);
+   set_buffer_mapped(bh);
+   submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+   }
+   } while ((bh = bh->b_this_page) != head);
 
if (wait)
wait_event(bitmap->write_wait,
@@ -333,17 +334,18 @@ __clear_page_buffers(struct page *page)
 }
 static void free_buffers(struct page *page)
 {
-   struct buffer_head *bh;
+   struct buffer_head *bh, *head;
 
if (!PagePrivate(page))
return;
 
-   bh = page_buffers(page);
-   while (bh) {
+   bh = head = page_buffers(page);
+   do {
struct buffer_head *next = bh->b_this_page;
free_buffer_head(bh);
bh = next;
-   }
+   } while (bh != head);
+
__clear_page_buffers(page);
put_page(page);
 }
@@ -362,20 +364,20 @@ static int read_page(struct file *file, unsigned long 
index,
 {
int ret = 0;
struct inode *inode = file_inode(file);
-   struct buffer_head *bh;
+   struct buffer_head *bh, *head;
sector_t block;
 
pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
 (unsigned long long)index << PAGE_SHIFT);
 
-   bh = alloc_page_buffers(page, 1<i_blkbits, false);
+   bh = head = alloc_page_buffers(page, 1<i_blkbits, false);
if (!bh) {
ret = -ENOMEM;
goto out;
}
attach_page_buffers(page, bh);
block = index << (PAGE_SHIFT - inode->i_blkbits);
-   while (bh) {
+   do {
if (count == 0)
bh->b_blocknr = 0;
else {
@@ -400,7 +402,7 @@ static int read_page(struct file *file, unsigned long index,
}
block++;
bh = bh->b_this_page;
-   }
+   } while (bh != head);
page->index = index;
 
wait_event(bitmap->write_wait,
-- 
2.6.2



[PATCH 3/3] md: bitmap: Support circular buffer list.

2018-01-01 Thread Sean Fu
Modify write_page free_buffers and read_page to support circular buffer
list.

Signed-off-by: Sean Fu 
---
 drivers/md/md-bitmap.c | 36 +++-
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 239c7bb..b8412c2 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -286,7 +286,7 @@ static void bitmap_file_kick(struct bitmap *bitmap);
  */
 static void write_page(struct bitmap *bitmap, struct page *page, int wait)
 {
-   struct buffer_head *bh;
+   struct buffer_head *bh, *head;
 
if (bitmap->storage.file == NULL) {
switch (write_sb_page(bitmap, page, wait)) {
@@ -295,15 +295,16 @@ static void write_page(struct bitmap *bitmap, struct page 
*page, int wait)
}
} else {
 
-   bh = page_buffers(page);
+   bh = head = page_buffers(page);
 
-   while (bh && bh->b_blocknr) {
-   atomic_inc(>pending_writes);
-   set_buffer_locked(bh);
-   set_buffer_mapped(bh);
-   submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
-   bh = bh->b_this_page;
-   }
+   do {
+   if (bh && bh->b_blocknr) {
+   atomic_inc(>pending_writes);
+   set_buffer_locked(bh);
+   set_buffer_mapped(bh);
+   submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+   }
+   } while ((bh = bh->b_this_page) != head);
 
if (wait)
wait_event(bitmap->write_wait,
@@ -333,17 +334,18 @@ __clear_page_buffers(struct page *page)
 }
 static void free_buffers(struct page *page)
 {
-   struct buffer_head *bh;
+   struct buffer_head *bh, *head;
 
if (!PagePrivate(page))
return;
 
-   bh = page_buffers(page);
-   while (bh) {
+   bh = head = page_buffers(page);
+   do {
struct buffer_head *next = bh->b_this_page;
free_buffer_head(bh);
bh = next;
-   }
+   } while (bh != head);
+
__clear_page_buffers(page);
put_page(page);
 }
@@ -362,20 +364,20 @@ static int read_page(struct file *file, unsigned long 
index,
 {
int ret = 0;
struct inode *inode = file_inode(file);
-   struct buffer_head *bh;
+   struct buffer_head *bh, *head;
sector_t block;
 
pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
 (unsigned long long)index << PAGE_SHIFT);
 
-   bh = alloc_page_buffers(page, 1<i_blkbits, false);
+   bh = head = alloc_page_buffers(page, 1<i_blkbits, false);
if (!bh) {
ret = -ENOMEM;
goto out;
}
attach_page_buffers(page, bh);
block = index << (PAGE_SHIFT - inode->i_blkbits);
-   while (bh) {
+   do {
if (count == 0)
bh->b_blocknr = 0;
else {
@@ -400,7 +402,7 @@ static int read_page(struct file *file, unsigned long index,
}
block++;
bh = bh->b_this_page;
-   }
+   } while (bh != head);
page->index = index;
 
wait_event(bitmap->write_wait,
-- 
2.6.2



[PATCH 0/3] Create circular buffer list for every page instead of linear list.

2018-01-01 Thread Sean Fu
1.Create circular buffer list in alloc_page_buffers.
Remove unnecessary traversal in link_dev_buffers to create circular buffer list.
Make nobh_write_begin and nobh_write_end to support circular buffer list.

2.fs/ntfs: Make ntfs to support circular buffer list.

3.md: bitmap: Support circular buffer list.
Modify write_page free_buffers and read_page to support circular buffer list.

Sean Fu (3):
  fs: buffer: Create circular buffer list for pages.
  fs/ntfs: Make ntfs to support circular buffer list.
  md: bitmap: Support circular buffer list.

 drivers/md/md-bitmap.c | 36 +++-
 fs/buffer.c| 48 +---
 fs/ntfs/aops.c |  6 ++
 fs/ntfs/mft.c  |  4 
 4 files changed, 42 insertions(+), 52 deletions(-)

-- 
2.6.2



[PATCH 0/3] Create circular buffer list for every page instead of linear list.

2018-01-01 Thread Sean Fu
1.Create circular buffer list in alloc_page_buffers.
Remove unnecessary traversal in link_dev_buffers to create circular buffer list.
Make nobh_write_begin and nobh_write_end to support circular buffer list.

2.fs/ntfs: Make ntfs to support circular buffer list.

3.md: bitmap: Support circular buffer list.
Modify write_page free_buffers and read_page to support circular buffer list.

Sean Fu (3):
  fs: buffer: Create circular buffer list for pages.
  fs/ntfs: Make ntfs to support circular buffer list.
  md: bitmap: Support circular buffer list.

 drivers/md/md-bitmap.c | 36 +++-
 fs/buffer.c| 48 +---
 fs/ntfs/aops.c |  6 ++
 fs/ntfs/mft.c  |  4 
 4 files changed, 42 insertions(+), 52 deletions(-)

-- 
2.6.2



[PATCH 2/3] fs/ntfs: Make ntfs to support circular buffer list.

2018-01-01 Thread Sean Fu
Modify mark_ntfs_record_dirty to support circular buffer list.
alloc_page_buffers created circular buffer list. So the circular list
linking in ntfs_sync_mft_mirror is unnecessary.

Signed-off-by: Sean Fu <fxinr...@gmail.com>
---
 fs/ntfs/aops.c | 6 ++
 fs/ntfs/mft.c  | 4 
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 3a2e509..4e69577 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1746,10 +1746,8 @@ void mark_ntfs_record_dirty(struct page *page, const 
unsigned int ofs) {
 
do {
set_buffer_uptodate(bh);
-   tail = bh;
bh = bh->b_this_page;
-   } while (bh);
-   tail->b_this_page = head;
+   } while (bh != head);
attach_page_buffers(page, head);
} else
buffers_to_free = bh;
@@ -1771,7 +1769,7 @@ void mark_ntfs_record_dirty(struct page *page, const 
unsigned int ofs) {
bh = buffers_to_free->b_this_page;
free_buffer_head(buffers_to_free);
buffers_to_free = bh;
-   } while (buffers_to_free);
+   } while (buffers_to_free != head);
}
 }
 
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index ee8392a..26ba4f6 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -505,15 +505,11 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned 
long mft_no,
memcpy(kmirr, m, vol->mft_record_size);
/* Create uptodate buffers if not present. */
if (unlikely(!page_has_buffers(page))) {
-   struct buffer_head *tail;
-
bh = head = alloc_page_buffers(page, blocksize, true);
do {
set_buffer_uptodate(bh);
-   tail = bh;
bh = bh->b_this_page;
} while (bh);
-   tail->b_this_page = head;
attach_page_buffers(page, head);
}
bh = head = page_buffers(page);
-- 
2.6.2



[PATCH 1/3] fs: buffer: Create circular buffer list for pages.

2018-01-01 Thread Sean Fu
Make alloc_page_buffers to create circular buffer list instead linear
list.
Remove unnecessary traversal in link_dev_buffers to create circular
buffer list.
Make nobh_write_begin and nobh_write_end to support circular buffer list
traversal.

Signed-off-by: Sean Fu <fxinr...@gmail.com>
---
 fs/buffer.c | 48 +---
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 0736a6a..7e62c75 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -842,29 +842,36 @@ int remove_inode_buffers(struct inode *inode)
 struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
bool retry)
 {
-   struct buffer_head *bh, *head;
+   struct buffer_head *bh, *head, *tail;
gfp_t gfp = GFP_NOFS;
long offset;
 
if (retry)
gfp |= __GFP_NOFAIL;
 
-   head = NULL;
+   head = tail = NULL;
offset = PAGE_SIZE;
while ((offset -= size) >= 0) {
bh = alloc_buffer_head(gfp);
if (!bh)
goto no_grow;
 
-   bh->b_this_page = head;
+   if (unlikely(!head))
+   tail = bh;
+   else
+   bh->b_this_page = head;
+
bh->b_blocknr = -1;
head = bh;
-
bh->b_size = size;
 
/* Link the buffer to its page */
set_bh_page(bh, page, offset);
}
+
+   if (tail)
+   tail->b_this_page = head;
+
return head;
 /*
  * In case anything failed, we just free everything we got.
@@ -882,20 +889,6 @@ struct buffer_head *alloc_page_buffers(struct page *page, 
unsigned long size,
 }
 EXPORT_SYMBOL_GPL(alloc_page_buffers);
 
-static inline void
-link_dev_buffers(struct page *page, struct buffer_head *head)
-{
-   struct buffer_head *bh, *tail;
-
-   bh = head;
-   do {
-   tail = bh;
-   bh = bh->b_this_page;
-   } while (bh);
-   tail->b_this_page = head;
-   attach_page_buffers(page, head);
-}
-
 static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
 {
sector_t retval = ~((sector_t)0);
@@ -993,7 +986,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 * run under the page lock.
 */
spin_lock(>i_mapping->private_lock);
-   link_dev_buffers(page, bh);
+   attach_page_buffers(page, bh);
end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
size);
spin_unlock(>i_mapping->private_lock);
@@ -1533,16 +1526,14 @@ EXPORT_SYMBOL(block_invalidatepage);
 void create_empty_buffers(struct page *page,
unsigned long blocksize, unsigned long b_state)
 {
-   struct buffer_head *bh, *head, *tail;
+   struct buffer_head *bh, *head;
 
head = alloc_page_buffers(page, blocksize, true);
bh = head;
do {
bh->b_state |= b_state;
-   tail = bh;
bh = bh->b_this_page;
-   } while (bh);
-   tail->b_this_page = head;
+   } while (bh != head);
 
spin_lock(>mapping->private_lock);
if (PageUptodate(page) || PageDirty(page)) {
@@ -2655,11 +2646,14 @@ int nobh_write_begin(struct address_space *mapping,
 * any VM or truncate activity.  Hence we don't need to care
 * for the buffer_head refcounts.
 */
-   for (bh = head; bh; bh = bh->b_this_page) {
+   bh = head;
+   do {
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
ret = -EIO;
-   }
+   bh = bh->b_this_page;
+   } while (bh != head);
+
if (ret)
goto failed;
}
@@ -2717,11 +2711,11 @@ int nobh_write_end(struct file *file, struct 
address_space *mapping,
unlock_page(page);
put_page(page);
 
-   while (head) {
+   do {
bh = head;
head = head->b_this_page;
free_buffer_head(bh);
-   }
+   } while (head != fsdata);
 
return copied;
 }
-- 
2.6.2



[PATCH 2/3] fs/ntfs: Make ntfs to support circular buffer list.

2018-01-01 Thread Sean Fu
Modify mark_ntfs_record_dirty to support circular buffer list.
alloc_page_buffers created circular buffer list. So the circular list
linking in ntfs_sync_mft_mirror is unnecessary.

Signed-off-by: Sean Fu 
---
 fs/ntfs/aops.c | 6 ++
 fs/ntfs/mft.c  | 4 
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 3a2e509..4e69577 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1746,10 +1746,8 @@ void mark_ntfs_record_dirty(struct page *page, const 
unsigned int ofs) {
 
do {
set_buffer_uptodate(bh);
-   tail = bh;
bh = bh->b_this_page;
-   } while (bh);
-   tail->b_this_page = head;
+   } while (bh != head);
attach_page_buffers(page, head);
} else
buffers_to_free = bh;
@@ -1771,7 +1769,7 @@ void mark_ntfs_record_dirty(struct page *page, const 
unsigned int ofs) {
bh = buffers_to_free->b_this_page;
free_buffer_head(buffers_to_free);
buffers_to_free = bh;
-   } while (buffers_to_free);
+   } while (buffers_to_free != head);
}
 }
 
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index ee8392a..26ba4f6 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -505,15 +505,11 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned 
long mft_no,
memcpy(kmirr, m, vol->mft_record_size);
/* Create uptodate buffers if not present. */
if (unlikely(!page_has_buffers(page))) {
-   struct buffer_head *tail;
-
bh = head = alloc_page_buffers(page, blocksize, true);
do {
set_buffer_uptodate(bh);
-   tail = bh;
bh = bh->b_this_page;
} while (bh);
-   tail->b_this_page = head;
attach_page_buffers(page, head);
}
bh = head = page_buffers(page);
-- 
2.6.2



[PATCH 1/3] fs: buffer: Create circular buffer list for pages.

2018-01-01 Thread Sean Fu
Make alloc_page_buffers to create circular buffer list instead linear
list.
Remove unnecessary traversal in link_dev_buffers to create circular
buffer list.
Make nobh_write_begin and nobh_write_end to support circular buffer list
traversal.

Signed-off-by: Sean Fu 
---
 fs/buffer.c | 48 +---
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 0736a6a..7e62c75 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -842,29 +842,36 @@ int remove_inode_buffers(struct inode *inode)
 struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
bool retry)
 {
-   struct buffer_head *bh, *head;
+   struct buffer_head *bh, *head, *tail;
gfp_t gfp = GFP_NOFS;
long offset;
 
if (retry)
gfp |= __GFP_NOFAIL;
 
-   head = NULL;
+   head = tail = NULL;
offset = PAGE_SIZE;
while ((offset -= size) >= 0) {
bh = alloc_buffer_head(gfp);
if (!bh)
goto no_grow;
 
-   bh->b_this_page = head;
+   if (unlikely(!head))
+   tail = bh;
+   else
+   bh->b_this_page = head;
+
bh->b_blocknr = -1;
head = bh;
-
bh->b_size = size;
 
/* Link the buffer to its page */
set_bh_page(bh, page, offset);
}
+
+   if (tail)
+   tail->b_this_page = head;
+
return head;
 /*
  * In case anything failed, we just free everything we got.
@@ -882,20 +889,6 @@ struct buffer_head *alloc_page_buffers(struct page *page, 
unsigned long size,
 }
 EXPORT_SYMBOL_GPL(alloc_page_buffers);
 
-static inline void
-link_dev_buffers(struct page *page, struct buffer_head *head)
-{
-   struct buffer_head *bh, *tail;
-
-   bh = head;
-   do {
-   tail = bh;
-   bh = bh->b_this_page;
-   } while (bh);
-   tail->b_this_page = head;
-   attach_page_buffers(page, head);
-}
-
 static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
 {
sector_t retval = ~((sector_t)0);
@@ -993,7 +986,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 * run under the page lock.
 */
spin_lock(>i_mapping->private_lock);
-   link_dev_buffers(page, bh);
+   attach_page_buffers(page, bh);
end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
size);
spin_unlock(>i_mapping->private_lock);
@@ -1533,16 +1526,14 @@ EXPORT_SYMBOL(block_invalidatepage);
 void create_empty_buffers(struct page *page,
unsigned long blocksize, unsigned long b_state)
 {
-   struct buffer_head *bh, *head, *tail;
+   struct buffer_head *bh, *head;
 
head = alloc_page_buffers(page, blocksize, true);
bh = head;
do {
bh->b_state |= b_state;
-   tail = bh;
bh = bh->b_this_page;
-   } while (bh);
-   tail->b_this_page = head;
+   } while (bh != head);
 
spin_lock(>mapping->private_lock);
if (PageUptodate(page) || PageDirty(page)) {
@@ -2655,11 +2646,14 @@ int nobh_write_begin(struct address_space *mapping,
 * any VM or truncate activity.  Hence we don't need to care
 * for the buffer_head refcounts.
 */
-   for (bh = head; bh; bh = bh->b_this_page) {
+   bh = head;
+   do {
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
ret = -EIO;
-   }
+   bh = bh->b_this_page;
+   } while (bh != head);
+
if (ret)
goto failed;
}
@@ -2717,11 +2711,11 @@ int nobh_write_end(struct file *file, struct 
address_space *mapping,
unlock_page(page);
put_page(page);
 
-   while (head) {
+   do {
bh = head;
head = head->b_this_page;
free_buffer_head(bh);
-   }
+   } while (head != fsdata);
 
return copied;
 }
-- 
2.6.2



[PATCH] ext4: Remove repeated test in ext4_file_read_iter.

2017-12-27 Thread Sean Fu
generic_file_read_iter has done the count test.
So ext4_file_read_iter don't need to test the count repeatedly.

Signed-off-by: Sean Fu <fxinr...@gmail.com>
---
 fs/ext4/file.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index a0ae27b..87ca13e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -67,9 +67,6 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct 
iov_iter *to)
if 
(unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb
return -EIO;
 
-   if (!iov_iter_count(to))
-   return 0; /* skip atime */
-
 #ifdef CONFIG_FS_DAX
if (IS_DAX(file_inode(iocb->ki_filp)))
return ext4_dax_read_iter(iocb, to);
-- 
2.6.2



[PATCH] ext4: Remove repeated test in ext4_file_read_iter.

2017-12-27 Thread Sean Fu
generic_file_read_iter has done the count test.
So ext4_file_read_iter don't need to test the count repeatedly.

Signed-off-by: Sean Fu 
---
 fs/ext4/file.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index a0ae27b..87ca13e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -67,9 +67,6 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct 
iov_iter *to)
if 
(unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb
return -EIO;
 
-   if (!iov_iter_count(to))
-   return 0; /* skip atime */
-
 #ifdef CONFIG_FS_DAX
if (IS_DAX(file_inode(iocb->ki_filp)))
return ext4_dax_read_iter(iocb, to);
-- 
2.6.2



[PATCH] fs: buffer: Remove unnecessary initialisation for bh->b_state.

2017-12-20 Thread Sean Fu
The memory is allocated by kmem_cache_zalloc and initialized with zero.
The bh->b_state initialisation is unnecessary in nobh_write_begin.

Signed-off-by: Sean Fu <fxinr...@gmail.com>
---
 fs/buffer.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 0736a6a..e9a1861 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2618,7 +2618,6 @@ int nobh_write_begin(struct address_space *mapping,
int create;
 
block_end = block_start + blocksize;
-   bh->b_state = 0;
create = 1;
if (block_start >= to)
create = 0;
-- 
2.6.2



[PATCH] fs: buffer: Remove unnecessary initialisation for bh->b_state.

2017-12-20 Thread Sean Fu
The memory is allocated by kmem_cache_zalloc and initialized with zero.
The bh->b_state initialisation is unnecessary in nobh_write_begin.

Signed-off-by: Sean Fu 
---
 fs/buffer.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 0736a6a..e9a1861 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2618,7 +2618,6 @@ int nobh_write_begin(struct address_space *mapping,
int create;
 
block_end = block_start + blocksize;
-   bh->b_state = 0;
create = 1;
if (block_start >= to)
create = 0;
-- 
2.6.2



[tip:x86/urgent] x86/sysfs: Fix off-by-one error in loop termination

2017-09-25 Thread tip-bot for Sean Fu
Commit-ID:  7d7099433d9eaaa5a989a55f1fa354c16a3ad297
Gitweb: http://git.kernel.org/tip/7d7099433d9eaaa5a989a55f1fa354c16a3ad297
Author: Sean Fu <fxinr...@gmail.com>
AuthorDate: Mon, 11 Sep 2017 08:33:21 +0800
Committer:  Thomas Gleixner <t...@linutronix.de>
CommitDate: Mon, 25 Sep 2017 09:36:16 +0200

x86/sysfs: Fix off-by-one error in loop termination

An off-by-one error in loop terminantion conditions in
create_setup_data_nodes() will lead to memory leak when
create_setup_data_node() failed.

Signed-off-by: Sean Fu <fxinr...@gmail.com>
Signed-off-by: Thomas Gleixner <t...@linutronix.de>
Link: 
http://lkml.kernel.org/r/1505090001-1157-1-git-send-email-fxinr...@gmail.com

---
 arch/x86/kernel/ksysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4b0592c..8c1cc08 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject 
*parent)
return 0;
 
 out_clean_nodes:
-   for (j = i - 1; j > 0; j--)
+   for (j = i - 1; j >= 0; j--)
cleanup_setup_data_node(*(kobjp + j));
kfree(kobjp);
 out_setup_data_kobj:


[tip:x86/urgent] x86/sysfs: Fix off-by-one error in loop termination

2017-09-25 Thread tip-bot for Sean Fu
Commit-ID:  7d7099433d9eaaa5a989a55f1fa354c16a3ad297
Gitweb: http://git.kernel.org/tip/7d7099433d9eaaa5a989a55f1fa354c16a3ad297
Author: Sean Fu 
AuthorDate: Mon, 11 Sep 2017 08:33:21 +0800
Committer:  Thomas Gleixner 
CommitDate: Mon, 25 Sep 2017 09:36:16 +0200

x86/sysfs: Fix off-by-one error in loop termination

An off-by-one error in loop terminantion conditions in
create_setup_data_nodes() will lead to memory leak when
create_setup_data_node() failed.

Signed-off-by: Sean Fu 
Signed-off-by: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1505090001-1157-1-git-send-email-fxinr...@gmail.com

---
 arch/x86/kernel/ksysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4b0592c..8c1cc08 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject 
*parent)
return 0;
 
 out_clean_nodes:
-   for (j = i - 1; j > 0; j--)
+   for (j = i - 1; j >= 0; j--)
cleanup_setup_data_node(*(kobjp + j));
kfree(kobjp);
 out_setup_data_kobj:


Re: [PATCH] x86: Fix off-by-one error in loop termination.

2017-09-10 Thread Sean Fu
On Mon, Sep 11, 2017 at 08:33:21AM +0800, Sean Fu wrote:
> An off-by-one error in loop terminantion conditions in
> create_setup_data_nodes will lead to memory leak when
> create_setup_data_node return error.
> 
> Signed-off-by: Sean Fu <fxinr...@gmail.com>
> ---
>  arch/x86/kernel/ksysfs.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
> index 4afc67f..cfde6c0 100644
> --- a/arch/x86/kernel/ksysfs.c
> +++ b/arch/x86/kernel/ksysfs.c
> @@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject 
> *parent)
>   return 0;
>  
>  out_clean_nodes:
> - for (j = i - 1; j > 0; j--)
> + for (j = i - 1; j >= 0; j--)
>   cleanup_setup_data_node(*(kobjp + j));
>   kfree(kobjp);
>  out_setup_data_kobj:
> -- 
> 2.6.2
>
Appologize for the wrong subject prefix in previous email.
Resent the patch right now.

Could you please review this patch?

Thanks


Re: [PATCH] x86: Fix off-by-one error in loop termination.

2017-09-10 Thread Sean Fu
On Mon, Sep 11, 2017 at 08:33:21AM +0800, Sean Fu wrote:
> An off-by-one error in loop terminantion conditions in
> create_setup_data_nodes will lead to memory leak when
> create_setup_data_node return error.
> 
> Signed-off-by: Sean Fu 
> ---
>  arch/x86/kernel/ksysfs.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
> index 4afc67f..cfde6c0 100644
> --- a/arch/x86/kernel/ksysfs.c
> +++ b/arch/x86/kernel/ksysfs.c
> @@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject 
> *parent)
>   return 0;
>  
>  out_clean_nodes:
> - for (j = i - 1; j > 0; j--)
> + for (j = i - 1; j >= 0; j--)
>   cleanup_setup_data_node(*(kobjp + j));
>   kfree(kobjp);
>  out_setup_data_kobj:
> -- 
> 2.6.2
>
Appologize for the wrong subject prefix in previous email.
Resent the patch right now.

Could you please review this patch?

Thanks


[PATCH] x86: Fix off-by-one error in loop termination.

2017-09-10 Thread Sean Fu
An off-by-one error in loop terminantion conditions in
create_setup_data_nodes will lead to memory leak when
create_setup_data_node return error.

Signed-off-by: Sean Fu <fxinr...@gmail.com>
---
 arch/x86/kernel/ksysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4afc67f..cfde6c0 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject 
*parent)
return 0;
 
 out_clean_nodes:
-   for (j = i - 1; j > 0; j--)
+   for (j = i - 1; j >= 0; j--)
cleanup_setup_data_node(*(kobjp + j));
kfree(kobjp);
 out_setup_data_kobj:
-- 
2.6.2



[PATCH] x86: Fix off-by-one error in loop termination.

2017-09-10 Thread Sean Fu
An off-by-one error in loop terminantion conditions in
create_setup_data_nodes will lead to memory leak when
create_setup_data_node return error.

Signed-off-by: Sean Fu 
---
 arch/x86/kernel/ksysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4afc67f..cfde6c0 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject 
*parent)
return 0;
 
 out_clean_nodes:
-   for (j = i - 1; j > 0; j--)
+   for (j = i - 1; j >= 0; j--)
cleanup_setup_data_node(*(kobjp + j));
kfree(kobjp);
 out_setup_data_kobj:
-- 
2.6.2



[PATCH 2/2] x86: Fix off-by-one error in loop termination.

2017-09-10 Thread Sean Fu
An off-by-one error in loop terminantion conditions in
create_setup_data_nodes will lead to memory leak when
create_setup_data_node return error.

Signed-off-by: Sean Fu <fxinr...@gmail.com>
---
 arch/x86/kernel/ksysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4afc67f..cfde6c0 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject 
*parent)
return 0;
 
 out_clean_nodes:
-   for (j = i - 1; j > 0; j--)
+   for (j = i - 1; j >= 0; j--)
cleanup_setup_data_node(*(kobjp + j));
kfree(kobjp);
 out_setup_data_kobj:
-- 
2.6.2



[PATCH 2/2] x86: Fix off-by-one error in loop termination.

2017-09-10 Thread Sean Fu
An off-by-one error in loop terminantion conditions in
create_setup_data_nodes will lead to memory leak when
create_setup_data_node return error.

Signed-off-by: Sean Fu 
---
 arch/x86/kernel/ksysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4afc67f..cfde6c0 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject 
*parent)
return 0;
 
 out_clean_nodes:
-   for (j = i - 1; j > 0; j--)
+   for (j = i - 1; j >= 0; j--)
cleanup_setup_data_node(*(kobjp + j));
kfree(kobjp);
 out_setup_data_kobj:
-- 
2.6.2



Re: [PATCH] fs: buffer: Modify alloc_page_buffers.

2017-06-23 Thread Sean Fu
On Mon, Jun 19, 2017 at 05:03:16PM +0100, Al Viro wrote:
> On Mon, Jun 19, 2017 at 09:01:36PM +0800, Sean Fu wrote:
> > Make alloc_page_buffers support circular buffer list and initialise
> > b_state field.
> > Optimize the performance by removing the buffer list traversal to create
> > circular buffer list.
> 
> > -   bh = head = alloc_page_buffers(page, blocksize, 1);
> > +   bh = head = alloc_page_buffers(page, blocksize, 1, 0, 0);
> 
> Frankly, I don't like that change of calling conventions; it's very easy to
> mess the order of arguments when using interfaces like that and it's hell
> to find when trying to debug the resulting mess.
> 
> Do you really get an observable change in performance?  What loads are
> triggering it?
Sorry for my mistake.
Infact, The time of writting a large file depends on saveral other
factors, eg system workload.
Yesterday, I tried to test the time of single calling create_empty_buffers by 
kretprobe
and found that the performance difference is too small to measure it.

before:
[  944.632027] create_empty_buffers returned 878736736 and took 2160 ns
to execute
[  944.632286] create_empty_buffers returned 878962832 and took 451 ns
to execute
[  944.632302] create_empty_buffers returned 878962016 and took 226 ns
to execute
[  944.632728] create_empty_buffers returned 878962832 and took 235 ns
to execute
[  944.633105] create_empty_buffers returned 878962832 and took 167 ns
to execute
[  944.633421] create_empty_buffers returned 878962832 and took 160 ns
to execute

after:
[39209.076519] create_empty_buffers returned 383804768 and took 1666 ns
to execute
[39209.077032] create_empty_buffers returned 383804768 and took 366 ns
to execute
[39209.077120] create_empty_buffers returned 558412336 and took 179 ns
to execute
[39209.077127] create_empty_buffers returned 558413152 and took 148 ns
to execute
[39209.077525] create_empty_buffers returned 558412336 and took 201 ns
to execute
[39209.078255] create_empty_buffers returned 814328768 and took 880 ns
to execute
[39209.078498] create_empty_buffers returned 558412336 and took 564 ns
to execute
[39209.078737] create_empty_buffers returned 558413152 and took 196 ns
to execute

This patch also complicates code.
Please ignore it.
Thanks all of you.


Re: [PATCH] fs: buffer: Modify alloc_page_buffers.

2017-06-23 Thread Sean Fu
On Mon, Jun 19, 2017 at 05:03:16PM +0100, Al Viro wrote:
> On Mon, Jun 19, 2017 at 09:01:36PM +0800, Sean Fu wrote:
> > Make alloc_page_buffers support circular buffer list and initialise
> > b_state field.
> > Optimize the performance by removing the buffer list traversal to create
> > circular buffer list.
> 
> > -   bh = head = alloc_page_buffers(page, blocksize, 1);
> > +   bh = head = alloc_page_buffers(page, blocksize, 1, 0, 0);
> 
> Frankly, I don't like that change of calling conventions; it's very easy to
> mess the order of arguments when using interfaces like that and it's hell
> to find when trying to debug the resulting mess.
> 
> Do you really get an observable change in performance?  What loads are
> triggering it?
Sorry for my mistake.
Infact, The time of writting a large file depends on saveral other
factors, eg system workload.
Yesterday, I tried to test the time of single calling create_empty_buffers by 
kretprobe
and found that the performance difference is too small to measure it.

before:
[  944.632027] create_empty_buffers returned 878736736 and took 2160 ns
to execute
[  944.632286] create_empty_buffers returned 878962832 and took 451 ns
to execute
[  944.632302] create_empty_buffers returned 878962016 and took 226 ns
to execute
[  944.632728] create_empty_buffers returned 878962832 and took 235 ns
to execute
[  944.633105] create_empty_buffers returned 878962832 and took 167 ns
to execute
[  944.633421] create_empty_buffers returned 878962832 and took 160 ns
to execute

after:
[39209.076519] create_empty_buffers returned 383804768 and took 1666 ns
to execute
[39209.077032] create_empty_buffers returned 383804768 and took 366 ns
to execute
[39209.077120] create_empty_buffers returned 558412336 and took 179 ns
to execute
[39209.077127] create_empty_buffers returned 558413152 and took 148 ns
to execute
[39209.077525] create_empty_buffers returned 558412336 and took 201 ns
to execute
[39209.078255] create_empty_buffers returned 814328768 and took 880 ns
to execute
[39209.078498] create_empty_buffers returned 558412336 and took 564 ns
to execute
[39209.078737] create_empty_buffers returned 558413152 and took 196 ns
to execute

This patch also complicates code.
Please ignore it.
Thanks all of you.


Re: [PATCH] fs: buffer: Modify alloc_page_buffers.

2017-06-21 Thread Sean Fu
On Mon, Jun 19, 2017 at 05:03:16PM +0100, Al Viro wrote:
> On Mon, Jun 19, 2017 at 09:01:36PM +0800, Sean Fu wrote:
> > Make alloc_page_buffers support circular buffer list and initialise
> > b_state field.
> > Optimize the performance by removing the buffer list traversal to create
> > circular buffer list.
> 
> > -   bh = head = alloc_page_buffers(page, blocksize, 1);
> > +   bh = head = alloc_page_buffers(page, blocksize, 1, 0, 0);
> 
> Frankly, I don't like that change of calling conventions; it's very easy to
> mess the order of arguments when using interfaces like that and it's hell
> to find when trying to debug the resulting mess.
> 
> Do you really get an observable change in performance?  What loads are
> triggering it?
Yes, I have got the performance change with ext3 file system which block
size is 1024 bytes. It has at least %5 performance improvement.

I have found the performance improvements when writting/reading a 800M
size of file on ext3 file system with 1024 block size.
In this case, Each page has four buffer. In other word, the buffer list
has 4 elements.

I have compared the time that the process spent in kernel mode.

Improvements via this patch
Before  After
Write:
0m5.604s0m4.116s
0m4.408s0m3.924s
0m4.184s0m3.708s
0m4.352s0m3.656s
0m4.380s0m3.608s
0m4.240s0m3.612s
0m4.460s0m3.552s
0m4.072s0m3.832s
0m4.300s0m3.736s
0m4.400s0m3.480s

Read:
0m3.128s0m3.036s
0m2.976s0m2.568s
0m3.384s0m2.544s
0m3.112s0m2.752s
0m2.924s0m2.684s
0m3.084s0m2.856s
0m3.348s0m2.576s
0m3.000s0m2.968s
0m3.012s0m2.560s
0m2.768s0m2.752s

Reproduce steps:
1 mkfs.ext3 -b 1024 /dev/sdb1
2 ./test_write.sh ./writetest 10

Test shell script:
#!/bin/bash
i=$2;
while test $((i)) -ge 1; do
mount /dev/sdb1 /mnt/sdb1/
time $1 -b 800 -o /mnt/sdb1/fileout
rm /mnt/sdb1/fileout
sync
sleep 1
umount /mnt/sdb1/
echo "aaa"
i=$i-1
done


The attachment are the code for writetest and test result.
/*
 *
 *   Copyright (c) CHANG Industry, Inc., 2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 *  FILE: writetest.c
 *  DESCRIPTION : The purpose of this test is to verify that writes to
 *disk occur without corruption.  It writes one 1MB
 *buffer at a time, where each byte in the buffer is
 *generated from a random number.  Once done
 *completed, the file is re-opened, the random number
 *generator is re-seeded, and the file is verified.
 *
 *  HISTORY:
 *   05/12/2004 : Written by Danny Sung <dan...@changind.com> to
 *verify integrity of disk writes.
 *
 */

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

//#include "test.h"

#define BLOCKSIZE (1024*1024)
#define FILE_OUT"fileout"
#define FILE_MODE   0644
#define MAX_FILENAME_LEN 1024

#define tst_resm(prefix, fmt, args...)	\
do {	\
	printf(fmt, ##args);		\
} while (0)

int Verbosity = 0;
int DefaultSeed = 0;
char Filename[MAX_FILENAME_LEN] = FILE_OUT;
off_t NumBlocks = 1;
char *TCID = "writetest";
int TST_TOTAL = 2;

void buf_init(void)
{
	static int seed = 0;
	if (seed == 0)
		seed = DefaultSeed;
	srand(seed);
}

void buf_fill(uint8_t * buf)
{
	int i;
	for (i = 0; i < BLOCKSIZE; i++) {
		*buf = (rand() & 0xff);
		buf++;
	}
}

int write_file(off_t num_blocks, const char *filename)
{
	int fd;
	int ret = 0;
	off_t block;
	uint8_t buf[BLOCKSIZE];

	fd = open(filename, O_RDWR | O_CREAT | O_TRUNC/* | O_LARGEFILE*/,
		  FILE_MODE);
	if (fd < 0) {
		perror(TCID);
		return (-1);
	}
	for (block = 0; block < num_blocks; block++) {
		int rv;
		if (Verbosity > 2)
			tst_resm(TINFO, "Block: %lld/%lld  (%3lld%%)\r",
 (long long int)block,
 (long long int)num_blocks,
 (long long int)(block * 100 / num_blocks));
		buf_fill(buf);
		rv = write(fd, buf, BLOCKSIZE);
		if (rv != BLOCKSIZE) {
			ret = -1;
			break;
		}
	}
	if (Verbosity > 

Re: [PATCH] fs: buffer: Modify alloc_page_buffers.

2017-06-21 Thread Sean Fu
On Mon, Jun 19, 2017 at 05:03:16PM +0100, Al Viro wrote:
> On Mon, Jun 19, 2017 at 09:01:36PM +0800, Sean Fu wrote:
> > Make alloc_page_buffers support circular buffer list and initialise
> > b_state field.
> > Optimize the performance by removing the buffer list traversal to create
> > circular buffer list.
> 
> > -   bh = head = alloc_page_buffers(page, blocksize, 1);
> > +   bh = head = alloc_page_buffers(page, blocksize, 1, 0, 0);
> 
> Frankly, I don't like that change of calling conventions; it's very easy to
> mess the order of arguments when using interfaces like that and it's hell
> to find when trying to debug the resulting mess.
> 
> Do you really get an observable change in performance?  What loads are
> triggering it?
Yes, I have got the performance change with ext3 file system which block
size is 1024 bytes. It has at least %5 performance improvement.

I have found the performance improvements when writting/reading a 800M
size of file on ext3 file system with 1024 block size.
In this case, Each page has four buffer. In other word, the buffer list
has 4 elements.

I have compared the time that the process spent in kernel mode.

Improvements via this patch
Before  After
Write:
0m5.604s0m4.116s
0m4.408s0m3.924s
0m4.184s0m3.708s
0m4.352s0m3.656s
0m4.380s0m3.608s
0m4.240s0m3.612s
0m4.460s0m3.552s
0m4.072s0m3.832s
0m4.300s0m3.736s
0m4.400s0m3.480s

Read:
0m3.128s0m3.036s
0m2.976s0m2.568s
0m3.384s0m2.544s
0m3.112s0m2.752s
0m2.924s0m2.684s
0m3.084s0m2.856s
0m3.348s0m2.576s
0m3.000s0m2.968s
0m3.012s0m2.560s
0m2.768s0m2.752s

Reproduce steps:
1 mkfs.ext3 -b 1024 /dev/sdb1
2 ./test_write.sh ./writetest 10

Test shell script:
#!/bin/bash
i=$2;
while test $((i)) -ge 1; do
mount /dev/sdb1 /mnt/sdb1/
time $1 -b 800 -o /mnt/sdb1/fileout
rm /mnt/sdb1/fileout
sync
sleep 1
umount /mnt/sdb1/
echo "aaa"
i=$i-1
done


The attachment are the code for writetest and test result.
/*
 *
 *   Copyright (c) CHANG Industry, Inc., 2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 *  FILE: writetest.c
 *  DESCRIPTION : The purpose of this test is to verify that writes to
 *disk occur without corruption.  It writes one 1MB
 *buffer at a time, where each byte in the buffer is
 *generated from a random number.  Once done
 *completed, the file is re-opened, the random number
 *generator is re-seeded, and the file is verified.
 *
 *  HISTORY:
 *   05/12/2004 : Written by Danny Sung  to
 *verify integrity of disk writes.
 *
 */

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

//#include "test.h"

#define BLOCKSIZE (1024*1024)
#define FILE_OUT"fileout"
#define FILE_MODE   0644
#define MAX_FILENAME_LEN 1024

#define tst_resm(prefix, fmt, args...)	\
do {	\
	printf(fmt, ##args);		\
} while (0)

int Verbosity = 0;
int DefaultSeed = 0;
char Filename[MAX_FILENAME_LEN] = FILE_OUT;
off_t NumBlocks = 1;
char *TCID = "writetest";
int TST_TOTAL = 2;

void buf_init(void)
{
	static int seed = 0;
	if (seed == 0)
		seed = DefaultSeed;
	srand(seed);
}

void buf_fill(uint8_t * buf)
{
	int i;
	for (i = 0; i < BLOCKSIZE; i++) {
		*buf = (rand() & 0xff);
		buf++;
	}
}

int write_file(off_t num_blocks, const char *filename)
{
	int fd;
	int ret = 0;
	off_t block;
	uint8_t buf[BLOCKSIZE];

	fd = open(filename, O_RDWR | O_CREAT | O_TRUNC/* | O_LARGEFILE*/,
		  FILE_MODE);
	if (fd < 0) {
		perror(TCID);
		return (-1);
	}
	for (block = 0; block < num_blocks; block++) {
		int rv;
		if (Verbosity > 2)
			tst_resm(TINFO, "Block: %lld/%lld  (%3lld%%)\r",
 (long long int)block,
 (long long int)num_blocks,
 (long long int)(block * 100 / num_blocks));
		buf_fill(buf);
		rv = write(fd, buf, BLOCKSIZE);
		if (rv != BLOCKSIZE) {
			ret = -1;
			break;
		}
	}
	if (Verbosity > 2)
		tst_resm(TINFO, "Bl

[PATCH] fs: buffer: Modify alloc_page_buffers.

2017-06-19 Thread Sean Fu
Make alloc_page_buffers support circular buffer list and initialise
b_state field.
Optimize the performance by removing the buffer list traversal to create
circular buffer list.

Signed-off-by: Sean Fu <fxinr...@gmail.com>
---
 drivers/md/bitmap.c |  2 +-
 fs/buffer.c | 37 +++--
 fs/ntfs/aops.c  |  2 +-
 fs/ntfs/mft.c   |  2 +-
 include/linux/buffer_head.h |  2 +-
 5 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index f4eace5..615a46e 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -367,7 +367,7 @@ static int read_page(struct file *file, unsigned long index,
pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
 (unsigned long long)index << PAGE_SHIFT);
 
-   bh = alloc_page_buffers(page, 1<i_blkbits, 0);
+   bh = alloc_page_buffers(page, 1<i_blkbits, 0, 0, 0);
if (!bh) {
ret = -ENOMEM;
goto out;
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58..8111eca 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -864,9 +864,9 @@ int remove_inode_buffers(struct inode *inode)
  * which may not fail from ordinary buffer allocations.
  */
 struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
-   int retry)
+   int retry, int circular, unsigned long b_state)
 {
-   struct buffer_head *bh, *head;
+   struct buffer_head *bh, *head, *tail;
long offset;
 
 try_again:
@@ -879,13 +879,21 @@ struct buffer_head *alloc_page_buffers(struct page *page, 
unsigned long size,
 
bh->b_this_page = head;
bh->b_blocknr = -1;
-   head = bh;
 
+   if (head == NULL)
+   tail = bh;
+
+   head = bh;
+   bh->b_state = b_state;
bh->b_size = size;
 
/* Link the buffer to its page */
set_bh_page(bh, page, offset);
}
+
+   if (circular)
+   tail->b_this_page = head;
+
return head;
 /*
  * In case anything failed, we just free everything we got.
@@ -922,14 +930,6 @@ EXPORT_SYMBOL_GPL(alloc_page_buffers);
 static inline void
 link_dev_buffers(struct page *page, struct buffer_head *head)
 {
-   struct buffer_head *bh, *tail;
-
-   bh = head;
-   do {
-   tail = bh;
-   bh = bh->b_this_page;
-   } while (bh);
-   tail->b_this_page = head;
attach_page_buffers(page, head);
 }
 
@@ -1024,7 +1024,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
/*
 * Allocate some buffers for this page
 */
-   bh = alloc_page_buffers(page, size, 0);
+   bh = alloc_page_buffers(page, size, 0, 1, 0);
if (!bh)
goto failed;
 
@@ -1578,16 +1578,9 @@ EXPORT_SYMBOL(block_invalidatepage);
 void create_empty_buffers(struct page *page,
unsigned long blocksize, unsigned long b_state)
 {
-   struct buffer_head *bh, *head, *tail;
+   struct buffer_head *bh, *head;
 
-   head = alloc_page_buffers(page, blocksize, 1);
-   bh = head;
-   do {
-   bh->b_state |= b_state;
-   tail = bh;
-   bh = bh->b_this_page;
-   } while (bh);
-   tail->b_this_page = head;
+   head = alloc_page_buffers(page, blocksize, 1, 1, b_state);
 
spin_lock(>mapping->private_lock);
if (PageUptodate(page) || PageDirty(page)) {
@@ -2642,7 +2635,7 @@ int nobh_write_begin(struct address_space *mapping,
 * Be careful: the buffer linked list is a NULL terminated one, rather
 * than the circular one we're used to.
 */
-   head = alloc_page_buffers(page, blocksize, 0);
+   head = alloc_page_buffers(page, blocksize, 0, 0, 0);
if (!head) {
ret = -ENOMEM;
goto out_release;
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index cc91856..e692142 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1739,7 +1739,7 @@ void mark_ntfs_record_dirty(struct page *page, const 
unsigned int ofs) {
spin_lock(>private_lock);
if (unlikely(!page_has_buffers(page))) {
spin_unlock(>private_lock);
-   bh = head = alloc_page_buffers(page, bh_size, 1);
+   bh = head = alloc_page_buffers(page, bh_size, 1, 0, 0);
spin_lock(>private_lock);
if (likely(!page_has_buffers(page))) {
struct buffer_head *tail;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index b6f4021..175a02b 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -507,7 +507,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned 
long mft_no,
if (unlikely(!page_has_buffers(page))) {
struct buffer_head *tail;
 
-

[PATCH] fs: buffer: Modify alloc_page_buffers.

2017-06-19 Thread Sean Fu
Make alloc_page_buffers support circular buffer list and initialise
b_state field.
Optimize the performance by removing the buffer list traversal to create
circular buffer list.

Signed-off-by: Sean Fu 
---
 drivers/md/bitmap.c |  2 +-
 fs/buffer.c | 37 +++--
 fs/ntfs/aops.c  |  2 +-
 fs/ntfs/mft.c   |  2 +-
 include/linux/buffer_head.h |  2 +-
 5 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index f4eace5..615a46e 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -367,7 +367,7 @@ static int read_page(struct file *file, unsigned long index,
pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
 (unsigned long long)index << PAGE_SHIFT);
 
-   bh = alloc_page_buffers(page, 1<i_blkbits, 0);
+   bh = alloc_page_buffers(page, 1<i_blkbits, 0, 0, 0);
if (!bh) {
ret = -ENOMEM;
goto out;
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58..8111eca 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -864,9 +864,9 @@ int remove_inode_buffers(struct inode *inode)
  * which may not fail from ordinary buffer allocations.
  */
 struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
-   int retry)
+   int retry, int circular, unsigned long b_state)
 {
-   struct buffer_head *bh, *head;
+   struct buffer_head *bh, *head, *tail;
long offset;
 
 try_again:
@@ -879,13 +879,21 @@ struct buffer_head *alloc_page_buffers(struct page *page, 
unsigned long size,
 
bh->b_this_page = head;
bh->b_blocknr = -1;
-   head = bh;
 
+   if (head == NULL)
+   tail = bh;
+
+   head = bh;
+   bh->b_state = b_state;
bh->b_size = size;
 
/* Link the buffer to its page */
set_bh_page(bh, page, offset);
}
+
+   if (circular)
+   tail->b_this_page = head;
+
return head;
 /*
  * In case anything failed, we just free everything we got.
@@ -922,14 +930,6 @@ EXPORT_SYMBOL_GPL(alloc_page_buffers);
 static inline void
 link_dev_buffers(struct page *page, struct buffer_head *head)
 {
-   struct buffer_head *bh, *tail;
-
-   bh = head;
-   do {
-   tail = bh;
-   bh = bh->b_this_page;
-   } while (bh);
-   tail->b_this_page = head;
attach_page_buffers(page, head);
 }
 
@@ -1024,7 +1024,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
/*
 * Allocate some buffers for this page
 */
-   bh = alloc_page_buffers(page, size, 0);
+   bh = alloc_page_buffers(page, size, 0, 1, 0);
if (!bh)
goto failed;
 
@@ -1578,16 +1578,9 @@ EXPORT_SYMBOL(block_invalidatepage);
 void create_empty_buffers(struct page *page,
unsigned long blocksize, unsigned long b_state)
 {
-   struct buffer_head *bh, *head, *tail;
+   struct buffer_head *bh, *head;
 
-   head = alloc_page_buffers(page, blocksize, 1);
-   bh = head;
-   do {
-   bh->b_state |= b_state;
-   tail = bh;
-   bh = bh->b_this_page;
-   } while (bh);
-   tail->b_this_page = head;
+   head = alloc_page_buffers(page, blocksize, 1, 1, b_state);
 
spin_lock(>mapping->private_lock);
if (PageUptodate(page) || PageDirty(page)) {
@@ -2642,7 +2635,7 @@ int nobh_write_begin(struct address_space *mapping,
 * Be careful: the buffer linked list is a NULL terminated one, rather
 * than the circular one we're used to.
 */
-   head = alloc_page_buffers(page, blocksize, 0);
+   head = alloc_page_buffers(page, blocksize, 0, 0, 0);
if (!head) {
ret = -ENOMEM;
goto out_release;
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index cc91856..e692142 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1739,7 +1739,7 @@ void mark_ntfs_record_dirty(struct page *page, const 
unsigned int ofs) {
spin_lock(>private_lock);
if (unlikely(!page_has_buffers(page))) {
spin_unlock(>private_lock);
-   bh = head = alloc_page_buffers(page, bh_size, 1);
+   bh = head = alloc_page_buffers(page, bh_size, 1, 0, 0);
spin_lock(>private_lock);
if (likely(!page_has_buffers(page))) {
struct buffer_head *tail;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index b6f4021..175a02b 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -507,7 +507,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned 
long mft_no,
if (unlikely(!page_has_buffers(page))) {
struct buffer_head *tail;
 
-   bh = head 

Re: [PATCH] fs/buffer.c: make bh_lru_install() more efficient

2017-06-03 Thread Sean Fu
On Sat, Mar 25, 2017 at 08:34:30PM -0700, Eric Biggers wrote:
> On Thu, Dec 29, 2016 at 01:34:45PM -0600, Eric Biggers wrote:
> > From: Eric Biggers 
> > 
> > To install a buffer_head into the cpu's LRU queue, bh_lru_install()
> > would construct a new copy of the queue and then memcpy it over the real
> > queue.  But it's easily possible to do the update in-place, which is
> > faster and simpler.  Some work can also be skipped if the buffer_head
> > was already in the queue.
> > 
> > As a microbenchmark I timed how long it takes to run sb_getblk()
> > 10,000,000 times alternating between BH_LRU_SIZE + 1 blocks.
> > Effectively, this benchmarks looking up buffer_heads that are in the
> > page cache but not in the LRU:
> > 
> > Before this patch: 1.758s
> > After this patch: 1.653s
> > 
> > This patch also removes about 350 bytes of compiled code (on x86_64),
> > partly due to removal of the memcpy() which was being inlined+unrolled.
> > 
> > Signed-off-by: Eric Biggers 
> 
> Ping?  Al, do you have any interest in taking this patch?
> 
> - Eric
IMO, It is great patch.
Could you please share your microbenchmark steps?
I wanna take a deep look at it.


Re: [PATCH] fs/buffer.c: make bh_lru_install() more efficient

2017-06-03 Thread Sean Fu
On Sat, Mar 25, 2017 at 08:34:30PM -0700, Eric Biggers wrote:
> On Thu, Dec 29, 2016 at 01:34:45PM -0600, Eric Biggers wrote:
> > From: Eric Biggers 
> > 
> > To install a buffer_head into the cpu's LRU queue, bh_lru_install()
> > would construct a new copy of the queue and then memcpy it over the real
> > queue.  But it's easily possible to do the update in-place, which is
> > faster and simpler.  Some work can also be skipped if the buffer_head
> > was already in the queue.
> > 
> > As a microbenchmark I timed how long it takes to run sb_getblk()
> > 10,000,000 times alternating between BH_LRU_SIZE + 1 blocks.
> > Effectively, this benchmarks looking up buffer_heads that are in the
> > page cache but not in the LRU:
> > 
> > Before this patch: 1.758s
> > After this patch: 1.653s
> > 
> > This patch also removes about 350 bytes of compiled code (on x86_64),
> > partly due to removal of the memcpy() which was being inlined+unrolled.
> > 
> > Signed-off-by: Eric Biggers 
> 
> Ping?  Al, do you have any interest in taking this patch?
> 
> - Eric
IMO, It is great patch.
Could you please share your microbenchmark steps?
I wanna take a deep look at it.


Re: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-10 Thread Sean Fu
On Thu, Sep 08, 2016 at 11:25:48PM -0400, Jeff Mahoney wrote:
> On 9/8/16 11:08 PM, Sean Fu wrote:
> > On Tue, Sep 06, 2016 at 11:12:20AM -0400, Jeff Mahoney wrote:
> >> On 9/6/16 5:58 AM, David Sterba wrote:
> >>> On Mon, Sep 05, 2016 at 11:13:40PM -0400, Jeff Mahoney wrote:
> >>>>>> Since root is only used to get fs_info->chunk_root, why not use fs_info
> >>>>>> directly?
> >>>>>
> >>>>> Weird.  Exactly this was a part of my fs_info patchset.  I guess I need
> >>>>> to go back and check what else is missing.
> >>>>
> >>>> Actually, most of this didn't land.  Pretty much anything that's a root
> >>>> ->fs_info conversion is in there.
> >>>
> >>> Only half of the patchset has been merged so far because it did not pass
> >>> testing, so I bisected to some point. I was about to let you know once
> >>> most of 4.9 patches are prepared so there are less merge conflicts.
> >>
> >> Ok, thanks.  I was going to start the rebase today but I'll hold off
> >> until you're set for 4.9.
> >>
> > Hi Jeff, Could you please share your patch? Where can i get it?
> > I wanna have a look at it.
> 
> Sure, it's the whole series that starts with this commit:
> commit 160ceedfd40085cfb1e08305917fcc24cefdad93
> Author: Jeff Mahoney <je...@suse.com>
> Date:   Wed Aug 31 23:55:33 2016 -0400
> 
> btrfs: add dynamic debug support
> 
> ... I still need to do clean up some commits that need merging.
> 
> https://git.kernel.org/cgit/linux/kernel/git/jeffm/linux-btrfs.git/log/?h=btrfs-testing/kdave/misc-4.9/root-fsinfo-cleanup
> 
Nice work. Thanks for your explaination.
I have one more question about it.
Although the total text size of this function is not changed, using
fs_info should need one more instruction to get chunk_root field of
fs_info than using chunk_root directly.
Does it cause any performance impact?

Sean
> -Jeff
> 
> 
> > Thanks
> >> -Jeff
> >>
> >> -- 
> >> Jeff Mahoney
> >> SUSE Labs
> >>
> > 
> > 
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> > the body of a message to majord...@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 
> 
> 
> -- 
> Jeff Mahoney
> SUSE Labs
> 





Re: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-10 Thread Sean Fu
On Thu, Sep 08, 2016 at 11:25:48PM -0400, Jeff Mahoney wrote:
> On 9/8/16 11:08 PM, Sean Fu wrote:
> > On Tue, Sep 06, 2016 at 11:12:20AM -0400, Jeff Mahoney wrote:
> >> On 9/6/16 5:58 AM, David Sterba wrote:
> >>> On Mon, Sep 05, 2016 at 11:13:40PM -0400, Jeff Mahoney wrote:
> >>>>>> Since root is only used to get fs_info->chunk_root, why not use fs_info
> >>>>>> directly?
> >>>>>
> >>>>> Weird.  Exactly this was a part of my fs_info patchset.  I guess I need
> >>>>> to go back and check what else is missing.
> >>>>
> >>>> Actually, most of this didn't land.  Pretty much anything that's a root
> >>>> ->fs_info conversion is in there.
> >>>
> >>> Only half of the patchset has been merged so far because it did not pass
> >>> testing, so I bisected to some point. I was about to let you know once
> >>> most of 4.9 patches are prepared so there are less merge conflicts.
> >>
> >> Ok, thanks.  I was going to start the rebase today but I'll hold off
> >> until you're set for 4.9.
> >>
> > Hi Jeff, Could you please share your patch? Where can i get it?
> > I wanna have a look at it.
> 
> Sure, it's the whole series that starts with this commit:
> commit 160ceedfd40085cfb1e08305917fcc24cefdad93
> Author: Jeff Mahoney 
> Date:   Wed Aug 31 23:55:33 2016 -0400
> 
> btrfs: add dynamic debug support
> 
> ... I still need to do clean up some commits that need merging.
> 
> https://git.kernel.org/cgit/linux/kernel/git/jeffm/linux-btrfs.git/log/?h=btrfs-testing/kdave/misc-4.9/root-fsinfo-cleanup
> 
Nice work. Thanks for your explaination.
I have one more question about it.
Although the total text size of this function is not changed, using
fs_info should need one more instruction to get chunk_root field of
fs_info than using chunk_root directly.
Does it cause any performance impact?

Sean
> -Jeff
> 
> 
> > Thanks
> >> -Jeff
> >>
> >> -- 
> >> Jeff Mahoney
> >> SUSE Labs
> >>
> > 
> > 
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> > the body of a message to majord...@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 
> 
> 
> -- 
> Jeff Mahoney
> SUSE Labs
> 





Re: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-08 Thread Sean Fu
On Thu, Sep 08, 2016 at 11:25:48PM -0400, Jeff Mahoney wrote:
> On 9/8/16 11:08 PM, Sean Fu wrote:
> > On Tue, Sep 06, 2016 at 11:12:20AM -0400, Jeff Mahoney wrote:
> >> On 9/6/16 5:58 AM, David Sterba wrote:
> >>> On Mon, Sep 05, 2016 at 11:13:40PM -0400, Jeff Mahoney wrote:
> >>>>>> Since root is only used to get fs_info->chunk_root, why not use fs_info
> >>>>>> directly?
> >>>>>
> >>>>> Weird.  Exactly this was a part of my fs_info patchset.  I guess I need
> >>>>> to go back and check what else is missing.
> >>>>
> >>>> Actually, most of this didn't land.  Pretty much anything that's a root
> >>>> ->fs_info conversion is in there.
> >>>
> >>> Only half of the patchset has been merged so far because it did not pass
> >>> testing, so I bisected to some point. I was about to let you know once
> >>> most of 4.9 patches are prepared so there are less merge conflicts.
> >>
> >> Ok, thanks.  I was going to start the rebase today but I'll hold off
> >> until you're set for 4.9.
> >>
> > Hi Jeff, Could you please share your patch? Where can i get it?
> > I wanna have a look at it.
> 
> Sure, it's the whole series that starts with this commit:
> commit 160ceedfd40085cfb1e08305917fcc24cefdad93
> Author: Jeff Mahoney <je...@suse.com>
> Date:   Wed Aug 31 23:55:33 2016 -0400
> 
> btrfs: add dynamic debug support
> 
> ... I still need to do clean up some commits that need merging.
> 
> https://git.kernel.org/cgit/linux/kernel/git/jeffm/linux-btrfs.git/log/?h=btrfs-testing/kdave/misc-4.9/root-fsinfo-cleanup
>
Nice work.
Thanks

> -Jeff
> 
> 
> > Thanks
> >> -Jeff
> >>
> >> -- 
> >> Jeff Mahoney
> >> SUSE Labs
> >>
> > 
> > 
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> > the body of a message to majord...@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 
> 
> 
> -- 
> Jeff Mahoney
> SUSE Labs
> 





Re: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-08 Thread Sean Fu
On Thu, Sep 08, 2016 at 11:25:48PM -0400, Jeff Mahoney wrote:
> On 9/8/16 11:08 PM, Sean Fu wrote:
> > On Tue, Sep 06, 2016 at 11:12:20AM -0400, Jeff Mahoney wrote:
> >> On 9/6/16 5:58 AM, David Sterba wrote:
> >>> On Mon, Sep 05, 2016 at 11:13:40PM -0400, Jeff Mahoney wrote:
> >>>>>> Since root is only used to get fs_info->chunk_root, why not use fs_info
> >>>>>> directly?
> >>>>>
> >>>>> Weird.  Exactly this was a part of my fs_info patchset.  I guess I need
> >>>>> to go back and check what else is missing.
> >>>>
> >>>> Actually, most of this didn't land.  Pretty much anything that's a root
> >>>> ->fs_info conversion is in there.
> >>>
> >>> Only half of the patchset has been merged so far because it did not pass
> >>> testing, so I bisected to some point. I was about to let you know once
> >>> most of 4.9 patches are prepared so there are less merge conflicts.
> >>
> >> Ok, thanks.  I was going to start the rebase today but I'll hold off
> >> until you're set for 4.9.
> >>
> > Hi Jeff, Could you please share your patch? Where can i get it?
> > I wanna have a look at it.
> 
> Sure, it's the whole series that starts with this commit:
> commit 160ceedfd40085cfb1e08305917fcc24cefdad93
> Author: Jeff Mahoney 
> Date:   Wed Aug 31 23:55:33 2016 -0400
> 
> btrfs: add dynamic debug support
> 
> ... I still need to do clean up some commits that need merging.
> 
> https://git.kernel.org/cgit/linux/kernel/git/jeffm/linux-btrfs.git/log/?h=btrfs-testing/kdave/misc-4.9/root-fsinfo-cleanup
>
Nice work.
Thanks

> -Jeff
> 
> 
> > Thanks
> >> -Jeff
> >>
> >> -- 
> >> Jeff Mahoney
> >> SUSE Labs
> >>
> > 
> > 
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> > the body of a message to majord...@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 
> 
> 
> -- 
> Jeff Mahoney
> SUSE Labs
> 





Re: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-08 Thread Sean Fu
On Tue, Sep 06, 2016 at 11:12:20AM -0400, Jeff Mahoney wrote:
> On 9/6/16 5:58 AM, David Sterba wrote:
> > On Mon, Sep 05, 2016 at 11:13:40PM -0400, Jeff Mahoney wrote:
>  Since root is only used to get fs_info->chunk_root, why not use fs_info
>  directly?
> >>>
> >>> Weird.  Exactly this was a part of my fs_info patchset.  I guess I need
> >>> to go back and check what else is missing.
> >>
> >> Actually, most of this didn't land.  Pretty much anything that's a root
> >> ->fs_info conversion is in there.
> > 
> > Only half of the patchset has been merged so far because it did not pass
> > testing, so I bisected to some point. I was about to let you know once
> > most of 4.9 patches are prepared so there are less merge conflicts.
> 
> Ok, thanks.  I was going to start the rebase today but I'll hold off
> until you're set for 4.9.
> 
Hi Jeff, Could you please share your patch? Where can i get it?
I wanna have a look at it.

Thanks
> -Jeff
> 
> -- 
> Jeff Mahoney
> SUSE Labs
> 





Re: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-08 Thread Sean Fu
On Tue, Sep 06, 2016 at 11:12:20AM -0400, Jeff Mahoney wrote:
> On 9/6/16 5:58 AM, David Sterba wrote:
> > On Mon, Sep 05, 2016 at 11:13:40PM -0400, Jeff Mahoney wrote:
>  Since root is only used to get fs_info->chunk_root, why not use fs_info
>  directly?
> >>>
> >>> Weird.  Exactly this was a part of my fs_info patchset.  I guess I need
> >>> to go back and check what else is missing.
> >>
> >> Actually, most of this didn't land.  Pretty much anything that's a root
> >> ->fs_info conversion is in there.
> > 
> > Only half of the patchset has been merged so far because it did not pass
> > testing, so I bisected to some point. I was about to let you know once
> > most of 4.9 patches are prepared so there are less merge conflicts.
> 
> Ok, thanks.  I was going to start the rebase today but I'll hold off
> until you're set for 4.9.
> 
Hi Jeff, Could you please share your patch? Where can i get it?
I wanna have a look at it.

Thanks
> -Jeff
> 
> -- 
> Jeff Mahoney
> SUSE Labs
> 





Re: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-06 Thread Sean Fu
On Tue, Sep 06, 2016 at 11:12:20AM -0400, Jeff Mahoney wrote:
> On 9/6/16 5:58 AM, David Sterba wrote:
> > On Mon, Sep 05, 2016 at 11:13:40PM -0400, Jeff Mahoney wrote:
>  Since root is only used to get fs_info->chunk_root, why not use fs_info
>  directly?
> >>>
> >>> Weird.  Exactly this was a part of my fs_info patchset.  I guess I need
> >>> to go back and check what else is missing.
> >>
> >> Actually, most of this didn't land.  Pretty much anything that's a root
> >> ->fs_info conversion is in there.
> > 
> > Only half of the patchset has been merged so far because it did not pass
> > testing, so I bisected to some point. I was about to let you know once
> > most of 4.9 patches are prepared so there are less merge conflicts.
> 
> Ok, thanks.  I was going to start the rebase today but I'll hold off
> until you're set for 4.9.
Sorry for late reply.
Many thanks to all of you.
> 
> -Jeff
> 
> -- 
> Jeff Mahoney
> SUSE Labs
> 





Re: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-06 Thread Sean Fu
On Tue, Sep 06, 2016 at 11:12:20AM -0400, Jeff Mahoney wrote:
> On 9/6/16 5:58 AM, David Sterba wrote:
> > On Mon, Sep 05, 2016 at 11:13:40PM -0400, Jeff Mahoney wrote:
>  Since root is only used to get fs_info->chunk_root, why not use fs_info
>  directly?
> >>>
> >>> Weird.  Exactly this was a part of my fs_info patchset.  I guess I need
> >>> to go back and check what else is missing.
> >>
> >> Actually, most of this didn't land.  Pretty much anything that's a root
> >> ->fs_info conversion is in there.
> > 
> > Only half of the patchset has been merged so far because it did not pass
> > testing, so I bisected to some point. I was about to let you know once
> > most of 4.9 patches are prepared so there are less merge conflicts.
> 
> Ok, thanks.  I was going to start the rebase today but I'll hold off
> until you're set for 4.9.
Sorry for late reply.
Many thanks to all of you.
> 
> -Jeff
> 
> -- 
> Jeff Mahoney
> SUSE Labs
> 





Re: [PATCH v2] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-06 Thread Sean Fu
On Tue, Sep 06, 2016 at 01:19:39PM +0800, Zhao Lei wrote:
> Hi, Sean Fu
> 
> > -Original Message-
> > From: Sean Fu [mailto:fxinr...@gmail.com]
> > Sent: Tuesday, September 06, 2016 11:51 AM
> > To: dste...@suse.com
> > Cc: c...@fb.com; anand.j...@oracle.com; fdman...@suse.com;
> > zhao...@cn.fujitsu.com; linux-kernel@vger.kernel.org;
> > linux-bt...@vger.kernel.org; Sean Fu <fxinr...@gmail.com>
> > Subject: [PATCH v2] Btrfs: remove unnecessary code of chunk_root assignment
> > in btrfs_read_chunk_tree.
> > 
> > The input argument root is already set with "fs_info->chunk_root".
> > "chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info)" in caller
> > "open_ctree".
> > "root->fs_info = fs_info” in "btrfs_alloc_root".
> > 
> > Signed-off-by: Sean Fu <fxinr...@gmail.com>
> > ---
> > Changes in v2:
> >   - Renaming root to chunk_root to make it clear.
> > 
> >  fs/btrfs/volumes.c | 4 +---
> >  1 file changed, 1 insertion(+), 3 deletions(-)
> > 
> > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> > index 366b335..eb3d04a 100644
> > --- a/fs/btrfs/volumes.c
> > +++ b/fs/btrfs/volumes.c
> > @@ -6591,7 +6591,7 @@ out_short_read:
> > return -EIO;
> >  }
> > 
> > -int btrfs_read_chunk_tree(struct btrfs_root *root)
> > +int btrfs_read_chunk_tree(struct btrfs_root *chunk_root)
> 
> Maybe you also need to modify function body to make it
> pass compile, and the header file also need to be modified.
Sorry for my mistake. I have released v3 for it.
> 
> BTW, Qu Wenruo give us a better way in reply, we
> can use fs_info directly.
Sorry, I don't know why we should use fs_info directly.
I have asked Qu for more detail.
Thanks for your review.
> 
> Thanks
> Zhaolei
> 
> >  {
> > struct btrfs_path *path;
> > struct extent_buffer *leaf;
> > @@ -6600,8 +6600,6 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
> > int ret;
> > int slot;
> > 
> > -   root = root->fs_info->chunk_root;
> > -
> > path = btrfs_alloc_path();
> > if (!path)
> > return -ENOMEM;
> > --
> > 2.6.2
> > 
> 
> 
> 
> 


Re: [PATCH v2] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-06 Thread Sean Fu
On Tue, Sep 06, 2016 at 01:19:39PM +0800, Zhao Lei wrote:
> Hi, Sean Fu
> 
> > -Original Message-
> > From: Sean Fu [mailto:fxinr...@gmail.com]
> > Sent: Tuesday, September 06, 2016 11:51 AM
> > To: dste...@suse.com
> > Cc: c...@fb.com; anand.j...@oracle.com; fdman...@suse.com;
> > zhao...@cn.fujitsu.com; linux-kernel@vger.kernel.org;
> > linux-bt...@vger.kernel.org; Sean Fu 
> > Subject: [PATCH v2] Btrfs: remove unnecessary code of chunk_root assignment
> > in btrfs_read_chunk_tree.
> > 
> > The input argument root is already set with "fs_info->chunk_root".
> > "chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info)" in caller
> > "open_ctree".
> > "root->fs_info = fs_info” in "btrfs_alloc_root".
> > 
> > Signed-off-by: Sean Fu 
> > ---
> > Changes in v2:
> >   - Renaming root to chunk_root to make it clear.
> > 
> >  fs/btrfs/volumes.c | 4 +---
> >  1 file changed, 1 insertion(+), 3 deletions(-)
> > 
> > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> > index 366b335..eb3d04a 100644
> > --- a/fs/btrfs/volumes.c
> > +++ b/fs/btrfs/volumes.c
> > @@ -6591,7 +6591,7 @@ out_short_read:
> > return -EIO;
> >  }
> > 
> > -int btrfs_read_chunk_tree(struct btrfs_root *root)
> > +int btrfs_read_chunk_tree(struct btrfs_root *chunk_root)
> 
> Maybe you also need to modify function body to make it
> pass compile, and the header file also need to be modified.
Sorry for my mistake. I have released v3 for it.
> 
> BTW, Qu Wenruo give us a better way in reply, we
> can use fs_info directly.
Sorry, I don't know why we should use fs_info directly.
I have asked Qu for more detail.
Thanks for your review.
> 
> Thanks
> Zhaolei
> 
> >  {
> > struct btrfs_path *path;
> > struct extent_buffer *leaf;
> > @@ -6600,8 +6600,6 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
> > int ret;
> > int slot;
> > 
> > -   root = root->fs_info->chunk_root;
> > -
> > path = btrfs_alloc_path();
> > if (!path)
> > return -ENOMEM;
> > --
> > 2.6.2
> > 
> 
> 
> 
> 


Re: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-06 Thread Sean Fu
On Mon, Sep 05, 2016 at 03:56:41PM +0800, Qu Wenruo wrote:
> 
> 
> At 09/05/2016 09:19 AM, Zhao Lei wrote:
> >Hi, Sean Fu
> >
> >>From: Sean Fu [mailto:fxinr...@gmail.com]
> >>Sent: Sunday, September 04, 2016 7:54 PM
> >>To: dste...@suse.com
> >>Cc: c...@fb.com; anand.j...@oracle.com; fdman...@suse.com;
> >>zhao...@cn.fujitsu.com; linux-bt...@vger.kernel.org;
> >>linux-kernel@vger.kernel.org; Sean Fu <fxinr...@gmail.com>
> >>Subject: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in
> >>btrfs_read_chunk_tree.
> >>
> >>The input argument root is already set with "fs_info->chunk_root".
> >>"chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info)" in caller
> >>"open_ctree".
> >>“root->fs_info = fs_info” in "btrfs_alloc_root".
> >>
> >The root argument of this function means "any root".
> >And the function is designed getting chunk root from
> >"any root" in head.
> >
> >Since there is only one caller of this function,
> >and the caller always send chunk_root as root argument in
> >current code, we can remove above conversion,
> >and I suggest renaming root to chunk_root to make it clear,
> >something like:
> >
> >- btrfs_read_chunk_tree(struct btrfs_root *root)
> >+ btrfs_read_chunk_tree(struct btrfs_root *chunk_root)
> 
> Since root is only used to get fs_info->chunk_root, why not use fs_info
> directly?
Sorry for late reply.
chunk_root is processed in btrfs_read_chunk_tree.
Why should we pass fs_info directly to btrfs_read_chunk_tree?
Could you give me more detail?

Many thanks
> 
> Thanks,
> Qu
> 
> >
> >Thanks
> >Zhaolei
> >
> >>Signed-off-by: Sean Fu <fxinr...@gmail.com>
> >>---
> >> fs/btrfs/volumes.c | 2 --
> >> 1 file changed, 2 deletions(-)
> >>
> >>diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> >>index 366b335..384a6d2 100644
> >>--- a/fs/btrfs/volumes.c
> >>+++ b/fs/btrfs/volumes.c
> >>@@ -6600,8 +6600,6 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
> >>int ret;
> >>int slot;
> >>
> >>-   root = root->fs_info->chunk_root;
> >>-
> >>path = btrfs_alloc_path();
> >>if (!path)
> >>return -ENOMEM;
> >>--
> >>2.6.2
> >>
> >
> >
> >
> >
> >--
> >To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> >the body of a message to majord...@vger.kernel.org
> >More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >
> 
> 


Re: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in btrfs_read_chunk_tree.

2016-09-06 Thread Sean Fu
On Mon, Sep 05, 2016 at 03:56:41PM +0800, Qu Wenruo wrote:
> 
> 
> At 09/05/2016 09:19 AM, Zhao Lei wrote:
> >Hi, Sean Fu
> >
> >>From: Sean Fu [mailto:fxinr...@gmail.com]
> >>Sent: Sunday, September 04, 2016 7:54 PM
> >>To: dste...@suse.com
> >>Cc: c...@fb.com; anand.j...@oracle.com; fdman...@suse.com;
> >>zhao...@cn.fujitsu.com; linux-bt...@vger.kernel.org;
> >>linux-kernel@vger.kernel.org; Sean Fu 
> >>Subject: [PATCH] Btrfs: remove unnecessary code of chunk_root assignment in
> >>btrfs_read_chunk_tree.
> >>
> >>The input argument root is already set with "fs_info->chunk_root".
> >>"chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info)" in caller
> >>"open_ctree".
> >>“root->fs_info = fs_info” in "btrfs_alloc_root".
> >>
> >The root argument of this function means "any root".
> >And the function is designed getting chunk root from
> >"any root" in head.
> >
> >Since there is only one caller of this function,
> >and the caller always send chunk_root as root argument in
> >current code, we can remove above conversion,
> >and I suggest renaming root to chunk_root to make it clear,
> >something like:
> >
> >- btrfs_read_chunk_tree(struct btrfs_root *root)
> >+ btrfs_read_chunk_tree(struct btrfs_root *chunk_root)
> 
> Since root is only used to get fs_info->chunk_root, why not use fs_info
> directly?
Sorry for late reply.
chunk_root is processed in btrfs_read_chunk_tree.
Why should we pass fs_info directly to btrfs_read_chunk_tree?
Could you give me more detail?

Many thanks
> 
> Thanks,
> Qu
> 
> >
> >Thanks
> >Zhaolei
> >
> >>Signed-off-by: Sean Fu 
> >>---
> >> fs/btrfs/volumes.c | 2 --
> >> 1 file changed, 2 deletions(-)
> >>
> >>diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> >>index 366b335..384a6d2 100644
> >>--- a/fs/btrfs/volumes.c
> >>+++ b/fs/btrfs/volumes.c
> >>@@ -6600,8 +6600,6 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
> >>int ret;
> >>int slot;
> >>
> >>-   root = root->fs_info->chunk_root;
> >>-
> >>path = btrfs_alloc_path();
> >>if (!path)
> >>return -ENOMEM;
> >>--
> >>2.6.2
> >>
> >
> >
> >
> >
> >--
> >To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> >the body of a message to majord...@vger.kernel.org
> >More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >
> 
> 


Re: Fail to build tools/all

2016-05-02 Thread Sean Fu
On Mon, May 02, 2016 at 12:20:08PM +0200, Thomas Renninger wrote:
> On Sunday, May 01, 2016 01:11:33 PM Sean Fu wrote:
> > Hi guys:
> > I encountered a build error when running "make V=1 tools/all".
> > Shall we write a patch to fix it?
> 
> This is not a bug.
> 
> > The following is error log.
> > 
> >  start ==
> > commit 05cf8077e54b20dddb756eaa26f3aeb5c38dd3cf
> > Merge: cf78031 db5dd0d
> > Author: Linus Torvalds <torva...@linux-foundation.org>
> > Date:   Fri Apr 1 20:03:33 2016 -0500
> > = end  =
> > 
> > == start ===
> > mkdir -p
> > /home/sean/work/source/upstream/kernel.linus/linux/tools/power/cpupower
> > && make O=/home/sean/work/source/upstream/kernel.linus/linux
> > subdir=tools/power/cpupower --no-print-directory -C power/cpupower
> > gcc  -DVERSION=\"4.6.rc1.190.g05cf80\" -DPACKAGE=\"cpupower\"
> > -DPACKAGE_BUGREPORT=\"linux...@vger.kernel.org\" -D_GNU_SOURCE -pipe
> > -DNLS -Wall -Wchar-subscripts -Wpointer-arith -Wsign-compare
> > -Wno-pointer-sign -Wdeclaration-after-statement -Wshadow -O1 -g -DDEBUG
> > -I./lib -I ./utils -o
> > /home/sean/work/source/upstream/kernel.linus/linux/utils/helpers/amd.o
> > -c utils/helpers/amd.c
> > utils/helpers/amd.c:7:21: fatal error: pci/pci.h: No such file or
> > directory
> >  #include 
> 
> You need pciutils header and library:
> ftp://ftp.kernel.org/pub/software/utils/pciutils
> 
> Thomas
Thanks


Re: Fail to build tools/all

2016-05-02 Thread Sean Fu
On Mon, May 02, 2016 at 12:20:08PM +0200, Thomas Renninger wrote:
> On Sunday, May 01, 2016 01:11:33 PM Sean Fu wrote:
> > Hi guys:
> > I encountered a build error when running "make V=1 tools/all".
> > Shall we write a patch to fix it?
> 
> This is not a bug.
> 
> > The following is error log.
> > 
> >  start ==
> > commit 05cf8077e54b20dddb756eaa26f3aeb5c38dd3cf
> > Merge: cf78031 db5dd0d
> > Author: Linus Torvalds 
> > Date:   Fri Apr 1 20:03:33 2016 -0500
> > = end  =
> > 
> > == start ===
> > mkdir -p
> > /home/sean/work/source/upstream/kernel.linus/linux/tools/power/cpupower
> > && make O=/home/sean/work/source/upstream/kernel.linus/linux
> > subdir=tools/power/cpupower --no-print-directory -C power/cpupower
> > gcc  -DVERSION=\"4.6.rc1.190.g05cf80\" -DPACKAGE=\"cpupower\"
> > -DPACKAGE_BUGREPORT=\"linux...@vger.kernel.org\" -D_GNU_SOURCE -pipe
> > -DNLS -Wall -Wchar-subscripts -Wpointer-arith -Wsign-compare
> > -Wno-pointer-sign -Wdeclaration-after-statement -Wshadow -O1 -g -DDEBUG
> > -I./lib -I ./utils -o
> > /home/sean/work/source/upstream/kernel.linus/linux/utils/helpers/amd.o
> > -c utils/helpers/amd.c
> > utils/helpers/amd.c:7:21: fatal error: pci/pci.h: No such file or
> > directory
> >  #include 
> 
> You need pciutils header and library:
> ftp://ftp.kernel.org/pub/software/utils/pciutils
> 
> Thomas
Thanks


Fail to build tools/all

2016-04-30 Thread Sean Fu
Hi guys:
I encountered a build error when running "make V=1 tools/all".
Shall we write a patch to fix it?
The following is error log.

 start ==
commit 05cf8077e54b20dddb756eaa26f3aeb5c38dd3cf
Merge: cf78031 db5dd0d
Author: Linus Torvalds 
Date:   Fri Apr 1 20:03:33 2016 -0500
= end  =

== start ===
mkdir -p
/home/sean/work/source/upstream/kernel.linus/linux/tools/power/cpupower
&& make O=/home/sean/work/source/upstream/kernel.linus/linux
subdir=tools/power/cpupower --no-print-directory -C power/cpupower 
gcc  -DVERSION=\"4.6.rc1.190.g05cf80\" -DPACKAGE=\"cpupower\"
-DPACKAGE_BUGREPORT=\"linux...@vger.kernel.org\" -D_GNU_SOURCE -pipe
-DNLS -Wall -Wchar-subscripts -Wpointer-arith -Wsign-compare
-Wno-pointer-sign -Wdeclaration-after-statement -Wshadow -O1 -g -DDEBUG
-I./lib -I ./utils -o
/home/sean/work/source/upstream/kernel.linus/linux/utils/helpers/amd.o
-c utils/helpers/amd.c
utils/helpers/amd.c:7:21: fatal error: pci/pci.h: No such file or
directory
 #include 
  ^
  compilation terminated.
  Makefile:218: recipe for target
  
'/home/sean/work/source/upstream/kernel.linus/linux/utils/helpers/amd.o'
  failed
  make[2]: ***
  
[/home/sean/work/source/upstream/kernel.linus/linux/utils/helpers/amd.o]
  Error 1
  Makefile:56: recipe for target 'cpupower' failed
  make[1]: *** [cpupower] Error 2
  Makefile:1548: recipe for target 'tools/all'
  failed
  make: *** [tools/all] Error 2
=== end ==

BestRegards
Sean


Fail to build tools/all

2016-04-30 Thread Sean Fu
Hi guys:
I encountered a build error when running "make V=1 tools/all".
Shall we write a patch to fix it?
The following is error log.

 start ==
commit 05cf8077e54b20dddb756eaa26f3aeb5c38dd3cf
Merge: cf78031 db5dd0d
Author: Linus Torvalds 
Date:   Fri Apr 1 20:03:33 2016 -0500
= end  =

== start ===
mkdir -p
/home/sean/work/source/upstream/kernel.linus/linux/tools/power/cpupower
&& make O=/home/sean/work/source/upstream/kernel.linus/linux
subdir=tools/power/cpupower --no-print-directory -C power/cpupower 
gcc  -DVERSION=\"4.6.rc1.190.g05cf80\" -DPACKAGE=\"cpupower\"
-DPACKAGE_BUGREPORT=\"linux...@vger.kernel.org\" -D_GNU_SOURCE -pipe
-DNLS -Wall -Wchar-subscripts -Wpointer-arith -Wsign-compare
-Wno-pointer-sign -Wdeclaration-after-statement -Wshadow -O1 -g -DDEBUG
-I./lib -I ./utils -o
/home/sean/work/source/upstream/kernel.linus/linux/utils/helpers/amd.o
-c utils/helpers/amd.c
utils/helpers/amd.c:7:21: fatal error: pci/pci.h: No such file or
directory
 #include 
  ^
  compilation terminated.
  Makefile:218: recipe for target
  
'/home/sean/work/source/upstream/kernel.linus/linux/utils/helpers/amd.o'
  failed
  make[2]: ***
  
[/home/sean/work/source/upstream/kernel.linus/linux/utils/helpers/amd.o]
  Error 1
  Makefile:56: recipe for target 'cpupower' failed
  make[1]: *** [cpupower] Error 2
  Makefile:1548: recipe for target 'tools/all'
  failed
  make: *** [tools/all] Error 2
=== end ==

BestRegards
Sean


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-09-15 Thread Sean Fu
On Mon, Sep 14, 2015 at 12:44 AM, Eric W. Biederman
 wrote:
> Sean Fu  writes:
>
>> On Sat, Sep 12, 2015 at 1:01 AM, Eric W. Biederman
>>  wrote:
>>> Sean Fu  writes:
> '\0' is not and has never been valid in a text file.
> proc files are a text interface.
> Expecting '\0' to be accepted is very strange, and apparently there is
> only one program in existence that does.
>
> That a trailing '\0' was ever accepted was due to a bug in the code.
>
> Accepting '\0' in general in a text interface is a very dangerous and
> buggy pattern so it must be done very carefully or else other
> regressions or bugs could be easily introduced.
Ok,
Could you please give me more detail about the potential risk from the patch?
What is the different behavior between the patch and old kernel?
It seems like entirely same.

> Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-09-15 Thread Sean Fu
According to POSIX standard,  "The write() function shall attempt to
write nbyte bytes from the buffer pointed to by buf to the file
associated with the open file descriptor, fildes.".
So it is not the length of string(strlen).

On Mon, Sep 14, 2015 at 4:05 AM, Steven Rostedt  wrote:
> On Sun, 13 Sep 2015 20:39:31 +0800
> Sean Fu  wrote:
>
>
>> > Accepting a '\0' is not at all reasonable for a text interface.  The
>> > application that does it is buggy.
>> It is hard to comprehend that the current kernel can accept  two bytes
>> "1 ", "1\t", "1\n" except "1\0".
>
> Um, it does not seem hard to comprehend at all. As this is a string,
> and it acts the same as a printf() or strlen.
>
> strlen("1 ") == 2
> strlen("1\t") == 2
> strlen("1\n") == 2
> strlen("1\0") == 1
>
> Big difference to me.
>
> -- Steve
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-09-15 Thread Sean Fu
According to POSIX standard,  "The write() function shall attempt to
write nbyte bytes from the buffer pointed to by buf to the file
associated with the open file descriptor, fildes.".
So it is not the length of string(strlen).

On Mon, Sep 14, 2015 at 4:05 AM, Steven Rostedt <rost...@goodmis.org> wrote:
> On Sun, 13 Sep 2015 20:39:31 +0800
> Sean Fu <fxinr...@gmail.com> wrote:
>
>
>> > Accepting a '\0' is not at all reasonable for a text interface.  The
>> > application that does it is buggy.
>> It is hard to comprehend that the current kernel can accept  two bytes
>> "1 ", "1\t", "1\n" except "1\0".
>
> Um, it does not seem hard to comprehend at all. As this is a string,
> and it acts the same as a printf() or strlen.
>
> strlen("1 ") == 2
> strlen("1\t") == 2
> strlen("1\n") == 2
> strlen("1\0") == 1
>
> Big difference to me.
>
> -- Steve
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-09-15 Thread Sean Fu
On Mon, Sep 14, 2015 at 12:44 AM, Eric W. Biederman
<ebied...@xmission.com> wrote:
> Sean Fu <fxinr...@gmail.com> writes:
>
>> On Sat, Sep 12, 2015 at 1:01 AM, Eric W. Biederman
>> <ebied...@xmission.com> wrote:
>>> Sean Fu <fxinr...@gmail.com> writes:
> '\0' is not and has never been valid in a text file.
> proc files are a text interface.
> Expecting '\0' to be accepted is very strange, and apparently there is
> only one program in existence that does.
>
> That a trailing '\0' was ever accepted was due to a bug in the code.
>
> Accepting '\0' in general in a text interface is a very dangerous and
> buggy pattern so it must be done very carefully or else other
> regressions or bugs could be easily introduced.
Ok,
Could you please give me more detail about the potential risk from the patch?
What is the different behavior between the patch and old kernel?
It seems like entirely same.

> Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-09-13 Thread Sean Fu
On Sat, Sep 12, 2015 at 1:01 AM, Eric W. Biederman
 wrote:
> Sean Fu  writes:
>
>>> Sounds like a reasonable compromise. Sean, can you make a patch that
>>> only affects the one proc file (comment it well in the code), and have
>>> it accept nothing past the '\0'. Even if someone passed in "1 \0 2", it
>>> would only see "1 "
>> The current code uses uniform handler (e.g. "proc_dointvec") for all
>> same type proc file.
>> So all integer type proc file are affected.
>
> No.  I do not believe the proprietary binary application you are dealing
> with writes to all proc files that use the proc_dointvec handler.
I means all ctl_table whose .proc_handler is "proc_dointvec" are affected.
>
>> In fact, The behavior of all integer type proc file should be changed.
>
> Not at all.  The only files that we can possibly justify changing today
> are the files where an actual regression is being observed.
>
> Because quite frankly 5 years is way too long to wait to report a
> regression.  By and large software is reasonable and treats proc
> files as text files where '\0' is an invalid character.
5 years is not enough long for distros, specially enterprise distros.
The most of HuaWei machines run our SLES10sp3(2.6.16, SUSE LINUX
ENTERPRISE SERVER).
They use one enterprise version for 5+ years usually.
>
> Accepting a '\0' is not at all reasonable for a text interface.  The
> application that does it is buggy.
It is hard to comprehend that the current kernel can accept  two bytes
"1 ", "1\t", "1\n" except "1\0".
>
> Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-09-13 Thread Sean Fu
On Sat, Sep 12, 2015 at 1:01 AM, Eric W. Biederman
<ebied...@xmission.com> wrote:
> Sean Fu <fxinr...@gmail.com> writes:
>
>>> Sounds like a reasonable compromise. Sean, can you make a patch that
>>> only affects the one proc file (comment it well in the code), and have
>>> it accept nothing past the '\0'. Even if someone passed in "1 \0 2", it
>>> would only see "1 "
>> The current code uses uniform handler (e.g. "proc_dointvec") for all
>> same type proc file.
>> So all integer type proc file are affected.
>
> No.  I do not believe the proprietary binary application you are dealing
> with writes to all proc files that use the proc_dointvec handler.
I means all ctl_table whose .proc_handler is "proc_dointvec" are affected.
>
>> In fact, The behavior of all integer type proc file should be changed.
>
> Not at all.  The only files that we can possibly justify changing today
> are the files where an actual regression is being observed.
>
> Because quite frankly 5 years is way too long to wait to report a
> regression.  By and large software is reasonable and treats proc
> files as text files where '\0' is an invalid character.
5 years is not enough long for distros, specially enterprise distros.
The most of HuaWei machines run our SLES10sp3(2.6.16, SUSE LINUX
ENTERPRISE SERVER).
They use one enterprise version for 5+ years usually.
>
> Accepting a '\0' is not at all reasonable for a text interface.  The
> application that does it is buggy.
It is hard to comprehend that the current kernel can accept  two bytes
"1 ", "1\t", "1\n" except "1\0".
>
> Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-09-11 Thread Sean Fu
On Wed, Sep 9, 2015 at 12:36 AM, Steven Rostedt  wrote:
> On Tue, 08 Sep 2015 11:19:14 -0500
> ebied...@xmission.com (Eric W. Biederman) wrote:
>
>
>> This patch does not implement the old behavior.
>>
>> The old code does use '\0' as a buffer terminator, and because it does
>> not check things closely I can see how it could accept a '\0' from
>> userspace and treat that as an early buffer terminator.
>>
>> The patch treats '\0' as a number separator and allows things that have
>> never been allowed before and quite frankly is very scary as it just
>> invites bugs.
>>
>> So I do not think we should merge the given patch.  It is just wrong.
>> One that simply truncates the input buffer at the first '\0' character I
>> think we can consider, although I am not a fan.
>
> I agree, and was thinking this patch did that, but I didn't look close
> enough (why I never gave a Reviewed-by to it either).
>
>
>>
>> Steve as far as what I think would break.  I don't think the current
>> behavior should have broken anything and apparently it did.  I don't see
>> what a change that simply truncates the buffer at the first embedded
>> '\0' would break, but I don't know how to test that there isn't anything
>> that it will.  We are way past the point of reasonable expectations
>> being able to guide us.  4 years should have been more than enough soak
>> time to have been able to say that the change was good, but apparently
>> it was not.
>
> Well, to be fair, a lot of people (distros, etc) do not use the most
> recent kernels. 4 years may be the first time a tool touches a kernel.
>
>>
>> My gut feel says that if we are going to change this, at this late date,
>> we find the one specific proc file that matters and change it just for
>> that one proc file, and in that change we treat '\0' as a terminator not
>> as a separator.  I never did see in the conversation which proc file it
>> is that actually matters.  The principle is that the more precise and
>> the more localized such a change is the less chance it has of causing a
>> regression of something else, and the greater the chance we can look at
>> a specific issue.
>
> Sounds like a reasonable compromise. Sean, can you make a patch that
> only affects the one proc file (comment it well in the code), and have
> it accept nothing past the '\0'. Even if someone passed in "1 \0 2", it
> would only see "1 "
The current code uses uniform handler (e.g. "proc_dointvec") for all
same type proc file.
So all integer type proc file are affected.
In fact, The behavior of all integer type proc file should be changed.
>
> -- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-09-11 Thread Sean Fu
On Wed, Sep 9, 2015 at 12:36 AM, Steven Rostedt  wrote:
> On Tue, 08 Sep 2015 11:19:14 -0500
> ebied...@xmission.com (Eric W. Biederman) wrote:
>
>
>> This patch does not implement the old behavior.
>>
>> The old code does use '\0' as a buffer terminator, and because it does
>> not check things closely I can see how it could accept a '\0' from
>> userspace and treat that as an early buffer terminator.
>>
>> The patch treats '\0' as a number separator and allows things that have
>> never been allowed before and quite frankly is very scary as it just
>> invites bugs.
>>
>> So I do not think we should merge the given patch.  It is just wrong.
>> One that simply truncates the input buffer at the first '\0' character I
>> think we can consider, although I am not a fan.
>
> I agree, and was thinking this patch did that, but I didn't look close
> enough (why I never gave a Reviewed-by to it either).
>
>
>>
>> Steve as far as what I think would break.  I don't think the current
>> behavior should have broken anything and apparently it did.  I don't see
>> what a change that simply truncates the buffer at the first embedded
>> '\0' would break, but I don't know how to test that there isn't anything
>> that it will.  We are way past the point of reasonable expectations
>> being able to guide us.  4 years should have been more than enough soak
>> time to have been able to say that the change was good, but apparently
>> it was not.
>
> Well, to be fair, a lot of people (distros, etc) do not use the most
> recent kernels. 4 years may be the first time a tool touches a kernel.
>
>>
>> My gut feel says that if we are going to change this, at this late date,
>> we find the one specific proc file that matters and change it just for
>> that one proc file, and in that change we treat '\0' as a terminator not
>> as a separator.  I never did see in the conversation which proc file it
>> is that actually matters.  The principle is that the more precise and
>> the more localized such a change is the less chance it has of causing a
>> regression of something else, and the greater the chance we can look at
>> a specific issue.
>
> Sounds like a reasonable compromise. Sean, can you make a patch that
> only affects the one proc file (comment it well in the code), and have
> it accept nothing past the '\0'. Even if someone passed in "1 \0 2", it
> would only see "1 "
The current code uses uniform handler (e.g. "proc_dointvec") for all
same type proc file.
So all integer type proc file are affected.
In fact, The behavior of all integer type proc file should be changed.
>
> -- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-09-07 Thread Sean Fu
On Fri, Aug 28, 2015 at 11:31 AM, Sean Fu  wrote:
> On Thu, Aug 27, 2015 at 4:32 PM, Sean Fu  wrote:
>> On Thu, Aug 27, 2015 at 10:32 AM, Steven Rostedt  wrote:
>>> On Thu, 27 Aug 2015 08:17:29 +0800
>>> Sean Fu  wrote:
>>>> strace execute result:
>>>> write(3, "1\2\0", 3)= -1 EINVAL (Invalid argument)
> If vleft > 1, "1\0 2" is treated as invalid paraments and all string
> include '\0' will be invalid.
Hi All experts,
Could you please signed off this patch?
>
>>>>
>>>> >
>>>> > -- Steve
>>>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-09-07 Thread Sean Fu
On Fri, Aug 28, 2015 at 11:31 AM, Sean Fu <fxinr...@gmail.com> wrote:
> On Thu, Aug 27, 2015 at 4:32 PM, Sean Fu <fxinr...@gmail.com> wrote:
>> On Thu, Aug 27, 2015 at 10:32 AM, Steven Rostedt <rost...@goodmis.org> wrote:
>>> On Thu, 27 Aug 2015 08:17:29 +0800
>>> Sean Fu <fxinr...@gmail.com> wrote:
>>>> strace execute result:
>>>> write(3, "1\2\0", 3)= -1 EINVAL (Invalid argument)
> If vleft > 1, "1\0 2" is treated as invalid paraments and all string
> include '\0' will be invalid.
Hi All experts,
Could you please signed off this patch?
>
>>>>
>>>> >
>>>> > -- Steve
>>>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-27 Thread Sean Fu
On Thu, Aug 27, 2015 at 4:32 PM, Sean Fu  wrote:
> On Thu, Aug 27, 2015 at 10:32 AM, Steven Rostedt  wrote:
>> On Thu, 27 Aug 2015 08:17:29 +0800
>> Sean Fu  wrote:
>>
>>> On Thu, Aug 27, 2015 at 4:36 AM, Steven Rostedt  wrote:
>>> > On Wed, 26 Aug 2015 23:48:01 +0800
>>> > Sean Fu  wrote:
>>> >
>>> >
>>> >> > Defending the patch, I can't imagine any user space code expecting the
>>> >> > current behavior. The current behavior is that if you write "1\0" it
>>> >> > will error out instead of accepting the "1". I can't come up with a
>>> >> > scenario that would require userspace to expect "1\0" to fail. Can you?
>>> >> Thanks
>>> >
>>> > Although, with the current patch, would "1\02" fail? It should.
>>> Yes, "1\02" is equal to "1\2"(count=2) or "1\2\0"(count=3), So it should 
>>> fail.
>>
>> Sorry, I meant "1\0 2"
> In this case, The patch behavior is accepting the "1" and discarding
> other bytes.
> for (; left && vleft--; i++, first=0) {//vleft is 1 for integer
> type or unsigned long type proc file
>
>>
>> -- Steve
>>
>>>
>>> code
>>> len = write(fd, "1\0\2", 3);
>>>
>>> strace execute result:
>>> write(3, "1\2\0", 3)= -1 EINVAL (Invalid argument)
If vleft > 1, "1\0 2" is treated as invalid paraments and all string
include '\0' will be invalid.

>>>
>>> >
>>> > -- Steve
>>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-27 Thread Sean Fu
On Thu, Aug 27, 2015 at 10:32 AM, Steven Rostedt  wrote:
> On Thu, 27 Aug 2015 08:17:29 +0800
> Sean Fu  wrote:
>
>> On Thu, Aug 27, 2015 at 4:36 AM, Steven Rostedt  wrote:
>> > On Wed, 26 Aug 2015 23:48:01 +0800
>> > Sean Fu  wrote:
>> >
>> >
>> >> > Defending the patch, I can't imagine any user space code expecting the
>> >> > current behavior. The current behavior is that if you write "1\0" it
>> >> > will error out instead of accepting the "1". I can't come up with a
>> >> > scenario that would require userspace to expect "1\0" to fail. Can you?
>> >> Thanks
>> >
>> > Although, with the current patch, would "1\02" fail? It should.
>> Yes, "1\02" is equal to "1\2"(count=2) or "1\2\0"(count=3), So it should 
>> fail.
>
> Sorry, I meant "1\0 2"
In this case, The patch behavior is accepting the "1" and discarding
other bytes.
for (; left && vleft--; i++, first=0) {//vleft is 1 for integer
type or unsigned long type proc file

>
> -- Steve
>
>>
>> code
>> len = write(fd, "1\0\2", 3);
>>
>> strace execute result:
>> write(3, "1\2\0", 3)= -1 EINVAL (Invalid argument)
>>
>> >
>> > -- Steve
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-27 Thread Sean Fu
On Thu, Aug 27, 2015 at 4:32 PM, Sean Fu fxinr...@gmail.com wrote:
 On Thu, Aug 27, 2015 at 10:32 AM, Steven Rostedt rost...@goodmis.org wrote:
 On Thu, 27 Aug 2015 08:17:29 +0800
 Sean Fu fxinr...@gmail.com wrote:

 On Thu, Aug 27, 2015 at 4:36 AM, Steven Rostedt rost...@goodmis.org wrote:
  On Wed, 26 Aug 2015 23:48:01 +0800
  Sean Fu fxinr...@gmail.com wrote:
 
 
   Defending the patch, I can't imagine any user space code expecting the
   current behavior. The current behavior is that if you write 1\0 it
   will error out instead of accepting the 1. I can't come up with a
   scenario that would require userspace to expect 1\0 to fail. Can you?
  Thanks
 
  Although, with the current patch, would 1\02 fail? It should.
 Yes, 1\02 is equal to 1\2(count=2) or 1\2\0(count=3), So it should 
 fail.

 Sorry, I meant 1\0 2
 In this case, The patch behavior is accepting the 1 and discarding
 other bytes.
 for (; left  vleft--; i++, first=0) {//vleft is 1 for integer
 type or unsigned long type proc file


 -- Steve


 code
 len = write(fd, 1\0\2, 3);

 strace execute result:
 write(3, 1\2\0, 3)= -1 EINVAL (Invalid argument)
If vleft  1, 1\0 2 is treated as invalid paraments and all string
include '\0' will be invalid.


 
  -- Steve

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-27 Thread Sean Fu
On Thu, Aug 27, 2015 at 10:32 AM, Steven Rostedt rost...@goodmis.org wrote:
 On Thu, 27 Aug 2015 08:17:29 +0800
 Sean Fu fxinr...@gmail.com wrote:

 On Thu, Aug 27, 2015 at 4:36 AM, Steven Rostedt rost...@goodmis.org wrote:
  On Wed, 26 Aug 2015 23:48:01 +0800
  Sean Fu fxinr...@gmail.com wrote:
 
 
   Defending the patch, I can't imagine any user space code expecting the
   current behavior. The current behavior is that if you write 1\0 it
   will error out instead of accepting the 1. I can't come up with a
   scenario that would require userspace to expect 1\0 to fail. Can you?
  Thanks
 
  Although, with the current patch, would 1\02 fail? It should.
 Yes, 1\02 is equal to 1\2(count=2) or 1\2\0(count=3), So it should 
 fail.

 Sorry, I meant 1\0 2
In this case, The patch behavior is accepting the 1 and discarding
other bytes.
for (; left  vleft--; i++, first=0) {//vleft is 1 for integer
type or unsigned long type proc file


 -- Steve


 code
 len = write(fd, 1\0\2, 3);

 strace execute result:
 write(3, 1\2\0, 3)= -1 EINVAL (Invalid argument)

 
  -- Steve

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-26 Thread Sean Fu
On Wed, Aug 26, 2015 at 4:39 AM, Heinrich Schuchardt  wrote:
>
>
> On 24.08.2015 10:56, Sean Fu wrote:
>> when the input argument "count" including the terminating byte "\0",
>> The write system call return EINVAL on proc file.
>> But it return success on regular file.
>>
>> E.g. Writting two bytes ("1\0") to "/proc/sys/net/ipv4/conf/eth0/rp_filter".
>> write(fd, "1\0", 2) return EINVAL.
>
> Reading through kernel/sysctl.c it looks like you are allowing
> "1\01" to be used to pass two integers or two longs.
> This is not what you describe as target of your patch.
"1\01" actually is "1\1", So either of them will fail.
>
> Parameter tr returned from proc_get_long should be checked in
> __do_proc_dointvec,
> __do_proc_doulongvec_minmax.
>
> Best regards
>
> Heinrich Schuchardt
>
>>
>> Signed-off-by: Sean Fu 
>> ---
>>  kernel/sysctl.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
>> index 19b62b5..c2b0594 100644
>> --- a/kernel/sysctl.c
>> +++ b/kernel/sysctl.c
>> @@ -2004,7 +2004,7 @@ static int do_proc_dointvec_conv(bool *negp,
>> unsigned long *lvalp,
>> return 0;
>>  }
>>
>> -static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
>> +static const char proc_wspace_sep[] = { ' ', '\t', '\n', '\0' };
>>
>>  static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
>>   int write, void __user *buffer,
>>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-26 Thread Sean Fu
On Thu, Aug 27, 2015 at 4:36 AM, Steven Rostedt  wrote:
> On Wed, 26 Aug 2015 23:48:01 +0800
> Sean Fu  wrote:
>
>
>> > Defending the patch, I can't imagine any user space code expecting the
>> > current behavior. The current behavior is that if you write "1\0" it
>> > will error out instead of accepting the "1". I can't come up with a
>> > scenario that would require userspace to expect "1\0" to fail. Can you?
>> Thanks
>
> Although, with the current patch, would "1\02" fail? It should.
Yes, "1\02" is equal to "1\2"(count=2) or "1\2\0"(count=3), So it should fail.

code
len = write(fd, "1\0\2", 3);

strace execute result:
write(3, "1\2\0", 3)= -1 EINVAL (Invalid argument)

>
> -- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-26 Thread Sean Fu
On Wed, Aug 26, 2015 at 3:05 AM, Steven Rostedt  wrote:
> On Tue, 25 Aug 2015 13:33:57 -0400
> Austin S Hemmelgarn  wrote:
>
>> >> How do you know that?
>> > I will prove that all other write usage is not impacted later.
>> Except that you can only really do this for programs that you have
>> access to, and by definition you can not have access to every program
>> ever written that writes to /proc.
>>
>> If you were going to do this, it would need to be itself controlled by
>> another sysctl to toggle the behavior, which would need to default to
>> the current behavior.
>
> Defending the patch, I can't imagine any user space code expecting the
> current behavior. The current behavior is that if you write "1\0" it
> will error out instead of accepting the "1". I can't come up with a
> scenario that would require userspace to expect "1\0" to fail. Can you?
Thanks
>
>
> -- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-26 Thread Sean Fu
On Wed, Aug 26, 2015 at 4:39 AM, Heinrich Schuchardt  wrote:
>
>
> On 24.08.2015 10:56, Sean Fu wrote:
>> when the input argument "count" including the terminating byte "\0",
>> The write system call return EINVAL on proc file.
>> But it return success on regular file.
>>
>> E.g. Writting two bytes ("1\0") to "/proc/sys/net/ipv4/conf/eth0/rp_filter".
>> write(fd, "1\0", 2) return EINVAL.
>
> Reading through kernel/sysctl.c it looks like you are allowing
> "1\01" to be used to pass two integers or two longs.
> This is not what you describe as target of your patch.
1st 2nd 3rd Change?
'0'~'9' '\0'   non '\0'No

proc_get_long-->simple_strtoul-->simple_strtoull-->_parse_integer
__do_proc_dointvec
...
vleft = table->maxlen / sizeof(*i);   //vleft = 1 if it is
integer type proc file
...
for (; left && vleft--; i++, first=0) {  //In last loop
left=2, but vleft = 0 cause exit.

>
> Parameter tr returned from proc_get_long should be checked in
> __do_proc_dointvec,
> __do_proc_doulongvec_minmax.
>
> Best regards
>
> Heinrich Schuchardt
>
>>
>> Signed-off-by: Sean Fu 
>> ---
>>  kernel/sysctl.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
>> index 19b62b5..c2b0594 100644
>> --- a/kernel/sysctl.c
>> +++ b/kernel/sysctl.c
>> @@ -2004,7 +2004,7 @@ static int do_proc_dointvec_conv(bool *negp,
>> unsigned long *lvalp,
>> return 0;
>>  }
>>
>> -static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
>> +static const char proc_wspace_sep[] = { ' ', '\t', '\n', '\0' };
>>
>>  static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
>>   int write, void __user *buffer,
>>
All possibilities are listed.

1 byte data(count = 1)

1st Change?
'\0'  NO
non '\0'NO

2 bytes data(count = 2)

1st 2nd Change?
'0'~'9'  '\0'  Yes
'0'~'9'  non '\0'No
non number'\0'   No
non numbernon '\0' No

3 bytes data(count = 3)

1st 2nd 3rd Change?
'0'~'9' '0'~'9''\0' Yes
'0'~'9' '0'~'9'non '\0'   No
'0'~'9' non '0'~'9'  '\0' No
'0'~'9' non '0'~'9'  non '\0'   No
'0'~'9' '\0''\0' No
'0'~'9' '\0'   non '\0'No
non '0'~'9'   Any AnyNo

More 3 bytes data(count > 3)
Number sequence the next character  Change?
"x1...xn"  '\0' Yes
"x1...xn"  non '\0'   No
Non "x1...xn"   '\0'  No
Non "x1...xn"   non '\0'No

"x1...xn" is a string whose all members are "0"~'9'
Non "x1...xn" means the first character is not "0"~'9'.

"Yes" means the behavior is changed.
"No" means the behavior is Not changed.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-26 Thread Sean Fu
On Wed, Aug 26, 2015 at 3:05 AM, Steven Rostedt rost...@goodmis.org wrote:
 On Tue, 25 Aug 2015 13:33:57 -0400
 Austin S Hemmelgarn ahferro...@gmail.com wrote:

  How do you know that?
  I will prove that all other write usage is not impacted later.
 Except that you can only really do this for programs that you have
 access to, and by definition you can not have access to every program
 ever written that writes to /proc.

 If you were going to do this, it would need to be itself controlled by
 another sysctl to toggle the behavior, which would need to default to
 the current behavior.

 Defending the patch, I can't imagine any user space code expecting the
 current behavior. The current behavior is that if you write 1\0 it
 will error out instead of accepting the 1. I can't come up with a
 scenario that would require userspace to expect 1\0 to fail. Can you?
Thanks


 -- Steve
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-26 Thread Sean Fu
On Wed, Aug 26, 2015 at 4:39 AM, Heinrich Schuchardt xypron.g...@gmx.de wrote:


 On 24.08.2015 10:56, Sean Fu wrote:
 when the input argument count including the terminating byte \0,
 The write system call return EINVAL on proc file.
 But it return success on regular file.

 E.g. Writting two bytes (1\0) to /proc/sys/net/ipv4/conf/eth0/rp_filter.
 write(fd, 1\0, 2) return EINVAL.

 Reading through kernel/sysctl.c it looks like you are allowing
 1\01 to be used to pass two integers or two longs.
 This is not what you describe as target of your patch.
1st 2nd 3rd Change?
'0'~'9' '\0'   non '\0'No

proc_get_long--simple_strtoul--simple_strtoull--_parse_integer
__do_proc_dointvec
...
vleft = table-maxlen / sizeof(*i);   //vleft = 1 if it is
integer type proc file
...
for (; left  vleft--; i++, first=0) {  //In last loop
left=2, but vleft = 0 cause exit.


 Parameter tr returned from proc_get_long should be checked in
 __do_proc_dointvec,
 __do_proc_doulongvec_minmax.

 Best regards

 Heinrich Schuchardt


 Signed-off-by: Sean Fu fxinr...@gmail.com
 ---
  kernel/sysctl.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/kernel/sysctl.c b/kernel/sysctl.c
 index 19b62b5..c2b0594 100644
 --- a/kernel/sysctl.c
 +++ b/kernel/sysctl.c
 @@ -2004,7 +2004,7 @@ static int do_proc_dointvec_conv(bool *negp,
 unsigned long *lvalp,
 return 0;
  }

 -static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
 +static const char proc_wspace_sep[] = { ' ', '\t', '\n', '\0' };

  static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
   int write, void __user *buffer,

All possibilities are listed.

1 byte data(count = 1)

1st Change?
'\0'  NO
non '\0'NO

2 bytes data(count = 2)

1st 2nd Change?
'0'~'9'  '\0'  Yes
'0'~'9'  non '\0'No
non number'\0'   No
non numbernon '\0' No

3 bytes data(count = 3)

1st 2nd 3rd Change?
'0'~'9' '0'~'9''\0' Yes
'0'~'9' '0'~'9'non '\0'   No
'0'~'9' non '0'~'9'  '\0' No
'0'~'9' non '0'~'9'  non '\0'   No
'0'~'9' '\0''\0' No
'0'~'9' '\0'   non '\0'No
non '0'~'9'   Any AnyNo

More 3 bytes data(count  3)
Number sequence the next character  Change?
x1...xn  '\0' Yes
x1...xn  non '\0'   No
Non x1...xn   '\0'  No
Non x1...xn   non '\0'No

x1...xn is a string whose all members are 0~'9'
Non x1...xn means the first character is not 0~'9'.

Yes means the behavior is changed.
No means the behavior is Not changed.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-26 Thread Sean Fu
On Wed, Aug 26, 2015 at 4:39 AM, Heinrich Schuchardt xypron.g...@gmx.de wrote:


 On 24.08.2015 10:56, Sean Fu wrote:
 when the input argument count including the terminating byte \0,
 The write system call return EINVAL on proc file.
 But it return success on regular file.

 E.g. Writting two bytes (1\0) to /proc/sys/net/ipv4/conf/eth0/rp_filter.
 write(fd, 1\0, 2) return EINVAL.

 Reading through kernel/sysctl.c it looks like you are allowing
 1\01 to be used to pass two integers or two longs.
 This is not what you describe as target of your patch.
1\01 actually is 1\1, So either of them will fail.

 Parameter tr returned from proc_get_long should be checked in
 __do_proc_dointvec,
 __do_proc_doulongvec_minmax.

 Best regards

 Heinrich Schuchardt


 Signed-off-by: Sean Fu fxinr...@gmail.com
 ---
  kernel/sysctl.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/kernel/sysctl.c b/kernel/sysctl.c
 index 19b62b5..c2b0594 100644
 --- a/kernel/sysctl.c
 +++ b/kernel/sysctl.c
 @@ -2004,7 +2004,7 @@ static int do_proc_dointvec_conv(bool *negp,
 unsigned long *lvalp,
 return 0;
  }

 -static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
 +static const char proc_wspace_sep[] = { ' ', '\t', '\n', '\0' };

  static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
   int write, void __user *buffer,

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-26 Thread Sean Fu
On Thu, Aug 27, 2015 at 4:36 AM, Steven Rostedt rost...@goodmis.org wrote:
 On Wed, 26 Aug 2015 23:48:01 +0800
 Sean Fu fxinr...@gmail.com wrote:


  Defending the patch, I can't imagine any user space code expecting the
  current behavior. The current behavior is that if you write 1\0 it
  will error out instead of accepting the 1. I can't come up with a
  scenario that would require userspace to expect 1\0 to fail. Can you?
 Thanks

 Although, with the current patch, would 1\02 fail? It should.
Yes, 1\02 is equal to 1\2(count=2) or 1\2\0(count=3), So it should fail.

code
len = write(fd, 1\0\2, 3);

strace execute result:
write(3, 1\2\0, 3)= -1 EINVAL (Invalid argument)


 -- Steve
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-25 Thread Sean Fu
On Tue, Aug 25, 2015 at 10:15 PM, Steven Rostedt  wrote:
> On Tue, 25 Aug 2015 15:50:18 +0800
> Sean Fu  wrote:
>
>> On Tue, Aug 25, 2015 at 10:24 AM, Eric W. Biederman
>>  wrote:
>> >
>> >
>> > On August 24, 2015 6:57:57 PM MDT, Sean Fu  wrote:
>> >>An application from HuaWei which works fine on 2.6 encounters this
>> >>issue on 3.0 or later kernel.
>> >
>> > My sympathies.  Being stuck with a 3rd party application you can barely 
>> > talk about that has been broken for 5years and no one reported it.
>> >
>> > Ordinarily we would fix a regression like this. As it has been 5years the 
>> > challenge now is how do we tell if there are applications that depend on 
>> > the current behavior.
>> >
>> > Before we can change the behavior back we need a convincing argument that 
>> > we won't cause a regression in another application by making the change.
>> >
>> > I do not see how such an argument can be made.  So you have my sympathies 
>> > but I do not see how we can help you.
>> We should consider this patch basing on my following arguments.
>> 1 Different version kernel should keep consistent on this behavior.
>
> The thing is, the above argument is against the patch. The behavior
> changed 2 years ago, and nobody noticed. Changing it back only causes
> more inconsistent behavior.
It is impossible to cause more inconsistent behavior.
it just enhance compatibility(support "xx...x\0").
This patch just modify "proc_wspace_sep" array. and "proc_wspace_sep" is static.
Only "proc_get_long" used this array, "proc_get_long" is also static.
There are only 4 place to call "proc_get_long" in kernel/sysctl.c.
I will prove that these 4 callers have no bad impact later.

>
>
>> 2 This writting behavior on proc file should be same with writting on
>> regular file as possible as we can.
>
> Writing to a proc file causes kernel actions. Writing to a regular file
> just saves data. That's not an argument here.
>
>> 3 This patch does not have any potential compatibility risk with 3rd
>> party application.
>
> How do you know that?
I will prove that all other write usage is not impacted later.

Thanks for all reply.

Sean
>
> -- Steve
>
>> 4 Support writting "1...\0" to proc file.
>>
>> >
>> > Eric
>> >
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-25 Thread Sean Fu
On Tue, Aug 25, 2015 at 10:24 AM, Eric W. Biederman
 wrote:
>
>
> On August 24, 2015 6:57:57 PM MDT, Sean Fu  wrote:
>>An application from HuaWei which works fine on 2.6 encounters this
>>issue on 3.0 or later kernel.
>
> My sympathies.  Being stuck with a 3rd party application you can barely talk 
> about that has been broken for 5years and no one reported it.
>
> Ordinarily we would fix a regression like this. As it has been 5years the 
> challenge now is how do we tell if there are applications that depend on the 
> current behavior.
>
> Before we can change the behavior back we need a convincing argument that we 
> won't cause a regression in another application by making the change.
>
> I do not see how such an argument can be made.  So you have my sympathies but 
> I do not see how we can help you.
We should consider this patch basing on my following arguments.
1 Different version kernel should keep consistent on this behavior.
2 This writting behavior on proc file should be same with writting on
regular file as possible as we can.
3 This patch does not have any potential compatibility risk with 3rd
party application.
4 Support writting "1...\0" to proc file.

>
> Eric
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-25 Thread Sean Fu
On Tue, Aug 25, 2015 at 10:15 PM, Steven Rostedt rost...@goodmis.org wrote:
 On Tue, 25 Aug 2015 15:50:18 +0800
 Sean Fu fxinr...@gmail.com wrote:

 On Tue, Aug 25, 2015 at 10:24 AM, Eric W. Biederman
 ebied...@xmission.com wrote:
 
 
  On August 24, 2015 6:57:57 PM MDT, Sean Fu fxinr...@gmail.com wrote:
 An application from HuaWei which works fine on 2.6 encounters this
 issue on 3.0 or later kernel.
 
  My sympathies.  Being stuck with a 3rd party application you can barely 
  talk about that has been broken for 5years and no one reported it.
 
  Ordinarily we would fix a regression like this. As it has been 5years the 
  challenge now is how do we tell if there are applications that depend on 
  the current behavior.
 
  Before we can change the behavior back we need a convincing argument that 
  we won't cause a regression in another application by making the change.
 
  I do not see how such an argument can be made.  So you have my sympathies 
  but I do not see how we can help you.
 We should consider this patch basing on my following arguments.
 1 Different version kernel should keep consistent on this behavior.

 The thing is, the above argument is against the patch. The behavior
 changed 2 years ago, and nobody noticed. Changing it back only causes
 more inconsistent behavior.
It is impossible to cause more inconsistent behavior.
it just enhance compatibility(support xx...x\0).
This patch just modify proc_wspace_sep array. and proc_wspace_sep is static.
Only proc_get_long used this array, proc_get_long is also static.
There are only 4 place to call proc_get_long in kernel/sysctl.c.
I will prove that these 4 callers have no bad impact later.



 2 This writting behavior on proc file should be same with writting on
 regular file as possible as we can.

 Writing to a proc file causes kernel actions. Writing to a regular file
 just saves data. That's not an argument here.

 3 This patch does not have any potential compatibility risk with 3rd
 party application.

 How do you know that?
I will prove that all other write usage is not impacted later.

Thanks for all reply.

Sean

 -- Steve

 4 Support writting 1...\0 to proc file.

 
  Eric
 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-25 Thread Sean Fu
On Tue, Aug 25, 2015 at 10:24 AM, Eric W. Biederman
ebied...@xmission.com wrote:


 On August 24, 2015 6:57:57 PM MDT, Sean Fu fxinr...@gmail.com wrote:
An application from HuaWei which works fine on 2.6 encounters this
issue on 3.0 or later kernel.

 My sympathies.  Being stuck with a 3rd party application you can barely talk 
 about that has been broken for 5years and no one reported it.

 Ordinarily we would fix a regression like this. As it has been 5years the 
 challenge now is how do we tell if there are applications that depend on the 
 current behavior.

 Before we can change the behavior back we need a convincing argument that we 
 won't cause a regression in another application by making the change.

 I do not see how such an argument can be made.  So you have my sympathies but 
 I do not see how we can help you.
We should consider this patch basing on my following arguments.
1 Different version kernel should keep consistent on this behavior.
2 This writting behavior on proc file should be same with writting on
regular file as possible as we can.
3 This patch does not have any potential compatibility risk with 3rd
party application.
4 Support writting 1...\0 to proc file.


 Eric

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-24 Thread Sean Fu
Call path is 
"proc_dointvec-->do_proc_dointvec-->__do_proc_dointvec-->proc_get_long".
file: kernel/sysctl.c
function: proc_get_long
...
1927 if (len < *size && perm_tr_len && !memchr(perm_tr, *p,
perm_tr_len))   //this line should accept two bytes
"1\0".
1928 return -EINVAL;
...


The latest upstream kernel is also tested, and it is same as 3.16.7 kernel.

3.16.7 kernel:
sean@linux-dunz:~/work/suse_lab/proc_test> cat /proc/version
Linux version 3.16.7-7-desktop (geeko@buildhost) (gcc version 4.8.3
20140627 [gcc-4_8-branch revision 212064] (SUSE Linux) ) #1 SMP
PREEMPT Wed Dec 17 18:00:44 UTC 2014 (762f27a)
sean@linux-dunz:~/work/suse_lab/proc_test> gcc ./proc_test.c -o proc_test
sean@linux-dunz:~/work/suse_lab/proc_test> sudo ./proc_test enp0s25
Input: ./proc_test, enp0s25
file is: /proc/sys/net/ipv4/conf/enp0s25/rp_filter.
open /proc/sys/net/ipv4/conf/enp0s25/rp_filter ok, fd=3
write 3: len=-1, errno=22, Invalid argument

2.6.16.60 kernel:
linux-8lij:~ # gcc ./proc_test.c -o ./proc_test
linux-8lij:~ # cat /proc/version
Linux version 2.6.16.60-0.83.2-bigsmp (geeko@buildhost) (gcc version
4.1.2 20070115 (SUSE Linux)) #1 SMP Fri Sep 2 13:49:16 UTC 2011
linux-8lij:~ # gcc ./proc_test.c -o ./proc_test
linux-8lij:~ # ./proc_test eth7
Input: ./proc_test, eth7
file is: /proc/sys/net/ipv4/conf/eth7/rp_filter.
open /proc/sys/net/ipv4/conf/eth7/rp_filter ok, fd=3
write 3: len=1, errno=0, Success

On Tue, Aug 25, 2015 at 8:57 AM, Sean Fu  wrote:
> An application from HuaWei which works fine on 2.6 encounters this
> issue on 3.0 or later kernel.
>
> Test code:
>
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
>
> #define MAXLEN (100)
>
> int main(int argc, char** argv)
> {
> int fd = 0;
> int len = 0;
> char pathname[MAXLEN] = {0};
> char buf[2] = {0};
> int ret = 0xF;
> int value = 0xF;
>
> if (argc < 2)
> {
> printf("Input param error, less 2 param: %s, %s.\n",
> argv[0], argv[1]);
> return 1;
> }
>
> printf("Input: %s, %s \n", argv[0], argv[1]);
>
> ret = sprintf(pathname,
> "/proc/sys/net/ipv4/conf/%s/rp_filter", argv[1]);
> if (ret < 0)
> printf("sprintf error, errno %d, %s\n", errno, 
> strerror(errno));
> printf("file is: %s. \n", pathname);
>
> fd = open(pathname, O_RDWR, S_IRUSR);
> if (fd <=0 )
> {
> printf("open %s failed, errno=%d, %s.\n", pathname,
> errno, strerror(errno));
> return 1;
> }
> printf("open %s ok, fd=%d\n", pathname, fd);
>
> len = write(fd, "0\0", 2);
> printf("write %d: len=%d, errno=%d, %s\n", fd, len, errno,
> strerror(errno));
>
> close(fd);
> return 0;
> }
>
> On Tue, Aug 25, 2015 at 12:59 AM, Steven Rostedt  wrote:
>> On Mon, 24 Aug 2015 16:56:13 +0800
>> Sean Fu  wrote:
>>
>>> when the input argument "count" including the terminating byte "\0",
>>> The write system call return EINVAL on proc file.
>>> But it return success on regular file.
>>>
>>> E.g. Writting two bytes ("1\0") to "/proc/sys/net/ipv4/conf/eth0/rp_filter".
>>> write(fd, "1\0", 2) return EINVAL.
>>
>> And what would do that? What tool broke because of this?
>>
>>  echo 1 > /proc/sys/net/ipv4/conf/eth0/rp_filter
>>
>> works just fine. strlen("string") would not include the nul character.
>> The only thing I could think of would be a sizeof(str), but then that
>> would include someone hardcoding an integer in a string, like:
>>
>> char val[] = "1"
>>
>> write(fd, val, sizeof(val));
>>
>> Again, what tool does that?
>>
>> If there is a tool out in the wild that use to work on 2.6 (and was
>> running on 2.6 then, and not something that was created after that
>> change), then we can consider this fix.
>>
>> -- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-24 Thread Sean Fu
An application from HuaWei which works fine on 2.6 encounters this
issue on 3.0 or later kernel.

Test code:

#include 
#include 
#include 
#include 
#include 
#include 
#include 

#define MAXLEN (100)

int main(int argc, char** argv)
{
int fd = 0;
int len = 0;
char pathname[MAXLEN] = {0};
char buf[2] = {0};
int ret = 0xF;
int value = 0xF;

if (argc < 2)
{
printf("Input param error, less 2 param: %s, %s.\n",
argv[0], argv[1]);
return 1;
}

printf("Input: %s, %s \n", argv[0], argv[1]);

ret = sprintf(pathname,
"/proc/sys/net/ipv4/conf/%s/rp_filter", argv[1]);
if (ret < 0)
printf("sprintf error, errno %d, %s\n", errno, strerror(errno));
printf("file is: %s. \n", pathname);

fd = open(pathname, O_RDWR, S_IRUSR);
if (fd <=0 )
{
printf("open %s failed, errno=%d, %s.\n", pathname,
errno, strerror(errno));
return 1;
}
printf("open %s ok, fd=%d\n", pathname, fd);

len = write(fd, "0\0", 2);
printf("write %d: len=%d, errno=%d, %s\n", fd, len, errno,
strerror(errno));

close(fd);
return 0;
}

On Tue, Aug 25, 2015 at 12:59 AM, Steven Rostedt  wrote:
> On Mon, 24 Aug 2015 16:56:13 +0800
> Sean Fu  wrote:
>
>> when the input argument "count" including the terminating byte "\0",
>> The write system call return EINVAL on proc file.
>> But it return success on regular file.
>>
>> E.g. Writting two bytes ("1\0") to "/proc/sys/net/ipv4/conf/eth0/rp_filter".
>> write(fd, "1\0", 2) return EINVAL.
>
> And what would do that? What tool broke because of this?
>
>  echo 1 > /proc/sys/net/ipv4/conf/eth0/rp_filter
>
> works just fine. strlen("string") would not include the nul character.
> The only thing I could think of would be a sizeof(str), but then that
> would include someone hardcoding an integer in a string, like:
>
> char val[] = "1"
>
> write(fd, val, sizeof(val));
>
> Again, what tool does that?
>
> If there is a tool out in the wild that use to work on 2.6 (and was
> running on 2.6 then, and not something that was created after that
> change), then we can consider this fix.
>
> -- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-24 Thread Sean Fu
On Mon, Aug 24, 2015 at 8:27 PM, Eric W. Biederman
 wrote:
>
>
> On August 24, 2015 1:56:13 AM PDT, Sean Fu  wrote:
>>when the input argument "count" including the terminating byte "\0",
>>The write system call return EINVAL on proc file.
>>But it return success on regular file.
>
> Nonsense.  It will write the '\0' to a regular file because it is just data.
>
> Integers in proc are more than data.
>
> So I see no justification for this change.
In fact, "write(fd, "1\0", 2)" on Integers proc file return success on
2.6 kernel. I already tested it on 2.6.6.60 kernel.

So, The latest behavior of "write(fd, "1\0", 2)" is different from old
kernel(2.6).
This maybe impact the compatibility of some user space program.
>
>
> Eric
>
>>E.g. Writting two bytes ("1\0") to
>>"/proc/sys/net/ipv4/conf/eth0/rp_filter".
>>write(fd, "1\0", 2) return EINVAL.
>>
>>Signed-off-by: Sean Fu 
>>---
>> kernel/sysctl.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>>diff --git a/kernel/sysctl.c b/kernel/sysctl.c
>>index 19b62b5..c2b0594 100644
>>--- a/kernel/sysctl.c
>>+++ b/kernel/sysctl.c
>>@@ -2004,7 +2004,7 @@ static int do_proc_dointvec_conv(bool *negp,
>>unsigned long *lvalp,
>>return 0;
>> }
>>
>>-static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
>>+static const char proc_wspace_sep[] = { ' ', '\t', '\n', '\0' };
>>
>> static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
>>  int write, void __user *buffer,
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] kernel/sysctl.c: If "count" including the terminating byte '\0' the write system call should retrun success.

2015-08-24 Thread Sean Fu
when the input argument "count" including the terminating byte "\0",
The write system call return EINVAL on proc file.
But it return success on regular file.

E.g. Writting two bytes ("1\0") to "/proc/sys/net/ipv4/conf/eth0/rp_filter".
write(fd, "1\0", 2) return EINVAL.

Signed-off-by: Sean Fu 
---
 kernel/sysctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 19b62b5..c2b0594 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2004,7 +2004,7 @@ static int do_proc_dointvec_conv(bool *negp,
unsigned long *lvalp,
return 0;
 }

-static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
+static const char proc_wspace_sep[] = { ' ', '\t', '\n', '\0' };

 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
  int write, void __user *buffer,
-- 
2.1.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-24 Thread Sean Fu
On Mon, Aug 24, 2015 at 8:27 PM, Eric W. Biederman
ebied...@xmission.com wrote:


 On August 24, 2015 1:56:13 AM PDT, Sean Fu fxinr...@gmail.com wrote:
when the input argument count including the terminating byte \0,
The write system call return EINVAL on proc file.
But it return success on regular file.

 Nonsense.  It will write the '\0' to a regular file because it is just data.

 Integers in proc are more than data.

 So I see no justification for this change.
In fact, write(fd, 1\0, 2) on Integers proc file return success on
2.6 kernel. I already tested it on 2.6.6.60 kernel.

So, The latest behavior of write(fd, 1\0, 2) is different from old
kernel(2.6).
This maybe impact the compatibility of some user space program.


 Eric

E.g. Writting two bytes (1\0) to
/proc/sys/net/ipv4/conf/eth0/rp_filter.
write(fd, 1\0, 2) return EINVAL.

Signed-off-by: Sean Fu fxinr...@gmail.com
---
 kernel/sysctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 19b62b5..c2b0594 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2004,7 +2004,7 @@ static int do_proc_dointvec_conv(bool *negp,
unsigned long *lvalp,
return 0;
 }

-static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
+static const char proc_wspace_sep[] = { ' ', '\t', '\n', '\0' };

 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
  int write, void __user *buffer,

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-24 Thread Sean Fu
when the input argument count including the terminating byte \0,
The write system call return EINVAL on proc file.
But it return success on regular file.

E.g. Writting two bytes (1\0) to /proc/sys/net/ipv4/conf/eth0/rp_filter.
write(fd, 1\0, 2) return EINVAL.

Signed-off-by: Sean Fu fxinr...@gmail.com
---
 kernel/sysctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 19b62b5..c2b0594 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2004,7 +2004,7 @@ static int do_proc_dointvec_conv(bool *negp,
unsigned long *lvalp,
return 0;
 }

-static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
+static const char proc_wspace_sep[] = { ' ', '\t', '\n', '\0' };

 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
  int write, void __user *buffer,
-- 
2.1.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-24 Thread Sean Fu
An application from HuaWei which works fine on 2.6 encounters this
issue on 3.0 or later kernel.

Test code:

#include sys/types.h
#include sys/stat.h
#include fcntl.h
#include unistd.h
#include stdio.h
#include string.h
#include errno.h

#define MAXLEN (100)

int main(int argc, char** argv)
{
int fd = 0;
int len = 0;
char pathname[MAXLEN] = {0};
char buf[2] = {0};
int ret = 0xF;
int value = 0xF;

if (argc  2)
{
printf(Input param error, less 2 param: %s, %s.\n,
argv[0], argv[1]);
return 1;
}

printf(Input: %s, %s \n, argv[0], argv[1]);

ret = sprintf(pathname,
/proc/sys/net/ipv4/conf/%s/rp_filter, argv[1]);
if (ret  0)
printf(sprintf error, errno %d, %s\n, errno, strerror(errno));
printf(file is: %s. \n, pathname);

fd = open(pathname, O_RDWR, S_IRUSR);
if (fd =0 )
{
printf(open %s failed, errno=%d, %s.\n, pathname,
errno, strerror(errno));
return 1;
}
printf(open %s ok, fd=%d\n, pathname, fd);

len = write(fd, 0\0, 2);
printf(write %d: len=%d, errno=%d, %s\n, fd, len, errno,
strerror(errno));

close(fd);
return 0;
}

On Tue, Aug 25, 2015 at 12:59 AM, Steven Rostedt rost...@goodmis.org wrote:
 On Mon, 24 Aug 2015 16:56:13 +0800
 Sean Fu fxinr...@gmail.com wrote:

 when the input argument count including the terminating byte \0,
 The write system call return EINVAL on proc file.
 But it return success on regular file.

 E.g. Writting two bytes (1\0) to /proc/sys/net/ipv4/conf/eth0/rp_filter.
 write(fd, 1\0, 2) return EINVAL.

 And what would do that? What tool broke because of this?

  echo 1  /proc/sys/net/ipv4/conf/eth0/rp_filter

 works just fine. strlen(string) would not include the nul character.
 The only thing I could think of would be a sizeof(str), but then that
 would include someone hardcoding an integer in a string, like:

 char val[] = 1

 write(fd, val, sizeof(val));

 Again, what tool does that?

 If there is a tool out in the wild that use to work on 2.6 (and was
 running on 2.6 then, and not something that was created after that
 change), then we can consider this fix.

 -- Steve
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl.c: If count including the terminating byte '\0' the write system call should retrun success.

2015-08-24 Thread Sean Fu
Call path is 
proc_dointvec--do_proc_dointvec--__do_proc_dointvec--proc_get_long.
file: kernel/sysctl.c
function: proc_get_long
...
1927 if (len  *size  perm_tr_len  !memchr(perm_tr, *p,
perm_tr_len))   //this line should accept two bytes
1\0.
1928 return -EINVAL;
...


The latest upstream kernel is also tested, and it is same as 3.16.7 kernel.

3.16.7 kernel:
sean@linux-dunz:~/work/suse_lab/proc_test cat /proc/version
Linux version 3.16.7-7-desktop (geeko@buildhost) (gcc version 4.8.3
20140627 [gcc-4_8-branch revision 212064] (SUSE Linux) ) #1 SMP
PREEMPT Wed Dec 17 18:00:44 UTC 2014 (762f27a)
sean@linux-dunz:~/work/suse_lab/proc_test gcc ./proc_test.c -o proc_test
sean@linux-dunz:~/work/suse_lab/proc_test sudo ./proc_test enp0s25
Input: ./proc_test, enp0s25
file is: /proc/sys/net/ipv4/conf/enp0s25/rp_filter.
open /proc/sys/net/ipv4/conf/enp0s25/rp_filter ok, fd=3
write 3: len=-1, errno=22, Invalid argument

2.6.16.60 kernel:
linux-8lij:~ # gcc ./proc_test.c -o ./proc_test
linux-8lij:~ # cat /proc/version
Linux version 2.6.16.60-0.83.2-bigsmp (geeko@buildhost) (gcc version
4.1.2 20070115 (SUSE Linux)) #1 SMP Fri Sep 2 13:49:16 UTC 2011
linux-8lij:~ # gcc ./proc_test.c -o ./proc_test
linux-8lij:~ # ./proc_test eth7
Input: ./proc_test, eth7
file is: /proc/sys/net/ipv4/conf/eth7/rp_filter.
open /proc/sys/net/ipv4/conf/eth7/rp_filter ok, fd=3
write 3: len=1, errno=0, Success

On Tue, Aug 25, 2015 at 8:57 AM, Sean Fu fxinr...@gmail.com wrote:
 An application from HuaWei which works fine on 2.6 encounters this
 issue on 3.0 or later kernel.

 Test code:

 #include sys/types.h
 #include sys/stat.h
 #include fcntl.h
 #include unistd.h
 #include stdio.h
 #include string.h
 #include errno.h

 #define MAXLEN (100)

 int main(int argc, char** argv)
 {
 int fd = 0;
 int len = 0;
 char pathname[MAXLEN] = {0};
 char buf[2] = {0};
 int ret = 0xF;
 int value = 0xF;

 if (argc  2)
 {
 printf(Input param error, less 2 param: %s, %s.\n,
 argv[0], argv[1]);
 return 1;
 }

 printf(Input: %s, %s \n, argv[0], argv[1]);

 ret = sprintf(pathname,
 /proc/sys/net/ipv4/conf/%s/rp_filter, argv[1]);
 if (ret  0)
 printf(sprintf error, errno %d, %s\n, errno, 
 strerror(errno));
 printf(file is: %s. \n, pathname);

 fd = open(pathname, O_RDWR, S_IRUSR);
 if (fd =0 )
 {
 printf(open %s failed, errno=%d, %s.\n, pathname,
 errno, strerror(errno));
 return 1;
 }
 printf(open %s ok, fd=%d\n, pathname, fd);

 len = write(fd, 0\0, 2);
 printf(write %d: len=%d, errno=%d, %s\n, fd, len, errno,
 strerror(errno));

 close(fd);
 return 0;
 }

 On Tue, Aug 25, 2015 at 12:59 AM, Steven Rostedt rost...@goodmis.org wrote:
 On Mon, 24 Aug 2015 16:56:13 +0800
 Sean Fu fxinr...@gmail.com wrote:

 when the input argument count including the terminating byte \0,
 The write system call return EINVAL on proc file.
 But it return success on regular file.

 E.g. Writting two bytes (1\0) to /proc/sys/net/ipv4/conf/eth0/rp_filter.
 write(fd, 1\0, 2) return EINVAL.

 And what would do that? What tool broke because of this?

  echo 1  /proc/sys/net/ipv4/conf/eth0/rp_filter

 works just fine. strlen(string) would not include the nul character.
 The only thing I could think of would be a sizeof(str), but then that
 would include someone hardcoding an integer in a string, like:

 char val[] = 1

 write(fd, val, sizeof(val));

 Again, what tool does that?

 If there is a tool out in the wild that use to work on 2.6 (and was
 running on 2.6 then, and not something that was created after that
 change), then we can consider this fix.

 -- Steve
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


  1   2   >