[PATCH v2] vhost: remove unused lock check flag in vhost_dev_cleanup()

2018-01-05 Thread (Caspar)
From: Caspar Zhang 

In commit ea5d404655ba ("vhost: fix release path lockdep checks"),
Michael added a flag to check whether we should hold a lock in
vhost_dev_cleanup(), however, in commit 47283bef7ed3 ("vhost: move
memory pointer to VQs"), RCU operations have been replaced by
mutex, we can remove the no-longer-used `locked' parameter now.

Signed-off-by: Caspar Zhang 
Acked-by: Jason Wang 
---
v1->v2: fix author name with UTF-8 characters.

 drivers/vhost/net.c   | 2 +-
 drivers/vhost/scsi.c  | 2 +-
 drivers/vhost/test.c  | 2 +-
 drivers/vhost/vhost.c | 5 ++---
 drivers/vhost/vhost.h | 2 +-
 drivers/vhost/vsock.c | 2 +-
 6 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c7bdeb655646..a354d8d731e3 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -996,7 +996,7 @@ static int vhost_net_release(struct inode *inode, struct 
file *f)
vhost_net_stop(n, &tx_sock, &rx_sock);
vhost_net_flush(n);
vhost_dev_stop(&n->dev);
-   vhost_dev_cleanup(&n->dev, false);
+   vhost_dev_cleanup(&n->dev);
vhost_net_vq_reset(n);
if (tx_sock)
sockfd_put(tx_sock);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 71517b3c5558..797d08916553 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1420,7 +1420,7 @@ static int vhost_scsi_release(struct inode *inode, struct 
file *f)
mutex_unlock(&vs->dev.mutex);
vhost_scsi_clear_endpoint(vs, &t);
vhost_dev_stop(&vs->dev);
-   vhost_dev_cleanup(&vs->dev, false);
+   vhost_dev_cleanup(&vs->dev);
/* Jobs can re-queue themselves in evt kick handler. Do extra flush. */
vhost_scsi_flush(vs);
kfree(vs->dev.vqs);
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 3cc98c07dcd3..906b8f0f19f7 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -157,7 +157,7 @@ static int vhost_test_release(struct inode *inode, struct 
file *f)
 
vhost_test_stop(n, &private);
vhost_test_flush(n);
-   vhost_dev_cleanup(&n->dev, false);
+   vhost_dev_cleanup(&n->dev);
/* We do an extra flush before freeing memory,
 * since jobs can re-queue themselves. */
vhost_test_flush(n);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 33ac2b186b85..014675c3d569 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -544,7 +544,7 @@ void vhost_dev_reset_owner(struct vhost_dev *dev, struct 
vhost_umem *umem)
 {
int i;
 
-   vhost_dev_cleanup(dev, true);
+   vhost_dev_cleanup(dev);
 
/* Restore memory to default empty mapping. */
INIT_LIST_HEAD(&umem->umem_list);
@@ -611,8 +611,7 @@ static void vhost_clear_msg(struct vhost_dev *dev)
spin_unlock(&dev->iotlb_lock);
 }
 
-/* Caller should have device mutex if and only if locked is set */
-void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
+void vhost_dev_cleanup(struct vhost_dev *dev)
 {
int i;
 
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 79c6e7a60a5e..ff4d918e3e0a 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -181,7 +181,7 @@ bool vhost_dev_has_owner(struct vhost_dev *dev);
 long vhost_dev_check_owner(struct vhost_dev *);
 struct vhost_umem *vhost_dev_reset_owner_prepare(void);
 void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *);
-void vhost_dev_cleanup(struct vhost_dev *, bool locked);
+void vhost_dev_cleanup(struct vhost_dev *);
 void vhost_dev_stop(struct vhost_dev *);
 long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user 
*argp);
 long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp);
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 5a5e981bd8e4..0d14e2ff19f1 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -599,7 +599,7 @@ static int vhost_vsock_dev_release(struct inode *inode, 
struct file *file)
}
spin_unlock_bh(&vsock->send_pkt_list_lock);
 
-   vhost_dev_cleanup(&vsock->dev, false);
+   vhost_dev_cleanup(&vsock->dev);
kfree(vsock->dev.vqs);
vhost_vsock_free(vsock);
return 0;
-- 
2.15.1



[PATCH v2] mm/fadvise: discard partial page if endbyte is also EOF

2018-01-04 Thread (Caspar)
From: shidao.ytt 

During our recent testing with fadvise(FADV_DONTNEED), we find that if
given offset/length is not page-aligned, the last page will not be
discarded. The tool we use is vmtouch (https://hoytech.com/vmtouch/), we
map a 10KB-sized file into memory and then try to run this tool to evict
the whole file mapping, but the last single page always remains staying
in the memory:

$./vmtouch -e test_10K
   Files: 1
 Directories: 0
   Evicted Pages: 3 (12K)
 Elapsed: 2.1e-05 seconds

$./vmtouch test_10K
   Files: 1
 Directories: 0
  Resident Pages: 1/3  4K/12K  33.3%
 Elapsed: 5.5e-05 seconds

However when we test with an older kernel, say 3.10, this problem is
gone. So we wonder if this is a regression:

$./vmtouch -e test_10K
   Files: 1
 Directories: 0
   Evicted Pages: 3 (12K)
 Elapsed: 8.2e-05 seconds

$./vmtouch test_10K
   Files: 1
 Directories: 0
  Resident Pages: 0/3  0/12K  0%  <-- partial page also discarded
 Elapsed: 5e-05 seconds

After digging a little bit into this problem, we find it seems not a
regression. Not discarding partial page is likely to be on purpose
according to commit 441c228f817f7 ("mm: fadvise: document the
fadvise(FADV_DONTNEED) behaviour for partial pages") written by
Mel Gorman. He explained why partial pages should be preserved instead
of being discarded when using fadvise(FADV_DONTNEED). However, the
interesting part is that the actual code did NOT work as the same as it
was described, the partial page was still discarded anyway, due to a
calculation mistake of `end_index' passed to invalidate_mapping_pages().
This mistake has not been fixed until recently, that's why we fail to
reproduce our problem in old kernels. The fix is done in commit
18aba41cbf ("mm/fadvise.c: do not discard partial pages with
POSIX_FADV_DONTNEED") by Oleg Drokin.

Back to the original testing, our problem becomes that there is a
speical case that, if the page-unaligned `endbyte' is also the end
of file, it is not necessary at all to preserve the last partial page,
as we all know no one else will use the rest of it. It should be safe
enough if we just discard the whole page. So we add an EOF check in this
patch.

We also find a poosbile real world issue in mainline kernel. Assume such
scenario: A userspace backup application want to backup a huge amount of
small files (<4k) at once, the developer might (I guess) want to use
fadvise(FADV_DONTNEED) to save memory. However, FADV_DONTNEED won't
really happen since the only page mapped is a partial page, and kernel
will preserve it. Our patch also fixes this problem, since we know the
endbyte is EOF, so we discard it.

Here is a simple reproducer to reproduce and verify each scenario we
described above:

  test_fadvise.c
  ==
  #include 
  #include 
  #include 
  #include 
  #include 
  #include 
  #include 

  int main(int argc, char **argv)
  {
int i, fd, ret, len;
struct stat buf;
void *addr;
unsigned char *vec;
char *strbuf;
ssize_t pagesize = getpagesize();
ssize_t filesize;

fd = open(argv[1], O_RDWR|O_CREAT, S_IRUSR|S_IWUSR);
if (fd < 0)
return -1;
filesize = strtoul(argv[2], NULL, 10);

strbuf = malloc(filesize);
memset(strbuf, 42, filesize);
write(fd, strbuf, filesize);
free(strbuf);
fsync(fd);

len = (filesize + pagesize - 1) / pagesize;
printf("length of pages: %d\n", len);

addr = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);
if (addr == MAP_FAILED)
return -1;

ret = posix_fadvise(fd, 0, filesize, POSIX_FADV_DONTNEED);
if (ret < 0)
return -1;

vec = malloc(len);
ret = mincore(addr, filesize, (void *)vec);
if (ret < 0)
return -1;

for (i = 0; i < len; i++)
printf("pages[%d]: %x\n", i, vec[i] & 0x1);

free(vec);
close(fd);

return 0;
  }
  ==

Test 1: running on kernel with commit 18aba41cbf reverted:

[root@caspar ~]# uname -r
4.15.0-rc6.revert+
[root@caspar ~]# ./test_fadvise file1 1024
length of pages: 1
pages[0]: 0# <-- partial page discarded
[root@caspar ~]# ./test_fadvise file2 8192
length of pages: 2
pages[0]: 0
pages[1]: 0
[root@caspar ~]# ./test_fadvise file3 10240
length of pages: 3
pages[0]: 0
pages[1]: 0
pages[2]: 0# <-- partial page discarded

Test 2: running on mainline kernel:

[root@caspar ~]# uname -r
4.15.0-rc6+
[root@caspar ~]# ./test_fadvise test1 1024
length of pages: 1
pages[0]: 1# <-- partial and the only page not discarded
[root@caspar ~]# ./test_fadvise test2 8192
length of pages: 2
pages[0]: 0
pages[1]: 0
[root@caspar ~]# ./test_fadvise test3 10240
length of pages: 3
pages[0]: 0
pages[1]: 0
pages[2]: 1# <-- partial page not discarded

Test 3: running on ker

Re: [PATCH] mm/fadvise: discard partial pages iff endbyte is also eof

2018-01-04 Thread (Caspar)



On 2018/1/4 19:34, Mel Gorman wrote:

On Thu, Jan 04, 2018 at 02:13:43PM +0800, ??(Caspar) wrote:



On 2018/1/3 18:48, Mel Gorman wrote:

On Wed, Jan 03, 2018 at 02:53:43PM +0800, ??(Caspar) wrote:




?? 2017??12??2312:16??  ??

From: "shidao.ytt" 

in commit 441c228f817f7 ("mm: fadvise: document the
fadvise(FADV_DONTNEED) behaviour for partial pages") Mel Gorman
explained why partial pages should be preserved instead of discarded
when using fadvise(FADV_DONTNEED), however the actual codes to calcuate
end_index was unexpectedly wrong, the code behavior didn't match to the
statement in comments; Luckily in another commit 18aba41cbf
("mm/fadvise.c: do not discard partial pages with POSIX_FADV_DONTNEED")
Oleg Drokin fixed this behavior

Here I come up with a new idea that actually we can still discard the
last parital page iff the page-unaligned endbyte is also the end of
file, since no one else will use the rest of the page and it should be
safe enough to discard.


+akpm...

Hi Mel, Andrew:

Would you please take a look at this patch, to see if this proposal
is reasonable enough, thanks in advance!



I'm backlogged after being out for the Christmas. Superficially the patch
looks ok but I wondered how often it happened in practice as we already
would discard files smaller than a page on DONTNEED. It also requires


Actually, we would *not*. Let's look into the codes.



You're right of course. I suggest updating the changelog with what you
found and the test case. I think it's reasonable to special case the
discarding of partial pages if it's the end of a file with the potential
addendum of checking if the endbyte is past the end of the file. The man
page should also be updated.


Sure, will do and send out v2.

Thanks,
Caspar




Re: [PATCH] mm/fadvise: discard partial pages iff endbyte is also eof

2018-01-04 Thread (Caspar)



On 2018/1/4 08:17, Andrew Morton wrote:

On Wed, 3 Jan 2018 10:48:00 + Mel Gorman  
wrote:


On Wed, Jan 03, 2018 at 02:53:43PM +0800, ??(Caspar) wrote:




?? 2017??12??2312:16??  ??

From: "shidao.ytt" 

in commit 441c228f817f7 ("mm: fadvise: document the
fadvise(FADV_DONTNEED) behaviour for partial pages") Mel Gorman
explained why partial pages should be preserved instead of discarded
when using fadvise(FADV_DONTNEED), however the actual codes to calcuate
end_index was unexpectedly wrong, the code behavior didn't match to the
statement in comments; Luckily in another commit 18aba41cbf
("mm/fadvise.c: do not discard partial pages with POSIX_FADV_DONTNEED")
Oleg Drokin fixed this behavior

Here I come up with a new idea that actually we can still discard the
last parital page iff the page-unaligned endbyte is also the end of
file, since no one else will use the rest of the page and it should be
safe enough to discard.


+akpm...

Hi Mel, Andrew:

Would you please take a look at this patch, to see if this proposal
is reasonable enough, thanks in advance!



I'm backlogged after being out for the Christmas. Superficially the patch
looks ok but I wondered how often it happened in practice as we already
would discard files smaller than a page on DONTNEED. It also requires
that the system call get the exact size of the file correct and would not
discard if the off + len was past the end of the file for whatever reason
(e.g. a stat to read the size, a truncate in parallel and fadvise using
stale data from stat) and that's why the patch looked like it might have
no impact in practice. Is the patch known to help a real workload or is
it motivated by a code inspection?


The current whole-pages-only logic was introduced (accidentally, I
think) by yours truly when fixing a bug in the initial fadvise()
commit in 2003.

https://kernel.opensuse.org/cgit/kernel/commit/?h=v2.6.0-test4&id=7161ee20fea6e25a32feb91503ca2b7c7333c886

Namely:

: invalidate_mapping_pages() takes start/end, but fadvise is currently passing
: it start/len.
:
:
:
:  mm/fadvise.c |8 ++--
:  1 files changed, 6 insertions(+), 2 deletions(-)
:
: diff -puN mm/fadvise.c~fadvise-fix mm/fadvise.c
: --- 25/mm/fadvise.c~fadvise-fix   2003-08-14 18:16:12.0 -0700
: +++ 25-akpm/mm/fadvise.c  2003-08-14 18:16:12.0 -0700
: @@ -26,6 +26,8 @@ long sys_fadvise64(int fd, loff_t offset
:   struct inode *inode;
:   struct address_space *mapping;
:   struct backing_dev_info *bdi;
: + pgoff_t start_index;
: + pgoff_t end_index;
:   int ret = 0;
:
:   if (!file)
: @@ -65,8 +67,10 @@ long sys_fadvise64(int fd, loff_t offset
:   case POSIX_FADV_DONTNEED:
:   if (!bdi_write_congested(mapping->backing_dev_info))
:   filemap_flush(mapping);
: - invalidate_mapping_pages(mapping, offset >> PAGE_CACHE_SHIFT,
: - (len >> PAGE_CACHE_SHIFT) + 1);
: + start_index = offset >> PAGE_CACHE_SHIFT;
: + end_index = (offset + len + PAGE_CACHE_SIZE - 1) >>
: + PAGE_CACHE_SHIFT;
: + invalidate_mapping_pages(mapping, start_index, end_index);
:   break;
:   default:
:   ret = -EINVAL;
:

So I'm not sure that the whole "don't discard partial pages" thing is
well-founded and I see no reason why we cannot alter it.

So, thinking caps on: why not just discard them?  After all, that's
what userspace asked us to do.


Hi Andrew, I doubt if "just discard them" is a proper action to match 
the userspace's expectation. Maybe we will never meet the userspace's 
expectation since we are doing pages in kernel while userspace is 
passing bytes offset/length to the kernel. Note that Mel Gorman has 
already documented page-unaligned behaviors in posix_fadvise() man 
page[1] but obviously not all people (including /me) are able to read 
the _latest_ version, so someone might still uses the syscall with page 
unaligned offset/length. The userspace might only ask for discarding 
certain *bytes*, instead of *pages*.


And I think we need to look back first why we thought "preserved is 
better than discard". If we throw the whole page, the rest part of the 
page might still be required (consider the offset and length is in the 
middle of a file) because it's untagged:


  ...| PAGE --|...
  ...| DONTNEED |-- UNTAGGED -|...

but the page has gone, page fault occurs and we need to reload it from 
the disk -- performance degradation happens.


Maybe that's why we would rather preserv the whole page before.

But if we don't throw the partial page at all, and if the tail partial 
page is _exactly the end of the file_, a page that advised to be NONEED 
would be left in memory. And we all know that it is safe to throw it.


So we come up with this patch -- to keep the partial page not been 
throwing away, and add a special cas

Re: [PATCH] mm/fadvise: discard partial pages iff endbyte is also eof

2018-01-03 Thread (Caspar)



On 2018/1/4 14:13, 夷则(Caspar) wrote:


This patch is trying to help to solve a real issue. Sometimes we need to 
evict the whole file from page cache because we are sure it will not be 
used in the near future. We try to use posix_fadvise() to finish our 
work but we often see a "small tail" at the end of some files could not 
be evicted, after digging a little bit, we find those file sizes are not 
page-aligned and the "tail" turns out to be partial pages.


We fail to find a standard from posix_fadvise() manual page to subscribe 
the function behaviors if the `offset' and `len' params are not 


Oops, I find a 'standard' documented in latest man-pages.git[1], blame 
my centos7, it runs with an old man-pages.rpm :-(


Thanks,
Caspar

[1] 
https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git/commit/?h=ceb1c326b9f3e863dfd9bf33bc7118bb1fa29bfc



page-aligned, then we go to kernel tree and see this:

     /*
  * First and last FULL page! Partial pages are deliberately
  * preserved on the expectation that it is better to preserve
  * needed memory than to discard unneeded memory.
  */


Re: [PATCH] mm/fadvise: discard partial pages iff endbyte is also eof

2018-01-03 Thread (Caspar)



On 2018/1/3 18:48, Mel Gorman wrote:

On Wed, Jan 03, 2018 at 02:53:43PM +0800, ??(Caspar) wrote:




?? 2017??12??2312:16??  ??

From: "shidao.ytt" 

in commit 441c228f817f7 ("mm: fadvise: document the
fadvise(FADV_DONTNEED) behaviour for partial pages") Mel Gorman
explained why partial pages should be preserved instead of discarded
when using fadvise(FADV_DONTNEED), however the actual codes to calcuate
end_index was unexpectedly wrong, the code behavior didn't match to the
statement in comments; Luckily in another commit 18aba41cbf
("mm/fadvise.c: do not discard partial pages with POSIX_FADV_DONTNEED")
Oleg Drokin fixed this behavior

Here I come up with a new idea that actually we can still discard the
last parital page iff the page-unaligned endbyte is also the end of
file, since no one else will use the rest of the page and it should be
safe enough to discard.


+akpm...

Hi Mel, Andrew:

Would you please take a look at this patch, to see if this proposal
is reasonable enough, thanks in advance!



I'm backlogged after being out for the Christmas. Superficially the patch
looks ok but I wondered how often it happened in practice as we already
would discard files smaller than a page on DONTNEED. It also requires


Actually, we would *not*. Let's look into the codes.

Clue 1: start_index is a round-up page-aligned addr and end_index is a 
round-down page-aligned addr, while offset & endbyte might be unaligned 
obviously (mm/fadvise.c):


  start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
  end_index = (endbyte >> PAGE_SHIFT);
  if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) {
 /* First page is tricky as 0 - 1 = -1, but pgoff_t
  * is unsigned, so the end_index >= start_index
  * check below would be true and we'll discard the whole
  * file cache which is not what was asked.
  */
 if (end_index == 0)
break;

 end_index--;
  }

  if (end_index >= start_index) {
 
 count = invalidate_mapping_pages(mapping,
  start_index, end_index);
 


Clue 2: looking into invalidate_mapping_pages() definition in 
mm/truncate.c, we see the end_index is included:


* @end: the offset 'to' which to invalidate (inclusive)

Now we know:

+ if `offset' is an unaligned addr, the start partial page will not be 
discarded,
+ if `endbyte' is not aligned: behaviors before and after commit 
18aba41cbf ("mm/fadvise.c: do not discard partial pages with 
POSIX_FADV_DONTNEED") are different.
 + before: end_index is the start addr of the last partial page, thus 
the whole page will be invalidated according to 
invalidate_mapping_page() comments and implementation;
 + after: in commit 18aba41cbf, `endbyte' gets checked again, if it is 
not aligned, draw back by one page so that the partial page would not be 
included; and a special case is that if end_index == 0, means the length 
mapped less than a single page, the code just breaks and never runs into 
invalidate_mapping_pages().


We have done some experiments based on an opensource tool vmtouch[1], to 
simplify the reproducer, I also make a simple .c program [2]. Here is 
the output:


* Test 1, upstream:

[root@caspar ~]# uname -r
4.15.0-rc6+

[root@caspar ~]# dd if=/dev/zero of=testfile_1k bs=1k count=1
1+0 records in
1+0 records out
1024 bytes (1.0 kB) copied, 0.000852646 s, 1.2 MB/s

[root@caspar ~]# ./test_fadvise testfile_1k
file size: 1024 Bytes
length of pages: 1
start addr of mmap: 0x7f7aa0f1b000
do posix_fadvise(DONTNEED)
still resident in memory?
vec[0]: 1

[root@caspar ~]# dd if=/dev/zero of=testfile_10k bs=1k count=10
10+0 records in
10+0 records out
10240 bytes (10 kB) copied, 0.000931652 s, 11.0 MB/s

[root@caspar ~]# ./test_fadvise testfile_10k
file size: 10240 Bytes
length of pages: 3
start addr of mmap: 0x7ff57de8f000
do posix_fadvise(DONTNEED)
still resident in memory?
vec[0]: 0
vec[1]: 0
vec[2]: 1

* Test 2, reverted 18aba41cbf

[root@caspar ~]# uname -r
4.15.0-rc6.revert+

[root@caspar ~]# dd if=/dev/zero of=testfile_10k bs=1k count=1
1+0 records in
1+0 records out
1024 bytes (1.0 kB) copied, 0.000858957 s, 1.2 MB/s

[root@caspar ~]# ./test_fadvise testfile_1k
file size: 1024 Bytes
length of pages: 1
start addr of mmap: 0x7f07fe08b000
do posix_fadvise(DONTNEED)
still resident in memory?
vec[0]: 0

[root@caspar ~]# dd if=/dev/zero of=testfile_10k bs=1k count=10
10+0 records in
10+0 records out
10240 bytes (10 kB) copied, 0.00083475 s, 12.3 MB/s
[root@caspar ~]# ./test_fadvise testfile_10k
file size: 10240 Bytes
length of pages: 3
start addr of mmap: 0x7f6a49541000
do posix_fadvise(DONTNEED)
still resident in memory?
vec[0]: 0
vec[1]: 0
vec[2]: 0

* Test 3, patched with our original proposal

[root@caspar ~]# uname -r
4.15.0-rc6.patched+

[root@caspar ~]# dd if=/dev/zero of=testfile_1k bs=1k count=1
1+0 records in
1+0 records out
1024 bytes (1.0 kB) copied, 0.000852275 s, 1.2 MB/s

[root@caspar ~]# ./test_fadvise testfil

Re: [PATCH] mm/fadvise: discard partial pages iff endbyte is also eof

2018-01-02 Thread (Caspar)


> 在 2017年12月23日,12:16,十刀  写道:
> 
> From: "shidao.ytt" 
> 
> in commit 441c228f817f7 ("mm: fadvise: document the
> fadvise(FADV_DONTNEED) behaviour for partial pages") Mel Gorman
> explained why partial pages should be preserved instead of discarded
> when using fadvise(FADV_DONTNEED), however the actual codes to calcuate
> end_index was unexpectedly wrong, the code behavior didn't match to the
> statement in comments; Luckily in another commit 18aba41cbf
> ("mm/fadvise.c: do not discard partial pages with POSIX_FADV_DONTNEED")
> Oleg Drokin fixed this behavior
> 
> Here I come up with a new idea that actually we can still discard the
> last parital page iff the page-unaligned endbyte is also the end of
> file, since no one else will use the rest of the page and it should be
> safe enough to discard.

+akpm...

Hi Mel, Andrew:

Would you please take a look at this patch, to see if this proposal
is reasonable enough, thanks in advance!

Thanks,
Caspar

> 
> Signed-off-by: shidao.ytt 
> Signed-off-by: Caspar Zhang 
> ---
> mm/fadvise.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/fadvise.c b/mm/fadvise.c
> index ec70d6e..f74b21e 100644
> --- a/mm/fadvise.c
> +++ b/mm/fadvise.c
> @@ -127,7 +127,8 @@
>*/
>   start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
>   end_index = (endbyte >> PAGE_SHIFT);
> - if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) {
> + if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
> + endbyte != inode->i_size - 1) {
>   /* First page is tricky as 0 - 1 = -1, but pgoff_t
>* is unsigned, so the end_index >= start_index
>* check below would be true and we'll discard the whole
> -- 
> 1.8.3.1



[PATCH] vhost: remove unused lock check flag in vhost_dev_cleanup()

2017-12-24 Thread (Caspar)
In commit ea5d404655ba ("vhost: fix release path lockdep checks"),
Michael added a flag to check whether we should hold a lock in
vhost_dev_cleanup(), however, in commit 47283bef7ed3 ("vhost: move
memory pointer to VQs"), RCU operations have been replaced by
mutex, we can remove the no-longer-used `locked' parameter now.

Signed-off-by: Caspar Zhang 
---
 drivers/vhost/net.c   | 2 +-
 drivers/vhost/scsi.c  | 2 +-
 drivers/vhost/test.c  | 2 +-
 drivers/vhost/vhost.c | 5 ++---
 drivers/vhost/vhost.h | 2 +-
 drivers/vhost/vsock.c | 2 +-
 6 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c7bdeb655646..a354d8d731e3 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -996,7 +996,7 @@ static int vhost_net_release(struct inode *inode, struct 
file *f)
vhost_net_stop(n, &tx_sock, &rx_sock);
vhost_net_flush(n);
vhost_dev_stop(&n->dev);
-   vhost_dev_cleanup(&n->dev, false);
+   vhost_dev_cleanup(&n->dev);
vhost_net_vq_reset(n);
if (tx_sock)
sockfd_put(tx_sock);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 71517b3c5558..797d08916553 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1420,7 +1420,7 @@ static int vhost_scsi_release(struct inode *inode, struct 
file *f)
mutex_unlock(&vs->dev.mutex);
vhost_scsi_clear_endpoint(vs, &t);
vhost_dev_stop(&vs->dev);
-   vhost_dev_cleanup(&vs->dev, false);
+   vhost_dev_cleanup(&vs->dev);
/* Jobs can re-queue themselves in evt kick handler. Do extra flush. */
vhost_scsi_flush(vs);
kfree(vs->dev.vqs);
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 3cc98c07dcd3..906b8f0f19f7 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -157,7 +157,7 @@ static int vhost_test_release(struct inode *inode, struct 
file *f)
 
vhost_test_stop(n, &private);
vhost_test_flush(n);
-   vhost_dev_cleanup(&n->dev, false);
+   vhost_dev_cleanup(&n->dev);
/* We do an extra flush before freeing memory,
 * since jobs can re-queue themselves. */
vhost_test_flush(n);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 33ac2b186b85..014675c3d569 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -544,7 +544,7 @@ void vhost_dev_reset_owner(struct vhost_dev *dev, struct 
vhost_umem *umem)
 {
int i;
 
-   vhost_dev_cleanup(dev, true);
+   vhost_dev_cleanup(dev);
 
/* Restore memory to default empty mapping. */
INIT_LIST_HEAD(&umem->umem_list);
@@ -611,8 +611,7 @@ static void vhost_clear_msg(struct vhost_dev *dev)
spin_unlock(&dev->iotlb_lock);
 }
 
-/* Caller should have device mutex if and only if locked is set */
-void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
+void vhost_dev_cleanup(struct vhost_dev *dev)
 {
int i;
 
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 79c6e7a60a5e..ff4d918e3e0a 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -181,7 +181,7 @@ bool vhost_dev_has_owner(struct vhost_dev *dev);
 long vhost_dev_check_owner(struct vhost_dev *);
 struct vhost_umem *vhost_dev_reset_owner_prepare(void);
 void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *);
-void vhost_dev_cleanup(struct vhost_dev *, bool locked);
+void vhost_dev_cleanup(struct vhost_dev *);
 void vhost_dev_stop(struct vhost_dev *);
 long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user 
*argp);
 long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp);
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 5a5e981bd8e4..0d14e2ff19f1 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -599,7 +599,7 @@ static int vhost_vsock_dev_release(struct inode *inode, 
struct file *file)
}
spin_unlock_bh(&vsock->send_pkt_list_lock);
 
-   vhost_dev_cleanup(&vsock->dev, false);
+   vhost_dev_cleanup(&vsock->dev);
kfree(vsock->dev.vqs);
vhost_vsock_free(vsock);
return 0;
-- 
2.15.1