[Cluster-devel] [PATCHv2 dlm-tool 2/3] dlm_controld: constify lsnames
This patch constify some ls name parameter which are used read only in ls_create() and ls_find(). --- dlm_controld/dlm_daemon.h | 2 +- dlm_controld/main.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h index b829e0de..f0bad90f 100644 --- a/dlm_controld/dlm_daemon.h +++ b/dlm_controld/dlm_daemon.h @@ -479,7 +479,7 @@ int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci)); int client_fd(int ci); void client_ignore(int ci, int fd); void client_back(int ci, int fd); -struct lockspace *find_ls(char *name); +struct lockspace *find_ls(const char *name); struct lockspace *find_ls_id(uint32_t id); const char *dlm_mode_str(int mode); void cluster_dead(int ci); diff --git a/dlm_controld/main.c b/dlm_controld/main.c index 2c534a1e..31489d54 100644 --- a/dlm_controld/main.c +++ b/dlm_controld/main.c @@ -537,7 +537,7 @@ static int check_run_operation(char *uuid_str, uint32_t flags, struct dlmc_run_c return 0; } -static struct lockspace *create_ls(char *name) +static struct lockspace *create_ls(const char *name) { struct lockspace *ls; @@ -562,7 +562,7 @@ static struct lockspace *create_ls(char *name) return ls; } -struct lockspace *find_ls(char *name) +struct lockspace *find_ls(const char *name) { struct lockspace *ls; -- 2.31.1
[Cluster-devel] [PATCHv2 dlm-tool 1/3] dlm_controld: increase uevent recv buffer
This patch increases the uevent recv buffer from 256 bytes to 4096 bytes. To ensure everything fits into one recv() call. --- dlm_controld/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlm_controld/main.c b/dlm_controld/main.c index 7cf6348e..2c534a1e 100644 --- a/dlm_controld/main.c +++ b/dlm_controld/main.c @@ -682,7 +682,7 @@ const char *dlm_mode_str(int mode) /* recv "online" (join) and "offline" (leave) messages from dlm via uevents */ -#define MAX_LINE_UEVENT 256 +#define MAX_LINE_UEVENT 4096 static void process_uevent(int ci) { -- 2.31.1
[Cluster-devel] [PATCHv2 dlm-tool 3/3] dlm_controld: better uevent filtering
When I did test with dlm_locktorture module I got several log messages about: uevent message has 3 args: add@/module/dlm_locktorture uevent message has 3 args: remove@/module/dlm_locktorture which are not expected and not able to parse by dlm_controld process_uevent() function, because mismatch of argument counts. Debugging it more, I figured out that those uevents are for loading/unloading the dlm_locktorture module and there are uevents for loading and unloading modules which have nothing todo with dlm lockspace uevent handling. The current filter works as: if (!strstr(buf, "dlm")) for matching the dlm joining/leaving uevent string which looks like: offline@/kernel/dlm/locktorture to avoid matching with other uevent which has somehow the string "dlm" in it, we switch to the match the uevent env variables for action, devpath (just to check if it's set) and subsystem. Additional the dlm subsystem sets the LOCKSPACE variable which can be used to get the lockspace name instead of extracting it previously from the devpath. The code to decode the uevent envs were taken from the gfs2_controld utility [0]. [0] https://github.com/andyprice/gfs2-utils/blob/91c3e9a69ed70d3d522f5b47015da5e5868722ec/group/gfs_controld/main.c --- dlm_controld/main.c | 126 +--- 1 file changed, 71 insertions(+), 55 deletions(-) diff --git a/dlm_controld/main.c b/dlm_controld/main.c index 31489d54..c9d1c5f1 100644 --- a/dlm_controld/main.c +++ b/dlm_controld/main.c @@ -46,6 +46,50 @@ struct client { struct lockspace *ls; }; +enum { + Env_ACTION = 0, + Env_DEVPATH, + Env_SUBSYSTEM, + Env_LOCKSPACE, + Env_Last, /* Flag for end of vars */ +}; + +static const char *uevent_vars[] = { + [Env_ACTION]= "ACTION=", + [Env_DEVPATH] = "DEVPATH=", + [Env_SUBSYSTEM] = "SUBSYSTEM=", + [Env_LOCKSPACE] = "LOCKSPACE=", +}; + +static void decode_uevent(const char *buf, unsigned len, const char *vars[], + unsigned nvars, const char *vals[]) +{ + const char *ptr; + unsigned int i; + int slen, vlen; + + memset(vals, 0, sizeof(const char *) * nvars); + + while (len > 0) { + ptr = buf; + slen = strlen(ptr); + buf += slen; + len -= slen; + buf++; + len--; + + for (i = 0; i < nvars; i++) { + vlen = strlen(vars[i]); + if (vlen > slen) + continue; + if (memcmp(vars[i], ptr, vlen) != 0) + continue; + vals[i] = ptr + vlen; + break; + } + } +} + int do_read(int fd, void *buf, size_t count) { int rv, off = 0; @@ -627,38 +671,6 @@ static void fs_register_del(char *name) } } -#define MAXARGS 8 - -static char *get_args(char *buf, int *argc, char **argv, char sep, int want) -{ - char *p = buf, *rp = NULL; - int i; - - argv[0] = p; - - for (i = 1; i < MAXARGS; i++) { - p = strchr(buf, sep); - if (!p) - break; - *p = '\0'; - - if (want == i) { - rp = p + 1; - break; - } - - argv[i] = p + 1; - buf = p + 1; - } - *argc = i; - - /* we ended by hitting \0, return the point following that */ - if (!rp) - rp = strchr(buf, '\0') + 1; - - return rp; -} - const char *dlm_mode_str(int mode) { switch (mode) { @@ -686,13 +698,12 @@ const char *dlm_mode_str(int mode) static void process_uevent(int ci) { + const char *uevent_vals[Env_Last]; struct lockspace *ls; char buf[MAX_LINE_UEVENT]; - char *argv[MAXARGS], *act, *sys; - int rv, argc = 0; + int rv; memset(buf, 0, sizeof(buf)); - memset(argv, 0, sizeof(char *) * MAXARGS); retry_recv: rv = recv(client[ci].fd, , sizeof(buf), 0); @@ -704,35 +715,38 @@ static void process_uevent(int ci) return; } - if (!strstr(buf, "dlm")) - return; + decode_uevent(buf, rv, uevent_vars, Env_Last, uevent_vals); - log_debug("uevent: %s", buf); - - get_args(buf, , argv, '/', 4); - if (argc != 4) - log_error("uevent message has %d args", argc); - act = argv[0]; - sys = argv[2]; - - if (!act || !sys || !argv[3]) + if (!uevent_vals[Env_ACTION] || + !uevent_vals[Env_DEVPATH] || + !uevent_vals[Env_SUBSYSTEM] || + !uevent_vals[Env_LOCKSPACE]) { + log_debug("failed to validate uevent, action: %p, devpath: %p, subsystem: %p, lockspace: %p", + uevent_vals[Env_ACTION],
Re: [Cluster-devel] [PATCH dlm-tool] dlm_controld: better uevent filtering
Hi, On Mon, Jan 16, 2023 at 9:26 AM Alexander Aring wrote: > > Hi, > > On Mon, Jan 16, 2023 at 5:36 AM Steven Whitehouse wrote: > > > > Hi, > > > > On Fri, 2023-01-13 at 17:43 -0500, Alexander Aring wrote: > > > When I did test with dlm_locktorture module I got several log > > > messages > > > about: > > > > > > uevent message has 3 args: add@/module/dlm_locktorture > > > uevent message has 3 args: remove@/module/dlm_locktorture > > > > > > which are not expected and not able to parse by dlm_controld > > > process_uevent() function, because mismatch of argument counts. > > > Debugging it more, I figured out that those uevents are for > > > loading/unloading the dlm_locktorture module and there are uevents > > > for > > > loading and unloading modules which have nothing todo with dlm > > > lockspace > > > uevent handling. > > > > > > The current filter works as: > > > > > > if (!strstr(buf, "dlm")) > > > > > I think that is the problem. If you want to filter out all events > > except those sent by the DLM module, then you need to look at the > > variables sent along with the request. Otherwise whatever string you > > look for here might appear in some other random request from a > > different subsystem. The event variables are much easier to parse than > > the actual event string... > > > > See a similar example in decode_uevent(), etc here: > > > > https://github.com/andyprice/gfs2-utils/blob/91c3e9a69ed70d3d522f5b47015da5e5868722ec/group/gfs_controld/main.c > > > > There are probably nicer ways of doing that, than what I did there, but > > it makes it is easier to get at the variables that are passed with the > > notification, than to try and parse the first line of the response, > > This requires us to switch to the kernel uevent trigger function call > from kobject_uevent() [0] to kobject_uevent_env() [1], because we > don't have those envs like SUBSYSTEM being sent right now. > I was curious because I was sure that I printed out the "raw string" > from udev netlink socket and didn't see those envs, otherwise I > probably would use it if I saw those. Now I figured out we need to > have a UAPI change for that. > > Unfortunately, for me it looks like older dlm_controld versions cannot > handle such change. after Steven mentioned to me that kobject_uevent() calls kobject_uevent_env(), I saw that there is the SUBSYSTEM environment, etc. in there after the first nul terminated string. I will send a v2 based on the gfs2_controld parser to not parse the first "offline@/kernel/dlm/locktorture" string anymore and get each field by its env which works as a key-value pair. - Alex
Re: [Cluster-devel] [RFC v6 04/10] iomap: Add iomap_get_folio helper
On Mon, Jan 16, 2023 at 01:18:07PM +, Matthew Wilcox wrote: > Essentially reverting 44835d20b2a0. Yep. > Although we retain the merging of > the lock & get functions via the use of FGP flags. Let me think about > it for a day. Yes. But looking at the code again I wonder if even that is needed. Out of the users of FGP_ENTRY / __filemap_get_folio_entry: - split_huge_pages_in_file really should not be using it at all, given that it checks for xa_is_value and treats that as !folio - one doesn't pass FGP_LOCK and could just use filemap_get_entry - the othr two are in shmem, so we could move the locking logic there (and maybe in future optimize it in the callers) That would be something like this, although it should be split into two or three patches: diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 29e1f9e76eb6dd..ecd1ff40a80621 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -504,9 +504,9 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_NOFS 0x0010 #define FGP_NOWAIT 0x0020 #define FGP_FOR_MMAP 0x0040 -#define FGP_ENTRY 0x0080 -#define FGP_STABLE 0x0100 +#define FGP_STABLE 0x0080 +void *filemap_get_entry(struct address_space *mapping, pgoff_t index); struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, int fgp_flags, gfp_t gfp); struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, diff --git a/mm/filemap.c b/mm/filemap.c index c4d4ace9cc7003..85bd86c44e14d2 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1832,7 +1832,7 @@ EXPORT_SYMBOL(page_cache_prev_miss); */ /* - * mapping_get_entry - Get a page cache entry. + * filemap_get_entry - Get a page cache entry. * @mapping: the address_space to search * @index: The page cache index. * @@ -1843,7 +1843,7 @@ EXPORT_SYMBOL(page_cache_prev_miss); * * Return: The folio, swap or shadow entry, %NULL if nothing is found. */ -static void *mapping_get_entry(struct address_space *mapping, pgoff_t index) +void *filemap_get_entry(struct address_space *mapping, pgoff_t index) { XA_STATE(xas, >i_pages, index); struct folio *folio; @@ -1887,8 +1887,6 @@ static void *mapping_get_entry(struct address_space *mapping, pgoff_t index) * * * %FGP_ACCESSED - The folio will be marked accessed. * * %FGP_LOCK - The folio is returned locked. - * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it - * instead of allocating a new folio to replace it. * * %FGP_CREAT - If no page is present then a new page is allocated using * @gfp and added to the page cache and the VM's LRU list. * The page is returned locked and with an increased refcount. @@ -1913,12 +1911,9 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, struct folio *folio; repeat: - folio = mapping_get_entry(mapping, index); - if (xa_is_value(folio)) { - if (fgp_flags & FGP_ENTRY) - return folio; + folio = filemap_get_entry(mapping, index); + if (xa_is_value(folio)) folio = NULL; - } if (!folio) goto no_page; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index abe6cfd92ffa0e..b182eb99044e9a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3088,11 +3088,10 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start, mapping = candidate->f_mapping; for (index = off_start; index < off_end; index += nr_pages) { - struct folio *folio = __filemap_get_folio(mapping, index, - FGP_ENTRY, 0); + struct folio *folio = filemap_get_folio(mapping, index); nr_pages = 1; - if (xa_is_value(folio) || !folio) + if (!folio) continue; if (!folio_test_large(folio)) diff --git a/mm/shmem.c b/mm/shmem.c index 028675cd97d445..4650192dbcb91b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -880,6 +880,28 @@ void shmem_unlock_mapping(struct address_space *mapping) } } +static struct folio *shmem_get_entry(struct address_space *mapping, + pgoff_t index) +{ + struct folio *folio; + +repeat: + folio = filemap_get_entry(mapping, index); + if (folio && !xa_is_value(folio)) { + folio_lock(folio); + + /* Has the page been truncated? */ + if (unlikely(folio->mapping != mapping)) { + folio_unlock(folio); + folio_put(folio); + goto repeat; + } + VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); + } + + return folio; +} + static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index) {
Re: [Cluster-devel] [PATCH dlm-tool] dlm_controld: better uevent filtering
Hi, On Mon, Jan 16, 2023 at 5:36 AM Steven Whitehouse wrote: > > Hi, > > On Fri, 2023-01-13 at 17:43 -0500, Alexander Aring wrote: > > When I did test with dlm_locktorture module I got several log > > messages > > about: > > > > uevent message has 3 args: add@/module/dlm_locktorture > > uevent message has 3 args: remove@/module/dlm_locktorture > > > > which are not expected and not able to parse by dlm_controld > > process_uevent() function, because mismatch of argument counts. > > Debugging it more, I figured out that those uevents are for > > loading/unloading the dlm_locktorture module and there are uevents > > for > > loading and unloading modules which have nothing todo with dlm > > lockspace > > uevent handling. > > > > The current filter works as: > > > > if (!strstr(buf, "dlm")) > > > I think that is the problem. If you want to filter out all events > except those sent by the DLM module, then you need to look at the > variables sent along with the request. Otherwise whatever string you > look for here might appear in some other random request from a > different subsystem. The event variables are much easier to parse than > the actual event string... > > See a similar example in decode_uevent(), etc here: > > https://github.com/andyprice/gfs2-utils/blob/91c3e9a69ed70d3d522f5b47015da5e5868722ec/group/gfs_controld/main.c > > There are probably nicer ways of doing that, than what I did there, but > it makes it is easier to get at the variables that are passed with the > notification, than to try and parse the first line of the response, This requires us to switch to the kernel uevent trigger function call from kobject_uevent() [0] to kobject_uevent_env() [1], because we don't have those envs like SUBSYSTEM being sent right now. I was curious because I was sure that I printed out the "raw string" from udev netlink socket and didn't see those envs, otherwise I probably would use it if I saw those. Now I figured out we need to have a UAPI change for that. Unfortunately, for me it looks like older dlm_controld versions cannot handle such change. - Alex [0] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/dlm/lockspace.c?h=v6.2-rc4#n200 [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/kobject_uevent.c?h=v6.2-rc4#n447
Re: [Cluster-devel] [RFC v6 04/10] iomap: Add iomap_get_folio helper
On Sun, Jan 15, 2023 at 11:34:26PM -0800, Christoph Hellwig wrote: > We could do that. But while reading what Darrick wrote I came up with > another idea I quite like. Just split the FGP_ENTRY handling into > a separate helper. The logic and use cases are quite different from > the normal page cache lookup, and the returning of the xarray entry > is exactly the kind of layering violation that Dave is complaining > about. So what about just splitting that use case into a separate > self contained helper? Essentially reverting 44835d20b2a0. Although we retain the merging of the lock & get functions via the use of FGP flags. Let me think about it for a day. > --- > >From b4d10f98ea57f8480c03c0b00abad6f2b7186f56 Mon Sep 17 00:00:00 2001 > From: Christoph Hellwig > Date: Mon, 16 Jan 2023 08:26:57 +0100 > Subject: mm: replace FGP_ENTRY with a new __filemap_get_folio_entry helper > > Split the xarray entry returning logic into a separate helper. This will > allow returning ERR_PTRs from __filemap_get_folio, and also isolates the > logic that needs to known about xarray internals into a separate > function. This causes some code duplication, but as most flags to > __filemap_get_folio are not applicable for the users that care about an > entry that amount is very limited. > > Signed-off-by: Christoph Hellwig > --- > include/linux/pagemap.h | 6 +++-- > mm/filemap.c| 50 - > mm/huge_memory.c| 4 ++-- > mm/shmem.c | 5 ++--- > mm/swap_state.c | 2 +- > 5 files changed, 53 insertions(+), 14 deletions(-) > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > index 4b3a7124c76712..e06c14b610caf2 100644 > --- a/include/linux/pagemap.h > +++ b/include/linux/pagemap.h > @@ -504,8 +504,7 @@ pgoff_t page_cache_prev_miss(struct address_space > *mapping, > #define FGP_NOFS 0x0010 > #define FGP_NOWAIT 0x0020 > #define FGP_FOR_MMAP 0x0040 > -#define FGP_ENTRY0x0080 > -#define FGP_STABLE 0x0100 > +#define FGP_STABLE 0x0080 > > struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t > index, > int fgp_flags, gfp_t gfp); > @@ -546,6 +545,9 @@ static inline struct folio *filemap_lock_folio(struct > address_space *mapping, > return __filemap_get_folio(mapping, index, FGP_LOCK, 0); > } > > +struct folio *__filemap_get_folio_entry(struct address_space *mapping, > + pgoff_t index, int fgp_flags); > + > /** > * find_get_page - find and get a page reference > * @mapping: the address_space to search > diff --git a/mm/filemap.c b/mm/filemap.c > index c4d4ace9cc7003..d04613347b3e71 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -1887,8 +1887,6 @@ static void *mapping_get_entry(struct address_space > *mapping, pgoff_t index) > * > * * %FGP_ACCESSED - The folio will be marked accessed. > * * %FGP_LOCK - The folio is returned locked. > - * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it > - * instead of allocating a new folio to replace it. > * * %FGP_CREAT - If no page is present then a new page is allocated using > * @gfp and added to the page cache and the VM's LRU list. > * The page is returned locked and with an increased refcount. > @@ -1914,11 +1912,8 @@ struct folio *__filemap_get_folio(struct address_space > *mapping, pgoff_t index, > > repeat: > folio = mapping_get_entry(mapping, index); > - if (xa_is_value(folio)) { > - if (fgp_flags & FGP_ENTRY) > - return folio; > + if (xa_is_value(folio)) > folio = NULL; > - } > if (!folio) > goto no_page; > > @@ -1994,6 +1989,49 @@ struct folio *__filemap_get_folio(struct address_space > *mapping, pgoff_t index, > } > EXPORT_SYMBOL(__filemap_get_folio); > > + > +/** > + * __filemap_get_folio_entry - Find and get a reference to a folio. > + * @mapping: The address_space to search. > + * @index: The page index. > + * @fgp_flags: %FGP flags modify how the folio is returned. > + * > + * Looks up the page cache entry at @mapping & @index. If there is a shadow > / > + * swap / DAX entry, return it instead of allocating a new folio to replace > it. > + * > + * @fgp_flags can be zero or more of these flags: > + * > + * * %FGP_LOCK - The folio is returned locked. > + * > + * If there is a page cache page, it is returned with an increased refcount. > + * > + * Return: The found folio or %NULL otherwise. > + */ > +struct folio *__filemap_get_folio_entry(struct address_space *mapping, > + pgoff_t index, int fgp_flags) > +{ > + struct folio *folio; > + > + if (WARN_ON_ONCE(fgp_flags & ~FGP_LOCK)) > + return NULL; > + > +repeat: > + folio = mapping_get_entry(mapping, index); > + if (folio && !xa_is_value(folio) && (fgp_flags & FGP_LOCK)) { > +
Re: [Cluster-devel] [PATCH dlm-tool] dlm_controld: better uevent filtering
Hi, On Fri, 2023-01-13 at 17:43 -0500, Alexander Aring wrote: > When I did test with dlm_locktorture module I got several log > messages > about: > > uevent message has 3 args: add@/module/dlm_locktorture > uevent message has 3 args: remove@/module/dlm_locktorture > > which are not expected and not able to parse by dlm_controld > process_uevent() function, because mismatch of argument counts. > Debugging it more, I figured out that those uevents are for > loading/unloading the dlm_locktorture module and there are uevents > for > loading and unloading modules which have nothing todo with dlm > lockspace > uevent handling. > > The current filter works as: > > if (!strstr(buf, "dlm")) > I think that is the problem. If you want to filter out all events except those sent by the DLM module, then you need to look at the variables sent along with the request. Otherwise whatever string you look for here might appear in some other random request from a different subsystem. The event variables are much easier to parse than the actual event string... See a similar example in decode_uevent(), etc here: https://github.com/andyprice/gfs2-utils/blob/91c3e9a69ed70d3d522f5b47015da5e5868722ec/group/gfs_controld/main.c There are probably nicer ways of doing that, than what I did there, but it makes it is easier to get at the variables that are passed with the notification, than to try and parse the first line of the response, Steve. > for matching the dlm joining/leaving uevent string which looks like: > > offline@/kernel/dlm/locktorture > > to avoid matching with other uevent which has somehow the string > "dlm" > in it, we switch to the match "/dlm/" which should match only to dlm > uevent system events. Uevent uses itself '/' as a separator in the > hope > that uevents cannot put a '/' as application data for an event. > --- > dlm_controld/main.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/dlm_controld/main.c b/dlm_controld/main.c > index 7cf6348e..40689c5c 100644 > --- a/dlm_controld/main.c > +++ b/dlm_controld/main.c > @@ -704,7 +704,7 @@ static void process_uevent(int ci) > return; > } > > - if (!strstr(buf, "dlm")) > + if (!strstr(buf, "/dlm/")) > return; > > log_debug("uevent: %s", buf);
Re: [Cluster-devel] [RFC v6 04/10] iomap: Add iomap_get_folio helper
On Mon, Jan 16, 2023 at 05:46:01AM +, Matthew Wilcox wrote: > > OFC now I wonder, can we simply say that the return value is "The found > > folio or NULL if you set FGP_ENTRY; or the found folio or a negative > > errno if you don't" ? > > Erm ... I would rather not! Agreed. > > Part of me remembers that x86-64 has the rather nice calling convention > of being able to return a struct containing two values in two registers: We could do that. But while reading what Darrick wrote I came up with another idea I quite like. Just split the FGP_ENTRY handling into a separate helper. The logic and use cases are quite different from the normal page cache lookup, and the returning of the xarray entry is exactly the kind of layering violation that Dave is complaining about. So what about just splitting that use case into a separate self contained helper? --- >From b4d10f98ea57f8480c03c0b00abad6f2b7186f56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Jan 2023 08:26:57 +0100 Subject: mm: replace FGP_ENTRY with a new __filemap_get_folio_entry helper Split the xarray entry returning logic into a separate helper. This will allow returning ERR_PTRs from __filemap_get_folio, and also isolates the logic that needs to known about xarray internals into a separate function. This causes some code duplication, but as most flags to __filemap_get_folio are not applicable for the users that care about an entry that amount is very limited. Signed-off-by: Christoph Hellwig --- include/linux/pagemap.h | 6 +++-- mm/filemap.c| 50 - mm/huge_memory.c| 4 ++-- mm/shmem.c | 5 ++--- mm/swap_state.c | 2 +- 5 files changed, 53 insertions(+), 14 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4b3a7124c76712..e06c14b610caf2 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -504,8 +504,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_NOFS 0x0010 #define FGP_NOWAIT 0x0020 #define FGP_FOR_MMAP 0x0040 -#define FGP_ENTRY 0x0080 -#define FGP_STABLE 0x0100 +#define FGP_STABLE 0x0080 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, int fgp_flags, gfp_t gfp); @@ -546,6 +545,9 @@ static inline struct folio *filemap_lock_folio(struct address_space *mapping, return __filemap_get_folio(mapping, index, FGP_LOCK, 0); } +struct folio *__filemap_get_folio_entry(struct address_space *mapping, + pgoff_t index, int fgp_flags); + /** * find_get_page - find and get a page reference * @mapping: the address_space to search diff --git a/mm/filemap.c b/mm/filemap.c index c4d4ace9cc7003..d04613347b3e71 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1887,8 +1887,6 @@ static void *mapping_get_entry(struct address_space *mapping, pgoff_t index) * * * %FGP_ACCESSED - The folio will be marked accessed. * * %FGP_LOCK - The folio is returned locked. - * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it - * instead of allocating a new folio to replace it. * * %FGP_CREAT - If no page is present then a new page is allocated using * @gfp and added to the page cache and the VM's LRU list. * The page is returned locked and with an increased refcount. @@ -1914,11 +1912,8 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, repeat: folio = mapping_get_entry(mapping, index); - if (xa_is_value(folio)) { - if (fgp_flags & FGP_ENTRY) - return folio; + if (xa_is_value(folio)) folio = NULL; - } if (!folio) goto no_page; @@ -1994,6 +1989,49 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, } EXPORT_SYMBOL(__filemap_get_folio); + +/** + * __filemap_get_folio_entry - Find and get a reference to a folio. + * @mapping: The address_space to search. + * @index: The page index. + * @fgp_flags: %FGP flags modify how the folio is returned. + * + * Looks up the page cache entry at @mapping & @index. If there is a shadow / + * swap / DAX entry, return it instead of allocating a new folio to replace it. + * + * @fgp_flags can be zero or more of these flags: + * + * * %FGP_LOCK - The folio is returned locked. + * + * If there is a page cache page, it is returned with an increased refcount. + * + * Return: The found folio or %NULL otherwise. + */ +struct folio *__filemap_get_folio_entry(struct address_space *mapping, + pgoff_t index, int fgp_flags) +{ + struct folio *folio; + + if (WARN_ON_ONCE(fgp_flags & ~FGP_LOCK)) + return NULL; + +repeat: + folio = mapping_get_entry(mapping, index); + if (folio && !xa_is_value(folio) && (fgp_flags & FGP_LOCK)) { +