[PATCH 2/2] ndctl: merge firmware-update to dimm.c as one of the dimm ops

2018-02-27 Thread Dave Jiang
Since update-firmware replicate a lot of the parsing code that dimm.c
already uses, moving update-firmware to dimm.c and utilize the already
working functionalities.

Signed-off-by: Dave Jiang 
---
 ndctl/Makefile.am |1 
 ndctl/dimm.c  |  495 
 ndctl/update.c|  549 -
 3 files changed, 493 insertions(+), 552 deletions(-)
 delete mode 100644 ndctl/update.c

diff --git a/ndctl/Makefile.am b/ndctl/Makefile.am
index e0db97ba..213cabd7 100644
--- a/ndctl/Makefile.am
+++ b/ndctl/Makefile.am
@@ -15,7 +15,6 @@ ndctl_SOURCES = ndctl.c \
util/json-smart.c \
util/json-firmware.c \
inject-error.c \
-   update.c \
inject-smart.c
 
 if ENABLE_DESTRUCTIVE
diff --git a/ndctl/dimm.c b/ndctl/dimm.c
index 7259506f..576e6342 100644
--- a/ndctl/dimm.c
+++ b/ndctl/dimm.c
@@ -13,6 +13,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -29,11 +31,49 @@
 #include 
 #include 
 
+#define ND_CMD_STATUS_SUCCESS  0
+#define ND_CMD_STATUS_NOTSUPP  1
+#defineND_CMD_STATUS_NOTEXIST  2
+#define ND_CMD_STATUS_INVALPARM3
+#define ND_CMD_STATUS_HWERR4
+#define ND_CMD_STATUS_RETRY5
+#define ND_CMD_STATUS_UNKNOWN  6
+#define ND_CMD_STATUS_EXTEND   7
+#define ND_CMD_STATUS_NORES8
+#define ND_CMD_STATUS_NOTREADY 9
+
+#define ND_CMD_STATUS_START_BUSY   0x1
+#define ND_CMD_STATUS_SEND_CTXINVAL0x1
+#define ND_CMD_STATUS_FIN_CTXINVAL 0x1
+#define ND_CMD_STATUS_FIN_DONE 0x2
+#define ND_CMD_STATUS_FIN_BAD  0x3
+#define ND_CMD_STATUS_FIN_ABORTED  0x4
+#define ND_CMD_STATUS_FQ_CTXINVAL  0x1
+#define ND_CMD_STATUS_FQ_BUSY  0x2
+#define ND_CMD_STATUS_FQ_BAD   0x3
+#define ND_CMD_STATUS_FQ_ORDER 0x4
+
+struct fw_info {
+   uint32_t store_size;
+   uint32_t update_size;
+   uint32_t query_interval;
+   uint32_t max_query;
+   uint64_t run_version;
+   uint32_t context;
+};
+
+struct update_context {
+   size_t fw_size;
+   struct fw_info dimm_fw;
+   struct ndctl_cmd *start;
+};
+
 struct action_context {
struct json_object *jdimms;
enum ndctl_namespace_version labelversion;
FILE *f_out;
FILE *f_in;
+   struct update_context update;
 };
 
 static int action_disable(struct ndctl_dimm *dimm, struct action_context *actx)
@@ -371,6 +411,433 @@ static int action_read(struct ndctl_dimm *dimm, struct 
action_context *actx)
return rc;
 }
 
+static int update_verify_input(struct action_context *actx)
+{
+   int rc;
+   struct stat st;
+
+   rc = fstat(fileno(actx->f_in), );
+   if (rc == -1) {
+   rc = -errno;
+   fprintf(stderr, "fstat failed: %s\n", strerror(errno));
+   return rc;
+   }
+
+   if (!S_ISREG(st.st_mode)) {
+   fprintf(stderr, "Input not a regular file.\n");
+   return -EINVAL;
+   }
+
+   if (st.st_size == 0) {
+   fprintf(stderr, "Input file size is 0.\n");
+   return -EINVAL;
+   }
+
+   actx->update.fw_size = st.st_size;
+   return 0;
+}
+
+static int verify_fw_size(struct update_context *uctx)
+{
+   struct fw_info *fw = >dimm_fw;
+
+   if (uctx->fw_size > fw->store_size) {
+   error("Firmware file size greater than DIMM store\n");
+   return -ENOSPC;
+   }
+
+   return 0;
+}
+
+static int submit_get_firmware_info(struct ndctl_dimm *dimm,
+   struct action_context *actx)
+{
+   struct update_context *uctx = >update;
+   struct fw_info *fw = >dimm_fw;
+   struct ndctl_cmd *cmd;
+   int rc;
+   enum ND_FW_STATUS status;
+
+   cmd = ndctl_dimm_cmd_new_fw_get_info(dimm);
+   if (!cmd)
+   return -ENXIO;
+
+   rc = ndctl_cmd_submit(cmd);
+   if (rc < 0)
+   return rc;
+
+   status = ndctl_cmd_fw_xlat_firmware_status(cmd);
+   if (status != FW_SUCCESS) {
+   fprintf(stderr, "GET FIRMWARE INFO on DIMM %s failed: %#x\n",
+   ndctl_dimm_get_devname(dimm), status);
+   return -ENXIO;
+   }
+
+   fw->store_size = ndctl_cmd_fw_info_get_storage_size(cmd);
+   if (fw->store_size == UINT_MAX)
+   return -ENXIO;
+
+   fw->update_size = ndctl_cmd_fw_info_get_max_send_len(cmd);
+   if (fw->update_size == UINT_MAX)
+   return -ENXIO;
+
+   fw->query_interval = ndctl_cmd_fw_info_get_query_interval(cmd);
+   if (fw->query_interval == UINT_MAX)
+   return -ENXIO;
+
+   fw->max_query = ndctl_cmd_fw_info_get_max_query_time(cmd);
+   if (fw->max_query == UINT_MAX)
+   return -ENXIO;
+
+   fw->run_version = 

[PATCH 1/2] ndctl: add check for update firmware supported

2018-02-27 Thread Dave Jiang
Adding generic and intel support function to allow check if update firmware
is supported by the kernel.

Signed-off-by: Dave Jiang 
---
 ndctl/lib/firmware.c   |   11 +++
 ndctl/lib/intel.c  |   24 
 ndctl/lib/libndctl.sym |1 +
 ndctl/lib/private.h|1 +
 ndctl/libndctl.h   |1 +
 ndctl/update.c |4 
 6 files changed, 42 insertions(+)

diff --git a/ndctl/lib/firmware.c b/ndctl/lib/firmware.c
index f6deec5d..78d753ca 100644
--- a/ndctl/lib/firmware.c
+++ b/ndctl/lib/firmware.c
@@ -107,3 +107,14 @@ ndctl_cmd_fw_xlat_firmware_status(struct ndctl_cmd *cmd)
else
return FW_EUNKNOWN;
 }
+
+NDCTL_EXPORT bool
+ndctl_dimm_fw_update_supported(struct ndctl_dimm *dimm)
+{
+   struct ndctl_dimm_ops *ops = dimm->ops;
+
+   if (ops && ops->fw_update_supported)
+   return ops->fw_update_supported(dimm);
+   else
+   return false;
+}
diff --git a/ndctl/lib/intel.c b/ndctl/lib/intel.c
index cee5204c..7d976c50 100644
--- a/ndctl/lib/intel.c
+++ b/ndctl/lib/intel.c
@@ -650,6 +650,29 @@ intel_dimm_cmd_new_lss(struct ndctl_dimm *dimm)
return cmd;
 }
 
+static bool intel_dimm_fw_update_supported(struct ndctl_dimm *dimm)
+{
+   struct ndctl_ctx *ctx = ndctl_dimm_get_ctx(dimm);
+
+   if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL)) {
+   dbg(ctx, "unsupported cmd: %d\n", ND_CMD_CALL);
+   return false;
+   }
+
+   /*
+* We only need to check FW_GET_INFO. If that isn't supported then
+* the others aren't either.
+*/
+   if (test_dimm_dsm(dimm, ND_INTEL_FW_GET_INFO)
+   == DIMM_DSM_UNSUPPORTED) {
+   dbg(ctx, "unsupported function: %d\n",
+   ND_INTEL_FW_GET_INFO);
+   return false;
+   }
+
+   return true;
+}
+
 struct ndctl_dimm_ops * const intel_dimm_ops = &(struct ndctl_dimm_ops) {
.cmd_desc = intel_cmd_desc,
.new_smart = intel_dimm_cmd_new_smart,
@@ -703,4 +726,5 @@ struct ndctl_dimm_ops * const intel_dimm_ops = &(struct 
ndctl_dimm_ops) {
.fw_fquery_get_fw_rev = intel_cmd_fw_fquery_get_fw_rev,
.fw_xlat_firmware_status = intel_cmd_fw_xlat_firmware_status,
.new_ack_shutdown_count = intel_dimm_cmd_new_lss,
+   .fw_update_supported = intel_dimm_fw_update_supported,
 };
diff --git a/ndctl/lib/libndctl.sym b/ndctl/lib/libndctl.sym
index af9b7d54..cc580f9c 100644
--- a/ndctl/lib/libndctl.sym
+++ b/ndctl/lib/libndctl.sym
@@ -344,4 +344,5 @@ global:
ndctl_cmd_fw_fquery_get_fw_rev;
ndctl_cmd_fw_xlat_firmware_status;
ndctl_dimm_cmd_new_ack_shutdown_count;
+   ndctl_dimm_fw_update_supported;
 } LIBNDCTL_13;
diff --git a/ndctl/lib/private.h b/ndctl/lib/private.h
index 015eeb2d..ae4454cf 100644
--- a/ndctl/lib/private.h
+++ b/ndctl/lib/private.h
@@ -325,6 +325,7 @@ struct ndctl_dimm_ops {
unsigned long long (*fw_fquery_get_fw_rev)(struct ndctl_cmd *);
enum ND_FW_STATUS (*fw_xlat_firmware_status)(struct ndctl_cmd *);
struct ndctl_cmd *(*new_ack_shutdown_count)(struct ndctl_dimm *);
+   bool (*fw_update_supported)(struct ndctl_dimm *);
 };
 
 struct ndctl_dimm_ops * const intel_dimm_ops;
diff --git a/ndctl/libndctl.h b/ndctl/libndctl.h
index 9db775ba..08030d35 100644
--- a/ndctl/libndctl.h
+++ b/ndctl/libndctl.h
@@ -625,6 +625,7 @@ unsigned int ndctl_cmd_fw_start_get_context(struct 
ndctl_cmd *cmd);
 unsigned long long ndctl_cmd_fw_fquery_get_fw_rev(struct ndctl_cmd *cmd);
 enum ND_FW_STATUS ndctl_cmd_fw_xlat_firmware_status(struct ndctl_cmd *cmd);
 struct ndctl_cmd *ndctl_dimm_cmd_new_ack_shutdown_count(struct ndctl_dimm 
*dimm);
+bool ndctl_dimm_fw_update_supported(struct ndctl_dimm *dimm);
 
 #ifdef __cplusplus
 } /* extern "C" */
diff --git a/ndctl/update.c b/ndctl/update.c
index 0f0f0d81..72f5839b 100644
--- a/ndctl/update.c
+++ b/ndctl/update.c
@@ -454,6 +454,10 @@ static int get_ndctl_dimm(struct update_context *uctx, 
void *ctx)
ndctl_dimm_foreach(bus, dimm) {
if (!util_dimm_filter(dimm, uctx->dimm_id))
continue;
+   if (!ndctl_dimm_fw_update_supported(dimm)) {
+   error("DIMM firmware update not supported by 
the kernel.");
+   return -ENOTSUP;
+   }
uctx->dimm = dimm;
return 0;
}

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v4 00/12] vfio, dax: prevent long term filesystem-dax pins and other fixes

2018-02-27 Thread Dave Chinner
On Mon, Feb 26, 2018 at 08:19:54PM -0800, Dan Williams wrote:
> The following series implements...
> Changes since v3 [1]:
> 
> * Kill IS_DAX() in favor of explicit IS_FSDAX() and IS_DEVDAX() helpers.
>   Jan noted, "having IS_DAX() and IS_FSDAX() doing almost the same, just
>   not exactly the same, is IMHO a recipe for confusion", and I agree. A
>   nice side effect of this elimination is a cleanup to remove occasions of
>   "#ifdef CONFIG_FS_DAX" in C files, it is all moved to header files
>   now. (Jan)

Dan, can you please stop sending random patches in a patch set to
random lists?  Your patchsets are hitting 4 or 5 different procmail
filters here and so it gets split across several different mailing
list buckets. It's really annoying to have to go reconstruct every
patch set you send back into a single series in a single bucket

Can you please fix up your patch set sending again?

-Dave.
-- 
Dave Chinner
da...@fromorbit.com
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: xfstests 344 deadlock on NOVA

2018-02-27 Thread Dave Chinner
On Tue, Feb 27, 2018 at 11:15:55AM -0800, Andiry Xu wrote:
> Hi,
> 
> I encounter a problem when running xfstests on NOVA. I appreciate your
> help very much.
> 
> Some background: NOVA adopts a per-inode logging design. Metadata
> changes are appended to the log and persisted before returning to the
> user space. For example, a write() in NOVA works like this:
> 
> Allocate new pmem blocks and fill with user data
> Append the metadata that describes this write to the end of the inode log
> Update the log tail pointer atomically to commit the write
> Update in-DRAM radix tree to point to the metadata (for fast lookup)
> 
> The log appending and radix tree update are protected by inode_lock().
> 
> For mmap, nova_dax_get_blocks (similar to ext2_get_blocks)  needs to
> persist the metadata for new allocations. So it has to append the new
> allocation metadata to the log, and hence needs to lock the inode.
> This causes deadlock in xfstests 344 with concurrent pwrite and mmap
> write:
> 
> Thread 1:
> pwrite
> -> nova_dax_file_write()
> ---> inode_lock()
> -> invalidate_inode_pages2_range()
> ---> schedule()

Why did this thread schedule here?

> Thread 2:
> dax_fault
> -> nova_dax_get_blocks()
> ---> inode_lock() // deadlock

It's just waiting on an inode_lock() to be released by another
thread. What resource is it holding locked that the first thread
needs to make progress?

> If I remove invalidate_inode_pages2_range() in write path, xfstests
> 344 passed, but 428 will fail.
> 
> It appears to me that ext2/ext4 does not have this issue because they
> don't persist metadata immediately and hence do not take inode lock.

Did you realise that you can't take the inode->i_rwsem in the page
fault path (i.e. under the mmap_sem) because that's a known deadlock
vector against the read/write IO path?

(i.e. you can use a mmap'd buffer over a range of the same file as
the data buffer for the IO, then take a page fault when trying to
copy data into/out of that buffer while holding the inode->i_rwsem)

> But nova_dax_get_blocks() has to persist the metadata and needs to
> lock the inode to access the log. Is there a way to workaround this?
> Thank you very much.

I'm pretty sure you don't want to use inode->i_rwsem for internal
metadata serialisation requirements. XFS uses xfs_inode->i_ilock for
this, ext4 uses a combination of other locks, etc, and it's done to
separate internal serialisation requirements from user data access
and VFS serialisation requirements...

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


xfstests 344 deadlock on NOVA

2018-02-27 Thread Andiry Xu
Hi,

I encounter a problem when running xfstests on NOVA. I appreciate your
help very much.

Some background: NOVA adopts a per-inode logging design. Metadata
changes are appended to the log and persisted before returning to the
user space. For example, a write() in NOVA works like this:

Allocate new pmem blocks and fill with user data
Append the metadata that describes this write to the end of the inode log
Update the log tail pointer atomically to commit the write
Update in-DRAM radix tree to point to the metadata (for fast lookup)

The log appending and radix tree update are protected by inode_lock().

For mmap, nova_dax_get_blocks (similar to ext2_get_blocks)  needs to
persist the metadata for new allocations. So it has to append the new
allocation metadata to the log, and hence needs to lock the inode.
This causes deadlock in xfstests 344 with concurrent pwrite and mmap
write:

Thread 1:
pwrite
-> nova_dax_file_write()
---> inode_lock()
-> invalidate_inode_pages2_range()
---> schedule()

Thread 2:
dax_fault
-> nova_dax_get_blocks()
---> inode_lock() // deadlock

If I remove invalidate_inode_pages2_range() in write path, xfstests
344 passed, but 428 will fail.

It appears to me that ext2/ext4 does not have this issue because they
don't persist metadata immediately and hence do not take inode lock.
But nova_dax_get_blocks() has to persist the metadata and needs to
lock the inode to access the log. Is there a way to workaround this?
Thank you very much.

Thanks,
Andiry
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[PATCH 3/3] nfit_test: prevent parsing error of nfit_test.0

2018-02-27 Thread Ross Zwisler
When you load nfit_test you currently see the following error in dmesg:

 nfit_test nfit_test.0: found a zero length table '0' parsing nfit

This happens because when we parse the nfit_test.0 table via
acpi_nfit_init(), we specify a size of nfit_test->nfit_size.  For the first
pass through nfit_test.0 where (t->setup_hotplug == 0) this is the size of
the entire buffer we allocated, including space for the hot plug
structures, not the size that we've actually filled in.

Fix this by only trying to parse the size of the structures that we've
filled in.

Signed-off-by: Ross Zwisler 
---
 tools/testing/nvdimm/test/nfit.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index fcd233342273..d785235ba04e 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -154,6 +154,7 @@ struct nfit_test {
void *nfit_buf;
dma_addr_t nfit_dma;
size_t nfit_size;
+   size_t nfit_filled;
int dcr_idx;
int num_dcr;
int num_pm;
@@ -2053,6 +2054,8 @@ static void nfit_test0_setup(struct nfit_test *t)
WARN_ON(offset != t->nfit_size);
}
 
+   t->nfit_filled = offset;
+
post_ars_status(>ars_state, >badrange, t->spa_set_dma[0],
SPA0_SIZE);
 
@@ -2172,6 +2175,8 @@ static void nfit_test1_setup(struct nfit_test *t)
/* sanity check to make sure we've filled the buffer */
WARN_ON(offset != t->nfit_size);
 
+   t->nfit_filled = offset;
+
post_ars_status(>ars_state, >badrange, t->spa_set_dma[0],
SPA2_SIZE);
 
@@ -2529,7 +2534,7 @@ static int nfit_test_probe(struct platform_device *pdev)
nd_desc->ndctl = nfit_test_ctl;
 
rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
-   nfit_test->nfit_size);
+   nfit_test->nfit_filled);
if (rc)
return rc;
 
-- 
2.14.3

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[PATCH 2/3] nfit_test: fix buffer overrun, add sanity check

2018-02-27 Thread Ross Zwisler
It turns out that we were overrunning the 'nfit_buf' buffer in
nfit_test0_setup() in the (t->setup_hotplug == 1) case because we failed to
correctly account for all of the acpi_nfit_memory_map structures.

Fix the structure count which will increase the allocation size of
'nfit_buf' in nfit_test0_alloc().  Also add some WARN_ON()s to
nfit_test0_setup() and nfit_test1_setup() to catch future issues where the
size of the buffer doesn't match the amount of data we're writing.

Signed-off-by: Ross Zwisler 
---
 tools/testing/nvdimm/test/nfit.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 1376fc95c33a..fcd233342273 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -104,7 +104,8 @@ enum {
NUM_HINTS = 8,
NUM_BDW = NUM_DCR,
NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
-   NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
+   NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */
+   + 4 /* spa1 iset */ + 1 /* spa11 iset */,
DIMM_SIZE = SZ_32M,
LABEL_SIZE = SZ_128K,
SPA_VCD_SIZE = SZ_4M,
@@ -2047,6 +2048,9 @@ static void nfit_test0_setup(struct nfit_test *t)
flush->hint_address[i] = t->flush_dma[4]
+ i * sizeof(u64);
offset += flush->header.length;
+
+   /* sanity check to make sure we've filled the buffer */
+   WARN_ON(offset != t->nfit_size);
}
 
post_ars_status(>ars_state, >badrange, t->spa_set_dma[0],
@@ -2165,6 +2169,9 @@ static void nfit_test1_setup(struct nfit_test *t)
dcr->windows = 0;
offset += dcr->header.length;
 
+   /* sanity check to make sure we've filled the buffer */
+   WARN_ON(offset != t->nfit_size);
+
post_ars_status(>ars_state, >badrange, t->spa_set_dma[0],
SPA2_SIZE);
 
-- 
2.14.3

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[PATCH 1/3] nfit_test: improve structure offset handling

2018-02-27 Thread Ross Zwisler
In nfit_test0_setup() and nfit_test1_setup() we keep an 'offset' value
which we use to calculate where in our 'nfit_buf' we will place our next
structure.  The handling of 'offset' and the calculation of the placement
of the next structure is a bit inconsistent, though.  We don't update
'offset' after we insert each structure, sometimes causing us to update it
for multiple structures' sizes at once.  When calculating the position of
the next structure we aren't always able to just use 'offset', but
sometimes have to add in other structure sizes as well.

Fix this by updating 'offset' after each structure insertion in a
consistent way, allowing us to always calculate the position of the next
structure to be inserted by just using 'nfit_buf + offset'.

Signed-off-by: Ross Zwisler 
---
 tools/testing/nvdimm/test/nfit.c | 183 +++
 1 file changed, 109 insertions(+), 74 deletions(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 620fa78b3b1b..1376fc95c33a 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1366,7 +1366,7 @@ static void nfit_test0_setup(struct nfit_test *t)
struct acpi_nfit_data_region *bdw;
struct acpi_nfit_flush_address *flush;
struct acpi_nfit_capabilities *pcap;
-   unsigned int offset, i;
+   unsigned int offset = 0, i;
 
/*
 * spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -1380,93 +1380,102 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->range_index = 0+1;
spa->address = t->spa_set_dma[0];
spa->length = SPA0_SIZE;
+   offset += spa->header.length;
 
/*
 * spa1 (interleave last half of the 4 DIMMS, note storage
 * does not actually alias the related block-data-window
 * regions)
 */
-   spa = nfit_buf + sizeof(*spa);
+   spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
spa->range_index = 1+1;
spa->address = t->spa_set_dma[1];
spa->length = SPA1_SIZE;
+   offset += spa->header.length;
 
/* spa2 (dcr0) dimm0 */
-   spa = nfit_buf + sizeof(*spa) * 2;
+   spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 2+1;
spa->address = t->dcr_dma[0];
spa->length = DCR_SIZE;
+   offset += spa->header.length;
 
/* spa3 (dcr1) dimm1 */
-   spa = nfit_buf + sizeof(*spa) * 3;
+   spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 3+1;
spa->address = t->dcr_dma[1];
spa->length = DCR_SIZE;
+   offset += spa->header.length;
 
/* spa4 (dcr2) dimm2 */
-   spa = nfit_buf + sizeof(*spa) * 4;
+   spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 4+1;
spa->address = t->dcr_dma[2];
spa->length = DCR_SIZE;
+   offset += spa->header.length;
 
/* spa5 (dcr3) dimm3 */
-   spa = nfit_buf + sizeof(*spa) * 5;
+   spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 5+1;
spa->address = t->dcr_dma[3];
spa->length = DCR_SIZE;
+   offset += spa->header.length;
 
/* spa6 (bdw for dcr0) dimm0 */
-   spa = nfit_buf + sizeof(*spa) * 6;
+   spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 6+1;
spa->address = t->dimm_dma[0];
spa->length = DIMM_SIZE;
+   offset += spa->header.length;
 
/* spa7 (bdw for dcr1) dimm1 */
-   spa = nfit_buf + sizeof(*spa) * 7;
+   spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 7+1;
spa->address = t->dimm_dma[1];
spa->length = DIMM_SIZE;
+   offset += spa->header.length;
 
/* spa8 (bdw for dcr2) dimm2 */
-   spa = nfit_buf + sizeof(*spa) * 8;
+   spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;

Re: [PATCH v6 3/3] xfs: reject removal of realtime flag when datadev doesn't support DAX

2018-02-27 Thread Darrick J. Wong
On Tue, Feb 27, 2018 at 09:46:54AM -0700, Dave Jiang wrote:
> 
> 
> On 02/20/2018 04:23 PM, Darrick J. Wong wrote:
> > On Wed, Feb 21, 2018 at 10:15:24AM +1100, Dave Chinner wrote:
> >> On Tue, Feb 20, 2018 at 03:01:09PM -0800, Darrick J. Wong wrote:
> >>> On Sun, Feb 18, 2018 at 11:23:17AM +1100, Dave Chinner wrote:
>  On Fri, Feb 16, 2018 at 09:22:47AM -0800, Darrick J. Wong wrote:
> > On Fri, Feb 16, 2018 at 10:04:26AM -0700, Dave Jiang wrote:
> >> In a situation where the rt_dev is DAX and data_dev is not DAX, if the 
> >> user
> >> requests to remove the realtime flag via ioctl we can no longer 
> >> support DAX
> >> for that file. Dynamic changing of S_DAX on the inode is not supported 
> >> due
> >> to various complications in the existing implementation. Therefore 
> >> until we
> >> address the dynamic S_DAX change issues, we must disallow realtime flag
> >> being removed.
> >>
> >> Signed-off-by: Dave Jiang 
> >> Reviewed-by: Christoph Hellwig 
> >> ---
> >>  fs/xfs/xfs_ioctl.c |   14 ++
> >>  1 file changed, 14 insertions(+)
> >>
> >> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> >> index 2c70a0a4f59f..edd97d527fe8 100644
> >> --- a/fs/xfs/xfs_ioctl.c
> >> +++ b/fs/xfs/xfs_ioctl.c
> >> @@ -1030,6 +1030,20 @@ xfs_ioctl_setattr_xflags(
> >>  {
> >>struct xfs_mount*mp = ip->i_mount;
> >>uint64_tdi_flags2;
> >> +  struct inode*inode = VFS_I(ip);
> >> +  struct super_block  *sb = inode->i_sb;
> >> +
> >> +  /*
> >> +   * In the case that the inode is realtime, and we are trying to 
> >> remove
> >> +   * the realtime flag, and the rtdev supports DAX but the 
> >> datadev does
> >> +   * not support DAX, we can't allow the realtime flag to be 
> >> removed
> >> +   * since we do not support dynamic S_DAX flag removal yet.
> >> +   */
> >> +  if (XFS_IS_REALTIME_INODE(ip) &&
> >> +  !(fa->fsx_xflags & FS_XFLAG_REALTIME) &&
> >> +  bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, 
> >> sb->s_blocksize) &&
> >> +  !bdev_dax_supported(mp->m_ddev_targp->bt_bdev, 
> >> sb->s_blocksize))
> >
> > What happens here if we have a non-rt file that we're trying to turn
> > into an rt file and the data dev supports dax but not the rt dev?
> >
> > Changing the rt flag is only supported on files with no data blocks (no
> > extents, no delalloc blocks), so why can't we remove S_DAX from an empty
> > file?  There aren't any memory mappings or page cache to get in the way,
> > correct?
> 
>  File size can be non-zero, so you can have DAX read-over-hole
>  mappings present. I simply don't think it's safe to remove/add S_DAX
>  flags via ioctls right now. If we have a DAX capable rtdev, then the
>  only way we should allow rtdev+dax to be used right now is via the
>  RT inherit bit on the dir that creates files in the rtdev right from
>  the start. i.e. we can't set/remove the RT inode flag on an inode
>  via ioctl if rtdev+dax is enabled until the whole dynamic S_DAX
>  inode flag thing is resolved.
> >>>
> >>> Could we deal with the restriction that the DAX flag can't change
> >>> (whether by user ioctl or by toggling the rt flag) unless the file size
> >>> is zero?  That adds another way setting/clearing the realtime flag can
> >>> fail, but at least it'd be the same EINVAL.
> >>
> >> I thought we still mmap a zero length file and get a page fault that
> >> returns a zeroed page? Or does that segv?
> > 
> > I think it segfaults, but let's see...
> > 
> > $ rm -rf /opt/b ; xfs_io -f -c 'mmap -rw 0 1m' -c 'mread 512 20' /opt/b
> > Bus error
> > $ rm -rf /opt/b ; xfs_io -f -c 'mmap -rw 0 1m' -c 'mwrite 512 20' /opt/b
> > Bus error
> 
> Darrick,
> So you want the change to be if the file size is 0 then we can modify
> the RT bit, otherwise reject if DAX is involved?

The other way around -- reject any change to the DAX flag if the file
size is not zero, regardless of whether the user tried to change the DAX
flag directly or the change is happening because the user changed the RT
flag and the device dax support is different between the rt & data
devices.

We'll need more rigorous testing of this current theory that we can
change S_DAX without problems if the file size is zero, once this change
has been written.

--D

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v4 10/12] fs, dax: kill IS_DAX()

2018-02-27 Thread Jan Kara
On Mon 26-02-18 20:20:48, Dan Williams wrote:
> In preparation for fixing the broken definition of S_DAX in the
> CONFIG_FS_DAX=n + CONFIG_DEV_DAX=y case, convert all the remaining
> IS_DAX() usages to use explicit tests for FSDAX.
> 
> Cc: Jan Kara 
> Cc: Matthew Wilcox 
> Cc: Ross Zwisler 
> Cc: 
> Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
> Signed-off-by: Dan Williams 

Looks good. You can add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/iomap.c  |2 +-
>  include/linux/dax.h |2 +-
>  include/linux/fs.h  |3 +--
>  3 files changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/iomap.c b/fs/iomap.c
> index afd163586aa0..fe379d8949fd 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -377,7 +377,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, 
> loff_t count,
>   offset = pos & (PAGE_SIZE - 1); /* Within page */
>   bytes = min_t(loff_t, PAGE_SIZE - offset, count);
>  
> - if (IS_DAX(inode))
> + if (IS_FSDAX(inode))
>   status = iomap_dax_zero(pos, offset, bytes, iomap);
>   else
>   status = iomap_zero(inode, pos, offset, bytes, iomap);
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index 47edbce4fc52..ce520e932adc 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -124,7 +124,7 @@ static inline ssize_t dax_iomap_rw(struct kiocb *iocb, 
> struct iov_iter *iter,
>  
>  static inline bool dax_mapping(struct address_space *mapping)
>  {
> - return mapping->host && IS_DAX(mapping->host);
> + return mapping->host && IS_FSDAX(mapping->host);
>  }
>  
>  struct writeback_control;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 4a5aa8761011..8021f10068d3 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1903,7 +1903,6 @@ static inline bool sb_rdonly(const struct super_block 
> *sb) { return sb->s_flags
>  #define IS_IMA(inode)((inode)->i_flags & S_IMA)
>  #define IS_AUTOMOUNT(inode)  ((inode)->i_flags & S_AUTOMOUNT)
>  #define IS_NOSEC(inode)  ((inode)->i_flags & S_NOSEC)
> -#define IS_DAX(inode)((inode)->i_flags & S_DAX)
>  #define IS_ENCRYPTED(inode)  ((inode)->i_flags & S_ENCRYPTED)
>  
>  #define IS_WHITEOUT(inode)   (S_ISCHR(inode->i_mode) && \
> @@ -3203,7 +3202,7 @@ extern int file_update_time(struct file *file);
>  
>  static inline bool io_is_direct(struct file *filp)
>  {
> - return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
> + return (filp->f_flags & O_DIRECT) || IS_FSDAX(filp->f_mapping->host);
>  }
>  
>  static inline bool vma_is_dax(struct vm_area_struct *vma)
> 
> 
-- 
Jan Kara 
SUSE Labs, CR
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v4 09/12] mm, dax: replace IS_DAX() with IS_DEVDAX() or IS_FSDAX()

2018-02-27 Thread Jan Kara
On Mon 26-02-18 20:20:42, Dan Williams wrote:
> In preparation for fixing the broken definition of S_DAX in the
> CONFIG_FS_DAX=n + CONFIG_DEV_DAX=y case, convert all IS_DAX() usages to
> use explicit tests for the DEVDAX and FSDAX sub-cases of DAX
> functionality.
> 
> Cc: Jan Kara 
> Cc: Matthew Wilcox 
> Cc: Ross Zwisler 
> Cc: 
> Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
> Signed-off-by: Dan Williams 

Just one nit below. With that fixed you can add:

Reviewed-by: Jan Kara 

> @@ -3208,21 +3208,19 @@ static inline bool io_is_direct(struct file *filp)
>  
>  static inline bool vma_is_dax(struct vm_area_struct *vma)
>  {
> - return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
> + struct inode *inode;
> +
> + if (!vma->vm_file)
> + return false;
> + inode = vma->vm_file->f_mapping->host;

When changing this, use file_inode() here as well?

Honza
-- 
Jan Kara 
SUSE Labs, CR
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v4 08/12] xfs, dax: replace IS_DAX() with IS_FSDAX()

2018-02-27 Thread Jan Kara
On Mon 26-02-18 20:20:37, Dan Williams wrote:
> In preparation for fixing the broken definition of S_DAX in the
> CONFIG_FS_DAX=n + CONFIG_DEV_DAX=y case, convert all IS_DAX() usages to
> use explicit tests for FSDAX since DAX is ambiguous.
> 
> Cc: Jan Kara 
> Cc: "Darrick J. Wong" 
> Cc: linux-...@vger.kernel.org
> Cc: Matthew Wilcox 
> Cc: Ross Zwisler 
> Cc: 
> Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
> Signed-off-by: Dan Williams 

Looks good. You can add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/xfs/xfs_file.c|   14 +++---
>  fs/xfs/xfs_ioctl.c   |4 ++--
>  fs/xfs/xfs_iomap.c   |6 +++---
>  fs/xfs/xfs_reflink.c |2 +-
>  4 files changed, 13 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 9ea08326f876..46a098b90fd0 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -288,7 +288,7 @@ xfs_file_read_iter(
>   if (XFS_FORCED_SHUTDOWN(mp))
>   return -EIO;
>  
> - if (IS_DAX(inode))
> + if (IS_FSDAX(inode))
>   ret = xfs_file_dax_read(iocb, to);
>   else if (iocb->ki_flags & IOCB_DIRECT)
>   ret = xfs_file_dio_aio_read(iocb, to);
> @@ -726,7 +726,7 @@ xfs_file_write_iter(
>   if (XFS_FORCED_SHUTDOWN(ip->i_mount))
>   return -EIO;
>  
> - if (IS_DAX(inode))
> + if (IS_FSDAX(inode))
>   ret = xfs_file_dax_write(iocb, from);
>   else if (iocb->ki_flags & IOCB_DIRECT) {
>   /*
> @@ -1045,7 +1045,7 @@ __xfs_filemap_fault(
>   }
>  
>   xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> - if (IS_DAX(inode)) {
> + if (IS_FSDAX(inode)) {
>   pfn_t pfn;
>  
>   ret = dax_iomap_fault(vmf, pe_size, , NULL, _iomap_ops);
> @@ -1070,7 +1070,7 @@ xfs_filemap_fault(
>  {
>   /* DAX can shortcut the normal fault path on write faults! */
>   return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
> - IS_DAX(file_inode(vmf->vma->vm_file)) &&
> + IS_FSDAX(file_inode(vmf->vma->vm_file)) &&
>   (vmf->flags & FAULT_FLAG_WRITE));
>  }
>  
> @@ -1079,7 +1079,7 @@ xfs_filemap_huge_fault(
>   struct vm_fault *vmf,
>   enum page_entry_sizepe_size)
>  {
> - if (!IS_DAX(file_inode(vmf->vma->vm_file)))
> + if (!IS_FSDAX(file_inode(vmf->vma->vm_file)))
>   return VM_FAULT_FALLBACK;
>  
>   /* DAX can shortcut the normal fault path on write faults! */
> @@ -1124,12 +1124,12 @@ xfs_file_mmap(
>* We don't support synchronous mappings for non-DAX files. At least
>* until someone comes with a sensible use case.
>*/
> - if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC))
> + if (!IS_FSDAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC))
>   return -EOPNOTSUPP;
>  
>   file_accessed(filp);
>   vma->vm_ops = _file_vm_ops;
> - if (IS_DAX(file_inode(filp)))
> + if (IS_FSDAX(file_inode(filp)))
>   vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
>   return 0;
>  }
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index 89fb1eb80aae..234279ff66ce 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -1108,9 +1108,9 @@ xfs_ioctl_setattr_dax_invalidate(
>   }
>  
>   /* If the DAX state is not changing, we have nothing to do here. */
> - if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
> + if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_FSDAX(inode))
>   return 0;
> - if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
> + if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_FSDAX(inode))
>   return 0;
>  
>   /* lock, flush and invalidate mapping in preparation for flag change */
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 66e1edbfb2b2..cf794d429aec 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -241,7 +241,7 @@ xfs_iomap_write_direct(
>* the reserve block pool for bmbt block allocation if there is no space
>* left but we need to do unwritten extent conversion.
>*/
> - if (IS_DAX(VFS_I(ip))) {
> + if (IS_FSDAX(VFS_I(ip))) {
>   bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
>   if (imap->br_state == XFS_EXT_UNWRITTEN) {
>   tflags |= XFS_TRANS_RESERVE;
> @@ -952,7 +952,7 @@ static inline bool imap_needs_alloc(struct inode *inode,
>   return !nimaps ||
>   imap->br_startblock == HOLESTARTBLOCK ||
>   imap->br_startblock == DELAYSTARTBLOCK ||
> - (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
> + (IS_FSDAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
>  }
>  
>  

Re: [PATCH v4 07/12] ext4, dax: replace IS_DAX() with IS_FSDAX()

2018-02-27 Thread Jan Kara
On Mon 26-02-18 20:20:32, Dan Williams wrote:
> In preparation for fixing the broken definition of S_DAX in the
> CONFIG_FS_DAX=n + CONFIG_DEV_DAX=y case, convert all IS_DAX() usages to
> use explicit tests for FSDAX since DAX is ambiguous.
> 
> Cc: Jan Kara 
> Cc: "Theodore Ts'o" 
> Cc: Andreas Dilger 
> Cc: Alexander Viro 
> Cc: Matthew Wilcox 
> Cc: Ross Zwisler 
> Cc: 
> Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
> Signed-off-by: Dan Williams 

Looks good. You can add:

Reviewed-by: Jan Kara 

Honza


> ---
>  fs/ext4/file.c  |   12 +---
>  fs/ext4/inode.c |4 ++--
>  fs/ext4/ioctl.c |2 +-
>  fs/ext4/super.c |2 +-
>  4 files changed, 9 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index 51854e7608f0..561ea843b458 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -48,7 +48,7 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, 
> struct iov_iter *to)
>* Recheck under inode lock - at this point we are sure it cannot
>* change anymore
>*/
> - if (!IS_DAX(inode)) {
> + if (!IS_FSDAX(inode)) {
>   inode_unlock_shared(inode);
>   /* Fallback to buffered IO in case we cannot support DAX */
>   return generic_file_read_iter(iocb, to);
> @@ -68,7 +68,7 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, 
> struct iov_iter *to)
>   if (!iov_iter_count(to))
>   return 0; /* skip atime */
>  
> - if (IS_DAX(file_inode(iocb->ki_filp)))
> + if (IS_FSDAX(file_inode(iocb->ki_filp)))
>   return ext4_dax_read_iter(iocb, to);
>   return generic_file_read_iter(iocb, to);
>  }
> @@ -216,10 +216,8 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter 
> *from)
>   if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb
>   return -EIO;
>  
> -#ifdef CONFIG_FS_DAX
> - if (IS_DAX(inode))
> + if (IS_FSDAX(inode))
>   return ext4_dax_write_iter(iocb, from);
> -#endif
>   if (!o_direct && (iocb->ki_flags & IOCB_NOWAIT))
>   return -EOPNOTSUPP;
>  
> @@ -361,11 +359,11 @@ static int ext4_file_mmap(struct file *file, struct 
> vm_area_struct *vma)
>* We don't support synchronous mappings for non-DAX files. At least
>* until someone comes with a sensible use case.
>*/
> - if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
> + if (!IS_FSDAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
>   return -EOPNOTSUPP;
>  
>   file_accessed(file);
> - if (IS_DAX(file_inode(file))) {
> + if (IS_FSDAX(file_inode(file))) {
>   vma->vm_ops = _dax_vm_ops;
>   vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
>   } else {
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index c94780075b04..1879b33aa391 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3858,7 +3858,7 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, 
> struct iov_iter *iter)
>   return 0;
>  
>   /* DAX uses iomap path now */
> - if (WARN_ON_ONCE(IS_DAX(inode)))
> + if (WARN_ON_ONCE(IS_FSDAX(inode)))
>   return 0;
>  
>   trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
> @@ -4076,7 +4076,7 @@ static int ext4_block_zero_page_range(handle_t *handle,
>   if (length > max || length < 0)
>   length = max;
>  
> - if (IS_DAX(inode)) {
> + if (IS_FSDAX(inode)) {
>   return iomap_zero_range(inode, from, length, NULL,
>   _iomap_ops);
>   }
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 7e99ad02f1ba..040fc6570ddb 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -790,7 +790,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, 
> unsigned long arg)
>"Online defrag not supported with bigalloc");
>   err = -EOPNOTSUPP;
>   goto mext_out;
> - } else if (IS_DAX(inode)) {
> + } else if (IS_FSDAX(inode)) {
>   ext4_msg(sb, KERN_ERR,
>"Online defrag not supported with DAX");
>   err = -EOPNOTSUPP;
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 39bf464c35f1..933e12940181 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1161,7 +1161,7 @@ static int ext4_set_context(struct inode *inode, const 
> void *ctx, size_t len,
>   if (inode->i_ino == EXT4_ROOT_INO)
>   return -EPERM;
>  
> - if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
> + if (WARN_ON_ONCE(IS_FSDAX(inode) && i_size_read(inode)))
>

Re: [PATCH v4 05/12] ext4, dax: define ext4_dax_*() infrastructure in all cases

2018-02-27 Thread Jan Kara
On Mon 26-02-18 20:20:21, Dan Williams wrote:
> In preparation for fixing S_DAX to be defined in the CONFIG_FS_DAX=n +
> CONFIG_DEV_DAX=y case, move the definition of these routines outside of
> the "#ifdef CONFIG_FS_DAX" guard. This is also a coding-style fix to
> move all ifdef handling to header files rather than in the source. The
> compiler will still be able to determine that all the related code can
> be discarded in the CONFIG_FS_DAX=n case.
> 
> Cc: Jan Kara 
> Cc: 
> Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
> Signed-off-by: Dan Williams 

Looks good. You can add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/ext4/file.c |6 --
>  1 file changed, 6 deletions(-)
> 
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index fb6f023622fe..51854e7608f0 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -34,7 +34,6 @@
>  #include "xattr.h"
>  #include "acl.h"
>  
> -#ifdef CONFIG_FS_DAX
>  static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  {
>   struct inode *inode = file_inode(iocb->ki_filp);
> @@ -60,7 +59,6 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, 
> struct iov_iter *to)
>   file_accessed(iocb->ki_filp);
>   return ret;
>  }
> -#endif
>  
>  static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  {
> @@ -70,10 +68,8 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, 
> struct iov_iter *to)
>   if (!iov_iter_count(to))
>   return 0; /* skip atime */
>  
> -#ifdef CONFIG_FS_DAX
>   if (IS_DAX(file_inode(iocb->ki_filp)))
>   return ext4_dax_read_iter(iocb, to);
> -#endif
>   return generic_file_read_iter(iocb, to);
>  }
>  
> @@ -179,7 +175,6 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, 
> struct iov_iter *from)
>   return iov_iter_count(from);
>  }
>  
> -#ifdef CONFIG_FS_DAX
>  static ssize_t
>  ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  {
> @@ -208,7 +203,6 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter 
> *from)
>   ret = generic_write_sync(iocb, ret);
>   return ret;
>  }
> -#endif
>  
>  static ssize_t
>  ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> 
> 
-- 
Jan Kara 
SUSE Labs, CR
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v4 06/12] ext2, dax: replace IS_DAX() with IS_FSDAX()

2018-02-27 Thread Jan Kara
On Mon 26-02-18 20:20:27, Dan Williams wrote:
> In preparation for fixing the broken definition of S_DAX in the
> CONFIG_FS_DAX=n + CONFIG_DEV_DAX=y case, convert all IS_DAX() usages to
> use explicit tests for FSDAX since DAX is ambiguous.
> 
> Cc: Jan Kara 
> Cc: Matthew Wilcox 
> Cc: Ross Zwisler 
> Cc: 
> Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
> Signed-off-by: Dan Williams 
> ---
>  fs/ext2/file.c  |6 +++---
>  fs/ext2/inode.c |6 +++---
>  2 files changed, 6 insertions(+), 6 deletions(-)

Looks good. You can add:

Reviewed-by: Jan Kara 

Honza


> 
> diff --git a/fs/ext2/file.c b/fs/ext2/file.c
> index 5ac98d074323..702a36df6c01 100644
> --- a/fs/ext2/file.c
> +++ b/fs/ext2/file.c
> @@ -119,7 +119,7 @@ static const struct vm_operations_struct ext2_dax_vm_ops 
> = {
>  
>  static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
>  {
> - if (!IS_DAX(file_inode(file)))
> + if (!IS_FSDAX(file_inode(file)))
>   return generic_file_mmap(file, vma);
>  
>   file_accessed(file);
> @@ -158,14 +158,14 @@ int ext2_fsync(struct file *file, loff_t start, loff_t 
> end, int datasync)
>  
>  static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  {
> - if (IS_DAX(iocb->ki_filp->f_mapping->host))
> + if (IS_FSDAX(iocb->ki_filp->f_mapping->host))
>   return ext2_dax_read_iter(iocb, to);
>   return generic_file_read_iter(iocb, to);
>  }
>  
>  static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter 
> *from)
>  {
> - if (IS_DAX(iocb->ki_filp->f_mapping->host))
> + if (IS_FSDAX(iocb->ki_filp->f_mapping->host))
>   return ext2_dax_write_iter(iocb, from);
>   return generic_file_write_iter(iocb, from);
>  }
> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> index e04295e99d90..72284f9fd034 100644
> --- a/fs/ext2/inode.c
> +++ b/fs/ext2/inode.c
> @@ -733,7 +733,7 @@ static int ext2_get_blocks(struct inode *inode,
>   goto cleanup;
>   }
>  
> - if (IS_DAX(inode)) {
> + if (IS_FSDAX(inode)) {
>   /*
>* We must unmap blocks before zeroing so that writeback cannot
>* overwrite zeros with stale data from block device page cache.
> @@ -940,7 +940,7 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
>   loff_t offset = iocb->ki_pos;
>   ssize_t ret;
>  
> - if (WARN_ON_ONCE(IS_DAX(inode)))
> + if (WARN_ON_ONCE(IS_FSDAX(inode)))
>   return -EIO;
>  
>   ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
> @@ -1294,7 +1294,7 @@ static int ext2_setsize(struct inode *inode, loff_t 
> newsize)
>  
>   inode_dio_wait(inode);
>  
> - if (IS_DAX(inode)) {
> + if (IS_FSDAX(inode)) {
>   error = iomap_zero_range(inode, newsize,
>PAGE_ALIGN(newsize) - newsize, NULL,
>_iomap_ops);
> 
> 
-- 
Jan Kara 
SUSE Labs, CR
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v4 04/12] ext2, dax: define ext2_dax_*() infrastructure in all cases

2018-02-27 Thread Jan Kara
On Mon 26-02-18 20:20:16, Dan Williams wrote:
> In preparation for fixing S_DAX to be defined in the CONFIG_FS_DAX=n +
> CONFIG_DEV_DAX=y case, move the definition of these routines outside of
> the "#ifdef CONFIG_FS_DAX" guard. This is also a coding-style fix to
> move all ifdef handling to header files rather than in the source. The
> compiler will still be able to determine that all the related code can
> be discarded in the CONFIG_FS_DAX=n case.
> 
> Cc: Jan Kara 
> Cc: 
> Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
> Signed-off-by: Dan Williams 

Looks good. You can add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/ext2/file.c  |8 
>  include/linux/dax.h |   10 --
>  2 files changed, 8 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/ext2/file.c b/fs/ext2/file.c
> index 1c7ea1bcddde..5ac98d074323 100644
> --- a/fs/ext2/file.c
> +++ b/fs/ext2/file.c
> @@ -29,7 +29,6 @@
>  #include "xattr.h"
>  #include "acl.h"
>  
> -#ifdef CONFIG_FS_DAX
>  static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  {
>   struct inode *inode = iocb->ki_filp->f_mapping->host;
> @@ -128,9 +127,6 @@ static int ext2_file_mmap(struct file *file, struct 
> vm_area_struct *vma)
>   vma->vm_flags |= VM_MIXEDMAP;
>   return 0;
>  }
> -#else
> -#define ext2_file_mmap   generic_file_mmap
> -#endif
>  
>  /*
>   * Called when filp is released. This happens when all file descriptors
> @@ -162,19 +158,15 @@ int ext2_fsync(struct file *file, loff_t start, loff_t 
> end, int datasync)
>  
>  static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  {
> -#ifdef CONFIG_FS_DAX
>   if (IS_DAX(iocb->ki_filp->f_mapping->host))
>   return ext2_dax_read_iter(iocb, to);
> -#endif
>   return generic_file_read_iter(iocb, to);
>  }
>  
>  static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter 
> *from)
>  {
> -#ifdef CONFIG_FS_DAX
>   if (IS_DAX(iocb->ki_filp->f_mapping->host))
>   return ext2_dax_write_iter(iocb, from);
> -#endif
>   return generic_file_write_iter(iocb, from);
>  }
>  
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index 0185ecdae135..47edbce4fc52 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -93,8 +93,6 @@ void dax_flush(struct dax_device *dax_dev, void *addr, 
> size_t size);
>  void dax_write_cache(struct dax_device *dax_dev, bool wc);
>  bool dax_write_cache_enabled(struct dax_device *dax_dev);
>  
> -ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
> - const struct iomap_ops *ops);
>  int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
>   pfn_t *pfnp, int *errp, const struct iomap_ops *ops);
>  int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
> @@ -107,6 +105,8 @@ int dax_invalidate_mapping_entry_sync(struct 
> address_space *mapping,
>  int __dax_zero_page_range(struct block_device *bdev,
>   struct dax_device *dax_dev, sector_t sector,
>   unsigned int offset, unsigned int length);
> +ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
> + const struct iomap_ops *ops);
>  #else
>  static inline int __dax_zero_page_range(struct block_device *bdev,
>   struct dax_device *dax_dev, sector_t sector,
> @@ -114,6 +114,12 @@ static inline int __dax_zero_page_range(struct 
> block_device *bdev,
>  {
>   return -ENXIO;
>  }
> +
> +static inline ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
> + const struct iomap_ops *ops)
> +{
> + return -ENXIO;
> +}
>  #endif
>  
>  static inline bool dax_mapping(struct address_space *mapping)
> 
> 
-- 
Jan Kara 
SUSE Labs, CR
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v4 02/12] dax: introduce IS_DEVDAX() and IS_FSDAX()

2018-02-27 Thread Jan Kara
On Mon 26-02-18 20:20:05, Dan Williams wrote:
> The current IS_DAX() helper that checks the S_DAX inode flag is
> ambiguous, and currently has the broken assumption that the S_DAX flag

I don't think S_DAX flag is really ambiguous. It is just that in
CONFIG_FS_DAX=n + CONFIG_DEV_DAX=y case, the compiler is not able to figure
out some calls behind IS_DAX() are dead code and so the kernel won't
compile / link. Or is there any other problem I'm missing?

If I'm indeed right, then please tell this in the changelog and don't talk
about abstract ambiguity of S_DAX flag.

As much as I'd prefer to solve link-time problems with stubs instead of
relying on dead-code elimination, I can live with split macros so once the
changelog is settled, feel free to add:

Reviewed-by: Jan Kara 

Honza

> is only non-zero in the CONFIG_FS_DAX=y case. In preparation for
> defining S_DAX to non-zero in the  CONFIG_FS_DAX=n + CONFIG_DEV_DAX=y
> case, introduce two explicit helpers to replace IS_DAX().
> 
> Cc: "Theodore Ts'o" 
> Cc: Andreas Dilger 
> Cc: Alexander Viro 
> Cc: "Darrick J. Wong" 
> Cc: linux-...@vger.kernel.org (supporter:XFS FILESYSTEM)
> Cc: Matthew Wilcox 
> Cc: Ross Zwisler 
> Cc: 
> Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
> Reported-by: Jan Kara 
> Signed-off-by: Dan Williams 
> ---
>  include/linux/fs.h |   22 ++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 79c413985305..bd0c46880572 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1909,6 +1909,28 @@ static inline bool sb_rdonly(const struct super_block 
> *sb) { return sb->s_flags
>  #define IS_WHITEOUT(inode)   (S_ISCHR(inode->i_mode) && \
>(inode)->i_rdev == WHITEOUT_DEV)
>  
> +static inline bool IS_DEVDAX(struct inode *inode)
> +{
> + if (!IS_ENABLED(CONFIG_DEV_DAX))
> + return false;
> + if ((inode->i_flags & S_DAX) == 0)
> + return false;
> + if (!S_ISCHR(inode->i_mode))
> + return false;
> + return true;
> +}
> +
> +static inline bool IS_FSDAX(struct inode *inode)
> +{
> + if (!IS_ENABLED(CONFIG_FS_DAX))
> + return false;
> + if ((inode->i_flags & S_DAX) == 0)
> + return false;
> + if (S_ISCHR(inode->i_mode))
> + return false;
> + return true;
> +}
> +
>  static inline bool HAS_UNMAPPED_ID(struct inode *inode)
>  {
>   return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
> 
-- 
Jan Kara 
SUSE Labs, CR
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/3] xfs: reject removal of realtime flag when datadev doesn't support DAX

2018-02-27 Thread Dave Jiang


On 02/20/2018 04:23 PM, Darrick J. Wong wrote:
> On Wed, Feb 21, 2018 at 10:15:24AM +1100, Dave Chinner wrote:
>> On Tue, Feb 20, 2018 at 03:01:09PM -0800, Darrick J. Wong wrote:
>>> On Sun, Feb 18, 2018 at 11:23:17AM +1100, Dave Chinner wrote:
 On Fri, Feb 16, 2018 at 09:22:47AM -0800, Darrick J. Wong wrote:
> On Fri, Feb 16, 2018 at 10:04:26AM -0700, Dave Jiang wrote:
>> In a situation where the rt_dev is DAX and data_dev is not DAX, if the 
>> user
>> requests to remove the realtime flag via ioctl we can no longer support 
>> DAX
>> for that file. Dynamic changing of S_DAX on the inode is not supported 
>> due
>> to various complications in the existing implementation. Therefore until 
>> we
>> address the dynamic S_DAX change issues, we must disallow realtime flag
>> being removed.
>>
>> Signed-off-by: Dave Jiang 
>> Reviewed-by: Christoph Hellwig 
>> ---
>>  fs/xfs/xfs_ioctl.c |   14 ++
>>  1 file changed, 14 insertions(+)
>>
>> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
>> index 2c70a0a4f59f..edd97d527fe8 100644
>> --- a/fs/xfs/xfs_ioctl.c
>> +++ b/fs/xfs/xfs_ioctl.c
>> @@ -1030,6 +1030,20 @@ xfs_ioctl_setattr_xflags(
>>  {
>>  struct xfs_mount*mp = ip->i_mount;
>>  uint64_tdi_flags2;
>> +struct inode*inode = VFS_I(ip);
>> +struct super_block  *sb = inode->i_sb;
>> +
>> +/*
>> + * In the case that the inode is realtime, and we are trying to 
>> remove
>> + * the realtime flag, and the rtdev supports DAX but the 
>> datadev does
>> + * not support DAX, we can't allow the realtime flag to be 
>> removed
>> + * since we do not support dynamic S_DAX flag removal yet.
>> + */
>> +if (XFS_IS_REALTIME_INODE(ip) &&
>> +!(fa->fsx_xflags & FS_XFLAG_REALTIME) &&
>> +bdev_dax_supported(mp->m_rtdev_targp->bt_bdev, 
>> sb->s_blocksize) &&
>> +!bdev_dax_supported(mp->m_ddev_targp->bt_bdev, 
>> sb->s_blocksize))
>
> What happens here if we have a non-rt file that we're trying to turn
> into an rt file and the data dev supports dax but not the rt dev?
>
> Changing the rt flag is only supported on files with no data blocks (no
> extents, no delalloc blocks), so why can't we remove S_DAX from an empty
> file?  There aren't any memory mappings or page cache to get in the way,
> correct?

 File size can be non-zero, so you can have DAX read-over-hole
 mappings present. I simply don't think it's safe to remove/add S_DAX
 flags via ioctls right now. If we have a DAX capable rtdev, then the
 only way we should allow rtdev+dax to be used right now is via the
 RT inherit bit on the dir that creates files in the rtdev right from
 the start. i.e. we can't set/remove the RT inode flag on an inode
 via ioctl if rtdev+dax is enabled until the whole dynamic S_DAX
 inode flag thing is resolved.
>>>
>>> Could we deal with the restriction that the DAX flag can't change
>>> (whether by user ioctl or by toggling the rt flag) unless the file size
>>> is zero?  That adds another way setting/clearing the realtime flag can
>>> fail, but at least it'd be the same EINVAL.
>>
>> I thought we still mmap a zero length file and get a page fault that
>> returns a zeroed page? Or does that segv?
> 
> I think it segfaults, but let's see...
> 
> $ rm -rf /opt/b ; xfs_io -f -c 'mmap -rw 0 1m' -c 'mread 512 20' /opt/b
> Bus error
> $ rm -rf /opt/b ; xfs_io -f -c 'mmap -rw 0 1m' -c 'mwrite 512 20' /opt/b
> Bus error

Darrick,
So you want the change to be if the file size is 0 then we can modify
the RT bit, otherwise reject if DAX is involved?
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm