[ndctl PATCH v3 3/4] libndctl: enable ND_CMD_CALL

2016-09-16 Thread Brian Boylston
. Enable parsing /sys/bus/nd/devices/nmemX/commands for ND_CMD_CALL
. Enable translation of ND_CMD_CALL to ND_IOCTL_CALL

Cc: Dan Williams 
Signed-off-by: Brian Boylston 
---
 ndctl/lib/libndctl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
index 99938be..7a847fa 100644
--- a/ndctl/lib/libndctl.c
+++ b/ndctl/lib/libndctl.c
@@ -719,7 +719,7 @@ static int to_dsm_index(const char *name, int dimm)
int i, end_cmd;
 
if (dimm) {
-   end_cmd = ND_CMD_VENDOR;
+   end_cmd = ND_CMD_CALL;
cmd_name_fn = nvdimm_cmd_name;
} else {
end_cmd = nd_cmd_clear_error;
@@ -2183,6 +2183,7 @@ static int to_ioctl_cmd(int cmd, int dimm)
case ND_CMD_GET_CONFIG_DATA:return ND_IOCTL_GET_CONFIG_DATA;
case ND_CMD_SET_CONFIG_DATA:return ND_IOCTL_SET_CONFIG_DATA;
case ND_CMD_VENDOR: return ND_IOCTL_VENDOR;
+   case ND_CMD_CALL:   return ND_IOCTL_CALL;
case ND_CMD_VENDOR_EFFECT_LOG_SIZE:
case ND_CMD_VENDOR_EFFECT_LOG:
default:
-- 
2.8.3

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[ndctl PATCH v3 4/4] libndctl: add support for the HPE1 family of DSM SMART functions

2016-09-16 Thread Brian Boylston
This patch introduces a set of ndctl_smart_ops for the HPE1 DSM family.
The implementation calls the HPE1 DSM functions defined in [1] and
translates the results to match the existing Intel DSM-inspired
smart_ops.  This delivers health reporting parity for HPE type N
NVDIMMs, but we are planning a future patch to add reporting for some of
the additional health data defined in [1].

[1] 
https://github.com/HewlettPackard/hpe-nvm/raw/master/Documentation/NFIT_DSM_DDR4_NVDIMM-N_v84s.pdf

Cc: Dan Williams 
Signed-off-by: Brian Boylston 
---
Changes in v3:
  - Added missing bibliography to the commit message.
  - I realized that v2 wouldn't compile if HAS_SMART was not 1, so I
changed intel_smart_ops and hpe1_smart_ops into pointers and set them
to NULL if HAS_SMART != 1.  They are const because that seems to
avoid compiler warnings about unused variables in cases where
libndctl-private.h is included by something other than libndctl.c.

 ndctl/Makefile.am|   1 +
 ndctl/lib/libndctl-hpe1.c| 303 ++
 ndctl/lib/libndctl-private.h |   2 +
 ndctl/lib/libndctl.c |   2 +
 ndctl/lib/ndctl-hpe1.h   | 335 +++
 5 files changed, 643 insertions(+)
 create mode 100644 ndctl/lib/libndctl-hpe1.c
 create mode 100644 ndctl/lib/ndctl-hpe1.h

diff --git a/ndctl/Makefile.am b/ndctl/Makefile.am
index 04f3a63..fdec355 100644
--- a/ndctl/Makefile.am
+++ b/ndctl/Makefile.am
@@ -31,6 +31,7 @@ endif
 
 if ENABLE_SMART
 lib_libndctl_la_SOURCES += lib/libndctl-smart.c
+lib_libndctl_la_SOURCES += lib/libndctl-hpe1.c
 endif
 
 bin_PROGRAMS = ndctl
diff --git a/ndctl/lib/libndctl-hpe1.c b/ndctl/lib/libndctl-hpe1.c
new file mode 100644
index 000..48d8e02
--- /dev/null
+++ b/ndctl/lib/libndctl-hpe1.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ */
+#include 
+#include 
+#include 
+#include 
+#include "libndctl-private.h"
+
+#include "ndctl-hpe1.h"
+
+#define CMD_HPE1(_c) ((struct ndn_pkg_hpe1 *)((_c)->cmd_buf))
+#define CMD_HPE1_SMART(_c) \
+   ((struct ndn_hpe1_smart_data *)(CMD_HPE1(_c)->u.smart.data))
+#define CMD_HPE1_SMART_THRESH(_c) \
+   ((struct ndn_hpe1_smart_threshold_data *)(CMD_HPE1(_c)->u.thresh.data))
+
+static struct ndctl_cmd *hpe1_dimm_cmd_new_smart(struct ndctl_dimm *dimm)
+{
+   struct ndctl_bus *bus = ndctl_dimm_get_bus(dimm);
+   struct ndctl_ctx *ctx = ndctl_bus_get_ctx(bus);
+   struct ndctl_cmd *cmd;
+   size_t size;
+   struct ndn_pkg_hpe1 *hpe1;
+
+   if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL)) {
+   dbg(ctx, "unsupported cmd\n");
+   return NULL;
+   }
+
+   size = sizeof(*cmd) + sizeof(struct ndn_pkg_hpe1);
+   cmd = calloc(1, size);
+   if (!cmd)
+   return NULL;
+
+   cmd->dimm = dimm;
+   ndctl_cmd_ref(cmd);
+   cmd->type = ND_CMD_CALL;
+   cmd->size = size;
+   cmd->status = 1;
+
+   hpe1 = CMD_HPE1(cmd);
+   hpe1->gen.nd_family = NVDIMM_FAMILY_HPE1;
+   hpe1->gen.nd_command = NDN_HPE1_CMD_SMART;
+   hpe1->gen.nd_fw_size = 0;
+   hpe1->gen.nd_size_in = offsetof(struct ndn_hpe1_smart, status);
+   hpe1->gen.nd_size_out = sizeof(hpe1->u.smart);
+   hpe1->u.smart.status = 3;
+
+   hpe1->u.smart.in_valid_flags = 0;
+   hpe1->u.smart.in_valid_flags |= NDN_HPE1_SMART_HEALTH_VALID;
+   hpe1->u.smart.in_valid_flags |= NDN_HPE1_SMART_TEMP_VALID;
+   hpe1->u.smart.in_valid_flags |= NDN_HPE1_SMART_SPARES_VALID;
+   hpe1->u.smart.in_valid_flags |= NDN_HPE1_SMART_ALARM_VALID;
+   hpe1->u.smart.in_valid_flags |= NDN_HPE1_SMART_USED_VALID;
+   hpe1->u.smart.in_valid_flags |= NDN_HPE1_SMART_SHUTDOWN_VALID;
+   hpe1->u.smart.in_valid_flags |= NDN_HPE1_SMART_VENDOR_VALID;
+
+   cmd->firmware_status = &hpe1->u.smart.status;
+
+   return cmd;
+}
+
+static int hpe1_smart_valid(struct ndctl_cmd *cmd)
+{
+   if (cmd->type != ND_CMD_CALL ||
+   cmd->size != sizeof(*cmd) + sizeof(struct ndn_pkg_hpe1) ||
+   CMD_HPE1(cmd)->gen.nd_family != NVDIMM_FAMILY_HPE1 ||
+   CMD_HPE1(cmd)->gen.nd_command != NDN_HPE1_CMD_SMART ||
+   cmd->status != 0)
+   return cmd->status < 0 ? cmd->status : -EINVAL;
+   return 0;
+}
+
+static unsigned int hpe1_cmd_smart_get_flags(struct ndctl_cmd *cmd)
+{
+   

[ndctl PATCH v3 2/4] libndctl: record dsm family in add_dimm()

2016-09-16 Thread Brian Boylston
The recorded DSM family can be used to provide family-specific
functionality.

Cc: Dan Williams 
Signed-off-by: Brian Boylston 
---
Changes in v3:
  - Do not fail add_dimm() if the read of nfit/family fails, just set
dimm->dsm_family to -1 instead (suggested by Dan).  As part of this,
I also moved the read to after the has_nfit() check.

 ndctl/lib/libndctl.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
index 3c9a506..99938be 100644
--- a/ndctl/lib/libndctl.c
+++ b/ndctl/lib/libndctl.c
@@ -136,6 +136,7 @@ struct ndctl_dimm {
unsigned short subsystem_revision_id;
unsigned short manufacturing_date;
unsigned char manufacturing_location;
+   unsigned long dsm_family;
unsigned long dsm_mask;
char *unique_id;
char *dimm_path;
@@ -1176,6 +1177,7 @@ static int add_dimm(void *parent, int id, const char 
*dimm_base)
dimm->subsystem_revision_id = -1;
dimm->manufacturing_date = -1;
dimm->manufacturing_location = -1;
+   dimm->dsm_family = -1;
for (i = 0; i < formats; i++)
dimm->format[i] = -1;
 
@@ -1239,6 +1241,10 @@ static int add_dimm(void *parent, int id, const char 
*dimm_base)
if (sysfs_read_attr(ctx, path, buf) == 0)
dimm->subsystem_revision_id = strtoul(buf, NULL, 0);
 
+   sprintf(path, "%s/nfit/family", dimm_base);
+   if (sysfs_read_attr(ctx, path, buf) == 0)
+   dimm->dsm_family = strtoul(buf, NULL, 0);
+
dimm->formats = formats;
sprintf(path, "%s/nfit/format", dimm_base);
if (sysfs_read_attr(ctx, path, buf) == 0)
-- 
2.8.3

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[ndctl PATCH v3 1/4] libndctl: introduce ndctl_smart_ops

2016-09-16 Thread Brian Boylston
Add a layer of indirection for the ndctl_cmd_smart*() family of
interfaces.  This will allow the underlying implementation to be
switched based on the DSM family supported by the DIMM.

Cc: Dan Williams 
Signed-off-by: Brian Boylston 
---
Changes in v3:
  - Dropped the smart_dimm_op() macro and open coded
ndctl_dimm_cmd_new_smart() and ndctl_dimm_cmd_new_smart_threshold().
(suggested by Dan)
  - I realized that v2 wouldn't compile if HAS_SMART was not 1, so I
changed intel_smart_ops and hpe1_smart_ops into pointers and set them
to NULL if HAS_SMART != 1.  They are const because that seems to
avoid compiler warnings about unused variables in cases where
libndctl-private.h is included by something other than libndctl.c.

 ndctl/lib/libndctl-private.h |  23 +
 ndctl/lib/libndctl-smart.c   | 116 ++-
 ndctl/lib/libndctl.c |   7 +++
 ndctl/libndctl.h.in  |   1 +
 4 files changed, 124 insertions(+), 23 deletions(-)

diff --git a/ndctl/lib/libndctl-private.h b/ndctl/lib/libndctl-private.h
index 65ef86d..5ff4fec 100644
--- a/ndctl/lib/libndctl-private.h
+++ b/ndctl/lib/libndctl-private.h
@@ -201,6 +201,29 @@ struct ndctl_cmd {
};
 };
 
+struct ndctl_smart_ops {
+   struct ndctl_cmd *(*new_smart)(struct ndctl_dimm *);
+   unsigned int (*smart_get_flags)(struct ndctl_cmd *);
+   unsigned int (*smart_get_health)(struct ndctl_cmd *);
+   unsigned int (*smart_get_temperature)(struct ndctl_cmd *);
+   unsigned int (*smart_get_spares)(struct ndctl_cmd *);
+   unsigned int (*smart_get_alarm_flags)(struct ndctl_cmd *);
+   unsigned int (*smart_get_life_used)(struct ndctl_cmd *);
+   unsigned int (*smart_get_shutdown_state)(struct ndctl_cmd *);
+   unsigned int (*smart_get_vendor_size)(struct ndctl_cmd *);
+   unsigned char *(*smart_get_vendor_data)(struct ndctl_cmd *);
+   struct ndctl_cmd *(*new_smart_threshold)(struct ndctl_dimm *);
+   unsigned int (*smart_threshold_get_alarm_control)(struct ndctl_cmd *);
+   unsigned int (*smart_threshold_get_temperature)(struct ndctl_cmd *);
+   unsigned int (*smart_threshold_get_spares)(struct ndctl_cmd *);
+};
+
+#if HAS_SMART == 1
+struct ndctl_smart_ops * const intel_smart_ops;
+#else
+static struct ndctl_smart_ops * const intel_smart_ops = NULL;
+#endif
+
 /* internal library helpers for conditionally defined command numbers */
 #ifdef HAVE_NDCTL_ARS
 static const int nd_cmd_ars_status = ND_CMD_ARS_STATUS;
diff --git a/ndctl/lib/libndctl-smart.c b/ndctl/lib/libndctl-smart.c
index cba1e9d..73a49ef 100644
--- a/ndctl/lib/libndctl-smart.c
+++ b/ndctl/lib/libndctl-smart.c
@@ -16,7 +16,60 @@
 #include 
 #include "libndctl-private.h"
 
-NDCTL_EXPORT struct ndctl_cmd *ndctl_dimm_cmd_new_smart(struct ndctl_dimm 
*dimm)
+/*
+ * Define the wrappers around the ndctl_smart_ops:
+ */
+
+NDCTL_EXPORT struct ndctl_cmd *ndctl_dimm_cmd_new_smart(
+   struct ndctl_dimm *dimm)
+{
+   struct ndctl_smart_ops *ops = ndctl_dimm_get_smart_ops(dimm);
+   if (ops && ops->new_smart)
+   return ops->new_smart(dimm);
+   else
+   return NULL;
+}
+
+NDCTL_EXPORT struct ndctl_cmd *ndctl_dimm_cmd_new_smart_threshold(
+   struct ndctl_dimm *dimm)
+{
+   struct ndctl_smart_ops *ops = ndctl_dimm_get_smart_ops(dimm);
+   if (ops && ops->new_smart_threshold)
+   return ops->new_smart_threshold(dimm);
+   else
+   return NULL;
+}
+
+#define smart_cmd_op(name, op, rettype, defretvalue) \
+NDCTL_EXPORT rettype name(struct ndctl_cmd *cmd) \
+{ \
+   if (cmd->dimm) { \
+   struct ndctl_smart_ops *ops = 
ndctl_dimm_get_smart_ops(cmd->dimm); \
+   if (ops && ops->op) \
+   return ops->op(cmd); \
+   } \
+   return defretvalue; \
+}
+
+smart_cmd_op(ndctl_cmd_smart_get_flags, smart_get_flags, unsigned int, 0)
+smart_cmd_op(ndctl_cmd_smart_get_health, smart_get_health, unsigned int, 0)
+smart_cmd_op(ndctl_cmd_smart_get_temperature, smart_get_temperature, unsigned 
int, 0)
+smart_cmd_op(ndctl_cmd_smart_get_spares, smart_get_spares, unsigned int, 0)
+smart_cmd_op(ndctl_cmd_smart_get_alarm_flags, smart_get_alarm_flags, unsigned 
int, 0)
+smart_cmd_op(ndctl_cmd_smart_get_life_used, smart_get_life_used, unsigned int, 
0)
+smart_cmd_op(ndctl_cmd_smart_get_shutdown_state, smart_get_shutdown_state, 
unsigned int, 0)
+smart_cmd_op(ndctl_cmd_smart_get_vendor_size, smart_get_vendor_size, unsigned 
int, 0)
+smart_cmd_op(ndctl_cmd_smart_get_vendor_data, smart_get_vendor_data, unsigned 
char *, NULL)
+smart_cmd_op(ndctl_cmd_smart_threshold_get_alarm_control, 
smart_threshold_get_alarm_control, unsigned int, 0)
+smart_cmd_op(ndctl_cmd_smart_threshold_get_temperature, 
smart_threshold_get_temperature, unsigned int, 0)
+smart_cmd_op(ndctl_cmd_smart_threshold_get_spares, 

[ndctl PATCH v3 0/4] ndctl: add support for HPE type N SMART health data

2016-09-16 Thread Brian Boylston
This set of patches adds support for the HPE SMART DSM functions and enables
ndctl to report DIMM health data for HPE type N NVDIMMs.  The relevant
firmware interfaces are described in [1].

The first patch virtualizes the ndctl_cmd_smart*() family of libndctl
interfaces into a set of ndctl_smart_ops, allowing runtime implementation
differentiation depending on the firmware support provided by a DIMM.

The second and third patches add miscellaneous pieces needed for the
final patch:

The fourth patch adds a set of ndctl_smart_ops for the HPE1 DSM family,
based on the firmware interfaces defined in [1].  These ndctl_smart_ops
translate the HPE1 DSM output to match the interface of the existing
Intel DSM-inspired smart_ops.  This delivers health reporting parity for
HPE type N NVDIMMs, however:

When evaluating this ndctl_smart_ops approach, please consider our goal of
adding JSON exports for some of the additional health data defined in [1].
I expect this would entail adding additional accessor functions to
ndctl_smart_ops, but it's not clear whether or how to extend the existing
get_flags()/check flags/get_data() model used by util_dimm_health_to_json().

If you'd like to test these changes, note the following:

. Some of the DSM functions for HPE type N NVDIMMs, including the ones used
  by this patch, require the acpi_ipmi kernel module to be loaded, and you
  may need to manually modprobe it.

. Without [2], you'll need to include '--idle' in your ndctl invocation as
  ndctl will consider type Ns to be disabled and will otherwise omit them.

. Without [3], "alarm_temperature" and "alarm_spares" will be inaccurate.

[1] 
https://github.com/HewlettPackard/hpe-nvm/raw/master/Documentation/NFIT_DSM_DDR4_NVDIMM-N_v84s.pdf
[2] https://lists.01.org/pipermail/linux-nvdimm/2016-August/006619.html
[3] https://lists.01.org/pipermail/linux-nvdimm/2016-September/006810.html

Changes in v3:
  - Dropped the smart_dimm_op() macro and open coded
ndctl_dimm_cmd_new_smart() and ndctl_dimm_cmd_new_smart_threshold().
(suggested by Dan)
  - Do not fail add_dimm() if the read of nfit/family fails, just set
dimm->dsm_family to -1 instead (suggested by Dan).  As part of this,
I also moved the read to after the has_nfit() check.
  - Added missing bibliography to the commit message for the last patch.
  - I realized that v2 wouldn't compile if HAS_SMART was not 1, so I
changed intel_smart_ops and hpe1_smart_ops into pointers and set them
to NULL if HAS_SMART != 1.  They are const because that seems to
avoid compiler warnings about unused variables in cases where
libndctl-private.h is included by something other than libndctl.c.

Changes in v2:
  New approach: taught libndctl how to translate between the HPE1 DSM
  family and the existing ndctl_cmd_smart*() libndctl interfaces
  (as suggested by Dan).

Brian Boylston (4):
  libndctl: introduce ndctl_smart_ops
  libndctl: record dsm family in add_dimm()
  libndctl: enable ND_CMD_CALL
  libndctl: add support for the HPE1 family of DSM SMART functions

 ndctl/Makefile.am|   1 +
 ndctl/lib/libndctl-hpe1.c| 303 ++
 ndctl/lib/libndctl-private.h |  25 
 ndctl/lib/libndctl-smart.c   | 116 ---
 ndctl/lib/libndctl.c |  18 ++-
 ndctl/lib/ndctl-hpe1.h   | 335 +++
 ndctl/libndctl.h.in  |   1 +
 7 files changed, 775 insertions(+), 24 deletions(-)
 create mode 100644 ndctl/lib/libndctl-hpe1.c
 create mode 100644 ndctl/lib/ndctl-hpe1.h

-- 
2.8.3

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH] ndctl: move test/dax-errors buffer to global to avoid gcc optimization

2016-09-16 Thread Dave Jiang
On 09/15/2016 06:18 PM, Elliott, Robert (Persistent Memory) wrote:
> 
> 
>> -Original Message-
>> From: Linux-nvdimm [mailto:linux-nvdimm-boun...@lists.01.org] On
>> Behalf Of Dave Jiang
>> Sent: Thursday, September 15, 2016 5:28 PM
>> To: vishal.l.ve...@intel.com
>> Cc: linux-nvdimm@lists.01.org
>> Subject: [PATCH] ndctl: move test/dax-errors buffer to global to
>> avoid gcc optimization
>>
>> Some gcc toolchain are optimizing out the memcpy and this causes dax-
>> errors
>> to not trigger the SIG_BUS when doing memcpy on an mmap'd buffer. By
>> moving
>> the buffer to a global variable this bypasses the optimization and
>> allow
>> the test to work as intended.
>>
>> Signed-off-by: Dave Jiang 
>> ---
>>  test/dax-errors.c |4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/test/dax-errors.c b/test/dax-errors.c
>> index 11d0031..9ea5c91 100644
>> --- a/test/dax-errors.c
>> +++ b/test/dax-errors.c
>> @@ -17,6 +17,8 @@
>>
>>  static sigjmp_buf sj_env;
>>  static int sig_count;
>> +/* buf is global in order to avoid gcc memcpy optimization */
>> +static void *buf;
>>
>>  static void sigbus_hdl(int sig, siginfo_t *siginfo, void *ptr)
>>  {
>> @@ -27,7 +29,7 @@ static void sigbus_hdl(int sig, siginfo_t *siginfo,
>> void *ptr)
>>
>>  static int test_dax_read_err(int fd)
>>  {
>> -void *base, *buf;
>> +void *base;
>>  int rc = 0;
>>
>>  if (fd < 0) {
>>
> 
> I've run into that kind of problem before, and found that
> marking *buf as volatile (and leaving it inside the function)
> tends to be honored better by aggressive optimizing compilers
> and linkers.

Doesn't appear to work. The compiler discards the volatile.


  CC   dax-errors.o
dax-errors.c: In function ‘test_dax_read_err’:
dax-errors.c:66:9: warning: passing argument 1 of ‘memcpy’ discards
‘volatile’ qualifier from pointer target type [-Wdiscarded-qualifiers]
  memcpy(buf, base, 4096);
 ^~~
In file included from dax-errors.c:8:0:
/usr/include/string.h:42:14: note: expected ‘void * restrict’ but
argument is of type ‘volatile void *’
 extern void *memcpy (void *__restrict __dest, const void *__restrict __src,
  ^~
  CCLD dax-errors

> 
> ---
> Robert Elliott, HPE Persistent Memory
> 
> 
> 
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH] ndctl: move test/dax-errors buffer to global to avoid gcc optimization

2016-09-16 Thread Dave Jiang


On 09/15/2016 06:18 PM, Elliott, Robert (Persistent Memory) wrote:
> 
> 
>> -Original Message-
>> From: Linux-nvdimm [mailto:linux-nvdimm-boun...@lists.01.org] On
>> Behalf Of Dave Jiang
>> Sent: Thursday, September 15, 2016 5:28 PM
>> To: vishal.l.ve...@intel.com
>> Cc: linux-nvdimm@lists.01.org
>> Subject: [PATCH] ndctl: move test/dax-errors buffer to global to
>> avoid gcc optimization
>>
>> Some gcc toolchain are optimizing out the memcpy and this causes dax-
>> errors
>> to not trigger the SIG_BUS when doing memcpy on an mmap'd buffer. By
>> moving
>> the buffer to a global variable this bypasses the optimization and
>> allow
>> the test to work as intended.
>>
>> Signed-off-by: Dave Jiang 
>> ---
>>  test/dax-errors.c |4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/test/dax-errors.c b/test/dax-errors.c
>> index 11d0031..9ea5c91 100644
>> --- a/test/dax-errors.c
>> +++ b/test/dax-errors.c
>> @@ -17,6 +17,8 @@
>>
>>  static sigjmp_buf sj_env;
>>  static int sig_count;
>> +/* buf is global in order to avoid gcc memcpy optimization */
>> +static void *buf;
>>
>>  static void sigbus_hdl(int sig, siginfo_t *siginfo, void *ptr)
>>  {
>> @@ -27,7 +29,7 @@ static void sigbus_hdl(int sig, siginfo_t *siginfo,
>> void *ptr)
>>
>>  static int test_dax_read_err(int fd)
>>  {
>> -void *base, *buf;
>> +void *base;
>>  int rc = 0;
>>
>>  if (fd < 0) {
>>
> 
> I've run into that kind of problem before, and found that
> marking *buf as volatile (and leaving it inside the function)
> tends to be honored better by aggressive optimizing compilers
> and linkers.

I'll make the change.

> 
> ---
> Robert Elliott, HPE Persistent Memory
> 
> 
> 
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks

2016-09-16 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
Reviewed-by: Ross Zwisler 
---
 fs/ext2/inode.c | 39 ---
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index d5c7d09..2a69ab2 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -618,7 +618,7 @@ static void ext2_splice_branch(struct inode *inode,
  */
 static int ext2_get_blocks(struct inode *inode,
   sector_t iblock, unsigned long maxblocks,
-  struct buffer_head *bh_result,
+  u32 *bno, bool *new, bool *boundary,
   int create)
 {
int err = -EIO;
@@ -644,7 +644,6 @@ static int ext2_get_blocks(struct inode *inode,
/* Simplest case - block found, no allocation needed */
if (!partial) {
first_block = le32_to_cpu(chain[depth - 1].key);
-   clear_buffer_new(bh_result); /* What's this do? */
count++;
/*map more blocks*/
while (count < maxblocks && count <= blocks_to_boundary) {
@@ -699,7 +698,6 @@ static int ext2_get_blocks(struct inode *inode,
mutex_unlock(>truncate_mutex);
if (err)
goto cleanup;
-   clear_buffer_new(bh_result);
goto got_it;
}
}
@@ -745,15 +743,16 @@ static int ext2_get_blocks(struct inode *inode,
mutex_unlock(>truncate_mutex);
goto cleanup;
}
-   } else
-   set_buffer_new(bh_result);
+   } else {
+   *new = true;
+   }
 
ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
mutex_unlock(>truncate_mutex);
 got_it:
-   map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+   *bno = le32_to_cpu(chain[depth-1].key);
if (count > blocks_to_boundary)
-   set_buffer_boundary(bh_result);
+   *boundary = true;
err = count;
/* Clean up and exit */
partial = chain + depth - 1;/* the whole chain */
@@ -765,16 +764,26 @@ cleanup:
return err;
 }
 
-int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head 
*bh_result, int create)
+int ext2_get_block(struct inode *inode, sector_t iblock,
+   struct buffer_head *bh_result, int create)
 {
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-   int ret = ext2_get_blocks(inode, iblock, max_blocks,
- bh_result, create);
-   if (ret > 0) {
-   bh_result->b_size = (ret << inode->i_blkbits);
-   ret = 0;
-   }
-   return ret;
+   bool new = false, boundary = false;
+   u32 bno;
+   int ret;
+
+   ret = ext2_get_blocks(inode, iblock, max_blocks, &bno, &new, &boundary,
+   create);
+   if (ret <= 0)
+   return ret;
+
+   map_bh(bh_result, inode->i_sb, bno);
+   bh_result->b_size = (ret << inode->i_blkbits);
+   if (new)
+   set_buffer_new(bh_result);
+   if (boundary)
+   set_buffer_boundary(bh_result);
+   return 0;
 
 }
 
-- 
2.1.4

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[PATCH 12/12] ext2: use iomap to implement DAX

2016-09-16 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
Reviewed-by: Ross Zwisler 
---
 fs/ext2/Kconfig |  1 +
 fs/ext2/ext2.h  |  1 +
 fs/ext2/file.c  | 76 +++--
 fs/ext2/inode.c | 63 +++
 4 files changed, 129 insertions(+), 12 deletions(-)

diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index c634874e..36bea5a 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,5 +1,6 @@
 config EXT2_FS
tristate "Second extended fs support"
+   select FS_IOMAP if FS_DAX
help
  Ext2 is a standard Linux file system for hard disks.
 
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 06af2f9..37e2be7 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,6 +814,7 @@ extern const struct file_operations ext2_file_operations;
 /* inode.c */
 extern const struct address_space_operations ext2_aops;
 extern const struct address_space_operations ext2_nobh_aops;
+extern struct iomap_ops ext2_iomap_ops;
 
 /* namei.c */
 extern const struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 5efeefe..423cc01 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -22,11 +22,59 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
 
 #ifdef CONFIG_FS_DAX
+static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+   struct inode *inode = iocb->ki_filp->f_mapping->host;
+   ssize_t ret;
+
+   if (!iov_iter_count(to))
+   return 0; /* skip atime */
+
+   inode_lock_shared(inode);
+   ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops);
+   inode_unlock_shared(inode);
+
+   file_accessed(iocb->ki_filp);
+   return ret;
+}
+
+static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+   struct file *file = iocb->ki_filp;
+   struct inode *inode = file->f_mapping->host;
+   ssize_t ret;
+
+   inode_lock(inode);
+   ret = generic_write_checks(iocb, from);
+   if (ret <= 0)
+   goto out_unlock;
+   ret = file_remove_privs(file);
+   if (ret)
+   goto out_unlock;
+   ret = file_update_time(file);
+   if (ret)
+   goto out_unlock;
+
+   ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
+   if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+   i_size_write(inode, iocb->ki_pos);
+   mark_inode_dirty(inode);
+   }
+
+out_unlock:
+   inode_unlock(inode);
+   if (ret > 0)
+   ret = generic_write_sync(iocb, ret);
+   return ret;
+}
+
 /*
  * The lock ordering for ext2 DAX fault paths is:
  *
@@ -51,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct 
vm_fault *vmf)
}
down_read(>dax_sem);
 
-   ret = dax_fault(vma, vmf, ext2_get_block);
+   ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops);
 
up_read(>dax_sem);
if (vmf->flags & FAULT_FLAG_WRITE)
@@ -156,14 +204,28 @@ int ext2_fsync(struct file *file, loff_t start, loff_t 
end, int datasync)
return ret;
 }
 
-/*
- * We have mostly NULL's here: the current defaults are ok for
- * the ext2 filesystem.
- */
+static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+#ifdef CONFIG_FS_DAX
+   if (IS_DAX(iocb->ki_filp->f_mapping->host))
+   return ext2_dax_read_iter(iocb, to);
+#endif
+   return generic_file_read_iter(iocb, to);
+}
+
+static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+#ifdef CONFIG_FS_DAX
+   if (IS_DAX(iocb->ki_filp->f_mapping->host))
+   return ext2_dax_write_iter(iocb, from);
+#endif
+   return generic_file_write_iter(iocb, from);
+}
+
 const struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
-   .read_iter  = generic_file_read_iter,
-   .write_iter = generic_file_write_iter,
+   .read_iter  = ext2_file_read_iter,
+   .write_iter = ext2_file_write_iter,
.unlocked_ioctl = ext2_ioctl,
 #ifdef CONFIG_COMPAT
.compat_ioctl   = ext2_compat_ioctl,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 2a69ab2..aae5f61 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include "ext2.h"
@@ -787,6 +788,59 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
 
 }
 
+#ifdef CONFIG_FS_DAX
+static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+   unsigned flags, struct iomap *iomap)
+{
+   unsigned int blkbits = inode->i_blkbits;
+   unsigned long first_block = offset >> blkbits;
+   unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
+   bool new = false, boundary = false;
+   u32 bno;
+   int ret;
+
+   ret = ext2_get_blocks(inode, 

[PATCH 10/12] xfs: use iomap to implement DAX

2016-09-16 Thread Christoph Hellwig
Another user of buffer_heads bites the dust.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Ross Zwisler 
---
 fs/xfs/xfs_file.c  | 61 +++---
 fs/xfs/xfs_iomap.c | 11 ++
 2 files changed, 24 insertions(+), 48 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 62649cc..f99d7fa 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -332,10 +332,7 @@ xfs_file_dax_read(
struct kiocb*iocb,
struct iov_iter *to)
 {
-   struct address_space*mapping = iocb->ki_filp->f_mapping;
-   struct inode*inode = mapping->host;
-   struct xfs_inode*ip = XFS_I(inode);
-   struct iov_iter data = *to;
+   struct xfs_inode*ip = XFS_I(iocb->ki_filp->f_mapping->host);
size_t  count = iov_iter_count(to);
ssize_t ret = 0;
 
@@ -345,11 +342,7 @@ xfs_file_dax_read(
return 0; /* skip atime */
 
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-   ret = dax_do_io(iocb, inode, , xfs_get_blocks_direct, NULL, 0);
-   if (ret > 0) {
-   iocb->ki_pos += ret;
-   iov_iter_advance(to, ret);
-   }
+   ret = iomap_dax_rw(iocb, to, _iomap_ops);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
file_accessed(iocb->ki_filp);
@@ -711,52 +704,32 @@ xfs_file_dax_write(
struct kiocb*iocb,
struct iov_iter *from)
 {
-   struct address_space*mapping = iocb->ki_filp->f_mapping;
-   struct inode*inode = mapping->host;
+   struct inode*inode = iocb->ki_filp->f_mapping->host;
struct xfs_inode*ip = XFS_I(inode);
-   ssize_t ret = 0;
int iolock = XFS_IOLOCK_EXCL;
-   struct iov_iter data;
+   ssize_t ret, error = 0;
+   size_t  count;
+   loff_t  pos;
 
xfs_rw_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, );
if (ret)
goto out;
 
-   /*
-* Yes, even DAX files can have page cache attached to them:  A zeroed
-* page is inserted into the pagecache when we have to serve a write
-* fault on a hole.  It should never be dirtied and can simply be
-* dropped from the pagecache once we get real data for the page.
-*
-* XXX: This is racy against mmap, and there's nothing we can do about
-* it. dax_do_io() should really do this invalidation internally as
-* it will know if we've allocated over a holei for this specific IO and
-* if so it needs to update the mapping tree and invalidate existing
-* PTEs over the newly allocated range. Remove this invalidation when
-* dax_do_io() is fixed up.
-*/
-   if (mapping->nrpages) {
-   loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
-
-   ret = invalidate_inode_pages2_range(mapping,
-   iocb->ki_pos >> PAGE_SHIFT,
-   end >> PAGE_SHIFT);
-   WARN_ON_ONCE(ret);
-   }
+   pos = iocb->ki_pos;
+   count = iov_iter_count(from);
 
-   trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
+   trace_xfs_file_dax_write(ip, count, pos);
 
-   data = *from;
-   ret = dax_do_io(iocb, inode, , xfs_get_blocks_direct,
-   xfs_end_io_direct_write, 0);
-   if (ret > 0) {
-   iocb->ki_pos += ret;
-   iov_iter_advance(from, ret);
+   ret = iomap_dax_rw(iocb, from, _iomap_ops);
+   if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+   i_size_write(inode, iocb->ki_pos);
+   error = xfs_setfilesize(ip, pos, ret);
}
+
 out:
xfs_rw_iunlock(ip, iolock);
-   return ret;
+   return error ? error : ret;
 }
 
 STATIC ssize_t
@@ -1495,7 +1468,7 @@ xfs_filemap_page_mkwrite(
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
if (IS_DAX(inode)) {
-   ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
+   ret = iomap_dax_fault(vma, vmf, _iomap_ops);
} else {
ret = iomap_page_mkwrite(vma, vmf, _iomap_ops);
ret = block_page_mkwrite_return(ret);
@@ -1529,7 +1502,7 @@ xfs_filemap_fault(
 * changes to xfs_get_blocks_direct() to map unwritten extent
 * ioend for conversion on read-only mappings.
 */
-   ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault);
+   ret = iomap_dax_fault(vma, vmf, _iomap_ops);
} else
ret = filemap_fault(vma, vmf);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
diff --git a/fs/xfs/xfs_iomap.c 

[PATCH 07/12] xfs: fix locking for DAX writes

2016-09-16 Thread Christoph Hellwig
So far DAX writes inherited the locking from direct I/O writes, but the direct
I/O model of using shared locks for writes is actually wrong for DAX.  For
direct I/O we're out of any standards and don't have to provide the Posix
required exclusion between writers, but for DAX, which gets transparently
enabled on applications without any knowledge of it, we can't simply drop the
requirement.  Even worse this only happens for aligned writes and thus
doesn't show up for many typical use cases.

Signed-off-by: Christoph Hellwig 
---
 fs/xfs/xfs_file.c | 20 +---
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e612a02..62649cc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -714,24 +714,11 @@ xfs_file_dax_write(
struct address_space*mapping = iocb->ki_filp->f_mapping;
struct inode*inode = mapping->host;
struct xfs_inode*ip = XFS_I(inode);
-   struct xfs_mount*mp = ip->i_mount;
ssize_t ret = 0;
-   int unaligned_io = 0;
-   int iolock;
+   int iolock = XFS_IOLOCK_EXCL;
struct iov_iter data;
 
-   /* "unaligned" here means not aligned to a filesystem block */
-   if ((iocb->ki_pos & mp->m_blockmask) ||
-   ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
-   unaligned_io = 1;
-   iolock = XFS_IOLOCK_EXCL;
-   } else if (mapping->nrpages) {
-   iolock = XFS_IOLOCK_EXCL;
-   } else {
-   iolock = XFS_IOLOCK_SHARED;
-   }
xfs_rw_ilock(ip, iolock);
-
ret = xfs_file_aio_write_checks(iocb, from, );
if (ret)
goto out;
@@ -758,11 +745,6 @@ xfs_file_dax_write(
WARN_ON_ONCE(ret);
}
 
-   if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
-   xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-   iolock = XFS_IOLOCK_SHARED;
-   }
-
trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
 
data = *from;
-- 
2.1.4

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[PATCH 08/12] xfs: take the ilock shared if possible in xfs_file_iomap_begin

2016-09-16 Thread Christoph Hellwig
We always just read the extent first, and will later lock exclusively
after first dropping the lock in case we actually allocate blocks.

Signed-off-by: Christoph Hellwig 
---
 fs/xfs/xfs_iomap.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5d06a2d..c3cc175 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -955,6 +955,7 @@ xfs_file_iomap_begin(
struct xfs_bmbt_irecimap;
xfs_fileoff_t   offset_fsb, end_fsb;
int nimaps = 1, error = 0;
+   unsignedlockmode;
 
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
@@ -964,7 +965,7 @@ xfs_file_iomap_begin(
iomap);
}
 
-   xfs_ilock(ip, XFS_ILOCK_EXCL);
+   lockmode = xfs_ilock_data_map_shared(ip);
 
ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -975,7 +976,7 @@ xfs_file_iomap_begin(
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, ,
   , XFS_BMAPI_ENTIRE);
if (error) {
-   xfs_iunlock(ip, XFS_ILOCK_EXCL);
+   xfs_iunlock(ip, lockmode);
return error;
}
 
@@ -995,7 +996,8 @@ xfs_file_iomap_begin(
 * xfs_iomap_write_direct() expects the shared lock. It
 * is unlocked on return.
 */
-   xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+   if (lockmode == XFS_ILOCK_EXCL)
+   xfs_ilock_demote(ip, lockmode);
error = xfs_iomap_write_direct(ip, offset, length, ,
nimaps);
if (error)
@@ -1006,7 +1008,7 @@ xfs_file_iomap_begin(
} else {
ASSERT(nimaps);
 
-   xfs_iunlock(ip, XFS_ILOCK_EXCL);
+   xfs_iunlock(ip, lockmode);
trace_xfs_iomap_found(ip, offset, length, 0, );
}
 
-- 
2.1.4

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[PATCH 09/12] xfs: refactor xfs_setfilesize

2016-09-16 Thread Christoph Hellwig
Rename the current function to __xfs_setfilesize and add a non-static
wrapper that also takes care of creating the transaction.  This new
helper will be used by the new iomap-based DAX path.

Signed-off-by: Christoph Hellwig 
---
 fs/xfs/xfs_aops.c | 31 +--
 fs/xfs/xfs_aops.h |  1 +
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 7575cfc..4a28fa9 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -200,7 +200,7 @@ xfs_setfilesize_trans_alloc(
  * Update on-disk file size now that data has been written to disk.
  */
 STATIC int
-xfs_setfilesize(
+__xfs_setfilesize(
struct xfs_inode*ip,
struct xfs_trans*tp,
xfs_off_t   offset,
@@ -225,6 +225,23 @@ xfs_setfilesize(
return xfs_trans_commit(tp);
 }
 
+int
+xfs_setfilesize(
+   struct xfs_inode*ip,
+   xfs_off_t   offset,
+   size_t  size)
+{
+   struct xfs_mount*mp = ip->i_mount;
+   struct xfs_trans*tp;
+   int error;
+
+   error = xfs_trans_alloc(mp, _RES(mp)->tr_fsyncts, 0, 0, 0, );
+   if (error)
+   return error;
+
+   return __xfs_setfilesize(ip, tp, offset, size);
+}
+
 STATIC int
 xfs_setfilesize_ioend(
struct xfs_ioend*ioend,
@@ -247,7 +264,7 @@ xfs_setfilesize_ioend(
return error;
}
 
-   return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
+   return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
 }
 
 /*
@@ -1336,13 +1353,12 @@ xfs_end_io_direct_write(
 {
struct inode*inode = file_inode(iocb->ki_filp);
struct xfs_inode*ip = XFS_I(inode);
-   struct xfs_mount*mp = ip->i_mount;
uintptr_t   flags = (uintptr_t)private;
int error = 0;
 
trace_xfs_end_io_direct_write(ip, offset, size);
 
-   if (XFS_FORCED_SHUTDOWN(mp))
+   if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
 
if (size <= 0)
@@ -1380,14 +1396,9 @@ xfs_end_io_direct_write(
 
error = xfs_iomap_write_unwritten(ip, offset, size);
} else if (flags & XFS_DIO_FLAG_APPEND) {
-   struct xfs_trans *tp;
-
trace_xfs_end_io_direct_write_append(ip, offset, size);
 
-   error = xfs_trans_alloc(mp, _RES(mp)->tr_fsyncts, 0, 0, 0,
-   );
-   if (!error)
-   error = xfs_setfilesize(ip, tp, offset, size);
+   error = xfs_setfilesize(ip, offset, size);
}
 
return error;
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index bf2d9a1..1950e3b 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -62,6 +62,7 @@ int   xfs_get_blocks_dax_fault(struct inode *inode, sector_t 
offset,
 
 intxfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private);
+intxfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
 extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
-- 
2.1.4

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[PATCH 05/12] dax: provide an iomap based dax read/write path

2016-09-16 Thread Christoph Hellwig
This is a much simpler implementation of the DAX read/write path that makes
use of the iomap infrastructure.  It does not try to mirror the direct I/O
calling conventions and thus doesn't have to deal with i_dio_count or the
end_io handler, but instead leaves locking and filesystem-specific I/O
completion to the caller.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Ross Zwisler 
---
 fs/dax.c| 114 
 include/linux/dax.h |   4 ++
 2 files changed, 118 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 84343ce..1f9f2d4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -31,6 +31,8 @@
 #include 
 #include 
 #include 
+#include 
+#include "internal.h"
 
 /*
  * We use lowest available bit in exceptional entry for locking, other two
@@ -1241,3 +1243,115 @@ int dax_truncate_page(struct inode *inode, loff_t from, 
get_block_t get_block)
return dax_zero_page_range(inode, from, length, get_block);
 }
 EXPORT_SYMBOL_GPL(dax_truncate_page);
+
+#ifdef CONFIG_FS_IOMAP
+static loff_t
+iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+   struct iomap *iomap)
+{
+   struct iov_iter *iter = data;
+   loff_t end = pos + length, done = 0;
+   ssize_t ret = 0;
+
+   if (iov_iter_rw(iter) == READ) {
+   end = min(end, i_size_read(inode));
+   if (pos >= end)
+   return 0;
+
+   if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+   return iov_iter_zero(min(length, end - pos), iter);
+   }
+
+   if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+   return -EIO;
+
+   while (pos < end) {
+   unsigned offset = pos & (PAGE_SIZE - 1);
+   struct blk_dax_ctl dax = { 0 };
+   ssize_t map_len;
+
+   dax.sector = iomap->blkno +
+   (((pos & PAGE_MASK) - iomap->offset) >> 9);
+   dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
+   map_len = dax_map_atomic(iomap->bdev, );
+   if (map_len < 0) {
+   ret = map_len;
+   break;
+   }
+
+   dax.addr += offset;
+   map_len -= offset;
+   if (map_len > end - pos)
+   map_len = end - pos;
+
+   if (iov_iter_rw(iter) == WRITE)
+   map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
+   else
+   map_len = copy_to_iter(dax.addr, map_len, iter);
+   dax_unmap_atomic(iomap->bdev, );
+   if (map_len <= 0) {
+   ret = map_len ? map_len : -EFAULT;
+   break;
+   }
+
+   pos += map_len;
+   length -= map_len;
+   done += map_len;
+   }
+
+   return done ? done : ret;
+}
+
+/**
+ * iomap_dax_rw - Perform I/O to a DAX file
+ * @iocb:  The control block for this I/O
+ * @iter:  The addresses to do I/O from or to
+ * @ops:   iomap ops passed from the file system
+ *
+ * This function performs read and write operations to directly mapped
+ * persistent memory.  The caller needs to take care of read/write exclusion
+ * and evicting any page cache pages in the region under I/O.
+ */
+ssize_t
+iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+   struct iomap_ops *ops)
+{
+   struct address_space *mapping = iocb->ki_filp->f_mapping;
+   struct inode *inode = mapping->host;
+   loff_t pos = iocb->ki_pos, ret = 0, done = 0;
+   unsigned flags = 0;
+
+   if (iov_iter_rw(iter) == WRITE)
+   flags |= IOMAP_WRITE;
+
+   /*
+* Yes, even DAX files can have page cache attached to them:  A zeroed
+* page is inserted into the pagecache when we have to serve a write
+* fault on a hole.  It should never be dirtied and can simply be
+* dropped from the pagecache once we get real data for the page.
+*
+* XXX: This is racy against mmap, and there's nothing we can do about
+* it. We'll eventually need to shift this down even further so that
+* we can check if we allocated blocks over a hole first.
+*/
+   if (mapping->nrpages) {
+   ret = invalidate_inode_pages2_range(mapping,
+   pos >> PAGE_SHIFT,
+   (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
+   WARN_ON_ONCE(ret);
+   }
+
+   while (iov_iter_count(iter)) {
+   ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
+   iter, iomap_dax_actor);
+   if (ret <= 0)
+   break;
+   pos += ret;
+   done += ret;
+   }
+
+   iocb->ki_pos += done;
+   return done ? 

iomap based DAX path V3

2016-09-16 Thread Christoph Hellwig
This series adds a DAX I/O path based on the iomap interface.  This
allows more efficient block mapping including defined hole semantics,
and is an important step toward getting rid of buffer_heads in XFS.

Changes since V2:
 - feedback to various small comments from Ross
 - added Reviewed-by: tags

Changes since V1:
 - added a conversion of ext2 to the iomap interface
 - addressed feedback from Ross, Dave and Robert

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


[PATCH 02/12] iomap: expose iomap_apply outside iomap.c

2016-09-16 Thread Christoph Hellwig
This allows the DAX code to use it.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Ross Zwisler 
---
 fs/internal.h | 11 +++
 fs/iomap.c|  5 +
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index ba07376..8591786 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
 struct super_block;
 struct file_system_type;
 struct iomap;
+struct iomap_ops;
 struct linux_binprm;
 struct path;
 struct mount;
@@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
 extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
unsigned long arg);
 extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/*
+ * iomap support:
+ */
+typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
+   void *data, struct iomap *iomap);
+
+loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+   unsigned flags, struct iomap_ops *ops, void *data,
+   iomap_actor_t actor);
diff --git a/fs/iomap.c b/fs/iomap.c
index 706270f..f4df9c6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -27,9 +27,6 @@
 #include 
 #include "internal.h"
 
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
-   void *data, struct iomap *iomap);
-
 /*
  * Execute a iomap write on a segment of the mapping that spans a
  * contiguous range of pages that have identical block mapping state.
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t 
pos, loff_t len,
  * resources they require in the iomap_begin call, and release them in the
  * iomap_end call.
  */
-static loff_t
+loff_t
 iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
struct iomap_ops *ops, void *data, iomap_actor_t actor)
 {
-- 
2.1.4

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm