[PATCH v2 17/25] staging: lustre: libcfs: rename cpumask_var_t variables to *_mask

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Because we handle both CPU masks and core identifiers, the two can
easily be confused. To avoid this, rename the various cpumask_var_t
variables so that their names end in *_mask.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata 
Reviewed-by: James Simmons 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No changes in code from earlier patch

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 62 -
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 1c10529..fb27dac 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -710,23 +710,23 @@ int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
  * We always prefer to choose CPU in the same core/socket.
  */
 static int cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
-   cpumask_t *node, int number)
+   cpumask_t *node_mask, int number)
 {
-   cpumask_var_t socket;
-   cpumask_var_t core;
+   cpumask_var_t socket_mask;
+   cpumask_var_t core_mask;
int rc = 0;
int cpu;
 
LASSERT(number > 0);
 
-   if (number >= cpumask_weight(node)) {
-   while (!cpumask_empty(node)) {
-   cpu = cpumask_first(node);
+   if (number >= cpumask_weight(node_mask)) {
+   while (!cpumask_empty(node_mask)) {
+   cpu = cpumask_first(node_mask);
 
rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
if (!rc)
return -EINVAL;
-   cpumask_clear_cpu(cpu, node);
+   cpumask_clear_cpu(cpu, node_mask);
}
return 0;
}
@@ -736,34 +736,34 @@ static int cfs_cpt_choose_ncpus(struct cfs_cpt_table 
*cptab, int cpt,
 * As we cannot initialize a cpumask_var_t, we need
 * to alloc both before we can risk trying to free either
 */
-   if (!zalloc_cpumask_var(, GFP_NOFS))
+   if (!zalloc_cpumask_var(_mask, GFP_NOFS))
rc = -ENOMEM;
-   if (!zalloc_cpumask_var(, GFP_NOFS))
+   if (!zalloc_cpumask_var(_mask, GFP_NOFS))
rc = -ENOMEM;
if (rc)
goto out;
 
-   while (!cpumask_empty(node)) {
-   cpu = cpumask_first(node);
+   while (!cpumask_empty(node_mask)) {
+   cpu = cpumask_first(node_mask);
 
/* get cpumask for cores in the same socket */
-   cpumask_copy(socket, topology_core_cpumask(cpu));
-   cpumask_and(socket, socket, node);
+   cpumask_copy(socket_mask, topology_core_cpumask(cpu));
+   cpumask_and(socket_mask, socket_mask, node_mask);
 
-   LASSERT(!cpumask_empty(socket));
+   LASSERT(!cpumask_empty(socket_mask));
 
-   while (!cpumask_empty(socket)) {
+   while (!cpumask_empty(socket_mask)) {
int i;
 
/* get cpumask for hts in the same core */
-   cpumask_copy(core, topology_sibling_cpumask(cpu));
-   cpumask_and(core, core, node);
+   cpumask_copy(core_mask, topology_sibling_cpumask(cpu));
+   cpumask_and(core_mask, core_mask, node_mask);
 
-   LASSERT(!cpumask_empty(core));
+   LASSERT(!cpumask_empty(core_mask));
 
-   for_each_cpu(i, core) {
-   cpumask_clear_cpu(i, socket);
-   cpumask_clear_cpu(i, node);
+   for_each_cpu(i, core_mask) {
+   cpumask_clear_cpu(i, socket_mask);
+   cpumask_clear_cpu(i, node_mask);
 
rc = cfs_cpt_set_cpu(cptab, cpt, i);
if (!rc) {
@@ -774,13 +774,13 @@ static int cfs_cpt_choose_ncpus(struct cfs_cpt_table 
*cptab, int cpt,
if (!--number)
goto out;
}
-   cpu = cpumask_first(socket);
+   cpu = cpumask_first(socket_mask);
}
}
 
 out:
-   free_cpumask_var(socket);
-   free_cpumask_var(core);
+   free_cpumask_var(socket_mask);
+   free_cpumask_var(core_mask);
return rc;
 }
 
@@ -831,7 +831,7 @@ static int cfs_cpt_num_estimate(void)
 static struct cfs_cpt_table *cfs_cpt_table_create(int ncpt)
 {
struct cfs_cpt_table *cptab = NULL;
-   cpumask_v

[PATCH v2 25/25] staging: lustre: ptlrpc: use current CPU instead of hardcoded 0

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Fix a crash that occurs if CPU 0 is disabled.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8710
Reviewed-on: https://review.whamcloud.com/23305
Reviewed-by: Doug Oucharek 
Reviewed-by: Andreas Dilger 
Signed-off-by: James Simmons 
---
Changelog:

v1) New patch to address crash in ptlrpc

 drivers/staging/lustre/lustre/ptlrpc/service.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c 
b/drivers/staging/lustre/lustre/ptlrpc/service.c
index 3fd8c74..8e74a45 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/service.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/service.c
@@ -421,7 +421,7 @@ static void ptlrpc_at_timer(struct timer_list *t)
 * there are.
 */
/* weight is # of HTs */
-   if (cpumask_weight(topology_sibling_cpumask(0)) > 1) {
+   if 
(cpumask_weight(topology_sibling_cpumask(smp_processor_id())) > 1) {
/* depress thread factor for hyper-thread */
factor = factor - (factor >> 1) + (factor >> 3);
}
@@ -2221,15 +2221,16 @@ static int ptlrpc_hr_main(void *arg)
struct ptlrpc_hr_thread *hrt = arg;
struct ptlrpc_hr_partition *hrp = hrt->hrt_partition;
LIST_HEAD(replies);
-   char threadname[20];
int rc;
 
-   snprintf(threadname, sizeof(threadname), "ptlrpc_hr%02d_%03d",
-hrp->hrp_cpt, hrt->hrt_id);
unshare_fs_struct();
 
rc = cfs_cpt_bind(ptlrpc_hr.hr_cpt_table, hrp->hrp_cpt);
if (rc != 0) {
+   char threadname[20];
+
+   snprintf(threadname, sizeof(threadname), "ptlrpc_hr%02d_%03d",
+hrp->hrp_cpt, hrt->hrt_id);
CWARN("Failed to bind %s on CPT %d of CPT table %p: rc = %d\n",
  threadname, hrp->hrp_cpt, ptlrpc_hr.hr_cpt_table, rc);
}
@@ -2528,7 +2529,7 @@ int ptlrpc_hr_init(void)
 
init_waitqueue_head(_hr.hr_waitq);
 
-   weight = cpumask_weight(topology_sibling_cpumask(0));
+   weight = cpumask_weight(topology_sibling_cpumask(smp_processor_id()));
 
cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
hrp->hrp_cpt = i;
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 19/25] staging: lustre: libcfs: update debug messages

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

In cfs_cpt_bind(), change the CERROR to a CDEBUG. Make the debug
message in cfs_cpt_table_create_pattern() more understandable.
Report the rc value when cfs_cpt_table_create() fails.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata 
Reviewed-by: James Simmons 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No changes in code from earlier patch

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index e12d337..7f1061e 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -480,7 +480,8 @@ void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int 
cpt, int cpu)
/* caller doesn't know the partition ID */
cpt = cptab->ctb_cpu2cpt[cpu];
if (cpt < 0) { /* not set in this CPT-table */
-   CDEBUG(D_INFO, "Try to unset cpu %d which is not in 
CPT-table %p\n",
+   CDEBUG(D_INFO,
+  "Try to unset cpu %d which is not in CPT-table 
%p\n",
   cpt, cptab);
return;
}
@@ -506,7 +507,8 @@ int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int 
cpt,
 
if (!cpumask_weight(mask) ||
cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
-   CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU 
partition %d\n",
+   CDEBUG(D_INFO,
+  "No online CPU is found in the CPU mask for CPU 
partition %d\n",
   cpt);
return 0;
}
@@ -683,7 +685,8 @@ int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
}
 
if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) {
-   CERROR("No online CPU found in CPU partition %d, did someone do 
CPU hotplug on system? You might need to reload Lustre modules to keep system 
working well.\n",
+   CDEBUG(D_INFO,
+  "No online CPU found in CPU partition %d, did someone do 
CPU hotplug on system? You might need to reload Lustre modules to keep system 
working well.\n",
   cpt);
return -EINVAL;
}
@@ -914,8 +917,8 @@ static struct cfs_cpt_table *cfs_cpt_table_create(int ncpt)
 failed_mask:
free_cpumask_var(node_mask);
 failed:
-   CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, 
online HW nodes: %d, HW cpus: %d.\n",
-  ncpt, num_online_nodes(), num_online_cpus());
+   CERROR("Failed (rc = %d) to setup CPU partition table with %d 
partitions, online HW NUMA nodes: %d, HW CPU cores: %d.\n",
+  rc, ncpt, num_online_nodes(), num_online_cpus());
 
if (cptab)
cfs_cpt_table_free(cptab);
@@ -1030,7 +1033,7 @@ static struct cfs_cpt_table 
*cfs_cpt_table_create_pattern(char *pattern)
 
bracket = strchr(str, ']');
if (!bracket) {
-   CERROR("missing right bracket for cpt %d, %s\n",
+   CERROR("Missing right bracket for partition %d, %s\n",
   cpt, str);
goto failed;
}
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 20/25] staging: lustre: libcfs: make tolerant to offline CPUs and empty NUMA nodes

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Rework the CPU partition code to make it more tolerant of
offline CPUs and empty NUMA nodes.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata 
Reviewed-by: James Simmons 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No changes in code from earlier patch

 .../lustre/include/linux/libcfs/libcfs_cpu.h   |   2 +
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c| 132 +
 drivers/staging/lustre/lnet/lnet/lib-msg.c |   2 +
 3 files changed, 60 insertions(+), 76 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 9f4ba9d..c0aa0b3 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -91,6 +91,8 @@ struct cfs_cpu_partition {
unsigned int*cpt_distance;
/* spread rotor for NUMA allocator */
int cpt_spread_rotor;
+   /* NUMA node if cpt_nodemask is empty */
+   int cpt_node;
 };
 #endif /* CONFIG_SMP */
 
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 7f1061e..99a9494 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -457,8 +457,16 @@ int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, 
int cpu)
return 0;
}
 
-   LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
-   LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
+   if (cpumask_test_cpu(cpu, cptab->ctb_cpumask)) {
+   CDEBUG(D_INFO, "CPU %d is already in cpumask\n", cpu);
+   return 0;
+   }
+
+   if (cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)) {
+   CDEBUG(D_INFO, "CPU %d is already in partition %d cpumask\n",
+  cpu, cptab->ctb_cpu2cpt[cpu]);
+   return 0;
+   }
 
cfs_cpt_add_cpu(cptab, cpt, cpu);
cfs_cpt_add_node(cptab, cpt, cpu_to_node(cpu));
@@ -527,8 +535,10 @@ void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, 
int cpt,
 {
int cpu;
 
-   for_each_cpu(cpu, mask)
-   cfs_cpt_unset_cpu(cptab, cpt, cpu);
+   for_each_cpu(cpu, mask) {
+   cfs_cpt_del_cpu(cptab, cpt, cpu);
+   cfs_cpt_del_node(cptab, cpt, cpu_to_node(cpu));
+   }
 }
 EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
 
@@ -579,10 +589,8 @@ int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int 
cpt,
 {
int node;
 
-   for_each_node_mask(node, *mask) {
-   if (!cfs_cpt_set_node(cptab, cpt, node))
-   return 0;
-   }
+   for_each_node_mask(node, *mask)
+   cfs_cpt_set_node(cptab, cpt, node);
 
return 1;
 }
@@ -603,7 +611,7 @@ int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int 
cpt)
nodemask_t *mask;
int weight;
int rotor;
-   int node;
+   int node = 0;
 
/* convert CPU partition ID to HW node id */
 
@@ -613,20 +621,20 @@ int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int 
cpt)
} else {
mask = cptab->ctb_parts[cpt].cpt_nodemask;
rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
+   node  = cptab->ctb_parts[cpt].cpt_node;
}
 
weight = nodes_weight(*mask);
-   LASSERT(weight > 0);
-
-   rotor %= weight;
+   if (weight > 0) {
+   rotor %= weight;
 
-   for_each_node_mask(node, *mask) {
-   if (!rotor--)
-   return node;
+   for_each_node_mask(node, *mask) {
+   if (!rotor--)
+   return node;
+   }
}
 
-   LBUG();
-   return 0;
+   return node;
 }
 EXPORT_SYMBOL(cfs_cpt_spread_node);
 
@@ -719,17 +727,21 @@ static int cfs_cpt_choose_ncpus(struct cfs_cpt_table 
*cptab, int cpt,
cpumask_var_t core_mask;
int rc = 0;
int cpu;
+   int i;
 
LASSERT(number > 0);
 
if (number >= cpumask_weight(node_mask)) {
while (!cpumask_empty(node_mask)) {
cpu = cpumask_first(node_mask);
+   cpumask_clear_cpu(cpu, node_mask);
+
+   if (!cpu_online(cpu))
+   continue;
 
rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
if (!rc)
return -EINVAL;
-   cpumask_clear_cpu(cpu, node_mask);
}

[PATCH v2 23/25] staging: lustre: libcfs: rework CPU pattern parsing code

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Currently the module parameter string for the CPU pattern can be
modified, which is wrong. Rewrite the CPU pattern parsing code
so that the passed buffer is never changed. This change
also enables us to add real error propagation to the calling
functions.

Signed-off-by: Dmitry Eremin 
Signed-off-by: Amir Shehata 
Signed-off-by: Andreas Dilger 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23306
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9715
Reviewed-on: https://review.whamcloud.com/27872
Reviewed-by: James Simmons 
Reviewed-by: Andreas Dilger 
Reviewed-by: Patrick Farrell 
Reviewed-by: Olaf Weber 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No changes in code from earlier patch

 .../lustre/include/linux/libcfs/libcfs_cpu.h   |   2 +-
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c| 146 -
 2 files changed, 87 insertions(+), 61 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index c0aa0b3..12ed0a9 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -393,7 +393,7 @@ static inline int cfs_cpu_init(void)
 
 static inline void cfs_cpu_fini(void)
 {
-   if (cfs_cpt_tab) {
+   if (!IS_ERR_OR_NULL(cfs_cpt_tab)) {
cfs_cpt_table_free(cfs_cpt_tab);
cfs_cpt_tab = NULL;
}
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 649f7f9..aed48de 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -692,11 +692,11 @@ int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
}
 
-   if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) {
+   if (!cpumask_intersects(*cpumask, cpu_online_mask)) {
CDEBUG(D_INFO,
   "No online CPU found in CPU partition %d, did someone do 
CPU hotplug on system? You might need to reload Lustre modules to keep system 
working well.\n",
   cpt);
-   return -EINVAL;
+   return -ENODEV;
}
 
for_each_online_cpu(cpu) {
@@ -860,11 +860,13 @@ static struct cfs_cpt_table *cfs_cpt_table_create(int 
ncpt)
cptab = cfs_cpt_table_alloc(ncpt);
if (!cptab) {
CERROR("Failed to allocate CPU map(%d)\n", ncpt);
+   rc = -ENOMEM;
goto failed;
}
 
if (!zalloc_cpumask_var(_mask, GFP_NOFS)) {
CERROR("Failed to allocate scratch cpumask\n");
+   rc = -ENOMEM;
goto failed;
}
 
@@ -879,8 +881,10 @@ static struct cfs_cpt_table *cfs_cpt_table_create(int ncpt)
 
rc = cfs_cpt_choose_ncpus(cptab, cpt, node_mask,
  num - ncpu);
-   if (rc < 0)
+   if (rc < 0) {
+   rc = -EINVAL;
goto failed_mask;
+   }
 
ncpu = cpumask_weight(part->cpt_cpumask);
if (ncpu == num + !!(rem > 0)) {
@@ -903,37 +907,51 @@ static struct cfs_cpt_table *cfs_cpt_table_create(int 
ncpt)
if (cptab)
cfs_cpt_table_free(cptab);
 
-   return NULL;
+   return ERR_PTR(rc);
 }
 
-static struct cfs_cpt_table *cfs_cpt_table_create_pattern(char *pattern)
+static struct cfs_cpt_table *cfs_cpt_table_create_pattern(const char *pattern)
 {
struct cfs_cpt_table *cptab;
+   char *pattern_dup;
+   char *bracket;
char *str;
int node = 0;
-   int high;
int ncpt = 0;
-   int cpt;
+   int cpt = 0;
+   int high;
int rc;
int c;
int i;
 
-   str = strim(pattern);
+   pattern_dup = kstrdup(pattern, GFP_KERNEL);
+   if (!pattern_dup) {
+   CERROR("Failed to duplicate pattern '%s'\n", pattern);
+   return ERR_PTR(-ENOMEM);
+   }
+
+   str = strim(pattern_dup);
if (*str == 'n' || *str == 'N') {
-   pattern = str + 1;
-   if (*pattern != '\0') {
-   node = 1;
-   } else { /* shortcut to create CPT from NUMA & CPU topology */
+   str++; /* skip 'N' char */
+   node = 1; /* NUMA pattern */
+   if (*str == '\0') {
node = -1;
-   ncpt = num_online_nodes();
+   for_each_online_node(i) {
+   if (!cpumask_empty(cpu

[PATCH v2 24/25] staging: lustre: libcfs: change CPT estimate algorithm

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

The main idea of having more CPU partitions is based on KNL experience.
When a thread submits IO for network communication, one of the threads
from the current CPT is used for the network stack. With high
parallelization many threads become involved in network submission, but
with fewer CPU partitions they will wait until a single thread processes
them from the network queue. So, the bottleneck just moves into the
network layer when there is a small number of CPU partitions. My
experiments showed that the best performance was achieved when each IO
thread has one network thread. This condition can be met by having 2 real
HW cores (without hyper threads) per CPT. This is exactly what is
implemented in this patch.

Change the CPT estimate algorithm from 2 * (N - 1)^2 < NCPUS <= 2 * N^2
to 2 HW cores per CPT. This is critical for machines whose number of
cores is not a power of two (2^N).

Current algorithm splits CPTs in KNL:
LNet: HW CPU cores: 272, npartitions: 16
cpu_partition_table=
0   : 0-4,68-71,136-139,204-207
1   : 5-9,73-76,141-144,209-212
2   : 10-14,78-81,146-149,214-217
3   : 15-17,72,77,83-85,140,145,151-153,208,219-221
4   : 18-21,82,86-88,150,154-156,213,218,222-224
5   : 22-26,90-93,158-161,226-229
6   : 27-31,95-98,163-166,231-234
7   : 32-35,89,100-103,168-171,236-239
8   : 36-38,94,99,104-105,157,162,167,172-173,225,230,235,240-241
9   : 39-43,107-110,175-178,243-246
10  : 44-48,112-115,180-183,248-251
11  : 49-51,106,111,117-119,174,179,185-187,242,253-255
12  : 52-55,116,120-122,184,188-190,247,252,256-258
13  : 56-60,124-127,192-195,260-263
14  : 61-65,129-132,197-200,265-268
15  : 66-67,123,128,133-135,191,196,201-203,259,264,269-271

New algorithm will split CPTs in KNL:
LNet: HW CPU cores: 272, npartitions: 34
cpu_partition_table=
0   : 0-1,68-69,136-137,204-205
1   : 2-3,70-71,138-139,206-207
2   : 4-5,72-73,140-141,208-209
3   : 6-7,74-75,142-143,210-211
4   : 8-9,76-77,144-145,212-213
5   : 10-11,78-79,146-147,214-215
6   : 12-13,80-81,148-149,216-217
7   : 14-15,82-83,150-151,218-219
8   : 16-17,84-85,152-153,220-221
9   : 18-19,86-87,154-155,222-223
10  : 20-21,88-89,156-157,224-225
11  : 22-23,90-91,158-159,226-227
12  : 24-25,92-93,160-161,228-229
13  : 26-27,94-95,162-163,230-231
14  : 28-29,96-97,164-165,232-233
15  : 30-31,98-99,166-167,234-235
16  : 32-33,100-101,168-169,236-237
17  : 34-35,102-103,170-171,238-239
18  : 36-37,104-105,172-173,240-241
19  : 38-39,106-107,174-175,242-243
20  : 40-41,108-109,176-177,244-245
21  : 42-43,110-111,178-179,246-247
22  : 44-45,112-113,180-181,248-249
23  : 46-47,114-115,182-183,250-251
24  : 48-49,116-117,184-185,252-253
25  : 50-51,118-119,186-187,254-255
26  : 52-53,120-121,188-189,256-257
27  : 54-55,122-123,190-191,258-259
28  : 56-57,124-125,192-193,260-261
29  : 58-59,126-127,194-195,262-263
30  : 60-61,128-129,196-197,264-265
31  : 62-63,130-131,198-199,266-267
32  : 64-65,132-133,200-201,268-269
33  : 66-67,134-135,202-203,270-271

The 'N' pattern does not always work well on KNL.
In flat mode it produces a single CPT with all CPUs inside.

In SNC-4 mode:
cpu_partition_table=
0   : 0-17,68-85,136-153,204-221
1   : 18-35,86-103,154-171,222-239
2   : 36-51,104-119,172-187,240-255
3   : 52-67,120-135,188-203,256-271

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/24304
Reviewed-by: James Simmons 
Reviewed-by: Andreas Dilger 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No changes in code from earlier patch

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 30 +
 1 file changed, 5 insertions(+), 25 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index aed48de..ff752d5 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -798,34 +798,14 @@ static int cfs_cpt_choose_ncpus(struct cfs_cpt_table 
*cptab, int cpt,
 
 static int cfs_cpt_num_estimate(void)
 {
-   int nnode = num_online_nodes();
+   int nthr = cpumask_weight(topology_sibling_cpumask(smp_processor_id()));
int ncpu = num_online_cpus();
-   int ncpt;
+   int ncpt = 1;
 
-   if (ncpu <= CPT_WEIGHT_MIN) {
-   ncpt = 1;
-   goto out;
-   }
-
-   /* generate reasonable number of CPU partitions based on total number
-* of CPUs, Preferred N should be power2 and match this condition:
-* 2 * (N - 1)^

[PATCH v2 22/25] staging: lustre: libcfs: update debug messages in CPT code

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Update the debug messages for the CPT table creation code. Place
the passed-in string in quotes to make it clear what it is.
Capitalize "cpu" as "CPU" in the debug strings.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23306
Reviewed-by: James Simmons 
Reviewed-by: Andreas Dilger 
Reviewed-by: Patrick Farrell 
Reviewed-by: Olaf Weber 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No changes in code from earlier patch

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 0fc102c..649f7f9 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -496,7 +496,7 @@ void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int 
cpt, int cpu)
 
} else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
CDEBUG(D_INFO,
-  "CPU %d is not in cpu-partition %d\n", cpu, cpt);
+  "CPU %d is not in CPU partition %d\n", cpu, cpt);
return;
}
 
@@ -940,14 +940,14 @@ static struct cfs_cpt_table 
*cfs_cpt_table_create_pattern(char *pattern)
if (!ncpt ||
(node && ncpt > num_online_nodes()) ||
(!node && ncpt > num_online_cpus())) {
-   CERROR("Invalid pattern %s, or too many partitions %d\n",
+   CERROR("Invalid pattern '%s', or too many partitions %d\n",
   pattern, ncpt);
return NULL;
}
 
cptab = cfs_cpt_table_alloc(ncpt);
if (!cptab) {
-   CERROR("Failed to allocate cpu partition table\n");
+   CERROR("Failed to allocate CPU partition table\n");
return NULL;
}
 
@@ -978,11 +978,11 @@ static struct cfs_cpt_table 
*cfs_cpt_table_create_pattern(char *pattern)
 
if (!bracket) {
if (*str) {
-   CERROR("Invalid pattern %s\n", str);
+   CERROR("Invalid pattern '%s'\n", str);
goto failed;
}
if (c != ncpt) {
-   CERROR("expect %d partitions but found %d\n",
+   CERROR("Expect %d partitions but found %d\n",
   ncpt, c);
goto failed;
}
@@ -990,7 +990,7 @@ static struct cfs_cpt_table 
*cfs_cpt_table_create_pattern(char *pattern)
}
 
if (sscanf(str, "%d%n", , ) < 1) {
-   CERROR("Invalid cpu pattern %s\n", str);
+   CERROR("Invalid CPU pattern '%s'\n", str);
goto failed;
}
 
@@ -1007,20 +1007,20 @@ static struct cfs_cpt_table 
*cfs_cpt_table_create_pattern(char *pattern)
 
str = strim(str + n);
if (str != bracket) {
-   CERROR("Invalid pattern %s\n", str);
+   CERROR("Invalid pattern '%s'\n", str);
goto failed;
}
 
bracket = strchr(str, ']');
if (!bracket) {
-   CERROR("Missing right bracket for partition %d, %s\n",
+   CERROR("Missing right bracket for partition %d in 
'%s'\n",
   cpt, str);
goto failed;
}
 
if (cfs_expr_list_parse(str, (bracket - str) + 1,
0, high, )) {
-   CERROR("Can't parse number range: %s\n", str);
+   CERROR("Can't parse number range in '%s'\n", str);
goto failed;
}
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 18/25] staging: lustre: libcfs: rename goto label in cfs_cpt_table_print

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Change goto label out to err.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata 
Reviewed-by: James Simmons 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No changes in code from earlier patch

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index fb27dac..e12d337 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -193,20 +193,20 @@ int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char 
*buf, int len)
 
for (i = 0; i < cptab->ctb_nparts; i++) {
if (len <= 0)
-   goto out;
+   goto err;
 
rc = snprintf(tmp, len, "%d\t:", i);
len -= rc;
 
if (len <= 0)
-   goto out;
+   goto err;
 
tmp += rc;
for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
rc = snprintf(tmp, len, "%d ", j);
len -= rc;
if (len <= 0)
-   goto out;
+   goto err;
tmp += rc;
}
 
@@ -216,7 +216,7 @@ int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char 
*buf, int len)
}
 
return tmp - buf;
-out:
+err:
return -E2BIG;
 }
 EXPORT_SYMBOL(cfs_cpt_table_print);
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 13/25] staging: lustre: libcfs: fix libcfs_cpu coding style

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

This patch brings the Lustre CPT code into alignment with the
Linux kernel coding style.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23304
Reviewed-by: James Simmons 
Reviewed-by: Doug Oucharek 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch to handle recent libcfs changes

 .../lustre/include/linux/libcfs/libcfs_cpu.h   | 76 --
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c| 92 --
 2 files changed, 66 insertions(+), 102 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index d5237d0..2c97adf 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -144,8 +144,7 @@ struct cfs_cpt_table {
 /**
  * return total number of CPU partitions in \a cptab
  */
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab);
+int cfs_cpt_number(struct cfs_cpt_table *cptab);
 /**
  * return number of HW cores or hyper-threadings in a CPU partition \a cpt
  */
@@ -207,25 +206,24 @@ void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
  * remove all cpus in NUMA node \a node from CPU partition \a cpt
  */
 void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node);
-
 /**
  * add all cpus in node mask \a mask to CPU partition \a cpt
  * return 1 if successfully set all CPUs, otherwise return 0
  */
 int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab,
-int cpt, nodemask_t *mask);
+int cpt, const nodemask_t *mask);
 /**
  * remove all cpus in node mask \a mask from CPU partition \a cpt
  */
 void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
-   int cpt, nodemask_t *mask);
+   int cpt, const nodemask_t *mask);
 /**
  * convert partition id \a cpt to numa node id, if there are more than one
  * nodes in this partition, it might return a different node id each time.
  */
 int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
 
-int  cfs_cpu_init(void);
+int cfs_cpu_init(void);
 void cfs_cpu_fini(void);
 
 #else /* !CONFIG_SMP */
@@ -282,32 +280,29 @@ static inline int cfs_cpt_distance_print(struct 
cfs_cpt_table *cptab,
return rc;
 }
 
-static inline cpumask_var_t *
-cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
+static inline cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab,
+int cpt)
 {
return >ctb_cpumask;
 }
 
-static inline int
-cfs_cpt_number(struct cfs_cpt_table *cptab)
+static inline int cfs_cpt_number(struct cfs_cpt_table *cptab)
 {
return 1;
 }
 
-static inline int
-cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
+static inline int cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
 {
return 1;
 }
 
-static inline int
-cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
+static inline int cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
 {
return 1;
 }
 
-static inline nodemask_t *
-cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
+static inline nodemask_t *cfs_cpt_nodemask(struct cfs_cpt_table *cptab,
+  int cpt)
 {
return >ctb_nodemask;
 }
@@ -318,66 +313,61 @@ static inline unsigned int cfs_cpt_distance(struct 
cfs_cpt_table *cptab,
return 1;
 }
 
-static inline int
-cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+static inline int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt,
+ int cpu)
 {
return 1;
 }
 
-static inline void
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+static inline void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt,
+int cpu)
 {
 }
 
-static inline int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt,
-   const cpumask_t *mask)
+static inline int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt,
+ const cpumask_t *mask)
 {
return 1;
 }
 
-static inline void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt,
- const cpumask_t *mask)
+static inline void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt,
+const cpumask_t *mask)
 {
 }
 
-static inline int
-cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
+static inline int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt,
+  int node)
 {
return 1;
 }
 
-static inline void
-cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
+static inline void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, i

[PATCH v2 16/25] staging: lustre: libcfs: rename i to cpu for cfs_cpt_bind

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Rename variable i to cpu to make code easier to understand.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata 
Reviewed-by: James Simmons 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No changes in code

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index bac5601..1c10529 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -669,8 +669,8 @@ int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
 {
cpumask_var_t *cpumask;
nodemask_t *nodemask;
+   int cpu;
int rc;
-   int i;
 
LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
 
@@ -688,8 +688,8 @@ int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
return -EINVAL;
}
 
-   for_each_online_cpu(i) {
-   if (cpumask_test_cpu(i, *cpumask))
+   for_each_online_cpu(cpu) {
+   if (cpumask_test_cpu(cpu, *cpumask))
continue;
 
rc = set_cpus_allowed_ptr(current, *cpumask);
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 15/25] staging: lustre: libcfs: rename i to node for cfs_cpt_set_nodemask

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Rename variable i to node to make code easier to understand.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata 
Reviewed-by: James Simmons 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No changes in code

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 14d5791..bac5601 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -575,10 +575,10 @@ void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int 
cpt, int node)
 int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt,
 const nodemask_t *mask)
 {
-   int i;
+   int node;
 
-   for_each_node_mask(i, *mask) {
-   if (!cfs_cpt_set_node(cptab, cpt, i))
+   for_each_node_mask(node, *mask) {
+   if (!cfs_cpt_set_node(cptab, cpt, node))
return 0;
}
 
@@ -589,10 +589,10 @@ int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int 
cpt,
 void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt,
const nodemask_t *mask)
 {
-   int i;
+   int node;
 
-   for_each_node_mask(i, *mask)
-   cfs_cpt_unset_node(cptab, cpt, i);
+   for_each_node_mask(node, *mask)
+   cfs_cpt_unset_node(cptab, cpt, node);
 }
 EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 21/25] staging: lustre: libcfs: report NUMA node instead of just node

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Reporting "HW nodes" is too generic. It really is reporting
"HW NUMA nodes". Update the debug message.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23306
Reviewed-by: James Simmons 
Reviewed-by: Andreas Dilger 
Reviewed-by: Patrick Farrell 
Reviewed-by: Olaf Weber 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No changes in code from earlier patch

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 99a9494..0fc102c 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -1138,7 +1138,7 @@ int cfs_cpu_init(void)
 
put_online_cpus();
 
-   LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, npartitions: %d\n",
+   LCONSOLE(0, "HW NUMA nodes: %d, HW CPU cores: %d, npartitions: %d\n",
 num_online_nodes(), num_online_cpus(),
 cfs_cpt_number(cfs_cpt_tab));
return 0;
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 08/25] staging: lustre: libcfs: NUMA support

2018-05-29 Thread James Simmons
From: Amir Shehata 

This patch adds NUMA node support. NUMA node information is stored
in the CPT table. A NUMA node mask is maintained for the entire
table as well as for each CPT to track the NUMA nodes related to
each of the CPTs. Add new function cfs_cpt_of_node() which returns
the CPT of a particular NUMA node.

Signed-off-by: Amir Shehata 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber 
Reviewed-by: Doug Oucharek 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch to handle recent libcfs changes

 .../lustre/include/linux/libcfs/libcfs_cpu.h| 11 +++
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 21 +
 2 files changed, 32 insertions(+)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 3626969..487625d 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -103,6 +103,8 @@ struct cfs_cpt_table {
struct cfs_cpu_partition*ctb_parts;
/* shadow HW CPU to CPU partition ID */
int *ctb_cpu2cpt;
+   /* shadow HW node to CPU partition ID */
+   int *ctb_node2cpt;
/* all nodes in this partition table */
nodemask_t  *ctb_nodemask;
 #else
@@ -157,6 +159,10 @@ struct cfs_cpt_table {
  */
 int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu);
 /**
+ * shadow HW node ID \a NODE to CPU-partition ID by \a cptab
+ */
+int cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node);
+/**
  * bind current thread on a CPU-partition \a cpt of \a cptab
  */
 int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
@@ -345,6 +351,11 @@ static inline int cfs_cpt_table_print(struct cfs_cpt_table 
*cptab,
return 0;
 }
 
+static inline int cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node)
+{
+   return 0;
+}
+
 static inline int
 cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
 {
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 3f855a8..f616073 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -98,6 +98,15 @@ struct cfs_cpt_table *
memset(cptab->ctb_cpu2cpt, -1,
   nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
 
+   cptab->ctb_node2cpt = kvmalloc_array(nr_node_ids,
+sizeof(cptab->ctb_node2cpt[0]),
+GFP_KERNEL);
+   if (!cptab->ctb_node2cpt)
+   goto failed_alloc_node2cpt;
+
+   memset(cptab->ctb_node2cpt, -1,
+  nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
+
cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
  GFP_KERNEL);
if (!cptab->ctb_parts)
@@ -129,6 +138,8 @@ struct cfs_cpt_table *
 
kvfree(cptab->ctb_parts);
 failed_alloc_ctb_parts:
+   kvfree(cptab->ctb_node2cpt);
+failed_alloc_node2cpt:
kvfree(cptab->ctb_cpu2cpt);
 failed_alloc_cpu2cpt:
kfree(cptab->ctb_nodemask);
@@ -146,6 +157,7 @@ struct cfs_cpt_table *
int i;
 
kvfree(cptab->ctb_cpu2cpt);
+   kvfree(cptab->ctb_node2cpt);
 
for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
struct cfs_cpu_partition *part = >ctb_parts[i];
@@ -511,6 +523,15 @@ struct cfs_cpt_table *
 }
 EXPORT_SYMBOL(cfs_cpt_of_cpu);
 
+int cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node)
+{
+   if (node < 0 || node > nr_node_ids)
+   return CFS_CPT_ANY;
+
+   return cptab->ctb_node2cpt[node];
+}
+EXPORT_SYMBOL(cfs_cpt_of_node);
+
 int
 cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
 {
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 14/25] staging: lustre: libcfs: use int type for CPT identification.

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Use int type for CPT identification to match the linux kernel
CPU identification.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23304
Reviewed-by: James Simmons 
Reviewed-by: Doug Oucharek 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch to handle recent libcfs changes

 drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h |  8 
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c  | 14 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 2c97adf..9f4ba9d 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -90,7 +90,7 @@ struct cfs_cpu_partition {
/* NUMA distance between CPTs */
unsigned int*cpt_distance;
/* spread rotor for NUMA allocator */
-   unsigned intcpt_spread_rotor;
+   int cpt_spread_rotor;
 };
 #endif /* CONFIG_SMP */
 
@@ -98,11 +98,11 @@ struct cfs_cpu_partition {
 struct cfs_cpt_table {
 #ifdef CONFIG_SMP
/* spread rotor for NUMA allocator */
-   unsigned intctb_spread_rotor;
+   int ctb_spread_rotor;
/* maximum NUMA distance between all nodes in table */
unsigned intctb_distance;
/* # of CPU partitions */
-   unsigned intctb_nparts;
+   int ctb_nparts;
/* partitions tables */
struct cfs_cpu_partition*ctb_parts;
/* shadow HW CPU to CPU partition ID */
@@ -128,7 +128,7 @@ struct cfs_cpt_table {
 /**
  * create a cfs_cpt_table with \a ncpt number of partitions
  */
-struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
+struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt);
 /**
  * return cpumask of CPU partition \a cpt
  */
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index fab6675..14d5791 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -69,7 +69,7 @@
 module_param(cpu_pattern, charp, 0444);
 MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
 
-struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt)
+struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt)
 {
struct cfs_cpt_table *cptab;
int i;
@@ -784,13 +784,13 @@ static int cfs_cpt_choose_ncpus(struct cfs_cpt_table 
*cptab, int cpt,
return rc;
 }
 
-#define CPT_WEIGHT_MIN  4u
+#define CPT_WEIGHT_MIN 4
 
-static unsigned int cfs_cpt_num_estimate(void)
+static int cfs_cpt_num_estimate(void)
 {
-   unsigned int nnode = num_online_nodes();
-   unsigned int ncpu = num_online_cpus();
-   unsigned int ncpt;
+   int nnode = num_online_nodes();
+   int ncpu = num_online_cpus();
+   int ncpt;
 
if (ncpu <= CPT_WEIGHT_MIN) {
ncpt = 1;
@@ -820,7 +820,7 @@ static unsigned int cfs_cpt_num_estimate(void)
/* config many CPU partitions on 32-bit system could consume
 * too much memory
 */
-   ncpt = min(2U, ncpt);
+   ncpt = min(2, ncpt);
 #endif
while (ncpu % ncpt)
ncpt--; /* worst case is 1 */
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 12/25] staging: lustre: libcfs: invert error handling for cfs_cpt_table_print

2018-05-29 Thread James Simmons
From: Amir Shehata 

Instead of setting rc to -EFBIG for several cases in the loop, let's
just go to the out label on error, which returns -E2BIG directly.

Signed-off-by: Amir Shehata 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber 
Reviewed-by: Doug Oucharek 
Signed-off-by: James Simmons 
---
Changelog:

v1) New patch to replace several patches. Went crazy for the one
change per patch approach.

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 25 ++---
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 9ff9fe9..bf41ba3 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -190,29 +190,26 @@ struct cfs_cpt_table *
 cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
 {
char *tmp = buf;
-   int rc = 0;
+   int rc;
int i;
int j;
 
for (i = 0; i < cptab->ctb_nparts; i++) {
-   if (len > 0) {
-   rc = snprintf(tmp, len, "%d\t:", i);
-   len -= rc;
-   }
+   if (len <= 0)
+   goto out;
+
+   rc = snprintf(tmp, len, "%d\t:", i);
+   len -= rc;
 
-   if (len <= 0) {
-   rc = -EFBIG;
+   if (len <= 0)
goto out;
-   }
 
tmp += rc;
for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
rc = snprintf(tmp, len, "%d ", j);
len -= rc;
-   if (len <= 0) {
-   rc = -EFBIG;
+   if (len <= 0)
goto out;
-   }
tmp += rc;
}
 
@@ -221,11 +218,9 @@ struct cfs_cpt_table *
len--;
}
 
- out:
-   if (rc < 0)
-   return rc;
-
return tmp - buf;
+out:
+   return -E2BIG;
 }
 EXPORT_SYMBOL(cfs_cpt_table_print);
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 10/25] staging: lustre: libcfs: use distance in cpu and node handling

2018-05-29 Thread James Simmons
From: Amir Shehata 

Take into consideration the location of NUMA nodes and cores
when calling cfs_cpt_[un]set_cpu() and cfs_cpt_[un]set_node().
This enables functioning on platforms with 100s of cores and
NUMA nodes.

Signed-off-by: Amir Shehata 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber 
Reviewed-by: Doug Oucharek 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch to handle recent libcfs changes

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 192 ++--
 1 file changed, 143 insertions(+), 49 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 2a74e51..9ff9fe9 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -330,11 +330,134 @@ unsigned int cfs_cpt_distance(struct cfs_cpt_table 
*cptab, int cpt1, int cpt2)
 }
 EXPORT_SYMBOL(cfs_cpt_distance);
 
+/*
+ * Calculate the maximum NUMA distance between all nodes in the
+ * from_mask and all nodes in the to_mask.
+ */
+static unsigned int cfs_cpt_distance_calculate(nodemask_t *from_mask,
+  nodemask_t *to_mask)
+{
+   unsigned int maximum;
+   unsigned int distance;
+   int from;
+   int to;
+
+   maximum = 0;
+   for_each_node_mask(from, *from_mask) {
+   for_each_node_mask(to, *to_mask) {
+   distance = node_distance(from, to);
+   if (maximum < distance)
+   maximum = distance;
+   }
+   }
+   return maximum;
+}
+
+static void cfs_cpt_add_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+   cptab->ctb_cpu2cpt[cpu] = cpt;
+
+   cpumask_set_cpu(cpu, cptab->ctb_cpumask);
+   cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+}
+
+static void cfs_cpt_del_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+   cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+   cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
+
+   cptab->ctb_cpu2cpt[cpu] = -1;
+}
+
+static void cfs_cpt_add_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+   struct cfs_cpu_partition *part;
+
+   if (!node_isset(node, *cptab->ctb_nodemask)) {
+   unsigned int dist;
+
+   /* first time node is added to the CPT table */
+   node_set(node, *cptab->ctb_nodemask);
+   cptab->ctb_node2cpt[node] = cpt;
+
+   dist = cfs_cpt_distance_calculate(cptab->ctb_nodemask,
+ cptab->ctb_nodemask);
+   cptab->ctb_distance = dist;
+   }
+
+   part = >ctb_parts[cpt];
+   if (!node_isset(node, *part->cpt_nodemask)) {
+   int cpt2;
+
+   /* first time node is added to this CPT */
+   node_set(node, *part->cpt_nodemask);
+   for (cpt2 = 0; cpt2 < cptab->ctb_nparts; cpt2++) {
+   struct cfs_cpu_partition *part2;
+   unsigned int dist;
+
+   part2 = >ctb_parts[cpt2];
+   dist = cfs_cpt_distance_calculate(part->cpt_nodemask,
+ part2->cpt_nodemask);
+   part->cpt_distance[cpt2] = dist;
+   dist = cfs_cpt_distance_calculate(part2->cpt_nodemask,
+ part->cpt_nodemask);
+   part2->cpt_distance[cpt] = dist;
+   }
+   }
+}
+
+static void cfs_cpt_del_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+   struct cfs_cpu_partition *part = >ctb_parts[cpt];
+   int cpu;
+
+   for_each_cpu(cpu, part->cpt_cpumask) {
+   /* this CPT has other CPU belonging to this node? */
+   if (cpu_to_node(cpu) == node)
+   break;
+   }
+
+   if (cpu >= nr_cpu_ids && node_isset(node,  *part->cpt_nodemask)) {
+   int cpt2;
+
+   /* No more CPUs in the node for this CPT. */
+   node_clear(node, *part->cpt_nodemask);
+   for (cpt2 = 0; cpt2 < cptab->ctb_nparts; cpt2++) {
+   struct cfs_cpu_partition *part2;
+   unsigned int dist;
+
+   part2 = >ctb_parts[cpt2];
+   if (node_isset(node, *part2->cpt_nodemask))
+   cptab->ctb_node2cpt[node] = cpt2;
+
+   dist = cfs_cpt_distance_calculate(part->cpt_nodemask,
+ part2->cpt_nodemask);
+  

[PATCH v2 01/25] staging: lustre: libcfs: restore UMP handling

2018-05-29 Thread James Simmons
With the cleanup of the libcfs SMP handling, all UMP handling
was removed. In the process, various NULL pointers and
empty fields are now returned in the UMP case, which causes Lustre
to crash hard. Restore the proper UMP handling so Lustre can
properly function.

Signed-off-by: James Simmons 
Signed-off-by: Amir Shehata 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber 
Reviewed-by: Doug Oucharek 
Signed-off-by: James Simmons 
---
Changelog:

v1) New patch to handle the disappearance of UMP support

 .../lustre/include/linux/libcfs/libcfs_cpu.h   | 87 --
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c|  4 -
 drivers/staging/lustre/lnet/libcfs/module.c|  4 +
 3 files changed, 69 insertions(+), 26 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 61641c4..2ad12a6 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -74,6 +74,7 @@
 
 #include 
 #include 
+#include 
 #include 
 
 /* any CPU partition */
@@ -89,10 +90,11 @@ struct cfs_cpu_partition {
/* spread rotor for NUMA allocator */
unsigned intcpt_spread_rotor;
 };
-
+#endif /* CONFIG_SMP */
 
 /** descriptor for CPU partitions */
 struct cfs_cpt_table {
+#ifdef CONFIG_SMP
/* version, reserved for hotplug */
unsigned intctb_version;
/* spread rotor for NUMA allocator */
@@ -103,14 +105,26 @@ struct cfs_cpt_table {
struct cfs_cpu_partition*ctb_parts;
/* shadow HW CPU to CPU partition ID */
int *ctb_cpu2cpt;
-   /* all cpus in this partition table */
-   cpumask_var_t   ctb_cpumask;
/* all nodes in this partition table */
nodemask_t  *ctb_nodemask;
+#else
+   nodemask_t  ctb_nodemask;
+#endif /* CONFIG_SMP */
+   /* all cpus in this partition table */
+   cpumask_var_t   ctb_cpumask;
 };
 
 extern struct cfs_cpt_table*cfs_cpt_tab;
 
+#ifdef CONFIG_SMP
+/**
+ * destroy a CPU partition table
+ */
+void cfs_cpt_table_free(struct cfs_cpt_table *cptab);
+/**
+ * create a cfs_cpt_table with \a ncpt number of partitions
+ */
+struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
 /**
  * return cpumask of CPU partition \a cpt
  */
@@ -208,20 +222,52 @@ void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
 void cfs_cpu_fini(void);
 
 #else /* !CONFIG_SMP */
-struct cfs_cpt_table;
-#define cfs_cpt_tab ((struct cfs_cpt_table *)NULL)
 
-static inline cpumask_var_t *
-cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
+static inline void cfs_cpt_table_free(struct cfs_cpt_table *cptab)
 {
-   return NULL;
+   kfree(cptab);
 }
 
-static inline int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
+static inline struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt)
 {
-   return 0;
+   struct cfs_cpt_table *cptab;
+
+   if (ncpt != 1)
+   return NULL;
+
+   cptab = kzalloc(sizeof(*cptab), GFP_NOFS);
+   if (!cptab)
+   return NULL;
+
+   if (!zalloc_cpumask_var(>ctb_cpumask, GFP_NOFS)) {
+   kfree(cptab);
+   return NULL;
+   }
+   cpumask_set_cpu(0, cptab->ctb_cpumask);
+   node_set(0, cptab->ctb_nodemask);
+
+   return cptab;
+}
+
+static inline int cfs_cpt_table_print(struct cfs_cpt_table *cptab,
+ char *buf, int len)
+{
+   int rc;
+
+   rc = snprintf(buf, len, "0\t: 0\n");
+   len -= rc;
+   if (len <= 0)
+   return -EFBIG;
+
+   return rc;
 }
+
+static inline cpumask_var_t *
+cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
+{
+   return >ctb_cpumask;
+}
+
 static inline int
 cfs_cpt_number(struct cfs_cpt_table *cptab)
 {
@@ -243,7 +289,7 @@ void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
 static inline nodemask_t *
 cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
 {
-   return NULL;
+   return >ctb_nodemask;
 }
 
 static inline int
@@ -328,24 +374,21 @@ void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
 static inline int
 cfs_cpu_init(void)
 {
-   return 0;
+   cfs_cpt_tab = cfs_cpt_table_alloc(1);
+
+   return cfs_cpt_tab ? 0 : -1;
 }
 
 static inline void cfs_cpu_fini(void)
 {
+   if (cfs_cpt_tab) {
+   cfs_cpt_table_free(cfs_cpt_tab);
+   cfs_cpt_tab = NULL;
+   }
 }
 
 #endif /* CONFIG_SMP */
 
-/**
- * destroy a CPU partition table
- */
-void cfs_cpt_table_free(struct cfs_cpt_table *cptab);
-/**
- * create a cfs_cpt_table with \a ncpt number of partitions
- */
-struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int n

[PATCH v2 04/25] staging: lustre: libcfs: properly handle failure cases in SMP code

2018-05-29 Thread James Simmons
While pushing the SMP work some bugs were pointed out by Dan
Carpenter in the code. Due to a single err label in cfs_cpu_init()
and cfs_cpt_table_alloc() a few items were being cleaned up that
were never initialized. This can lead to crashes and other problems.
In those initialization functions, introduce individual labels to
jump to so that only the things that were initialized get freed on failure.

Signed-off-by: James Simmons 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10932
Reviewed-on: https://review.whamcloud.com/32085
Reviewed-by: Dmitry Eremin 
Reviewed-by: Andreas Dilger 
Signed-off-by: James Simmons 
---
Changelog:

v1) New patch to make libcfs SMP code handle failure paths correctly.

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 72 ++---
 1 file changed, 52 insertions(+), 20 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 34df7ed..b67a60c 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -81,17 +81,19 @@ struct cfs_cpt_table *
 
cptab->ctb_nparts = ncpt;
 
+   if (!zalloc_cpumask_var(>ctb_cpumask, GFP_NOFS))
+   goto failed_alloc_cpumask;
+
cptab->ctb_nodemask = kzalloc(sizeof(*cptab->ctb_nodemask),
  GFP_NOFS);
-   if (!zalloc_cpumask_var(>ctb_cpumask, GFP_NOFS) ||
-   !cptab->ctb_nodemask)
-   goto failed;
+   if (!cptab->ctb_nodemask)
+   goto failed_alloc_nodemask;
 
cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(),
sizeof(cptab->ctb_cpu2cpt[0]),
GFP_KERNEL);
if (!cptab->ctb_cpu2cpt)
-   goto failed;
+   goto failed_alloc_cpu2cpt;
 
memset(cptab->ctb_cpu2cpt, -1,
   num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
@@ -99,22 +101,41 @@ struct cfs_cpt_table *
cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
  GFP_KERNEL);
if (!cptab->ctb_parts)
-   goto failed;
+   goto failed_alloc_ctb_parts;
+
+   memset(cptab->ctb_parts, -1, ncpt * sizeof(cptab->ctb_parts[0]));
 
for (i = 0; i < ncpt; i++) {
struct cfs_cpu_partition *part = >ctb_parts[i];
 
+   if (!zalloc_cpumask_var(>cpt_cpumask, GFP_NOFS))
+   goto failed_setting_ctb_parts;
+
part->cpt_nodemask = kzalloc(sizeof(*part->cpt_nodemask),
 GFP_NOFS);
-   if (!zalloc_cpumask_var(>cpt_cpumask, GFP_NOFS) ||
-   !part->cpt_nodemask)
-   goto failed;
+   if (!part->cpt_nodemask)
+   goto failed_setting_ctb_parts;
}
 
return cptab;
 
- failed:
-   cfs_cpt_table_free(cptab);
+failed_setting_ctb_parts:
+   while (i-- >= 0) {
+   struct cfs_cpu_partition *part = >ctb_parts[i];
+
+   kfree(part->cpt_nodemask);
+   free_cpumask_var(part->cpt_cpumask);
+   }
+
+   kvfree(cptab->ctb_parts);
+failed_alloc_ctb_parts:
+   kvfree(cptab->ctb_cpu2cpt);
+failed_alloc_cpu2cpt:
+   kfree(cptab->ctb_nodemask);
+failed_alloc_nodemask:
+   free_cpumask_var(cptab->ctb_cpumask);
+failed_alloc_cpumask:
+   kfree(cptab);
return NULL;
 }
 EXPORT_SYMBOL(cfs_cpt_table_alloc);
@@ -940,7 +961,7 @@ static int cfs_cpu_dead(unsigned int cpu)
 int
 cfs_cpu_init(void)
 {
-   int ret = 0;
+   int ret;
 
LASSERT(!cfs_cpt_tab);
 
@@ -949,23 +970,23 @@ static int cfs_cpu_dead(unsigned int cpu)
"staging/lustre/cfe:dead", NULL,
cfs_cpu_dead);
if (ret < 0)
-   goto failed;
+   goto failed_cpu_dead;
+
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
"staging/lustre/cfe:online",
cfs_cpu_online, NULL);
if (ret < 0)
-   goto failed;
+   goto failed_cpu_online;
+
lustre_cpu_online = ret;
 #endif
-   ret = -EINVAL;
-
get_online_cpus();
if (*cpu_pattern) {
char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL);
 
if (!cpu_pattern_dup) {
CERROR("Failed to duplicate cpu_pattern\n");
-   goto failed;
+   goto failed_alloc_table;
}
 
cfs_cpt_tab = cfs_cpt_table_create_pattern(cpu_pattern_dup);
@@ -973,7 +994,7 @@ static int cfs_

[PATCH v2 09/25] staging: lustre: libcfs: add cpu distance handling

2018-05-29 Thread James Simmons
From: Amir Shehata 

Add functionality to calculate the distance between two CPTs.
Expose those distances in debugfs so people deploying a setup
can debug what is being created for CPTs.

Signed-off-by: Amir Shehata 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber 
Reviewed-by: Doug Oucharek 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch to handle recent libcfs changes

 .../lustre/include/linux/libcfs/libcfs_cpu.h   | 31 +++
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c| 61 ++
 2 files changed, 92 insertions(+)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 487625d..d5237d0 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -87,6 +87,8 @@ struct cfs_cpu_partition {
cpumask_var_t   cpt_cpumask;
/* nodes mask for this partition */
nodemask_t  *cpt_nodemask;
+   /* NUMA distance between CPTs */
+   unsigned int*cpt_distance;
/* spread rotor for NUMA allocator */
unsigned intcpt_spread_rotor;
 };
@@ -97,6 +99,8 @@ struct cfs_cpt_table {
 #ifdef CONFIG_SMP
/* spread rotor for NUMA allocator */
unsigned intctb_spread_rotor;
+   /* maximum NUMA distance between all nodes in table */
+   unsigned intctb_distance;
/* # of CPU partitions */
unsigned intctb_nparts;
/* partitions tables */
@@ -134,6 +138,10 @@ struct cfs_cpt_table {
  */
 int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
 /**
+ * print distance information of cpt-table
+ */
+int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len);
+/**
  * return total number of CPU partitions in \a cptab
  */
 int
@@ -163,6 +171,10 @@ struct cfs_cpt_table {
  */
 int cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node);
 /**
+ * NUMA distance between \a cpt1 and \a cpt2 in \a cptab
+ */
+unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2);
+/**
  * bind current thread on a CPU-partition \a cpt of \a cptab
  */
 int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
@@ -257,6 +269,19 @@ static inline int cfs_cpt_table_print(struct cfs_cpt_table 
*cptab,
return rc;
 }
 
+static inline int cfs_cpt_distance_print(struct cfs_cpt_table *cptab,
+char *buf, int len)
+{
+   int rc;
+
+   rc = snprintf(buf, len, "0\t: 0:1\n");
+   len -= rc;
+   if (len <= 0)
+   return -EFBIG;
+
+   return rc;
+}
+
 static inline cpumask_var_t *
 cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
 {
@@ -287,6 +312,12 @@ static inline int cfs_cpt_table_print(struct cfs_cpt_table 
*cptab,
return >ctb_nodemask;
 }
 
+static inline unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab,
+   int cpt1, int cpt2)
+{
+   return 1;
+}
+
 static inline int
 cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
 {
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index f616073..2a74e51 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -124,6 +124,15 @@ struct cfs_cpt_table *
 GFP_NOFS);
if (!part->cpt_nodemask)
goto failed_setting_ctb_parts;
+
+   part->cpt_distance = kvmalloc_array(cptab->ctb_nparts,
+   
sizeof(part->cpt_distance[0]),
+   GFP_KERNEL);
+   if (!part->cpt_distance)
+   goto failed_setting_ctb_parts;
+
+   memset(part->cpt_distance, -1,
+  cptab->ctb_nparts * sizeof(part->cpt_distance[0]));
}
 
return cptab;
@@ -134,6 +143,7 @@ struct cfs_cpt_table *
 
kfree(part->cpt_nodemask);
free_cpumask_var(part->cpt_cpumask);
+   kvfree(part->cpt_distance);
}
 
kvfree(cptab->ctb_parts);
@@ -164,6 +174,7 @@ struct cfs_cpt_table *
 
kfree(part->cpt_nodemask);
free_cpumask_var(part->cpt_cpumask);
+   kvfree(part->cpt_distance);
}
 
kvfree(cptab->ctb_parts);
@@ -218,6 +229,44 @@ struct cfs_cpt_table *
 }
 EXPORT_SYMBOL(cfs_cpt_table_print);
 
+int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len)
+{
+   char *tmp = buf;
+   int r

[PATCH v2 11/25] staging: lustre: libcfs: provide debugfs files for distance handling

2018-05-29 Thread James Simmons
From: Amir Shehata 

On systems with a large number of NUMA nodes and cores it is easy
to incorrectly configure their use with Lustre. Provide debugfs
files which can help track down any issues.

Signed-off-by: Amir Shehata 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber 
Reviewed-by: Doug Oucharek 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No code changes from original patch

 drivers/staging/lustre/lnet/libcfs/module.c | 53 +
 1 file changed, 53 insertions(+)

diff --git a/drivers/staging/lustre/lnet/libcfs/module.c 
b/drivers/staging/lustre/lnet/libcfs/module.c
index b438d456..d2dfc29 100644
--- a/drivers/staging/lustre/lnet/libcfs/module.c
+++ b/drivers/staging/lustre/lnet/libcfs/module.c
@@ -468,6 +468,53 @@ static int proc_cpt_table(struct ctl_table *table, int 
write,
__proc_cpt_table);
 }
 
+static int __proc_cpt_distance(void *data, int write,
+  loff_t pos, void __user *buffer, int nob)
+{
+   char *buf = NULL;
+   int len = 4096;
+   int rc = 0;
+
+   if (write)
+   return -EPERM;
+
+   LASSERT(cfs_cpt_tab);
+
+   while (1) {
+   buf = kzalloc(len, GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   rc = cfs_cpt_distance_print(cfs_cpt_tab, buf, len);
+   if (rc >= 0)
+   break;
+
+   if (rc == -EFBIG) {
+   kfree(buf);
+   len <<= 1;
+   continue;
+   }
+   goto out;
+   }
+
+   if (pos >= rc) {
+   rc = 0;
+   goto out;
+   }
+
+   rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
+out:
+   kfree(buf);
+   return rc;
+}
+
+static int proc_cpt_distance(struct ctl_table *table, int write,
+void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+   return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
+   __proc_cpt_distance);
+}
+
 static struct ctl_table lnet_table[] = {
{
.procname = "debug",
@@ -497,6 +544,12 @@ static int proc_cpt_table(struct ctl_table *table, int 
write,
.proc_handler = _cpt_table,
},
{
+   .procname = "cpu_partition_distance",
+   .maxlen   = 128,
+   .mode = 0444,
+   .proc_handler = _cpt_distance,
+   },
+   {
.procname = "debug_log_upcall",
.data = lnet_debug_log_upcall,
.maxlen   = sizeof(lnet_debug_log_upcall),
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 05/25] staging: lustre: libcfs: replace MAX_NUMNODES with nr_node_ids

2018-05-29 Thread James Simmons
From: Amir Shehata 

Replace MAX_NUMNODES, which is considered deprecated, with
nr_node_ids. Looking at page_alloc.c you will see that
nr_node_ids is equal to MAX_NUMNODES. MAX_NUMNODES is
actually set up with Kconfig.

Signed-off-by: Amir Shehata 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber 
Reviewed-by: Doug Oucharek 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Same code but added in more details in commit message

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index b67a60c..d3017e8 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -395,7 +395,7 @@ struct cfs_cpt_table *
 {
const cpumask_t *mask;
 
-   if (node < 0 || node >= MAX_NUMNODES) {
+   if (node < 0 || node >= nr_node_ids) {
CDEBUG(D_INFO,
   "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
return 0;
@@ -412,7 +412,7 @@ struct cfs_cpt_table *
 {
const cpumask_t *mask;
 
-   if (node < 0 || node >= MAX_NUMNODES) {
+   if (node < 0 || node >= nr_node_ids) {
CDEBUG(D_INFO,
   "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
return;
@@ -836,7 +836,7 @@ struct cfs_cpt_table *
return cptab;
}
 
-   high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
+   high = node ? nr_node_ids - 1 : nr_cpu_ids - 1;
 
for (str = strim(pattern), c = 0;; c++) {
struct cfs_range_expr *range;
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 v2 07/25] staging: lustre: libcfs: replace num_possible_cpus() with nr_cpu_ids

2018-05-29 Thread James Simmons
From: Amir Shehata 

Move from num_possible_cpus() to nr_cpu_ids.

Signed-off-by: Amir Shehata 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber 
Reviewed-by: Doug Oucharek 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. Same code

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index d9d1388..3f855a8 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -89,14 +89,14 @@ struct cfs_cpt_table *
if (!cptab->ctb_nodemask)
goto failed_alloc_nodemask;
 
-   cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(),
+   cptab->ctb_cpu2cpt = kvmalloc_array(nr_cpu_ids,
sizeof(cptab->ctb_cpu2cpt[0]),
GFP_KERNEL);
if (!cptab->ctb_cpu2cpt)
goto failed_alloc_cpu2cpt;
 
memset(cptab->ctb_cpu2cpt, -1,
-  num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
+  nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
 
cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
  GFP_KERNEL);
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 00/25] staging: lustre: libcfs: SMP rework

2018-05-29 Thread James Simmons
From: James Simmons 

Recently lustre support has been expanded to extreme machines with as
many as 1000+ cores. On the other end lustre also has been ported
to platforms like ARM and KNL which have unique NUMA and core setups.
For example some devices exist that have NUMA nodes with no cores.
With these new platforms the limitations of the Lustre's SMP code
came to light so a lot of work was needed. This resulted in this
patch set which has been tested on these platforms.

Amir Shehata (8):
  staging: lustre: libcfs: replace MAX_NUMNODES with nr_node_ids
  staging: lustre: libcfs: remove excess space
  staging: lustre: libcfs: replace num_possible_cpus() with nr_cpu_ids
  staging: lustre: libcfs: NUMA support
  staging: lustre: libcfs: add cpu distance handling
  staging: lustre: libcfs: use distance in cpu and node handling
  staging: lustre: libcfs: provide debugfs files for distance handling
  staging: lustre: libcfs: invert error handling for cfs_cpt_table_print

Dmitry Eremin (15):
  staging: lustre: libcfs: remove useless CPU partition code
  staging: lustre: libcfs: rename variable i to cpu
  staging: lustre: libcfs: fix libcfs_cpu coding style
  staging: lustre: libcfs: use int type for CPT identification.
  staging: lustre: libcfs: rename i to node for cfs_cpt_set_nodemask
  staging: lustre: libcfs: rename i to cpu for cfs_cpt_bind
  staging: lustre: libcfs: rename cpumask_var_t variables to *_mask
  staging: lustre: libcfs: rename goto label in cfs_cpt_table_print
  staging: lustre: libcfs: update debug messages
  staging: lustre: libcfs: make tolerant to offline CPUs and empty NUMA nodes
  staging: lustre: libcfs: report NUMA node instead of just node
  staging: lustre: libcfs: update debug messages in CPT code
  staging: lustre: libcfs: rework CPU pattern parsing code
  staging: lustre: libcfs: change CPT estimate algorithm
  staging: lustre: ptlrpc: use current CPU instead of hardcoded 0

James Simmons (2):
  staging: lustre: libcfs: restore UMP handling
  staging: lustre: libcfs: properly handle failure cases in SMP code

 .../lustre/include/linux/libcfs/libcfs_cpu.h   | 225 +++--
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c| 965 +++--
 drivers/staging/lustre/lnet/libcfs/module.c|  57 ++
 drivers/staging/lustre/lnet/lnet/lib-msg.c |   2 +
 drivers/staging/lustre/lustre/ptlrpc/service.c |  11 +-
 5 files changed, 728 insertions(+), 532 deletions(-)

-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 03/25] staging: lustre: libcfs: rename variable i to cpu

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Change the name of the variable i used for for_each_cpu() to cpu
for code readability.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23303
Reviewed-by: James Simmons 
Reviewed-by: Doug Oucharek 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased to handle recent cleanups in libcfs

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 951a9ca..34df7ed 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -340,7 +340,7 @@ struct cfs_cpt_table *
 cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt,
const cpumask_t *mask)
 {
-   int i;
+   int cpu;
 
if (!cpumask_weight(mask) ||
cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
@@ -349,8 +349,8 @@ struct cfs_cpt_table *
return 0;
}
 
-   for_each_cpu(i, mask) {
-   if (!cfs_cpt_set_cpu(cptab, cpt, i))
+   for_each_cpu(cpu, mask) {
+   if (!cfs_cpt_set_cpu(cptab, cpt, cpu))
return 0;
}
 
@@ -362,10 +362,10 @@ struct cfs_cpt_table *
 cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt,
  const cpumask_t *mask)
 {
-   int i;
+   int cpu;
 
-   for_each_cpu(i, mask)
-   cfs_cpt_unset_cpu(cptab, cpt, i);
+   for_each_cpu(cpu, mask)
+   cfs_cpt_unset_cpu(cptab, cpt, cpu);
 }
 EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 3/6] staging: lustre: llite: remove unused parameters from md_{get, set}xattr()

2018-05-29 Thread James Simmons
From: "John L. Hammond" 

md_getxattr() and md_setxattr() each have several unused
parameters. Remove them and improve the naming of the remaining
parameters.

Signed-off-by: John L. Hammond 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10792
Reviewed-on: https://review.whamcloud.com/
Reviewed-by: Dmitry Eremin 
Reviewed-by: James Simmons 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch

v2) Rebased to new parent patch

v3) Rebased again to the new parent patch using the acl.c file

 drivers/staging/lustre/lustre/include/obd.h   |  7 ++---
 drivers/staging/lustre/lustre/include/obd_class.h | 21 ++
 drivers/staging/lustre/lustre/llite/acl.c |  2 +-
 drivers/staging/lustre/lustre/llite/file.c|  3 +-
 drivers/staging/lustre/lustre/llite/xattr.c   |  6 ++--
 drivers/staging/lustre/lustre/lmv/lmv_obd.c   | 22 +++
 drivers/staging/lustre/lustre/mdc/mdc_request.c   | 34 +--
 7 files changed, 46 insertions(+), 49 deletions(-)

diff --git a/drivers/staging/lustre/lustre/include/obd.h 
b/drivers/staging/lustre/lustre/include/obd.h
index da99a0f..b1907bb 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -940,12 +940,11 @@ struct md_ops {
  struct ptlrpc_request **);
 
int (*setxattr)(struct obd_export *, const struct lu_fid *,
-   u64, const char *, const char *, int, int, int, __u32,
-   struct ptlrpc_request **);
+   u64, const char *, const void *, size_t, unsigned int,
+   u32, struct ptlrpc_request **);
 
int (*getxattr)(struct obd_export *, const struct lu_fid *,
-   u64, const char *, const char *, int, int, int,
-   struct ptlrpc_request **);
+   u64, const char *, size_t, struct ptlrpc_request **);
 
int (*init_ea_size)(struct obd_export *, u32, u32);
 
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h 
b/drivers/staging/lustre/lustre/include/obd_class.h
index a3b1465..fc9c772 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -1385,29 +1385,26 @@ static inline int md_merge_attr(struct obd_export *exp,
 }
 
 static inline int md_setxattr(struct obd_export *exp, const struct lu_fid *fid,
- u64 valid, const char *name,
- const char *input, int input_size,
- int output_size, int flags, __u32 suppgid,
+ u64 obd_md_valid, const char *name,
+ const char *value, size_t value_size,
+ unsigned int xattr_flags, u32 suppgid,
  struct ptlrpc_request **request)
 {
EXP_CHECK_MD_OP(exp, setxattr);
EXP_MD_COUNTER_INCREMENT(exp, setxattr);
-   return MDP(exp->exp_obd, setxattr)(exp, fid, valid, name, input,
-  input_size, output_size, flags,
+   return MDP(exp->exp_obd, setxattr)(exp, fid, obd_md_valid, name,
+  value, value_size, xattr_flags,
   suppgid, request);
 }
 
 static inline int md_getxattr(struct obd_export *exp, const struct lu_fid *fid,
- u64 valid, const char *name,
- const char *input, int input_size,
- int output_size, int flags,
- struct ptlrpc_request **request)
+ u64 obd_md_valid, const char *name,
+ size_t buf_size, struct ptlrpc_request **req)
 {
EXP_CHECK_MD_OP(exp, getxattr);
EXP_MD_COUNTER_INCREMENT(exp, getxattr);
-   return MDP(exp->exp_obd, getxattr)(exp, fid, valid, name, input,
-  input_size, output_size, flags,
-  request);
+   return MDP(exp->exp_obd, getxattr)(exp, fid, obd_md_valid, name,
+  buf_size, req);
 }
 
 static inline int md_set_open_replay_data(struct obd_export *exp,
diff --git a/drivers/staging/lustre/lustre/llite/acl.c 
b/drivers/staging/lustre/lustre/llite/acl.c
index de1499b..2ee9ff9 100644
--- a/drivers/staging/lustre/lustre/llite/acl.c
+++ b/drivers/staging/lustre/lustre/llite/acl.c
@@ -94,7 +94,7 @@ int ll_set_acl(struct inode *inode, struct posix_acl *acl, 
int type)
 
rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
 value ? OBD_MD_FLXATTR : OBD_MD_FLXATTRRM,
-name, value, value_size, 0, 0, 0, );
+name, value, value_size, 0, 0, );
 
ptlrpc_req_finished(req);
 out_value:
diff --git a/drivers/staging/lustre/lustr

[PATCH v2 06/25] staging: lustre: libcfs: remove excess space

2018-05-29 Thread James Simmons
From: Amir Shehata 

The function cfs_cpt_table_print() was adding two spaces
to the string buffer. Just add it once.

Signed-off-by: Amir Shehata 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber 
Reviewed-by: Doug Oucharek 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. Same code

 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index d3017e8..d9d1388 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -173,7 +173,7 @@ struct cfs_cpt_table *
 
for (i = 0; i < cptab->ctb_nparts; i++) {
if (len > 0) {
-   rc = snprintf(tmp, len, "%d\t: ", i);
+   rc = snprintf(tmp, len, "%d\t:", i);
len -= rc;
}
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 02/25] staging: lustre: libcfs: remove useless CPU partition code

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

* remove the scratch buffer and the mutex which guards it.
* remove the global cpumask and the spinlock which guards it.
* remove cpt_version, which was used to check for CPU state changes
  during setup; CPU state changes are now simply disabled during setup.
* remove the whole global struct cfs_cpt_data cpt_data.
* remove a few unused APIs.

Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23303
Reviewed-on: https://review.whamcloud.com/25048
Reviewed-by: James Simmons 
Reviewed-by: Doug Oucharek 
Reviewed-by: Andreas Dilger 
Reviewed-by: Olaf Weber 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased to handle recent cleanups in libcfs

 .../lustre/include/linux/libcfs/libcfs_cpu.h   |  32 ++
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c| 115 +++--
 2 files changed, 22 insertions(+), 125 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 2ad12a6..3626969 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -95,8 +95,6 @@ struct cfs_cpu_partition {
 /** descriptor for CPU partitions */
 struct cfs_cpt_table {
 #ifdef CONFIG_SMP
-   /* version, reserved for hotplug */
-   unsigned intctb_version;
/* spread rotor for NUMA allocator */
unsigned intctb_spread_rotor;
/* # of CPU partitions */
@@ -176,12 +174,12 @@ struct cfs_cpt_table {
  * return 1 if successfully set all CPUs, otherwise return 0
  */
 int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab,
-   int cpt, cpumask_t *mask);
+   int cpt, const cpumask_t *mask);
 /**
  * remove all cpus in \a mask from CPU partition \a cpt
  */
 void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
-  int cpt, cpumask_t *mask);
+  int cpt, const cpumask_t *mask);
 /**
  * add all cpus in NUMA node \a node to CPU partition \a cpt
  * return 1 if successfully set all CPUs, otherwise return 0
@@ -204,20 +202,11 @@ int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab,
 void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
int cpt, nodemask_t *mask);
 /**
- * unset all cpus for CPU partition \a cpt
- */
-void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt);
-/**
  * convert partition id \a cpt to numa node id, if there are more than one
  * nodes in this partition, it might return a different node id each time.
  */
 int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
 
-/**
- * return number of HTs in the same core of \a cpu
- */
-int cfs_cpu_ht_nsiblings(int cpu);
-
 int  cfs_cpu_init(void);
 void cfs_cpu_fini(void);
 
@@ -304,13 +293,15 @@ static inline int cfs_cpt_table_print(struct 
cfs_cpt_table *cptab,
 }
 
 static inline int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt,
+   const cpumask_t *mask)
 {
return 1;
 }
 
 static inline void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt,
+ const cpumask_t *mask)
 {
 }
 
@@ -336,11 +327,6 @@ static inline int cfs_cpt_table_print(struct cfs_cpt_table 
*cptab,
 {
 }
 
-static inline void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
-}
-
 static inline int
 cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
 {
@@ -348,12 +334,6 @@ static inline int cfs_cpt_table_print(struct cfs_cpt_table 
*cptab,
 }
 
 static inline int
-cfs_cpu_ht_nsiblings(int cpu)
-{
-   return 1;
-}
-
-static inline int
 cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
 {
return 0;
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 803fc58..951a9ca 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -69,19 +69,6 @@
 module_param(cpu_pattern, charp, 0444);
 MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
 
-static struct cfs_cpt_data {
-   /* serialize hotplug etc */
-   spinlock_t  cpt_lock;
-   /* reserved for hotplug */
-   unsigned long   cpt_version;
-   /* mutex to protect cpt_cpumask */
-   struct mutexcpt_mutex;
-   /* scratch buffer for set/unset_node */
-   cpumask_var_t   cpt_cpumask;
-} cpt_data;
-
-#define CFS_CPU_VERSION_MAGIC 0xbabecafe
-
 struct cfs_cpt_table *
 cfs_cpt_table_alloc(unsigned int ncpt)
 {
@@ -124,11 +111,6 @@ struct cfs_cpt_table *
goto failed;
}
 
-   spin_lock(_dat

[PATCH v2 6/6] staging: lustre: mdc: use large xattr buffers for old servers

2018-05-29 Thread James Simmons
From: "John L. Hammond" 

Pre 2.10.1 MDTs will crash when they receive a listxattr (MDS_GETXATTR
with OBD_MD_FLXATTRLS) RPC for an orphan or dead object. So for
clients connected to these older MDTs, try to avoid sending listxattr
RPCs by making the bulk getxattr (MDS_GETXATTR with OBD_MD_FLXATTRALL)
more likely to succeed and thereby reducing the chances of falling
back to listxattr.

Signed-off-by: John L. Hammond 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10912
Reviewed-on: https://review.whamcloud.com/31990
Reviewed-by: Andreas Dilger 
Reviewed-by: Fan Yong 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) Rebased patch. No changes

 drivers/staging/lustre/lustre/mdc/mdc_locks.c | 31 +--
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c 
b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index a8aa0fa..b991c6f 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -326,8 +326,10 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req,
 {
struct ptlrpc_request   *req;
struct ldlm_intent  *lit;
+   u32 min_buf_size = 0;
int rc, count = 0;
LIST_HEAD(cancels);
+   u32 buf_size = 0;
 
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
   _LDLM_INTENT_GETXATTR);
@@ -344,18 +346,33 @@ static void mdc_realloc_openmsg(struct ptlrpc_request 
*req,
lit = req_capsule_client_get(>rq_pill, _LDLM_INTENT);
lit->opc = IT_GETXATTR;
 
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
+   /* If the supplied buffer is too small then the server will
+* return -ERANGE and llite will fallback to using non cached
+* xattr operations. On servers before 2.10.1 a (non-cached)
+* listxattr RPC for an orphan or dead file causes an oops. So
+* let's try to avoid sending too small a buffer to too old a
+* server. This is effectively undoing the memory conservation
+* of LU-9417 when it would be *more* likely to crash the
+* server. See LU-9856.
+*/
+   if (exp->exp_connect_data.ocd_version < OBD_OCD_VERSION(2, 10, 1, 0))
+   min_buf_size = exp->exp_connect_data.ocd_max_easize;
+#endif
+   buf_size = max_t(u32, min_buf_size,
+GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
+
/* pack the intended request */
-   mdc_pack_body(req, _data->op_fid1, op_data->op_valid,
- GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM, -1, 0);
+   mdc_pack_body(req, _data->op_fid1, op_data->op_valid, buf_size,
+ -1, 0);
 
-   req_capsule_set_size(>rq_pill, _EADATA, RCL_SERVER,
-GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
+   req_capsule_set_size(>rq_pill, _EADATA, RCL_SERVER, buf_size);
 
-   req_capsule_set_size(>rq_pill, _EAVALS, RCL_SERVER,
-GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
+   req_capsule_set_size(>rq_pill, _EAVALS, RCL_SERVER, buf_size);
 
req_capsule_set_size(>rq_pill, _EAVALS_LENS, RCL_SERVER,
-sizeof(u32) * GA_DEFAULT_EA_NUM);
+max_t(u32, min_buf_size,
+  sizeof(u32) * GA_DEFAULT_EA_NUM));
 
req_capsule_set_size(>rq_pill, _ACL, RCL_SERVER, 0);
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 1/6] staging: lustre: llite: create acl.c file

2018-05-29 Thread James Simmons
Move ll_get_acl() to its own file acl.c just like all the other
linux file systems do.

Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-6142
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch to add acl.c file which contains acl handling
for lustre

 drivers/staging/lustre/lustre/llite/Makefile   |  2 +
 drivers/staging/lustre/lustre/llite/acl.c  | 51 ++
 drivers/staging/lustre/lustre/llite/file.c | 13 --
 .../staging/lustre/lustre/llite/llite_internal.h   |  5 +++
 4 files changed, 58 insertions(+), 13 deletions(-)
 create mode 100644 drivers/staging/lustre/lustre/llite/acl.c

diff --git a/drivers/staging/lustre/lustre/llite/Makefile 
b/drivers/staging/lustre/lustre/llite/Makefile
index 519fd74..5200924 100644
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ b/drivers/staging/lustre/lustre/llite/Makefile
@@ -9,3 +9,5 @@ lustre-y := dcache.o dir.o file.o llite_lib.o llite_nfs.o \
super25.o statahead.o glimpse.o lcommon_cl.o lcommon_misc.o \
vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o \
lproc_llite.o
+
+lustre-$(CONFIG_FS_POSIX_ACL) += acl.o
diff --git a/drivers/staging/lustre/lustre/llite/acl.c 
b/drivers/staging/lustre/lustre/llite/acl.c
new file mode 100644
index 000..d7c3bf9
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/acl.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2015, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/llite/acl.c
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include "llite_internal.h"
+
+struct posix_acl *ll_get_acl(struct inode *inode, int type)
+{
+   struct ll_inode_info *lli = ll_i2info(inode);
+   struct posix_acl *acl = NULL;
+
+   spin_lock(>lli_lock);
+   /* VFS' acl_permission_check->check_acl will release the refcount */
+   acl = posix_acl_dup(lli->lli_posix_acl);
+   spin_unlock(>lli_lock);
+
+   return acl;
+}
diff --git a/drivers/staging/lustre/lustre/llite/file.c 
b/drivers/staging/lustre/lustre/llite/file.c
index a77cadc..ccbf91b 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -3030,19 +3030,6 @@ static int ll_fiemap(struct inode *inode, struct 
fiemap_extent_info *fieinfo,
return rc;
 }
 
-struct posix_acl *ll_get_acl(struct inode *inode, int type)
-{
-   struct ll_inode_info *lli = ll_i2info(inode);
-   struct posix_acl *acl = NULL;
-
-   spin_lock(>lli_lock);
-   /* VFS' acl_permission_check->check_acl will release the refcount */
-   acl = posix_acl_dup(lli->lli_posix_acl);
-   spin_unlock(>lli_lock);
-
-   return acl;
-}
-
 int ll_inode_permission(struct inode *inode, int mask)
 {
struct ll_sb_info *sbi;
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h 
b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 379d88e..bdb1564 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -754,7 +754,12 @@ enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 
bits,
 int ll_md_real_close(struct inode *inode, fmode_t fmode);
 int ll_getattr(const struct path *path, struct kstat *stat,
   u32 request_mask, unsigned int flags);
+#ifdef CONFIG_FS_POSIX_ACL
 struct posix_acl *ll_get_acl(struct inode *inode, int type);
+#else
+#define ll_get_acl NULL
+#endif /* CONFIG_FS_POSIX_ACL */
+
 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
   const char *name, int namelen);
 int ll_get_fid_by_name(struct inode *parent, const char *name,
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 4/6] staging: lustre: acl: increase ACL entries limitation

2018-05-29 Thread James Simmons
From: Fan Yong 

Originally, the limit on ACL entries was 32, which is not
enough for some use cases. In fact, restricting the ACL entry
count is mainly for preparing the RPC reply buffer to receive
the ACL data, so we cannot make the ACL entry count
unlimited. But we can enlarge the RPC reply buffer to hold
more ACL entries. On the other hand, the MDT backend filesystem
has its own EA size limitation. For example, in the ldiskfs case,
if large EA is enabled, then the max ACL size is 1048492 bytes;
otherwise, it is 4012 bytes. For the ZFS backend, this value is
32768 bytes. With this hard limitation, we can calculate how
many ACL entries we can have at most. This patch increases
the RPC reply buffer to match this hard limitation. For an old
client, to avoid buffer overflow because of large ACL data
(more than 32 ACL entries), the MDT will forbid the old client
from accessing a file with large ACL data. To tell
whether a client is old or new, a new connection flag,
OBD_CONNECT_LARGE_ACL, is used.

Signed-off-by: Fan Yong 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7473
Reviewed-on: https://review.whamcloud.com/19790
Reviewed-by: Andreas Dilger 
Reviewed-by: Li Xi 
Reviewed-by: Lai Siyao 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---

Changelog:

v1) Initial patch
v2) Rebased patch. No changes

 drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h | 2 +-
 drivers/staging/lustre/lustre/include/lustre_acl.h| 7 ++-
 drivers/staging/lustre/lustre/llite/llite_lib.c   | 3 ++-
 drivers/staging/lustre/lustre/mdc/mdc_locks.c | 6 ++
 drivers/staging/lustre/lustre/mdc/mdc_reint.c | 2 ++
 drivers/staging/lustre/lustre/mdc/mdc_request.c   | 4 
 drivers/staging/lustre/lustre/ptlrpc/layout.c | 4 +---
 drivers/staging/lustre/lustre/ptlrpc/wiretest.c   | 4 ++--
 8 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h 
b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h
index aac98db..8778c6f 100644
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h
@@ -615,7 +615,7 @@ struct ptlrpc_body_v2 {
 #define OBD_CONNECT_REQPORTAL   0x40ULL /*Separate non-IO req portal */
 #define OBD_CONNECT_ACL 0x80ULL /*access control lists 
*/
 #define OBD_CONNECT_XATTR  0x100ULL /*client use extended attr */
-#define OBD_CONNECT_CROW   0x200ULL /*MDS+OST create obj on write*/
+#define OBD_CONNECT_LARGE_ACL  0x200ULL /* more than 32 ACL entries */
 #define OBD_CONNECT_TRUNCLOCK  0x400ULL /*locks on server for punch */
 #define OBD_CONNECT_TRANSNO0x800ULL /*replay sends init transno */
 #define OBD_CONNECT_IBITS 0x1000ULL /*support for inodebits locks*/
diff --git a/drivers/staging/lustre/lustre/include/lustre_acl.h 
b/drivers/staging/lustre/lustre/include/lustre_acl.h
index 35ff61c..e7575a1 100644
--- a/drivers/staging/lustre/lustre/include/lustre_acl.h
+++ b/drivers/staging/lustre/lustre/include/lustre_acl.h
@@ -36,11 +36,16 @@
 
 #include 
 #include 
+#ifdef CONFIG_FS_POSIX_ACL
 #include 
 
 #define LUSTRE_POSIX_ACL_MAX_ENTRIES   32
-#define LUSTRE_POSIX_ACL_MAX_SIZE  
\
+#define LUSTRE_POSIX_ACL_MAX_SIZE_OLD  
\
(sizeof(struct posix_acl_xattr_header) +
\
 LUSTRE_POSIX_ACL_MAX_ENTRIES * sizeof(struct posix_acl_xattr_entry))
 
+#else /* ! CONFIG_FS_POSIX_ACL */
+#define LUSTRE_POSIX_ACL_MAX_SIZE_OLD 0
+#endif /* CONFIG_FS_POSIX_ACL */
+
 #endif
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c 
b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 1bc0782..36066c8 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -199,7 +199,8 @@ static int client_common_fill_super(struct super_block *sb, 
char *md, char *dt)
if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
 #ifdef CONFIG_FS_POSIX_ACL
-   data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_UMASK;
+   data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_UMASK |
+  OBD_CONNECT_LARGE_ACL;
 #endif
 
if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT))
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c 
b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index 253a545..65a5341 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -308,6 +308,8 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req,
 
req_capsule_set_size(>rq_pill, _MDT_MD, RCL_SERVER,

[PATCH v2 2/6] staging: lustre: llite: add support set_acl method in inode operations

2018-05-29 Thread James Simmons
From: Dmitry Eremin 

Linux kernel v3.14 adds set_acl method to inode operations.
This patch adds support to Lustre for proper acl management.

Signed-off-by: Dmitry Eremin 
Signed-off-by: John L. Hammond 
Signed-off-by: James Simmons 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9183
Reviewed-on: https://review.whamcloud.com/25965
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10541
Reviewed-on: https://review.whamcloud.com/31588
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10926
Reviewed-on: https://review.whamcloud.com/32045
Reviewed-by: Bob Glossman 
Reviewed-by: James Simmons 
Reviewed-by: Andreas Dilger 
Reviewed-by: Dmitry Eremin 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial ported patch

v2) Updated patch with fixes that address issues pointed out by
    Dan Carpenter

v3) Rebased to contain new code in acl.c

 drivers/staging/lustre/lustre/llite/acl.c  | 57 ++
 .../staging/lustre/lustre/llite/llite_internal.h   |  2 +
 2 files changed, 59 insertions(+)

diff --git a/drivers/staging/lustre/lustre/llite/acl.c 
b/drivers/staging/lustre/lustre/llite/acl.c
index d7c3bf9..de1499b 100644
--- a/drivers/staging/lustre/lustre/llite/acl.c
+++ b/drivers/staging/lustre/lustre/llite/acl.c
@@ -49,3 +49,60 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type)
 
return acl;
 }
+
+int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+   struct ll_sb_info *sbi = ll_i2sbi(inode);
+   struct ptlrpc_request *req = NULL;
+   const char *name = NULL;
+   size_t value_size = 0;
+   char *value = NULL;
+   int rc = 0;
+
+   switch (type) {
+   case ACL_TYPE_ACCESS:
+   name = XATTR_NAME_POSIX_ACL_ACCESS;
+   if (acl)
+   rc = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+   break;
+
+   case ACL_TYPE_DEFAULT:
+   name = XATTR_NAME_POSIX_ACL_DEFAULT;
+   if (!S_ISDIR(inode->i_mode))
+   rc = acl ? -EACCES : 0;
+   break;
+
+   default:
+   rc = -EINVAL;
+   break;
+   }
+   if (rc)
+   return rc;
+
+   if (acl) {
+   value_size = posix_acl_xattr_size(acl->a_count);
+   value = kmalloc(value_size, GFP_NOFS);
+   if (!value) {
+   rc = -ENOMEM;
+   goto out;
+   }
+
+   rc = posix_acl_to_xattr(&init_user_ns, acl, value, value_size);
+   if (rc < 0)
+   goto out_value;
+   }
+
+   rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
+value ? OBD_MD_FLXATTR : OBD_MD_FLXATTRRM,
+name, value, value_size, 0, 0, 0, &req);
+
+   ptlrpc_req_finished(req);
+out_value:
+   kfree(value);
+out:
+   if (rc)
+   forget_cached_acl(inode, type);
+   else
+   set_cached_acl(inode, type, acl);
+   return rc;
+}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h 
b/drivers/staging/lustre/lustre/llite/llite_internal.h
index bdb1564..c08a6e1 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -756,8 +756,10 @@ int ll_getattr(const struct path *path, struct kstat *stat,
   u32 request_mask, unsigned int flags);
 #ifdef CONFIG_FS_POSIX_ACL
 struct posix_acl *ll_get_acl(struct inode *inode, int type);
+int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 #else
 #define ll_get_acl NULL
+#define ll_set_acl NULL
 #endif /* CONFIG_FS_POSIX_ACL */
 
 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 5/6] staging: lustre: mdc: excessive memory consumption by the xattr cache

2018-05-29 Thread James Simmons
From: Andrew Perepechko 

The refill operation of the xattr cache does not know the
reply size in advance, so it makes a guess based on
the maxeasize value returned by the MDS.

In practice, it allocates 16 KiB for the common case and
4 MiB for the large xattr case. However, a typical reply
is just a few hundred bytes.

If we follow the conservative approach, we can prepare a
single memory page for the reply. It is large enough for
any reasonable xattr set and, at the same time, it does
not require multiple page memory reclaim, which can be
costly.

If, for a specific file, the reply is larger than a single
page, the client is prepared to handle that and will fall back
to non-cached xattr code. Indeed, if this happens often and
xattrs are often used to store large values, it makes sense to
disable the xattr cache at all since it wasn't designed for
such [mis]use.

Signed-off-by: Andrew Perepechko 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9417
Reviewed-on: https://review.whamcloud.com/26887
Reviewed-by: Fan Yong 
Reviewed-by: Ben Evans 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
Changelog:

v1) Initial patch
v2) rebased patch. No changes

 drivers/staging/lustre/lustre/mdc/mdc_locks.c | 23 +--
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c 
b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index 65a5341..a8aa0fa 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -315,6 +315,10 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req,
return req;
 }
 
+#define GA_DEFAULT_EA_NAME_LEN 20
+#define GA_DEFAULT_EA_VAL_LEN  250
+#define GA_DEFAULT_EA_NUM  10
+
 static struct ptlrpc_request *
 mdc_intent_getxattr_pack(struct obd_export *exp,
 struct lookup_intent *it,
@@ -323,7 +327,6 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req,
struct ptlrpc_request   *req;
struct ldlm_intent  *lit;
int rc, count = 0;
-   u32 maxdata;
LIST_HEAD(cancels);
 
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
@@ -341,20 +344,20 @@ static void mdc_realloc_openmsg(struct ptlrpc_request 
*req,
lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
lit->opc = IT_GETXATTR;
 
-   maxdata = class_exp2cliimp(exp)->imp_connect_data.ocd_max_easize;
-
/* pack the intended request */
-   mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid, maxdata, -1,
- 0);
+   mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid,
+ GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM, -1, 0);
 
-   req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, maxdata);
+   req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER,
+GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
 
-   req_capsule_set_size(&req->rq_pill, &RMF_EAVALS, RCL_SERVER, maxdata);
+   req_capsule_set_size(&req->rq_pill, &RMF_EAVALS, RCL_SERVER,
+GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
 
-   req_capsule_set_size(&req->rq_pill, &RMF_EAVALS_LENS,
-RCL_SERVER, maxdata);
+   req_capsule_set_size(&req->rq_pill, &RMF_EAVALS_LENS, RCL_SERVER,
+sizeof(u32) * GA_DEFAULT_EA_NUM);
 
-   req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, maxdata);
+   req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, 0);
 
ptlrpc_request_set_replen(req);
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 0/6] staging: lustre: llite: remaining xattr patches

2018-05-29 Thread James Simmons
From: James Simmons 

Fixed the bugs in the set_acl patch pointed out by Dan Carpenter.
Rebased the next patch 'remove unused parameter...' on the parent
patch. Created new acl.c file to match what other linux kernel file
systems do. Added newer xattr fixes that were recently pushed.

Andrew Perepechko (1):
  staging: lustre: mdc: excessive memory consumption by the xattr cache

Dmitry Eremin (1):
  staging: lustre: llite: add support set_acl method in inode operations

Fan Yong (1):
  staging: lustre: acl: increase ACL entries limitation

James Simmons (1):
  staging: lustre: llite: create acl.c file

John L. Hammond (2):
  staging: lustre: llite: remove unused parameters from md_{get,set}xattr()
  staging: lustre: mdc: use large xattr buffers for old servers

 .../lustre/include/uapi/linux/lustre/lustre_idl.h  |   2 +-
 drivers/staging/lustre/lustre/include/lustre_acl.h |   7 +-
 drivers/staging/lustre/lustre/include/obd.h|   7 +-
 drivers/staging/lustre/lustre/include/obd_class.h  |  21 ++--
 drivers/staging/lustre/lustre/llite/Makefile   |   2 +
 drivers/staging/lustre/lustre/llite/acl.c  | 108 +
 drivers/staging/lustre/lustre/llite/file.c |  16 +--
 .../staging/lustre/lustre/llite/llite_internal.h   |   7 ++
 drivers/staging/lustre/lustre/llite/llite_lib.c|   3 +-
 drivers/staging/lustre/lustre/llite/xattr.c|   6 +-
 drivers/staging/lustre/lustre/lmv/lmv_obd.c|  22 ++---
 drivers/staging/lustre/lustre/mdc/mdc_locks.c  |  42 ++--
 drivers/staging/lustre/lustre/mdc/mdc_reint.c  |   2 +
 drivers/staging/lustre/lustre/mdc/mdc_request.c|  38 +---
 drivers/staging/lustre/lustre/ptlrpc/layout.c  |   4 +-
 drivers/staging/lustre/lustre/ptlrpc/wiretest.c|   4 +-
 16 files changed, 214 insertions(+), 77 deletions(-)
 create mode 100644 drivers/staging/lustre/lustre/llite/acl.c
--
Changelog:

v1) Initial patch set with fixes to address issues pointed by Dan.
v2) Created new acl.c file and rebased the patches due to that change

-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 4/4] staging: lustre: obdclass: change object lookup to no wait mode

2018-05-16 Thread James Simmons

> > > Anyway, I understand that Intel has been ignoring kernel.org instead of
> > > sending forwarding their patches properly so you're doing a difficult
> > > and thankless job...  Thanks for that.  I'm sure it's frustrating to
> > > look at these patches for you as well.
> > 
> > Thank you for the compliment. Also thank you for taking time to review
> > these patches. Your feedback is most welcome and beneficial to the
> > health of the lustre client.
> > 
> > Sadly it's not just Intel but other vendors that don't directly contribute
> > to the linux lustre client. I have spoken to the vendors about contributing 
> > and they all say the same thing. No working with drivers in the staging 
> > tree. Sadly all the parties involved are very interested in the success 
> > of the lustre client. No one has ever told me directly why they don't get
> > involved but I suspect it has to do with 2 reasons. One is that staging
> > drivers are not normally enabled by distributions so their clients 
> > normally will never deal with the staging lustre client. Secondly vendors
> > just lack the manpower to contribute in a meaningful way.
> 
> If staging is hurting you, why is it in staging at all?  Why not just
> drop it, go off and spend a few months to clean up all the issues in
> your own tree (with none of those pesky requirements of easy-to-review
> patches) and then submit a "clean" filesystem for inclusion in the
> "real" part of the kernel tree?
> 
> It doesn't sound like anyone is actually using this code in the tree
> as-is, so why even keep it here?

I never said being in staging is hurting the progression of Lustre. In 
fact it is the exact opposite otherwise I wouldn't be active in this work.
What I was pointing out to Dan was that many vendors are reluctant to 
participate in broader open source development of this type.

The whole point of this is to evolve Lustre into a proper open source 
project not dependent on vendors for survival. Several years ago Lustre 
changed hands several times and the HPC community was worried about its
survival. Various institutions banded together to raise the resources to 
keep it alive. Over time Lustre has been migrating to a more open source 
community effort. An awesome example is the work the University of Indiana 
did for the sptlrpc layer. Now we see efforts expanding into the realm of 
the linux lustre client. Actually HPC sites that are community members are 
testing and running the linux client. In spite of the lack of vendor 
involvement the linux lustre client is making excellent progress. How 
often do you see style patches anymore? The headers are properly split
between userspace UAPI headers and kernel space. One of the major barriers
to leave staging was the lack of a strong presence to continue moving
the lustre client forward. That is no longer the case. The finish line is
in view.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 4/4] staging: lustre: obdclass: change object lookup to no wait mode

2018-05-15 Thread James Simmons

> > /*
> >  * Allocate new object. This may result in rather complicated
> >  * operations, including fld queries, inode loading, etc.
> >  */
> > o = lu_object_alloc(env, dev, f, conf);
> > -   if (IS_ERR(o))
> > +   if (unlikely(IS_ERR(o)))
> > return o;
> >  
> 
> This is unrelated and totally pointless.  likely/unlikely annotations
> hurt readability, and they should only be added if it's something which
> is going to show up in benchmarking.  lu_object_alloc() is already too
> slow for the unlikely() to make a difference and anyway IS_ERR() has an
> unlikely built in so it's duplicative...

Sounds like a good checkpatch case to test for :-) Some people like to try
and milk every cycle they can. Personally, I never use those 
annotations. With modern processors I'm skeptical of their benefits.
I do clean up the patches to some extent to make them compliant with 
kernel standards but leave the core code in place for people to comment 
on.

> Anyway, I understand that Intel has been ignoring kernel.org instead of
> sending forwarding their patches properly so you're doing a difficult
> and thankless job...  Thanks for that.  I'm sure it's frustrating to
> look at these patches for you as well.

Thank you for the compliment. Also thank you for taking time to review
these patches. Your feedback is most welcome and beneficial to the
health of the lustre client.

Sadly it's not just Intel but other vendors that don't directly contribute
to the linux lustre client. I have spoken to the vendors about contributing 
and they all say the same thing. No working with drivers in the staging 
tree. Sadly all the parties involved are very interested in the success 
of the lustre client. No one has ever told me directly why they don't get
involved but I suspect it has to do with 2 reasons. One is that staging
drivers are not normally enabled by distributions so their clients 
normally will never deal with the staging lustre client. Secondly vendors
just lack the manpower to contribute in a meaningful way.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v2 1/5] staging: lustre: llite: add support set_acl method in inode operations

2018-05-15 Thread James Simmons

> On Mon, May 14, 2018 at 10:16:59PM -0400, James Simmons wrote:
> > +#ifdef CONFIG_FS_POSIX_ACL
> >  struct posix_acl *ll_get_acl(struct inode *inode, int type)
> >  {
> > struct ll_inode_info *lli = ll_i2info(inode);
> > @@ -3043,6 +3044,64 @@ struct posix_acl *ll_get_acl(struct inode *inode, 
> > int type)
> > return acl;
> >  }
> >  
> > +int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type)
> > +{
> > +   struct ll_sb_info *sbi = ll_i2sbi(inode);
> > +   struct ptlrpc_request *req = NULL;
> > +   const char *name = NULL;
> > +   size_t value_size = 0;
> > +   char *value = NULL;
> > +   int rc;
> 
> "rc" needs to be initialized to zero.  It's disappointing that GCC
> doesn't catch this.

Thanks Dan. Will fix.
 
> > +
> > +   switch (type) {
> > +   case ACL_TYPE_ACCESS:
> > +   name = XATTR_NAME_POSIX_ACL_ACCESS;
> > +   if (acl)
> > +   rc = posix_acl_update_mode(inode, &inode->i_mode, &acl);
> > +   break;
> > +
> > +   case ACL_TYPE_DEFAULT:
> > +   name = XATTR_NAME_POSIX_ACL_DEFAULT;
> > +   if (!S_ISDIR(inode->i_mode))
> > +   rc = acl ? -EACCES : 0;
> > +   break;
> > +
> > +   default:
> > +   rc = -EINVAL;
> > +   break;
> > +   }
> > +   if (rc)
> > +   return rc;
> 
> Otherwise rc can be uninitialized here.
> 
> regards,
> dan carpenter
> 
> 
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 4/5] staging: lustre: mdc: excessive memory consumption by the xattr cache

2018-05-14 Thread James Simmons
From: Andrew Perepechko <c17...@cray.com>

The refill operation of the xattr cache does not know the
reply size in advance, so it makes a guess based on
the maxeasize value returned by the MDS.

In practice, it allocates 16 KiB for the common case and
4 MiB for the large xattr case. However, a typical reply
is just a few hundred bytes.

If we follow the conservative approach, we can prepare a
single memory page for the reply. It is large enough for
any reasonable xattr set and, at the same time, it does
not require multiple page memory reclaim, which can be
costly.

If, for a specific file, the reply is larger than a single
page, the client is prepared to handle that and will fall back
to non-cached xattr code. Indeed, if this happens often and
xattrs are often used to store large values, it makes sense to
disable the xattr cache at all since it wasn't designed for
such [mis]use.

Signed-off-by: Andrew Perepechko <c17...@cray.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9417
Reviewed-on: https://review.whamcloud.com/26887
Reviewed-by: Fan Yong <fan.y...@intel.com>
Reviewed-by: Ben Evans <bev...@cray.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/mdc/mdc_locks.c | 23 +--
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c 
b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index 65a5341..a8aa0fa 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -315,6 +315,10 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req,
return req;
 }
 
+#define GA_DEFAULT_EA_NAME_LEN 20
+#define GA_DEFAULT_EA_VAL_LEN  250
+#define GA_DEFAULT_EA_NUM  10
+
 static struct ptlrpc_request *
 mdc_intent_getxattr_pack(struct obd_export *exp,
 struct lookup_intent *it,
@@ -323,7 +327,6 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req,
struct ptlrpc_request   *req;
struct ldlm_intent  *lit;
int rc, count = 0;
-   u32 maxdata;
LIST_HEAD(cancels);
 
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
@@ -341,20 +344,20 @@ static void mdc_realloc_openmsg(struct ptlrpc_request 
*req,
lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
lit->opc = IT_GETXATTR;
 
-   maxdata = class_exp2cliimp(exp)->imp_connect_data.ocd_max_easize;
-
/* pack the intended request */
-   mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid, maxdata, -1,
- 0);
+   mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid,
+ GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM, -1, 0);
 
-   req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, maxdata);
+   req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER,
+GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
 
-   req_capsule_set_size(&req->rq_pill, &RMF_EAVALS, RCL_SERVER, maxdata);
+   req_capsule_set_size(&req->rq_pill, &RMF_EAVALS, RCL_SERVER,
+GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
 
-   req_capsule_set_size(&req->rq_pill, &RMF_EAVALS_LENS,
-RCL_SERVER, maxdata);
+   req_capsule_set_size(&req->rq_pill, &RMF_EAVALS_LENS, RCL_SERVER,
+sizeof(u32) * GA_DEFAULT_EA_NUM);
 
-   req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, maxdata);
+   req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, 0);
 
ptlrpc_request_set_replen(req);
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 5/5] staging: lustre: mdc: use large xattr buffers for old servers

2018-05-14 Thread James Simmons
From: "John L. Hammond" <john.hamm...@intel.com>

Pre 2.10.1 MDTs will crash when they receive a listxattr (MDS_GETXATTR
with OBD_MD_FLXATTRLS) RPC for an orphan or dead object. So for
clients connected to these older MDTs, try to avoid sending listxattr
RPCs by making the bulk getxattr (MDS_GETXATTR with OBD_MD_FLXATTRALL)
more likely to succeed and thereby reducing the chances of falling
back to listxattr.

Signed-off-by: John L. Hammond <john.hamm...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10912
Reviewed-on: https://review.whamcloud.com/31990
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Fan Yong <fan.y...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/mdc/mdc_locks.c | 31 +--
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c 
b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index a8aa0fa..b991c6f 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -326,8 +326,10 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req,
 {
struct ptlrpc_request   *req;
struct ldlm_intent  *lit;
+   u32 min_buf_size = 0;
int rc, count = 0;
LIST_HEAD(cancels);
+   u32 buf_size = 0;
 
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
   &RQF_LDLM_INTENT_GETXATTR);
@@ -344,18 +346,33 @@ static void mdc_realloc_openmsg(struct ptlrpc_request 
*req,
lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
lit->opc = IT_GETXATTR;
 
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
+   /* If the supplied buffer is too small then the server will
+* return -ERANGE and llite will fallback to using non cached
+* xattr operations. On servers before 2.10.1 a (non-cached)
+* listxattr RPC for an orphan or dead file causes an oops. So
+* let's try to avoid sending too small a buffer to too old a
+* server. This is effectively undoing the memory conservation
+* of LU-9417 when it would be *more* likely to crash the
+* server. See LU-9856.
+*/
+   if (exp->exp_connect_data.ocd_version < OBD_OCD_VERSION(2, 10, 1, 0))
+   min_buf_size = exp->exp_connect_data.ocd_max_easize;
+#endif
+   buf_size = max_t(u32, min_buf_size,
+GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
+
/* pack the intended request */
-   mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid,
- GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM, -1, 0);
+   mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid, buf_size,
+ -1, 0);
 
-   req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER,
-GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
+   req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, buf_size);
 
-   req_capsule_set_size(&req->rq_pill, &RMF_EAVALS, RCL_SERVER,
-GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
+   req_capsule_set_size(&req->rq_pill, &RMF_EAVALS, RCL_SERVER, buf_size);
 
req_capsule_set_size(&req->rq_pill, &RMF_EAVALS_LENS, RCL_SERVER,
-sizeof(u32) * GA_DEFAULT_EA_NUM);
+max_t(u32, min_buf_size,
+  sizeof(u32) * GA_DEFAULT_EA_NUM));
 
req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, 0);
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 0/5] staging: lustre: llite: remaining xattr fixes

2018-05-14 Thread James Simmons
Fixed the bugs in the set_acl patch pointed out by Dan Carpenter.
Rebased the next patch 'remove unused parameter...' on the parent
patch. Added newer xattr fixes that were recently pushed.

Andrew Perepechko (1):
  staging: lustre: mdc: excessive memory consumption by the xattr cache

Dmitry Eremin (1):
  staging: lustre: llite: add support set_acl method in inode operations

Fan Yong (1):
  staging: lustre: acl: increase ACL entries limitation

John L. Hammond (2):
  staging: lustre: llite: remove unused parameters from md_{get,set}xattr()
  staging: lustre: mdc: use large xattr buffers for old servers

 .../lustre/include/uapi/linux/lustre/lustre_idl.h  |  2 +-
 drivers/staging/lustre/lustre/include/lustre_acl.h |  7 ++-
 drivers/staging/lustre/lustre/include/obd.h|  7 +--
 drivers/staging/lustre/lustre/include/obd_class.h  | 21 +++
 drivers/staging/lustre/lustre/llite/file.c | 65 +-
 .../staging/lustre/lustre/llite/llite_internal.h   |  4 ++
 drivers/staging/lustre/lustre/llite/llite_lib.c|  3 +-
 drivers/staging/lustre/lustre/llite/namei.c| 10 +++-
 drivers/staging/lustre/lustre/llite/xattr.c|  6 +-
 drivers/staging/lustre/lustre/lmv/lmv_obd.c| 22 
 drivers/staging/lustre/lustre/mdc/mdc_locks.c  | 42 +++---
 drivers/staging/lustre/lustre/mdc/mdc_reint.c  |  2 +
 drivers/staging/lustre/lustre/mdc/mdc_request.c| 38 -
 drivers/staging/lustre/lustre/ptlrpc/layout.c  |  4 +-
 drivers/staging/lustre/lustre/ptlrpc/wiretest.c|  4 +-
 15 files changed, 171 insertions(+), 66 deletions(-)

-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 3/5] staging: lustre: acl: increase ACL entries limitation

2018-05-14 Thread James Simmons
From: Fan Yong <fan.y...@intel.com>

Originally, the limitation of ACL entries is 32, that is not
enough for some use cases. In fact, restricting ACL entries
count is mainly for preparing the RPC reply buffer to receive
the ACL data. So we cannot make the ACL entries count to be
unlimited. But we can enlarge the RPC reply buffer to hold
more ACL entries. On the other hand, MDT backend filesystem
has its own EA size limitation. For example, for ldiskfs case,
if large EA is enabled, then the max ACL size is 1048492 bytes;
otherwise, it is 4012 bytes. For ZFS backend, such value is
32768 bytes. With such hard limitation, we can calculate how
many ACL entries we can have at most. This patch increases
the RPC reply buffer to match such hard limitation. For old
client, to avoid buffer overflow because of large ACL data
(more than 32 ACL entries), the MDT will forbid the old client
to access the file with large ACL data. As for how to know
whether it is old client or new, a new connection flag
OBD_CONNECT_LARGE_ACL is used for that.

Signed-off-by: Fan Yong <fan.y...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7473
Reviewed-on: https://review.whamcloud.com/19790
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Li Xi <l...@ddn.com>
Reviewed-by: Lai Siyao <lai.si...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h | 2 +-
 drivers/staging/lustre/lustre/include/lustre_acl.h| 7 ++-
 drivers/staging/lustre/lustre/llite/llite_lib.c   | 3 ++-
 drivers/staging/lustre/lustre/mdc/mdc_locks.c | 6 ++
 drivers/staging/lustre/lustre/mdc/mdc_reint.c | 2 ++
 drivers/staging/lustre/lustre/mdc/mdc_request.c   | 4 
 drivers/staging/lustre/lustre/ptlrpc/layout.c | 4 +---
 drivers/staging/lustre/lustre/ptlrpc/wiretest.c   | 4 ++--
 8 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h 
b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h
index aac98db..8778c6f 100644
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h
@@ -615,7 +615,7 @@ struct ptlrpc_body_v2 {
 #define OBD_CONNECT_REQPORTAL   0x40ULL /*Separate non-IO req portal */
 #define OBD_CONNECT_ACL 0x80ULL /*access control lists 
*/
 #define OBD_CONNECT_XATTR  0x100ULL /*client use extended attr */
-#define OBD_CONNECT_CROW   0x200ULL /*MDS+OST create obj on write*/
+#define OBD_CONNECT_LARGE_ACL  0x200ULL /* more than 32 ACL entries */
 #define OBD_CONNECT_TRUNCLOCK  0x400ULL /*locks on server for punch */
 #define OBD_CONNECT_TRANSNO0x800ULL /*replay sends init transno */
 #define OBD_CONNECT_IBITS 0x1000ULL /*support for inodebits locks*/
diff --git a/drivers/staging/lustre/lustre/include/lustre_acl.h 
b/drivers/staging/lustre/lustre/include/lustre_acl.h
index 35ff61c..e7575a1 100644
--- a/drivers/staging/lustre/lustre/include/lustre_acl.h
+++ b/drivers/staging/lustre/lustre/include/lustre_acl.h
@@ -36,11 +36,16 @@
 
 #include 
 #include 
+#ifdef CONFIG_FS_POSIX_ACL
 #include 
 
 #define LUSTRE_POSIX_ACL_MAX_ENTRIES   32
-#define LUSTRE_POSIX_ACL_MAX_SIZE  
\
+#define LUSTRE_POSIX_ACL_MAX_SIZE_OLD  
\
(sizeof(struct posix_acl_xattr_header) +
\
 LUSTRE_POSIX_ACL_MAX_ENTRIES * sizeof(struct posix_acl_xattr_entry))
 
+#else /* ! CONFIG_FS_POSIX_ACL */
+#define LUSTRE_POSIX_ACL_MAX_SIZE_OLD 0
+#endif /* CONFIG_FS_POSIX_ACL */
+
 #endif
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c 
b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 83eb2da..b5c287b 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -198,7 +198,8 @@ static int client_common_fill_super(struct super_block *sb, 
char *md, char *dt)
if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
 #ifdef CONFIG_FS_POSIX_ACL
-   data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_UMASK;
+   data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_UMASK |
+  OBD_CONNECT_LARGE_ACL;
 #endif
 
if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT))
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c 
b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index 253a545..65a5341 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -308,6 +308,8 @@ 

[PATCH v2 1/5] staging: lustre: llite: add support set_acl method in inode operations

2018-05-14 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Linux kernel v3.14 adds set_acl method to inode operations.
This patch adds support to Lustre for proper acl management.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Signed-off-by: John L. Hammond <john.hamm...@intel.com>
Signed-off-by: James Simmons <uja.o...@yahoo.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9183
Reviewed-on: https://review.whamcloud.com/25965
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10541
Reviewed-on: https://review.whamcloud.com/31588
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10926
Reviewed-on: https://review.whamcloud.com/32045
Reviewed-by: Bob Glossman <bob.gloss...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
Changelog:

v1) Initial patch ported to staging tree
v2) Fixed up goto handling and avoid BUG() when calling
forget_cached_acl() with invalid type as pointed out by Dan Carpenter

 drivers/staging/lustre/lustre/llite/file.c | 62 ++
 .../staging/lustre/lustre/llite/llite_internal.h   |  4 ++
 drivers/staging/lustre/lustre/llite/namei.c| 10 +++-
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/file.c 
b/drivers/staging/lustre/lustre/llite/file.c
index 0026fde..64a5698 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -3030,6 +3030,7 @@ static int ll_fiemap(struct inode *inode, struct 
fiemap_extent_info *fieinfo,
return rc;
 }
 
+#ifdef CONFIG_FS_POSIX_ACL
 struct posix_acl *ll_get_acl(struct inode *inode, int type)
 {
struct ll_inode_info *lli = ll_i2info(inode);
@@ -3043,6 +3044,64 @@ struct posix_acl *ll_get_acl(struct inode *inode, int 
type)
return acl;
 }
 
+int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+   struct ll_sb_info *sbi = ll_i2sbi(inode);
+   struct ptlrpc_request *req = NULL;
+   const char *name = NULL;
+   size_t value_size = 0;
+   char *value = NULL;
+   int rc;
+
+   switch (type) {
+   case ACL_TYPE_ACCESS:
+   name = XATTR_NAME_POSIX_ACL_ACCESS;
+   if (acl)
+   rc = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+   break;
+
+   case ACL_TYPE_DEFAULT:
+   name = XATTR_NAME_POSIX_ACL_DEFAULT;
+   if (!S_ISDIR(inode->i_mode))
+   rc = acl ? -EACCES : 0;
+   break;
+
+   default:
+   rc = -EINVAL;
+   break;
+   }
+   if (rc)
+   return rc;
+
+   if (acl) {
+   value_size = posix_acl_xattr_size(acl->a_count);
+   value = kmalloc(value_size, GFP_NOFS);
+   if (!value) {
+   rc = -ENOMEM;
+   goto out;
+   }
+
+   rc = posix_acl_to_xattr(&init_user_ns, acl, value, value_size);
+   if (rc < 0)
+   goto out_value;
+   }
+
+   rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
+value ? OBD_MD_FLXATTR : OBD_MD_FLXATTRRM,
+name, value, value_size, 0, 0, 0, &req);
+
+   ptlrpc_req_finished(req);
+out_value:
+   kfree(value);
+out:
+   if (rc)
+   forget_cached_acl(inode, type);
+   else
+   set_cached_acl(inode, type, acl);
+   return rc;
+}
+#endif /* CONFIG_FS_POSIX_ACL */
+
 int ll_inode_permission(struct inode *inode, int mask)
 {
struct ll_sb_info *sbi;
@@ -3164,7 +3223,10 @@ int ll_inode_permission(struct inode *inode, int mask)
.permission = ll_inode_permission,
.listxattr  = ll_listxattr,
.fiemap = ll_fiemap,
+#ifdef CONFIG_FS_POSIX_ACL
.get_acl= ll_get_acl,
+   .set_acl= ll_set_acl,
+#endif
 };
 
 /* dynamic ioctl number support routines */
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h 
b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 6504850..2280327 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -754,7 +754,11 @@ enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 
bits,
 int ll_md_real_close(struct inode *inode, fmode_t fmode);
 int ll_getattr(const struct path *path, struct kstat *stat,
   u32 request_mask, unsigned int flags);
+#ifdef CONFIG_FS_POSIX_ACL
 struct posix_acl *ll_get_acl(struct inode *inode, int type);
+int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+#endif /* CONFIG_FS_POSIX_ACL */
+
 int ll_migrate(struct inode *

[PATCH v2] staging: lustre: obdclass: change object lookup to no wait mode

2018-05-14 Thread James Simmons
From: Lai Siyao <lai.si...@intel.com>

Currently we set LU_OBJECT_HEARD_BANSHEE on an object when we want
to remove it from the cache, but this may lead to a deadlock: when
another process looks up such an object, it has to wait until the
object is released (done at the last refcount put), while that process
may already hold an LDLM lock.

Now that the current code can handle dying objects correctly, we can
just return such an object from lookup, which avoids the above deadlock.

Signed-off-by: Lai Siyao <lai.si...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9049
Reviewed-on: https://review.whamcloud.com/26965
Reviewed-by: Alex Zhuravlev <alexey.zhurav...@intel.com>
Tested-by: Cliff White <cliff.wh...@intel.com>
Reviewed-by: Fan Yong <fan.y...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
Changelog:

v1) Initial patch that didn't apply to staging-testing branch
v2) Rebased after Neil's patches landed. Remove unlikely() test
as requested by Dan Carpenter

 drivers/staging/lustre/lustre/obdclass/lu_object.c | 39 +-
 1 file changed, 9 insertions(+), 30 deletions(-)

diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c 
b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index f14e350..e0abd4f 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -593,15 +593,10 @@ static struct lu_object *htable_lookup(struct lu_site *s,
   const struct lu_fid *f,
   __u64 *version)
 {
-   struct cfs_hash *hs = s->ls_obj_hash;
struct lu_site_bkt_data *bkt;
struct lu_object_header *h;
struct hlist_node   *hnode;
-   __u64 ver;
-   wait_queue_entry_t waiter;
-
-retry:
-   ver = cfs_hash_bd_version_get(bd);
+   u64 ver = cfs_hash_bd_version_get(bd);
 
if (*version == ver)
return ERR_PTR(-ENOENT);
@@ -618,31 +613,13 @@ static struct lu_object *htable_lookup(struct lu_site *s,
}
 
h = container_of(hnode, struct lu_object_header, loh_hash);
-   if (likely(!lu_object_is_dying(h))) {
-   cfs_hash_get(s->ls_obj_hash, hnode);
-   lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
-   if (!list_empty(&h->loh_lru)) {
-   list_del_init(&h->loh_lru);
-   percpu_counter_dec(&s->ls_lru_len_counter);
-   }
-   return lu_object_top(h);
+   cfs_hash_get(s->ls_obj_hash, hnode);
+   lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
+   if (!list_empty(&h->loh_lru)) {
+   list_del_init(&h->loh_lru);
+   percpu_counter_dec(&s->ls_lru_len_counter);
}
-
-   /*
-* Lookup found an object being destroyed this object cannot be
-* returned (to assure that references to dying objects are eventually
-* drained), and moreover, lookup has to wait until object is freed.
-*/
-
-   init_waitqueue_entry(&waiter, current);
-   add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
-   set_current_state(TASK_UNINTERRUPTIBLE);
-   lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
-   cfs_hash_bd_unlock(hs, bd, 1);
-   schedule();
-   remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
-   cfs_hash_bd_lock(hs, bd, 1);
-   goto retry;
+   return lu_object_top(h);
 }
 
 /**
@@ -683,6 +660,8 @@ static void lu_object_limit(const struct lu_env *env, 
struct lu_device *dev)
 }
 
 /**
+ * Core logic of lu_object_find*() functions.
+ *
  * Much like lu_object_find(), but top level device of object is specifically
  * \a dev rather than top level device of the site. This interface allows
  * objects of different "stacking" to be created within the same site.
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 4/4] staging: lustre: obdclass: change object lookup to no wait mode

2018-05-14 Thread James Simmons

> >> On Wed, May 02 2018, James Simmons wrote:
> >> 
> >> > From: Lai Siyao <lai.si...@intel.com>
> >> >
> >> > Currently we set LU_OBJECT_HEARD_BANSHEE on object when we want
> >> > to remove object from cache, but this may lead to deadlock, because
> >> > when other process lookup such object, it needs to wait for this
> >> > object until release (done at last refcount put), while that process
> >> > maybe already hold an LDLM lock.
> >> >
> >> > Now that current code can handle dying object correctly, we can just
> >> > return such object in lookup, thus the above deadlock can be avoided.
> >> 
> >> I think one of the reasons that I didn't apply this to mainline myself
> >> is that "Now that" comment.  When is the "now" that it is referring to?
> >> Are were sure that all code in mainline "can handle dying objects
> >> correctly"??
> >
> > So I talked to Lai and he posted the LU-9049 ticket what patches need to
> > land before this one. Only one patch is of concern and its for LU-9203
> > which doesn't apply to the staging tree since we don't have the LNet SMP
> > updates in our tree. I saved notes about making sure LU-9203 lands 
> > together with the future LNet SMP changes. As it stands it is safe to
> > land to staging.
> 
> Thanks a lot for looking into this.  Nice to have the safety of this
> change confirmed.
> 
> What do you think of:
> 
> >> > @@ -713,36 +691,46 @@ struct lu_object *lu_object_find_at(const struct 
> >> > lu_env *env,
> >> >   * It is unnecessary to perform lookup-alloc-lookup-insert, 
> >> > instead,
> >> >   * just alloc and insert directly.
> >> >   *
> >> > + * If dying object is found during index search, add @waiter to 
> >> > the
> >> > + * site wait-queue and return ERR_PTR(-EAGAIN).
> >> 
> >> It seems odd to add this comment here, when it seems to describe code
> >> that is being removed.
> >> I can see that this comment is added by the upstream patch
> >> Commit: fa14bdf6b648 ("LU-9049 obdclass: change object lookup to no wait 
> >> mode")
> >> but I cannot see what it refers to.
> >> 
> 
> ??
> 
> Am I misunderstanding something, or is that comment wrong?

I think the comment is wrong. That comment was in the other tree before
the patch landed. It got included in this push by accident when I diffed
the trees. I will remove it in the next push.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 4/4] staging: lustre: obdclass: change object lookup to no wait mode

2018-05-14 Thread James Simmons

> On Wed, May 02 2018, James Simmons wrote:
> 
> > From: Lai Siyao <lai.si...@intel.com>
> >
> > Currently we set LU_OBJECT_HEARD_BANSHEE on object when we want
> > to remove object from cache, but this may lead to deadlock, because
> > when other process lookup such object, it needs to wait for this
> > object until release (done at last refcount put), while that process
> > maybe already hold an LDLM lock.
> >
> > Now that current code can handle dying object correctly, we can just
> > return such object in lookup, thus the above deadlock can be avoided.
> 
> I think one of the reasons that I didn't apply this to mainline myself
> is that "Now that" comment.  When is the "now" that it is referring to?
> Are were sure that all code in mainline "can handle dying objects
> correctly"??

So I talked to Lai and he posted in the LU-9049 ticket which patches need
to land before this one. Only one patch is of concern, and it's for LU-9203,
which doesn't apply to the staging tree since we don't have the LNet SMP
updates in our tree. I saved notes about making sure LU-9203 lands
together with the future LNet SMP changes. As it stands, it is safe to
land in staging.
 
> > Signed-off-by: Lai Siyao <lai.si...@intel.com>
> > Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9049
> > Reviewed-on: https://review.whamcloud.com/26965
> > Reviewed-by: Alex Zhuravlev <alexey.zhurav...@intel.com>
> > Tested-by: Cliff White <cliff.wh...@intel.com>
> > Reviewed-by: Fan Yong <fan.y...@intel.com>
> > Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
> > Signed-off-by: James Simmons <jsimm...@infradead.org>
> > ---
> >  drivers/staging/lustre/lustre/include/lu_object.h  |  2 +-
> >  drivers/staging/lustre/lustre/obdclass/lu_object.c | 82 
> > +-
> >  2 files changed, 36 insertions(+), 48 deletions(-)
> >
> > diff --git a/drivers/staging/lustre/lustre/include/lu_object.h 
> > b/drivers/staging/lustre/lustre/include/lu_object.h
> > index f29bbca..232063a 100644
> > --- a/drivers/staging/lustre/lustre/include/lu_object.h
> > +++ b/drivers/staging/lustre/lustre/include/lu_object.h
> > @@ -673,7 +673,7 @@ static inline void lu_object_get(struct lu_object *o)
> >  }
> >  
> >  /**
> > - * Return true of object will not be cached after last reference to it is
> > + * Return true if object will not be cached after last reference to it is
> >   * released.
> >   */
> >  static inline int lu_object_is_dying(const struct lu_object_header *h)
> > diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c 
> > b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> > index 8b507f1..9311703 100644
> > --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
> > +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> > @@ -589,19 +589,13 @@ static struct lu_object *htable_lookup(struct lu_site 
> > *s,
> >const struct lu_fid *f,
> >__u64 *version)
> >  {
> > -   struct cfs_hash *hs = s->ls_obj_hash;
> > struct lu_site_bkt_data *bkt;
> > struct lu_object_header *h;
> > struct hlist_node   *hnode;
> > -   __u64 ver;
> > -   wait_queue_entry_t waiter;
> > +   u64 ver = cfs_hash_bd_version_get(bd);
> >  
> > -retry:
> > -   ver = cfs_hash_bd_version_get(bd);
> > -
> > -   if (*version == ver) {
> > +   if (*version == ver)
> > return ERR_PTR(-ENOENT);
> > -   }
> >  
> > *version = ver;
> > bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
> > @@ -615,31 +609,13 @@ static struct lu_object *htable_lookup(struct lu_site 
> > *s,
> > }
> >  
> > h = container_of(hnode, struct lu_object_header, loh_hash);
> > -   if (likely(!lu_object_is_dying(h))) {
> > -   cfs_hash_get(s->ls_obj_hash, hnode);
> > -   lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
> > -   if (!list_empty(>loh_lru)) {
> > -   list_del_init(>loh_lru);
> > -   percpu_counter_dec(>ls_lru_len_counter);
> > -   }
> > -   return lu_object_top(h);
> > +   cfs_hash_get(s->ls_obj_hash, hnode);
> > +   lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
> > +   if (!list_empty(>loh_lru)) {
> > +   list_del_init(>loh_lru);
> > +   percpu_counter_dec(>ls_lru_len_counter);
> > }
> > -
> > - 

[PATCH 0/4] staging: lustre: obdclass: missing lu_object fixes

2018-05-02 Thread James Simmons
While following Neil's ongoing lu_object work I noticed him solving the
same problem as the Intel developers, using a very similar approach. With
all that rework under way we don't want to lose these important fixes. This
is meant more for a basic review, since in the end Neil's work and this
series will be combined in some fashion. Note that this patch set is based
on top of Neil's cleanup patches for lu_objects published a few days ago.

Hongchao Zhang (1):
  staging: lustre: obdclass: guarantee all keys filled

John L. Hammond (1):
  staging: lustre: obdclass: hoist locking in lu_context_exit()

Lai Siyao (1):
  staging: lustre: obdclass: change object lookup to no wait mode

Li Xi (1):
  staging: lustre: obdclass: change spinlock of key to rwlock

 drivers/staging/lustre/lustre/include/lu_object.h  |   2 +-
 drivers/staging/lustre/lustre/obdclass/lu_object.c | 153 +++--
 2 files changed, 83 insertions(+), 72 deletions(-)

-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 4/4] staging: lustre: obdclass: change object lookup to no wait mode

2018-05-02 Thread James Simmons
From: Lai Siyao <lai.si...@intel.com>

Currently we set LU_OBJECT_HEARD_BANSHEE on an object when we want
to remove it from the cache, but this may lead to a deadlock: when
another process looks up such an object, it has to wait until the
object is released (done at the last refcount put), while that process
may already hold an LDLM lock.

Now that the current code can handle dying objects correctly, we can
just return such an object from lookup, which avoids the above deadlock.

Signed-off-by: Lai Siyao <lai.si...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9049
Reviewed-on: https://review.whamcloud.com/26965
Reviewed-by: Alex Zhuravlev <alexey.zhurav...@intel.com>
Tested-by: Cliff White <cliff.wh...@intel.com>
Reviewed-by: Fan Yong <fan.y...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/include/lu_object.h  |  2 +-
 drivers/staging/lustre/lustre/obdclass/lu_object.c | 82 +-
 2 files changed, 36 insertions(+), 48 deletions(-)

diff --git a/drivers/staging/lustre/lustre/include/lu_object.h 
b/drivers/staging/lustre/lustre/include/lu_object.h
index f29bbca..232063a 100644
--- a/drivers/staging/lustre/lustre/include/lu_object.h
+++ b/drivers/staging/lustre/lustre/include/lu_object.h
@@ -673,7 +673,7 @@ static inline void lu_object_get(struct lu_object *o)
 }
 
 /**
- * Return true of object will not be cached after last reference to it is
+ * Return true if object will not be cached after last reference to it is
  * released.
  */
 static inline int lu_object_is_dying(const struct lu_object_header *h)
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c 
b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 8b507f1..9311703 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -589,19 +589,13 @@ static struct lu_object *htable_lookup(struct lu_site *s,
   const struct lu_fid *f,
   __u64 *version)
 {
-   struct cfs_hash *hs = s->ls_obj_hash;
struct lu_site_bkt_data *bkt;
struct lu_object_header *h;
struct hlist_node   *hnode;
-   __u64 ver;
-   wait_queue_entry_t waiter;
+   u64 ver = cfs_hash_bd_version_get(bd);
 
-retry:
-   ver = cfs_hash_bd_version_get(bd);
-
-   if (*version == ver) {
+   if (*version == ver)
return ERR_PTR(-ENOENT);
-   }
 
*version = ver;
bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
@@ -615,31 +609,13 @@ static struct lu_object *htable_lookup(struct lu_site *s,
}
 
h = container_of(hnode, struct lu_object_header, loh_hash);
-   if (likely(!lu_object_is_dying(h))) {
-   cfs_hash_get(s->ls_obj_hash, hnode);
-   lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
-   if (!list_empty(&h->loh_lru)) {
-   list_del_init(&h->loh_lru);
-   percpu_counter_dec(&s->ls_lru_len_counter);
-   }
-   return lu_object_top(h);
+   cfs_hash_get(s->ls_obj_hash, hnode);
+   lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
+   if (!list_empty(&h->loh_lru)) {
+   list_del_init(&h->loh_lru);
+   percpu_counter_dec(&s->ls_lru_len_counter);
}
-
-   /*
-* Lookup found an object being destroyed this object cannot be
-* returned (to assure that references to dying objects are eventually
-* drained), and moreover, lookup has to wait until object is freed.
-*/
-
-   init_waitqueue_entry(&waiter, current);
-   add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
-   set_current_state(TASK_UNINTERRUPTIBLE);
-   lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
-   cfs_hash_bd_unlock(hs, bd, 1);
-   schedule();
-   remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
-   cfs_hash_bd_lock(hs, bd, 1);
-   goto retry;
+   return lu_object_top(h);
 }
 
 /**
@@ -680,6 +656,8 @@ static void lu_object_limit(const struct lu_env *env, 
struct lu_device *dev)
 }
 
 /**
+ * Core logic of lu_object_find*() functions.
+ *
  * Much like lu_object_find(), but top level device of object is specifically
  * \a dev rather than top level device of the site. This interface allows
  * objects of different "stacking" to be created within the same site.
@@ -713,36 +691,46 @@ struct lu_object *lu_object_find_at(const struct lu_env 
*env,
 * It is unnecessary to perform lookup-alloc-lookup-insert, instead,
 * just alloc and insert directly.
 *
+* If dying object is found during index search, add @waiter to the
+* site wait-queue and return ERR_PTR(-EAGAIN).
 */
-   s  = dev->ld_site;
-   hs = s->

[PATCH 1/4] staging: lustre: obdclass: change spinlock of key to rwlock

2018-05-02 Thread James Simmons
From: Li Xi <l...@ddn.com>

Most of the time the keys are not changed, so an rwlock might be
better for the concurrency of key reads.

Signed-off-by: Li Xi <l...@ddn.com>
Signed-off-by: Gu Zheng <gzh...@ddn.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-6800
Reviewed-on: http://review.whamcloud.com/15558
Reviewed-by: Faccini Bruno <bruno.facc...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/obdclass/lu_object.c | 38 +++---
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c 
b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index fa986f2..04475e9 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -1317,7 +1317,7 @@ enum {
 
 static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, };
 
-static DEFINE_SPINLOCK(lu_keys_guard);
+static DEFINE_RWLOCK(lu_keys_guard);
 static atomic_t lu_key_initing_cnt = ATOMIC_INIT(0);
 
 /**
@@ -1341,7 +1341,7 @@ int lu_context_key_register(struct lu_context_key *key)
LASSERT(key->lct_tags != 0);
 
result = -ENFILE;
-   spin_lock(&lu_keys_guard);
+   write_lock(&lu_keys_guard);
for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
if (!lu_keys[i]) {
key->lct_index = i;
@@ -1353,7 +1353,7 @@ int lu_context_key_register(struct lu_context_key *key)
break;
}
}
-   spin_unlock(&lu_keys_guard);
+   write_unlock(&lu_keys_guard);
return result;
 }
 EXPORT_SYMBOL(lu_context_key_register);
@@ -1387,7 +1387,7 @@ void lu_context_key_degister(struct lu_context_key *key)
lu_context_key_quiesce(key);
 
++key_set_version;
-   spin_lock(&lu_keys_guard);
+   write_lock(&lu_keys_guard);
key_fini(&lu_shrink_env.le_ctx, key->lct_index);
 
/**
@@ -1395,18 +1395,18 @@ void lu_context_key_degister(struct lu_context_key *key)
 * run lu_context_key::lct_fini() method.
 */
while (atomic_read(&key->lct_used) > 1) {
-   spin_unlock(&lu_keys_guard);
+   write_unlock(&lu_keys_guard);
CDEBUG(D_INFO, "%s: \"%s\" %p, %d\n",
   __func__, module_name(key->lct_owner),
   key, atomic_read(&key->lct_used));
schedule();
-   spin_lock(&lu_keys_guard);
+   write_lock(&lu_keys_guard);
}
if (lu_keys[key->lct_index]) {
lu_keys[key->lct_index] = NULL;
lu_ref_fini(&key->lct_reference);
}
-   spin_unlock(&lu_keys_guard);
+   write_unlock(&lu_keys_guard);
 
LASSERTF(atomic_read(&key->lct_used) == 1,
 "key has instances: %d\n",
@@ -1526,7 +1526,7 @@ void lu_context_key_quiesce(struct lu_context_key *key)
/*
 * XXX memory barrier has to go here.
 */
-   spin_lock(&lu_keys_guard);
+   write_lock(&lu_keys_guard);
key->lct_tags |= LCT_QUIESCENT;
 
/**
@@ -1534,19 +1534,19 @@ void lu_context_key_quiesce(struct lu_context_key *key)
 * have completed.
 */
while (atomic_read(&lu_key_initing_cnt) > 0) {
-   spin_unlock(&lu_keys_guard);
+   write_unlock(&lu_keys_guard);
CDEBUG(D_INFO, "%s: \"%s\" %p, %d (%d)\n",
   __func__,
   module_name(key->lct_owner),
   key, atomic_read(&key->lct_used),
atomic_read(&lu_key_initing_cnt));
schedule();
-   spin_lock(&lu_keys_guard);
+   write_lock(&lu_keys_guard);
}
 
list_for_each_entry(ctx, &lu_context_remembered, lc_remember)
key_fini(ctx, key->lct_index);
-   spin_unlock(&lu_keys_guard);
+   write_unlock(&lu_keys_guard);
++key_set_version;
}
 }
@@ -1584,9 +1584,9 @@ static int keys_fill(struct lu_context *ctx)
 * An atomic_t variable is still used, in order not to reacquire the
 * lock when decrementing the counter.
 */
-   spin_lock(&lu_keys_guard);
+   read_lock(&lu_keys_guard);
atomic_inc(&lu_key_initing_cnt);
-   spin_unlock(&lu_keys_guard);
+   read_unlock(&lu_keys_guard);
 
LINVRNT(ctx->lc_value);
for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
@@ -1655,9 +1655,9 @@ int lu_context_init(struct lu_context *ctx, __u32 tags)
ctx->lc_state = LCS_INITIALIZED;
ctx->lc_tags = tags

Re: [lustre_init] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004

2018-05-02 Thread James Simmons

> Hello,
> 
> FYI this happens in mainline kernel 4.17.0-rc3.
> It looks like a new regression since v4.17-rc1.
> 
> It occurs in 2 out of 2 boots.
> 
> [   54.222599]   Magic number: 14:276:994
> [   54.223261] tty ttyd7: hash matches
> [   54.223841] tty ttyaa: hash matches
> [   54.227288] Lustre: Lustre: Build Version: 2.6.99
> [   54.232977] LustreError: 1:0:(class_obd.c:465:obdclass_init()) cannot 
> register 241 err -16

This looks like the misc register bug that is now fixed in the 
staging-test branch. Can you try 

git commit ba833f145745c5ca4d1d45b1de2541fe34b8f100 ("staging: lustre:
libcfs: use dynamic minors for /dev/{lnet, obd}")

from the staging-test branch to see if it resolves your problems?

> [   54.236561] BUG: unable to handle kernel NULL pointer dereference at 
> 0004
> [   54.237836] PGD 0 P4D 0
> [   54.238266] Oops:  [#1] SMP
> [   54.238780] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.17.0-rc3 #1
> [   54.239775] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> 1.10.2-1 04/01/2014
> [   54.241214] RIP: 0010:kmem_cache_alloc+0x27/0x2ce:
>   slob_alloc_node at mm/slob.c:546
>(inlined by) kmem_cache_alloc 
> at mm/slob.c:567
> [   54.241956] RSP: :88001d21bde8 EFLAGS: 00010246
> [   54.242791] RAX:  RBX: 01408040 RCX: 
> 
> [   54.243933] RDX: 88001d216000 RSI:  RDI: 
> 83752918
> [   54.245072] RBP:  R08:  R09: 
> 
> [   54.246213] R10:  R11: 0020 R12: 
> a000
> [   54.247337] R13:  R14: a000 R15: 
> 8407cb7e
> [   54.248613] FS:  () GS:88001e40() 
> knlGS:
> [   54.249887] CS:  0010 DS:  ES:  CR0: 80050033
> [   54.250803] CR2: 0004 CR3: 03269000 CR4: 
> 06a0
> [   54.251939] Call Trace:
> [   54.252358]  ? native_patch+0x195/0x195:
>   
> __raw_callee_save___native_queued_spin_unlock at ??:?
> [   54.252976]  ? lustre_init+0x189/0x247:
>   IS_ERR at include/linux/err.h:36
>(inlined by) lustre_init at 
> drivers/staging/lustre/lustre/llite/super25.c:133
> [   54.253590]  cl_env_new+0x2b/0xb9:
>   cl_env_new at 
> drivers/staging/lustre/lustre/obdclass/cl_object.c:597
> [   54.254126]  cl_env_alloc+0x11/0xae:
>   IS_ERR at include/linux/err.h:36
>(inlined by) cl_env_alloc at 
> drivers/staging/lustre/lustre/obdclass/cl_object.c:718
> [   54.254713]  ? lmv_init+0x2d/0x2d:
>   cfs_cdebug_show at 
> drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h:111
>(inlined by) lustre_init at 
> drivers/staging/lustre/lustre/llite/super25.c:97
> [   54.255259]  lustre_init+0x189/0x247:
>   IS_ERR at include/linux/err.h:36
>(inlined by) lustre_init at 
> drivers/staging/lustre/lustre/llite/super25.c:133
> [   54.255839]  do_one_initcall+0x13d/0x36c:
>   __read_once_size at 
> include/linux/compiler.h:188
>(inlined by) arch_atomic_read 
> at arch/x86/include/asm/atomic.h:31
>(inlined by) atomic_read at 
> include/asm-generic/atomic-instrumented.h:22
>(inlined by) static_key_count 
> at include/linux/jump_label.h:194
>(inlined by) static_key_false 
> at include/linux/jump_label.h:206
>(inlined by) 
> trace_initcall_finish at include/trace/events/initcall.h:44
>(inlined by) do_one_initcall 
> at init/main.c:884
> [   54.256597]  ? parse_args+0x81/0x273:
>   arch_local_save_flags at 
> arch/x86/include/asm/paravirt.h:778
>(inlined by) parse_args at 
> kernel/params.c:190
> [   54.257177]  ? do_early_param+0x88/0x88:
>   repair_env_string at 
> init/main.c:251
> [   54.257791]  kernel_init_freeable+0x338/0x3d3:
>   do_initcall_level at 
> init/main.c:950
>(inlined by) do_initcalls at 
> init/main.c:959
>(inlined by) do_basic_setup at 
> 

Re: [cfs_trace_lock_tcd] BUG: unable to handle kernel NULL pointer dereference at 00000050

2018-04-18 Thread James Simmons

> Hello,
> 
> FYI this happens in mainline kernel 4.17.0-rc1.
> It looks like a new regression.
> 
> [7.587002]  lnet_selftest_init+0x2c4/0x5d9:
>   lnet_selftest_init at 
> drivers/staging/lustre/lnet/selftest/module.c:134
> [7.587002]  ? lnet_selftest_exit+0x8d/0x8d:
>   lnet_selftest_init at 
> drivers/staging/lustre/lnet/selftest/module.c:90

Are you running lnet selftest? Is this a UMP setup? The reason I ask is
that there is an SMP handling bug in lnet selftest. If you look at the
mailing list, I pushed an SMP patch series. Can you try that series and
tell me if it works for you? Thanks
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 14/22] staging: lustre: llite: record in stats attempted removal of lma/link xattr

2018-04-15 Thread James Simmons
Keep track of attempted deletions as well as changing of the
lma/link xattrs.

Signed-off-by: James Simmons <uja.o...@yahoo.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9183
Reviewed-on: https://review.whamcloud.com/27240
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Bob Glossman <bob.gloss...@intel.com>
Reviewed-by: Sebastien Buisson <sbuis...@ddn.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index 4b1e565..3ab7ae0 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -296,7 +296,10 @@ static int ll_xattr_set(const struct xattr_handler 
*handler,
return ll_setstripe_ea(dentry, (struct lov_user_md *)value,
   size);
} else if (!strcmp(name, "lma") || !strcmp(name, "link")) {
-   ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
+   int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR :
+  LPROC_LL_SETXATTR;
+
+   ll_stats_ops_tally(ll_i2sbi(inode), op_type, 1);
return 0;
}
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 05/22] staging: lustre: llite: handle xattr cache refill race

2018-04-15 Thread James Simmons
From: "John L. Hammond" <john.hamm...@intel.com>

In ll_xattr_cache_refill() if the xattr cache was invalid (and no
request was sent) then return -EAGAIN so that ll_getxattr_common()
caller will fetch the xattr from the MDT.

Signed-off-by: John L. Hammond <john.hamm...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10132
Reviewed-on: https://review.whamcloud.com/29654
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Lai Siyao <lai.si...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr_cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c 
b/drivers/staging/lustre/lustre/llite/xattr_cache.c
index 53dfaea..5da69ba0 100644
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c
@@ -357,7 +357,7 @@ static int ll_xattr_cache_refill(struct inode *inode)
if (unlikely(!req)) {
CDEBUG(D_CACHE, "cancelled by a parallel getxattr\n");
ll_intent_drop_lock();
-   rc = -EIO;
+   rc = -EAGAIN;
goto err_unlock;
}
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 08/22] staging: lustre: llite: add simple comment about lustre.lov xattrs

2018-04-15 Thread James Simmons
From: Niu Yawei <yawei@intel.com>

Simple comment added to ll_xattr_set.

Signed-off-by: Bobi Jam <bobijam...@intel.com>
Signed-off-by: Niu Yawei <yawei@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8998
Reviewed-on: https://review.whamcloud.com/24851
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Lai Siyao <lai.si...@intel.com>
Reviewed-by: Jinshan Xiong <jinshan.xi...@gmail.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index 1b462e4..c1600b9 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -264,6 +264,7 @@ static int ll_xattr_set(const struct xattr_handler *handler,
CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n",
   PFID(ll_inode2fid(inode)), inode, name);
 
+   /* lustre/trusted.lov.xxx would be passed through xattr API */
if (!strcmp(name, "lov")) {
int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR :
   LPROC_LL_SETXATTR;
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 22/22] staging: lustre: llite: remove unused parameters from md_{get, set}xattr()

2018-04-15 Thread James Simmons
From: "John L. Hammond" <john.hamm...@intel.com>

md_getxattr() and md_setxattr() each have several unused
parameters. Remove them and improve the naming of the remaining
parameters.

Signed-off-by: John L. Hammond <john.hamm...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10792
Reviewed-on: https://review.whamcloud.com/
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/include/obd.h   |  7 ++---
 drivers/staging/lustre/lustre/include/obd_class.h | 21 ++
 drivers/staging/lustre/lustre/llite/file.c|  5 ++--
 drivers/staging/lustre/lustre/llite/xattr.c   |  6 ++--
 drivers/staging/lustre/lustre/lmv/lmv_obd.c   | 22 +++
 drivers/staging/lustre/lustre/mdc/mdc_request.c   | 34 +--
 6 files changed, 46 insertions(+), 49 deletions(-)

diff --git a/drivers/staging/lustre/lustre/include/obd.h 
b/drivers/staging/lustre/lustre/include/obd.h
index 48cf7ab..0f9e5dc 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -935,12 +935,11 @@ struct md_ops {
  struct ptlrpc_request **);
 
int (*setxattr)(struct obd_export *, const struct lu_fid *,
-   u64, const char *, const char *, int, int, int, __u32,
-   struct ptlrpc_request **);
+   u64, const char *, const void *, size_t, unsigned int,
+   u32, struct ptlrpc_request **);
 
int (*getxattr)(struct obd_export *, const struct lu_fid *,
-   u64, const char *, const char *, int, int, int,
-   struct ptlrpc_request **);
+   u64, const char *, size_t, struct ptlrpc_request **);
 
int (*init_ea_size)(struct obd_export *, u32, u32);
 
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h 
b/drivers/staging/lustre/lustre/include/obd_class.h
index a76f016..0081578 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -1385,29 +1385,26 @@ static inline int md_merge_attr(struct obd_export *exp,
 }
 
 static inline int md_setxattr(struct obd_export *exp, const struct lu_fid *fid,
- u64 valid, const char *name,
- const char *input, int input_size,
- int output_size, int flags, __u32 suppgid,
+ u64 obd_md_valid, const char *name,
+ const char *value, size_t value_size,
+ unsigned int xattr_flags, u32 suppgid,
  struct ptlrpc_request **request)
 {
EXP_CHECK_MD_OP(exp, setxattr);
EXP_MD_COUNTER_INCREMENT(exp, setxattr);
-   return MDP(exp->exp_obd, setxattr)(exp, fid, valid, name, input,
-  input_size, output_size, flags,
+   return MDP(exp->exp_obd, setxattr)(exp, fid, obd_md_valid, name,
+  value, value_size, xattr_flags,
   suppgid, request);
 }
 
 static inline int md_getxattr(struct obd_export *exp, const struct lu_fid *fid,
- u64 valid, const char *name,
- const char *input, int input_size,
- int output_size, int flags,
- struct ptlrpc_request **request)
+ u64 obd_md_valid, const char *name,
+ size_t buf_size, struct ptlrpc_request **req)
 {
EXP_CHECK_MD_OP(exp, getxattr);
EXP_MD_COUNTER_INCREMENT(exp, getxattr);
-   return MDP(exp->exp_obd, getxattr)(exp, fid, valid, name, input,
-  input_size, output_size, flags,
-  request);
+   return MDP(exp->exp_obd, getxattr)(exp, fid, obd_md_valid, name,
+  buf_size, req);
 }
 
 static inline int md_set_open_replay_data(struct obd_export *exp,
diff --git a/drivers/staging/lustre/lustre/llite/file.c 
b/drivers/staging/lustre/lustre/llite/file.c
index 35f5bda..9197891 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -3093,7 +3093,7 @@ int ll_set_acl(struct inode *inode, struct posix_acl 
*acl, int type)
 
rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
 value ? OBD_MD_FLXATTR : OBD_MD_FLXATTRRM,
-name, value, value_size, 0, 0, 0, );
+name, value, value_size, 0, 0, );
 
ptlrpc_req_finished(req);
 out_value:
@@ -3405,8 +3405,7 @@ static int ll_layout_fetch(s

[PATCH 17/22] staging: lustre: llite: cleanup xattr code comments

2018-04-15 Thread James Simmons
Add proper punctuation to the comments. Change buf_size to size
in the comment in ll_listxattr() since buf_size doesn't exist, which
would confuse someone reading the code.

Signed-off-by: James Simmons <uja.o...@yahoo.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9183
Reviewed-on: https://review.whamcloud.com/27240
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Bob Glossman <bob.gloss...@intel.com>
Reviewed-by: Sebastien Buisson <sbuis...@ddn.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index d6cee3b..835d00f 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -564,7 +564,7 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, 
size_t size)
return rc;
/*
 * If we're being called to get the size of the xattr list
-* (buf_size == 0) then just assume that a lustre.lov xattr
+* (size == 0) then just assume that a lustre.lov xattr
 * exists.
 */
if (!size)
@@ -577,14 +577,14 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, 
size_t size)
len = strnlen(xattr_name, rem - 1) + 1;
rem -= len;
if (!xattr_type_filter(sbi, get_xattr_type(xattr_name))) {
-   /* Skip OK xattr type leave it in buffer */
+   /* Skip OK xattr type, leave it in buffer. */
xattr_name += len;
continue;
}
 
/*
 * Move up remaining xattrs in buffer
-* removing the xattr that is not OK
+* removing the xattr that is not OK.
 */
memmove(xattr_name, xattr_name + len, rem);
rc -= len;
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 07/22] staging: lustre: llite: refactor lustre.lov xattr handling

2018-04-15 Thread James Simmons
From: Niu Yawei <yawei@intel.com>

The function ll_xattr_set() contains special code to handle
the lustre specific xattr lustre.lov. Move all this code to
a new function ll_setstripe_ea().

Signed-off-by: Bobi Jam <bobijam...@intel.com>
Signed-off-by: Niu Yawei <yawei@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8998
Reviewed-on: https://review.whamcloud.com/24851
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Lai Siyao <lai.si...@intel.com>
Reviewed-by: Jinshan Xiong <jinshan.xi...@gmail.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 131 +++-
 1 file changed, 69 insertions(+), 62 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index 55a19a5..1b462e4 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -186,6 +186,73 @@ static int get_hsm_state(struct inode *inode, u32 
*hus_states)
return rc;
 }
 
+static int ll_setstripe_ea(struct dentry *dentry, struct lov_user_md *lump,
+  size_t size)
+{
+   struct inode *inode = d_inode(dentry);
+   int rc = 0;
+
+   if (size != 0 && size < sizeof(struct lov_user_md))
+   return -EINVAL;
+
+   /*
+* It is possible to set an xattr to a "" value of zero size.
+* For this case we are going to treat it as a removal.
+*/
+   if (!size && lump)
+   lump = NULL;
+
+   /* Attributes that are saved via getxattr will always have
+* the stripe_offset as 0.  Instead, the MDS should be
+* allowed to pick the starting OST index.   b=17846
+*/
+   if (lump && lump->lmm_stripe_offset == 0)
+   lump->lmm_stripe_offset = -1;
+
+   /* Avoid anyone directly setting the RELEASED flag. */
+   if (lump && (lump->lmm_pattern & LOV_PATTERN_F_RELEASED)) {
+   /* Only if we have a released flag check if the file
+* was indeed archived.
+*/
+   u32 state = HS_NONE;
+
+   rc = get_hsm_state(inode, );
+   if (rc)
+   return rc;
+
+   if (!(state & HS_ARCHIVED)) {
+   CDEBUG(D_VFSTRACE,
+  "hus_states state = %x, pattern = %x\n",
+   state, lump->lmm_pattern);
+   /*
+* Here the state is: real file is not
+* archived but user is requesting to set
+* the RELEASED flag so we mask off the
+* released flag from the request
+*/
+   lump->lmm_pattern ^= LOV_PATTERN_F_RELEASED;
+   }
+   }
+
+   if (lump && S_ISREG(inode->i_mode)) {
+   __u64 it_flags = FMODE_WRITE;
+   int lum_size;
+
+   lum_size = ll_lov_user_md_size(lump);
+   if (lum_size < 0 || size < lum_size)
+   return 0; /* b=10667: ignore error */
+
+   rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags, lump,
+ lum_size);
+   /* b=10667: rc always be 0 here for now */
+   rc = 0;
+   } else if (S_ISDIR(inode->i_mode)) {
+   rc = ll_dir_setstripe(inode, lump, 0);
+   }
+
+   return rc;
+}
+
 static int ll_xattr_set(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value, size_t size,
@@ -198,73 +265,13 @@ static int ll_xattr_set(const struct xattr_handler 
*handler,
   PFID(ll_inode2fid(inode)), inode, name);
 
if (!strcmp(name, "lov")) {
-   struct lov_user_md *lump = (struct lov_user_md *)value;
int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR :
   LPROC_LL_SETXATTR;
-   int rc = 0;
 
ll_stats_ops_tally(ll_i2sbi(inode), op_type, 1);
 
-   if (size != 0 && size < sizeof(struct lov_user_md))
-   return -EINVAL;
-
-   /*
-* It is possible to set an xattr to a "" value of zero size.
-* For this case we are going to treat it as a removal.
-*/
-   if (!size && lump)
-   lump = NULL;
-
-   /* Attributes that are saved via getxattr will always have
-* the stripe_offset as 0.  Instead, the MDS should be
-* a

[PATCH 03/22] staging: lustre: obd: change debug reporting in lmv_enqueue()

2018-04-15 Thread James Simmons
From: Vitaly Fertman <vitaly.fert...@seagate.com>

Remove LL_IT2STR(it) from debug macros in lmv_enqueue(). The
removal makes it possible to simplify the md_enqueue() functions.

Signed-off-by: Vitaly Fertman <vitaly.fert...@seagate.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7433
Seagate-bug-id: MRP-3072 MRP-3137
Reviewed-on: http://review.whamcloud.com/17220
Reviewed-by: Andrew Perepechko <andrew.perepec...@seagate.com>
Reviewed-by: Andriy Skulysh <andriy.skul...@seagate.com>
Tested-by: Elena V. Gryaznova <elena.gryazn...@seagate.com>
Reviewed-by: John L. Hammond <john.hamm...@intel.com>
Reviewed-by: Lai Siyao <lai.si...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/lmv/lmv_obd.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c 
b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index 7be9310..e1c93cd 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -1660,15 +1660,14 @@ static int lmv_create(struct obd_export *exp, struct 
md_op_data *op_data,
struct lmv_obd *lmv = >u.lmv;
struct lmv_tgt_desc  *tgt;
 
-   CDEBUG(D_INODE, "ENQUEUE '%s' on " DFID "\n",
-  LL_IT2STR(it), PFID(_data->op_fid1));
+   CDEBUG(D_INODE, "ENQUEUE on " DFID "\n", PFID(_data->op_fid1));
 
tgt = lmv_locate_mds(lmv, op_data, _data->op_fid1);
if (IS_ERR(tgt))
return PTR_ERR(tgt);
 
-   CDEBUG(D_INODE, "ENQUEUE '%s' on " DFID " -> mds #%u\n",
-  LL_IT2STR(it), PFID(_data->op_fid1), tgt->ltd_idx);
+   CDEBUG(D_INODE, "ENQUEUE on " DFID " -> mds #%u\n",
+  PFID(_data->op_fid1), tgt->ltd_idx);
 
return md_enqueue(tgt->ltd_exp, einfo, policy, it, op_data, lockh,
extra_lock_flags);
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 20/22] staging: lustre: llite: use xattr_handler name for ACLs

2018-04-15 Thread James Simmons
From: "John L. Hammond" <john.hamm...@intel.com>

If struct xattr_handler has a name member then use it (rather than
prefix) for the ACL xattrs. This avoids a bug where ACL operations
failed for some kernels.

Signed-off-by: John L. Hammond <john.hamm...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10785
Reviewed-on: https://review.whamcloud.com/
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index d08bf1e..e835c8e 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -46,15 +46,16 @@
 
 const struct xattr_handler *get_xattr_type(const char *name)
 {
-   int i = 0;
+   int i;
 
-   while (ll_xattr_handlers[i]) {
-   size_t len = strlen(ll_xattr_handlers[i]->prefix);
+   for (i = 0; ll_xattr_handlers[i]; i++) {
+   const char *prefix = xattr_prefix(ll_xattr_handlers[i]);
+   size_t prefix_len = strlen(prefix);
 
-   if (!strncmp(ll_xattr_handlers[i]->prefix, name, len))
+   if (!strncmp(prefix, name, prefix_len))
return ll_xattr_handlers[i];
-   i++;
}
+
return NULL;
 }
 
@@ -627,14 +628,14 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, 
size_t size)
 };
 
 static const struct xattr_handler ll_acl_access_xattr_handler = {
-   .prefix = XATTR_NAME_POSIX_ACL_ACCESS,
+   .name = XATTR_NAME_POSIX_ACL_ACCESS,
.flags = XATTR_ACL_ACCESS_T,
.get = ll_xattr_get_common,
.set = ll_xattr_set_common,
 };
 
 static const struct xattr_handler ll_acl_default_xattr_handler = {
-   .prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
+   .name = XATTR_NAME_POSIX_ACL_DEFAULT,
.flags = XATTR_ACL_DEFAULT_T,
.get = ll_xattr_get_common,
.set = ll_xattr_set_common,
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 16/22] staging: lustre: llite: use proper types in the xattr code

2018-04-15 Thread James Simmons
Convert __uXX types to uXX types since this is kernel code.
The function ll_lov_user_md_size() returns ssize_t so change
lum_size from int to ssize_t.

Signed-off-by: James Simmons <uja.o...@yahoo.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9183
Reviewed-on: https://review.whamcloud.com/27240
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Bob Glossman <bob.gloss...@intel.com>
Reviewed-by: Sebastien Buisson <sbuis...@ddn.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index 147ffcc..d6cee3b 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -91,7 +91,7 @@ static int xattr_type_filter(struct ll_sb_info *sbi,
struct ptlrpc_request *req = NULL;
const char *pv = value;
char *fullname;
-   __u64 valid;
+   u64 valid;
int rc;
 
if (flags == XATTR_REPLACE) {
@@ -246,8 +246,8 @@ static int ll_setstripe_ea(struct dentry *dentry, struct 
lov_user_md *lump,
return rc;
 
if (lump && S_ISREG(inode->i_mode)) {
-   __u64 it_flags = FMODE_WRITE;
-   int lum_size;
+   u64 it_flags = FMODE_WRITE;
+   ssize_t lum_size;
 
lum_size = ll_lov_user_md_size(lump);
if (lum_size < 0 || size < lum_size)
@@ -309,7 +309,7 @@ static int ll_xattr_set(const struct xattr_handler *handler,
 
 int
 ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer,
- size_t size, __u64 valid)
+ size_t size, u64 valid)
 {
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_sb_info *sbi = ll_i2sbi(inode);
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 15/22] staging: lustre: llite: cleanup posix acl xattr code

2018-04-15 Thread James Simmons
Having an extra ifdef makes the code harder to read. For the case
of ll_xattr_get_common() we have a variable initialized at the
start of the function but it is only used in the XATTR_ACL_ACCESS_T
code block. Let's move that variable to that location since it's
only used there and make the code look cleaner.

Signed-off-by: James Simmons <uja.o...@yahoo.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9183
Reviewed-on: https://review.whamcloud.com/27240
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Bob Glossman <bob.gloss...@intel.com>
Reviewed-by: Sebastien Buisson <sbuis...@ddn.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index 3ab7ae0..147ffcc 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -396,9 +396,6 @@ static int ll_xattr_get_common(const struct xattr_handler 
*handler,
   const char *name, void *buffer, size_t size)
 {
struct ll_sb_info *sbi = ll_i2sbi(inode);
-#ifdef CONFIG_FS_POSIX_ACL
-   struct ll_inode_info *lli = ll_i2info(inode);
-#endif
char *fullname;
int rc;
 
@@ -422,6 +419,7 @@ static int ll_xattr_get_common(const struct xattr_handler 
*handler,
 * chance that cached ACL is uptodate.
 */
if (handler->flags == XATTR_ACL_ACCESS_T) {
+   struct ll_inode_info *lli = ll_i2info(inode);
struct posix_acl *acl;
 
spin_lock(>lli_lock);
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 09/22] staging: lustre: llite: break up ll_setstripe_ea function

2018-04-15 Thread James Simmons
From: Bobi Jam <bobijam...@intel.com>

Place all the handling of information of trusted.lov that
is not stripe related into the new function ll_adjust_lum().
Now ll_setstripe_ea() only handles striping information.

Signed-off-by: Bobi Jam <bobijam...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9484
Reviewed-on: https://review.whamcloud.com/27126
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Niu Yawei <yawei@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 37 +++--
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index c1600b9..78ce85b 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -186,22 +186,10 @@ static int get_hsm_state(struct inode *inode, u32 
*hus_states)
return rc;
 }
 
-static int ll_setstripe_ea(struct dentry *dentry, struct lov_user_md *lump,
-  size_t size)
+static int ll_adjust_lum(struct inode *inode, struct lov_user_md *lump)
 {
-   struct inode *inode = d_inode(dentry);
int rc = 0;
 
-   if (size != 0 && size < sizeof(struct lov_user_md))
-   return -EINVAL;
-
-   /*
-* It is possible to set an xattr to a "" value of zero size.
-* For this case we are going to treat it as a removal.
-*/
-   if (!size && lump)
-   lump = NULL;
-
/* Attributes that are saved via getxattr will always have
 * the stripe_offset as 0.  Instead, the MDS should be
 * allowed to pick the starting OST index.   b=17846
@@ -234,6 +222,29 @@ static int ll_setstripe_ea(struct dentry *dentry, struct 
lov_user_md *lump,
}
}
 
+   return rc;
+}
+
+static int ll_setstripe_ea(struct dentry *dentry, struct lov_user_md *lump,
+  size_t size)
+{
+   struct inode *inode = d_inode(dentry);
+   int rc = 0;
+
+   if (size != 0 && size < sizeof(struct lov_user_md))
+   return -EINVAL;
+
+   /*
+* It is possible to set an xattr to a "" value of zero size.
+* For this case we are going to treat it as a removal.
+*/
+   if (!size && lump)
+   lump = NULL;
+
+   rc = ll_adjust_lum(inode, lump);
+   if (rc)
+   return rc;
+
if (lump && S_ISREG(inode->i_mode)) {
__u64 it_flags = FMODE_WRITE;
int lum_size;
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 06/22] staging: lustre: llite: Remove filtering of seclabel xattr

2018-04-15 Thread James Simmons
From: Robin Humble <plaguedbypengu...@gmail.com>

The security.capability xattr is used to implement File
Capabilities in recent Linux versions. Capabilities are a
fine grained approach to granting executables elevated
privileges. eg. /bin/ping can have capabilities
cap_net_admin, cap_net_raw+ep instead of being setuid root.

This xattr has long been filtered out by llite, initially for
stability reasons (b15587), and later over performance
concerns as this xattr is read for every file with eg.
'ls --color'. Since LU-2869 xattrs are cached on clients,
alleviating most performance concerns.

Removing llite's filtering of the security.capability xattr
enables using Lustre as a root filesystem, which is used on
some large clusters.

Signed-off-by: Robin Humble <plaguedbypengu...@gmail.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9562
Reviewed-on: https://review.whamcloud.com/27292
Reviewed-by: John L. Hammond <john.hamm...@intel.com>
Reviewed-by: Sebastien Buisson <sbuis...@ddn.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index 2d78432..55a19a5 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -117,11 +117,6 @@ static int xattr_type_filter(struct ll_sb_info *sbi,
 (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov"
return 0;
 
-   /* b15587: ignore security.capability xattr for now */
-   if ((handler->flags == XATTR_SECURITY_T &&
-!strcmp(name, "capability")))
-   return 0;
-
/* LU-549:  Disable security.selinux when selinux is disabled */
if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
strcmp(name, "selinux") == 0)
@@ -383,10 +378,6 @@ static int ll_xattr_get_common(const struct xattr_handler 
*handler,
if (rc)
return rc;
 
-   /* b15587: ignore security.capability xattr for now */
-   if ((handler->flags == XATTR_SECURITY_T && !strcmp(name, "capability")))
-   return -ENODATA;
-
/* LU-549:  Disable security.selinux when selinux is disabled */
if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
!strcmp(name, "selinux"))
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 04/22] staging: lustre: ldlm: xattr locks are lost on mdt

2018-04-15 Thread James Simmons
From: Vitaly Fertman <vitaly.fert...@seagate.com>

On the server side mdt_intent_getxattr() can return EFAULT if a
buffer cannot be found. It is returned after lock_replace, where a
new lock is installed into lockp. An error forces ldlm_lock_enqueue()
to destroy the original lock, but ldlm_handle_enqueue0() drops the
reference on the new lock. The xattr client code implied that an intent
error is returned under a lock, which is immediately cancelled.
Check if a lock was obtained and cancel it properly for error cases.
Note: we should support both cases for interop needs: an intent
error under a lock and with a lock abort. Keep returning a lock
with an intent error for interop purposes for now, to be dropped
later when clients get old enough. Make all intent ops
work through md_intent_lock: getxattr and layout, which should
extract the intent error.

Signed-off-by: Vitaly Fertman <vitaly.fert...@seagate.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7433
Seagate-bug-id: MRP-3072 MRP-3137
Reviewed-on: http://review.whamcloud.com/17220
Reviewed-by: Andrew Perepechko <andrew.perepec...@seagate.com>
Reviewed-by: Andriy Skulysh <andriy.skul...@seagate.com>
Tested-by: Elena V. Gryaznova <elena.gryazn...@seagate.com>
Reviewed-by: John L. Hammond <john.hamm...@intel.com>
Reviewed-by: Lai Siyao <lai.si...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/include/obd.h   |  3 +-
 drivers/staging/lustre/lustre/include/obd_class.h |  3 +-
 drivers/staging/lustre/lustre/llite/file.c| 16 ++---
 drivers/staging/lustre/lustre/llite/xattr_cache.c | 75 ---
 drivers/staging/lustre/lustre/lmv/lmv_intent.c| 12 ++--
 drivers/staging/lustre/lustre/lmv/lmv_obd.c   |  7 +--
 drivers/staging/lustre/lustre/mdc/mdc_internal.h  |  4 +-
 drivers/staging/lustre/lustre/mdc/mdc_locks.c | 66 ++--
 8 files changed, 95 insertions(+), 91 deletions(-)

diff --git a/drivers/staging/lustre/lustre/include/obd.h 
b/drivers/staging/lustre/lustre/include/obd.h
index ea6056b..48cf7ab 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -909,8 +909,7 @@ struct md_ops {
  const void *, size_t, umode_t, uid_t, gid_t,
  cfs_cap_t, __u64, struct ptlrpc_request **);
int (*enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
-  const union ldlm_policy_data *,
-  struct lookup_intent *, struct md_op_data *,
+  const union ldlm_policy_data *, struct md_op_data *,
   struct lustre_handle *, __u64);
int (*getattr)(struct obd_export *, struct md_op_data *,
   struct ptlrpc_request **);
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h 
b/drivers/staging/lustre/lustre/include/obd_class.h
index 176b63e..a76f016 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -1241,7 +1241,6 @@ static inline int md_create(struct obd_export *exp, 
struct md_op_data *op_data,
 static inline int md_enqueue(struct obd_export *exp,
 struct ldlm_enqueue_info *einfo,
 const union ldlm_policy_data *policy,
-struct lookup_intent *it,
 struct md_op_data *op_data,
 struct lustre_handle *lockh,
 __u64 extra_lock_flags)
@@ -1250,7 +1249,7 @@ static inline int md_enqueue(struct obd_export *exp,
 
EXP_CHECK_MD_OP(exp, enqueue);
EXP_MD_COUNTER_INCREMENT(exp, enqueue);
-   rc = MDP(exp->exp_obd, enqueue)(exp, einfo, policy, it, op_data, lockh,
+   rc = MDP(exp->exp_obd, enqueue)(exp, einfo, policy, op_data, lockh,
extra_lock_flags);
return rc;
 }
diff --git a/drivers/staging/lustre/lustre/llite/file.c 
b/drivers/staging/lustre/lustre/llite/file.c
index ca5faea..0026fde 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -2514,7 +2514,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, 
int datasync)
   PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
   einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
 
-   rc = md_enqueue(sbi->ll_md_exp, , , NULL, op_data, ,
+   rc = md_enqueue(sbi->ll_md_exp, , , op_data, ,
flags);
 
/* Restore the file lock type if not TEST lock. */
@@ -2527,7 +2527,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, 
int datasync)
 
if (rc2 && file_lock->fl_type != F_UNLCK) {
einfo.ei_mode = LCK_NL;
-   md_enqueue

[PATCH 13/22] staging: lustre: llite: remove newline in fullname strings

2018-04-15 Thread James Simmons
When creating the full name of an xattr, a newline was appended,
which was seen by the remote MDS server and confused it. Remove the
newline.

Signed-off-by: James Simmons <uja.o...@yahoo.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9183
Reviewed-on: https://review.whamcloud.com/27240
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Bob Glossman <bob.gloss...@intel.com>
Reviewed-by: Sebastien Buisson <sbuis...@ddn.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index 42a6fb4..4b1e565 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -136,7 +136,7 @@ static int xattr_type_filter(struct ll_sb_info *sbi,
return -EPERM;
}
 
-   fullname = kasprintf(GFP_KERNEL, "%s%s\n", handler->prefix, name);
+   fullname = kasprintf(GFP_KERNEL, "%s%s", handler->prefix, name);
if (!fullname)
return -ENOMEM;
rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
@@ -435,7 +435,7 @@ static int ll_xattr_get_common(const struct xattr_handler 
*handler,
if (handler->flags == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
return -ENODATA;
 #endif
-   fullname = kasprintf(GFP_KERNEL, "%s%s\n", handler->prefix, name);
+   fullname = kasprintf(GFP_KERNEL, "%s%s", handler->prefix, name);
if (!fullname)
return -ENOMEM;
rc = ll_xattr_list(inode, fullname, handler->flags, buffer, size,
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 00/22] staging: lustre: llite: fix xattr handling

2018-04-15 Thread James Simmons
From: James Simmons <uja.o...@yahoo.com>

Lustre utilities and user land APIs heavily depend on special xattr
handling. Sadly much of the xattr handling for the lustre client has
been broken for a while. These are all the fixes needed to make xattr
handling work properly with the latest kernels.

Bobi Jam (3):
  staging: lustre: llite: break up ll_setstripe_ea function
  staging: lustre: llite: return from ll_adjust_lum() if lump is NULL
  staging: lustre: llite: eat -EEXIST on setting trusted.lov

Dmitry Eremin (1):
  staging: lustre: llite: add support set_acl method in inode operations

James Simmons (9):
  staging: lustre: llite: initialize xattr->xe_namelen
  staging: lustre: llite: fix invalid size test in ll_setstripe_ea()
  staging: lustre: llite: remove newline in fullname strings
  staging: lustre: llite: record in stats attempted removal of lma/link xattr
  staging: lustre: llite: cleanup posix acl xattr code
  staging: lustre: llite: use proper types in the xattr code
  staging: lustre: llite: cleanup xattr code comments
  staging: lustre: llite: style changes in xattr.c
  staging: lustre: llite: correct removexattr detection

John L. Hammond (3):
  staging: lustre: llite: handle xattr cache refill race
  staging: lustre: llite: use xattr_handler name for ACLs
  staging: lustre: llite: remove unused parameters from md_{get,set}xattr()

Niu Yawei (2):
  staging: lustre: llite: refactor lustre.lov xattr handling
  staging: lustre: llite: add simple comment about lustre.lov xattrs

Robin Humble (1):
  staging: lustre: llite: Remove filtering of seclabel xattr

Vitaly Fertman (3):
  staging: lustre: obd: create it_has_reply_body()
  staging: lustre: obd: change debug reporting in lmv_enqueue()
  staging: lustre: ldlm: xattr locks are lost on mdt

 drivers/staging/lustre/lustre/include/obd.h|  20 +-
 drivers/staging/lustre/lustre/include/obd_class.h  |  24 +--
 drivers/staging/lustre/lustre/llite/file.c |  86 ++--
 .../staging/lustre/lustre/llite/llite_internal.h   |   4 +
 drivers/staging/lustre/lustre/llite/namei.c|  10 +-
 drivers/staging/lustre/lustre/llite/xattr.c| 231 -
 drivers/staging/lustre/lustre/llite/xattr_cache.c  |  83 +++-
 drivers/staging/lustre/lustre/lmv/lmv_intent.c |  12 +-
 drivers/staging/lustre/lustre/lmv/lmv_obd.c|  36 ++--
 drivers/staging/lustre/lustre/mdc/mdc_internal.h   |   4 +-
 drivers/staging/lustre/lustre/mdc/mdc_locks.c  |  68 --
 drivers/staging/lustre/lustre/mdc/mdc_request.c|  34 +--
 12 files changed, 364 insertions(+), 248 deletions(-)

-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 12/22] staging: lustre: llite: fix invalid size test in ll_setstripe_ea()

2018-04-15 Thread James Simmons
The size check at the start of ll_setstripe_ea() is only
valid for a directory. Move that check to the section of
code handling the S_ISDIR case.

Signed-off-by: James Simmons <uja.o...@yahoo.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9183
Reviewed-on: https://review.whamcloud.com/27240
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Bob Glossman <bob.gloss...@intel.com>
Reviewed-by: Sebastien Buisson <sbuis...@ddn.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index 69c5227..42a6fb4 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -234,9 +234,6 @@ static int ll_setstripe_ea(struct dentry *dentry, struct 
lov_user_md *lump,
struct inode *inode = d_inode(dentry);
int rc = 0;
 
-   if (size != 0 && size < sizeof(struct lov_user_md))
-   return -EINVAL;
-
/*
 * It is possible to set an xattr to a "" value of zero size.
 * For this case we are going to treat it as a removal.
@@ -269,6 +266,9 @@ static int ll_setstripe_ea(struct dentry *dentry, struct 
lov_user_md *lump,
if (rc == -EEXIST)
rc = 0;
} else if (S_ISDIR(inode->i_mode)) {
+   if (size != 0 && size < sizeof(struct lov_user_md))
+   return -EINVAL;
+
rc = ll_dir_setstripe(inode, lump, 0);
}
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 02/22] staging: lustre: obd: create it_has_reply_body()

2018-04-15 Thread James Simmons
From: Vitaly Fertman <vitaly.fert...@seagate.com>

The lookup_intent it_op fields in many cases will be compared
to the settings of IT_OPEN | IT_UNLINK | IT_LOOKUP | IT_GETATTR.
Create a simple inline function for this common case.

Signed-off-by: Vitaly Fertman <vitaly.fert...@seagate.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7433
Seagate-bug-id: MRP-3072 MRP-3137
Reviewed-on: http://review.whamcloud.com/17220
Reviewed-by: Andrew Perepechko <andrew.perepec...@seagate.com>
Reviewed-by: Andriy Skulysh <andriy.skul...@seagate.com>
Tested-by: Elena V. Gryaznova <elena.gryazn...@seagate.com>
Reviewed-by: John L. Hammond <john.hamm...@intel.com>
Reviewed-by: Lai Siyao <lai.si...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/include/obd.h   | 10 ++
 drivers/staging/lustre/lustre/mdc/mdc_locks.c |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lustre/include/obd.h 
b/drivers/staging/lustre/lustre/include/obd.h
index f1233ca..ea6056b 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -686,6 +686,16 @@ enum md_cli_flags {
CLI_MIGRATE = BIT(4),
 };
 
+/**
+ * GETXATTR is not included as only a couple of fields in the reply body
+ * is filled, but not FID which is needed for common intent handling in
+ * mdc_finish_intent_lock()
+ */
+static inline bool it_has_reply_body(const struct lookup_intent *it)
+{
+   return it->it_op & (IT_OPEN | IT_UNLINK | IT_LOOKUP | IT_GETATTR);
+}
+
 struct md_op_data {
struct lu_fid  op_fid1; /* operation fid1 (usually parent) */
struct lu_fid  op_fid2; /* operation fid2 (usually child) */
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c 
b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index 695ef44..309ead1 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -568,7 +568,7 @@ static int mdc_finish_enqueue(struct obd_export *exp,
  it->it_op, it->it_disposition, it->it_status);
 
/* We know what to expect, so we do any byte flipping required here */
-   if (it->it_op & (IT_OPEN | IT_UNLINK | IT_LOOKUP | IT_GETATTR)) {
+   if (it_has_reply_body(it)) {
struct mdt_body *body;
 
body = req_capsule_server_get(pill, &RMF_MDT_BODY);
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 01/22] staging: lustre: llite: initialize xattr->xe_namelen

2018-04-15 Thread James Simmons
When the allocation of xattr->xe_name was moved to kstrdup()
setting xattr->xe_namelen was dropped. This field is used
in several parts of the xattr cache code so it broke xattr
handling. Initialize xattr->xe_namelen when allocating
xattr->xe_name succeeds. Also change the debugging statement
to really report the xattr name instead of its length which
wasn't even being set.

Fixes: b3dd8957c23a ("staging: lustre: lustre: llite: Use kstrdup")
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr_cache.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c 
b/drivers/staging/lustre/lustre/llite/xattr_cache.c
index 4dc799d..ef66949 100644
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c
@@ -121,10 +121,12 @@ static int ll_xattr_cache_add(struct list_head *cache,
 
xattr->xe_name = kstrdup(xattr_name, GFP_NOFS);
if (!xattr->xe_name) {
-   CDEBUG(D_CACHE, "failed to alloc xattr name %u\n",
-  xattr->xe_namelen);
+   CDEBUG(D_CACHE, "failed to alloc xattr name %s\n",
+  xattr_name);
goto err_name;
}
+   xattr->xe_namelen = strlen(xattr_name) + 1;
+
xattr->xe_value = kmemdup(xattr_val, xattr_val_len, GFP_NOFS);
if (!xattr->xe_value)
goto err_value;
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 19/22] staging: lustre: llite: add support set_acl method in inode operations

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Linux kernel v3.14 adds set_acl method to inode operations.
This patch adds support to Lustre for proper acl management.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Signed-off-by: John L. Hammond <john.hamm...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9183
Reviewed-on: https://review.whamcloud.com/25965
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10541
Reviewed-on: https://review.whamcloud.com/
Reviewed-by: Bob Glossman <bob.gloss...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/file.c | 67 ++
 .../staging/lustre/lustre/llite/llite_internal.h   |  4 ++
 drivers/staging/lustre/lustre/llite/namei.c| 10 +++-
 3 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/file.c 
b/drivers/staging/lustre/lustre/llite/file.c
index 0026fde..35f5bda 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -3030,6 +3030,7 @@ static int ll_fiemap(struct inode *inode, struct 
fiemap_extent_info *fieinfo,
return rc;
 }
 
+#ifdef CONFIG_FS_POSIX_ACL
 struct posix_acl *ll_get_acl(struct inode *inode, int type)
 {
struct ll_inode_info *lli = ll_i2info(inode);
@@ -3043,6 +3044,69 @@ struct posix_acl *ll_get_acl(struct inode *inode, int 
type)
return acl;
 }
 
+int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+   struct ll_sb_info *sbi = ll_i2sbi(inode);
+   struct ptlrpc_request *req = NULL;
+   const char *name = NULL;
+   size_t value_size = 0;
+   char *value = NULL;
+   int rc;
+
+   switch (type) {
+   case ACL_TYPE_ACCESS:
+   name = XATTR_NAME_POSIX_ACL_ACCESS;
+   if (acl) {
+   rc = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+   if (rc)
+   goto out;
+   }
+
+   break;
+
+   case ACL_TYPE_DEFAULT:
+   name = XATTR_NAME_POSIX_ACL_DEFAULT;
+   if (!S_ISDIR(inode->i_mode)) {
+   rc = acl ? -EACCES : 0;
+   goto out;
+   }
+
+   break;
+
+   default:
+   rc = -EINVAL;
+   goto out;
+   }
+
+   if (acl) {
+   value_size = posix_acl_xattr_size(acl->a_count);
+   value = kmalloc(value_size, GFP_NOFS);
+   if (!value) {
+   rc = -ENOMEM;
+   goto out;
+   }
+
+   rc = posix_acl_to_xattr(&init_user_ns, acl, value, value_size);
+   if (rc < 0)
+   goto out_value;
+   }
+
+   rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
+value ? OBD_MD_FLXATTR : OBD_MD_FLXATTRRM,
+name, value, value_size, 0, 0, 0, &req);
+
+   ptlrpc_req_finished(req);
+out_value:
+   kfree(value);
+out:
+   if (!rc)
+   set_cached_acl(inode, type, acl);
+   else
+   forget_cached_acl(inode, type);
+   return rc;
+}
+#endif /* CONFIG_FS_POSIX_ACL */
+
 int ll_inode_permission(struct inode *inode, int mask)
 {
struct ll_sb_info *sbi;
@@ -3164,7 +3228,10 @@ int ll_inode_permission(struct inode *inode, int mask)
.permission = ll_inode_permission,
.listxattr  = ll_listxattr,
.fiemap = ll_fiemap,
+#ifdef CONFIG_FS_POSIX_ACL
.get_acl= ll_get_acl,
+   .set_acl= ll_set_acl,
+#endif
 };
 
 /* dynamic ioctl number support routines */
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h 
b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 6504850..2280327 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -754,7 +754,11 @@ enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 
bits,
 int ll_md_real_close(struct inode *inode, fmode_t fmode);
 int ll_getattr(const struct path *path, struct kstat *stat,
   u32 request_mask, unsigned int flags);
+#ifdef CONFIG_FS_POSIX_ACL
 struct posix_acl *ll_get_acl(struct inode *inode, int type);
+int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+#endif /* CONFIG_FS_POSIX_ACL */
+
 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
   const char *name, int namelen);
 int ll_get_fid_by_name(struct inode *parent, const char *name,
diff --git a/drivers/staging/lustre/lustre/llite/namei.c 
b/drivers/staging/lustre/lustre/llite/namei.c
index 6c9ec46..d7c4c58 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lust

[PATCH 11/22] staging: lustre: llite: eat -EEXIST on setting trusted.lov

2018-04-15 Thread James Simmons
From: Bobi Jam <bobijam...@intel.com>

Tools like rsync, tar, cp may copy and restore the xattrs on a file.
The client previously ignored the setting of trusted.lov/lustre.lov
if the layout had already been specified, to avoid causing these
tools to fail for no reason.

For PFL files we still need to silently eat -EEXIST on setting these
attributes to avoid problems.

Signed-off-by: Bobi Jam <bobijam...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9484
Reviewed-on: https://review.whamcloud.com/27126
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Niu Yawei <yawei@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index 56ac07e..69c5227 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -254,12 +254,20 @@ static int ll_setstripe_ea(struct dentry *dentry, struct 
lov_user_md *lump,
 
lum_size = ll_lov_user_md_size(lump);
if (lum_size < 0 || size < lum_size)
-   return 0; /* b=10667: ignore error */
+   return -ERANGE;
 
rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags, lump,
  lum_size);
-   /* b=10667: rc always be 0 here for now */
-   rc = 0;
+   /**
+* b=10667: ignore -EEXIST.
+* Silently eat error on setting trusted.lov/lustre.lov
+* attribute for platforms that added the default option
+* to copy all attributes in 'cp' command. Both rsync and
+* tar --xattrs also will try to set LOVEA for existing
+* files.
+*/
+   if (rc == -EEXIST)
+   rc = 0;
} else if (S_ISDIR(inode->i_mode)) {
rc = ll_dir_setstripe(inode, lump, 0);
}
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 10/22] staging: lustre: llite: return from ll_adjust_lum() if lump is NULL

2018-04-15 Thread James Simmons
From: Bobi Jam <bobijam...@intel.com>

No need to check several times if lump is NULL. Just test once and
return 0 if NULL.

Signed-off-by: Bobi Jam <bobijam...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9484
Reviewed-on: https://review.whamcloud.com/27126
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Niu Yawei <yawei@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index 78ce85b..56ac07e 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -190,15 +190,18 @@ static int ll_adjust_lum(struct inode *inode, struct 
lov_user_md *lump)
 {
int rc = 0;
 
+   if (!lump)
+   return 0;
+
/* Attributes that are saved via getxattr will always have
 * the stripe_offset as 0.  Instead, the MDS should be
 * allowed to pick the starting OST index.   b=17846
 */
-   if (lump && lump->lmm_stripe_offset == 0)
+   if (lump->lmm_stripe_offset == 0)
lump->lmm_stripe_offset = -1;
 
/* Avoid anyone directly setting the RELEASED flag. */
-   if (lump && (lump->lmm_pattern & LOV_PATTERN_F_RELEASED)) {
+   if (lump->lmm_pattern & LOV_PATTERN_F_RELEASED) {
/* Only if we have a released flag check if the file
 * was indeed archived.
 */
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 18/22] staging: lustre: llite: style changes in xattr.c

2018-04-15 Thread James Simmons
Small style changes to better match the kernel coding standard
and make the code more readable.

Signed-off-by: James Simmons <uja.o...@yahoo.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9183
Reviewed-on: https://review.whamcloud.com/27240
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Bob Glossman <bob.gloss...@intel.com>
Reviewed-by: Sebastien Buisson <sbuis...@ddn.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index 835d00f..d08bf1e 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -81,11 +81,10 @@ static int xattr_type_filter(struct ll_sb_info *sbi,
return 0;
 }
 
-static int
-ll_xattr_set_common(const struct xattr_handler *handler,
-   struct dentry *dentry, struct inode *inode,
-   const char *name, const void *value, size_t size,
-   int flags)
+static int ll_xattr_set_common(const struct xattr_handler *handler,
+  struct dentry *dentry, struct inode *inode,
+  const char *name, const void *value, size_t size,
+  int flags)
 {
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ptlrpc_request *req = NULL;
@@ -139,9 +138,9 @@ static int xattr_type_filter(struct ll_sb_info *sbi,
fullname = kasprintf(GFP_KERNEL, "%s%s", handler->prefix, name);
if (!fullname)
return -ENOMEM;
-   rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
-valid, fullname, pv, size, 0, flags,
-ll_i2suppgid(inode), &req);
+
+   rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, fullname,
+pv, size, 0, flags, ll_i2suppgid(inode), &req);
kfree(fullname);
if (rc) {
if (rc == -EOPNOTSUPP && handler->flags == XATTR_USER_T) {
@@ -307,9 +306,8 @@ static int ll_xattr_set(const struct xattr_handler *handler,
   flags);
 }
 
-int
-ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer,
- size_t size, u64 valid)
+int ll_xattr_list(struct inode *inode, const char *name, int type, void 
*buffer,
+ size_t size, u64 valid)
 {
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_sb_info *sbi = ll_i2sbi(inode);
@@ -439,6 +437,7 @@ static int ll_xattr_get_common(const struct xattr_handler 
*handler,
fullname = kasprintf(GFP_KERNEL, "%s%s", handler->prefix, name);
if (!fullname)
return -ENOMEM;
+
rc = ll_xattr_list(inode, fullname, handler->flags, buffer, size,
   OBD_MD_FLXATTR);
kfree(fullname);
@@ -562,6 +561,7 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, 
size_t size)
   OBD_MD_FLXATTRLS);
if (rc < 0)
return rc;
+
/*
 * If we're being called to get the size of the xattr list
 * (size == 0) then just assume that a lustre.lov xattr
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 21/22] staging: lustre: llite: correct removexattr detection

2018-04-15 Thread James Simmons
In ll_xattr_set_common() detect the removexattr() case correctly by
testing for a NULL value as well as XATTR_REPLACE.

Signed-off-by: John L. Hammond <john.hamm...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10787
Reviewed-on: https://review.whamcloud.com/
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lustre/llite/xattr.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lustre/llite/xattr.c 
b/drivers/staging/lustre/lustre/llite/xattr.c
index e835c8e..1a597a6 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -94,7 +94,11 @@ static int ll_xattr_set_common(const struct xattr_handler 
*handler,
u64 valid;
int rc;
 
-   if (flags == XATTR_REPLACE) {
+   /* When setxattr() is called with a size of 0 the value is
+* unconditionally replaced by "". When removexattr() is
+* called we get a NULL value and XATTR_REPLACE for flags.
+*/
+   if (!value && flags == XATTR_REPLACE) {
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
valid = OBD_MD_FLXATTRRM;
} else {
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 17/25] staging: lustre: libcfs: rename goto label in cfs_cpt_table_print

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Change goto label out to err.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata <amir.sheh...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index ae5ff58..435ee8e 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -161,20 +161,20 @@ int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char 
*buf, int len)
 
for (i = 0; i < cptab->ctb_nparts; i++) {
if (len <= 0)
-   goto out;
+   goto err;
 
rc = snprintf(tmp, len, "%d\t:", i);
len -= rc;
 
if (len <= 0)
-   goto out;
+   goto err;
 
tmp += rc;
for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
rc = snprintf(tmp, len, " %d", j);
len -= rc;
if (len <= 0)
-   goto out;
+   goto err;
tmp += rc;
}
 
@@ -184,7 +184,7 @@ int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char 
*buf, int len)
}
 
rc = 0;
-out:
+err:
if (rc < 0)
return rc;
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 12/25] staging: lustre: libcfs: fix libcfs_cpu coding style

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

This patch brings the lustre CPT code into alignment with the
Linux kernel coding style.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23304
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 .../lustre/include/linux/libcfs/libcfs_cpu.h   |  35 ---
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c|  70 +-
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 107 +
 3 files changed, 86 insertions(+), 126 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index c0922fc..bda81ab 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -43,16 +43,16 @@
  *
  * Example: if there are 8 cores on the system, while creating a CPT
  * with cpu_npartitions=4:
- *   core[0, 1] = partition[0], core[2, 3] = partition[1]
- *   core[4, 5] = partition[2], core[6, 7] = partition[3]
+ * core[0, 1] = partition[0], core[2, 3] = partition[1]
+ * core[4, 5] = partition[2], core[6, 7] = partition[3]
  *
- *   cpu_npartitions=1:
- *   core[0, 1, ... 7] = partition[0]
+ *  cpu_npartitions=1:
+ * core[0, 1, ... 7] = partition[0]
  *
  *   . User can also specify CPU partitions by string pattern
  *
  * Examples: cpu_partitions="0[0,1], 1[2,3]"
- *cpu_partitions="N 0[0-3], 1[4-8]"
+ *  cpu_partitions="N 0[0-3], 1[4-8]"
  *
  * The first character "N" means following numbers are numa ID
  *
@@ -92,8 +92,8 @@ struct cfs_cpt_table {
u64 ctb_version;
 };
 
-static inline int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
+static inline int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf,
+ int len)
 {
return 0;
 }
@@ -116,8 +116,7 @@ struct cfs_cpt_table {
 /**
  * return total number of CPU partitions in \a cptab
  */
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab);
+int cfs_cpt_number(struct cfs_cpt_table *cptab);
 /**
  * return number of HW cores or hyper-threadings in a CPU partition \a cpt
  */
@@ -167,13 +166,13 @@ struct cfs_cpt_table {
  * add all cpus in \a mask to CPU partition \a cpt
  * return 1 if successfully set all CPUs, otherwise return 0
  */
-int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab,
-   int cpt, const cpumask_t *mask);
+int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt,
+   const cpumask_t *mask);
 /**
  * remove all cpus in \a mask from CPU partition \a cpt
  */
-void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
-  int cpt, const cpumask_t *mask);
+void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt,
+  const cpumask_t *mask);
 /**
  * add all cpus in NUMA node \a node to CPU partition \a cpt
  * return 1 if successfully set all CPUs, otherwise return 0
@@ -188,13 +187,13 @@ void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
  * add all cpus in node mask \a mask to CPU partition \a cpt
  * return 1 if successfully set all CPUs, otherwise return 0
  */
-int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab,
-int cpt, nodemask_t *mask);
+int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt,
+const nodemask_t *mask);
 /**
  * remove all cpus in node mask \a mask from CPU partition \a cpt
  */
-void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
-   int cpt, nodemask_t *mask);
+void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt,
+   const nodemask_t *mask);
 /**
  * convert partition id \a cpt to numa node id, if there are more than one
  * nodes in this partition, it might return a different node id each time.
@@ -240,7 +239,7 @@ enum {
 
 struct cfs_percpt_lock {
/* cpu-partition-table for this lock */
-   struct cfs_cpt_table *pcl_cptab;
+   struct cfs_cpt_table *pcl_cptab;
/* exclusively locked */
unsigned int  pcl_locked;
/* private lock table */
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 7ac2796..f9fcbb1 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -34,7 +34,7 @@
 #include 
 
 /** Global CPU partition table */
-struct cfs_cpt_table   *cfs_cpt_table __rea

[PATCH 21/25] staging: lustre: libcfs: report NUMA node instead of just node

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Reporting "HW nodes" is too generic. It really is reporting
"HW NUMA nodes". Update the debug message.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23306
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Patrick Farrell <p...@cray.com>
Reviewed-by: Olaf Weber <olaf.we...@hpe.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 80db008..28b2acb 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -1108,7 +1108,7 @@ int cfs_cpu_init(void)
 
put_online_cpus();
 
-   LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, npartitions: %d\n",
+   LCONSOLE(0, "HW NUMA nodes: %d, HW CPU cores: %d, npartitions: %d\n",
 num_online_nodes(), num_online_cpus(),
 cfs_cpt_number(cfs_cpt_table));
return 0;
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 11/25] staging: lustre: libcfs: invert error handling for cfs_cpt_table_print

2018-04-15 Thread James Simmons
From: Amir Shehata <amir.sheh...@intel.com>

Instead of setting rc to -EFBIG for several cases in the loop, let's
initialize rc to -EFBIG and just break out of the loop in case of
failure. Just set rc to zero once we successfully finish the loop.

Signed-off-by: Amir Shehata <amir.sheh...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber <o...@sgi.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index bbf89b8..6d8dcd3 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -158,29 +158,26 @@ struct cfs_cpt_table *
 cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
 {
char *tmp = buf;
-   int rc = 0;
+   int rc = -EFBIG;
int i;
int j;
 
for (i = 0; i < cptab->ctb_nparts; i++) {
-   if (len > 0) {
-   rc = snprintf(tmp, len, "%d\t:", i);
-   len -= rc;
-   }
+   if (len <= 0)
+   goto out;
+
+   rc = snprintf(tmp, len, "%d\t:", i);
+   len -= rc;
 
-   if (len <= 0) {
-   rc = -EFBIG;
+   if (len <= 0)
goto out;
-   }
 
tmp += rc;
for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
-   rc = snprintf(tmp, len, "%d ", j);
+   rc = snprintf(tmp, len, " %d", j);
len -= rc;
-   if (len <= 0) {
-   rc = -EFBIG;
+   if (len <= 0)
goto out;
-   }
tmp += rc;
}
 
@@ -189,6 +186,7 @@ struct cfs_cpt_table *
len--;
}
 
+   rc = 0;
  out:
if (rc < 0)
return rc;
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 15/25] staging: lustre: libcfs: rename i to cpu for cfs_cpt_bind

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Rename variable i to cpu to make code easier to understand.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata <amir.sheh...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 5f2ab30..b985b3d 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -644,8 +644,8 @@ int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
 {
cpumask_var_t *cpumask;
nodemask_t *nodemask;
+   int cpu;
int rc;
-   int i;
 
LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
 
@@ -663,8 +663,8 @@ int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
return -EINVAL;
}
 
-   for_each_online_cpu(i) {
-   if (cpumask_test_cpu(i, *cpumask))
+   for_each_online_cpu(cpu) {
+   if (cpumask_test_cpu(cpu, *cpumask))
continue;
 
rc = set_cpus_allowed_ptr(current, *cpumask);
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 06/25] staging: lustre: libcfs: replace num_possible_cpus() with nr_cpu_ids

2018-04-15 Thread James Simmons
From: Amir Shehata <amir.sheh...@intel.com>

Move from num_possible_cpus() to nr_cpu_ids.

Signed-off-by: Amir Shehata <amir.sheh...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber <o...@sgi.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index b2a88ef..741db69 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -105,14 +105,14 @@ struct cfs_cpt_table *
!cptab->ctb_nodemask)
goto failed;
 
-   cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(),
+   cptab->ctb_cpu2cpt = kvmalloc_array(nr_cpu_ids,
sizeof(cptab->ctb_cpu2cpt[0]),
GFP_KERNEL);
if (!cptab->ctb_cpu2cpt)
goto failed;
 
memset(cptab->ctb_cpu2cpt, -1,
-  num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
+  nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
 
cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
  GFP_KERNEL);
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 14/25] staging: lustre: libcfs: rename i to node for cfs_cpt_set_nodemask

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Rename variable i to node to make code easier to understand.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata <amir.sheh...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 1669669..5f2ab30 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -550,10 +550,10 @@ void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int 
cpt, int node)
 int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt,
 const nodemask_t *mask)
 {
-   int i;
+   int node;
 
-   for_each_node_mask(i, *mask) {
-   if (!cfs_cpt_set_node(cptab, cpt, i))
+   for_each_node_mask(node, *mask) {
+   if (!cfs_cpt_set_node(cptab, cpt, node))
return 0;
}
 
@@ -564,10 +564,10 @@ int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int 
cpt,
 void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt,
const nodemask_t *mask)
 {
-   int i;
+   int node;
 
-   for_each_node_mask(i, *mask)
-   cfs_cpt_unset_node(cptab, cpt, i);
+   for_each_node_mask(node, *mask)
+   cfs_cpt_unset_node(cptab, cpt, node);
 }
 EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 23/25] staging: lustre: libcfs: rework CPU pattern parsing code

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Currently the module parameter string for the CPU pattern can be
modified, which is wrong. Rewrite the CPU pattern parsing code
to prevent the passed buffer from being changed. This change
also enables us to add real error propagation to the caller
functions.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Signed-off-by: Amir Shehata <amir.sheh...@intel.com>
Signed-off-by: Andreas Dilger <andreas.dil...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23306
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9715
Reviewed-on: https://review.whamcloud.com/27872
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Patrick Farrell <p...@cray.com>
Reviewed-by: Olaf Weber <olaf.we...@hpe.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 151 -
 1 file changed, 88 insertions(+), 63 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index a08816a..915cfca 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -662,11 +662,11 @@ int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
}
 
-   if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) {
+   if (!cpumask_intersects(*cpumask, cpu_online_mask)) {
CDEBUG(D_INFO,
   "No online CPU found in CPU partition %d, did someone do 
CPU hotplug on system? You might need to reload Lustre modules to keep system 
working well.\n",
   cpt);
-   return -EINVAL;
+   return -ENODEV;
}
 
for_each_online_cpu(cpu) {
@@ -830,11 +830,13 @@ static struct cfs_cpt_table *cfs_cpt_table_create(int 
ncpt)
cptab = cfs_cpt_table_alloc(ncpt);
if (!cptab) {
CERROR("Failed to allocate CPU map(%d)\n", ncpt);
+   rc = -ENOMEM;
goto failed;
}
 
if (!zalloc_cpumask_var(&node_mask, GFP_NOFS)) {
CERROR("Failed to allocate scratch cpumask\n");
+   rc = -ENOMEM;
goto failed;
}
 
@@ -849,8 +851,10 @@ static struct cfs_cpt_table *cfs_cpt_table_create(int ncpt)
 
rc = cfs_cpt_choose_ncpus(cptab, cpt, node_mask,
  num - ncpu);
-   if (rc < 0)
+   if (rc < 0) {
+   rc = -EINVAL;
goto failed_mask;
+   }
 
ncpu = cpumask_weight(part->cpt_cpumask);
if (ncpu == num + !!(rem > 0)) {
@@ -873,37 +877,51 @@ static struct cfs_cpt_table *cfs_cpt_table_create(int 
ncpt)
if (cptab)
cfs_cpt_table_free(cptab);
 
-   return NULL;
+   return ERR_PTR(rc);
 }
 
-static struct cfs_cpt_table *cfs_cpt_table_create_pattern(char *pattern)
+static struct cfs_cpt_table *cfs_cpt_table_create_pattern(const char *pattern)
 {
struct cfs_cpt_table *cptab;
+   char *pattern_dup;
+   char *bracket;
char *str;
int node = 0;
-   int high;
int ncpt = 0;
-   int cpt;
+   int cpt = 0;
+   int high;
int rc;
int c;
int i;
 
-   str = strim(pattern);
+   pattern_dup = kstrdup(pattern, GFP_KERNEL);
+   if (!pattern_dup) {
+   CERROR("Failed to duplicate pattern '%s'\n", pattern);
+   return ERR_PTR(-ENOMEM);
+   }
+
+   str = strim(pattern_dup);
if (*str == 'n' || *str == 'N') {
-   pattern = str + 1;
-   if (*pattern != '\0') {
-   node = 1;
-   } else { /* shortcut to create CPT from NUMA & CPU topology */
+   str++; /* skip 'N' char */
+   node = 1; /* NUMA pattern */
+   if (*str == '\0') {
node = -1;
-   ncpt = num_online_nodes();
+   for_each_online_node(i) {
+   if (!cpumask_empty(cpumask_of_node(i)))
+   ncpt++;
+   }
+   if (ncpt == 1) { /* single NUMA node */
+   kfree(pattern_dup);
+   return cfs_cpt_table_create(cpu_npartitions);
+   }
}
}
 
if (!ncpt) { /* scanning bracket wh

[PATCH 02/25] staging: lustre: libcfs: rename variable i to cpu

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Change the name of the variable i used for for_each_cpu() to cpu
for code readability.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23303
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 134b239..d8c190c 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -334,7 +334,7 @@ struct cfs_cpt_table *
 int
 cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, const cpumask_t 
*mask)
 {
-   int i;
+   int cpu;
 
if (!cpumask_weight(mask) ||
cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
@@ -343,8 +343,8 @@ struct cfs_cpt_table *
return 0;
}
 
-   for_each_cpu(i, mask) {
-   if (!cfs_cpt_set_cpu(cptab, cpt, i))
+   for_each_cpu(cpu, mask) {
+   if (!cfs_cpt_set_cpu(cptab, cpt, cpu))
return 0;
}
 
@@ -356,10 +356,10 @@ struct cfs_cpt_table *
 cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt,
  const cpumask_t *mask)
 {
-   int i;
+   int cpu;
 
-   for_each_cpu(i, mask)
-   cfs_cpt_unset_cpu(cptab, cpt, i);
+   for_each_cpu(cpu, mask)
+   cfs_cpt_unset_cpu(cptab, cpt, cpu);
 }
 EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 03/25] staging: lustre: libcfs: implement cfs_cpt_cpumask for UMP case

2018-04-15 Thread James Simmons
From: Amir Shehata <amir.sheh...@intel.com>

The function cfs_cpt_cpumask() exists for SMP systems, but when
CONFIG_SMP is disabled it only returns NULL. Fill in this missing
function. Also properly initialize ctb_mask for the UMP
case.

Signed-off-by: Amir Shehata <amir.sheh...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber <o...@sgi.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h | 16 +---
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c  |  9 +
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 1f2cd78..070f8fe 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -77,10 +77,6 @@
 
 #ifdef CONFIG_SMP
 /**
- * return cpumask of CPU partition \a cpt
- */
-cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt);
-/**
  * print string information of cpt-table
  */
 int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
@@ -89,19 +85,13 @@ struct cfs_cpt_table {
/* # of CPU partitions */
int ctb_nparts;
/* cpu mask */
-   cpumask_t   ctb_mask;
+   cpumask_var_t   ctb_mask;
/* node mask */
nodemask_t  ctb_nodemask;
/* version */
u64 ctb_version;
 };
 
-static inline cpumask_var_t *
-cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
-{
-   return NULL;
-}
-
 static inline int
 cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
 {
@@ -133,6 +123,10 @@ struct cfs_cpt_table {
  */
 int cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt);
 /**
+ * return cpumask of CPU partition \a cpt
+ */
+cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt);
+/**
  * return nodemask of CPU partition \a cpt
  */
 nodemask_t *cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt);
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 705abf2..5ea294f 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -54,6 +54,9 @@ struct cfs_cpt_table *
cptab = kzalloc(sizeof(*cptab), GFP_NOFS);
if (cptab) {
cptab->ctb_version = CFS_CPU_VERSION_MAGIC;
+   if (!zalloc_cpumask_var(&cptab->ctb_mask, GFP_NOFS))
+   return NULL;
+   cpumask_set_cpu(0, cptab->ctb_mask);
node_set(0, cptab->ctb_nodemask);
cptab->ctb_nparts  = ncpt;
}
@@ -108,6 +111,12 @@ struct cfs_cpt_table *
 }
 EXPORT_SYMBOL(cfs_cpt_online);
 
+cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
+{
+   return &cptab->ctb_mask;
+}
+EXPORT_SYMBOL(cfs_cpt_cpumask);
+
 nodemask_t *
 cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
 {
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 20/25] staging: lustre: libcfs: make tolerant to offline CPUs and empty NUMA nodes

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Rework the CPU partition code to make it more tolerant of
offline CPUs and empty nodes.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata <amir.sheh...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 .../lustre/include/linux/libcfs/linux/linux-cpu.h  |   2 +
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 132 +
 drivers/staging/lustre/lnet/lnet/lib-msg.c |   2 +
 3 files changed, 60 insertions(+), 76 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
index b3bc4e7..ed4351b 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
@@ -56,6 +56,8 @@ struct cfs_cpu_partition {
unsigned int*cpt_distance;
/* spread rotor for NUMA allocator */
int  cpt_spread_rotor;
+   /* NUMA node if cpt_nodemask is empty */
+   int  cpt_node;
 };
 
 /** descriptor for CPU partitions */
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 32ebd0f..80db008 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -427,8 +427,16 @@ int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, 
int cpu)
return 0;
}
 
-   LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
-   LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
+   if (cpumask_test_cpu(cpu, cptab->ctb_cpumask)) {
+   CDEBUG(D_INFO, "CPU %d is already in cpumask\n", cpu);
+   return 0;
+   }
+
+   if (cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)) {
+   CDEBUG(D_INFO, "CPU %d is already in partition %d cpumask\n",
+  cpu, cptab->ctb_cpu2cpt[cpu]);
+   return 0;
+   }
 
cfs_cpt_add_cpu(cptab, cpt, cpu);
cfs_cpt_add_node(cptab, cpt, cpu_to_node(cpu));
@@ -497,8 +505,10 @@ void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, 
int cpt,
 {
int cpu;
 
-   for_each_cpu(cpu, mask)
-   cfs_cpt_unset_cpu(cptab, cpt, cpu);
+   for_each_cpu(cpu, mask) {
+   cfs_cpt_del_cpu(cptab, cpt, cpu);
+   cfs_cpt_del_node(cptab, cpt, cpu_to_node(cpu));
+   }
 }
 EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
 
@@ -549,10 +559,8 @@ int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int 
cpt,
 {
int node;
 
-   for_each_node_mask(node, *mask) {
-   if (!cfs_cpt_set_node(cptab, cpt, node))
-   return 0;
-   }
+   for_each_node_mask(node, *mask)
+   cfs_cpt_set_node(cptab, cpt, node);
 
return 1;
 }
@@ -573,7 +581,7 @@ int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int 
cpt)
nodemask_t *mask;
int weight;
int rotor;
-   int node;
+   int node = 0;
 
/* convert CPU partition ID to HW node id */
 
@@ -583,20 +591,20 @@ int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int 
cpt)
} else {
mask = cptab->ctb_parts[cpt].cpt_nodemask;
rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
+   node  = cptab->ctb_parts[cpt].cpt_node;
}
 
weight = nodes_weight(*mask);
-   LASSERT(weight > 0);
-
-   rotor %= weight;
+   if (weight > 0) {
+   rotor %= weight;
 
-   for_each_node_mask(node, *mask) {
-   if (!rotor--)
-   return node;
+   for_each_node_mask(node, *mask) {
+   if (!rotor--)
+   return node;
+   }
}
 
-   LBUG();
-   return 0;
+   return node;
 }
 EXPORT_SYMBOL(cfs_cpt_spread_node);
 
@@ -689,17 +697,21 @@ static int cfs_cpt_choose_ncpus(struct cfs_cpt_table 
*cptab, int cpt,
cpumask_var_t core_mask;
int rc = 0;
int cpu;
+   int i;
 
LASSERT(number > 0);
 
if (number >= cpumask_weight(node_mask)) {
while (!cpumask_empty(node_mask)) {
cpu = cpumask_first(node_mask);
+   cpumask_clear_cpu(cpu, node_mask);
+
+   if (!cpu_online(cpu))
+   continue;
 
rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
if (!rc)
 

[PATCH 22/25] staging: lustre: libcfs: update debug messages in CPT code

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Update the debug messages for the CPT table creation code. Place
the passed-in string in quotes to make it clear what it is.
Capitalize cpu in the debug strings.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23306
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Patrick Farrell <p...@cray.com>
Reviewed-by: Olaf Weber <olaf.we...@hpe.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 28b2acb..a08816a 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -466,7 +466,7 @@ void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int 
cpt, int cpu)
 
} else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
CDEBUG(D_INFO,
-  "CPU %d is not in cpu-partition %d\n", cpu, cpt);
+  "CPU %d is not in CPU partition %d\n", cpu, cpt);
return;
}
 
@@ -910,14 +910,14 @@ static struct cfs_cpt_table 
*cfs_cpt_table_create_pattern(char *pattern)
if (!ncpt ||
(node && ncpt > num_online_nodes()) ||
(!node && ncpt > num_online_cpus())) {
-   CERROR("Invalid pattern %s, or too many partitions %d\n",
+   CERROR("Invalid pattern '%s', or too many partitions %d\n",
   pattern, ncpt);
return NULL;
}
 
cptab = cfs_cpt_table_alloc(ncpt);
if (!cptab) {
-   CERROR("Failed to allocate cpu partition table\n");
+   CERROR("Failed to allocate CPU partition table\n");
return NULL;
}
 
@@ -948,11 +948,11 @@ static struct cfs_cpt_table 
*cfs_cpt_table_create_pattern(char *pattern)
 
if (!bracket) {
if (*str) {
-   CERROR("Invalid pattern %s\n", str);
+   CERROR("Invalid pattern '%s'\n", str);
goto failed;
}
if (c != ncpt) {
-   CERROR("expect %d partitions but found %d\n",
+   CERROR("Expect %d partitions but found %d\n",
   ncpt, c);
goto failed;
}
@@ -960,7 +960,7 @@ static struct cfs_cpt_table 
*cfs_cpt_table_create_pattern(char *pattern)
}
 
if (sscanf(str, "%d%n", &cpt, &n) < 1) {
-   CERROR("Invalid cpu pattern %s\n", str);
+   CERROR("Invalid CPU pattern '%s'\n", str);
goto failed;
}
 
@@ -977,20 +977,20 @@ static struct cfs_cpt_table 
*cfs_cpt_table_create_pattern(char *pattern)
 
str = strim(str + n);
if (str != bracket) {
-   CERROR("Invalid pattern %s\n", str);
+   CERROR("Invalid pattern '%s'\n", str);
goto failed;
}
 
bracket = strchr(str, ']');
if (!bracket) {
-   CERROR("Missing right bracket for partition %d, %s\n",
+   CERROR("Missing right bracket for partition %d in 
'%s'\n",
   cpt, str);
goto failed;
}
 
if (cfs_expr_list_parse(str, (bracket - str) + 1,
0, high, )) {
-   CERROR("Can't parse number range: %s\n", str);
+   CERROR("Can't parse number range in '%s'\n", str);
goto failed;
}
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 08/25] staging: lustre: libcfs: add cpu distance handling

2018-04-15 Thread James Simmons
From: Amir Shehata <amir.sheh...@intel.com>

Add functionality to calculate the distance between two CPTs.
Expose those distances in debugfs so people deploying a setup
can debug what is being created for CPTs.

Signed-off-by: Amir Shehata <amir.sheh...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber <o...@sgi.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 .../lustre/include/linux/libcfs/libcfs_cpu.h   |  8 +++
 .../lustre/include/linux/libcfs/linux/linux-cpu.h  |  4 ++
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c| 21 
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 59 ++
 4 files changed, 92 insertions(+)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 839ec02..c0922fc 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -110,6 +110,10 @@ struct cfs_cpt_table {
  */
 struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
 /**
+ * print distance information of cpt-table
+ */
+int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len);
+/**
  * return total number of CPU partitions in \a cptab
  */
 int
@@ -143,6 +147,10 @@ struct cfs_cpt_table {
  */
 int cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node);
 /**
+ * NUMA distance between \a cpt1 and \a cpt2 in \a cptab
+ */
+unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2);
+/**
  * bind current thread on a CPU-partition \a cpt of \a cptab
  */
 int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
index 1bed0ba..4ac1670 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
@@ -52,6 +52,8 @@ struct cfs_cpu_partition {
cpumask_var_t   cpt_cpumask;
/* nodes mask for this partition */
nodemask_t  *cpt_nodemask;
+   /* NUMA distance between CPTs */
+   unsigned int*cpt_distance;
/* spread rotor for NUMA allocator */
unsigned intcpt_spread_rotor;
 };
@@ -60,6 +62,8 @@ struct cfs_cpu_partition {
 struct cfs_cpt_table {
/* spread rotor for NUMA allocator */
unsigned intctb_spread_rotor;
+   /* maximum NUMA distance between all nodes in table */
+   unsigned intctb_distance;
/* # of CPU partitions */
unsigned intctb_nparts;
/* partitions tables */
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index e6d1512..7ac2796 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -41,6 +41,8 @@
 
 #define CFS_CPU_VERSION_MAGIC 0xbabecafe
 
+#define CFS_CPT_DISTANCE   1   /* Arbitrary positive value */
+
 struct cfs_cpt_table *
 cfs_cpt_table_alloc(unsigned int ncpt)
 {
@@ -90,6 +92,19 @@ struct cfs_cpt_table *
 EXPORT_SYMBOL(cfs_cpt_table_print);
 #endif /* CONFIG_SMP */
 
+int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len)
+{
+   int rc;
+
+   rc = snprintf(buf, len, "0\t: 0:%d\n", CFS_CPT_DISTANCE);
+   len -= rc;
+   if (len <= 0)
+   return -EFBIG;
+
+   return rc;
+}
+EXPORT_SYMBOL(cfs_cpt_distance_print);
+
 int
 cfs_cpt_number(struct cfs_cpt_table *cptab)
 {
@@ -124,6 +139,12 @@ cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table 
*cptab, int cpt)
 }
 EXPORT_SYMBOL(cfs_cpt_nodemask);
 
+unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
+{
+   return CFS_CPT_DISTANCE;
+}
+EXPORT_SYMBOL(cfs_cpt_distance);
+
 int
 cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
 {
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index fd0c451..1e184b1 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -76,6 +76,7 @@
struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
 
kfree(part->cpt_nodemask);
+   kfree(part->cpt_distance);
free_cpumask_var(part->cpt_cpumask);
}
 
@@ -137,6 +138,12 @@ struct cfs_cpt_table *
if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS) ||
!part->cpt_nodemask)
goto failed;
+
+   

[PATCH 05/25] staging: lustre: libcfs: remove excess space

2018-04-15 Thread James Simmons
From: Amir Shehata <amir.sheh...@intel.com>

The function cfs_cpt_table_print() was adding two spaces
to the string buffer. Just add it once.

Signed-off-by: Amir Shehata <amir.sheh...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber <o...@sgi.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index d207ae5..b2a88ef 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -147,7 +147,7 @@ struct cfs_cpt_table *
 
for (i = 0; i < cptab->ctb_nparts; i++) {
if (len > 0) {
-   rc = snprintf(tmp, len, "%d\t: ", i);
+   rc = snprintf(tmp, len, "%d\t:", i);
len -= rc;
}
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 01/25] staging: lustre: libcfs: remove useless CPU partition code

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

* remove the scratch buffer and the mutex which guards it.
* remove the global cpumask and the spinlock which guards it.
* remove cpt_version, used for checking CPU state changes during
  setup, because CPU state changes are now simply disabled during setup.
* remove the whole global struct cfs_cpt_data cpt_data.
* remove a few unused APIs.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23303
Reviewed-on: https://review.whamcloud.com/25048
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Olaf Weber <o...@sgi.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 .../lustre/include/linux/libcfs/libcfs_cpu.h   |  13 +--
 .../lustre/include/linux/libcfs/linux/linux-cpu.h  |   2 -
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c|  18 +---
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 114 +++--
 4 files changed, 20 insertions(+), 127 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 61bce77..1f2cd78 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -162,12 +162,12 @@ struct cfs_cpt_table {
  * return 1 if successfully set all CPUs, otherwise return 0
  */
 int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab,
-   int cpt, cpumask_t *mask);
+   int cpt, const cpumask_t *mask);
 /**
  * remove all cpus in \a mask from CPU partition \a cpt
  */
 void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
-  int cpt, cpumask_t *mask);
+  int cpt, const cpumask_t *mask);
 /**
  * add all cpus in NUMA node \a node to CPU partition \a cpt
  * return 1 if successfully set all CPUs, otherwise return 0
@@ -190,20 +190,11 @@ int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab,
 void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
int cpt, nodemask_t *mask);
 /**
- * unset all cpus for CPU partition \a cpt
- */
-void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt);
-/**
  * convert partition id \a cpt to numa node id, if there are more than one
  * nodes in this partition, it might return a different node id each time.
  */
 int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
 
-/**
- * return number of HTs in the same core of \a cpu
- */
-int cfs_cpu_ht_nsiblings(int cpu);
-
 /*
  * allocate per-cpu-partition data, returned value is an array of pointers,
  * variable can be indexed by CPU ID.
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
index 6035376..e8bbbaa 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
@@ -58,8 +58,6 @@ struct cfs_cpu_partition {
 
 /** descriptor for CPU partitions */
 struct cfs_cpt_table {
-   /* version, reserved for hotplug */
-   unsigned intctb_version;
/* spread rotor for NUMA allocator */
unsigned intctb_spread_rotor;
/* # of CPU partitions */
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 76291a3..705abf2 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -129,14 +129,15 @@ struct cfs_cpt_table *
 EXPORT_SYMBOL(cfs_cpt_unset_cpu);
 
 int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, const cpumask_t 
*mask)
 {
return 1;
 }
 EXPORT_SYMBOL(cfs_cpt_set_cpumask);
 
 void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt,
+ const cpumask_t *mask)
 {
 }
 EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
@@ -167,12 +168,6 @@ struct cfs_cpt_table *
 }
 EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
 
-void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
-}
-EXPORT_SYMBOL(cfs_cpt_clear);
-
 int
 cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
 {
@@ -181,13 +176,6 @@ struct cfs_cpt_table *
 EXPORT_SYMBOL(cfs_cpt_spread_node);
 
 int
-cfs_cpu_ht_nsiblings(int cpu)
-{
-   return 1;
-}
-EXPORT_SYMBOL(cfs_cpu_ht_nsiblings);
-
-int
 cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
 {
return 0;
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 388521e..

[PATCH 24/25] staging: lustre: libcfs: change CPT estimate algorithm

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

The main idea of having more CPU partitions is based on KNL experience.
When a thread submits IO for network communication, one of the threads
from the current CPT is used for the network stack. With high
parallelization many threads become involved in network submission, but
with fewer CPU partitions they will wait until a single thread processes
them from the network queue. So the bottleneck just moves into the
network layer when there is a small number of CPU partitions. My
experiments showed that the best performance was achieved when each IO
thread has one network thread. This condition can be met by having 2
real HW cores (without hyper threads) per CPT. This is exactly what is
implemented in this patch.

Change the CPT estimate algorithm from 2 * (N - 1)^2 < NCPUS <= 2 * N^2
to 2 HW cores per CPT. This is critical for machines with a number of
cores different from 2^N.

Current algorithm splits CPTs in KNL:
LNet: HW CPU cores: 272, npartitions: 16
cpu_partition_table=
0   : 0-4,68-71,136-139,204-207
1   : 5-9,73-76,141-144,209-212
2   : 10-14,78-81,146-149,214-217
3   : 15-17,72,77,83-85,140,145,151-153,208,219-221
4   : 18-21,82,86-88,150,154-156,213,218,222-224
5   : 22-26,90-93,158-161,226-229
6   : 27-31,95-98,163-166,231-234
7   : 32-35,89,100-103,168-171,236-239
8   : 36-38,94,99,104-105,157,162,167,172-173,225,230,235,240-241
9   : 39-43,107-110,175-178,243-246
10  : 44-48,112-115,180-183,248-251
11  : 49-51,106,111,117-119,174,179,185-187,242,253-255
12  : 52-55,116,120-122,184,188-190,247,252,256-258
13  : 56-60,124-127,192-195,260-263
14  : 61-65,129-132,197-200,265-268
15  : 66-67,123,128,133-135,191,196,201-203,259,264,269-271

New algorithm will split CPTs in KNL:
LNet: HW CPU cores: 272, npartitions: 34
cpu_partition_table=
0   : 0-1,68-69,136-137,204-205
1   : 2-3,70-71,138-139,206-207
2   : 4-5,72-73,140-141,208-209
3   : 6-7,74-75,142-143,210-211
4   : 8-9,76-77,144-145,212-213
5   : 10-11,78-79,146-147,214-215
6   : 12-13,80-81,148-149,216-217
7   : 14-15,82-83,150-151,218-219
8   : 16-17,84-85,152-153,220-221
9   : 18-19,86-87,154-155,222-223
10  : 20-21,88-89,156-157,224-225
11  : 22-23,90-91,158-159,226-227
12  : 24-25,92-93,160-161,228-229
13  : 26-27,94-95,162-163,230-231
14  : 28-29,96-97,164-165,232-233
15  : 30-31,98-99,166-167,234-235
16  : 32-33,100-101,168-169,236-237
17  : 34-35,102-103,170-171,238-239
18  : 36-37,104-105,172-173,240-241
19  : 38-39,106-107,174-175,242-243
20  : 40-41,108-109,176-177,244-245
21  : 42-43,110-111,178-179,246-247
22  : 44-45,112-113,180-181,248-249
23  : 46-47,114-115,182-183,250-251
24  : 48-49,116-117,184-185,252-253
25  : 50-51,118-119,186-187,254-255
26  : 52-53,120-121,188-189,256-257
27  : 54-55,122-123,190-191,258-259
28  : 56-57,124-125,192-193,260-261
29  : 58-59,126-127,194-195,262-263
30  : 60-61,128-129,196-197,264-265
31  : 62-63,130-131,198-199,266-267
32  : 64-65,132-133,200-201,268-269
33  : 66-67,134-135,202-203,270-271

The 'N' pattern does not always work well on KNL.
In flat mode it will be one CPT with all CPUs inside.

In SNC-4 mode:
cpu_partition_table=
0   : 0-17,68-85,136-153,204-221
1   : 18-35,86-103,154-171,222-239
2   : 36-51,104-119,172-187,240-255
3   : 52-67,120-135,188-203,256-271

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/24304
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 30 --
 1 file changed, 5 insertions(+), 25 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 915cfca..ae5fd16 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -768,34 +768,14 @@ static int cfs_cpt_choose_ncpus(struct cfs_cpt_table 
*cptab, int cpt,
 
 static int cfs_cpt_num_estimate(void)
 {
-   int nnode = num_online_nodes();
+   int nthr = cpumask_weight(topology_sibling_cpumask(smp_processor_id()));
int ncpu = num_online_cpus();
-   int ncpt;
+   int ncpt = 1;
 
-   if (ncpu <= CPT_WEIGHT_MIN) {
-   ncpt = 1;
-   goto out;
-   }
-
-   /* generate reasonable number of CPU partitions based on

[PATCH 10/25] staging: lustre: libcfs: provide debugfs files for distance handling

2018-04-15 Thread James Simmons
From: Amir Shehata <amir.sheh...@intel.com>

On systems with a large number of NUMA nodes and cores it is easy
to incorrectly configure their use with Lustre. Provide debugfs
files which can help track down any issues.

Signed-off-by: Amir Shehata <amir.sheh...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber <o...@sgi.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/module.c | 53 +
 1 file changed, 53 insertions(+)

diff --git a/drivers/staging/lustre/lnet/libcfs/module.c 
b/drivers/staging/lustre/lnet/libcfs/module.c
index a03f924..95af000 100644
--- a/drivers/staging/lustre/lnet/libcfs/module.c
+++ b/drivers/staging/lustre/lnet/libcfs/module.c
@@ -336,6 +336,53 @@ static int proc_cpt_table(struct ctl_table *table, int 
write,
__proc_cpt_table);
 }
 
+static int __proc_cpt_distance(void *data, int write,
+  loff_t pos, void __user *buffer, int nob)
+{
+   char *buf = NULL;
+   int len = 4096;
+   int rc = 0;
+
+   if (write)
+   return -EPERM;
+
+   LASSERT(cfs_cpt_table);
+
+   while (1) {
+   buf = kzalloc(len, GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   rc = cfs_cpt_distance_print(cfs_cpt_table, buf, len);
+   if (rc >= 0)
+   break;
+
+   if (rc == -EFBIG) {
+   kfree(buf);
+   len <<= 1;
+   continue;
+   }
+   goto out;
+   }
+
+   if (pos >= rc) {
+   rc = 0;
+   goto out;
+   }
+
+   rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
+out:
+   kfree(buf);
+   return rc;
+}
+
+static int proc_cpt_distance(struct ctl_table *table, int write,
+void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+   return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
+   __proc_cpt_distance);
+}
+
 static struct ctl_table lnet_table[] = {
{
.procname = "debug",
@@ -365,6 +412,12 @@ static int proc_cpt_table(struct ctl_table *table, int 
write,
.proc_handler = _cpt_table,
},
{
+   .procname = "cpu_partition_distance",
+   .maxlen   = 128,
+   .mode = 0444,
+   .proc_handler = _cpt_distance,
+   },
+   {
.procname = "debug_log_upcall",
.data = lnet_debug_log_upcall,
.maxlen   = sizeof(lnet_debug_log_upcall),
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 09/25] staging: lustre: libcfs: use distance in cpu and node handling

2018-04-15 Thread James Simmons
From: Amir Shehata <amir.sheh...@intel.com>

Take into consideration the location of NUMA nodes and cores
when calling cfs_cpt_[un]set_cpu() and cfs_cpt_[un]set_node().
This enables functioning on platforms with 100s of cores and
NUMA nodes.

Signed-off-by: Amir Shehata <amir.sheh...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber <o...@sgi.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 192 +++--
 1 file changed, 143 insertions(+), 49 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 1e184b1..bbf89b8 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -300,11 +300,134 @@ unsigned int cfs_cpt_distance(struct cfs_cpt_table 
*cptab, int cpt1, int cpt2)
 }
 EXPORT_SYMBOL(cfs_cpt_distance);
 
+/*
+ * Calculate the maximum NUMA distance between all nodes in the
+ * from_mask and all nodes in the to_mask.
+ */
+static unsigned int cfs_cpt_distance_calculate(nodemask_t *from_mask,
+  nodemask_t *to_mask)
+{
+   unsigned int maximum;
+   unsigned int distance;
+   int from;
+   int to;
+
+   maximum = 0;
+   for_each_node_mask(from, *from_mask) {
+   for_each_node_mask(to, *to_mask) {
+   distance = node_distance(from, to);
+   if (maximum < distance)
+   maximum = distance;
+   }
+   }
+   return maximum;
+}
+
+static void cfs_cpt_add_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+   cptab->ctb_cpu2cpt[cpu] = cpt;
+
+   cpumask_set_cpu(cpu, cptab->ctb_cpumask);
+   cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+}
+
+static void cfs_cpt_del_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+   cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+   cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
+
+   cptab->ctb_cpu2cpt[cpu] = -1;
+}
+
+static void cfs_cpt_add_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+   struct cfs_cpu_partition *part;
+
+   if (!node_isset(node, *cptab->ctb_nodemask)) {
+   unsigned int dist;
+
+   /* first time node is added to the CPT table */
+   node_set(node, *cptab->ctb_nodemask);
+   cptab->ctb_node2cpt[node] = cpt;
+
+   dist = cfs_cpt_distance_calculate(cptab->ctb_nodemask,
+ cptab->ctb_nodemask);
+   cptab->ctb_distance = dist;
+   }
+
+   part = >ctb_parts[cpt];
+   if (!node_isset(node, *part->cpt_nodemask)) {
+   int cpt2;
+
+   /* first time node is added to this CPT */
+   node_set(node, *part->cpt_nodemask);
+   for (cpt2 = 0; cpt2 < cptab->ctb_nparts; cpt2++) {
+   struct cfs_cpu_partition *part2;
+   unsigned int dist;
+
+   part2 = >ctb_parts[cpt2];
+   dist = cfs_cpt_distance_calculate(part->cpt_nodemask,
+ part2->cpt_nodemask);
+   part->cpt_distance[cpt2] = dist;
+   dist = cfs_cpt_distance_calculate(part2->cpt_nodemask,
+ part->cpt_nodemask);
+   part2->cpt_distance[cpt] = dist;
+   }
+   }
+}
+
+static void cfs_cpt_del_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+   struct cfs_cpu_partition *part = >ctb_parts[cpt];
+   int cpu;
+
+   for_each_cpu(cpu, part->cpt_cpumask) {
+   /* this CPT has other CPU belonging to this node? */
+   if (cpu_to_node(cpu) == node)
+   break;
+   }
+
+   if (cpu >= nr_cpu_ids && node_isset(node,  *part->cpt_nodemask)) {
+   int cpt2;
+
+   /* No more CPUs in the node for this CPT. */
+   node_clear(node, *part->cpt_nodemask);
+   for (cpt2 = 0; cpt2 < cptab->ctb_nparts; cpt2++) {
+   struct cfs_cpu_partition *part2;
+   unsigned int dist;
+
+   part2 = >ctb_parts[cpt2];
+   if (node_isset(node, *part2->cpt_nodemask))
+   cptab->ctb_node2cpt[node] = cpt2;
+
+   dist = cfs_cpt_distance_calculate(part->cpt_nodemask,
+ part2->

[PATCH 13/25] staging: lustre: libcfs: use int type for CPT identification.

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Use int type for CPT identification to match the linux kernel
CPU identification.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23304
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h   |  2 +-
 .../staging/lustre/include/linux/libcfs/linux/linux-cpu.h  |  6 +++---
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c|  2 +-
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 14 +++---
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index bda81ab..19a3489 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -108,7 +108,7 @@ static inline int cfs_cpt_table_print(struct cfs_cpt_table 
*cptab, char *buf,
 /**
  * create a cfs_cpt_table with \a ncpt number of partitions
  */
-struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
+struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt);
 /**
  * print distance information of cpt-table
  */
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
index 4ac1670..b3bc4e7 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
@@ -55,17 +55,17 @@ struct cfs_cpu_partition {
/* NUMA distance between CPTs */
unsigned int*cpt_distance;
/* spread rotor for NUMA allocator */
-   unsigned intcpt_spread_rotor;
+   int  cpt_spread_rotor;
 };
 
 /** descriptor for CPU partitions */
 struct cfs_cpt_table {
/* spread rotor for NUMA allocator */
-   unsigned intctb_spread_rotor;
+   int ctb_spread_rotor;
/* maximum NUMA distance between all nodes in table */
unsigned intctb_distance;
/* # of CPU partitions */
-   unsigned intctb_nparts;
+   int ctb_nparts;
/* partitions tables */
struct cfs_cpu_partition*ctb_parts;
/* shadow HW CPU to CPU partition ID */
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index f9fcbb1..5d7d44d 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -43,7 +43,7 @@
 
 #define CFS_CPT_DISTANCE   1   /* Arbitrary positive value */
 
-struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt)
+struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt)
 {
struct cfs_cpt_table *cptab;
 
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 5c9cdf4..1669669 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -88,7 +88,7 @@ void cfs_cpt_table_free(struct cfs_cpt_table *cptab)
 }
 EXPORT_SYMBOL(cfs_cpt_table_free);
 
-struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt)
+struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt)
 {
struct cfs_cpt_table *cptab;
int i;
@@ -759,13 +759,13 @@ static int cfs_cpt_choose_ncpus(struct cfs_cpt_table 
*cptab, int cpt,
return rc;
 }
 
-#define CPT_WEIGHT_MIN  4u
+#define CPT_WEIGHT_MIN 4
 
-static unsigned int cfs_cpt_num_estimate(void)
+static int cfs_cpt_num_estimate(void)
 {
-   unsigned int nnode = num_online_nodes();
-   unsigned int ncpu = num_online_cpus();
-   unsigned int ncpt;
+   int nnode = num_online_nodes();
+   int ncpu = num_online_cpus();
+   int ncpt;
 
if (ncpu <= CPT_WEIGHT_MIN) {
ncpt = 1;
@@ -795,7 +795,7 @@ static unsigned int cfs_cpt_num_estimate(void)
/* config many CPU partitions on 32-bit system could consume
 * too much memory
 */
-   ncpt = min(2U, ncpt);
+   ncpt = min(2, ncpt);
 #endif
while (ncpu % ncpt)
ncpt--; /* worst case is 1 */
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 04/25] staging: lustre: libcfs: replace MAX_NUMNODES with nr_node_ids

2018-04-15 Thread James Simmons
From: Amir Shehata <amir.sheh...@intel.com>

Replace deprecated MAX_NUMNODES with nr_node_ids.

Signed-off-by: Amir Shehata <amir.sheh...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber <o...@sgi.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index d8c190c..d207ae5 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -368,7 +368,7 @@ struct cfs_cpt_table *
 {
const cpumask_t *mask;
 
-   if (node < 0 || node >= MAX_NUMNODES) {
+   if (node < 0 || node >= nr_node_ids) {
CDEBUG(D_INFO,
   "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
return 0;
@@ -385,7 +385,7 @@ struct cfs_cpt_table *
 {
const cpumask_t *mask;
 
-   if (node < 0 || node >= MAX_NUMNODES) {
+   if (node < 0 || node >= nr_node_ids) {
CDEBUG(D_INFO,
   "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
return;
@@ -809,7 +809,7 @@ struct cfs_cpt_table *
return cptab;
}
 
-   high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
+   high = node ? nr_node_ids - 1 : nr_cpu_ids - 1;
 
for (str = strim(pattern), c = 0;; c++) {
struct cfs_range_expr *range;
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 16/25] staging: lustre: libcfs: rename cpumask_var_t variables to *_mask

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Because we handle both CPU masks and core identifiers, the two can
easily be confused. To avoid this, rename the various cpumask_var_t
variables to have *_mask appended to their names.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata <amir.sheh...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 62 +++---
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index b985b3d..ae5ff58 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -685,23 +685,23 @@ int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
  * We always prefer to choose CPU in the same core/socket.
  */
 static int cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
-   cpumask_t *node, int number)
+   cpumask_t *node_mask, int number)
 {
-   cpumask_var_t socket;
-   cpumask_var_t core;
+   cpumask_var_t socket_mask;
+   cpumask_var_t core_mask;
int rc = 0;
int cpu;
 
LASSERT(number > 0);
 
-   if (number >= cpumask_weight(node)) {
-   while (!cpumask_empty(node)) {
-   cpu = cpumask_first(node);
+   if (number >= cpumask_weight(node_mask)) {
+   while (!cpumask_empty(node_mask)) {
+   cpu = cpumask_first(node_mask);
 
rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
if (!rc)
return -EINVAL;
-   cpumask_clear_cpu(cpu, node);
+   cpumask_clear_cpu(cpu, node_mask);
}
return 0;
}
@@ -711,34 +711,34 @@ static int cfs_cpt_choose_ncpus(struct cfs_cpt_table 
*cptab, int cpt,
 * As we cannot initialize a cpumask_var_t, we need
 * to alloc both before we can risk trying to free either
 */
-   if (!zalloc_cpumask_var(, GFP_NOFS))
+   if (!zalloc_cpumask_var(_mask, GFP_NOFS))
rc = -ENOMEM;
-   if (!zalloc_cpumask_var(, GFP_NOFS))
+   if (!zalloc_cpumask_var(_mask, GFP_NOFS))
rc = -ENOMEM;
if (rc)
goto out;
 
-   while (!cpumask_empty(node)) {
-   cpu = cpumask_first(node);
+   while (!cpumask_empty(node_mask)) {
+   cpu = cpumask_first(node_mask);
 
/* get cpumask for cores in the same socket */
-   cpumask_copy(socket, topology_core_cpumask(cpu));
-   cpumask_and(socket, socket, node);
+   cpumask_copy(socket_mask, topology_core_cpumask(cpu));
+   cpumask_and(socket_mask, socket_mask, node_mask);
 
-   LASSERT(!cpumask_empty(socket));
+   LASSERT(!cpumask_empty(socket_mask));
 
-   while (!cpumask_empty(socket)) {
+   while (!cpumask_empty(socket_mask)) {
int i;
 
/* get cpumask for hts in the same core */
-   cpumask_copy(core, topology_sibling_cpumask(cpu));
-   cpumask_and(core, core, node);
+   cpumask_copy(core_mask, topology_sibling_cpumask(cpu));
+   cpumask_and(core_mask, core_mask, node_mask);
 
-   LASSERT(!cpumask_empty(core));
+   LASSERT(!cpumask_empty(core_mask));
 
-   for_each_cpu(i, core) {
-   cpumask_clear_cpu(i, socket);
-   cpumask_clear_cpu(i, node);
+   for_each_cpu(i, core_mask) {
+   cpumask_clear_cpu(i, socket_mask);
+   cpumask_clear_cpu(i, node_mask);
 
rc = cfs_cpt_set_cpu(cptab, cpt, i);
if (!rc) {
@@ -749,13 +749,13 @@ static int cfs_cpt_choose_ncpus(struct cfs_cpt_table 
*cptab, int cpt,
if (!--number)
goto out;
}
-   cpu = cpumask_first(socket);
+   cpu = cpumask_first(socket_mask);
}
}
 
 out:
-   free_cpumask_var(socket);
-   free_cpumask_var(core);
+   free_cpumask_var(socket_mask);
+   free_cpumask_var(core_mask);
return rc;
 }
 
@@ -806,7 +806,7 @@ static int cfs_cpt_num_estimate(void)
 static struct

[PATCH 07/25] staging: lustre: libcfs: NUMA support

2018-04-15 Thread James Simmons
From: Amir Shehata <amir.sheh...@intel.com>

This patch adds NUMA node support. NUMA node information is stored
in the CPT table. A NUMA node mask is maintained for the entire
table as well as for each CPT to track the NUMA nodes related to
each of the CPTs. Add new function cfs_cpt_of_node() which returns
the CPT of a particular NUMA node.

Signed-off-by: Amir Shehata <amir.sheh...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7734
Reviewed-on: http://review.whamcloud.com/18916
Reviewed-by: Olaf Weber <o...@sgi.com>
Reviewed-by: Doug Oucharek <dou...@me.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 .../staging/lustre/include/linux/libcfs/libcfs_cpu.h  |  4 
 .../lustre/include/linux/libcfs/linux/linux-cpu.h |  2 ++
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c   |  6 ++
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c  | 19 +++
 4 files changed, 31 insertions(+)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 070f8fe..839ec02 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -139,6 +139,10 @@ struct cfs_cpt_table {
  */
 int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu);
 /**
+ * shadow HW node ID \a NODE to CPU-partition ID by \a cptab
+ */
+int cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node);
+/**
  * bind current thread on a CPU-partition \a cpt of \a cptab
  */
 int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
index e8bbbaa..1bed0ba 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
@@ -68,6 +68,8 @@ struct cfs_cpt_table {
int *ctb_cpu2cpt;
/* all cpus in this partition table */
cpumask_var_t   ctb_cpumask;
+   /* shadow HW node to CPU partition ID */
+   int *ctb_node2cpt;
/* all nodes in this partition table */
nodemask_t  *ctb_nodemask;
 };
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c 
b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 5ea294f..e6d1512 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -198,6 +198,12 @@ cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table 
*cptab, int cpt)
 }
 EXPORT_SYMBOL(cfs_cpt_of_cpu);
 
+int cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node)
+{
+   return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_of_node);
+
 int
 cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
 {
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 741db69..fd0c451 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -70,6 +70,7 @@
int i;
 
kvfree(cptab->ctb_cpu2cpt);
+   kvfree(cptab->ctb_node2cpt);
 
for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
struct cfs_cpu_partition *part = >ctb_parts[i];
@@ -114,6 +115,15 @@ struct cfs_cpt_table *
memset(cptab->ctb_cpu2cpt, -1,
   nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
 
+   cptab->ctb_node2cpt = kvmalloc_array(nr_node_ids,
+sizeof(cptab->ctb_node2cpt[0]),
+GFP_KERNEL);
+   if (!cptab->ctb_node2cpt)
+   goto failed;
+
+   memset(cptab->ctb_node2cpt, -1,
+  nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
+
cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
  GFP_KERNEL);
if (!cptab->ctb_parts)
@@ -484,6 +494,15 @@ struct cfs_cpt_table *
 }
 EXPORT_SYMBOL(cfs_cpt_of_cpu);
 
+int cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node)
+{
+   if (node < 0 || node > nr_node_ids)
+   return CFS_CPT_ANY;
+
+   return cptab->ctb_node2cpt[node];
+}
+EXPORT_SYMBOL(cfs_cpt_of_node);
+
 int
 cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
 {
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 19/25] staging: lustre: libcfs: update debug messages

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

For cfs_cpt_bind() change the CERROR to CDEBUG. Make the debug
message in cfs_cpt_table_create_pattern() more understandable.
Report the rc value when cfs_cpt_table_create() fails.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata <amir.sheh...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index c4f53ab..32ebd0f 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -655,7 +655,8 @@ int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
}
 
if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) {
-   CERROR("No online CPU found in CPU partition %d, did someone do 
CPU hotplug on system? You might need to reload Lustre modules to keep system 
working well.\n",
+   CDEBUG(D_INFO,
+  "No online CPU found in CPU partition %d, did someone do 
CPU hotplug on system? You might need to reload Lustre modules to keep system 
working well.\n",
   cpt);
return -EINVAL;
}
@@ -886,8 +887,8 @@ static struct cfs_cpt_table *cfs_cpt_table_create(int ncpt)
 failed_mask:
free_cpumask_var(node_mask);
 failed:
-   CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, 
online HW nodes: %d, HW cpus: %d.\n",
-  ncpt, num_online_nodes(), num_online_cpus());
+   CERROR("Failed (rc = %d) to setup CPU partition table with %d 
partitions, online HW NUMA nodes: %d, HW CPU cores: %d.\n",
+  rc, ncpt, num_online_nodes(), num_online_cpus());
 
if (cptab)
cfs_cpt_table_free(cptab);
@@ -1002,7 +1003,7 @@ static struct cfs_cpt_table 
*cfs_cpt_table_create_pattern(char *pattern)
 
bracket = strchr(str, ']');
if (!bracket) {
-   CERROR("missing right bracket for cpt %d, %s\n",
+   CERROR("Missing right bracket for partition %d, %s\n",
   cpt, str);
goto failed;
}
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 18/25] staging: lustre: libcfs: clear up failure path in cfs_cpt_*_print

2018-04-15 Thread James Simmons
From: Dmitry Eremin <dmitry.ere...@intel.com>

Currently both cfs_cpt_table_print() and cfs_cpt_distance_print()
handle the error path in a confusing way. Simplify it so it just
returns -E2BIG on failure instead of testing the rc value before
exiting.

Signed-off-by: Dmitry Eremin <dmitry.ere...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8703
Reviewed-on: https://review.whamcloud.com/23222
Reviewed-by: Amir Shehata <amir.sheh...@intel.com>
Reviewed-by: James Simmons <uja.o...@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 435ee8e..c4f53ab 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -155,7 +155,7 @@ struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt)
 int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
 {
char *tmp = buf;
-   int rc = -EFBIG;
+   int rc;
int i;
int j;
 
@@ -183,19 +183,17 @@ int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char 
*buf, int len)
len--;
}
 
-   rc = 0;
-err:
-   if (rc < 0)
-   return rc;
-
return tmp - buf;
+
+err:
+   return -E2BIG;
 }
 EXPORT_SYMBOL(cfs_cpt_table_print);
 
 int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len)
 {
char *tmp = buf;
-   int rc = -EFBIG;
+   int rc;
int i;
int j;
 
@@ -223,12 +221,11 @@ int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, 
char *buf, int len)
tmp++;
len--;
}
-   rc = 0;
-err:
-   if (rc < 0)
-   return rc;
 
return tmp - buf;
+
+err:
+   return -E2BIG;
 }
 EXPORT_SYMBOL(cfs_cpt_distance_print);
 
-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 25/25] staging: lustre: libcfs: merge UMP and SMP libcfs cpu header code

2018-04-15 Thread James Simmons
Currently we have two headers: linux-cpu.h, which contains the SMP
version, and libcfs_cpu.h, which contains the UMP version. We can
simplify the headers into a single header which handles both cases.

Signed-off-by: James Simmons <uja.o...@yahoo.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9859
Reviewed-on: https://review.whamcloud.com/30873
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Dmitry Eremin <dmitry.ere...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 .../lustre/include/linux/libcfs/libcfs_cpu.h   | 67 +++--
 .../lustre/include/linux/libcfs/linux/libcfs.h |  1 -
 .../lustre/include/linux/libcfs/linux/linux-cpu.h  | 84 --
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c| 18 ++---
 4 files changed, 52 insertions(+), 118 deletions(-)
 delete mode 100644 
drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 19a3489..0611fcd 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -72,32 +72,55 @@
 #ifndef __LIBCFS_CPU_H__
 #define __LIBCFS_CPU_H__
 
-/* any CPU partition */
-#define CFS_CPT_ANY(-1)
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #ifdef CONFIG_SMP
-/**
- * print string information of cpt-table
- */
-int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
-#else /* !CONFIG_SMP */
+
+/** virtual processing unit */
+struct cfs_cpu_partition {
+   /* CPUs mask for this partition */
+   cpumask_var_tcpt_cpumask;
+   /* nodes mask for this partition */
+   nodemask_t  *cpt_nodemask;
+   /* NUMA distance between CPTs */
+   unsigned int*cpt_distance;
+   /* spread rotor for NUMA allocator */
+   int  cpt_spread_rotor;
+   /* NUMA node if cpt_nodemask is empty */
+   int  cpt_node;
+};
+#endif /* CONFIG_SMP */
+
+/** descriptor for CPU partitions */
 struct cfs_cpt_table {
+#ifdef CONFIG_SMP
+   /* spread rotor for NUMA allocator */
+   int  ctb_spread_rotor;
+   /* maximum NUMA distance between all nodes in table */
+   unsigned int ctb_distance;
+   /* partitions tables */
+   struct cfs_cpu_partition*ctb_parts;
+   /* shadow HW CPU to CPU partition ID */
+   int *ctb_cpu2cpt;
+   /* shadow HW node to CPU partition ID */
+   int *ctb_node2cpt;
/* # of CPU partitions */
-   int ctb_nparts;
-   /* cpu mask */
-   cpumask_var_t   ctb_mask;
-   /* node mask */
-   nodemask_t  ctb_nodemask;
-   /* version */
-   u64 ctb_version;
+   int  ctb_nparts;
+   /* all nodes in this partition table */
+   nodemask_t  *ctb_nodemask;
+#else
+   nodemask_t   ctb_nodemask;
+#endif /* CONFIG_SMP */
+   /* all cpus in this partition table */
+   cpumask_var_tctb_cpumask;
 };
 
-static inline int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf,
- int len)
-{
-   return 0;
-}
-#endif /* CONFIG_SMP */
+/* any CPU partition */
+#define CFS_CPT_ANY(-1)
 
 extern struct cfs_cpt_table*cfs_cpt_table;
 
@@ -110,6 +133,10 @@ static inline int cfs_cpt_table_print(struct cfs_cpt_table 
*cptab, char *buf,
  */
 struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt);
 /**
+ * print string information of cpt-table
+ */
+int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
+/**
  * print distance information of cpt-table
  */
 int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len);
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h 
b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h
index 07d3cb2..07610be 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h
@@ -78,7 +78,6 @@
 #include 
 #include 
 #include 
-#include "linux-cpu.h"
 
 #if !defined(__x86_64__)
 # ifdef __ia64__
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h 
b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
deleted file mode 100644
index ed4351b..000
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMO

[PATCH 00/25] staging: lustre: libcfs: SMP rework

2018-04-15 Thread James Simmons
Recently lustre support has been expanded to extreme machines with as
many as 1000+ cores. On the other end lustre also has been ported
to platforms like ARM and KNL which have unique NUMA and core setups.
For example some devices exist that have NUMA nodes with no cores.
With these new platforms the limitations of Lustre's SMP code
came to light so a lot of work was needed. This resulted in this
patch set which has been tested on these platforms.

Amir Shehata (9):
  staging: lustre: libcfs: implement cfs_cpt_cpumask for UMP case
  staging: lustre: libcfs: replace MAX_NUMNODES with nr_node_ids
  staging: lustre: libcfs: remove excess space
  staging: lustre: libcfs: replace num_possible_cpus() with nr_cpu_ids
  staging: lustre: libcfs: NUMA support
  staging: lustre: libcfs: add cpu distance handling
  staging: lustre: libcfs: use distance in cpu and node handling
  staging: lustre: libcfs: provide debugfs files for distance handling
  staging: lustre: libcfs: invert error handling for cfs_cpt_table_print

Dmitry Eremin (15):
  staging: lustre: libcfs: remove useless CPU partition code
  staging: lustre: libcfs: rename variable i to cpu
  staging: lustre: libcfs: fix libcfs_cpu coding style
  staging: lustre: libcfs: use int type for CPT identification.
  staging: lustre: libcfs: rename i to node for cfs_cpt_set_nodemask
  staging: lustre: libcfs: rename i to cpu for cfs_cpt_bind
  staging: lustre: libcfs: rename cpumask_var_t variables to *_mask
  staging: lustre: libcfs: rename goto label in cfs_cpt_table_print
  staging: lustre: libcfs: clear up failure patch in cfs_cpt_*_print
  staging: lustre: libcfs: update debug messages
  staging: lustre: libcfs: make tolerant to offline CPUs and empty NUMA nodes
  staging: lustre: libcfs: report NUMA node instead of just node
  staging: lustre: libcfs: update debug messages in CPT creation code
  staging: lustre: libcfs: rework CPU pattern parsing code
  staging: lustre: libcfs: change CPT estimate algorithm

James Simmons (1):
  staging: lustre: libcfs: merge UMP and SMP libcfs cpu header code

 .../lustre/include/linux/libcfs/libcfs_cpu.h   | 135 +--
 .../lustre/include/linux/libcfs/linux/libcfs.h |   1 -
 .../lustre/include/linux/libcfs/linux/linux-cpu.h  |  78 --
 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c| 126 ++-
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   | 912 +++--
 drivers/staging/lustre/lnet/libcfs/module.c|  53 ++
 drivers/staging/lustre/lnet/lnet/lib-msg.c |   2 +
 7 files changed, 676 insertions(+), 631 deletions(-)
 delete mode 100644 
drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h

-- 
1.8.3.1

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] staging: lustre: libcfs: use dynamic minors for /dev/{lnet, obd}

2018-03-30 Thread James Simmons
From: "John L. Hammond" <john.hamm...@intel.com>

Request dynamic minor allocation when registering /dev/lnet and
/dev/obd.

Signed-off-by: John L. Hammond <john.hamm...@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-100086
Reviewed-on: https://review.whamcloud.com/29741
Reviewed-by: Andreas Dilger <andreas.dil...@intel.com>
Reviewed-by: Jian Yu <jian...@intel.com>
Reviewed-by: Oleg Drokin <oleg.dro...@intel.com>
Signed-off-by: James Simmons <jsimm...@infradead.org>
---
 drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h|  1 -
 drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h  | 11 ---
 .../staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h   |  2 --
 drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c|  1 -
 drivers/staging/lustre/lnet/libcfs/linux/linux-module.c   |  5 ++---
 drivers/staging/lustre/lnet/libcfs/module.c   |  1 +
 drivers/staging/lustre/lustre/obdclass/class_obd.c|  6 --
 drivers/staging/lustre/lustre/obdclass/linux/linux-module.c   |  3 +--
 8 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h 
b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h
index 30e333a..cf4c606 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h
@@ -50,7 +50,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h 
b/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h
index d9da625..cccb32d 100644
--- a/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h
+++ b/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h
@@ -119,16 +119,5 @@ struct lnet_fault_stat {
 
 #define LNET_DEV_ID 0
 #define LNET_DEV_PATH "/dev/lnet"
-#define LNET_DEV_MAJOR 10
-#define LNET_DEV_MINOR 240
-#define OBD_DEV_ID 1
-#define OBD_DEV_NAME "obd"
-#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME
-#define OBD_DEV_MAJOR 10
-#define OBD_DEV_MINOR 241
-#define SMFS_DEV_ID  2
-#define SMFS_DEV_PATH "/dev/snapdev"
-#define SMFS_DEV_MAJOR 10
-#define SMFS_DEV_MINOR 242
 
 #endif
diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h 
b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h
index 9590864..6e4e109 100644
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h
+++ b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h
@@ -51,8 +51,6 @@ enum md_echo_cmd {
 #define OBD_DEV_ID 1
 #define OBD_DEV_NAME "obd"
 #define OBD_DEV_PATH "/dev/" OBD_DEV_NAME
-#define OBD_DEV_MAJOR 10
-#define OBD_DEV_MINOR 241
 
 #define OBD_IOCTL_VERSION  0x00010004
 #define OBD_DEV_BY_DEVNAME 0xd0de
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
index 0092166..1d728f1 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
@@ -48,7 +48,6 @@
 #include 
 #include 
 #include 
-#include 
 
 # define DEBUG_SUBSYSTEM S_LNET
 
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
index ddf6256..c8908e8 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
@@ -33,10 +33,9 @@
 
 #define DEBUG_SUBSYSTEM S_LNET
 
+#include 
 #include 
 
-#define LNET_MINOR 240
-
 static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
 {
size_t len = sizeof(*data);
@@ -191,7 +190,7 @@ int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
 };
 
 struct miscdevice libcfs_dev = {
-   .minor = LNET_MINOR,
+   .minor = MISC_DYNAMIC_MINOR,
.name = "lnet",
.fops = _fops,
 };
diff --git a/drivers/staging/lustre/lnet/libcfs/module.c 
b/drivers/staging/lustre/lnet/libcfs/module.c
index a03f924..4b9acd7 100644
--- a/drivers/staging/lustre/lnet/libcfs/module.c
+++ b/drivers/staging/lustre/lnet/libcfs/module.c
@@ -30,6 +30,7 @@
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  */
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c 
b/drivers/staging/lustre/lustre/obdclass/class_obd.c
index 3e24b76..7b5be6b 100644
--- a/drivers/staging/lustre/lustre/obdclass/class_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c
@@ -32,7 +32,9 @@
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
-# include 
+
+#include 
+#include 
 
 #include 
 #include 
@@ -462,7 +464,7 @@ static int __init obdclass_init(void)
 
err = misc_register(_psdev);
if (err) {
-   CERROR("c

Re: [PATCH 2/3] staging: lustre: lmv: correctly iput lmo_root

2018-02-26 Thread James Simmons

> Commit 8f18c8a48b73 ("staging: lustre: lmv: separate master object
> with master stripe") changed how lmo_root inodes were managed,
> particularly when LMV_HASH_FLAG_MIGRATION is not set.
> Previously lsm_md_oinfo[0].lmo_root was always a borrowed
> inode reference and didn't need to be iput().
> Since the change, that special case only applies when
> LMV_HASH_FLAG_MIGRATION is set.
> 
> In the upstream (lustre-release) version of this patch [Commit
> 60e07b972114 ("LU-4690 lod: separate master object with master
> stripe")] the for loop in the lmv_unpack_md() was changed to count
> from 0 and to ignore entry 0 if LMV_HASH_FLAG_MIGRATION is set.
> In the patch that got applied to Linux, that change was missing,
> so lsm_md_oinfo[0].lmo_root is never iput().
> This results in a "VFS: Busy inodes" warning at unmount.
> 
> Fixes: 8f18c8a48b73 ("staging: lustre: lmv: separate master object with 
> master stripe")
> Signed-off-by: NeilBrown <ne...@suse.com>

Reviewed-by: James Simmons <jsimm...@infradead.org>


> ---
>  drivers/staging/lustre/lustre/lmv/lmv_obd.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c 
> b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
> index 179651531862..e8a9b9902c37 100644
> --- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
> +++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
> @@ -2695,7 +2695,7 @@ static int lmv_unpackmd(struct obd_export *exp, struct 
> lmv_stripe_md **lsmp,
>   if (lsm && !lmm) {
>   int i;
>  
> - for (i = 1; i < lsm->lsm_md_stripe_count; i++) {
> + for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
>   /*
>* For migrating inode, the master stripe and master
>* object will be the same, so do not need iput, see
> 
> 
> 
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


  1   2   3   4   5   6   7   8   9   10   >