date:20161028

From: Frank Rowand 

Add a single pr_err() to cover a range of errors that were reported
by several pr_err() that were removed earlier in this series.

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index f842dbd1585c..eb78010c21a3 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -293,13 +293,17 @@ int of_resolve_phandles(struct device_node *overlay)
phandle phandle, phandle_delta;
int err;
 
+   tree_symbols = NULL;
+
if (!overlay) {
pr_err("null overlay\n");
-   return -EINVAL;
+   err = -EINVAL;
+   goto err_out;
}
if (!of_node_check_flag(overlay, OF_DETACHED)) {
pr_err("overlay not detached\n");
-   return -EINVAL;
+   err = -EINVAL;
+   goto err_out;
}
 
phandle_delta = live_tree_max_phandle() + 1;
@@ -311,7 +315,7 @@ int of_resolve_phandles(struct device_node *overlay)
 
err = adjust_local_phandle_references(local_fixups, overlay, 
phandle_delta);
if (err)
-   return err;
+   goto err_out;
 
overlay_symbols = NULL;
overlay_fixups = NULL;
@@ -333,7 +337,7 @@ int of_resolve_phandles(struct device_node *overlay)
if (!tree_symbols) {
pr_err("no symbols in root of device tree.\n");
err = -EINVAL;
-   goto out;
+   goto err_out;
}
 
for_each_property_of_node(overlay_fixups, prop) {
@@ -345,12 +349,12 @@ int of_resolve_phandles(struct device_node *overlay)
err = of_property_read_string(tree_symbols,
prop->name, &refpath);
if (err)
-   goto out;
+   goto err_out;
 
refnode = of_find_node_by_path(refpath);
if (!refnode) {
err = -ENOENT;
-   goto out;
+   goto err_out;
}
 
phandle = refnode->phandle;
@@ -361,6 +365,8 @@ int of_resolve_phandles(struct device_node *overlay)
break;
}
 
+err_out:
+   pr_err("overlay phandle fixup failed: %d\n", err);
 out:
of_node_put(tree_symbols);
 
-- 
1.9.1

[PATCH 03/12] of: Convert comparisons to zero or NULL to logical expressions

From: Frank Rowand 

Convert comparisons to zero or NULL to logical expressions.  A
small number of such comparisons remain where they provide more
clarity of the numeric nature of a variable.

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 42 --
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index c61ba99a1792..31fd3800787a 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -33,10 +33,10 @@ static struct device_node 
*__of_find_node_by_full_name(struct device_node *node,
 {
struct device_node *child, *found;
 
-   if (node == NULL)
+   if (!node)
return NULL;
 
-   if (of_node_cmp(node->full_name, full_name) == 0)
+   if (!of_node_cmp(node->full_name, full_name))
return of_node_get(node);
 
for_each_child_of_node(node, child) {
@@ -86,8 +86,8 @@ static void __of_adjust_tree_phandles(struct device_node 
*node,
 
for_each_property_of_node(node, prop) {
 
-   if (of_prop_cmp(prop->name, "phandle") != 0 &&
-   of_prop_cmp(prop->name, "linux,phandle") != 0)
+   if (of_prop_cmp(prop->name, "phandle") &&
+   of_prop_cmp(prop->name, "linux,phandle"))
continue;
 
if (prop->length < 4)
@@ -140,7 +140,7 @@ static int __of_adjust_phandle_ref(struct device_node *node,
 
*s++ = '\0';
err = kstrtoint(s, 10, &offset);
-   if (err != 0)
+   if (err)
goto err_fail;
 
refnode = __of_find_node_by_full_name(node, nodestr);
@@ -148,7 +148,7 @@ static int __of_adjust_phandle_ref(struct device_node *node,
continue;
 
for_each_property_of_node(refnode, sprop) {
-   if (of_prop_cmp(sprop->name, propstr) == 0)
+   if (!of_prop_cmp(sprop->name, propstr))
break;
}
of_node_put(refnode);
@@ -193,15 +193,15 @@ static int __of_adjust_tree_phandle_references(struct 
device_node *node,
unsigned int off;
phandle phandle;
 
-   if (node == NULL)
+   if (!node)
return 0;
 
for_each_property_of_node(node, rprop) {
 
/* skip properties added automatically */
-   if (of_prop_cmp(rprop->name, "name") == 0 ||
-   of_prop_cmp(rprop->name, "phandle") == 0 ||
-   of_prop_cmp(rprop->name, "linux,phandle") == 0)
+   if (!of_prop_cmp(rprop->name, "name") ||
+   !of_prop_cmp(rprop->name, "phandle") ||
+   !of_prop_cmp(rprop->name, "linux,phandle"))
continue;
 
if ((rprop->length % 4) != 0 || rprop->length == 0)
@@ -209,11 +209,11 @@ static int __of_adjust_tree_phandle_references(struct 
device_node *node,
count = rprop->length / sizeof(__be32);
 
for_each_property_of_node(target, sprop) {
-   if (of_prop_cmp(sprop->name, rprop->name) == 0)
+   if (!of_prop_cmp(sprop->name, rprop->name))
break;
}
 
-   if (sprop == NULL)
+   if (!sprop)
return -EINVAL;
 
for (i = 0; i < count; i++) {
@@ -232,7 +232,7 @@ static int __of_adjust_tree_phandle_references(struct 
device_node *node,
for_each_child_of_node(node, child) {
 
for_each_child_of_node(target, childtarget)
-   if (__of_node_name_cmp(child, childtarget) == 0)
+   if (!__of_node_name_cmp(child, childtarget))
break;
 
if (!childtarget)
@@ -240,7 +240,7 @@ static int __of_adjust_tree_phandle_references(struct 
device_node *node,
 
err = __of_adjust_tree_phandle_references(child, childtarget,
phandle_delta);
-   if (err != 0)
+   if (err)
return err;
}
 
@@ -282,13 +282,13 @@ int of_resolve_phandles(struct device_node *resolve)
 
childroot = NULL;
for_each_child_of_node(resolve, childroot)
-   if (of_node_cmp(childroot->name, "__local_fixups__") == 0)
+   if (!of_node_cmp(childroot->name, "__local_fixups__"))
break;
 
if (childroot != NULL) {
err = __of_adjust_tree_phandle_references(childroot,
resolve, 0);
-   if (err != 0)
+   if (err)
return err;
 
BUG_ON(__of_adjust_tree_phandle_references(childroot,
@@ -303,12 +303,10 @@ int of_resolve_phandles(struct device_node *resolve)
 
for_each_child_of_node(resolve,

[PATCH 08/12] of: Remove redundant size check

From: Frank Rowand 

Remove a redundant check of buffer size.

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 708daca1d522..76c09cb57eae 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -216,7 +216,7 @@ static int adjust_local_phandle_references(struct 
device_node *local_fixups,
 
for (i = 0; i < count; i++) {
off = be32_to_cpu(((__be32 *)prop_fix->value)[i]);
-   if (off >= prop->length || (off + 4) > prop->length)
+   if ((off + 4) > prop->length)
return -EINVAL;
 
phandle = be32_to_cpu(*(__be32 *)(prop->value + off));
-- 
1.9.1

[PATCH 11/12] of: Move setting of pointer to beside test for non-null

From: Frank Rowand 

Value of pointer was calculated in an earlier block than
where it was used.  Move it down into the block where it
is used, immediately before where it is checked to be valid.

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index eb78010c21a3..53353cc8f2bb 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -320,8 +320,6 @@ int of_resolve_phandles(struct device_node *overlay)
overlay_symbols = NULL;
overlay_fixups = NULL;
 
-   tree_symbols = of_find_node_by_path("/__symbols__");
-
for_each_child_of_node(overlay, child) {
if (!of_node_cmp(child->name, "__symbols__"))
overlay_symbols = child;
@@ -334,6 +332,7 @@ int of_resolve_phandles(struct device_node *overlay)
goto out;
}
 
+   tree_symbols = of_find_node_by_path("/__symbols__");
if (!tree_symbols) {
pr_err("no symbols in root of device tree.\n");
err = -EINVAL;
-- 
1.9.1

[PATCH 04/12] of: Rename functions to more accurately reflect what they do

From: Frank Rowand 

Some function names are misleading or do not provide a good
sense of what they do.  Rename the functions to be more
informative.

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 31fd3800787a..3d123b612789 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -53,7 +53,7 @@ static struct device_node *__of_find_node_by_full_name(struct 
device_node *node,
 /*
  * Find live tree's maximum phandle value.
  */
-static phandle of_get_tree_max_phandle(void)
+static phandle live_tree_max_phandle(void)
 {
struct device_node *node;
phandle phandle;
@@ -74,7 +74,7 @@ static phandle of_get_tree_max_phandle(void)
 /*
  * Adjust a subtree's phandle values by a given delta.
  */
-static void __of_adjust_tree_phandles(struct device_node *node,
+static void adjust_overlay_phandles(struct device_node *node,
int phandle_delta)
 {
struct device_node *child;
@@ -101,10 +101,10 @@ static void __of_adjust_tree_phandles(struct device_node 
*node,
}
 
for_each_child_of_node(node, child)
-   __of_adjust_tree_phandles(child, phandle_delta);
+   adjust_overlay_phandles(child, phandle_delta);
 }
 
-static int __of_adjust_phandle_ref(struct device_node *node,
+static int update_usages_of_a_phandle_reference(struct device_node *node,
struct property *rprop, int value)
 {
phandle phandle;
@@ -184,7 +184,7 @@ static int __of_node_name_cmp(const struct device_node *dn1,
  * Does not take any devtree locks so make sure you call this on a tree
  * which is at the detached state.
  */
-static int __of_adjust_tree_phandle_references(struct device_node *node,
+static int adjust_local_phandle_references(struct device_node *node,
struct device_node *target, int phandle_delta)
 {
struct device_node *child, *childtarget;
@@ -238,7 +238,7 @@ static int __of_adjust_tree_phandle_references(struct 
device_node *node,
if (!childtarget)
return -EINVAL;
 
-   err = __of_adjust_tree_phandle_references(child, childtarget,
+   err = adjust_local_phandle_references(child, childtarget,
phandle_delta);
if (err)
return err;
@@ -277,8 +277,8 @@ int of_resolve_phandles(struct device_node *resolve)
if (!resolve || !of_node_check_flag(resolve, OF_DETACHED))
return -EINVAL;
 
-   phandle_delta = of_get_tree_max_phandle() + 1;
-   __of_adjust_tree_phandles(resolve, phandle_delta);
+   phandle_delta = live_tree_max_phandle() + 1;
+   adjust_overlay_phandles(resolve, phandle_delta);
 
childroot = NULL;
for_each_child_of_node(resolve, childroot)
@@ -286,12 +286,12 @@ int of_resolve_phandles(struct device_node *resolve)
break;
 
if (childroot != NULL) {
-   err = __of_adjust_tree_phandle_references(childroot,
+   err = adjust_local_phandle_references(childroot,
resolve, 0);
if (err)
return err;
 
-   BUG_ON(__of_adjust_tree_phandle_references(childroot,
+   BUG_ON(adjust_local_phandle_references(childroot,
resolve, phandle_delta));
}
 
@@ -344,7 +344,7 @@ int of_resolve_phandles(struct device_node *resolve)
phandle = refnode->phandle;
of_node_put(refnode);
 
-   err = __of_adjust_phandle_ref(resolve, rprop, phandle);
+   err = update_usages_of_a_phandle_reference(resolve, rprop, 
phandle);
if (err)
break;
}
-- 
1.9.1

[PATCH 12/12] of: Remove unused variable overlay_symbols

From: Frank Rowand 

Remove unused pointer to node "__symbols__".

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 53353cc8f2bb..783bd09463b5 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -287,7 +287,7 @@ static int adjust_local_phandle_references(struct 
device_node *local_fixups,
 int of_resolve_phandles(struct device_node *overlay)
 {
struct device_node *child, *local_fixups, *refnode;
-   struct device_node *tree_symbols, *overlay_symbols, *overlay_fixups;
+   struct device_node *tree_symbols, *overlay_fixups;
struct property *prop;
const char *refpath;
phandle phandle, phandle_delta;
@@ -317,12 +317,9 @@ int of_resolve_phandles(struct device_node *overlay)
if (err)
goto err_out;
 
-   overlay_symbols = NULL;
overlay_fixups = NULL;
 
for_each_child_of_node(overlay, child) {
-   if (!of_node_cmp(child->name, "__symbols__"))
-   overlay_symbols = child;
if (!of_node_cmp(child->name, "__fixups__"))
overlay_fixups = child;
}
-- 
1.9.1

[PATCH 02/12] of: Remove excessive printks to reduce clutter.

From: Frank Rowand 

Remove extra debug and error printks.  A single pr_err() will
be added at the end of this series to replace many of these
error messages.

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 53 ---
 1 file changed, 8 insertions(+), 45 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 4ff0220d7aa2..c61ba99a1792 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -115,11 +115,8 @@ static int __of_adjust_phandle_ref(struct device_node 
*node,
int err = 0;
 
propval = kmalloc(rprop->length, GFP_KERNEL);
-   if (!propval) {
-   pr_err("%s: Could not copy value of '%s'\n",
-   __func__, rprop->name);
+   if (!propval)
return -ENOMEM;
-   }
memcpy(propval, rprop->value, rprop->length);
 
propend = propval + rprop->length;
@@ -129,8 +126,6 @@ static int __of_adjust_phandle_ref(struct device_node *node,
nodestr = propcur;
s = strchr(propcur, ':');
if (!s) {
-   pr_err("%s: Illegal symbol entry '%s' (1)\n",
-   __func__, propcur);
err = -EINVAL;
goto err_fail;
}
@@ -139,26 +134,18 @@ static int __of_adjust_phandle_ref(struct device_node 
*node,
propstr = s;
s = strchr(s, ':');
if (!s) {
-   pr_err("%s: Illegal symbol entry '%s' (2)\n",
-   __func__, (char *)rprop->value);
err = -EINVAL;
goto err_fail;
}
 
*s++ = '\0';
err = kstrtoint(s, 10, &offset);
-   if (err != 0) {
-   pr_err("%s: Could get offset '%s'\n",
-   __func__, (char *)rprop->value);
+   if (err != 0)
goto err_fail;
-   }
 
refnode = __of_find_node_by_full_name(node, nodestr);
-   if (!refnode) {
-   pr_warn("%s: Could not find refnode '%s'\n",
-   __func__, (char *)rprop->value);
+   if (!refnode)
continue;
-   }
 
for_each_property_of_node(refnode, sprop) {
if (of_prop_cmp(sprop->name, propstr) == 0)
@@ -167,8 +154,6 @@ static int __of_adjust_phandle_ref(struct device_node *node,
of_node_put(refnode);
 
if (!sprop) {
-   pr_err("%s: Could not find property '%s'\n",
-   __func__, (char *)rprop->value);
err = -ENOENT;
goto err_fail;
}
@@ -219,11 +204,8 @@ static int __of_adjust_tree_phandle_references(struct 
device_node *node,
of_prop_cmp(rprop->name, "linux,phandle") == 0)
continue;
 
-   if ((rprop->length % 4) != 0 || rprop->length == 0) {
-   pr_err("%s: Illegal property (size) '%s' @%s\n",
-   __func__, rprop->name, node->full_name);
+   if ((rprop->length % 4) != 0 || rprop->length == 0)
return -EINVAL;
-   }
count = rprop->length / sizeof(__be32);
 
for_each_property_of_node(target, sprop) {
@@ -231,21 +213,13 @@ static int __of_adjust_tree_phandle_references(struct 
device_node *node,
break;
}
 
-   if (sprop == NULL) {
-   pr_err("%s: Could not find target property '%s' @%s\n",
-   __func__, rprop->name, node->full_name);
+   if (sprop == NULL)
return -EINVAL;
-   }
 
for (i = 0; i < count; i++) {
off = be32_to_cpu(((__be32 *)rprop->value)[i]);
-   if (off >= sprop->length ||
-   (off + 4) > sprop->length) {
-   pr_err("%s: Illegal property '%s' @%s\n",
-   __func__, rprop->name,
-   node->full_name);
+   if (off >= sprop->length || (off + 4) > sprop->length)
return -EINVAL;
-   }
 
if (phandle_delta) {
phandle = be32_to_cpu(*(__be32 *)(sprop->value 
+ off));
@@ -261,11 +235,8 @@ static int __of_adjust_tree_phandle_references(struct 
device_node *node,
if (__of_node_name_cmp(child, childtarget) == 0)
break;
 
-   if (!childtar

[PATCH 01/12] of: Remove comments that state the obvious, to reduce clutter

From: Frank Rowand 

Remove comments that report what is obvious from the code.

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 31 ++-
 1 file changed, 2 insertions(+), 29 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 46325d6394cf..4ff0220d7aa2 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -36,7 +36,6 @@ static struct device_node *__of_find_node_by_full_name(struct 
device_node *node,
if (node == NULL)
return NULL;
 
-   /* check */
if (of_node_cmp(node->full_name, full_name) == 0)
return of_node_get(node);
 
@@ -60,7 +59,6 @@ static phandle of_get_tree_max_phandle(void)
phandle phandle;
unsigned long flags;
 
-   /* now search recursively */
raw_spin_lock_irqsave(&devtree_lock, flags);
phandle = 0;
for_each_of_allnodes(node) {
@@ -75,8 +73,6 @@ static phandle of_get_tree_max_phandle(void)
 
 /*
  * Adjust a subtree's phandle values by a given delta.
- * Makes sure not to just adjust the device node's phandle value,
- * but modify the phandle properties values as well.
  */
 static void __of_adjust_tree_phandles(struct device_node *node,
int phandle_delta)
@@ -85,32 +81,25 @@ static void __of_adjust_tree_phandles(struct device_node 
*node,
struct property *prop;
phandle phandle;
 
-   /* first adjust the node's phandle direct value */
if (node->phandle != 0 && node->phandle != OF_PHANDLE_ILLEGAL)
node->phandle += phandle_delta;
 
-   /* now adjust phandle & linux,phandle values */
for_each_property_of_node(node, prop) {
 
-   /* only look for these two */
if (of_prop_cmp(prop->name, "phandle") != 0 &&
of_prop_cmp(prop->name, "linux,phandle") != 0)
continue;
 
-   /* must be big enough */
if (prop->length < 4)
continue;
 
-   /* read phandle value */
phandle = be32_to_cpup(prop->value);
-   if (phandle == OF_PHANDLE_ILLEGAL)  /* unresolved */
+   if (phandle == OF_PHANDLE_ILLEGAL)
continue;
 
-   /* adjust */
*(uint32_t *)prop->value = cpu_to_be32(node->phandle);
}
 
-   /* now do the children recursively */
for_each_child_of_node(node, child)
__of_adjust_tree_phandles(child, phandle_delta);
 }
@@ -125,7 +114,6 @@ static int __of_adjust_phandle_ref(struct device_node *node,
int offset, propcurlen;
int err = 0;
 
-   /* make a copy */
propval = kmalloc(rprop->length, GFP_KERNEL);
if (!propval) {
pr_err("%s: Could not copy value of '%s'\n",
@@ -165,7 +153,6 @@ static int __of_adjust_phandle_ref(struct device_node *node,
goto err_fail;
}
 
-   /* look into the resolve node for the full path */
refnode = __of_find_node_by_full_name(node, nodestr);
if (!refnode) {
pr_warn("%s: Could not find refnode '%s'\n",
@@ -173,7 +160,6 @@ static int __of_adjust_phandle_ref(struct device_node *node,
continue;
}
 
-   /* now find the property */
for_each_property_of_node(refnode, sprop) {
if (of_prop_cmp(sprop->name, propstr) == 0)
break;
@@ -240,7 +226,6 @@ static int __of_adjust_tree_phandle_references(struct 
device_node *node,
}
count = rprop->length / sizeof(__be32);
 
-   /* now find the target property */
for_each_property_of_node(target, sprop) {
if (of_prop_cmp(sprop->name, rprop->name) == 0)
break;
@@ -254,7 +239,6 @@ static int __of_adjust_tree_phandle_references(struct 
device_node *node,
 
for (i = 0; i < count; i++) {
off = be32_to_cpu(((__be32 *)rprop->value)[i]);
-   /* make sure the offset doesn't overstep (even wrap) */
if (off >= sprop->length ||
(off + 4) > sprop->length) {
pr_err("%s: Illegal property '%s' @%s\n",
@@ -264,7 +248,6 @@ static int __of_adjust_tree_phandle_references(struct 
device_node *node,
}
 
if (phandle_delta) {
-   /* adjust */
phandle = be32_to_cpu(*(__be32 *)(sprop->value 
+ off));
phandle += phandle_delta;
*(__be32 *)(sprop->value + off) = 
cpu_to_be32(phandle);
@@ -320,22 +303,18 @@ int of_resolve_phandles(struct device_node *resolve)
if (re

[PATCH 07/12] of: Update structure of code to be clearer, also remove BUG_ON()

From: Frank Rowand 

Remove BUG_ON(), which is frowned upon and not needed here.
Restructure to remove some excessive complexity.

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 48 +---
 1 file changed, 17 insertions(+), 31 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 0778747cdd58..708daca1d522 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -136,8 +136,8 @@ static int update_usages_of_a_phandle_reference(struct 
device_node *overlay,
err = -EINVAL;
goto err_fail;
}
-
*s++ = '\0';
+
err = kstrtoint(s, 10, &offset);
if (err)
goto err_fail;
@@ -219,11 +219,9 @@ static int adjust_local_phandle_references(struct 
device_node *local_fixups,
if (off >= prop->length || (off + 4) > prop->length)
return -EINVAL;
 
-   if (phandle_delta) {
-   phandle = be32_to_cpu(*(__be32 *)(prop->value + 
off));
-   phandle += phandle_delta;
-   *(__be32 *)(prop->value + off) = 
cpu_to_be32(phandle);
-   }
+   phandle = be32_to_cpu(*(__be32 *)(prop->value + off));
+   phandle += phandle_delta;
+   *(__be32 *)(prop->value + off) = cpu_to_be32(phandle);
}
}
 
@@ -267,48 +265,36 @@ int of_resolve_phandles(struct device_node *overlay)
phandle phandle, phandle_delta;
int err;
 
-   if (!overlay)
-   pr_err("%s: null overlay\n", __func__);
-   if (overlay && !of_node_check_flag(overlay, OF_DETACHED))
-   pr_err("%s: node %s not detached\n", __func__,
-overlay->full_name);
-   if (!overlay || !of_node_check_flag(overlay, OF_DETACHED))
+   if (!overlay) {
+   pr_err("null overlay\n");
+   return -EINVAL;
+   }
+   if (!of_node_check_flag(overlay, OF_DETACHED)) {
+   pr_err("overlay not detached\n");
return -EINVAL;
+   }
 
phandle_delta = live_tree_max_phandle() + 1;
adjust_overlay_phandles(overlay, phandle_delta);
 
-   local_fixups = NULL;
for_each_child_of_node(overlay, local_fixups)
if (!of_node_cmp(local_fixups->name, "__local_fixups__"))
break;
 
-   if (local_fixups != NULL) {
-   err = adjust_local_phandle_references(local_fixups,
-   overlay, 0);
-   if (err)
-   return err;
+   err = adjust_local_phandle_references(local_fixups, overlay, 
phandle_delta);
+   if (err)
+   return err;
 
-   BUG_ON(adjust_local_phandle_references(local_fixups,
-   overlay, phandle_delta));
-   }
-
-   tree_symbols = NULL;
overlay_symbols = NULL;
overlay_fixups = NULL;
 
tree_symbols = of_find_node_by_path("/__symbols__");
 
for_each_child_of_node(overlay, child) {
-
-   if (!overlay_symbols && !of_node_cmp(child->name, 
"__symbols__"))
+   if (!of_node_cmp(child->name, "__symbols__"))
overlay_symbols = child;
-
-   if (!overlay_fixups && !of_node_cmp(child->name, "__fixups__"))
+   if (!of_node_cmp(child->name, "__fixups__"))
overlay_fixups = child;
-
-   if (overlay_symbols && overlay_fixups)
-   break;
}
 
if (!overlay_fixups) {
@@ -317,7 +303,7 @@ int of_resolve_phandles(struct device_node *overlay)
}
 
if (!tree_symbols) {
-   pr_err("%s: no symbols in root of device tree.\n", __func__);
+   pr_err("no symbols in root of device tree.\n");
err = -EINVAL;
goto out;
}
-- 
1.9.1

[PATCH 00/12] of: Make drivers/of/resolver.c more readable

From: Frank Rowand 

drivers/of/resolver.c is a bit difficult to read.  Clean it up so
that review of future overlay related patches will be easier.

Most of the patches are intended to be reformatting, with no functional
change.  Patches that are expected to have a functional change are:

  Remove excessive printks to reduce clutter.
  Update structure of code to be clearer, also remove BUG_ON()
Any functional change would reflect undefined behavior on bad overlay.
Some error message text modified.
BUG_ON() removed.
  Add back an error message, restructured

The patches are grouped into sets of changes that are intended
to be easy to verify correctness through simple inspection.

Some of the individual patches have checkpatch warnings or errors.
But after all patches are applied, the number of errors and
warnings from running checkpatch against the entire file are
reduced to two line size warnings.

These patches are only tested via the unit tests. I do not have
expansion boards to test with real hardware.

changes from rfc to v1:
  - Remove fewer one line comments
  - Add more extensive header comment to of_resolve_phandles()
to explain the how and why of resolving phandles
  - Update patch header comments
  - Incorporated patch "Remove braces around single line blocks"
into the previous patch in the series


Frank Rowand (12):
  of: Remove comments that state the obvious, to reduce clutter
  of: Remove excessive printks to reduce clutter.
  of: Convert comparisons to zero or NULL to logical expressions
  of: Rename functions to more accurately reflect what they do
  of: Remove prefix "__of_" from local function names
  of: Rename variables to better reflect purpose or follow convention
  of: Update structure of code to be clearer, also remove BUG_ON()
  of: Remove redundant size check
  of: Update comments to reflect changes and increase clarity
  of: Add back an error message, restructured
  of: Move setting of pointer to beside test for non-null
  of: Remove unused variable overlay_symbols

 drivers/of/resolver.c | 364 ++
 1 file changed, 156 insertions(+), 208 deletions(-)

-- 
1.9.1

[PATCH 09/12] of: Update comments to reflect changes and increase clarity

From: Frank Rowand 

Update comments to better explain what functions are doing.

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 66 ---
 1 file changed, 47 insertions(+), 19 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 76c09cb57eae..f842dbd1585c 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -50,9 +50,6 @@ static struct device_node *find_node_by_full_name(struct 
device_node *node,
return NULL;
 }
 
-/*
- * Find live tree's maximum phandle value.
- */
 static phandle live_tree_max_phandle(void)
 {
struct device_node *node;
@@ -71,9 +68,6 @@ static phandle live_tree_max_phandle(void)
return phandle;
 }
 
-/*
- * Adjust a subtree's phandle values by a given delta.
- */
 static void adjust_overlay_phandles(struct device_node *overlay,
int phandle_delta)
 {
@@ -81,9 +75,11 @@ static void adjust_overlay_phandles(struct device_node 
*overlay,
struct property *prop;
phandle phandle;
 
+   /* adjust node's phandle in node */
if (overlay->phandle != 0 && overlay->phandle != OF_PHANDLE_ILLEGAL)
overlay->phandle += phandle_delta;
 
+   /* copy adjusted phandle into *phandle properties */
for_each_property_of_node(overlay, prop) {
 
if (of_prop_cmp(prop->name, "phandle") &&
@@ -118,6 +114,7 @@ static int update_usages_of_a_phandle_reference(struct 
device_node *overlay,
return -ENOMEM;
memcpy(value, prop_fixup->value, prop_fixup->length);
 
+   /* prop_fixup contains a list of tuples of path:property_name:offset */
end = value + prop_fixup->length;
for (cur = value; cur < end; cur += len + 1) {
len = strlen(cur);
@@ -177,10 +174,14 @@ static int node_name_cmp(const struct device_node *dn1,
 
 /*
  * Adjust the local phandle references by the given phandle delta.
- * Assumes the existances of a __local_fixups__ node at the root.
- * Assumes that __of_verify_tree_phandle_references has been called.
- * Does not take any devtree locks so make sure you call this on a tree
- * which is at the detached state.
+ *
+ * Subtree @local_fixups, which is overlay node __local_fixups__,
+ * mirrors the fragment node structure at the root of the overlay.
+ *
+ * For each property in the fragments that contains a phandle reference,
+ * @local_fixups has a property of the same name that contains a list
+ * of offsets of the phandle reference(s) within the respective property
+ * value(s).  The values at these offsets will be fixed up.
  */
 static int adjust_local_phandle_references(struct device_node *local_fixups,
struct device_node *overlay, int phandle_delta)
@@ -225,6 +226,13 @@ static int adjust_local_phandle_references(struct 
device_node *local_fixups,
}
}
 
+   /*
+* These nested loops recurse down two subtrees in parallel, where the
+* node names in the two subtrees match.
+*
+* The roots of the subtrees are the overlay's __local_fixups__ node
+* and the overlay's root node.
+*/
for_each_child_of_node(local_fixups, child) {
 
for_each_child_of_node(overlay, overlay_child)
@@ -244,17 +252,37 @@ static int adjust_local_phandle_references(struct 
device_node *local_fixups,
 }
 
 /**
- * of_resolve  - Resolve the given node against the live tree.
+ * of_resolve_phandles - Relocate and resolve overlay against live tree
+ *
+ * @overlay:   Pointer to devicetree overlay to relocate and resolve
+ *
+ * Modify (relocate) values of local phandles in @overlay to a range that
+ * does not conflict with the live expanded devicetree.  Update references
+ * to the local phandles in @overlay.  Update (resolve) phandle references
+ * in @overlay that refer to the live expanded devicetree.
+ *
+ * Phandle values in the live tree are in the range of
+ * 1 .. live_tree_max_phandle().  The range of phandle values in the overlay
+ * also begin with at 1.  Adjust the phandle values in the overlay to begin
+ * at live_tree_max_phandle() + 1.  Update references to the phandles to
+ * the adjusted phandle values.
+ *
+ * The name of each property in the "__fixups__" node in the overlay matches
+ * the name of a symbol (a label) in the live tree.  The values of each
+ * property in the "__fixups__" node is a list of the property values in the
+ * overlay that need to be updated to contain the phandle reference
+ * corresponding to that symbol in the live tree.  Update the references in
+ * the overlay with the phandle values in the live tree.
+ *
+ * @overlay must be detached.
  *
- * @resolve:   Node to resolve
+ * Resolving and applying @overlay to the live expanded devicetree must be
+ * protected by a mechanism to ensure that multiple overlays are processed
+ * in a single threaded manner so that multiple overlays will not relocate
+ * phandles to overlap

[PATCH 06/12] of: Rename variables to better reflect purpose or follow convention

From: Frank Rowand 

Rename variables to better reflect what their purpose is.  As a side
effect, this reduces the need for some of the comments previously
removed in this series.

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 172 +-
 1 file changed, 85 insertions(+), 87 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 0ce38aa0ed3c..0778747cdd58 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -74,17 +74,17 @@ static phandle live_tree_max_phandle(void)
 /*
  * Adjust a subtree's phandle values by a given delta.
  */
-static void adjust_overlay_phandles(struct device_node *node,
+static void adjust_overlay_phandles(struct device_node *overlay,
int phandle_delta)
 {
struct device_node *child;
struct property *prop;
phandle phandle;
 
-   if (node->phandle != 0 && node->phandle != OF_PHANDLE_ILLEGAL)
-   node->phandle += phandle_delta;
+   if (overlay->phandle != 0 && overlay->phandle != OF_PHANDLE_ILLEGAL)
+   overlay->phandle += phandle_delta;
 
-   for_each_property_of_node(node, prop) {
+   for_each_property_of_node(overlay, prop) {
 
if (of_prop_cmp(prop->name, "phandle") &&
of_prop_cmp(prop->name, "linux,phandle"))
@@ -97,41 +97,40 @@ static void adjust_overlay_phandles(struct device_node 
*node,
if (phandle == OF_PHANDLE_ILLEGAL)
continue;
 
-   *(uint32_t *)prop->value = cpu_to_be32(node->phandle);
+   *(uint32_t *)prop->value = cpu_to_be32(overlay->phandle);
}
 
-   for_each_child_of_node(node, child)
+   for_each_child_of_node(overlay, child)
adjust_overlay_phandles(child, phandle_delta);
 }
 
-static int update_usages_of_a_phandle_reference(struct device_node *node,
-   struct property *rprop, int value)
+static int update_usages_of_a_phandle_reference(struct device_node *overlay,
+   struct property *prop_fixup, phandle phandle)
 {
-   phandle phandle;
struct device_node *refnode;
-   struct property *sprop;
-   char *propval, *propcur, *propend, *nodestr, *propstr, *s;
-   int offset, propcurlen;
+   struct property *prop;
+   char *value, *cur, *end, *node_path, *prop_name, *s;
+   int offset, len;
int err = 0;
 
-   propval = kmalloc(rprop->length, GFP_KERNEL);
-   if (!propval)
+   value = kmalloc(prop_fixup->length, GFP_KERNEL);
+   if (!value)
return -ENOMEM;
-   memcpy(propval, rprop->value, rprop->length);
+   memcpy(value, prop_fixup->value, prop_fixup->length);
 
-   propend = propval + rprop->length;
-   for (propcur = propval; propcur < propend; propcur += propcurlen + 1) {
-   propcurlen = strlen(propcur);
+   end = value + prop_fixup->length;
+   for (cur = value; cur < end; cur += len + 1) {
+   len = strlen(cur);
 
-   nodestr = propcur;
-   s = strchr(propcur, ':');
+   node_path = cur;
+   s = strchr(cur, ':');
if (!s) {
err = -EINVAL;
goto err_fail;
}
*s++ = '\0';
 
-   propstr = s;
+   prop_name = s;
s = strchr(s, ':');
if (!s) {
err = -EINVAL;
@@ -143,27 +142,26 @@ static int update_usages_of_a_phandle_reference(struct 
device_node *node,
if (err)
goto err_fail;
 
-   refnode = find_node_by_full_name(node, nodestr);
+   refnode = find_node_by_full_name(overlay, node_path);
if (!refnode)
continue;
 
-   for_each_property_of_node(refnode, sprop) {
-   if (!of_prop_cmp(sprop->name, propstr))
+   for_each_property_of_node(refnode, prop) {
+   if (!of_prop_cmp(prop->name, prop_name))
break;
}
of_node_put(refnode);
 
-   if (!sprop) {
+   if (!prop) {
err = -ENOENT;
goto err_fail;
}
 
-   phandle = value;
-   *(__be32 *)(sprop->value + offset) = cpu_to_be32(phandle);
+   *(__be32 *)(prop->value + offset) = cpu_to_be32(phandle);
}
 
 err_fail:
-   kfree(propval);
+   kfree(value);
return err;
 }
 
@@ -184,61 +182,61 @@ static int node_name_cmp(const struct device_node *dn1,
  * Does not take any devtree locks so make sure you call this on a tree
  * which is at the detached state.
  */
-static int adjust_local_phandle_references(struct device_node *node,
-   struct device_node *target, int phandle_delta)
+static int adjus

[PATCH 05/12] of: Remove prefix "__of_" from local function names

From: Frank Rowand 

Remove "__of_" prefix from local function names.  The pattern of
a leading "__" is used in drivers/of/ to signify a function that
must be called with a lock held.  These functions do not fit
that pattern.

Signed-off-by: Frank Rowand 
---
 drivers/of/resolver.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 3d123b612789..0ce38aa0ed3c 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -28,7 +28,7 @@
  * Find a node with the give full name by recursively following any of
  * the child node links.
  */
-static struct device_node *__of_find_node_by_full_name(struct device_node 
*node,
+static struct device_node *find_node_by_full_name(struct device_node *node,
const char *full_name)
 {
struct device_node *child, *found;
@@ -40,7 +40,7 @@ static struct device_node *__of_find_node_by_full_name(struct 
device_node *node,
return of_node_get(node);
 
for_each_child_of_node(node, child) {
-   found = __of_find_node_by_full_name(child, full_name);
+   found = find_node_by_full_name(child, full_name);
if (found != NULL) {
of_node_put(child);
return found;
@@ -143,7 +143,7 @@ static int update_usages_of_a_phandle_reference(struct 
device_node *node,
if (err)
goto err_fail;
 
-   refnode = __of_find_node_by_full_name(node, nodestr);
+   refnode = find_node_by_full_name(node, nodestr);
if (!refnode)
continue;
 
@@ -168,7 +168,7 @@ static int update_usages_of_a_phandle_reference(struct 
device_node *node,
 }
 
 /* compare nodes taking into account that 'name' strips out the @ part */
-static int __of_node_name_cmp(const struct device_node *dn1,
+static int node_name_cmp(const struct device_node *dn1,
const struct device_node *dn2)
 {
const char *n1 = strrchr(dn1->full_name, '/') ? : "/";
@@ -232,7 +232,7 @@ static int adjust_local_phandle_references(struct 
device_node *node,
for_each_child_of_node(node, child) {
 
for_each_child_of_node(target, childtarget)
-   if (!__of_node_name_cmp(child, childtarget))
+   if (!node_name_cmp(child, childtarget))
break;
 
if (!childtarget)
-- 
1.9.1

[ANNOUNCE] Git v2.10.2

2016-10-28 Thread Junio C Hamano

The latest maintenance release Git v2.10.2 is now available at
the usual places.

The tarballs are found at:

https://www.kernel.org/pub/software/scm/git/

The following public repositories all have a copy of the 'v2.10.2'
tag and the 'maint' branch that the tag points at:

  url = https://kernel.googlesource.com/pub/scm/git/git
  url = git://repo.or.cz/alt-git.git
  url = git://git.sourceforge.jp/gitroot/git-core/git.git
  url = git://git-core.git.sourceforge.net/gitroot/git-core/git-core
  url = https://github.com/gitster/git



Git v2.10.2 Release Notes
=========================

Fixes since v2.10.1
-------------------

 * The code that parses the format parameter of for-each-ref command
   has seen a micro-optimization.

 * The "graph" API used in "git log --graph" miscounted the number of
   output columns consumed so far when drawing a padding line, which
   has been fixed; this did not affect any existing code as nobody
   tried to write anything after the padding on such a line, though.

 * Almost everybody uses DEFAULT_ABBREV to refer to the default
   setting for the abbreviation, but "git blame" peeked into
   underlying variable bypassing the macro for no good reason.

 * Doc update to clarify what "log -3 --reverse" does.

 * An author name, that spelled a backslash-quoted double quote in the
   human readable part "My \"double quoted\" name", was not unquoted
   correctly while applying a patch from a piece of e-mail.

 * The original command line syntax for "git merge", which was "git
   merge <msg> HEAD <parent>...", has been deprecated for quite some
   time, and "git gui" was the last in-tree user of the syntax.  This
   is finally fixed, so that we can move forward with the deprecation.

 * Codepaths that read from an on-disk loose object were too loose in
   validating what they are reading is a proper object file and
   sometimes read past the data they read from the disk, which has
   been corrected.  H/t to Gustavo Grieco for reporting.

 * "git worktree", even though it used the default_abbrev setting that
   ought to be affected by core.abbrev configuration variable, ignored
   the variable setting.  The command has been taught to read the
   default set of configuration variables to correct this.

 * A low-level function verify_packfile() was meant to show errors
   that were detected without dying itself, but under some conditions
   it didn't and died instead, which has been fixed.

 * When "git fetch" tries to find where the history of the repository
   it runs in has diverged from what the other side has, it has a
   mechanism to avoid digging too deep into irrelevant side branches.
   This however did not work well over the "smart-http" transport due
   to a design bug, which has been fixed.

 * When we started cURL to talk to imap server when a new enough
   version of cURL library is available, we forgot to explicitly add
   imap(s):// before the destination.  To some folks, that didn't work
   and the library tried to make HTTP(s) requests instead.

 * The ./configure script generated from configure.ac was taught how
   to detect support of SSL by libcurl better.

 * http.emptyauth configuration is a way to allow an empty username to
   pass when attempting to authenticate using mechanisms like
   Kerberos.  We took an unspecified (NULL) username and sent ":"
   (i.e. no username, no password) to CURLOPT_USERPWD, but did not do
   the same when the username is explicitly set to an empty string.

 * "git clone" of a local repository can be done at the filesystem
   level, but the codepath did not check errors while copying and
   adjusting the file that lists alternate object stores.

 * Documentation for "git commit" was updated to clarify that "commit
   -p <paths>" adds to the current contents of the index to come up
   with what to commit.

 * A stray symbolic link in $GIT_DIR/refs/ directory could make name
   resolution loop forever, which has been corrected.

 * The "submodule.<name>.path" stored in .gitmodules is never copied
   to .git/config and such a key in .git/config has no meaning, but
   the documentation described it and submodule.<name>.url next to
   each other as if both belong to .git/config.  This has been fixed.

 * Recent git allows submodule.<name>.branch to use a special token
   "." instead of the branch name; the documentation has been updated
   to describe it.

 * In a worktree connected to a repository elsewhere, created via "git
   worktree", "git checkout" attempts to protect users from confusion
   by refusing to check out a branch that is already checked out in
   another worktree.  However, this also prevented checking out a
   branch, which is designated as the primary branch of a bare
   repository, in a worktree that is connected to the bare
   repository.  The check has been corrected to allow it.

 * "git rebase" immediately after "git clone" failed to find the fork
   point from the upstream.

 * When f

[RFC PATCH] hugetlbfs: fix the hugetlbfs can not be mounted

2016-10-28 Thread zhongjiang

From: zhong jiang 

Since commit 3e89e1c5ea84 ("hugetlb: make mm and fs code explicitly
non-modular") was merged into mainline, mounting hugetlbfs results in the
following error:

mount: unknown filesystem type 'hugetlbfs'

This happens because that commit removed MODULE_ALIAS_FS(), so when the
filesystem is mounted, the caller get_fs_type() cannot find it.

This patch simply restores MODULE_ALIAS_FS() so that hugetlbfs can be identified.

Signed-off-by: zhong jiang 
---
 fs/hugetlbfs/inode.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 4fb7b10..b63e7de 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -1209,6 +1210,7 @@ static struct dentry *hugetlbfs_mount(struct 
file_system_type *fs_type,
.mount  = hugetlbfs_mount,
.kill_sb= kill_litter_super,
 };
+MODULE_ALIAS_FS("hugetlbfs");
 
 static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
 
-- 
1.8.3.1

Re: net/dccp: warning in dccp_feat_clone_sp_val/__might_sleep

2016-10-28 Thread Cong Wang

On Fri, Oct 28, 2016 at 5:40 PM, Andrey Konovalov  wrote:
> Hi,
>
> I've got the following error report while running the syzkaller fuzzer:
>
> [ cut here ]
> WARNING: CPU: 0 PID: 4608 at kernel/sched/core.c:7724
> __might_sleep+0x14c/0x1a0 kernel/sched/core.c:7719
> do not call blocking ops when !TASK_RUNNING; state=1 set at
> [] prepare_to_wait+0xbc/0x210
> kernel/sched/wait.c:178
> Modules linked in:
> CPU: 0 PID: 4608 Comm: syz-executor Not tainted 4.9.0-rc2+ #320
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>  88006625f7a0 81b46914 88006625f818 
>  84052960  88006625f7e8 8237
>  88006aceac00 1e2c ed000cc4beff 84052960
> Call Trace:
>  [< inline >] __dump_stack lib/dump_stack.c:15
>  [] dump_stack+0xb3/0x10f lib/dump_stack.c:51
>  [] __warn+0x1a7/0x1f0 kernel/panic.c:550
>  [] warn_slowpath_fmt+0xac/0xd0 kernel/panic.c:565
>  [] __might_sleep+0x14c/0x1a0 kernel/sched/core.c:7719
>  [< inline >] slab_pre_alloc_hook mm/slab.h:393
>  [< inline >] slab_alloc_node mm/slub.c:2634
>  [< inline >] slab_alloc mm/slub.c:2716
>  [] __kmalloc_track_caller+0x150/0x2a0 mm/slub.c:4240
>  [] kmemdup+0x24/0x50 mm/util.c:113
>  [] dccp_feat_clone_sp_val.part.5+0x4f/0xe0
> net/dccp/feat.c:374
>  [< inline >] dccp_feat_clone_sp_val net/dccp/feat.c:1141
>  [< inline >] dccp_feat_change_recv net/dccp/feat.c:1141
>  [] dccp_feat_parse_options+0xaa1/0x13d0 
> net/dccp/feat.c:1411
>  [] dccp_parse_options+0x721/0x1010 net/dccp/options.c:128
>  [] dccp_rcv_state_process+0x200/0x15b0 net/dccp/input.c:644
>  [] dccp_v4_do_rcv+0xf4/0x1a0 net/dccp/ipv4.c:681
>  [< inline >] sk_backlog_rcv ./include/net/sock.h:872
>  [] __release_sock+0x126/0x3a0 net/core/sock.c:2044
>  [] release_sock+0x59/0x1c0 net/core/sock.c:2502
>  [< inline >] inet_wait_for_connect net/ipv4/af_inet.c:547
>  [] __inet_stream_connect+0x5d2/0xbb0 net/ipv4/af_inet.c:617
>  [] inet_stream_connect+0x55/0xa0 net/ipv4/af_inet.c:656
>  [] SYSC_connect+0x244/0x2f0 net/socket.c:1533
>  [] SyS_connect+0x24/0x30 net/socket.c:1514
>  [] entry_SYSCALL_64_fastpath+0x1f/0xc2
> arch/x86/entry/entry_64.S:209

Should be fixed by the attached patch. I will verify it with your
reproducer tomorrow.

Thanks!
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 1704948..c90cb35 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -367,11 +367,11 @@ static inline int dccp_feat_must_be_understood(u8 
feat_num)
 }
 
 /* copy constructor, fval must not already contain allocated memory */
-static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len)
+static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len, 
gfp_t flags)
 {
fval->sp.len = len;
if (fval->sp.len > 0) {
-   fval->sp.vec = kmemdup(val, len, gfp_any());
+   fval->sp.vec = kmemdup(val, len, flags);
if (fval->sp.vec == NULL) {
fval->sp.len = 0;
return -ENOBUFS;
@@ -404,7 +404,8 @@ static void dccp_feat_val_destructor(u8 feat_num, 
dccp_feat_val *val)
 
if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val,
  original->val.sp.vec,
- original->val.sp.len)) {
+ original->val.sp.len,
+ gfp_any())) {
kfree(new);
return NULL;
}
@@ -735,7 +736,7 @@ static int __feat_register_sp(struct list_head *fn, u8 
feat, u8 is_local,
if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len))
return -EOPNOTSUPP;
 
-   if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len))
+   if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len, gfp_any()))
return -ENOMEM;
 
return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval);
@@ -1138,7 +1139,7 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 
is_mandatory, u8 opt,
 *   otherwise we accept the preferred value;
 * - else if we are the client, we use the first list element.
 */
-   if (dccp_feat_clone_sp_val(&fval, val, 1))
+   if (dccp_feat_clone_sp_val(&fval, val, 1, GFP_ATOMIC))
return DCCP_RESET_CODE_TOO_BUSY;
 
if (len > 1 && server) {

[ANNOUNCE] Multiple Queue Skiplist Scheduler version 0.120

2016-10-28 Thread Con Kolivas

This is to announce the first major ~stable public release of MuQSS (pronounced 
mux), the Multiple Queue Skiplist Scheduler. 

MuQSS for linux 4.8:
http://ck.kolivas.org/patches/muqss/4.0/4.8/4.8-sched-MuQSS_120.patch

MuQSS for linux 4.7:
http://ck.kolivas.org/patches/muqss/4.0/4.7/4.7-sched-MuQSS_120.patch

Git tree:
https://github.com/ckolivas/linux


MuQSS is the evolution and drop-in replacement for BFS, the Brain Fuck 
Scheduler, both of which were designed with responsiveness and interactivity 
as their primary goal. MuQSS is my response for requests for a more scalable 
version of BFS for our ever-increasing multicore hardware. It is a massive 
rewrite of BFS designed to maintain the same interactivity and responsiveness 
using the same algorithm for scheduling decisions, along with a very simple 
overall design that is easy to understand, model, and hack on, yet able to 
scale to hardware of virtually any size.

It is meant as a replacement for BFS primarily, and NOT a comprehensive 
replacement for the mainline scheduler since it lacks some of the features of 
mainline still (specifically cgroups and sched deadline support.) However 
outside of these feature requirements it performs better in latency and 
responsiveness areas and, while it performs poorer in some throughput 
benchmarks, it also performs better in many others. Additionally MuQSS has a 
number of unique features missing from mainline described in the full 
documentation below. Results from early 12x benchmarks below.

Interbench interactivity comparison:
http://ck-hack.blogspot.com/2016/10/interbench-benchmarks-for-muqss-116.html

Throughput comparison:
http://ck.kolivas.org/patches/muqss/Benchmarks/20161018/

Very little effort has gone into optimising throughput in its existing form, 
being effectively a first stable working version of the scheduler, though 
clearly it does well in some areas (postgres particularly.)

To be clear what my intentions are, this will be maintained out of mainline 
development - as BFS was - for those that wish an alternative scheduler with a 
different focus and set of features. There is plenty of interest in this 
outside of mainline development and there are some perverse things I do in the 
code that would make most mainline developers' skin crawl, but application and 
utility are far more important than technology in my opinion. I do not have 
the energy and time to engage with the mainline development process alone to 
try and make this replace the existing scheduler though I'm always happy to 
accept outside help and patches for this project.

Below is the complete documentation as included in the patch.

Enjoy!
お楽しみください
-- 
-ck

MuQSS - The Multiple Queue Skiplist Scheduler by Con Kolivas.

MuQSS is a per-cpu runqueue variant of the original BFS scheduler with
one 8 level skiplist per runqueue, and fine grained locking for much more
scalability.


Goals.

The goal of the Multiple Queue Skiplist Scheduler, referred to as MuQSS from
here on (pronounced mux) is to completely do away with the complex designs of
the past for the cpu process scheduler and instead implement one that is very
simple in basic design. The main focus of MuQSS is to achieve excellent 
desktop
interactivity and responsiveness without heuristics and tuning knobs that are
difficult to understand, impossible to model and predict the effect of, and 
when
tuned to one workload cause massive detriment to another, while still being
scalable to many CPUs and processes.


Design summary.

MuQSS is best described as per-cpu multiple runqueue, O(log n) insertion, O(1)
lookup, earliest effective virtual deadline first tickless design, loosely 
based
on EEVDF (earliest eligible virtual deadline first) and my previous Staircase
Deadline scheduler, and evolved from the single runqueue O(n) BFS scheduler.
Each component shall be described in order to understand the significance of,
and reasoning for it.


Design reasoning.

In BFS, the use of a single runqueue across all CPUs meant that each CPU would
need to scan the entire runqueue looking for the process with the earliest
deadline and schedule that next, regardless of which CPU it originally came
from. This made BFS deterministic with respect to latency and provided
guaranteed latencies dependent on number of processes and CPUs. The single
runqueue, however, meant that all CPUs would compete for the single lock
protecting it, which would lead to increasing lock contention as the number of
CPUs rose and appeared to limit scalability of common workloads beyond 16
logical CPUs. Additionally, the O(n) lookup of the runqueue list obviously
increased overhead proportionate to the number of queued processes and led to
cache thrashing while iterating over the linked list.

MuQSS is an evolution of BFS, designed to maintain the same scheduling
decision mechanism and be virtually deterministic without relying on the
constrained design of the single runqueue by splitting out the single runqueue

Re: [PATCH 00/14] introduce the BFQ-v0 I/O scheduler as an extra scheduler

2016-10-28 Thread Paolo Valente


> Il giorno 26 ott 2016, alle ore 18:12, Jens Axboe  ha 
> scritto:
> 
> On 10/26/2016 10:04 AM, Paolo Valente wrote:
>> 
>>> Il giorno 26 ott 2016, alle ore 17:32, Jens Axboe  ha 
>>> scritto:
>>> 
>>> On 10/26/2016 09:29 AM, Christoph Hellwig wrote:
 On Wed, Oct 26, 2016 at 05:13:07PM +0200, Arnd Bergmann wrote:
> The question to ask first is whether to actually have pluggable
> schedulers on blk-mq at all, or just have one that is meant to
> do the right thing in every case (and possibly can be bypassed
> completely).
 
 That would be my preference.  Have a BFQ-variant for blk-mq as an
 option (default to off unless opted in by the driver or user), and
 not other scheduler for blk-mq.  Don't bother with bfq for non
 blk-mq.  It's not like there is any advantage in the legacy-request
 device even for slow devices, except for the option of having I/O
 scheduling.
>>> 
>>> It's the only right way forward. blk-mq might not offer any substantial
>>> advantages to rotating storage, but with scheduling, it won't offer a
>>> downside either. And it'll take us towards the real goal, which is to
>>> have just one IO path.
>> 
>> ok
>> 
>>> Adding a new scheduler for the legacy IO path
>>> makes no sense.
>> 
>> I would fully agree if effective and stable I/O scheduling would be
>> available in blk-mq in one or two months.  But I guess that it will
>> take at least one year optimistically, given the current status of the
>> needed infrastructure, and given the great difficulties of doing
>> effective scheduling at the high parallelism and extreme target speeds
>> of blk-mq.  Of course, this holds true unless little clever scheduling
>> is performed.
>> 
>> So, what's the point in forcing a lot of users wait another year or
>> more, for a solution that has yet to be even defined, while they could
>> enjoy a much better system, and then switch an even better system when
>> scheduling is ready in blk-mq too?
> 
> That same argument could have been made 2 years ago. Saying no to a new
> scheduler for the legacy framework goes back roughly that long. We could
> have had BFQ for mq NOW, if we didn't keep coming back to this very
> point.
> 
> I'm hesitant to add a new scheduler because it's very easy to add, very
> difficult to get rid of. If we do add BFQ as a legacy scheduler now,
> it'll take us years and years to get rid of it again. We should be
> moving towards LESS moving parts in the legacy path, not more.
> 
> We can keep having this discussion every few years, but I think we'd
> both prefer to make some actual progress here.

ok Jens, I give up

> It's perfectly fine to
> add an interface for a single queue interface for an IO scheduler for
> blk-mq, since we don't care too much about scalability there. And that
> won't take years, that should be a few weeks. Retrofitting BFQ on top of
> that should not be hard either. That can co-exist with a real multiqueue
> scheduler as well, something that's geared towards some fairness for
> faster devices.
> 

AFAICT this solution is good, for many practical reasons.  I don't
have the expertise to make such an infrastructure well on my own.  At
least not in an acceptable amount of time, because working on this
nice stuff is unfortunately not my job (although Linaro is now
supporting me for BFQ).

Then, assuming that this solution may be of general interest, and that
BFQ benefits convinced you a little bit too, may I get significant
collaboration/help on implementing this infrastructure?  If so, Jens
and all possibly interested parties, could we have a sort of short
kick-off technical meeting during KS/LPC?

Thanks,
Paolo

> -- 
> Jens Axboe

Re: [Xen-devel] [PATCH v6 00/11] implement vcpu preempted check

2016-10-28 Thread Pan Xinhui




在 2016/10/29 03:38, Konrad Rzeszutek Wilk 写道:

On Fri, Oct 28, 2016 at 04:11:16AM -0400, Pan Xinhui wrote:

change from v5:
split x86/kvm patch into guest/host part.
introduce kvm_write_guest_offset_cached.
fix some typos.
rebase patch onto 4.9.2
change from v4:
split x86 kvm vcpu preempted check into two patches.
add documentation patch.
add x86 vcpu preempted check patch under xen
add s390 vcpu preempted check patch
change from v3:
add x86 vcpu preempted check patch
change from v2:
no code change, fix typos, update some comments
change from v1:
a simpler definition of default vcpu_is_preempted
skip machine type check on ppc, and add config. remove dedicated macro.
add one patch to drop overload of rwsem_spin_on_owner and 
mutex_spin_on_owner.
add more comments
thanks boqun and Peter's suggestion.

This patch set aims to fix lock holder preemption issues.


Do you have a git tree with these patches?


Currently no, sorry :(

I made a tar file for this patchset. Maybe a little easier to apply :)

thanks
xinhui



test-case:
perf record -a perf bench sched messaging -g 400 -p && perf report

18.09%  sched-messaging  [kernel.vmlinux]  [k] osq_lock
12.28%  sched-messaging  [kernel.vmlinux]  [k] rwsem_spin_on_owner
 5.27%  sched-messaging  [kernel.vmlinux]  [k] mutex_unlock
 3.89%  sched-messaging  [kernel.vmlinux]  [k] wait_consider_task
 3.64%  sched-messaging  [kernel.vmlinux]  [k] _raw_write_lock_irq
 3.41%  sched-messaging  [kernel.vmlinux]  [k] mutex_spin_on_owner.is
 2.49%  sched-messaging  [kernel.vmlinux]  [k] system_call

We introduce interface bool vcpu_is_preempted(int cpu) and use it in some spin
loops of osq_lock, rwsem_spin_on_owner and mutex_spin_on_owner.
These spin_on_owner variants also caused RCU stalls before we applied this patch set.

We have also observed some performance improvements in unix benchmark tests.

PPC test result:
1 copy - 0.94%
2 copy - 7.17%
4 copy - 11.9%
8 copy -  3.04%
16 copy - 15.11%

details below:
Without patch:

1 copy - File Write 4096 bufsize 8000 maxblocks  2188223.0 KBps  (30.0 s, 1 
samples)
2 copy - File Write 4096 bufsize 8000 maxblocks  1804433.0 KBps  (30.0 s, 1 
samples)
4 copy - File Write 4096 bufsize 8000 maxblocks  1237257.0 KBps  (30.0 s, 1 
samples)
8 copy - File Write 4096 bufsize 8000 maxblocks  1032658.0 KBps  (30.0 s, 1 
samples)
16 copy - File Write 4096 bufsize 8000 maxblocks   768000.0 KBps  (30.1 s, 
1 samples)

With patch:

1 copy - File Write 4096 bufsize 8000 maxblocks  2209189.0 KBps  (30.0 s, 1 
samples)
2 copy - File Write 4096 bufsize 8000 maxblocks  1943816.0 KBps  (30.0 s, 1 
samples)
4 copy - File Write 4096 bufsize 8000 maxblocks  1405591.0 KBps  (30.0 s, 1 
samples)
8 copy - File Write 4096 bufsize 8000 maxblocks  1065080.0 KBps  (30.0 s, 1 
samples)
16 copy - File Write 4096 bufsize 8000 maxblocks   904762.0 KBps  (30.0 s, 
1 samples)

X86 test result:
test-case   after-patch   before-patch
Execl Throughput   |18307.9 lps  |11701.6 lps
File Copy 1024 bufsize 2000 maxblocks  |  1352407.3 KBps |   790418.9 KBps
File Copy 256 bufsize 500 maxblocks|   367555.6 KBps |   222867.7 KBps
File Copy 4096 bufsize 8000 maxblocks  |  3675649.7 KBps |  1780614.4 KBps
Pipe Throughput| 11872208.7 lps  | 11855628.9 lps
Pipe-based Context Switching   |  1495126.5 lps  |  1490533.9 lps
Process Creation   |29881.2 lps  |28572.8 lps
Shell Scripts (1 concurrent)   |23224.3 lpm  |22607.4 lpm
Shell Scripts (8 concurrent)   | 3531.4 lpm  | 3211.9 lpm
System Call Overhead   | 10385653.0 lps  | 10419979.0 lps

Christian Borntraeger (1):
  s390/spinlock: Provide vcpu_is_preempted

Juergen Gross (1):
  x86, xen: support vcpu preempted check

Pan Xinhui (9):
  kernel/sched: introduce vcpu preempted check interface
  locking/osq: Drop the overload of osq_lock()
  kernel/locking: Drop the overload of {mutex,rwsem}_spin_on_owner
  powerpc/spinlock: support vcpu preempted check
  x86, paravirt: Add interface to support kvm/xen vcpu preempted check
  KVM: Introduce kvm_write_guest_offset_cached
  x86, kvm/x86.c: support vcpu preempted check
  x86, kernel/kvm.c: support vcpu preempted check
  Documentation: virtual: kvm: Support vcpu preempted check

 Documentation/virtual/kvm/msr.txt |  9 -
 arch/powerpc/include/asm/spinlock.h   |  8 
 arch/s390/include/asm/spinlock.h  |  8 
 arch/s390/kernel/smp.c|  9 +++--
 arch/s390/lib/spinlock.c  | 25 -
 arch/x86/include/asm/paravirt_types.h |  2 ++
 arch/x86/include/asm/spinlock.h   |  8 
 arch/x86/include/uapi/asm/kvm_para.h  |  4 +++-
 arch/x86/kernel/kvm.c | 12 
 arch/x8

Re: [PATCH v10 01/19] vfio: Mediated device Core driver

2016-10-28 Thread Jike Song

On 10/27/2016 05:29 AM, Kirti Wankhede wrote:
> +int mdev_register_device(struct device *dev, const struct parent_ops *ops)
> +{
> + int ret;
> + struct parent_device *parent;
> +
> + /* check for mandatory ops */
> + if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
> + return -EINVAL;
> +
> + dev = get_device(dev);
> + if (!dev)
> + return -EINVAL;
> +
> + mutex_lock(&parent_list_lock);
> +
> + /* Check for duplicate */
> + parent = __find_parent_device(dev);
> + if (parent) {
> + ret = -EEXIST;
> + goto add_dev_err;
> + }
> +
> + parent = kzalloc(sizeof(*parent), GFP_KERNEL);
> + if (!parent) {
> + ret = -ENOMEM;
> + goto add_dev_err;
> + }
> +
> + kref_init(&parent->ref);
> + mutex_init(&parent->lock);
> +
> + parent->dev = dev;
> + parent->ops = ops;
> +
> + ret = parent_create_sysfs_files(parent);
> + if (ret) {
> + mutex_unlock(&parent_list_lock);
> + mdev_put_parent(parent);
> + return ret;
> + }
> +
> + ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
> + if (ret)
> + dev_warn(dev, "Failed to create compatibility class link\n");
> +

Hi Kirti,

Like I replied to previous version:

http://www.spinics.net/lists/kvm/msg139331.html

You can always check whether mdev_bus_compat_class is already registered
here, and register it if it is not. The same logic should be applied to
mdev_init.

Current implementation will simply panic if configured as builtin,
which is rare but far from impossible.

--
Thanks,
Jike


> + list_add(&parent->next, &parent_list);
> + mutex_unlock(&parent_list_lock);
> +
> + dev_info(dev, "MDEV: Registered\n");
> + return 0;
> +
> +add_dev_err:
> + mutex_unlock(&parent_list_lock);
> + put_device(dev);
> + return ret;
> +}
> +EXPORT_SYMBOL(mdev_register_device);
> +
> +/*
> + * mdev_unregister_device : Unregister a parent device
> + * @dev: device structure representing parent device.
> + *
> + * Remove device from list of registered parent devices. Give a chance to 
> free
> + * existing mediated devices for given device.
> + */
> +
> +void mdev_unregister_device(struct device *dev)
> +{
> + struct parent_device *parent;
> + bool force_remove = true;
> +
> + mutex_lock(&parent_list_lock);
> + parent = __find_parent_device(dev);
> +
> + if (!parent) {
> + mutex_unlock(&parent_list_lock);
> + return;
> + }
> + dev_info(dev, "MDEV: Unregistering\n");
> +
> + list_del(&parent->next);
> + class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
> +
> + device_for_each_child(dev, (void *)&force_remove,
> +   mdev_device_remove_cb);
> +
> + parent_remove_sysfs_files(parent);
> +
> + mutex_unlock(&parent_list_lock);
> + mdev_put_parent(parent);
> +}
> +EXPORT_SYMBOL(mdev_unregister_device);
> +
> +static void mdev_device_release(struct device *dev)
> +{
> + struct mdev_device *mdev = to_mdev_device(dev);
> +
> + dev_dbg(&mdev->dev, "MDEV: destroying\n");
> + kfree(mdev);
> +}
> +
> +int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le 
> uuid)
> +{
> + int ret;
> + struct mdev_device *mdev;
> + struct parent_device *parent;
> + struct mdev_type *type = to_mdev_type(kobj);
> +
> + parent = mdev_get_parent(type->parent);
> + if (!parent)
> + return -EINVAL;
> +
> + mutex_lock(&parent->lock);
> +
> + /* Check for duplicate */
> + if (mdev_device_exist(parent, uuid)) {
> + ret = -EEXIST;
> + goto create_err;
> + }
> +
> + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
> + if (!mdev) {
> + ret = -ENOMEM;
> + goto create_err;
> + }
> +
> + memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
> + mdev->parent = parent;
> + kref_init(&mdev->ref);
> +
> + mdev->dev.parent  = dev;
> + mdev->dev.bus = &mdev_bus_type;
> + mdev->dev.release = mdev_device_release;
> + dev_set_name(&mdev->dev, "%pUl", uuid.b);
> +
> + ret = device_register(&mdev->dev);
> + if (ret) {
> + put_device(&mdev->dev);
> + goto create_err;
> + }
> +
> + ret = mdev_device_create_ops(kobj, mdev);
> + if (ret)
> + goto create_failed;
> +
> + ret = mdev_create_sysfs_files(&mdev->dev, type);
> + if (ret) {
> + mdev_device_remove_ops(mdev, true);
> + goto create_failed;
> + }
> +
> + mdev->type_kobj = kobj;
> + dev_dbg(&mdev->dev, "MDEV: created\n");
> +
> + mutex_unlock(&parent->lock);
> + return ret;
> +
> +create_failed:
> + device_unregister(&mdev->dev);
> +
> +create_err:
> + mutex_unlock(&parent->lock);
> + mdev_put_parent(parent);
> + return ret;
> +}
> +
> +int mdev_device_

Re: [PATCH v4] time: alarmtimer: Add the tracepoints for alarmtimer

2016-10-28 Thread John Stultz

On Thu, Oct 27, 2016 at 12:32 AM, Baolin Wang  wrote:
> On 18 October 2016 at 14:47, Baolin Wang  wrote:
>> From the trace log, we can find out the 'Binder:3292_2' process
>> set one alarm timer which resumes the system.
>>
>> Signed-off-by: Baolin Wang 
>> Acked-by: Steven Rostedt 
>> ---
>> Changes since v3:
>>  - Remove the "ALARM_" prefix in the string.
>>  - Add the ACK by Steven Rostedt.
>>
>> Changes since v2:
>>  - Save time as s64 type.
>>  - Remove 'process_name' parameter and add 'now' parameter.
>>  - Rename the trace event name.
>>  - Remove restart trace event.
>>  - Other optimization.
>
> Any comments about this version? Thanks.

Sorry for the slow response.

I've got this in my "to-queue" list, but I've not had the chance to
queue it up and get it tested.
Hopefully I'll find some time next week during plumbers to get that
done, but it may not be till the week after.

thanks
-john

Re: [PATCH 1/4] sched: move IO scheduling accounting from io_schedule_timeout() to __schedule()

2016-10-28 Thread Peter Zijlstra

On Fri, Oct 28, 2016 at 03:12:32PM -0400, Tejun Heo wrote:
> Hello, Peter.
> 
> On Fri, Oct 28, 2016 at 09:07:02PM +0200, Peter Zijlstra wrote:
> > One alternative is to inherit the iowait state of the task we block on.
> > That'll not get rid of the branches much, but it will remove the new
> > mutex APIs.
> 
> Yeah, thought about that briefly but we don't necessarily track mutex

This one I actually fixed and should be in -next. And it would be
sufficient to cover the use case here.

> or other synchronization construct owners, things get gnarly with
> rwsems (the inode ones sometimes end up in a similar situation), and
> we'll probably end up dealing with some surprising propagations down
> the line.

rwsems could be done for writers only.

Re: [PATCH v2 3/3] usb: musb: da8xx: Only execute the OTG workaround when phy in OTG mode

2016-10-28 Thread Bin Liu

On Fri, Oct 28, 2016 at 12:11:21PM -0500, David Lechner wrote:
> On 10/28/2016 07:39 AM, Alexandre Bailon wrote:
> >On 10/28/2016 04:56 AM, David Lechner wrote:
> >>On 10/26/2016 05:58 AM, Alexandre Bailon wrote:
> >>>When the phy is forced in host mode, only the first hot plug and
> >>>hot remove works. That is actually because the driver execute the
> >>>OTG workaround, whereas it is not applicable in host or device mode.
> >>>Indeed, to work correctly, the VBUS sense and session end comparator
> >>>must be enabled, what is only possible when the phy is in OTG mode.
> >>>Only execute the workaround if the phy is in OTG mode.
> >>>
> >>>Signed-off-by: Alexandre Bailon 
> >>>---
> >>> drivers/usb/musb/da8xx.c | 11 +++
> >>> 1 file changed, 11 insertions(+)
> >>>
> >>>diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
> >>>index 6749aa1..b8a6b65 100644
> >>>--- a/drivers/usb/musb/da8xx.c
> >>>+++ b/drivers/usb/musb/da8xx.c
> >>>@@ -145,6 +145,17 @@ static void otg_timer(unsigned long _musb)
> >>> unsigned longflags;
> >>>
> >>> /*
> >>>+ * We should only execute the OTG workaround when the phy is in OTG
> >>>+ * mode. The workaround require the VBUS sense and the session end
> >>>+ * comparator to be enabled, what is only possible if the phy is in
> >>>+ * OTG mode. As the workaround is only required to detect if the
> >>>+ * controller must act as host or device, we can safely exit OTG is
> >>>+ * not in use.
> >>>+ */
> >>>+if (musb->port_mode != MUSB_PORT_MODE_DUAL_ROLE)
> >>
> >>musb->port_mode is not valid if we have changed the mode via sysfs. It
> >>only reflects the mode set during driver probe.
> >>
> >>Furthermore, this breaks the host mode completely for me. The first hot
> >>plug is not even detected.
> >>
> >>>+return;
> >>>+
> >>>+/*
> >>>  * We poll because DaVinci's won't expose several OTG-critical
> >>>  * status change events (from the transceiver) otherwise.
> >>>  */
> >>>
> >>
> >>
> >>The way this is working for me (on AM1808) is this:
> >>
> >>The problem is not that the OTG workaround is being used. The problem is
> >>that after disconnect, the VBUSDRV is turned off. If you look at the
> >>handler for DA8XX_INTR_DRVVBUS in da8xx_musb_interrupt(), you will see
> >>that if VBUSDRV is off, then drvvbus == 0, which puts the musb state
> >>back to device mode.
> >>
> >>I also ran into a similar problem a while back[1] that if you use a
> >>self-powered device in host mode, it immediately becomes disconnected.
> >>This is for the exact same reason. When a port detects a self-powered
> >>device, it turns off VBUSDRV, which triggers the DA8XX_INTR_DRVVBUS
> >>interrupt. As we have seen above, this takes the port out of host mode.
> >>
> >>The workaround that I have found that seems to fix both cases is to add
> >>an else if statement that toggles the PHY host override when we are
> >>forcing host mode and the VBUSDRV is turned off.
> >I like this workaround.
> >>
> >>Here is a partial diff of drivers/usb/musb/da8xx.c to show what I mean:
> >>
> >>@@ -304,10 +309,14 @@ static irqreturn_t da8xx_musb_interrupt(int irq,
> >>void *hci)
> >> * Also, DRVVBUS pulses for SRP (but not at 5 V)...
> >> */
> >>if (status & (DA8XX_INTR_DRVVBUS << DA8XX_INTR_USB_SHIFT)) {
> >>+   struct da8xx_glue *glue =
> >>+   dev_get_drvdata(musb->controller->parent);
> >>int drvvbus = musb_readl(reg_base, DA8XX_USB_STAT_REG);
> >>void __iomem *mregs = musb->mregs;
> >>u8 devctl = musb_readb(mregs, MUSB_DEVCTL);
> >>-   int err;
> >>+   int cfgchip2, err;
> >>+
> >>+   regmap_read(glue->cfgchip, CFGCHIP(2), &cfgchip2);
> >>
> >>err = musb->int_usb & MUSB_INTR_VBUSERROR;
> >>if (err) {
> >>@@ -332,10 +341,25 @@ static irqreturn_t da8xx_musb_interrupt(int irq,
> >>void *hci)
> >>musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
> >>portstate(musb->port1_status |=
> >>USB_PORT_STAT_POWER);
> >>del_timer(&otg_workaround);
> >>+   } else if ((cfgchip2 & CFGCHIP2_OTGMODE_MASK)
> >>+  == CFGCHIP2_OTGMODE_FORCE_HOST) {
> >>+   /*
> >>+* If we are forcing host mode, VBUSDRV is
> >>turned off
> >>+* after a device is disconnected. We need to
> >>toggle the
> >>+* VBUS/ID override to trigger turn it back on,
> >>which
> >>+* has the effect of triggering
> >>DA8XX_INTR_DRVVBUS again.
> >>+*/
> >>+   regmap_write_bits(glue->cfgchip, CFGCHIP(2),
> >>+   CFGCHIP2_OTGMODE_MASK,
> >>+   CFGCHIP2_OTGMODE_NO_OVERRIDE);
> >>+   reg

Re: [RFC] v4l2 support for thermopile devices

2016-10-28 Thread Matt Ranostay

On Fri, Oct 28, 2016 at 2:53 PM, Hans Verkuil  wrote:
> Hi Matt,
>
> On 28/10/16 22:14, Matt Ranostay wrote:
>>
>> So want to toss a few thoughts on adding support for thermopile
>> devices (could be used for FLIR Lepton as well) that output pixel
>> data.
>> These typically aren't DMA'able devices since they are low speed
>> (partly to limit the functionality to be in compliance with ITAR)
>> and data is piped over i2c/spi.
>>
>> My question is that there doesn't seem to be another driver that
>> polls frames off of a device and pushes it to the video buffer, and
>> wanted to be sure that this doesn't currently exist somewhere.
>
>
> Not anymore, but if you go back to kernel 3.6 then you'll find this driver:
>
> drivers/media/video/bw-qcam.c
>
> It was for a grayscale parallel port webcam (which explains why it was
> removed in 3.7 :-) ), and it used polling to get the pixels.

Yikes parallel port, but I'll take a look at that for some reference :)

>
>> Also more importantly does the mailing list think it belongs in v4l2?
>
>
> I think it fits. It's a sensor, just with a very small resolution and
> infrared
> instead of visible light.
>
>> We already came up with the opinion on the IIO list that it doesn't belong
>> in that subsystem since pushing raw pixel data to a buffer is a bit
>> hacky. Also could be generically written with regmap so other devices
>> (namely FLIR Lepton) could be easily supported.
>>
>> Need some input for the video pixel data types, which the device we
>> are using (see datasheet links below) is outputting pixel data in
>> little endian 16-bit of which a 12-bits signed value is used.  Does it
>> make sense to do some basic processing on the data since greyscale is
>> going to look weird with temperatures under 0C degrees? Namely a cold
>> object is going to be brighter than the hottest object it could read.
>
>
>> Or should a new V4L2_PIX_FMT_* be defined and processing done in
>> software?
>
>
> I would recommend that. It's no big deal, as long as the new format is
> documented.
>
>> Another issue is how to report the scaling value of 0.25 C
>> for each LSB of the pixels to the respective recording application.
>
>
> Probably through a read-only control, but I'm not sure.
>
> Regards,
>
> Hans
>
>>
>> Datasheet:
>> http://media.digikey.com/pdf/Data%20Sheets/Panasonic%20Sensors%20PDFs/Grid-EYE_AMG88.pdf
>> Datasheet:
>> https://eewiki.net/download/attachments/13599167/Grid-EYE%20SPECIFICATIONS%28Reference%29.pdf?version=1&modificationDate=1380660426690&api=v2
>>
>> Thanks,
>>
>> Matt
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-media" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>

Re: [RFC] v4l2 support for thermopile devices

2016-10-28 Thread Matt Ranostay

On Fri, Oct 28, 2016 at 1:30 PM, Devin Heitmueller
 wrote:
> Hi Matt,
>
>> Need some input for the video pixel data types, which the device we
>> are using (see datasheet links below) is outputting pixel data in
>> little endian 16-bit of which a 12-bits signed value is used.  Does it
>> make sense to do some basic processing on the data since greyscale is
>> going to look weird with temperatures under 0C degrees? Namely a cold
>> object is going to be brighter than the hottest object it could read.
>> Or should a new V4L2_PIX_FMT_* be defined and processing done in
>> software?  Another issue is how to report the scaling value of 0.25 C
>> for each LSB of the pixels to the respective recording application.
>
> Regarding the format for the pixel data:  I did some research into
> this when doing some driver work for the Seek Thermal (a product
> similar to the FLIR Lepton).  While it would be nice to be able to use
> an existing application like VLC or gStreamer to just take the video
> and capture from the V4L2 interface with no additional userland code,
> the reality is that how you colorize the data is going to be highly
> user specific (e.g. what thermal ranges to show with what colors,
> etc).  If your goal is really to do a V4L2 driver which returns the
> raw data, then you're probably best returning it in the native
> greyscale format (whether that be an existing V4L2 PIX_FMT or a new
> one needs to be defined), and then in software you can figure out how
> to colorize it.
>

Good point I was leaning to having userspace do it. But didn't think
of the color mapping part though so even more reason.

> Just my opinion though
>
> Devin
>
> --
> Devin J. Heitmueller - Kernel Labs
> http://www.kernellabs.com

Re: [RFC] v4l2 support for thermopile devices

2016-10-28 Thread Matt Ranostay

On Fri, Oct 28, 2016 at 1:40 PM, Marek Vasut  wrote:
> On 10/28/2016 10:30 PM, Devin Heitmueller wrote:
>> Hi Matt,
>>
>>> Need some input for the video pixel data types, which the device we
>>> are using (see datasheet links below) is outputting pixel data in
>>> little endian 16-bit of which a 12-bits signed value is used.  Does it
>>> make sense to do some basic processing on the data since greyscale is
>>> going to look weird with temperatures under 0C degrees? Namely a cold
>>> object is going to be brighter than the hottest object it could read.
>>> Or should a new V4L2_PIX_FMT_* be defined and processing done in
>>> software?  Another issue is how to report the scaling value of 0.25 C
>>> for each LSB of the pixels to the respective recording application.
>>
>> Regarding the format for the pixel data:  I did some research into
>> this when doing some driver work for the Seek Thermal (a product
>> similar to the FLIR Lepton).  While it would be nice to be able to use
>> an existing application like VLC or gStreamer to just take the video
>> and capture from the V4L2 interface with no additional userland code,
>> the reality is that how you colorize the data is going to be highly
>> user specific (e.g. what thermal ranges to show with what colors,
>> etc).  If your goal is really to do a V4L2 driver which returns the
>> raw data, then you're probably best returning it in the native
>> greyscale format (whether that be an existing V4L2 PIX_FMT or a new
>> one needs to be defined), and then in software you can figure out how
>> to colorize it.
>
> All true, I also did my share of poking into SEEK Thermal USB and it is
> an excellent candidate for a V4L2 driver, that one. But I think this
> device here is producing much smaller images, something like 8x8 pixels.

Yes this is only 64 pixels (8x8 grid) but it is video still. Does have
some major pluses over a FLIR camera though, mainly power usage is
really low, and cost is lower (although that reason is decreasing
everyday).

>
> --
> Best regards,
> Marek Vasut

[PATCH v2] checkpatch: fix uninitialized var when run with --no-tree

2016-10-28 Thread Brian Norris

If checkpatch.pl gets copied out of the tree, --no-tree shouldn't start
complaining:

  Use of uninitialized value $root in concatenation (.) or string at
  /path/to/checkpatch.pl line 764.

Let's just give the safe answer instead -- don't warn about "obsolete"
files.

Fixes: 85b0ee18bbf8 ("checkpatch: see if modified files are marked obsolete in 
MAINTAINERS")
Signed-off-by: Brian Norris 
---
v2: change condition to check for $root, not $tree

 scripts/checkpatch.pl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index a8368d1c4348..c9cbfed326a7 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -761,6 +761,8 @@ sub seed_camelcase_file {
 sub is_maintained_obsolete {
my ($filename) = @_;
 
+   return 0 if (!defined $root);
+
return 0 if (!(-e "$root/scripts/get_maintainer.pl"));
 
my $status = `perl $root/scripts/get_maintainer.pl --status --nom --nol 
--nogit --nogit-fallback -f $filename 2>&1`;
-- 
2.8.0.rc3.226.g39d4020

Re: [PATCH] checkpatch: fix uninitialized var when run with --no-tree

2016-10-28 Thread Brian Norris

On Fri, Oct 28, 2016 at 07:26:31PM -0700, Brian Norris wrote:
> From: Brian Norris 
> 
> If checkpatch.pl gets copied out of the tree, --no-tree shouldn't start
> complaining:
> 
>   Use of uninitialized value $root in concatenation (.) or string at
>   /path/to/checkpatch.pl line 764.
> 
> Let's just give the safe answer instead -- don't warn about "obsolete"
> files.
> 
> Fixes: 85b0ee18bbf8 ("checkpatch: see if modified files are marked obsolete 
> in MAINTAINERS")
> Signed-off-by: Brian Norris 
> ---
> This is a 4.9-rc1 regression
> 
>  scripts/checkpatch.pl | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> index a8368d1c4348..c8cd643dbc6f 100755
> --- a/scripts/checkpatch.pl
> +++ b/scripts/checkpatch.pl
> @@ -761,6 +761,8 @@ sub seed_camelcase_file {
>  sub is_maintained_obsolete {
>   my ($filename) = @_;
>  
> + return 0 if (!$tree);

Actually, I'm torn on this. It looks really odd to check for !$tree
here, but it's the only supported case where $root shouldn't be defined.
Maybe (!defined $root) is a better test? (Sorry, I did a double-take on
this after I sent it.)

Both would be equally correct, but I suppose the latter would be
clearer. I'll send v2.

Brian

> +
>   return 0 if (!(-e "$root/scripts/get_maintainer.pl"));
>  
>   my $status = `perl $root/scripts/get_maintainer.pl --status --nom --nol 
> --nogit --nogit-fallback -f $filename 2>&1`;
> -- 
> 2.8.0.rc3.226.g39d4020
>

Re: [PATCH 2/3] firmware: qcom: scm: Remove core, iface and bus clocks dependency

2016-10-28 Thread kbuild test robot

Hi Sarangdhar,

[auto build test WARNING on linus/master]
[also build test WARNING on v4.9-rc2 next-20161028]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]
[Suggest to use git(>=2.9.0) format-patch --base= (or --base=auto for 
convenience) to record what (public, well-known) commit your patch series was 
built on]
[Check https://git-scm.com/docs/git-format-patch for more information]

url:
https://github.com/0day-ci/linux/commits/Sarangdhar-Joshi/Remove-clocks-dependency-from-SCM-driver/20161029-081333
config: arm-allmodconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm 

All warnings (new ones prefixed by >>):

   drivers/firmware/qcom_scm.c: In function 'qcom_scm_probe':
>> drivers/firmware/qcom_scm.c:337:9: warning: cast from pointer to integer of 
>> different size [-Wpointer-to-int-cast]
 clks = (uint64_t)of_device_get_match_data(&pdev->dev);
^

vim +337 drivers/firmware/qcom_scm.c

   321  bool qcom_scm_is_available(void)
   322  {
   323  return !!__scm;
   324  }
   325  EXPORT_SYMBOL(qcom_scm_is_available);
   326  
   327  static int qcom_scm_probe(struct platform_device *pdev)
   328  {
   329  struct qcom_scm *scm;
   330  uint64_t clks;
   331  int ret;
   332  
   333  scm = devm_kzalloc(&pdev->dev, sizeof(*scm), GFP_KERNEL);
   334  if (!scm)
   335  return -ENOMEM;
   336  
 > 337  clks = (uint64_t)of_device_get_match_data(&pdev->dev);
   338  if (clks & SCM_HAS_CORE_CLK) {
   339  scm->core_clk = devm_clk_get(&pdev->dev, "core");
   340  if (IS_ERR(scm->core_clk)) {
   341  if (PTR_ERR(scm->core_clk) == -EPROBE_DEFER)
   342  return PTR_ERR(scm->core_clk);
   343  
   344  scm->core_clk = NULL;
   345  }

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

[PATCH] checkpatch: fix uninitialized var when run with --no-tree

2016-10-28 Thread Brian Norris

From: Brian Norris 

If checkpatch.pl gets copied out of the tree, --no-tree shouldn't start
complaining:

  Use of uninitialized value $root in concatenation (.) or string at
  /path/to/checkpatch.pl line 764.

Let's just give the safe answer instead -- don't warn about "obsolete"
files.

Fixes: 85b0ee18bbf8 ("checkpatch: see if modified files are marked obsolete in 
MAINTAINERS")
Signed-off-by: Brian Norris 
---
This is a 4.9-rc1 regression

 scripts/checkpatch.pl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index a8368d1c4348..c8cd643dbc6f 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -761,6 +761,8 @@ sub seed_camelcase_file {
 sub is_maintained_obsolete {
my ($filename) = @_;
 
+   return 0 if (!$tree);
+
return 0 if (!(-e "$root/scripts/get_maintainer.pl"));
 
my $status = `perl $root/scripts/get_maintainer.pl --status --nom --nol 
--nogit --nogit-fallback -f $filename 2>&1`;
-- 
2.8.0.rc3.226.g39d4020

Re: [PATCH v2 0/5] make POSIX timers optional with some Kconfig help

2016-10-28 Thread Nicolas Pitre

On Sat, 29 Oct 2016, Paul Bolle wrote:

> On Tue, 2016-10-25 at 22:28 -0400, Nicolas Pitre wrote:
> > When POSIX timers are configured out, the PTP clock subsystem should be
> > left out as well. However a bunch of ethernet drivers currently *select*
> > the latter in their Kconfig entries. Therefore some more work was needed
> > to break that hard dependency from those drivers without preventing their
> > usage altogether.
> 
> By the way: would you have pointers to threads that discussed attempts
> to achieve this using currently available Kconfig options?

You could probably go backward from here:
https://lkml.org/lkml/2016/9/20/606


Nicolas

[GIT PULL] ACPI fixes for v4.9-rc3

2016-10-28 Thread Rafael J. Wysocki

Hi Linus,

Please pull from the tag

 git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git \
 acpi-4.9-rc3

with top-most commit 21e2d9d5292f8a5cd1aa4e63891437589d233d35

 Merge branches 'acpica-fixes', 'acpi-pci-fixes' and 'acpi-apei-fixes'

on top of commit 07d9a380680d1c0eb51ef87ff2eab5c994949e69

 Linux 4.9-rc2

to receive ACPI fixes for v4.9-rc3.

These fix recent ACPICA regressions, an older PCI IRQ management
regression, and an incorrect return value of a function in the APEI
code.

Specifics:

 - Fix three ACPICA issues related to the interpreter locking and
   introduced by recent changes in that area (Lv Zheng).

 - Fix a PCI IRQ management regression introduced during the 4.7
   cycle and related to the configuration of shared IRQs on systems
   with an ISA bus (Sinan Kaya).

 - Fix up a return value of one function in the APEI code (Punit
   Agrawal).

Thanks!


---

Lv Zheng (3):
  ACPICA: Dispatcher: Fix order issue of method termination
  ACPICA: Dispatcher: Fix an unbalanced lock exit path in
acpi_ds_auto_serialize_method()
  ACPICA: Dispatcher: Fix interpreter locking around
acpi_ev_initialize_region()

Punit Agrawal (1):
  ACPI / APEI: Fix incorrect return value of ghes_proc()

Sinan Kaya (3):
  ACPI/PCI/IRQ: assign ISA IRQ directly during early boot stages
  ACPI/PCI: pci_link: penalize SCI correctly
  ACPI/PCI: pci_link: Include PIRQ_PENALTY_PCI_USING for ISA IRQs

---

 arch/x86/kernel/acpi/boot.c|  1 +
 drivers/acpi/acpica/dsinit.c   | 11 +++---
 drivers/acpi/acpica/dsmethod.c | 50 +++---
 drivers/acpi/acpica/dswload2.c |  2 --
 drivers/acpi/acpica/evrgnini.c |  3 +++
 drivers/acpi/acpica/nsload.c   |  2 ++
 drivers/acpi/apei/ghes.c   |  2 +-
 drivers/acpi/pci_link.c| 38 ++--
 include/linux/acpi.h   |  1 +
 9 files changed, 54 insertions(+), 56 deletions(-)

[GIT PULL] Power management fixes for v4.9-rc3

2016-10-28 Thread Rafael J. Wysocki

Hi Linus,

Please pull from the tag

 git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git \
 pm-4.9-rc3

with top-most commit 8b2ada27dc1045e8191673bf769a1136ce8a0127

 Merge branches 'pm-cpufreq-fixes' and 'pm-sleep-fixes'

on top of commit 07d9a380680d1c0eb51ef87ff2eab5c994949e69

 Linux 4.9-rc2

to receive power management fixes for v4.9-rc3.

These fix two intel_pstate issues related to the way it works when
the scaling_governor sysfs attribute is set to "performance" and
fix up messages in the system suspend core code.

Specifics:

 - Fix a missing KERN_CONT in a system suspend message by converting
   the affected code to using pr_info() and pr_cont() instead of the
   "raw" printk() (Jon Hunter).

 - Make intel_pstate set the CPU P-state from its .set_policy()
   callback when the scaling_governor sysfs attribute is set to
   "performance" so that it interacts with NOHZ_FULL more
   predictably which was the case before 4.7 (Rafael Wysocki).

 - Make intel_pstate always request the maximum allowed P-state when
   the scaling_governor sysfs attribute is set to "performance" to
   prevent it from effectively ignoring that setting in some
   situations (Rafael Wysocki).

Thanks!

---

Jon Hunter (1):
  PM / suspend: Fix missing KERN_CONT for suspend message

Rafael J. Wysocki (2):
  cpufreq: intel_pstate: Set P-state upfront in performance mode
  cpufreq: intel_pstate: Always set max P-state in performance mode

---

 drivers/cpufreq/intel_pstate.c | 38 --
 kernel/power/suspend.c |  4 ++--
 2 files changed, 34 insertions(+), 8 deletions(-)

net/dccp: warning in dccp_feat_clone_sp_val/__might_sleep

2016-10-28 Thread Andrey Konovalov

Hi,

I've got the following error report while running the syzkaller fuzzer:

[ cut here ]
WARNING: CPU: 0 PID: 4608 at kernel/sched/core.c:7724
__might_sleep+0x14c/0x1a0 kernel/sched/core.c:7719
do not call blocking ops when !TASK_RUNNING; state=1 set at
[] prepare_to_wait+0xbc/0x210
kernel/sched/wait.c:178
Modules linked in:
CPU: 0 PID: 4608 Comm: syz-executor Not tainted 4.9.0-rc2+ #320
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
 88006625f7a0 81b46914 88006625f818 
 84052960  88006625f7e8 8237
 88006aceac00 1e2c ed000cc4beff 84052960
Call Trace:
 [< inline >] __dump_stack lib/dump_stack.c:15
 [] dump_stack+0xb3/0x10f lib/dump_stack.c:51
 [] __warn+0x1a7/0x1f0 kernel/panic.c:550
 [] warn_slowpath_fmt+0xac/0xd0 kernel/panic.c:565
 [] __might_sleep+0x14c/0x1a0 kernel/sched/core.c:7719
 [< inline >] slab_pre_alloc_hook mm/slab.h:393
 [< inline >] slab_alloc_node mm/slub.c:2634
 [< inline >] slab_alloc mm/slub.c:2716
 [] __kmalloc_track_caller+0x150/0x2a0 mm/slub.c:4240
 [] kmemdup+0x24/0x50 mm/util.c:113
 [] dccp_feat_clone_sp_val.part.5+0x4f/0xe0
net/dccp/feat.c:374
 [< inline >] dccp_feat_clone_sp_val net/dccp/feat.c:1141
 [< inline >] dccp_feat_change_recv net/dccp/feat.c:1141
 [] dccp_feat_parse_options+0xaa1/0x13d0 net/dccp/feat.c:1411
 [] dccp_parse_options+0x721/0x1010 net/dccp/options.c:128
 [] dccp_rcv_state_process+0x200/0x15b0 net/dccp/input.c:644
 [] dccp_v4_do_rcv+0xf4/0x1a0 net/dccp/ipv4.c:681
 [< inline >] sk_backlog_rcv ./include/net/sock.h:872
 [] __release_sock+0x126/0x3a0 net/core/sock.c:2044
 [] release_sock+0x59/0x1c0 net/core/sock.c:2502
 [< inline >] inet_wait_for_connect net/ipv4/af_inet.c:547
 [] __inet_stream_connect+0x5d2/0xbb0 net/ipv4/af_inet.c:617
 [] inet_stream_connect+0x55/0xa0 net/ipv4/af_inet.c:656
 [] SYSC_connect+0x244/0x2f0 net/socket.c:1533
 [] SyS_connect+0x24/0x30 net/socket.c:1514
 [] entry_SYSCALL_64_fastpath+0x1f/0xc2
arch/x86/entry/entry_64.S:209
---[ end trace 0dc4109d69f4e51e ]---

On commit 14970f204b1993af7459d5bd34aaff38dfee6670 (Oct 27).

A reproducer is attached.


dccp-feat-warn-poc.c
Description: Binary data

[PATCH 1/1] arm64: dts: msm8996: Add SCM DT node

Add SCM DT node to enable SCM functionality on MSM8996.

Signed-off-by: Sarangdhar Joshi 
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 9e960c1..4927097 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -164,6 +164,12 @@
 
};
 
+   firmware {
+   scm {
+   compatible = "qcom,scm-msm8996";
+   };
+   };
+
tcsr_mutex: hwlock {
compatible = "qcom,tcsr-mutex";
syscon = <&tcsr_mutex_regs 0 0x1000>;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

[PATCH 1/3] dt-bindings: firmware: scm: Add MSM8996 DT bindings

Add SCM DT bindings for Qualcomm's MSM8996 platform.

Signed-off-by: Sarangdhar Joshi 
---
 Documentation/devicetree/bindings/firmware/qcom,scm.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/firmware/qcom,scm.txt 
b/Documentation/devicetree/bindings/firmware/qcom,scm.txt
index 3b4436e..20f26fb 100644
--- a/Documentation/devicetree/bindings/firmware/qcom,scm.txt
+++ b/Documentation/devicetree/bindings/firmware/qcom,scm.txt
@@ -10,8 +10,10 @@ Required properties:
  * "qcom,scm-apq8064" for APQ8064 platforms
  * "qcom,scm-msm8660" for MSM8660 platforms
  * "qcom,scm-msm8690" for MSM8690 platforms
+ * "qcom,scm-msm8996" for MSM8996 platforms
  * "qcom,scm" for later processors (MSM8916, APQ8084, MSM8974, etc)
 - clocks: One to three clocks may be required based on compatible.
+ * No clock required for "qcom,scm-msm8996"
  * Only core clock required for "qcom,scm-apq8064", "qcom,scm-msm8660", and 
"qcom,scm-msm8960"
  * Core, iface, and bus clocks required for "qcom,scm"
 - clock-names: Must contain "core" for the core clock, "iface" for the 
interface
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

[PATCH 2/3] firmware: qcom: scm: Remove core, iface and bus clocks dependency

Core, iface and bus clocks are not required to be voted from SCM
driver for some of the Qualcomm chipsets. Remove dependency on
these clocks from driver.

Suggested-by: Bjorn Andersson 
Signed-off-by: Sarangdhar Joshi 
---
 drivers/firmware/qcom_scm.c | 65 -
 1 file changed, 46 insertions(+), 19 deletions(-)

diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index d79fecd..010a350 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -28,6 +28,10 @@
 
 #include "qcom_scm.h"
 
+#define SCM_HAS_CORE_CLK   BIT(0)
+#define SCM_HAS_IFACE_CLK  BIT(1)
+#define SCM_HAS_BUS_CLKBIT(2)
+
 struct qcom_scm {
struct device *dev;
struct clk *core_clk;
@@ -380,33 +384,43 @@ EXPORT_SYMBOL(qcom_scm_is_available);
 static int qcom_scm_probe(struct platform_device *pdev)
 {
struct qcom_scm *scm;
+   uint64_t clks;
int ret;
 
scm = devm_kzalloc(&pdev->dev, sizeof(*scm), GFP_KERNEL);
if (!scm)
return -ENOMEM;
 
-   scm->core_clk = devm_clk_get(&pdev->dev, "core");
-   if (IS_ERR(scm->core_clk)) {
-   if (PTR_ERR(scm->core_clk) == -EPROBE_DEFER)
-   return PTR_ERR(scm->core_clk);
+   clks = (uint64_t)of_device_get_match_data(&pdev->dev);
+   if (clks & SCM_HAS_CORE_CLK) {
+   scm->core_clk = devm_clk_get(&pdev->dev, "core");
+   if (IS_ERR(scm->core_clk)) {
+   if (PTR_ERR(scm->core_clk) == -EPROBE_DEFER)
+   return PTR_ERR(scm->core_clk);
 
-   scm->core_clk = NULL;
+   scm->core_clk = NULL;
+   }
}
 
if (of_device_is_compatible(pdev->dev.of_node, "qcom,scm")) {
-   scm->iface_clk = devm_clk_get(&pdev->dev, "iface");
-   if (IS_ERR(scm->iface_clk)) {
-   if (PTR_ERR(scm->iface_clk) != -EPROBE_DEFER)
-   dev_err(&pdev->dev, "failed to acquire iface 
clk\n");
-   return PTR_ERR(scm->iface_clk);
+   if (clks & SCM_HAS_IFACE_CLK) {
+   scm->iface_clk = devm_clk_get(&pdev->dev, "iface");
+   if (IS_ERR(scm->iface_clk)) {
+   if (PTR_ERR(scm->iface_clk) != -EPROBE_DEFER)
+   dev_err(&pdev->dev,
+   "failed to acquire iface 
clk\n");
+   return PTR_ERR(scm->iface_clk);
+   }
}
 
-   scm->bus_clk = devm_clk_get(&pdev->dev, "bus");
-   if (IS_ERR(scm->bus_clk)) {
-   if (PTR_ERR(scm->bus_clk) != -EPROBE_DEFER)
-   dev_err(&pdev->dev, "failed to acquire bus 
clk\n");
-   return PTR_ERR(scm->bus_clk);
+   if (clks & SCM_HAS_BUS_CLK) {
+   scm->bus_clk = devm_clk_get(&pdev->dev, "bus");
+   if (IS_ERR(scm->bus_clk)) {
+   if (PTR_ERR(scm->bus_clk) != -EPROBE_DEFER)
+   dev_err(&pdev->dev,
+   "failed to acquire bus clk\n");
+   return PTR_ERR(scm->bus_clk);
+   }
}
}
 
@@ -429,10 +443,23 @@ static int qcom_scm_probe(struct platform_device *pdev)
 }
 
 static const struct of_device_id qcom_scm_dt_match[] = {
-   { .compatible = "qcom,scm-apq8064",},
-   { .compatible = "qcom,scm-msm8660",},
-   { .compatible = "qcom,scm-msm8960",},
-   { .compatible = "qcom,scm",},
+   { .compatible = "qcom,scm-apq8064",
+ .data = (void *) SCM_HAS_CORE_CLK,
+   },
+   { .compatible = "qcom,scm-msm8660",
+ .data = (void *) SCM_HAS_CORE_CLK,
+   },
+   { .compatible = "qcom,scm-msm8960",
+ .data = (void *) SCM_HAS_CORE_CLK,
+   },
+   { .compatible = "qcom,scm-msm8996",
+ .data = NULL, /* no clocks */
+   },
+   { .compatible = "qcom,scm",
+ .data = (void *) ( SCM_HAS_CORE_CLK
+   | SCM_HAS_IFACE_CLK
+   | SCM_HAS_BUS_CLK ),
+   },
{}
 };
 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

[PATCH 3/3] firmware: qcom: scm: Return PTR_ERR when devm_clk_get fails

When devm_clk_get fails for core clock, the failure was ignored
and the core_clk was explicitly set to NULL so that other
remaining clocks can be queried. However, now that we have a
clean way of expressing the clock dependency, return failure when
devm_clk_get fails for core clock.

Signed-off-by: Sarangdhar Joshi 
---
 drivers/firmware/qcom_scm.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index 010a350..5c472da 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -395,10 +395,10 @@ static int qcom_scm_probe(struct platform_device *pdev)
if (clks & SCM_HAS_CORE_CLK) {
scm->core_clk = devm_clk_get(&pdev->dev, "core");
if (IS_ERR(scm->core_clk)) {
-   if (PTR_ERR(scm->core_clk) == -EPROBE_DEFER)
-   return PTR_ERR(scm->core_clk);
-
-   scm->core_clk = NULL;
+   if (PTR_ERR(scm->core_clk) != -EPROBE_DEFER)
+   dev_err(&pdev->dev,
+   "failed to acquire core clk\n");
+   return PTR_ERR(scm->core_clk);
}
}
 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

[PATCH 0/3] Remove clocks dependency from SCM driver

On earlier chipsets (APQ8064, MSM8660, MSM8960, MSM8916,
APQ8084, MSM8974) crypto operations of TZ depended on crypto
clocks controlled by users/clients. However on MSM8996 crypto clocks
control is handled internally in TZ itself. The current series of
patches handles this clock dependency in SCM driver.

Sarangdhar Joshi (3):
  dt-bindings: firmware: scm: Add MSM8996 DT bindings
  firmware: qcom: scm: Remove core, iface and bus clocks dependency
  firmware: qcom: scm: Return PTR_ERR when devm_clk_get fails

 .../devicetree/bindings/firmware/qcom,scm.txt  |  2 +
 drivers/firmware/qcom_scm.c| 65 +++---
 2 files changed, 48 insertions(+), 19 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

[GIT PULL] ARC updates for 4.9-rc3

Hi Linus,

Please pull

Thx,
-Vineet

-->
The following changes since commit 1001354ca34179f3db924eb66672442a173147dc:

  Linux 4.9-rc1 (2016-10-15 12:17:50 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git/ tags/arc-4.9-rc3

for you to fetch changes up to b75dcd9c7d352c7d9ea9010e95c708595094896a:

  ARC: module: print pretty section names (2016-10-28 10:10:29 -0700)


ARC updates for 4.9-rc3

- supporting IDU intc for UP builds

- Support gz, lzma compressed uImage [Daniel Mentz]

- Adjust /proc/cpuinfo for non-continuous cpu ids [Noam Camus]

- syscall for userspace cmpxchg assist for configs lacking hardware atomics

- rework of boot log printing mainly for identifying older arc700 cores

- retiring some old code, build toggles


Daniel Mentz (1):
  ARC: [build] Support gz, lzma compressed uImage

Noam Camus (1):
  ARC: Adjust cpuinfo for non-continuous cpu ids

Vineet Gupta (13):
  ARCv2: intc: untangle SMP, MCIP and IDU
  ARC: fix build warning in elf.h
  ARC: syscall for userspace cmpxchg assist
  ARCv2: IOC: use @ioc_enable not @ioc_exist where intended
  ARCv2: boot log: print IOC exists as well as enabled status
  ARC: boot log: refactor printing abt features not captured in BCRs
  ARC: boot log: don't assume SWAPE instruction support
  ARC: boot log: remove awkward space comma from MMU line
  ARC: boot log: refactor cpu name/release printing
  ARC: build: retire old toggles
  ARC: mm: retire ARC_DBG_TLB_MISS_COUNT...
  ARC: module: elide loop to save reference to .eh_frame
  ARC: module: print pretty section names

 arch/arc/Kconfig   |  27 -
 arch/arc/Makefile  |   3 -
 arch/arc/boot/Makefile |  16 +-
 arch/arc/include/asm/arcregs.h |   3 +-
 arch/arc/include/asm/cache.h   |   2 +-
 arch/arc/include/asm/elf.h |   2 +-
 arch/arc/include/asm/mcip.h|  16 ++
 arch/arc/include/asm/module.h  |   1 +
 arch/arc/include/asm/setup.h   |   6 +-
 arch/arc/include/asm/syscalls.h|   1 +
 arch/arc/include/uapi/asm/unistd.h |   9 +--
 arch/arc/kernel/mcip.c |  31 --
 arch/arc/kernel/module.c   |  53 +
 arch/arc/kernel/process.c  |  33 +++
 arch/arc/kernel/setup.c| 113 +
 arch/arc/kernel/troubleshoot.c | 110 
 arch/arc/mm/cache.c|  19 +++
 arch/arc/mm/dma.c  |   4 +-
 arch/arc/mm/tlb.c  |   6 +-
 arch/arc/mm/tlbex.S|  21 ---
 20 files changed, 203 insertions(+), 273 deletions(-)

Re: /dev/mem and PCI memory = EFAULT (regression?)

2016-10-28 Thread Stas Sergeev


29.10.2016 02:26, Linus Torvalds пишет:

On Fri, Oct 28, 2016 at 2:36 PM, Stas Sergeev  wrote:

On Fri, Oct 28, 2016 at 2:03 PM, Stas Sergeev  wrote:

Hello.

For a long time dosemu used /dev/mem for vga pass-through.
Now it appears /dev/mem has this check:
http://lxr.free-electrons.com/source/drivers/char/mem.c#L51
which prevents accesses to PCI memory regions if the
"high_memory" points low enough. It seems "high_memory"
just points to the end of the physical ram, so depending on
the ram size you either can access PCI devices or you get
EFAULT.
Was it wrong to use /dev/mem for accessing the PCI devices?
How should I do that now?

Has it ever worked for you?

Well, yes, even today: if the ram size is large enough to last
beyond the PCI space, then I am not getting EFAULT (i've yet
to check if the results are as expected, but at least no error
returned).
It is exactly that ram size involvement that made me think this
is a bug (and regression).


  That code is ancient, going back in some
form or another at least ten years.

Yes, indeed, I can see that on lxr now...


So /dev/mem does not allow "read()/write()" on IO memory, and really
hasn't in a long long time (maybe ever, quite frankly). It does allow
it on regular RAM, but STRICT_DEV_MEM then disallows that too for
security reasons (and realistically, everybody uses STRICT_DEV_MEM
these days).

What people do use /dev/mem for is to mmap() PCI memory, and then you
can access it from user space. That's the traditional model that X.org
used to do etc.

Yes, thanks, I was confused. mmap() is indeed unaffected,
only read and write are. So the only problem I have, is the test-case
I was trying, that does read/write and works depending on the
ram size (under qemu). But there is no problem for dosemu.
Perhaps for consistency it would be good to make things independent
of the ram size, but then there is no real problem here.
So sorry for the noise!

Re: [PATCH 09/10] scripts/basic/fixdep: Complete error handling in print_cmdline()

2016-10-28 Thread Jim Davis

On Fri, Oct 28, 2016 at 1:40 AM, SF Markus Elfring
 wrote:

> +   if (printf("cmd_%s := %s\n\n", target, cmdline) < 10) {

Rather than scatter fragile magic numbers, like 10, throughout the
code, if you're hell-bent on checking for printf errors you could
write a little wrapper function that hid the magic number and bundled
up the errno stuff.

But what would you expect printf error checking to tell a user?
Perhaps that he or she ran out of disk space, but that's going to be
painfully obvious anyway in almost every case.

-- 
Jim

Re: [PATCHv2] clk: xgene: Don't call __pa on ioremaped address

2016-10-28 Thread Stephen Boyd

On 10/28, Laura Abbott wrote:
> ioremaped addresses are not linearly mapped so the physical
> address can not be figured out via __pa. More generally, there
> is no guarantee that backing value of an ioremapped address
> is a physical address at all. The value here is only used
> for debugging so just drop the call to __pa on the ioremapped
> address.
> 
> Fixes: 6ae5fd381251 ("clk: xgene: Silence sparse warnings")
> Signed-off-by: Laura Abbott 
> ---

Applied to clk-fixes

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

Re: [PATCH] [v2 netfilter-next] netfilter: nf_tables: fib warnings

2016-10-28 Thread Florian Westphal

Arnd Bergmann  wrote:
> The newly added nft fib code produces two warnings:
> 
> net/ipv4/netfilter/nft_fib_ipv4.c: In function 'nft_fib4_eval':
> net/ipv4/netfilter/nft_fib_ipv4.c:80:6: error: unused variable 'i' 
> [-Werror=unused-variable]
> net/ipv4/netfilter/nft_fib_ipv4.c: In function ‘nft_fib4_eval’:
> net/ipv4/netfilter/nft_fib_ipv4.c:137:6: error: ‘oif’ may be used 
> uninitialized in this function [-Werror=maybe-uninitialized]
> 
> The first one is obvious as the only user of that variable is
> inside of an #ifdef
> 
> The second one is a bit trickier. It's clear that oif is in fact
> uninitialized when it gets used when neither NFTA_FIB_F_IIF nor
> NFTA_FIB_F_OIF are set, and just setting it to NULL won't work
> as it may later get dereferenced.
> 
> However, there is no need to search the result list if it is
> NULL, as Florian pointed out. This integrates his (untested)
> change to do so. I have confirmed that the combined patch
> solves both warnings, but as I don't fully understand Florian's
> change, I can't tell if it's correct.
> 
> Suggested-by: Florian Westphal 
> Fixes: 84f5eedb983e ("netfilter: nf_tables: add fib expression")
> Signed-off-by: Arnd Bergmann 

chain pre {
type filter hook prerouting priority 0; policy accept;
fib saddr oif "eth0"
}

eth0: default route, 192.168.7.10/16
eth1: 10.0.0.2/8

ping from 192.168.7.1 from peer on eth0: result eth0, ok
ping from 10.0.0.2 from peer on eth0: no result, ok
ping from 10.0.0.3 from peer on eth0: result eth1, ok

chain pre {
type filter hook prerouting priority 0; policy accept;
fib saddr . iif oif "eth0"
}

ping from 192.168.7.1 from peer on eth0: result eth0, ok
ping from 10.0.0.2 from peer on eth0: no result, ok
ping from 10.0.0.3 from peer on eth0: no result, ok

so:

Tested-by: Florian Westphal

Re: /dev/mem and PCI memory = EFAULT (regression?)

2016-10-28 Thread Linus Torvalds

On Fri, Oct 28, 2016 at 2:36 PM, Stas Sergeev  wrote:
 On Fri, Oct 28, 2016 at 2:03 PM, Stas Sergeev  wrote:
>
> Hello.
>
> For a long time dosemu used /dev/mem for vga pass-through.
> Now it appears /dev/mem has this check:
> http://lxr.free-electrons.com/source/drivers/char/mem.c#L51
> which prevents accesses to PCI memory regions if the
> "high_memory" points low enough. It seems "high_memory"
> just points to the end of the physical ram, so depending on
> the ram size you either can access PCI devices or you get
> EFAULT.
> Was it wrong to use /dev/mem for accessing the PCI devices?
> How should I do that now?

Has it ever worked for you? That code is ancient, going back in some
form or another at least ten years.

So /dev/mem does not allow "read()/write()" on IO memory, and really
hasn't in a long long time (maybe ever, quite frankly). It does allow
it on regular RAM, but STRICT_DEV_MEM then disallows that too for
security reasons (and realistically, everybody uses STRICT_DEV_MEM
these days).

What people do use /dev/mem for is to mmap() PCI memory, and then you
can access it from user space. That's the traditional model that X.org
used to do etc.

I'm surprised if there is a regression here, because I think
read/write really hasn't been supported in like forever. It's
fundamentally impossible to do on some architectures: the physical
access depends on the _size_ of the access on at least some alpha
CPU's, so you can't just do a random access to PCI address X, you need
to have a real size etc.

But if you can point to a particular commit that broke something (eg
with bisection), I'll certainly take a second look. I might have
missed something.

   Linus

Re: [PATCH] lpfc: use %zd format string for size_t

On 10/28/2016 03:03 PM, Arnd Bergmann wrote:
> On Friday, October 28, 2016 2:58:33 PM CEST Vineet Gupta wrote:
>> On 10/28/2016 02:52 PM, Vineet Gupta wrote:
>>> On 10/28/2016 02:44 PM, Vineet Gupta wrote:
 This is configuration specific, and something caused your compiler to
> be built assuming that size_t is unsigned long, while the kernel
> headers are assuming it should be unsigned int.
>>>
>>> So yes this seems to be target specific gcc thing
>>>
>>> for ARC 4.8
>>>
>>> #define PTRDIFF_TYPE "int"
>>>
>>> ARM
>>>
>>> #ifndef PTRDIFF_TYPE
>>> #define PTRDIFF_TYPE (TARGET_AAPCS_BASED ? "int" : "long int")
>>> #endif
>>>
>>> ARC gcc 6.2
>>>
>>> #undef PTRDIFF_TYPE
>>> #define PTRDIFF_TYPE "long int"
>>
>> Actually we need to adjust SIZE_TYPE (unsigned int) and PTRDIFF_TYPE (int) 
>> in the
>> gcc 6.x to fix this issue. And that is exactly what ARC gcc 4.8 has.
> 
> What compiler versions are most commonly used these days?

gcc 4.8 is used in production, but internally we are now moving towards 6.0 (to 
be
officially released soon)

> You should probably stay with the version that most people have
> and then update either the compiler or the kernel, whichever
> diverges from it.

In this case, the issue is simple - gcc 6.x doesn't behave the same as 4.8 so it
needs fixing.

> I see in the gcc git log that the version that had "int" got removed
> at some point, and the version that had "unsigned int" was added
> later.

The upstream version (per initial port) always had

#define SIZE_TYPE "long unsigned int"

which we fixed out-of-tree for 4.8 and this needs to be fixed now for gcc 6.x in
upstream too.

-Vineet

[PATCH 3/6] Input: gpio_keys - annotate PM methods as __maybe_unused

Instead of using #ifdef, let's mark suspend and resume methods as
__maybe_unused to provide better compile coverage.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/keyboard/gpio_keys.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/input/keyboard/gpio_keys.c 
b/drivers/input/keyboard/gpio_keys.c
index 890eb39..8f7c20b 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -814,8 +814,7 @@ static int gpio_keys_remove(struct platform_device *pdev)
return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int gpio_keys_suspend(struct device *dev)
+static int __maybe_unused gpio_keys_suspend(struct device *dev)
 {
struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev);
struct input_dev *input = ddata->input;
@@ -837,7 +836,7 @@ static int gpio_keys_suspend(struct device *dev)
return 0;
 }
 
-static int gpio_keys_resume(struct device *dev)
+static int __maybe_unused gpio_keys_resume(struct device *dev)
 {
struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev);
struct input_dev *input = ddata->input;
@@ -863,7 +862,6 @@ static int gpio_keys_resume(struct device *dev)
gpio_keys_report_state(ddata);
return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(gpio_keys_pm_ops, gpio_keys_suspend, 
gpio_keys_resume);
 
-- 
2.8.0.rc3.226.g39d4020

[PATCH 1/6] Input: gpio_keys_polled - keep button data constant

Commit 633a21d80b4a ("input: gpio_keys_polled: Add support for GPIO
descriptors") placed gpio descriptor into gpio_keys_button structure, which
is supposed to be part of platform data and not modifiable by the driver.
To keep the data constant, let's move the descriptor to
gpio_keys_button_data structure instead.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/keyboard/gpio_keys.c|  10 +--
 drivers/input/keyboard/gpio_keys_polled.c | 105 +-
 include/linux/gpio_keys.h |   4 +-
 3 files changed, 64 insertions(+), 55 deletions(-)

diff --git a/drivers/input/keyboard/gpio_keys.c 
b/drivers/input/keyboard/gpio_keys.c
index 2909365..890eb39 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -624,7 +624,6 @@ gpio_keys_get_devtree_pdata(struct device *dev)
struct gpio_keys_button *button;
int error;
int nbuttons;
-   int i;
 
node = dev->of_node;
if (!node)
@@ -640,19 +639,18 @@ gpio_keys_get_devtree_pdata(struct device *dev)
if (!pdata)
return ERR_PTR(-ENOMEM);
 
-   pdata->buttons = (struct gpio_keys_button *)(pdata + 1);
+   button = (struct gpio_keys_button *)(pdata + 1);
+
+   pdata->buttons = button;
pdata->nbuttons = nbuttons;
 
pdata->rep = !!of_get_property(node, "autorepeat", NULL);
 
of_property_read_string(node, "label", &pdata->name);
 
-   i = 0;
for_each_available_child_of_node(node, pp) {
enum of_gpio_flags flags;
 
-   button = &pdata->buttons[i++];
-
button->gpio = of_get_gpio_flags(pp, 0, &flags);
if (button->gpio < 0) {
error = button->gpio;
@@ -694,6 +692,8 @@ gpio_keys_get_devtree_pdata(struct device *dev)
if (of_property_read_u32(pp, "debounce-interval",
 &button->debounce_interval))
button->debounce_interval = 5;
+
+   button++;
}
 
if (pdata->nbuttons == 0)
diff --git a/drivers/input/keyboard/gpio_keys_polled.c 
b/drivers/input/keyboard/gpio_keys_polled.c
index 62bdb1d..2cf4078 100644
--- a/drivers/input/keyboard/gpio_keys_polled.c
+++ b/drivers/input/keyboard/gpio_keys_polled.c
@@ -30,6 +30,7 @@
 #define DRV_NAME   "gpio-keys-polled"
 
 struct gpio_keys_button_data {
+   struct gpio_desc *gpiod;
int last_state;
int count;
int threshold;
@@ -46,7 +47,7 @@ struct gpio_keys_polled_dev {
 };
 
 static void gpio_keys_button_event(struct input_polled_dev *dev,
-  struct gpio_keys_button *button,
+  const struct gpio_keys_button *button,
   int state)
 {
struct gpio_keys_polled_dev *bdev = dev->private;
@@ -70,15 +71,15 @@ static void gpio_keys_button_event(struct input_polled_dev 
*dev,
 }
 
 static void gpio_keys_polled_check_state(struct input_polled_dev *dev,
-struct gpio_keys_button *button,
+const struct gpio_keys_button *button,
 struct gpio_keys_button_data *bdata)
 {
int state;
 
if (bdata->can_sleep)
-   state = !!gpiod_get_value_cansleep(button->gpiod);
+   state = !!gpiod_get_value_cansleep(bdata->gpiod);
else
-   state = !!gpiod_get_value(button->gpiod);
+   state = !!gpiod_get_value(bdata->gpiod);
 
gpio_keys_button_event(dev, button, state);
 
@@ -142,48 +143,35 @@ static void gpio_keys_polled_close(struct 
input_polled_dev *dev)
pdata->disable(bdev->dev);
 }
 
-static struct gpio_keys_platform_data 
*gpio_keys_polled_get_devtree_pdata(struct device *dev)
+static struct gpio_keys_platform_data *
+gpio_keys_polled_get_devtree_pdata(struct device *dev)
 {
struct gpio_keys_platform_data *pdata;
struct gpio_keys_button *button;
struct fwnode_handle *child;
-   int error;
int nbuttons;
 
nbuttons = device_get_child_node_count(dev);
if (nbuttons == 0)
-   return NULL;
+   return ERR_PTR(-EINVAL);
 
pdata = devm_kzalloc(dev, sizeof(*pdata) + nbuttons * sizeof(*button),
 GFP_KERNEL);
if (!pdata)
return ERR_PTR(-ENOMEM);
 
-   pdata->buttons = (struct gpio_keys_button *)(pdata + 1);
+   button = (struct gpio_keys_button *)(pdata + 1);
+
+   pdata->buttons = button;
+   pdata->nbuttons = nbuttons;
 
pdata->rep = device_property_present(dev, "autorepeat");
device_property_read_u32(dev, "poll-interval", &pdata->poll_interval);
 
device_for_each_child_node(dev, child) {
-   struct gpio_desc *desc;
-
-   desc = devm_get_gpiod_from_child(dev, NULL, child);

[PATCH 5/6] Input: gpio_keys - add support for GPIO descriptors

From: Geert Uytterhoeven 

GPIO descriptors are the preferred way over legacy GPIO numbers
nowadays. Convert the driver to use GPIO descriptors internally but
still allow passing legacy GPIO numbers from platform data to support
existing platforms.

Based on commits 633a21d80b4a2cd6 ("input: gpio_keys_polled: Add support
for GPIO descriptors") and 1ae5ddb6f8837558 ("Input: gpio_keys_polled -
request GPIO pin as input.").

Signed-off-by: Geert Uytterhoeven 
Signed-off-by: Dmitry Torokhov 
---
 drivers/input/keyboard/gpio_keys.c | 37 ++---
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/drivers/input/keyboard/gpio_keys.c 
b/drivers/input/keyboard/gpio_keys.c
index d75a25c..0f04cb1 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -35,6 +36,7 @@
 struct gpio_button_data {
const struct gpio_keys_button *button;
struct input_dev *input;
+   struct gpio_desc *gpiod;
 
struct timer_list release_timer;
unsigned int release_delay; /* in msecs, for IRQ-only buttons */
@@ -140,7 +142,7 @@ static void gpio_keys_disable_button(struct 
gpio_button_data *bdata)
 */
disable_irq(bdata->irq);
 
-   if (gpio_is_valid(bdata->button->gpio))
+   if (bdata->gpiod)
cancel_delayed_work_sync(&bdata->work);
else
del_timer_sync(&bdata->release_timer);
@@ -358,19 +360,20 @@ static void gpio_keys_gpio_report_event(struct 
gpio_button_data *bdata)
const struct gpio_keys_button *button = bdata->button;
struct input_dev *input = bdata->input;
unsigned int type = button->type ?: EV_KEY;
-   int state = gpio_get_value_cansleep(button->gpio);
+   int state;
 
+   state = gpiod_get_value_cansleep(bdata->gpiod);
if (state < 0) {
-   dev_err(input->dev.parent, "failed to get gpio state\n");
+   dev_err(input->dev.parent,
+   "failed to get gpio state: %d\n", state);
return;
}
 
-   state = (state ? 1 : 0) ^ button->active_low;
if (type == EV_ABS) {
if (state)
input_event(input, type, button->code, button->value);
} else {
-   input_event(input, type, button->code, !!state);
+   input_event(input, type, button->code, state);
}
input_sync(input);
 }
@@ -456,7 +459,7 @@ static void gpio_keys_quiesce_key(void *data)
 {
struct gpio_button_data *bdata = data;
 
-   if (gpio_is_valid(bdata->button->gpio))
+   if (bdata->gpiod)
cancel_delayed_work_sync(&bdata->work);
else
del_timer_sync(&bdata->release_timer);
@@ -478,18 +481,30 @@ static int gpio_keys_setup_key(struct platform_device 
*pdev,
bdata->button = button;
spin_lock_init(&bdata->lock);
 
+   /*
+* Legacy GPIO number, so request the GPIO here and
+* convert it to descriptor.
+*/
if (gpio_is_valid(button->gpio)) {
+   unsigned flags = GPIOF_IN;
+
+   if (button->active_low)
+   flags |= GPIOF_ACTIVE_LOW;
 
-   error = devm_gpio_request_one(&pdev->dev, button->gpio,
- GPIOF_IN, desc);
+   error = devm_gpio_request_one(&pdev->dev, button->gpio, flags,
+ desc);
if (error < 0) {
dev_err(dev, "Failed to request GPIO %d, error %d\n",
button->gpio, error);
return error;
}
 
+   bdata->gpiod = gpio_to_desc(button->gpio);
+   if (!bdata->gpiod)
+   return -EINVAL;
+
if (button->debounce_interval) {
-   error = gpio_set_debounce(button->gpio,
+   error = gpiod_set_debounce(bdata->gpiod,
button->debounce_interval * 1000);
/* use timer if gpiolib doesn't provide debounce */
if (error < 0)
@@ -500,7 +515,7 @@ static int gpio_keys_setup_key(struct platform_device *pdev,
if (button->irq) {
bdata->irq = button->irq;
} else {
-   irq = gpio_to_irq(button->gpio);
+   irq = gpiod_to_irq(bdata->gpiod);
if (irq < 0) {
error = irq;
dev_err(dev,
@@ -575,7 +590,7 @@ static void gpio_keys_report_state(struct gpio_keys_drvdata 
*ddata)
 
for (i = 0; i < ddata->pdata->nbuttons; i++) {
struct gpio_button_data *bdata =

[PATCH 4/6] Input: gpio_keys - fix leaking DT node references

for_each_available_child_of_node(node, pp) takes reference to 'pp' and
drops it when attempting next iteration. However if we exit the loop early
we need to drop the reference ourselves.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/keyboard/gpio_keys.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/input/keyboard/gpio_keys.c 
b/drivers/input/keyboard/gpio_keys.c
index 8f7c20b..d75a25c 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -659,6 +659,7 @@ gpio_keys_get_devtree_pdata(struct device *dev)
dev_err(dev,
"Failed to get gpio flags, 
error: %d\n",
error);
+   of_node_put(pp);
return ERR_PTR(error);
}
} else {
@@ -669,12 +670,14 @@ gpio_keys_get_devtree_pdata(struct device *dev)
 
if (!gpio_is_valid(button->gpio) && !button->irq) {
dev_err(dev, "Found button without gpios or irqs\n");
+   of_node_put(pp);
return ERR_PTR(-EINVAL);
}
 
if (of_property_read_u32(pp, "linux,code", &button->code)) {
dev_err(dev, "Button without keycode: 0x%x\n",
button->gpio);
+   of_node_put(pp);
return ERR_PTR(-EINVAL);
}
 
-- 
2.8.0.rc3.226.g39d4020

[PATCH 6/6] Input: gpio_keys - switch to using generic device properties

Make use of the device property API in this driver so that both OF based
systems and ACPI based systems can use this driver.

Suggested-by: Geert Uytterhoeven 
Suggested-by: Mika Westerberg 
Signed-off-by: Dmitry Torokhov 
---
 drivers/input/keyboard/gpio_keys.c | 141 ++---
 1 file changed, 69 insertions(+), 72 deletions(-)

diff --git a/drivers/input/keyboard/gpio_keys.c 
b/drivers/input/keyboard/gpio_keys.c
index 0f04cb1..5576f2a 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -28,8 +28,6 @@
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
 #include 
 
@@ -468,7 +466,8 @@ static void gpio_keys_quiesce_key(void *data)
 static int gpio_keys_setup_key(struct platform_device *pdev,
struct input_dev *input,
struct gpio_button_data *bdata,
-   const struct gpio_keys_button *button)
+   const struct gpio_keys_button *button,
+   struct fwnode_handle *child)
 {
const char *desc = button->desc ? button->desc : "gpio_keys";
struct device *dev = &pdev->dev;
@@ -481,11 +480,28 @@ static int gpio_keys_setup_key(struct platform_device 
*pdev,
bdata->button = button;
spin_lock_init(&bdata->lock);
 
-   /*
-* Legacy GPIO number, so request the GPIO here and
-* convert it to descriptor.
-*/
-   if (gpio_is_valid(button->gpio)) {
+   if (child) {
+   bdata->gpiod = devm_get_gpiod_from_child(dev, NULL, child);
+   if (IS_ERR(bdata->gpiod)) {
+   error = PTR_ERR(bdata->gpiod);
+   if (error == -ENOENT) {
+   /*
+* GPIO is optional, we may be dealing with
+* purely interrupt-driven setup.
+*/
+   bdata->gpiod = NULL;
+   } else {
+   if (error != -EPROBE_DEFER)
+   dev_err(dev, "failed to get gpio: %d\n",
+   error);
+   return error;
+   }
+   }
+   } else if (gpio_is_valid(button->gpio)) {
+   /*
+* Legacy GPIO number, so request the GPIO here and
+* convert it to descriptor.
+*/
unsigned flags = GPIOF_IN;
 
if (button->active_low)
@@ -502,7 +518,9 @@ static int gpio_keys_setup_key(struct platform_device *pdev,
bdata->gpiod = gpio_to_desc(button->gpio);
if (!bdata->gpiod)
return -EINVAL;
+   }
 
+   if (bdata->gpiod) {
if (button->debounce_interval) {
error = gpiod_set_debounce(bdata->gpiod,
button->debounce_interval * 1000);
@@ -533,9 +551,10 @@ static int gpio_keys_setup_key(struct platform_device 
*pdev,
 
} else {
if (!button->irq) {
-   dev_err(dev, "No IRQ specified\n");
+   dev_err(dev, "Found button without gpio or irq\n");
return -EINVAL;
}
+
bdata->irq = button->irq;
 
if (button->type && button->type != EV_KEY) {
@@ -627,24 +646,18 @@ static void gpio_keys_close(struct input_dev *input)
  * Handlers for alternative sources of platform_data
  */
 
-#ifdef CONFIG_OF
 /*
- * Translate OpenFirmware node properties into platform_data
+ * Translate properties into platform_data
  */
 static struct gpio_keys_platform_data *
 gpio_keys_get_devtree_pdata(struct device *dev)
 {
-   struct device_node *node, *pp;
struct gpio_keys_platform_data *pdata;
struct gpio_keys_button *button;
-   int error;
+   struct fwnode_handle *child;
int nbuttons;
 
-   node = dev->of_node;
-   if (!node)
-   return ERR_PTR(-ENODEV);
-
-   nbuttons = of_get_available_child_count(node);
+   nbuttons = device_get_child_node_count(dev);
if (nbuttons == 0)
return ERR_PTR(-ENODEV);
 
@@ -659,64 +672,43 @@ gpio_keys_get_devtree_pdata(struct device *dev)
pdata->buttons = button;
pdata->nbuttons = nbuttons;
 
-   pdata->rep = !!of_get_property(node, "autorepeat", NULL);
+   pdata->rep = device_property_read_bool(dev, "autorepeat");
 
-   of_property_read_string(node, "label", &pdata->name);
+   device_property_read_string(dev, "label", &pdata->name);
 
-   for_each_available_child_of_node(node, pp) {
-   enum of_gpio_flags flags;
+   device_for_each_child_node(dev, child) {
+   if (is_of_node(child))
+   button->i

Re: Linux Foundation Technical Advisory Board Elections updates and location

2016-10-28 Thread Frank Rowand

Hi Chris,

It's time for the annual question of: where do I find the TAB minutes?

I was sure I had a bookmark of the link, but I seem to have misplaced it.

Thanks,

Frank


On 10/28/16 12:39, Chris Mason wrote:
> Hello everyone,
> 
> A few updates about the TAB election.  It is scheduled for 5pm on Wednesday 
> November 2nd, in the Coronado/DeVargas room, at the conference center.  We 
> move through the voting pretty quickly, and you'll finish with plenty of time 
> to spare before the evening reception.
> 
> The nominees so far:
> 
> Josh Triplett
> Rik van Riel
> Dave Taht
> Chris Mason
> Dan Williams
> 
> Steve Rostedt is maintaining a list of the nominees and their statements at 
> the URL below.  He promises to stay at the keyboard and keep updating it 
> until 12pm EDT Oct 30th:
> 
> https://goo.gl/xlAoJl
> 
> The elections for five of the ten members of the Linux Foundation Technical 
> Advisory Board (TAB) are held every year[1]. This year the
> election will be at the 2016 Kernel Summit in Santa Fe, NM.
> 
> The elections will take place at the conference center on Wednesday Nov 2nd, 
> shortly before the evening Kernel Summit/Plumbers reception.  The elections 
> will be open to all attendees of both the Kernel Summit and the Linux 
> Plumbers.
> 
> Anyone is eligible to stand for election, simply send your nomination to:
> 
> tech-board-discuss at lists.linux-foundation.org
> 
> Just before the election, everyone will have a chance to introduce themselves 
> and briefly talk about why they would like to participate on the Technical 
> Advisory Board.   This year, we're encouraging everyone to include those 
> details along with their nomination, which we will compile into an online 
> document for quick reference.
> 
> The deadline for receiving nominations is up until the beginning of
> the event where the election is held.  Any statements for the online document 
> need to be sent by Friday Oct 28th.  (Please remember if
> you're not going to be present that things go wrong with both networks
> and mailing lists, so get your nomination in early).
> 
> Chris Mason, TAB Chair
> 
> [1] TAB members sit for a term of two years, and half of the board is up
> for election every year. Five of the seats are up for election now.
> The other five are halfway through their term and will be up for
> election next year.
>

[PATCH 2/6] Input: gpio_keys_polled - always use gpiod_get_value_cansleep

It does not matter if given GPIO may sleep or not when reading state,
polling is always done in a non-atomic context, so we should always
be able to simply use gpiod_get_value_cansleep().

Also let's note in the logs when we fail to read gpio state.

Reviewed-by: Hans de Goede 
Reviewed-by: Mika Westerberg 
Acked-by: Linus Walleij 
Signed-off-by: Dmitry Torokhov 
---
 drivers/input/keyboard/gpio_keys_polled.c | 21 ++---
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/input/keyboard/gpio_keys_polled.c 
b/drivers/input/keyboard/gpio_keys_polled.c
index 2cf4078..72b3503 100644
--- a/drivers/input/keyboard/gpio_keys_polled.c
+++ b/drivers/input/keyboard/gpio_keys_polled.c
@@ -34,7 +34,6 @@ struct gpio_keys_button_data {
int last_state;
int count;
int threshold;
-   int can_sleep;
 };
 
 struct gpio_keys_polled_dev {
@@ -76,16 +75,17 @@ static void gpio_keys_polled_check_state(struct 
input_polled_dev *dev,
 {
int state;
 
-   if (bdata->can_sleep)
-   state = !!gpiod_get_value_cansleep(bdata->gpiod);
-   else
-   state = !!gpiod_get_value(bdata->gpiod);
-
-   gpio_keys_button_event(dev, button, state);
+   state = gpiod_get_value_cansleep(bdata->gpiod);
+   if (state < 0) {
+   dev_err(dev->input->dev.parent,
+   "failed to get gpio state: %d\n", state);
+   } else {
+   gpio_keys_button_event(dev, button, state);
 
-   if (state != bdata->last_state) {
-   bdata->count = 0;
-   bdata->last_state = state;
+   if (state != bdata->last_state) {
+   bdata->count = 0;
+   bdata->last_state = state;
+   }
}
 }
 
@@ -342,7 +342,6 @@ static int gpio_keys_polled_probe(struct platform_device 
*pdev)
}
}
 
-   bdata->can_sleep = gpiod_cansleep(bdata->gpiod);
bdata->last_state = -1;
bdata->threshold = DIV_ROUND_UP(button->debounce_interval,
pdata->poll_interval);
-- 
2.8.0.rc3.226.g39d4020

[PATCH] usb: gadget: mv_u3d: add check for dma mapping error

2016-10-28 Thread Alexey Khoroshilov

mv_u3d_req_to_trb() does not check for dma mapping errors.

By the way, the patch improves readability of mv_u3d_start_queue()
by rearranging its code with two semantic modifications:
- assignment of zero to ep->processing if usb_gadget_map_request() fails;
- propagation of error code from mv_u3d_req_to_trb() instead of 
  hardcoded -ENOMEM.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov 
---
 drivers/usb/gadget/udc/mv_u3d_core.c | 34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/usb/gadget/udc/mv_u3d_core.c 
b/drivers/usb/gadget/udc/mv_u3d_core.c
index b9e19a591322..8d726bd767fd 100644
--- a/drivers/usb/gadget/udc/mv_u3d_core.c
+++ b/drivers/usb/gadget/udc/mv_u3d_core.c
@@ -462,6 +462,12 @@ static int mv_u3d_req_to_trb(struct mv_u3d_req *req)
req->trb_head->trb_hw,
trb_num * sizeof(*trb_hw),
DMA_BIDIRECTIONAL);
+   if (dma_mapping_error(u3d->gadget.dev.parent,
+   req->trb_head->trb_dma)) {
+   kfree(req->trb_head->trb_hw);
+   kfree(req->trb_head);
+   return -EFAULT;
+   }
 
req->chain = 1;
}
@@ -487,30 +493,32 @@ mv_u3d_start_queue(struct mv_u3d_ep *ep)
ret = usb_gadget_map_request(&u3d->gadget, &req->req,
mv_u3d_ep_dir(ep));
if (ret)
-   return ret;
+   goto break_processing;
 
req->req.status = -EINPROGRESS;
req->req.actual = 0;
req->trb_count = 0;
 
-   /* build trbs and push them to device queue */
-   if (!mv_u3d_req_to_trb(req)) {
-   ret = mv_u3d_queue_trb(ep, req);
-   if (ret) {
-   ep->processing = 0;
-   return ret;
-   }
-   } else {
-   ep->processing = 0;
+   /* build trbs */
+   ret = mv_u3d_req_to_trb(req);
+   if (ret) {
dev_err(u3d->dev, "%s, mv_u3d_req_to_trb fail\n", __func__);
-   return -ENOMEM;
+   goto break_processing;
}
 
+   /* and push them to device queue */
+   ret = mv_u3d_queue_trb(ep, req);
+   if (ret)
+   goto break_processing;
+
/* irq handler advances the queue */
-   if (req)
-   list_add_tail(&req->queue, &ep->queue);
+   list_add_tail(&req->queue, &ep->queue);
 
return 0;
+
+break_processing:
+   ep->processing = 0;
+   return ret;
 }
 
 static int mv_u3d_ep_enable(struct usb_ep *_ep,
-- 
2.7.4

Re: [RFC PATCH] kbuild: add -fno-PIE

2016-10-28 Thread Andrew Morton

On Thu, 27 Oct 2016 09:28:24 +0200 Sebastian Andrzej Siewior 
 wrote:

> > Building with gcc 3.3 is apparently still possible, although it produces
> > tons of warnings and a modpost section mismatch.  Still, requiring gcc
> > 4.1 or newer would not be unreasonable, I think (still released a few
> > months earlier than binutils 2.17).
> 
> I remember you had once a server box running some enterprise distro
> which had an old gcc. Do you see any reason for not lifting the minimum
> gcc version to v4.1 ?

Seems OK to me.  I do have a gcc-3.4.5 for mips, sh and sparc64 sitting
around but I basically never use them and should update.

[PATCH 2/2] blk-mq: immediately dispatch big size request

2016-10-28 Thread Shaohua Li

This is corresponding part for blk-mq. Disk with multiple hardware
queues doesn't need this as we only hold 1 request at most.

Signed-off-by: Shaohua Li 
---
 block/blk-mq.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f3d27a6..9612306 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1401,13 +1401,18 @@ static blk_qc_t blk_sq_make_request(struct 
request_queue *q, struct bio *bio)
 */
plug = current->plug;
if (plug) {
+   struct request *first = NULL;
+
blk_mq_bio_to_request(rq, bio);
if (!request_count)
trace_block_plug(q);
+   else
+   first = list_entry_rq(plug->mq_list.next);
 
blk_mq_put_ctx(data.ctx);
 
-   if (request_count >= BLK_MAX_REQUEST_COUNT) {
+   if (request_count >= BLK_MAX_REQUEST_COUNT || (first &&
+   blk_rq_bytes(first) >= BLK_PLUG_FLUSH_SIZE)) {
blk_flush_plug_list(plug, false);
trace_block_plug(q);
}
-- 
2.9.3

[PATCH 1/2] block: immediately dispatch big size request

2016-10-28 Thread Shaohua Li

Currently block plug holds up to 16 non-mergeable requests. This makes
sense if the request size is small, eg, reduce lock contention. But if
request size is big enough, we don't need to worry about lock
contention. Holding such a request makes no sense and it lowers the disk
utilization.

In practice, this improves 10% throughput for my raid5 sequential write
workload.

The size (128k) is arbitrary right now, but it makes sure lock
contention is small. This probably could be more intelligent, eg, check
the average size of the held requests. Since this is mainly for sequential
IO, it is probably not worth it.

Signed-off-by: Shaohua Li 
---
 block/blk-core.c   | 4 +++-
 include/linux/blkdev.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 14d7c07..0a396e9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1763,7 +1763,9 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, 
struct bio *bio)
if (!request_count)
trace_block_plug(q);
else {
-   if (request_count >= BLK_MAX_REQUEST_COUNT) {
+   struct request *first = list_entry_rq(plug->list.next);
+   if (request_count >= BLK_MAX_REQUEST_COUNT ||
+   blk_rq_bytes(first) >= BLK_PLUG_FLUSH_SIZE) {
blk_flush_plug_list(plug, false);
trace_block_plug(q);
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c47c358..72fa505 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1078,6 +1078,7 @@ struct blk_plug {
struct list_head cb_list; /* md requires an unplug callback */
 };
 #define BLK_MAX_REQUEST_COUNT 16
+#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
 
 struct blk_plug_cb;
 typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
-- 
2.9.3

Re: [PATCH v2 0/5] make POSIX timers optional with some Kconfig help

2016-10-28 Thread Paul Bolle

On Tue, 2016-10-25 at 22:28 -0400, Nicolas Pitre wrote:
> When POSIX timers are configured out, the PTP clock subsystem should be
> left out as well. However a bunch of ethernet drivers currently *select*
> the latter in their Kconfig entries. Therefore some more work was needed
> to break that hard dependency from those drivers without preventing their
> usage altogether.

By the way: would you have pointers to threads that discussed attempts
to achieve this using currently available Kconfig options?

Thanks,


Paul Bolle

[PATCH] Staging: ks7010: ks7010_stio: Fixed several coding style issues

2016-10-28 Thread Manoj Sawai

Fixed all the "errors" reported by checkpatch.pl in ks7010_sdio.c
The "TODO" file asks to take the 80 character limit lightly, so
the file still has some warnings about character limit. All the
errors have been removed.

Signed-off-by: Manoj Sawai 
---
 drivers/staging/ks7010/ks7010_sdio.c | 60 
 1 file changed, 33 insertions(+), 27 deletions(-)

diff --git a/drivers/staging/ks7010/ks7010_sdio.c 
b/drivers/staging/ks7010/ks7010_sdio.c
index 81c46f4..6bbfda4 100644
--- a/drivers/staging/ks7010/ks7010_sdio.c
+++ b/drivers/staging/ks7010/ks7010_sdio.c
@@ -35,18 +35,17 @@ MODULE_DEVICE_TABLE(sdio, ks7010_sdio_ids);
 /* macro */
 
 #define inc_txqhead(priv) \
-(priv->tx_dev.qhead = (priv->tx_dev.qhead + 1) % TX_DEVICE_BUFF_SIZE)
+   (priv->tx_dev.qhead = (priv->tx_dev.qhead + 1) % TX_DEVICE_BUFF_SIZE)
 #define inc_txqtail(priv) \
-(priv->tx_dev.qtail = (priv->tx_dev.qtail + 1) % TX_DEVICE_BUFF_SIZE)
+   (priv->tx_dev.qtail = (priv->tx_dev.qtail + 1) % TX_DEVICE_BUFF_SIZE)
 #define cnt_txqbody(priv) \
-(((priv->tx_dev.qtail + TX_DEVICE_BUFF_SIZE) - (priv->tx_dev.qhead)) % 
TX_DEVICE_BUFF_SIZE)
-
+   (((priv->tx_dev.qtail + TX_DEVICE_BUFF_SIZE) - (priv->tx_dev.qhead)) % 
TX_DEVICE_BUFF_SIZE)
 #define inc_rxqhead(priv) \
-(priv->rx_dev.qhead = (priv->rx_dev.qhead + 1) % RX_DEVICE_BUFF_SIZE)
+   (priv->rx_dev.qhead = (priv->rx_dev.qhead + 1) % RX_DEVICE_BUFF_SIZE)
 #define inc_rxqtail(priv) \
-(priv->rx_dev.qtail = (priv->rx_dev.qtail + 1) % RX_DEVICE_BUFF_SIZE)
+   (priv->rx_dev.qtail = (priv->rx_dev.qtail + 1) % RX_DEVICE_BUFF_SIZE)
 #define cnt_rxqbody(priv) \
-(((priv->rx_dev.qtail + RX_DEVICE_BUFF_SIZE) - (priv->rx_dev.qhead)) % 
RX_DEVICE_BUFF_SIZE)
+   (((priv->rx_dev.qtail + RX_DEVICE_BUFF_SIZE) - (priv->rx_dev.qhead)) % 
RX_DEVICE_BUFF_SIZE)
 
 static int ks7010_sdio_read(struct ks_wlan_private *priv, unsigned int address,
unsigned char *buffer, int length)
@@ -190,9 +189,9 @@ static int _ks_wlan_hw_power_save(struct ks_wlan_private 
*priv)

atomic_read(&priv->psstatus.snooze_guard),
cnt_txqbody(priv));
 
-   if (!atomic_read(&priv->psstatus.confirm_wait)
-   && 
!atomic_read(&priv->psstatus.snooze_guard)
-   && !cnt_txqbody(priv)) {
+   if (!atomic_read(&priv->psstatus.confirm_wait) 
&&
+   !atomic_read(&priv->psstatus.snooze_guard) 
&&
+   !cnt_txqbody(priv)) {
retval =
ks7010_sdio_read(priv, INT_PENDING,
 &rw_data,
@@ -255,7 +254,7 @@ int ks_wlan_hw_power_save(struct ks_wlan_private *priv)
 
 static int enqueue_txdev(struct ks_wlan_private *priv, unsigned char *p,
 unsigned long size,
-void (*complete_handler) (void *arg1, void *arg2),
+void (*complete_handler)(void *arg1, void *arg2),
 void *arg1, void *arg2)
 {
struct tx_device_buffer *sp;
@@ -294,6 +293,7 @@ static int write_to_device(struct ks_wlan_private *priv, 
unsigned char *buffer,
int retval;
unsigned char rw_data;
struct hostif_hdr *hdr;
+
hdr = (struct hostif_hdr *)buffer;
 
DPRINTK(4, "size=%d\n", hdr->size);
@@ -326,8 +326,8 @@ static void tx_device_task(void *dev)
int rc = 0;
 
DPRINTK(4, "\n");
-   if (cnt_txqbody(priv) > 0
-   && atomic_read(&priv->psstatus.status) != PS_SNOOZE) {
+   if (cnt_txqbody(priv) > 0 &&
+   atomic_read(&priv->psstatus.status) != PS_SNOOZE) {
sp = &priv->tx_dev.tx_dev_buff[priv->tx_dev.qhead];
if (priv->dev_state >= DEVICE_STATE_BOOT) {
rc = write_to_device(priv, sp->sendp, sp->size);
@@ -353,11 +353,12 @@ static void tx_device_task(void *dev)
 }
 
 int ks_wlan_hw_tx(struct ks_wlan_private *priv, void *p, unsigned long size,
- void (*complete_handler) (void *arg1, void *arg2),
+ void (*complete_handler)(void *arg1, void *arg2),
  void *arg1, void *arg2)
 {
int result = 0;
struct hostif_hdr *hdr;
+
hdr = (struct hostif_hdr *)p;
 
if (hdr->event < HIF_DATA_REQ || HIF_REQ_MAX < hdr->event) {
@@ -412,7 +413,7 @@ static void ks_wlan_hw_rx(void *dev, uint16_t size)
/* receive data */
if (cnt_rxqbody(priv) >= (RX_DEVICE_BUFF_SIZE - 1)) {
/* in case of buffer overflow */
-   DPRINTK(1, "rx buffer overflow \n");
+   DPRINTK(1, "rx buffer overflow\n");
goto error_out;
}
rx_buffer = &priv->

Re: [PATCH 01/10] scripts/basic/bin2c: Complete error handling in main()

2016-10-28 Thread Jim Davis

On Fri, Oct 28, 2016 at 1:31 AM, SF Markus Elfring
 wrote:
> From: Markus Elfring 
> Date: Thu, 27 Oct 2016 16:15:04 +0200
>
> Return values were not checked from five calls of the function "printf".
>
> This issue was detected also by using the Coccinelle software.
>
>
> * Add a bit of exception handling there.
>
> * Optimise this function implementation a bit.

The most interesting thing about this patch was trying to figure out
how to actually get bin2c to run at all.  Making a defconfig kernel
didn't run it.  Making a kernel with the latest Ubuntu 16.10 config
file didn't run it.  Setting CONFIG_IKCONFIG runs it (once), for the
folks who want to use scripts/extract-ikconfig.  After that, if you
dig about in the makefiles, it looks like you have to turn on the
Tomoyo LSM -- which doesn't seem to be a common occurrence -- or else
set CONFIG_KEXEC_FILE to generate the 'purgatory' thing it uses.
Again, not the most frequent of events, as far as I can tell.

Given how uncommon running bin2c seems to be, "optimizing" it may not
be a useful project.

-- 
Jim

Re: [PATCH v2] NFSv4: replace seqcount_t with a seqlock_t

2016-10-28 Thread Trond Myklebust


> On Oct 28, 2016, at 17:05, Sebastian Andrzej Siewior  
> wrote:
> 
> The raw_write_seqcount_begin() in nfs4_reclaim_open_state() bugs me
> because it maps to preempt_disable() in -RT which I can't have at this
> point. So I took a look at the code.
> The lockdep part was removed in commit abbec2da13f0 ("NFS: Use
> raw_write_seqcount_begin/end int nfs4_reclaim_open_state") because
> lockdep complained. The whole seqcount thing was introduced in commit
> c137afabe330 ("NFSv4: Allow the state manager to mark an open_owner as
> being recovered").
> I don't understand how it is possible that we don't end up with two
> writers for the same resource because the `sp->so_lock' lock is dropped
> so soon in the list_for_each_entry() loop. It might be the
> test_and_clear_bit() check in nfs4_do_reclaim() but it might clear one
> bit on each iteration so I *think* we could have two invocations of the
> same struct nfs4_state_owner in nfs4_reclaim_open_state().
> So there is that.
> 
> But back to the list_for_each_entry() macro.
> It seems that this lock protects the ->so_states list among other
> atomic_t & flags members. So at the begin of the loop we inc ->count
> ensuring that this field is not removed while we use it. So we drop the
> ->so_lock lock during the loop it seems. And after nfs4_reclaim_locks()
> invocation we nfs4_put_open_state() and grab the ->so_lock again. So if
> we were the last user of this struct and we remove it, then the
> following list_next_entry() invocation is a use-after-free. Even if we
> use list_for_each_entry_safe() there is no guarantee that the following
> member is still valid because it might have been removed by another
> writer, right?
> So there is this.
> 
> However to address my initial problem I have here a patch :) So it uses
> a seqlock_t which ensures that there is only one writer at a time. So it
> should be basically what is happening now plus a tiny tiny tiny lock
> plus lockdep coverage. I tried to test this myself but I didn't manage to get
> into this code path at all so I might be doing something wrong.
> 
> Could you please check if this patch is working for you and whether my
> list_for_each_entry() observation is correct or not?
> 
> v1…v2: write_seqlock() disables preemption and some function need it
> (thread_run(), non-GFP_ATOMIC memory allocation()). We don't want
> preemption enabled because a preempted writer would stall the reader
> spinning. This is a duct tape mutex. Maybe the seqlock should go.
> 
> Signed-off-by: Sebastian Andrzej Siewior 
> ---
> fs/nfs/delegation.c |  4 ++--
> fs/nfs/nfs4_fs.h|  3 ++-
> fs/nfs/nfs4proc.c   |  4 ++--
> fs/nfs/nfs4state.c  | 23 +--
> 4 files changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
> index dff600ae0d74..d726d2e09353 100644
> --- a/fs/nfs/delegation.c
> +++ b/fs/nfs/delegation.c
> @@ -150,11 +150,11 @@ static int nfs_delegation_claim_opens(struct inode 
> *inode,
>   sp = state->owner;
>   /* Block nfs4_proc_unlck */
>   mutex_lock(&sp->so_delegreturn_mutex);
> - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
> + seq = read_seqbegin(&sp->so_reclaim_seqlock);
>   err = nfs4_open_delegation_recall(ctx, state, stateid, type);
>   if (!err)
>   err = nfs_delegation_claim_locks(ctx, state, stateid);
> - if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
> + if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq))
>   err = -EAGAIN;
>   mutex_unlock(&sp->so_delegreturn_mutex);
>   put_nfs_open_context(ctx);
> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
> index 9b3a82abab07..2fee1a2e8b57 100644
> --- a/fs/nfs/nfs4_fs.h
> +++ b/fs/nfs/nfs4_fs.h
> @@ -111,7 +111,8 @@ struct nfs4_state_owner {
>   unsigned longso_flags;
>   struct list_head so_states;
>   struct nfs_seqid_counter so_seqid;
> - seqcount_t   so_reclaim_seqcount;
> + seqlock_tso_reclaim_seqlock;
> + struct mutex so_reclaim_seqlock_mutex;
>   struct mutex so_delegreturn_mutex;
> };
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 7897826d7c51..9b9d53cd85f9 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -2685,7 +2685,7 @@ static int _nfs4_open_and_get_state(struct 
> nfs4_opendata *opendata,
>   unsigned int seq;
>   int ret;
> 
> - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
> + seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
> 
>   ret = _nfs4_proc_open(opendata);
>   if (ret != 0)
> @@ -2723,7 +2723,7 @@ static int _nfs4_open_and_get_state(struct 
> nfs4_opendata *opendata,
>   ctx->state = state;
>   if (d_inode(dentry) == state->inode) {
>   nfs_inode_attach_open_context(ctx);
> - if (read_seqcount_retry(&

Re: [PATCH] Staging: ks7010: ks7010_stio: Fixed several coding style issues

2016-10-28 Thread Greg KH

On Sat, Oct 29, 2016 at 03:48:17AM +0530, Manoj Sawai wrote:
> Fixed all the "errors" reported by checkpath.pl in ks7010_stio.c

"all"?  Please break this up into one-patch-per-thing series of patches.
And no, as my patchbot told you earlier today, "all coding style issues"
is not one thing.

thanks,

greg k-h

Re: [PATCH] ubifs: Fix regression in ubifs_readdir()

2016-10-28 Thread Jörg Krause

Richard,

On Fri, 2016-10-28 at 19:07 +0200, Richard Weinberger wrote:
> Jörg,
> 
> On 28.10.2016 18:19, Jörg Krause wrote:
> > Hi,
> > 
> > On Fri, 2016-10-28 at 11:53 +0200, Richard Weinberger wrote:
> > > Commit c83ed4c9dbb35 ("ubifs: Abort readdir upon error") broke
> > > overlayfs support because the fix exposed an internal error
> > > code to VFS.
> > > 
> > > Reported-by: Peter Rosin 
> > > Tested-by: Peter Rosin 
> > > Reported-by: Ralph Sennhauser 
> > > Fixes: c83ed4c9dbb35 ("ubifs: Abort readdir upon error")
> > > Cc: sta...@vger.kernel.org
> > > Signed-off-by: Richard Weinberger 
> > > ---
> > >  fs/ubifs/dir.c | 8 
> > >  1 file changed, 8 insertions(+)
> > > 
> > > diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
> > > index bd4a5e8ce441..ca16c5d7bab1 100644
> > > --- a/fs/ubifs/dir.c
> > > +++ b/fs/ubifs/dir.c
> > > @@ -543,6 +543,14 @@ static int ubifs_readdir(struct file *file,
> > > struct dir_context *ctx)
> > >  
> > >   if (err != -ENOENT)
> > >   ubifs_err(c, "cannot find next direntry, error
> > > %d",
> > > err);
> > > + else
> > > + /*
> > > +  * -ENOENT is a non-fatal error in this context,
> > > the
> > > TNC uses
> > > +  * it to indicate that the cursor moved past the
> > > current directory
> > > +  * and readdir() has to stop.
> > > +  */
> > > + err = 0;
> > > +
> > >  
> > >   /* 2 is a special value indicating that there are no
> > > more
> > > direntries */
> > >   ctx->pos = 2;
> > 
> > I'm not sure if it's related to the issue reported by Peter Rosin
> > and
> > Ralph Sennhauser, but I am still getting a kernel panic using UBIFS
> > with OverlayFS on Linux v4.9.0-rc2 with this patch applied:
> 
> Does reverting c83ed4c9dbb35 help?
> And are you 100% sure you applied the fix?

I double double checked. The fix was applied on the git tree, but the
compiler cache (I am using Buildroot with this option enabled) fooled
me by using an old copy. After disabling the compiler cache I got a
fixed build of the kernel. The panic is gone! Thanks!

> 
> Does the following WARN_ON() trigger?
> diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
> index ca16c5d7bab1..12ffc91f7ef8 100644
> --- a/fs/ubifs/dir.c
> +++ b/fs/ubifs/dir.c
> @@ -554,6 +554,9 @@ static int ubifs_readdir(struct file *file,
> struct dir_context *ctx)
> 
>   /* 2 is a special value indicating that there are no more
> direntries */
>   ctx->pos = 2;
> +
> + WARN_ON(err);
> +
>   return err;
>  }

Best regards,
Jörg Krause

Re: [PATCH] ipc/sem: ensure we left shift a ULL rather than a 32 bit integer

2016-10-28 Thread Davidlohr Bueso


On Fri, 28 Oct 2016, Colin King wrote:

Thanks.

Re: [PATCH v2 1/5] kconfig: introduce the "imply" keyword

2016-10-28 Thread Paul Bolle

On Thu, 2016-10-27 at 23:10 -0400, Nicolas Pitre wrote:
> On Fri, 28 Oct 2016, Paul Bolle wrote:
> > And in your example BAR is bool, right? Does the above get more
> > complicated if BAR would be tristate?
> 
> If BAR=m then implying BAZ from FOO=y will force BAZ to y or n, 
> bypassing the restriction provided by BAR like "select" does.  This is 
> somewhat questionable for "select" to do that, and the code emits a 
> warning when "select" bypasses a direct dependency set to n, but not 
> when set to m. For now "imply" simply tries to be consistent with 
> the "select" behavior.

Side note: yes, one can select a symbol that's missing one or more
dependencies. But since Kconfig has two separate methods to describe
relations (ie, selecting and depending) there's logically the
possibility of conflict. So we need a rule to resolve that conflict.
That rule is: "select" beats "depends on". I don't think that this rule
is less plausible than the opposite rule.

Paul Bolle

Re: /dev/mem and PCI memory = EFAULT (regression?)

2016-10-28 Thread Stas Sergeev


OK, thanks for the prompt reply, Andy!
I'll try your sysfs suggestion.

Let me just add a bit of CCs to get more people involved.
I believe the ram size involvement makes this smell like a bug,
plus it looks like a regression. Not that I am going to suffer
much (this pass-through stuff always breaks in a million
different ways, I'll probably just remove it), but it's still a very
questionable change IMO.

29.10.2016 00:26, Andy Lutomirski пишет:

On Fri, Oct 28, 2016 at 2:18 PM, Stas Sergeev  wrote:

29.10.2016 00:05, Andy Lutomirski пишет:


On Fri, Oct 28, 2016 at 2:03 PM, Stas Sergeev  wrote:

Hello.

For a long time dosemu has used /dev/mem for vga pass-through.
Now it appears /dev/mem has this check:
http://lxr.free-electrons.com/source/drivers/char/mem.c#L51
which prevents access to PCI memory regions if the
"high_memory" points low enough. It seems "high_memory"
just points to the end of the physical ram, so depending on
the ram size you either can access PCI devices or you get
EFAULT.
Was it wrong to use /dev/mem for accessing the PCI devices?
How should I do that now?


What is DOSEMU trying to do here?  Access the framebuffer?

ISTM it would be better to use the DRM or FB layer directly (just map
the framebuffer itself) or, if necessary, use VFIO.

Yes, framebuffer.
Mapping fb directly is not really an option because dosemu does
its own modesetting when you do vga pass-through. So it is
usually started that way with "nomodeset=1" and w/o fb.
Yes, some crazy people try the pass-through even out of fb
console, but that's weird (the problem is most SDL2 builds do
not have directfb backend compiled in, otherwise we could
just use SDL rendering on top of fb).

The thing is, I needed (for testing purposes, unrelated to dosemu)
some quick way to access the PCI memory space, and to my surprise I
couldn't do that with /dev/mem. Was this really disallowed intentionally?

I believe so.

Try the /sys/devices/.../resource? and resource?_wc files.

--Andy

[PATCH v6 02/10] x86/intel_rdt: Build structures for each resource based on cache topology

From: Tony Luck 

We use the cpu hotplug notifier to catch each cpu in turn and look at
its cache topology w.r.t each of the resource groups. As we discover
new resources, we initialize the bitmask array for each to the default
(full access) value.

Signed-off-by: Tony Luck 
Signed-off-by: Fenghua Yu 
---
 arch/x86/include/asm/intel_rdt.h |  35 
 arch/x86/kernel/cpu/intel_rdt.c  | 189 +++
 2 files changed, 224 insertions(+)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 9780409..c0d0a6e 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -39,6 +39,34 @@ struct rdt_resource {
int cbm_idx_offset;
 };
 
+/**
+ * struct rdt_domain - group of cpus sharing an RDT resource
+ * @list:  all instances of this resource
+ * @id:unique id for this instance
+ * @cpu_mask:  which cpus share this resource
+ * @cbm:   array of cache bit masks (indexed by CLOSID)
+ */
+struct rdt_domain {
+   struct list_headlist;
+   int id;
+   struct cpumask  cpu_mask;
+   u32 *cbm;
+};
+
+/**
+ * struct msr_param - set a range of MSRs from a domain
+ * @res:   The resource to use
+ * @low:   Beginning index from base MSR
+ * @high:  End index
+ */
+struct msr_param {
+   struct rdt_resource *res;
+   int low;
+   int high;
+};
+
+extern struct mutex rdtgroup_mutex;
+
 extern struct rdt_resource rdt_resources_all[];
 
 enum {
@@ -56,6 +84,11 @@ enum {
 r++) \
if (r->capable)
 
+#define for_each_enabled_rdt_resource(r) \
+   for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+r++) \
+   if (r->enabled)
+
 /* CPUID.(EAX=10H, ECX=ResID=1).EAX */
 union cpuid_0x10_1_eax {
struct {
@@ -71,4 +104,6 @@ union cpuid_0x10_1_edx {
} split;
unsigned int full;
 };
+
+void rdt_cbm_update(void *arg);
 #endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 157dc8d0..6b3fd78 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -26,11 +26,16 @@
 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
 #include 
 
+/* Mutex to protect rdtgroup access. */
+DEFINE_MUTEX(rdtgroup_mutex);
+
 #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
 
 struct rdt_resource rdt_resources_all[] = {
@@ -72,6 +77,11 @@ struct rdt_resource rdt_resources_all[] = {
},
 };
 
+static int cbm_idx(struct rdt_resource *r, int closid)
+{
+   return closid * r->cbm_idx_multi + r->cbm_idx_offset;
+}
+
 /*
  * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
  * as they do not have CPUID enumeration support for Cache allocation.
@@ -176,13 +186,192 @@ static inline bool get_rdt_resources(void)
return ret;
 }
 
+static int get_cache_id(int cpu, int level)
+{
+   struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
+   int i;
+
+   for (i = 0; i < ci->num_leaves; i++) {
+   if (ci->info_list[i].level == level)
+   return ci->info_list[i].id;
+   }
+
+   return -1;
+}
+
+void rdt_cbm_update(void *arg)
+{
+   struct msr_param *m = (struct msr_param *)arg;
+   struct rdt_resource *r = m->res;
+   int i, cpu = smp_processor_id();
+   struct rdt_domain *d;
+
+   list_for_each_entry(d, &r->domains, list) {
+   /* Find the domain that contains this CPU */
+   if (cpumask_test_cpu(cpu, &d->cpu_mask))
+   goto found;
+   }
+   pr_info_once("cpu %d not found in any domain for resource %s\n",
+cpu, r->name);
+
+   return;
+
+found:
+   for (i = m->low; i < m->high; i++) {
+   int idx = cbm_idx(r, i);
+
+   wrmsrl(r->msr_base + idx, d->cbm[i]);
+   }
+}
+
+/*
+ * rdt_find_domain - Find a domain in a resource that matches input resource id
+ *
+ * Search a resource r's domain list to find the resource id. If the resource
+ * id is found in a domain, return the domain. Otherwise, if requested by
+ * caller, return the first domain whose id is bigger than the input id.
+ * The domain list is sorted by id in ascending order.
+ */
+static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
+ struct list_head **pos)
+{
+   struct rdt_domain *d;
+   struct list_head *l;
+
+   if (id < 0)
+   return ERR_PTR(id);
+
+   list_for_each(l, &r->domains) {
+   d = list_entry(l, struct rdt_domain, list);
+

[PATCH v6 06/10] x86/intel_rdt: Add cpus file

From: Tony Luck 

Now we populate each directory with a read/write (mode 0644) file
named "cpus". This is used to over-ride the resources available
to processes in the default resource group when running on specific
CPUs.  Each "cpus" file reads as a cpumask showing which CPUs belong
to this resource group. Initially all online CPUs are assigned to
the default group. They can be added to other groups by writing a
cpumask to the "cpus" file in the directory for the resource group
(which will remove them from the previous group to which they were
assigned). CPU online/offline operations will delete CPUs that go
offline from whatever group they are in and add new CPUs to the
default group.

If there are CPUs assigned to a group when the directory is removed,
they are returned to the default group.

Signed-off-by: Tony Luck 
Signed-off-by: Fenghua Yu 
---
 arch/x86/include/asm/intel_rdt.h |   5 ++
 arch/x86/kernel/cpu/intel_rdt.c  |  23 +-
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 131 ++-
 3 files changed, 154 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index a6c7d94..3e8450f 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -12,13 +12,16 @@
  * @kn:kernfs node
  * @rdtgroup_list: linked list for all rdtgroups
  * @closid:closid for this rdtgroup
+ * @cpu_mask:  CPUs assigned to this rdtgroup
  * @flags: status bits
  * @waitcount: how many cpus expect to find this
+ * group when they acquire rdtgroup_mutex
  */
 struct rdtgroup {
struct kernfs_node  *kn;
struct list_headrdtgroup_list;
int closid;
+   struct cpumask  cpu_mask;
int flags;
atomic_twaitcount;
 };
@@ -160,6 +163,8 @@ union cpuid_0x10_1_edx {
unsigned int full;
 };
 
+DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid);
+
 void rdt_cbm_update(void *arg);
 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
 void rdtgroup_kn_unlock(struct kernfs_node *kn);
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 63a0045..9e1f901 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -36,6 +36,8 @@
 /* Mutex to protect rdtgroup access. */
 DEFINE_MUTEX(rdtgroup_mutex);
 
+DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid);
+
 #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
 
 struct rdt_resource rdt_resources_all[] = {
@@ -331,16 +333,25 @@ static void domain_remove_cpu(int cpu, struct 
rdt_resource *r)
}
 }
 
-static int intel_rdt_online_cpu(unsigned int cpu)
+static void clear_closid(int cpu)
 {
struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
+   per_cpu(cpu_closid, cpu) = 0;
+   state->closid = 0;
+   wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
+}
+
+static int intel_rdt_online_cpu(unsigned int cpu)
+{
struct rdt_resource *r;
 
mutex_lock(&rdtgroup_mutex);
for_each_capable_rdt_resource(r)
domain_add_cpu(cpu, r);
-   state->closid = 0;
-   wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
+   /* The cpu is set in default rdtgroup after online. */
+   cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
+   clear_closid(cpu);
mutex_unlock(&rdtgroup_mutex);
 
return 0;
@@ -348,11 +359,17 @@ static int intel_rdt_online_cpu(unsigned int cpu)
 
 static int intel_rdt_offline_cpu(unsigned int cpu)
 {
+   struct rdtgroup *rdtgrp;
struct rdt_resource *r;
 
mutex_lock(&rdtgroup_mutex);
for_each_capable_rdt_resource(r)
domain_remove_cpu(cpu, r);
+   list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
+   if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask))
+   break;
+   }
+   clear_closid(cpu);
mutex_unlock(&rdtgroup_mutex);
 
return 0;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c 
b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 6ff35786..931b406 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -20,6 +20,7 @@
 
 #define pr_fmt(fmt)KBUILD_MODNAME ": " fmt
 
+#include 
 #include 
 #include 
 #include 
@@ -172,6 +173,111 @@ static struct kernfs_ops rdtgroup_kf_single_ops = {
.seq_show   = rdtgroup_seqfile_show,
 };
 
+static int rdtgroup_cpus_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+   struct rdtgroup *rdtgrp;
+   int ret = 0;
+
+   rdtgrp = rdtgroup_kn_lock_live(of->kn);
+
+   if (rdtgrp)
+   seq_printf(s, "%*pb\n", cpumask_pr_args(&rdtgrp->cpu_mask));
+   else
+

[PATCH v6 09/10] x86/intel_rdt: Add scheduler hook

From: Fenghua Yu 

Hook the x86 scheduler code to update closid based on whether the current
task is assigned to a specific closid or running on a CPU assigned to a
specific closid.

Signed-off-by: Fenghua Yu 
---
 arch/x86/include/asm/intel_rdt.h | 42 
 arch/x86/kernel/cpu/intel_rdt.c  |  1 -
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c |  3 +++
 arch/x86/kernel/process_32.c |  4 +++
 arch/x86/kernel/process_64.c |  4 +++
 5 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index f847189..100e53c 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -1,8 +1,12 @@
 #ifndef _ASM_X86_INTEL_RDT_H
 #define _ASM_X86_INTEL_RDT_H
 
+#ifdef CONFIG_INTEL_RDT_A
+
 #include 
 
+#include 
+
 #define IA32_L3_QOS_CFG0xc81
 #define IA32_L3_CBM_BASE   0xc90
 #define IA32_L2_CBM_BASE   0xd10
@@ -174,4 +178,42 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file 
*of,
char *buf, size_t nbytes, loff_t off);
 int rdtgroup_schemata_show(struct kernfs_open_file *of,
   struct seq_file *s, void *v);
+
+/*
+ * intel_rdt_sched_in() - Writes the task's CLOSid to IA32_PQR_MSR
+ *
+ * Following considerations are made so that this has minimal impact
+ * on scheduler hot path:
+ * - This will stay as no-op unless we are running on an Intel SKU
+ *   which supports resource control and we enable by mounting the
+ *   resctrl file system.
+ * - Caches the per cpu CLOSid values and does the MSR write only
+ *   when a task with a different CLOSid is scheduled in.
+ */
+static inline void intel_rdt_sched_in(void)
+{
+   if (static_branch_likely(&rdt_enable_key)) {
+   struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+   int closid;
+
+   /*
+* If this task has a closid assigned, use it.
+* Else use the closid assigned to this cpu.
+*/
+   closid = current->closid;
+   if (closid == 0)
+   closid = this_cpu_read(cpu_closid);
+
+   if (closid != state->closid) {
+   state->closid = closid;
+   wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, closid);
+   }
+   }
+}
+
+#else
+
+static inline void intel_rdt_sched_in(void) {}
+
+#endif /* CONFIG_INTEL_RDT_A */
 #endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 9e1f901..3232efd 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -29,7 +29,6 @@
 #include 
 #include 
 
-#include 
 #include 
 #include 
 
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c 
b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index d20be87..1c9bd29 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -292,6 +292,9 @@ static void move_myself(struct callback_head *head)
kfree(rdtgrp);
}
 
+   /* update PQR_ASSOC MSR to make resource group go into effect */
+   intel_rdt_sched_in();
+
kfree(callback);
 }
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index bd7be8e..efe7f9f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -54,6 +54,7 @@
 #include 
 #include 
 #include 
+#include 
 
 void __show_regs(struct pt_regs *regs, int all)
 {
@@ -299,5 +300,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct 
*next_p)
 
this_cpu_write(current_task, next_p);
 
+   /* Load the Intel cache allocation PQR MSR. */
+   intel_rdt_sched_in();
+
return prev_p;
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3760b3..acd7d6f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -50,6 +50,7 @@
 #include 
 #include 
 #include 
+#include 
 
 __visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
 
@@ -473,6 +474,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct 
*next_p)
loadsegment(ss, __KERNEL_DS);
}
 
+   /* Load the Intel cache allocation PQR MSR. */
+   intel_rdt_sched_in();
+
return prev_p;
 }
 
-- 
2.5.0

[PATCH v6 03/10] x86/intel_rdt: Add basic resctrl filesystem support

From: Fenghua Yu 

Use kernfs as basis for our user interface filesystem. This patch
supports mount/umount, and one mount parameter "cdp" to enable code/data
prioritization (though all we do at this point is ensure that the system
can support CDP).  The file system is not populated yet in this patch.

Signed-off-by: Fenghua Yu 
---
 arch/x86/include/asm/intel_rdt.h |  24 +++
 arch/x86/kernel/cpu/Makefile |   2 +-
 arch/x86/kernel/cpu/intel_rdt.c  |   8 +-
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 268 +++
 include/uapi/linux/magic.h   |   1 +
 5 files changed, 301 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index c0d0a6e..8e1d002 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -1,10 +1,30 @@
 #ifndef _ASM_X86_INTEL_RDT_H
 #define _ASM_X86_INTEL_RDT_H
 
+#include 
+
+#define IA32_L3_QOS_CFG0xc81
 #define IA32_L3_CBM_BASE   0xc90
 #define IA32_L2_CBM_BASE   0xd10
 
 /**
+ * struct rdtgroup - store rdtgroup's data in resctrl file system.
+ * @kn:kernfs node
+ * @rdtgroup_list: linked list for all rdtgroups
+ * @closid:closid for this rdtgroup
+ */
+struct rdtgroup {
+   struct kernfs_node  *kn;
+   struct list_headrdtgroup_list;
+   int closid;
+};
+
+/* List of all resource groups */
+extern struct list_head rdt_all_groups;
+
+int __init rdtgroup_init(void);
+
+/**
  * struct rdt_resource - attributes of an RDT resource
  * @enabled:   Is this feature enabled on this machine
  * @capable:   Is this feature available on this machine
@@ -68,6 +88,10 @@ struct msr_param {
 extern struct mutex rdtgroup_mutex;
 
 extern struct rdt_resource rdt_resources_all[];
+extern struct rdtgroup rdtgroup_default;
+DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+
+int __init rdtgroup_init(void);
 
 enum {
RDT_RESOURCE_L3,
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index cf4bfd0..b4334e8 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -34,7 +34,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)   += umc.o
 
-obj-$(CONFIG_INTEL_RDT_A)  += intel_rdt.o
+obj-$(CONFIG_INTEL_RDT_A)  += intel_rdt.o intel_rdt_rdtgroup.o
 
 obj-$(CONFIG_X86_MCE)  += mcheck/
 obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 6b3fd78..63a0045 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -361,7 +361,7 @@ static int intel_rdt_offline_cpu(unsigned int cpu)
 static int __init intel_rdt_late_init(void)
 {
struct rdt_resource *r;
-   int state;
+   int state, ret;
 
if (!get_rdt_resources())
return -ENODEV;
@@ -372,6 +372,12 @@ static int __init intel_rdt_late_init(void)
if (state < 0)
return state;
 
+   ret = rdtgroup_init();
+   if (ret) {
+   cpuhp_remove_state(state);
+   return ret;
+   }
+
for_each_capable_rdt_resource(r)
pr_info("Intel RDT %s allocation detected\n", r->name);
 
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c 
b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
new file mode 100644
index 000..74d7f72f
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -0,0 +1,268 @@
+/*
+ * User interface for Resource Allocation in Resource Director Technology(RDT)
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Author: Fenghua Yu 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * More information about RDT be found in the Intel (R) x86 Architecture
+ * Software Developer Manual.
+ */
+
+#define pr_fmt(fmt)KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+
+DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
+struct kernfs_root *rdt_root;
+struct rdtgroup rdtgroup_default;
+LIST_HEAD(rdt_all_groups);
+
+static void l3_qos_cfg_update(void *arg)
+{
+   bool *enable = arg;
+
+   wrmsrl(IA32_L3_QOS_CFG, *enable);
+}
+
+static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
+{
+   cpumask_var_t cpu_mask;
+   struct rdt_domain *d;
+   int cpu;

[PATCH v6 07/10] x86/intel_rdt: Add tasks files

From: Fenghua Yu 

The root directory and all subdirectories are automatically populated
with a read/write (mode 0644) file named "tasks". When read it will
show all the task IDs assigned to the resource group. Tasks can be
added (one at a time) to a group by writing the task ID to the file.
E.g.

Membership in a resource group is indicated by a new field in the
task_struct "int closid" which holds the CLOSID for each task. The
default resource group uses CLOSID=0 which means that all existing
tasks when the resctrl file system is mounted belong to the default
group.

A resource group cannot be removed while there are tasks assigned
to it.

Signed-off-by: Fenghua Yu 
---
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 180 +++
 include/linux/sched.h|   3 +
 2 files changed, 183 insertions(+)

diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c 
b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 931b406..50fa946 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -267,6 +268,162 @@ static ssize_t rdtgroup_cpus_write(struct 
kernfs_open_file *of,
return ret ?: nbytes;
 }
 
+struct task_move_callback {
+   struct callback_headwork;
+   struct rdtgroup *rdtgrp;
+};
+
+static void move_myself(struct callback_head *head)
+{
+   struct task_move_callback *callback;
+   struct rdtgroup *rdtgrp;
+
+   callback = container_of(head, struct task_move_callback, work);
+   rdtgrp = callback->rdtgrp;
+
+   /*
+* If resource group was deleted before this task work callback
+* was invoked, then assign the task to root group and free the
+* resource group.
+*/
+   if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+   (rdtgrp->flags & RDT_DELETED)) {
+   current->closid = 0;
+   kfree(rdtgrp);
+   }
+
+   kfree(callback);
+}
+
+static int __rdtgroup_move_task(struct task_struct *tsk,
+   struct rdtgroup *rdtgrp)
+{
+   struct task_move_callback *callback;
+   int ret;
+
+   callback = kzalloc(sizeof(*callback), GFP_KERNEL);
+   if (!callback)
+   return -ENOMEM;
+   callback->work.func = move_myself;
+   callback->rdtgrp = rdtgrp;
+
+   /*
+* Take a refcount, so rdtgrp cannot be freed before the
+* callback has been invoked.
+*/
+   atomic_inc(&rdtgrp->waitcount);
+   ret = task_work_add(tsk, &callback->work, true);
+   if (ret) {
+   /*
+* Task is exiting. Drop the refcount and free the callback.
+* No need to check the refcount as the group cannot be
+* deleted before the write function unlocks rdtgroup_mutex.
+*/
+   atomic_dec(&rdtgrp->waitcount);
+   kfree(callback);
+   } else {
+   tsk->closid = rdtgrp->closid;
+   }
+   return ret;
+}
+
+static int rdtgroup_task_write_permission(struct task_struct *task,
+ struct kernfs_open_file *of)
+{
+   const struct cred *tcred = get_task_cred(task);
+   const struct cred *cred = current_cred();
+   int ret = 0;
+
+   /*
+* Even if we're attaching all tasks in the thread group, we only
+* need to check permissions on one of them.
+*/
+   if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+   !uid_eq(cred->euid, tcred->uid) &&
+   !uid_eq(cred->euid, tcred->suid))
+   ret = -EPERM;
+
+   put_cred(tcred);
+   return ret;
+}
+
+static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
+ struct kernfs_open_file *of)
+{
+   struct task_struct *tsk;
+   int ret;
+
+   rcu_read_lock();
+   if (pid) {
+   tsk = find_task_by_vpid(pid);
+   if (!tsk) {
+   rcu_read_unlock();
+   return -ESRCH;
+   }
+   } else {
+   tsk = current;
+   }
+
+   get_task_struct(tsk);
+   rcu_read_unlock();
+
+   ret = rdtgroup_task_write_permission(tsk, of);
+   if (!ret)
+   ret = __rdtgroup_move_task(tsk, rdtgrp);
+
+   put_task_struct(tsk);
+   return ret;
+}
+
+static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
+   char *buf, size_t nbytes, loff_t off)
+{
+   struct rdtgroup *rdtgrp;
+   int ret = 0;
+   pid_t pid;
+
+   if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
+   return -EINVAL;
+   rdtgrp = rdtgroup_kn_lock_live(of->kn);
+
+   if (rdtgrp)
+   ret = rdtgroup_move_task(pid, rdtgrp, of);
+   else
+   ret = -ENOENT;
+
+   rdtgroup_kn_unlock(of->kn);
+
+   return ret ?: nbytes;
+}
+

[PATCH v6 08/10] x86/intel_rdt: Add schemata file

From: Tony Luck 

Last of the per resource group files. Also mode 0644. This one shows
the resources available to the group. Syntax depends on whether the
"cdp" mount option was given. With code/data prioritization disabled
it is simply a list of masks for each cache domain. Initial value
allows access to all of the L3 cache on all domains. E.g. on a 2 socket
Broadwell:
L3:0=f;1=f
With CDP enabled, separate masks for data and instructions are provided:
L3DATA:0=f;1=f
L3CODE:0=f;1=f

Signed-off-by: Tony Luck 
Signed-off-by: Fenghua Yu 
---
 arch/x86/include/asm/intel_rdt.h |   6 +
 arch/x86/kernel/cpu/Makefile |   2 +-
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c |   7 +
 arch/x86/kernel/cpu/intel_rdt_schemata.c | 245 +++
 4 files changed, 259 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/intel_rdt_schemata.c

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 3e8450f..f847189 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -71,6 +71,7 @@ struct rftype {
  * @num_domains:   Number of domains active
  * @msr_base:  Base MSR address for CBMs
  * @tmp_cbms:  Scratch space when updating schemata
+ * @num_tmp_cbms:  Number of CBMs in tmp_cbms
  * @cache_level:   Which cache level defines scope of this domain
  * @cbm_idx_multi: Multiplier of CBM index
  * @cbm_idx_offset:Offset of CBM index. CBM index is computed by:
@@ -88,6 +89,7 @@ struct rdt_resource {
int num_domains;
int msr_base;
u32 *tmp_cbms;
+   int num_tmp_cbms;
int cache_level;
int cbm_idx_multi;
int cbm_idx_offset;
@@ -168,4 +170,8 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid);
 void rdt_cbm_update(void *arg);
 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
 void rdtgroup_kn_unlock(struct kernfs_node *kn);
+ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+   char *buf, size_t nbytes, loff_t off);
+int rdtgroup_schemata_show(struct kernfs_open_file *of,
+  struct seq_file *s, void *v);
 #endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b4334e8..c9f8c81 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -34,7 +34,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)   += umc.o
 
-obj-$(CONFIG_INTEL_RDT_A)  += intel_rdt.o intel_rdt_rdtgroup.o
+obj-$(CONFIG_INTEL_RDT_A)  += intel_rdt.o intel_rdt_rdtgroup.o 
intel_rdt_schemata.o
 
 obj-$(CONFIG_X86_MCE)  += mcheck/
 obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c 
b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 50fa946..d20be87 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -440,6 +440,13 @@ static struct rftype rdtgroup_base_files[] = {
.write  = rdtgroup_tasks_write,
.seq_show   = rdtgroup_tasks_show,
},
+   {
+   .name   = "schemata",
+   .mode   = 0644,
+   .kf_ops = &rdtgroup_kf_single_ops,
+   .write  = rdtgroup_schemata_write,
+   .seq_show   = rdtgroup_schemata_show,
+   },
 };
 
 static int rdt_num_closids_show(struct kernfs_open_file *of,
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c 
b/arch/x86/kernel/cpu/intel_rdt_schemata.c
new file mode 100644
index 000..f369cb8
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -0,0 +1,245 @@
+/*
+ * Resource Director Technology(RDT)
+ * - Cache Allocation code.
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Authors:
+ *Fenghua Yu 
+ *Tony Luck 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * More information about RDT be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#define pr_fmt(fmt)KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Check whether a cache bit mask is valid. The SDM says

[PATCH v6 05/10] x86/intel_rdt: Add mkdir to resctrl file system

From: Fenghua Yu 

Resource control groups are represented as directories in the resctrl
file system. The root directory describes the default resources available
to tasks that have not been assigned specific resources. Other directories
can be created at the root level to make new resource groups. It is not
permitted to make directories within other directories.

Hardware uses a CLOSID (Class of service ID) to determine which resource
limits are currently in effect. The exact number available is enumerated
by CPUID leaf 0x10, but on current implementations it is a small number.
We implement a simple bitmask allocator for CLOSIDs.

Each resource control group uses one CLOSID, which limits the total number
of directories that can be created.

Resource groups can be removed using rmdir.

Signed-off-by: Fenghua Yu 
---
 arch/x86/include/asm/intel_rdt.h |   9 ++
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 231 +++
 2 files changed, 240 insertions(+)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 39ed561..a6c7d94 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -12,13 +12,20 @@
  * @kn:kernfs node
  * @rdtgroup_list: linked list for all rdtgroups
  * @closid:closid for this rdtgroup
+ * @flags: status bits
+ * @waitcount: how many cpus expect to find this
  */
 struct rdtgroup {
struct kernfs_node  *kn;
struct list_headrdtgroup_list;
int closid;
+   int flags;
+   atomic_twaitcount;
 };
 
+/* rdtgroup.flags */
+#defineRDT_DELETED 1
+
 /* List of all resource groups */
 extern struct list_head rdt_all_groups;
 
@@ -154,4 +161,6 @@ union cpuid_0x10_1_edx {
 };
 
 void rdt_cbm_update(void *arg);
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
+void rdtgroup_kn_unlock(struct kernfs_node *kn);
 #endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c 
b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 58aa8f7..6ff35786 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -26,10 +26,12 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
 #include 
+#include 
 
 DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
 struct kernfs_root *rdt_root;
@@ -39,6 +41,55 @@ LIST_HEAD(rdt_all_groups);
 /* Kernel fs node for "info" directory under root */
 static struct kernfs_node *kn_info;
 
+/*
+ * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
+ * we can keep a bitmap of free CLOSIDs in a single integer.
+ *
+ * Using a global CLOSID across all resources has some advantages and
+ * some drawbacks:
+ * + We can simply set "current->closid" to assign a task to a resource
+ *   group.
+ * + Context switch code can avoid extra memory references deciding which
+ *   CLOSID to load into the PQR_ASSOC MSR
+ * - We give up some options in configuring resource groups across multi-socket
+ *   systems.
+ * - Our choices on how to configure each resource become progressively more
+ *   limited as the number of resources grows.
+ */
+static int closid_free_map;
+
+static void closid_init(void)
+{
+   struct rdt_resource *r;
+   int rdt_min_closid = 32;
+
+   /* Compute rdt_min_closid across all resources */
+   for_each_enabled_rdt_resource(r)
+   rdt_min_closid = min(rdt_min_closid, r->num_closid);
+
+   closid_free_map = BIT_MASK(rdt_min_closid) - 1;
+
+   /* CLOSID 0 is always reserved for the default group */
+   closid_free_map &= ~1;
+}
+
+int closid_alloc(void)
+{
+   int closid = ffs(closid_free_map);
+
+   if (closid == 0)
+   return -ENOSPC;
+   closid--;
+   closid_free_map &= ~(1 << closid);
+
+   return closid;
+}
+
+static void closid_free(int closid)
+{
+   closid_free_map |= 1 << closid;
+}
+
 /* set uid and gid of rdtgroup dirs and files to that of the creator */
 static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
 {
@@ -271,6 +322,54 @@ static int parse_rdtgroupfs_options(char *data)
return ret;
 }
 
+/*
+ * We don't allow rdtgroup directories to be created anywhere
+ * except the root directory. Thus when looking for the rdtgroup
+ * structure for a kernfs node we are either looking at a directory,
+ * in which case the rdtgroup structure is pointed at by the "priv"
+ * field, otherwise we have a file, and need only look to the parent
+ * to find the rdtgroup.
+ */
+static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
+{
+   if (kernfs_type(kn) == KERNFS_DIR)
+   return kn->priv;
+   else
+   return kn->parent->priv;
+}
+
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
+{
+   struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn

Re: [RFC][PATCH] arm64: Add support for CONFIG_DEBUG_VIRTUAL

2016-10-28 Thread Laura Abbott


diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c
new file mode 100644
index 000..6c271e2
--- /dev/null
+++ b/arch/arm64/mm/physaddr.c
@@ -0,0 +1,17 @@
+#include 
+
+#include 
+
+unsigned long __virt_to_phys(unsigned long x)
+{
+phys_addr_t __x = (phys_addr_t)x;
+
+if (__x & BIT(VA_BITS - 1)) {
+/* The bit check ensures this is the right range */
+return (__x & ~PAGE_OFFSET) + PHYS_OFFSET;
+} else {
+VIRTUAL_BUG_ON(x < kimage_vaddr || x > (unsigned long)_end);


IIUC, in (3) you were asking if the last check should be '>' or '>='?

To match high_memory, I suspect the latter, as _end doesn't fall within
the mapped virtual address space.



I was actually concerned about if _end would be correct with KASLR.
Ard confirmed that it gets fixed up to be correct. I'll change the
check to check for >=.



While testing this, I found two places with __pa(_end) to get bounds,
one in arm64 code and one in memblock code. x86 gets away with this
because memblock is actually __pa_symbol and x86 does image placement
differently and can check against the maximum image size. I think
including _end in __pa_symbol but excluding it from the generic
__virt_to_phys makes sense. It's a bit nicer than doing _end - 1 +
1 everywhere.

Thanks,
Laura

[PATCH v6 01/10] Documentation, x86: Documentation for Intel resource allocation user interface

From: Fenghua Yu 

The documentation describes the user interface for allocating resources
in Intel RDT.

Please note that the documentation covers generic user interface. Current
patch set code only implements CAT L3. CAT L2 code will be sent later.

Signed-off-by: Fenghua Yu 
---
 Documentation/x86/intel_rdt_ui.txt | 167 +
 1 file changed, 167 insertions(+)
 create mode 100644 Documentation/x86/intel_rdt_ui.txt

diff --git a/Documentation/x86/intel_rdt_ui.txt 
b/Documentation/x86/intel_rdt_ui.txt
new file mode 100644
index 000..c4fd112
--- /dev/null
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -0,0 +1,167 @@
+User Interface for Resource Allocation in Intel Resource Director Technology
+
+Copyright (C) 2016 Intel Corporation
+
+Fenghua Yu 
+Tony Luck 
+
+This feature is enabled by the CONFIG_INTEL_RDT_A Kconfig and the
+X86 /proc/cpuinfo flag bits "rdt", "cat_l3" and "cdp_l3".
+
+To use the feature mount the file system:
+
+ # mount -t resctrl resctrl [-o cdp] /sys/fs/resctrl
+
+mount options are:
+
+"cdp": Enable code/data prioritization in L3 cache allocations.
+
+
+Resource groups
+---
+Resource groups are represented as directories in the resctrl file
+system. The default group is the root directory. Other groups may be
+created as desired by the system administrator using the "mkdir(1)"
+command, and removed using "rmdir(1)".
+
+There are three files associated with each group:
+
+"tasks": A list of tasks that belongs to this group. Tasks can be
+   added to a group by writing the task ID to the "tasks" file
+   (which will automatically remove them from the previous
+   group to which they belonged). New tasks created by fork(2)
+   and clone(2) are added to the same group as their parent.
+   If a pid is not in any sub partition, it is in root partition
+   (i.e. default partition).
+
+"cpus": A bitmask of logical CPUs assigned to this group. Writing
+   a new mask can add/remove CPUs from this group. Added CPUs
+   are removed from their previous group. Removed ones are
+   given to the default (root) group. You cannot remove CPUs
+   from the default group.
+
+"schemata": A list of all the resources available to this group.
+   Each resource has its own line and format - see below for
+   details.
+
+When a task is running the following rules define which resources
+are available to it:
+
+1) If the task is a member of a non-default group, then the schemata
+for that group is used.
+
+2) Else if the task belongs to the default group, but is running on a
+CPU that is assigned to some specific group, then the schemata for
+the CPU's group is used.
+
+3) Otherwise the schemata for the default group is used.
+
+
+Schemata files - general concepts
+-
+Each line in the file describes one resource. The line starts with
+the name of the resource, followed by specific values to be applied
+in each of the instances of that resource on the system.
+
+Cache IDs
+-
+On current generation systems there is one L3 cache per socket and L2
+caches are generally just shared by the hyperthreads on a core, but this
+isn't an architectural requirement. We could have multiple separate L3
+caches on a socket, multiple cores could share an L2 cache. So instead
+of using "socket" or "core" to define the set of logical cpus sharing
+a resource we use a "Cache ID". At a given cache level this will be a
+unique number across the whole system (but it isn't guaranteed to be a
+contiguous sequence, there may be gaps).  To find the ID for each logical
+CPU look in /sys/devices/system/cpu/cpu*/cache/index*/id
+
+Cache Bit Masks (CBM)
+-
+For cache resources we describe the portion of the cache that is available
+for allocation using a bitmask. The maximum value of the mask is defined
+by each cpu model (and may be different for different cache levels). It
+is found using CPUID, but is also provided in the "info" directory of
+the resctrl file system in "info/{resource}/cbm_mask". X86 hardware
+requires that these masks have all the '1' bits in a contiguous block. So
+0x3, 0x6 and 0xC are legal 4-bit masks with two bits set, but 0x5, 0x9
+and 0xA are not.  On a system with a 20-bit mask each bit represents 5%
+of the capacity of the cache. You could partition the cache into four
+equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.
+
+
+L3 details (code and data prioritization disabled)
+--
+With CDP disabled the L3 schemata format is:
+
+   L3:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
+
+L3 details (CDP enabled via mount option to resctrl)
+
+When CDP is enabled L3 control is split into two separate resources
+so you can specify independent masks for code and data like this:
+
+   L3data:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
+   L3code:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
+
+L2 details
+--
+L2 cache does not support code and data prioritization, s

[PATCH v6 00/10] Intel Cache Allocation Technology

From: Fenghua Yu 

This version is supposed to cover all comments from Thomas.

Patches 1-4 and 6-9 of the 18 v5 patches are in the tip x86/cache branch. The
remaining 10 patches are in v6, on top of the tip tree.

Change log in v6:

patch 1 (i.e. patch 5 in v5): Change max_cbm_val to cbm_mask.

patch 2 (i.e. patch 10 in v5): Change comments for rdt_find_domain and
domain_add_cpu() to explain why domain list is sorted by id. Change
the pos argument to optional in rdt_find_domain and assign NULL to it
in caller domain_remove_cpu.

patch 3 (i.e. patch 11 in v5): Remove arg's type cast in
l3_qos_cfg_update(). Change the local variable declaration lines in
more readable way. Disable CDP in umount only when it's enabled. Fix
an error handling issue in rdt_mount.

patch 4 (i.e. patch 12 in v5): Change names from "num_closid" to
"num_closids" and from "cbm_val" to "cbm_mask" in res_info_files. Remove
the unused label "out" in rdtgroup_setup_root() and will add it later when
it's used.

patch 5 (i.e. patch 13 in v5): Init closid_free_map by minimal closid
instead of by maximum closid in all resources.

patch 6 (i.e. patch 14 in v5): Change local variable declaration lines
to more readable way.

patch 7 (i.e. patch 15 in v5): Change for_each_process to
for_each_process_thread

patch 8 (i.e. patch 16 in v5): Change local variable declaration lines
to more readable way.

patch 9 (i.e. patch 17 in v5): No change.

patch 10 (i.e. patch 18 in v5): No change.

Fenghua Yu (7):
  Documentation, x86: Documentation for Intel resource allocation user
interface
  x86/intel_rdt: Add basic resctrl filesystem support
  x86/intel_rdt: Add "info" files to resctrl file system
  x86/intel_rdt: Add mkdir to resctrl file system
  x86/intel_rdt: Add tasks files
  x86/intel_rdt: Add scheduler hook
  MAINTAINERS: Add maintainer for Intel RDT resource allocation

Tony Luck (3):
  x86/intel_rdt: Build structures for each resource based on cache
topology
  x86/intel_rdt: Add cpus file
  x86/intel_rdt: Add schemata file

 Documentation/x86/intel_rdt_ui.txt   |  167 +
 MAINTAINERS  |8 +
 arch/x86/include/asm/intel_rdt.h |  145 +
 arch/x86/kernel/cpu/Makefile |2 +-
 arch/x86/kernel/cpu/intel_rdt.c  |  213 ++-
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1007 ++
 arch/x86/kernel/cpu/intel_rdt_schemata.c |  245 
 arch/x86/kernel/process_32.c |4 +
 arch/x86/kernel/process_64.c |4 +
 include/linux/sched.h|3 +
 include/uapi/linux/magic.h   |1 +
 11 files changed, 1797 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/x86/intel_rdt_ui.txt
 create mode 100644 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
 create mode 100644 arch/x86/kernel/cpu/intel_rdt_schemata.c

-- 
2.5.0

[PATCH v6 04/10] x86/intel_rdt: Add "info" files to resctrl file system

From: Fenghua Yu 

For the convenience of applications we make the decoded values of some
of the CPUID values available in read-only (0444) files.

Signed-off-by: Fenghua Yu 
---
 arch/x86/include/asm/intel_rdt.h |  24 
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 185 +++
 2 files changed, 209 insertions(+)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 8e1d002..39ed561 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -25,6 +25,30 @@ extern struct list_head rdt_all_groups;
 int __init rdtgroup_init(void);
 
 /**
+ * struct rftype - describe each file in the resctrl file system
+ * @name: file name
+ * @mode: access mode
+ * @kf_ops: operations
+ * @seq_show: show content of the file
+ * @write: write to the file
+ */
+struct rftype {
+   char*name;
+   umode_t mode;
+   struct kernfs_ops   *kf_ops;
+
+   int (*seq_show)(struct kernfs_open_file *of,
+   struct seq_file *sf, void *v);
+   /*
+* write() is the generic write callback which maps directly to
+* kernfs write operation and overrides all other operations.
+* Maximum write size is determined by ->max_write_len.
+*/
+   ssize_t (*write)(struct kernfs_open_file *of,
+char *buf, size_t nbytes, loff_t off);
+};
+
+/**
  * struct rdt_resource - attributes of an RDT resource
  * @enabled:   Is this feature enabled on this machine
  * @capable:   Is this feature available on this machine
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c 
b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 74d7f72f..58aa8f7 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -23,6 +23,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 #include 
@@ -34,6 +36,176 @@ struct kernfs_root *rdt_root;
 struct rdtgroup rdtgroup_default;
 LIST_HEAD(rdt_all_groups);
 
+/* Kernel fs node for "info" directory under root */
+static struct kernfs_node *kn_info;
+
+/* set uid and gid of rdtgroup dirs and files to that of the creator */
+static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
+{
+   struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
+   .ia_uid = current_fsuid(),
+   .ia_gid = current_fsgid(), };
+
+   if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
+   gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
+   return 0;
+
+   return kernfs_setattr(kn, &iattr);
+}
+
+static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
+{
+   struct kernfs_node *kn;
+   int ret;
+
+   kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
+ 0, rft->kf_ops, rft, NULL, NULL);
+   if (IS_ERR(kn))
+   return PTR_ERR(kn);
+
+   ret = rdtgroup_kn_set_ugid(kn);
+   if (ret) {
+   kernfs_remove(kn);
+   return ret;
+   }
+
+   return 0;
+}
+
+static int rdtgroup_add_files(struct kernfs_node *kn, struct rftype *rfts,
+ int len)
+{
+   struct rftype *rft;
+   int ret;
+
+   lockdep_assert_held(&rdtgroup_mutex);
+
+   for (rft = rfts; rft < rfts + len; rft++) {
+   ret = rdtgroup_add_file(kn, rft);
+   if (ret)
+   goto error;
+   }
+
+   return 0;
+error:
+   pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
+   while (--rft >= rfts)
+   kernfs_remove_by_name(kn, rft->name);
+   return ret;
+}
+
+static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
+{
+   struct kernfs_open_file *of = m->private;
+   struct rftype *rft = of->kn->priv;
+
+   if (rft->seq_show)
+   return rft->seq_show(of, m, arg);
+   return 0;
+}
+
+static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
+  size_t nbytes, loff_t off)
+{
+   struct rftype *rft = of->kn->priv;
+
+   if (rft->write)
+   return rft->write(of, buf, nbytes, off);
+
+   return -EINVAL;
+}
+
+static struct kernfs_ops rdtgroup_kf_single_ops = {
+   .atomic_write_len   = PAGE_SIZE,
+   .write  = rdtgroup_file_write,
+   .seq_show   = rdtgroup_seqfile_show,
+};
+
+static int rdt_num_closids_show(struct kernfs_open_file *of,
+   struct seq_file *seq, void *v)
+{
+   struct rdt_resource *r = of->kn->parent->priv;
+
+   seq_printf(seq, "%d\n", r->num_closid);
+
+   return 0;
+}
+
+static int rdt_cbm_mask_show(struct kernfs_open_file *of,
+struct seq_file *seq, void *v)
+{
+   struct rdt_resource *r = of->kn->parent->priv;
+
+   seq_printf(seq, "%x\n",

[PATCH v6 10/10] MAINTAINERS: Add maintainer for Intel RDT resource allocation

From: Fenghua Yu 

We create five new files for Intel RDT resource allocation:
arch/x86/kernel/cpu/intel_rdt.c
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
arch/x86/kernel/cpu/intel_rdt_schemata.c
arch/x86/include/asm/intel_rdt.h
Documentation/x86/intel_rdt_ui.txt

Fenghua Yu will maintain this code.

Signed-off-by: Fenghua Yu 
---
 MAINTAINERS | 8 
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c447953..4e6a044 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10107,6 +10107,14 @@ L: linux-r...@vger.kernel.org
 S: Supported
 F: drivers/infiniband/sw/rdmavt
 
+RDT - RESOURCE ALLOCATION
+M: Fenghua Yu 
+L: linux-kernel@vger.kernel.org
+S: Supported
+F: arch/x86/kernel/cpu/intel_rdt*
+F: arch/x86/include/asm/intel_rdt*
+F: Documentation/x86/intel_rdt*
+
 READ-COPY UPDATE (RCU)
 M: "Paul E. McKenney" 
 M: Josh Triplett 
-- 
2.5.0

Re: [PATCH 1/5] KVM: x86: avoid atomic operations on APICv vmentry

2016-10-28 Thread Michael S. Tsirkin

On Fri, Oct 28, 2016 at 11:39:44AM +0200, Paolo Bonzini wrote:
> 
> 
> On 27/10/2016 19:06, Radim Krčmář wrote:
> > 2016-10-27 19:51+0300, Michael S. Tsirkin:
> >> On Thu, Oct 27, 2016 at 06:44:00PM +0200, Radim Krčmář wrote:
> >>> 2016-10-27 00:42+0300, Michael S. Tsirkin:
>  On Wed, Oct 26, 2016 at 09:53:45PM +0200, Radim Krčmář wrote:
> > 2016-10-14 20:21+0200, Paolo Bonzini:
> >> On some benchmarks (e.g. netperf with ioeventfd disabled), APICv
> >> posted interrupts turn out to be slower than interrupt injection via
> >> KVM_REQ_EVENT.
> >>
> >> This patch optimizes a bit the IRR update, avoiding expensive atomic
> >> operations in the common case where PI.ON=0 at vmentry or the PIR 
> >> vector
> >> is mostly zero.  This saves at least 20 cycles (1%) per vmexit, as
> >> measured by kvm-unit-tests' inl_from_qemu test (20 runs):
> >>
> >>   | enable_apicv=1  |  enable_apicv=0
> >>   | mean stdev  |  mean stdev
> >> --|-|--
> >> before| 5826 32.65  |  5765 47.09
> >> after | 5809 43.42  |  5777 77.02
> >>
> >> Of course, any change in the right column is just placebo effect. :)
> >> The savings are bigger if interrupts are frequent.
> >>
> >> Signed-off-by: Paolo Bonzini 
> >> ---
> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >> @@ -521,6 +521,12 @@ static inline void pi_set_sn(struct pi_desc 
> >> *pi_desc)
> >>(unsigned long *)&pi_desc->control);
> >>  }
> >>  
> >> +static inline void pi_clear_on(struct pi_desc *pi_desc)
> >> +{
> >> +  clear_bit(POSTED_INTR_ON,
> >> +(unsigned long *)&pi_desc->control);
> >> +}
> >
> > We should add an explicit smp_mb__after_atomic() for extra correctness,
> > because clear_bit() does not guarantee a memory barrier and we must make
> > sure that pir reads can't be reordered before it.
> > x86 clear_bit() currently uses locked instruction, though.
> 
>  smp_mb__after_atomic is empty on x86 so it's
>  a documentation thing, not a correctness thing anyway.
> >>>
> >>> All atomics currently contain a barrier, but the code is also
> >>> future-proofing, not just documentation: implementation of clear_bit()
> >>> could drop the barrier and smp_mb__after_atomic() would then become a
> >>> real barrier.
> >>>
> >>> Adding dma_mb__after_atomic() would be even better as this bug could
> >>> happen even on a uniprocessor with an assigned device, but people who
> >>> buy a SMP chip to run a UP kernel deserve it.
> >>
> >> Not doing dma so does not seem to make sense ...
> > 
> > IOMMU does -- it writes to the PIR and sets ON asynchronously.
> 
> I can use either __smp_mb__after_atomic or virt_mb__after_atomic.  The
> difference is documentation only, since all of them are
> compiler-barriers only on x86.

A comment is also an option.

> Preferences?
> 
> Thanks,
> 
> Paolo

virt_ is for a VM guest. Pls don't use for host side code.
I thought it's clear enough but maybe I should add
more documentation.

> >> Why do you need a barrier on a UP kernel?
> > 
> > If pi_clear_on() doesn't contain a memory barrier (possible future),
> > then we have the following race: (pir[0] begins as 0.)
> > 
> > KVM   |  IOMMU
> >---+-
> >pir_val = ACCESS_ONCE(pir[0])  |
> >   | pir[0] = 123
> >   | pi_set_on()
> >pi_clear_on()  |
> >if (pir_val)   |
> > 
> > ACCESS_ONCE() does not prevent the CPU to prefetch pir[0] (ACCESS_ONCE
> > does nothing in this patch), so if there was 0 in pir[0] before IOMMU
> > wrote to it, then our optimization to avoid the xchg would yield a false
> > negative and the interrupt would be lost.
> >

Re: [PATCH] lpfc: use %zd format string for size_t

On Friday, October 28, 2016 2:58:33 PM CEST Vineet Gupta wrote:
> On 10/28/2016 02:52 PM, Vineet Gupta wrote:
> > On 10/28/2016 02:44 PM, Vineet Gupta wrote:
> >> This is configuration specific, and something caused your compiler to
> >>> be built assuming that size_t is unsigned long, while the kernel
> >>> headers are assuming it should be unsigned int.
> > 
> > So yes this seems to be target specific gcc thing
> > 
> > for ARC 4.8
> > 
> > #define PTRDIFF_TYPE "int"
> > 
> > ARM
> > 
> > #ifndef PTRDIFF_TYPE
> > #define PTRDIFF_TYPE (TARGET_AAPCS_BASED ? "int" : "long int")
> > #endif
> > 
> > ARC gcc 6.2
> > 
> > #undef PTRDIFF_TYPE
> > #define PTRDIFF_TYPE "long int"
> 
> Actually we need to adjust SIZE_TYPE (unsigned int) and PTRDIFF_TYPE (int) in 
> the
> gcc 6.x to fix this issue. And that is exactly what ARC gcc 4.8 have.

What compiler versions are most commonly used these days?

You should probably stay with the version that most people have
and then update either the compiler or the kernel, whichever
diverges from it.

I see in the gcc git log that the version that had "int" got removed
at some point, and the version that had "unsigned int" was added
later.

Arnd

Re: [PATCH v2 1/5] kconfig: introduce the "imply" keyword

2016-10-28 Thread Nicolas Pitre

On Fri, 28 Oct 2016, Paul Bolle wrote:

> On Thu, 2016-10-27 at 23:10 -0400, Nicolas Pitre wrote:
> > On Fri, 28 Oct 2016, Paul Bolle wrote:
> > > What happens when a tristate symbol is implied by a symbol set to 'y'
> > > and by a symbol set to 'm'?
> > 
> > That's respectively the third and second rows in the table above.
> 
> I meant: two separate symbols implying the same symbol at the same
> time. One of those symbols set to 'y' and the other set to 'm'.

Then it's the greatest of the set i.e. y.


Nicolas

[GIT PULL] Please pull powerpc/linux.git powerpc-4.9-4 tag

2016-10-28 Thread Michael Ellerman

Hi Linus,

Please pull some more powerpc fixes for 4.9:

The following changes since commit 78914ff0843623ee6dbeae92fa0bb8761828684e:

  powerpc: Ignore the pkey system calls for now (2016-10-19 20:36:24 +1100)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
tags/powerpc-4.9-4

for you to fetch changes up to fb479e44a9e240a23c2d208c2ace23542a47f41c:

  powerpc/64s: relocation, register save fixes for system reset interrupt 
(2016-10-27 21:55:14 +1100)


powerpc fixes for 4.9 #4

Fixes marked for stable:
 - Convert cmp to cmpd in idle enter sequence (Segher Boessenkool)
 - cxl: Fix leaking pid refs in some error paths (Vaibhav Jain)
 - Re-fix race condition between going idle and entering guest (Paul Mackerras)
 - Fix race condition in setting lock bit in idle/wakeup code (Paul Mackerras)
 - radix: Use tlbiel only if we ever ran on the current cpu (Aneesh Kumar K.V)
 - relocation, register save fixes for system reset interrupt (Nicholas Piggin)

Fixes for code merged this cycle:
 - Fix CONFIG_ALIVEC typo in restore_tm_state() (Valentin Rothberg)
 - KVM: PPC: Book3S HV: Fix build error when SMP=n (Michael Ellerman)


Aneesh Kumar K.V (1):
  powerpc/mm/radix: Use tlbiel only if we ever ran on the current cpu

Michael Ellerman (1):
  KVM: PPC: Book3S HV: Fix build error when SMP=n

Nicholas Piggin (1):
  powerpc/64s: relocation, register save fixes for system reset interrupt

Paul Mackerras (2):
  powerpc/64: Re-fix race condition between going idle and entering guest
  powerpc/64: Fix race condition in setting lock bit in idle/wakeup code

Segher Boessenkool (1):
  powerpc: Convert cmp to cmpd in idle enter sequence

Vaibhav Jain (1):
  cxl: Fix leaking pid refs in some error paths

Valentin Rothberg (1):
  powerpc/process: Fix CONFIG_ALIVEC typo in restore_tm_state()

 arch/powerpc/include/asm/cpuidle.h   |  2 +-
 arch/powerpc/include/asm/exception-64s.h | 16 ++
 arch/powerpc/include/asm/tlb.h   | 12 
 arch/powerpc/kernel/exceptions-64s.S | 50 ++--
 arch/powerpc/kernel/idle_book3s.S| 35 ++
 arch/powerpc/kernel/process.c|  2 +-
 arch/powerpc/kvm/book3s_hv_rm_xics.c |  1 +
 arch/powerpc/mm/tlb-radix.c  |  8 ++---
 drivers/misc/cxl/api.c   |  2 ++
 drivers/misc/cxl/file.c  | 22 --
 10 files changed, 108 insertions(+), 42 deletions(-)


signature.asc
Description: PGP signature

Re: [PATCH v2 3/4] input: Deprecate real timestamps beyond year 2106

On Friday, October 28, 2016 2:56:10 PM CEST Dmitry Torokhov wrote:
> On Fri, Oct 28, 2016 at 2:47 PM, Arnd Bergmann  wrote:
> > On Friday, October 28, 2016 2:39:35 PM CEST Deepa Dinamani wrote:
> >> >> >> @@ -55,24 +60,24 @@ struct ff_effect_compat {
> >> >> >>
> >> >> >>  static inline size_t input_event_size(void)
> >> >> >>  {
> >> >> >> -   return (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) ?
> >> >> >> -   sizeof(struct input_event_compat) : sizeof(struct 
> >> >> >> input_event);
> >> >> >> +   return in_compat_syscall() ? sizeof(struct 
> >> >> >> raw_input_event_compat) :
> >> >> >> +sizeof(struct raw_input_event);
> >> >> >>  }
> >> >> >
> >> >> > I think the COMPAT_USE_64BIT_TIME check has to stay here,
> >> >> > it's needed for x32 mode on x86-64.
> >> >>
> >
> > We have to distinguish four cases on x86:
> >
> > - native 32-bit, input_event with 32-bit time_t
> > - compat 32-bit, input_event_compat with 32-bit time_t
> > - native 64-bit, input_event with 64-bit time_t
> > - compat x32, input_event with 64-bit time_t
> >
> > The first three can happen on other architectures too,
> > the last one is x86 specific. There are probably other ways
> > to express the condition above, but I can't think of one
> > that is better than the one we have today.
> 
> Can we detect if given task is compat x32, like we do for compat
> 64/32? Or entire userspace has to be x32?

Yes, this works fine per task, with the definition of COMPAT_USE_64BIT_TIME
that is hardcoded to zero everywhere except on x86 where it is

#define COMPAT_USE_64BIT_TIME \
(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))

This is unrelated to the patch in question, the existing code
is correct as long as we don't change the logic and just
replace input_event with raw_input_event (or __kernel_input_event
or whichever you prefer).

Arnd

Re: [PATCH] lpfc: use %zd format string for size_t

On 10/28/2016 02:52 PM, Vineet Gupta wrote:
> On 10/28/2016 02:44 PM, Vineet Gupta wrote:
>> This is configuration specific, and something caused your compiler to
>>> be built assuming that size_t is unsigned long, while the kernel
>>> headers are assuming it should be unsigned int.
> 
> So yes this seems to be target specific gcc thing
> 
> for ARC 4.8
> 
> #define PTRDIFF_TYPE "int"
> 
> ARM
> 
> #ifndef PTRDIFF_TYPE
> #define PTRDIFF_TYPE (TARGET_AAPCS_BASED ? "int" : "long int")
> #endif
> 
> ARC gcc 6.2
> 
> #undef PTRDIFF_TYPE
> #define PTRDIFF_TYPE "long int"

Actually we need to adjust SIZE_TYPE (unsigned int) and PTRDIFF_TYPE (int) in
gcc 6.x to fix this issue. And that is exactly what ARC gcc 4.8 has.

Re: [PATCH v2 3/4] input: Deprecate real timestamps beyond year 2106

On Fri, Oct 28, 2016 at 2:47 PM, Arnd Bergmann  wrote:
> On Friday, October 28, 2016 2:39:35 PM CEST Deepa Dinamani wrote:
>> >> >> @@ -55,24 +60,24 @@ struct ff_effect_compat {
>> >> >>
>> >> >>  static inline size_t input_event_size(void)
>> >> >>  {
>> >> >> -   return (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) ?
>> >> >> -   sizeof(struct input_event_compat) : sizeof(struct 
>> >> >> input_event);
>> >> >> +   return in_compat_syscall() ? sizeof(struct 
>> >> >> raw_input_event_compat) :
>> >> >> +sizeof(struct raw_input_event);
>> >> >>  }
>> >> >
>> >> > I think the COMPAT_USE_64BIT_TIME check has to stay here,
>> >> > it's needed for x32 mode on x86-64.
>> >>
>> >> There is no time_t anymore in the raw_input_event structure.
>> >> The struct uses __kernel_ulong_t type.
>> >> This should take care of x32 support.
>> >
>> > I don't think it does.
>> >
>> >> From this cover letter:
>> >> https://www.spinics.net/lists/linux-arch/msg16356.html
>> >>
>> >> I see that that the __kernel types were introduced to address the ABI
>> >> issues for x32.
>> >
>> > This is a variation of the problem we are trying to solve for
>> > the other architectures in your patch set:
>> >
>> > On x32, the kernel uses produces a structure with the 64-bit
>> > layout, using __u64 tv_sec, to match the current user space
>> > that has 64-bit __kernel_ulong_t and 64-bit time_t, but
>> > in_compat_syscall() also returns 'true' here, as this is
>> > mostly a 32-bit ABI (time_t being one of the exceptions).
>>
>> Yes, I missed this.
>>
>> in_compat_syscall() is true for x32, this would mean we end up here
>> even if it is a x32 syscall.
>> But, wouldn't it be better to use in_x32_syscall() here since there is
>> no timeval any more?
>
> We have to distinguish four cases on x86:
>
> - native 32-bit, input_event with 32-bit time_t
> - compat 32-bit, input_event_compat with 32-bit time_t
> - native 64-bit, input_event with 64-bit time_t
> - compat x32, input_event with 64-bit time_t
>
> The first three can happen on other architectures too,
> the last one is x86 specific. There are probably other ways
> to express the condition above, but I can't think of one
> that is better than the one we have today.

Can we detect if given task is compat x32, like we do for compat
64/32? Or entire userspace has to be x32?

Thanks.

-- 
Dmitry

Re: [RFC] v4l2 support for thermopile devices

2016-10-28 Thread Hans Verkuil

Hi Matt,

On 28/10/16 22:14, Matt Ranostay wrote:

So want to toss a few thoughts on adding support for thermopile
devices (could be used for FLIR Lepton as well) that output pixel
data.
These typically aren't DMA'able devices since they are low speed
(partly to limiting the functionality to be in compliance with ITAR)
and data is piped over i2c/spi.

My question is that there doesn't seem to be an other driver that
polls frames off of a device and pushes it to the video buffer, and
wanted to be sure that this doesn't currently exist somewhere.

Not anymore, but if you go back to kernel 3.6 then you'll find this driver:

drivers/media/video/bw-qcam.c

It was for a grayscale parallel port webcam (which explains why it was
removed in 3.7 :-) ), and it used polling to get the pixels.

Also more importantly does the mailing list think it belongs in v4l2?

I think it fits. It's a sensor, just with a very small resolution and
infrared instead of visible light.

We already came to the opinion on the IIO list that it doesn't belong
in that subsystem since pushing raw pixel data to a buffer is a bit
hacky. Also could be generically written with regmap so other devices
(namely FLIR Lepton) could be easily supported.

Need some input for the video pixel data types, which the device we
are using (see datasheet links below) is outputting pixel data in
little endian 16-bit of which a 12-bits signed value is used. Does it
make sense to do some basic processing on the data since greyscale is
going to look weird with temperatures under 0C degrees? Namely a cold
object is going to be brighter than the hottest object it could read.

Or should a new V4L2_PIX_FMT_* be defined and processing done in
software?

I would recommend that. It's no big deal, as long as the new format is
documented.

Another issue is how to report the scaling value of 0.25 C
for each LSB of the pixels to the respecting recording application.

Probably through a read-only control, but I'm not sure.

Regards,

Hans

Datasheet:
http://media.digikey.com/pdf/Data%20Sheets/Panasonic%20Sensors%20PDFs/Grid-EYE_AMG88.pdf
Datasheet:
https://eewiki.net/download/attachments/13599167/Grid-EYE%20SPECIFICATIONS%28Reference%29.pdf?version=1&modificationDate=1380660426690&api=v2

Thanks,

Matt
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majord...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

Re: [PATCH] lpfc: use %zd format string for size_t

On 10/28/2016 02:44 PM, Vineet Gupta wrote:
> This is configuration specific, and something caused your compiler to
>> be built assuming that size_t is unsigned long, while the kernel
>> headers are assuming it should be unsigned int.

So yes this seems to be target specific gcc thing

for ARC 4.8

#define PTRDIFF_TYPE "int"

ARM

#ifndef PTRDIFF_TYPE
#define PTRDIFF_TYPE (TARGET_AAPCS_BASED ? "int" : "long int")
#endif

ARC gcc 6.2

#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE "long int"

[PATCH] device-dax: check devm_nsio_enable() return value

2016-10-28 Thread Dan Williams

If the dax_pmem driver is passed a resource that is already busy the
driver probe attempt should fail with a message like the following:

  dax_pmem dax0.1: could not reserve region [mem 0x1-0x11fff]

However, if we do not catch the error we crash for the obvious reason of
accessing memory that is not mapped.

 BUG: unable to handle kernel paging request at c90020001000
 IP: [] __memcpy+0x12/0x20
 [..]
 Call Trace:
  [] ? nsio_rw_bytes+0x60/0x180
  [] nd_pfn_validate+0x75/0x320
  [] nvdimm_setup_pfn+0xb9/0x5d0
  [] ? devm_nsio_enable+0xff/0x110
  [] dax_pmem_probe+0x59/0x260

Cc: 
Fixes: ab68f2622136 ("/dev/dax, pmem: direct access to persistent memory")
Reported-by: Dave Hansen 
Signed-off-by: Dan Williams 
---
 drivers/dax/pmem.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 4a15fa5df98b..73c6ce93a0d9 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -78,7 +78,9 @@ static int dax_pmem_probe(struct device *dev)
nsio = to_nd_namespace_io(&ndns->dev);
 
/* parse the 'pfn' info block via ->rw_bytes */
-   devm_nsio_enable(dev, nsio);
+   rc = devm_nsio_enable(dev, nsio);
+   if (rc)
+   return rc;
altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap);
if (IS_ERR(altmap))
return PTR_ERR(altmap);

Re: [PATCH] lpfc: use %zd format string for size_t

On Friday, October 28, 2016 2:44:13 PM CEST Vineet Gupta wrote:
> 
> Indeed if I hack include/linux/types.h
> 
> -typedef __kernel_size_tsize_t;
> +typedef unsigned long  size_t;
> 
> then the warning goes away, so gcc is indeed assuming size_t to be unsigned 
> long
> and not unsigned int. That helps a lot.

Ok, just be aware that this will introduce warnings for any
compiler that is built to expect an 'unsigned int size_t'
typedef.

Arnd

Re: [PATCH v2 3/4] input: Deprecate real timestamps beyond year 2106

On Friday, October 28, 2016 2:39:35 PM CEST Deepa Dinamani wrote:
> >> >> @@ -55,24 +60,24 @@ struct ff_effect_compat {
> >> >>
> >> >>  static inline size_t input_event_size(void)
> >> >>  {
> >> >> -   return (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) ?
> >> >> -   sizeof(struct input_event_compat) : sizeof(struct 
> >> >> input_event);
> >> >> +   return in_compat_syscall() ? sizeof(struct 
> >> >> raw_input_event_compat) :
> >> >> +sizeof(struct raw_input_event);
> >> >>  }
> >> >
> >> > I think the COMPAT_USE_64BIT_TIME check has to stay here,
> >> > it's needed for x32 mode on x86-64.
> >>
> >> There is no time_t anymore in the raw_input_event structure.
> >> The struct uses __kernel_ulong_t type.
> >> This should take care of x32 support.
> >
> > I don't think it does.
> >
> >> From this cover letter:
> >> https://www.spinics.net/lists/linux-arch/msg16356.html
> >>
> >> I see that that the __kernel types were introduced to address the ABI
> >> issues for x32.
> >
> > This is a variation of the problem we are trying to solve for
> > the other architectures in your patch set:
> >
> > On x32, the kernel uses produces a structure with the 64-bit
> > layout, using __u64 tv_sec, to match the current user space
> > that has 64-bit __kernel_ulong_t and 64-bit time_t, but
> > in_compat_syscall() also returns 'true' here, as this is
> > mostly a 32-bit ABI (time_t being one of the exceptions).
> 
> Yes, I missed this.
> 
> in_compat_syscall() is true for x32, this would mean we end up here
> even if it is a x32 syscall.
> But, wouldn't it be better to use in_x32_syscall() here since there is
> no timeval any more?

We have to distinguish four cases on x86:

- native 32-bit, input_event with 32-bit time_t
- compat 32-bit, input_event_compat with 32-bit time_t
- native 64-bit, input_event with 64-bit time_t
- compat x32, input_event with 64-bit time_t

The first three can happen on other architectures too,
the last one is x86 specific. There are probably other ways
to express the condition above, but I can't think of one
that is better than the one we have today.

Arnd

Re: [PATCH] lpfc: use %zd format string for size_t

On 10/28/2016 02:33 PM, Arnd Bergmann wrote:
> On Friday, October 28, 2016 2:03:21 PM CEST Vineet Gupta wrote:
>>
>> I'm trying to use about to be released ARC gcc 6.x with current kernels and 
>> see a
>> flood of warnings due to these legit fixes - i.e.g arc gcc 6.2 complains 
>> when it
>> sees -zx formats.
>>
>> CC  mm/percpu.o
>> ../mm/percpu.c: In function ‘pcpu_alloc’:
>> ../mm/percpu.c:890:14: warning: format ‘%zu’ expects argument of type 
>> ‘size_t’,
>> but argument 4 has type ‘unsigned int’ [-Wformat=]
>>WARN(true, "illegal size (%zu) or align (%zu) for percpu allocation\n",
>>
>> I'm not sure what is going on since the data type is size_t alright - 
>> although
>> from posix_types.h is
>>
>> typedef unsigned int __kernel_size_t;
>> typedef __kernel_size_t size_t;
>>
>> And this seems to be same for ARC as well as ARM. I tried ARM gcc 6.1 @
>> https://snapshots.linaro.org/components/toolchain/binaries/6.1-2016.08-rc1/arm-linux-gnueabihf/
>>
>> which doesn't seem to be complaining.
>>
>> With V=1, I checked the respective ARM and ARC toggles in play, but nothing
>> related to this seems to be standing out.
>>
>> I know this is more of a question to our GNU folks, but was wondering if you 
>> had
>> more insight into it - which you almost always do 
> 
> I've seen the problem you describe before, but I don't remember the
> exact details. I think what happened is that the compiler knows
> what type size_t is supposed to be, either unsigned int or unsigned
> long, regardless of what our kernel headers say it is.
> 
> This is configuration specific, and something caused your compiler to
> be built assuming that size_t is unsigned long, while the kernel
> headers are assuming it should be unsigned int.
> 
> You can try overriding __kernel_size_t in your asm/posix_types.h
> to define it as unsigned long, 


Indeed if I hack include/linux/types.h

-typedef __kernel_size_tsize_t;
+typedef unsigned long  size_t;

then the warning goes away, so gcc is indeed assuming size_t to be unsigned long
and not unsigned int. That helps a lot.

or try to build your compiler
> to match the kernel headers, but the first step would be to find
> out why the compiler changed in the first place, assuming that older
> compiler versions were matching the kernel here.
> 
>   Arnd

Re: [PATCH 3/3] trace-cmd record: add --cpu-list option

2016-10-28 Thread Steven Rostedt

On Fri, 28 Oct 2016 17:15:57 -0400
Luiz Capitulino  wrote:

> On Fri, 28 Oct 2016 15:50:10 -0400
> Steven Rostedt  wrote:
> 
> > On Fri, 28 Oct 2016 15:49:22 -0400
> > Steven Rostedt  wrote:
> > 
> > > Sorry it took so long to look at this, but I finally got around to
> > > it ;-)
> > >   
> > 
> > I did apply patches 1 and 2. I'm hoping to get them out tomorrow, or
> > while I'm at plumbers.
> 
> OK, I also posted a minor fix:
> 
>  [PATCH] trace-cmd: Documentation: ignore all man sections

Yep, I included that one already.

Thanks!

-- Steve

Re: [PATCH v2 3/4] input: Deprecate real timestamps beyond year 2106

2016-10-28 Thread Deepa Dinamani

>> >> @@ -55,24 +60,24 @@ struct ff_effect_compat {
>> >>
>> >>  static inline size_t input_event_size(void)
>> >>  {
>> >> -   return (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) ?
>> >> -   sizeof(struct input_event_compat) : sizeof(struct 
>> >> input_event);
>> >> +   return in_compat_syscall() ? sizeof(struct 
>> >> raw_input_event_compat) :
>> >> +sizeof(struct raw_input_event);
>> >>  }
>> >
>> > I think the COMPAT_USE_64BIT_TIME check has to stay here,
>> > it's needed for x32 mode on x86-64.
>>
>> There is no time_t anymore in the raw_input_event structure.
>> The struct uses __kernel_ulong_t type.
>> This should take care of x32 support.
>
> I don't think it does.
>
>> From this cover letter:
>> https://www.spinics.net/lists/linux-arch/msg16356.html
>>
>> I see that that the __kernel types were introduced to address the ABI
>> issues for x32.
>
> This is a variation of the problem we are trying to solve for
> the other architectures in your patch set:
>
> On x32, the kernel uses produces a structure with the 64-bit
> layout, using __u64 tv_sec, to match the current user space
> that has 64-bit __kernel_ulong_t and 64-bit time_t, but
> in_compat_syscall() also returns 'true' here, as this is
> mostly a 32-bit ABI (time_t being one of the exceptions).

Yes, I missed this.

in_compat_syscall() is true for x32, this would mean we end up here
even if it is a x32 syscall.
But, wouldn't it be better to use in_x32_syscall() here since there is
no timeval any more?

Thanks,
Deepa

[PATCH v2] NFSv4: replace seqcount_t with a seqlock_t

2016-10-28 Thread Sebastian Andrzej Siewior

The raw_write_seqcount_begin() in nfs4_reclaim_open_state() bugs me
because it maps to preempt_disable() in -RT which I can't have at this
point. So I took a look at the code.
The lockdep part was removed in commit abbec2da13f0 ("NFS: Use
raw_write_seqcount_begin/end int nfs4_reclaim_open_state") because
lockdep complained. The whole seqcount thing was introduced in commit
c137afabe330 ("NFSv4: Allow the state manager to mark an open_owner as
being recovered").
I don't understand how it is possible that we don't end up with two
writers for the same resource because the `sp->so_lock' lock is dropped
so soon in the list_for_each_entry() loop. It might be the
test_and_clear_bit() check in nfs4_do_reclaim() but it might clear one
bit on each iteration so I *think* we could have two invocations of the
same struct nfs4_state_owner in nfs4_reclaim_open_state().
So there is that.

But back to the list_for_each_entry() macro.
It seems that this lock protects the ->so_states list among other
atomic_t & flags members. So at the begin of the loop we inc ->count
ensuring that this field is not removed while we use it. So we drop the
->so_lock lock during the loop it seems. And after nfs4_reclaim_locks()
invocation we nfs4_put_open_state() and grab the ->so_lock again. So if
we were the last user of this struct and we remove it, then the
following list_next_entry() invocation is a use-after-free. Even if we
use list_for_each_entry_safe() there is no guarantee that the following
member is still valid because it might have been removed by another
writer, right?
So there is this.

However to address my initial problem I have here a patch :) So it uses
a seqlock_t which ensures that there is only one writer at a time. So it
should be basically what is happening now plus a tiny tiny tiny lock
plus lockdep coverage. I tried to test this myself but I don't manage to get
into this code path at all so I might be doing something wrong.

Could you please check if this patch is working for you and whether my
list_for_each_entry() observation is correct or not?

v1…v2: write_seqlock() disables preemption and some functions need preemption
enabled (thread_run(), non-GFP_ATOMIC memory allocation()). We don't want
preemption enabled because a preempted writer would stall the reader
spinning. This is a duct tape mutex. Maybe the seqlock should go.

Signed-off-by: Sebastian Andrzej Siewior 
---
 fs/nfs/delegation.c |  4 ++--
 fs/nfs/nfs4_fs.h|  3 ++-
 fs/nfs/nfs4proc.c   |  4 ++--
 fs/nfs/nfs4state.c  | 23 +--
 4 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index dff600ae0d74..d726d2e09353 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -150,11 +150,11 @@ static int nfs_delegation_claim_opens(struct inode *inode,
sp = state->owner;
/* Block nfs4_proc_unlck */
mutex_lock(&sp->so_delegreturn_mutex);
-   seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+   seq = read_seqbegin(&sp->so_reclaim_seqlock);
err = nfs4_open_delegation_recall(ctx, state, stateid, type);
if (!err)
err = nfs_delegation_claim_locks(ctx, state, stateid);
-   if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
+   if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq))
err = -EAGAIN;
mutex_unlock(&sp->so_delegreturn_mutex);
put_nfs_open_context(ctx);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9b3a82abab07..2fee1a2e8b57 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -111,7 +111,8 @@ struct nfs4_state_owner {
unsigned longso_flags;
struct list_head so_states;
struct nfs_seqid_counter so_seqid;
-   seqcount_t   so_reclaim_seqcount;
+   seqlock_tso_reclaim_seqlock;
+   struct mutex so_reclaim_seqlock_mutex;
struct mutex so_delegreturn_mutex;
 };
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7897826d7c51..9b9d53cd85f9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2685,7 +2685,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata 
*opendata,
unsigned int seq;
int ret;
 
-   seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+   seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
 
ret = _nfs4_proc_open(opendata);
if (ret != 0)
@@ -2723,7 +2723,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata 
*opendata,
ctx->state = state;
if (d_inode(dentry) == state->inode) {
nfs_inode_attach_open_context(ctx);
-   if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
+   if (read_seqretry(&sp->so_reclaim_seqlock, seq))
nfs4_schedule_stateid_recovery(server, state);
}
 out:
diff --git a/fs/nfs/nfs4sta

Re: [PATCH] lpfc: use %zd format string for size_t