date:20151014

[PATCH v4 09/11] smack: namespace groundwork

2015-10-14 Thread Lukasz Pawelczyk

This commit introduces several changes to Smack to prepare it for
namespace implementation. All the changes are related to namespaces.

Overview of the changes:
- Adds required data structures for mapped labels and functions to
  operate on them.
- Implements the proc interface /proc/$PID/attr/label_map that can be
  used to remap labels for a specific namespace, and also to check
  the current map.
- Modifies handling of special built-in labels. Detects them on import
  and assigns the same char* pointer regardless whether it's used in a
  normal or a mapped label. This way we can always compare them by ==
  instead of strcmp().
- Adds User namespace hooks implementation

This patch introduces both internal and user-space visible APIs to
handle namespaced labels and Smack namespaces but the behaviour of Smack
should not be changed. The APIs are there, but they have no impact yet.

Signed-off-by: Lukasz Pawelczyk 
Reviewed-by: Casey Schaufler 
---
 security/smack/Kconfig|  10 ++
 security/smack/Makefile   |   1 +
 security/smack/smack.h|  45 -
 security/smack/smack_access.c |  47 -
 security/smack/smack_lsm.c| 134 +-
 security/smack/smack_ns.c | 404 ++
 6 files changed, 626 insertions(+), 15 deletions(-)
 create mode 100644 security/smack/smack_ns.c

diff --git a/security/smack/Kconfig b/security/smack/Kconfig
index 271adae..b19a7fb 100644
--- a/security/smack/Kconfig
+++ b/security/smack/Kconfig
@@ -40,3 +40,13 @@ config SECURITY_SMACK_NETFILTER
  This enables security marking of network packets using
  Smack labels.
  If you are unsure how to answer this question, answer N.
+
+config SECURITY_SMACK_NS
+   bool "Smack namespace"
+   depends on SECURITY_SMACK
+   depends on USER_NS
+   help
+ This enables Smack namespace that makes it possible to map
+ specific labels within user namespace (analogously to mapping
+ UIDs) and to gain MAC capabilities over them.
+ If you are unsure how to answer this question, answer N.
diff --git a/security/smack/Makefile b/security/smack/Makefile
index ee2ebd5..5faebd7 100644
--- a/security/smack/Makefile
+++ b/security/smack/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_SECURITY_SMACK) := smack.o
 
 smack-y := smack_lsm.o smack_access.o smackfs.o
 smack-$(CONFIG_SECURITY_SMACK_NETFILTER) += smack_netfilter.o
+smack-$(CONFIG_SECURITY_SMACK_NS) += smack_ns.o
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 98bb676..4b7489f 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Use IPv6 port labeling if IPv6 is enabled and secmarks
@@ -74,8 +75,36 @@ struct smack_known {
struct netlbl_lsm_secattr   smk_netlabel;   /* on wire labels */
struct list_headsmk_rules;  /* access rules */
struct mutexsmk_rules_lock; /* lock for rules */
+#ifdef CONFIG_SECURITY_SMACK_NS
+   struct list_headsmk_mapped; /* namespaced labels */
+   struct mutexsmk_mapped_lock;
+#endif /* CONFIG_SECURITY_SMACK_NS */
 };
 
+#ifdef CONFIG_SECURITY_SMACK_NS
+
+/*
+ * User namespace security pointer content.
+ */
+struct smack_ns {
+   struct list_headsmk_mapped; /* namespaced labels */
+   struct mutexsmk_mapped_lock;
+};
+
+/*
+ * A single entry for a namespaced/mapped label.
+ */
+struct smack_known_ns {
+   struct list_headsmk_list_known;
+   struct list_headsmk_list_ns;
+   struct user_namespace   *smk_ns;
+   char*smk_mapped;
+   struct smack_known  *smk_unmapped;
+   boolsmk_allocated;
+};
+
+#endif /* CONFIG_SECURITY_SMACK_NS */
+
 /*
  * Maximum number of bytes for the levels in a CIPSO IP option.
  * Why 23? CIPSO is constrained to 30, so a 32 byte buffer is
@@ -295,7 +324,7 @@ int smk_tskacc(struct task_struct *, struct smack_known *,
   u32, struct smk_audit_info *);
 int smk_curacc(struct smack_known *, u32, struct smk_audit_info *);
 struct smack_known *smack_from_secid(const u32);
-char *smk_parse_smack(const char *string, int len);
+char *smk_parse_smack(const char *string, int len, bool *allocated);
 int smk_netlbl_mls(int, char *, struct netlbl_lsm_secattr *, int);
 struct smack_known *smk_import_entry(const char *, int);
 void smk_insert_entry(struct smack_known *skp);
@@ -310,6 +339,20 @@ char *smk_find_label_name(struct smack_known *skp);
 struct smack_known *smk_get_label(const char *string, int len, bool import);
 
 /*
+ * These functions are in smack_ns.c
+ */
+#ifdef CONFIG_SECURITY_SMACK_NS
+struct user_namespace *smk_find_mapped_ns(struct user_namespace *ns);
+struct smack_known_ns *smk_find_mapped(struct smack_known *skp,
+  struct

[PATCH 25/31] bpf tools: Extract and collect map names from BPF object file

2015-10-14 Thread Wang Nan

This patch collects the names of maps in BPF object files and saves them
into the 'maps' field in 'struct bpf_object'. 'bpf_object__get_map_by_name'
is introduced to retrieve a map's fd and definition through its name.

Signed-off-by: Wang Nan 
Signed-off-by: He Kuang 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-ccbekxapr4xy91a3z57xc...@git.kernel.org
---
 tools/lib/bpf/libbpf.c | 63 --
 tools/lib/bpf/libbpf.h |  3 +++
 2 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8ae501b..c4283c7 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -111,6 +111,7 @@ struct bpf_program {
 
 struct bpf_map {
int fd;
+   char *name;
struct bpf_map_def def;
void *priv;
bpf_map_clear_priv_t clear_priv;
@@ -471,12 +472,46 @@ bpf_object__init_maps(struct bpf_object *obj, void *data,
return 0;
 }
 
+static void
+bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
+{
+   int i;
+   Elf_Data *symbols = obj->efile.symbols;
+
+   if (!symbols || maps_shndx < 0)
+   return;
+
+   for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+   GElf_Sym sym;
+   size_t map_idx;
+   const char *map_name;
+
+   if (!gelf_getsym(symbols, i, ))
+   continue;
+   if (sym.st_shndx != maps_shndx)
+   continue;
+
+   map_name = elf_strptr(obj->efile.elf,
+ obj->efile.ehdr.e_shstrndx,
+ sym.st_name);
+   map_idx = sym.st_value / sizeof(struct bpf_map_def);
+   if (map_idx >= obj->nr_maps) {
+   pr_warning("index of map \"%s\" is buggy: %zu > %zu\n",
+  map_name, map_idx, obj->nr_maps);
+   continue;
+   }
+   obj->maps[map_idx].name = strdup(map_name);
+   pr_debug("map %zu is \"%s\"\n", map_idx,
+obj->maps[map_idx].name);
+   }
+}
+
 static int bpf_object__elf_collect(struct bpf_object *obj)
 {
Elf *elf = obj->efile.elf;
GElf_Ehdr *ep = >efile.ehdr;
Elf_Scn *scn = NULL;
-   int idx = 0, err = 0;
+   int idx = 0, err = 0, maps_shndx = -1;
 
/* Elf is corrupted/truncated, avoid calling elf_strptr. */
if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
@@ -526,9 +561,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
err = bpf_object__init_kversion(obj,
data->d_buf,
data->d_size);
-   else if (strcmp(name, "maps") == 0)
+   else if (strcmp(name, "maps") == 0) {
err = bpf_object__init_maps(obj, data->d_buf,
data->d_size);
+   maps_shndx = idx;
+   }
else if (sh.sh_type == SHT_SYMTAB) {
if (obj->efile.symbols) {
pr_warning("bpf: multiple SYMTAB in %s\n",
@@ -569,6 +606,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
if (err)
goto out;
}
+
+   if (maps_shndx >= 0)
+   bpf_object__init_maps_name(obj, maps_shndx);
 out:
return err;
 }
@@ -1194,6 +1234,13 @@ int bpf_map__get_def(struct bpf_map *map, struct 
bpf_map_def *pdef)
return 0;
 }
 
+const char *bpf_map__get_name(struct bpf_map *map)
+{
+   if (!map)
+   return NULL;
+   return map->name;
+}
+
 int bpf_map__set_private(struct bpf_map *map, void *priv,
 bpf_map_clear_priv_t clear_priv)
 {
@@ -1245,3 +1292,15 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object 
*obj)
return NULL;
return >maps[idx];
 }
+
+struct bpf_map *
+bpf_object__get_map_by_name(struct bpf_object *obj, const char *name)
+{
+   struct bpf_map *pos;
+
+   bpf_map__for_each(pos, obj) {
+   if (strcmp(pos->name, name) == 0)
+   return pos;
+   }
+   return NULL;
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index a3bf71e..37b8f27 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -107,6 +107,8 @@ struct bpf_map_def {
  * it is not a uapi header so no need to consider name confliction.
  */
 struct bpf_map;
+struct bpf_map *
+bpf_object__get_map_by_name(struct bpf_object *obj, const

[PATCH 03/31] perf tools: Enable passing bpf object file to --event

2015-10-14 Thread Wang Nan

By introducing new rules in tools/perf/util/parse-events.[ly], this
patch enables 'perf record --event bpf_file.o' to select events by an
eBPF object file. It calls parse_events_load_bpf() to load that file,
which uses bpf__prepare_load() and finally calls bpf_object__open() for
the object files.

After applying this patch, commands like:

 # perf record --event foo.o sleep

become possible.

However, at this point it is unable to link anything useful onto the
evsel list because the creation of probe points and the attaching of BPF
programs have not been implemented yet.  Until real events can be
extracted, this patch links a dummy evsel to avoid a perf report error
caused by an empty evsel list. The dummy event related code will be
removed when the probing and extracting code is ready.

Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Arnaldo Carvalho de Melo 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/perf.c  |  2 ++
 tools/perf/util/Build  |  1 +
 tools/perf/util/parse-events.c | 57 ++
 tools/perf/util/parse-events.h |  8 ++
 tools/perf/util/parse-events.l |  3 +++
 tools/perf/util/parse-events.y | 18 -
 6 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 5437134..3d4c7c0 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -15,6 +15,7 @@
 #include "util/run-command.h"
 #include "util/parse-events.h"
 #include "util/parse-options.h"
+#include "util/bpf-loader.h"
 #include "util/debug.h"
 #include 
 #include 
@@ -385,6 +386,7 @@ static int run_builtin(struct cmd_struct *p, int argc, 
const char **argv)
status = p->fn(argc, argv, prefix);
exit_browser(status);
perf_env__exit(_env);
+   bpf__clear();
 
if (status)
return status & 0xff;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 9217119..591b3fe 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -87,6 +87,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-bts.o
 libperf-y += parse-branch-options.o
 libperf-y += parse-regs-options.o
 
+libperf-$(CONFIG_LIBBPF) += bpf-loader.o
 libperf-$(CONFIG_LIBELF) += symbol-elf.o
 libperf-$(CONFIG_LIBELF) += probe-file.o
 libperf-$(CONFIG_LIBELF) += probe-event.o
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 991bbd4..a02abd3 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -11,6 +11,7 @@
 #include "symbol.h"
 #include "cache.h"
 #include "header.h"
+#include "bpf-loader.h"
 #include "debug.h"
 #include 
 #include "parse-events-bison.h"
@@ -529,6 +530,62 @@ static int add_tracepoint_multi_sys(struct list_head 
*list, int *idx,
return ret;
 }
 
+int parse_events_load_bpf_obj(struct parse_events_evlist *data,
+ struct list_head *list,
+ struct bpf_object *obj)
+{
+   int err;
+   char errbuf[BUFSIZ];
+
+   if (IS_ERR(obj) || !obj) {
+   snprintf(errbuf, sizeof(errbuf),
+"Internal error: load bpf obj with NULL");
+   err = -EINVAL;
+   goto errout;
+   }
+
+   /*
+* Temporary add a dummy event here so we can check whether
+* basic bpf loader works. Following patches will replace
+* dummy event by useful evsels.
+*/
+   return parse_events_add_numeric(data, list, PERF_TYPE_SOFTWARE,
+   PERF_COUNT_SW_DUMMY, NULL);
+errout:
+   data->error->help = strdup("(add -v to see detail)");
+   data->error->str = strdup(errbuf);
+   return err;
+}
+
+int parse_events_load_bpf(struct parse_events_evlist *data,
+ struct list_head *list,
+ char *bpf_file_name)
+{
+   struct bpf_object *obj;
+
+   obj = bpf__prepare_load(bpf_file_name);
+   if (IS_ERR(obj) || !obj) {
+   char errbuf[BUFSIZ];
+   int err;
+
+   err = obj ? PTR_ERR(obj) : -EINVAL;
+
+   if (err == -ENOTSUP)
+   snprintf(errbuf, sizeof(errbuf),
+"BPF support is not compiled");
+   else
+   snprintf(errbuf, sizeof(errbuf),
+"BPF object file '%s' is invalid",
+bpf_file_name);
+
+   data->error->help = strdup("(add -v to see detail)");
+   data->error->str = strdup(errbuf);
+   return err;
+   }
+
+   return parse_events_load_bpf_obj(data, list, obj);
+}
+
 static int
 parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
 {

[PATCH 18/31] perf tools: Use same BPF program if arguments are identical

2015-10-14 Thread Wang Nan

This patch allows creating only one BPF program for different
'probe_trace_event's (tevs) generated by one 'perf_probe_event' (pev), if
their prologues are identical.

This is done by comparing the argument lists of the different tevs and
mapping each tev to a prologue type using a mapping array. This patch
utilizes qsort to sort the tevs. After sorting, tevs with identical
argument lists will be grouped together.

Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/util/bpf-loader.c | 133 ---
 1 file changed, 126 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index c363907..af549ea 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -37,6 +37,8 @@ struct bpf_prog_priv {
struct perf_probe_event pev;
bool need_prologue;
struct bpf_insn *insns_buf;
+   int nr_types;
+   int *type_mapping;
 };
 
 struct bpf_object *
@@ -104,6 +106,7 @@ bpf_prog_priv__clear(struct bpf_program *prog 
__maybe_unused,
 
cleanup_perf_probe_events(>pev, 1);
zfree(>insns_buf);
+   zfree(>type_mapping);
free(priv);
 }
 
@@ -205,7 +208,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
struct bpf_prog_priv *priv;
struct bpf_insn *buf;
size_t prologue_cnt = 0;
-   int err;
+   int i, err;
 
err = bpf_program__get_private(prog, (void **));
if (err || !priv)
@@ -213,10 +216,20 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
 
pev = >pev;
 
-   if (n < 0 || n >= pev->ntevs)
+   if (n < 0 || n >= priv->nr_types)
goto errout;
 
-   tev = >tevs[n];
+   /* Find a tev belongs to that type */
+   for (i = 0; i < pev->ntevs; i++)
+   if (priv->type_mapping[i] == n)
+   break;
+
+   if (i >= pev->ntevs) {
+   pr_debug("Internal error: prologue type %d not found\n", n);
+   return -ENOENT;
+   }
+
+   tev = >tevs[i];
 
buf = priv->insns_buf;
err = bpf__gen_prologue(tev->args, tev->nargs,
@@ -247,6 +260,98 @@ errout:
return -EINVAL;
 }
 
+/*
+ * compare_tev_args is reflexive, transitive and antisymmetric.
+ * I can show that but this margin is too narrow to contain.
+ */
+static int compare_tev_args(const void *ptev1, const void *ptev2)
+{
+   int i, ret;
+   const struct probe_trace_event *tev1 =
+   *(const struct probe_trace_event **)ptev1;
+   const struct probe_trace_event *tev2 =
+   *(const struct probe_trace_event **)ptev2;
+
+   ret = tev2->nargs - tev1->nargs;
+   if (ret)
+   return ret;
+
+   for (i = 0; i < tev1->nargs; i++) {
+   struct probe_trace_arg *arg1, *arg2;
+   struct probe_trace_arg_ref *ref1, *ref2;
+
+   arg1 = >args[i];
+   arg2 = >args[i];
+
+   ret = strcmp(arg1->value, arg2->value);
+   if (ret)
+   return ret;
+
+   ref1 = arg1->ref;
+   ref2 = arg2->ref;
+
+   while (ref1 && ref2) {
+   ret = ref2->offset - ref1->offset;
+   if (ret)
+   return ret;
+
+   ref1 = ref1->next;
+   ref2 = ref2->next;
+   }
+
+   if (ref1 || ref2)
+   return ref2 ? 1 : -1;
+   }
+
+   return 0;
+}
+
+static int map_prologue(struct perf_probe_event *pev, int *mapping,
+   int *nr_types)
+{
+   int i, type = 0;
+   struct {
+   struct probe_trace_event *tev;
+   int idx;
+   } *stevs;
+   size_t array_sz = sizeof(*stevs) * pev->ntevs;
+
+   stevs = malloc(array_sz);
+   if (!stevs) {
+   pr_debug("No ehough memory: alloc stevs failed\n");
+   return -ENOMEM;
+   }
+
+   pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs);
+   for (i = 0; i < pev->ntevs; i++) {
+   stevs[i].tev = >tevs[i];
+   stevs[i].idx = i;
+   }
+   qsort(stevs, pev->ntevs, sizeof(*stevs),
+ compare_tev_args);
+
+   for (i = 0; i < pev->ntevs; i++) {
+   if (i == 0) {
+   mapping[stevs[i].idx] = type;
+   pr_debug("mapping[%d]=%d\n", stevs[i].idx,
+type);
+   continue;
+   }
+
+   if (compare_tev_args(stevs + i, stevs + i - 1) == 0)
+

[PATCH 05/31] perf record: Load eBPF object into kernel

2015-10-14 Thread Wang Nan

This patch utilizes bpf_object__load() provided by libbpf to load all
objects into kernel.

Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/util/bpf-loader.c   | 22 ++
 tools/perf/util/bpf-loader.h   | 11 +++
 tools/perf/util/parse-events.c |  6 ++
 3 files changed, 39 insertions(+)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 56f6fe8..7279558 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -243,6 +243,18 @@ int bpf__unprobe(struct bpf_object *obj)
return ret;
 }
 
+int bpf__load(struct bpf_object *obj)
+{
+   int err;
+
+   err = bpf_object__load(obj);
+   if (err) {
+   pr_debug("bpf: load objects failed\n");
+   return err;
+   }
+   return 0;
+}
+
 #define bpf__strerror_head(err, buf, size) \
char sbuf[STRERR_BUFSIZE], *emsg;\
if (!size)\
@@ -275,3 +287,13 @@ int bpf__strerror_probe(struct bpf_object *obj 
__maybe_unused,
bpf__strerror_end(buf, size);
return 0;
 }
+
+int bpf__strerror_load(struct bpf_object *obj __maybe_unused,
+  int err, char *buf, size_t size)
+{
+   bpf__strerror_head(err, buf, size);
+   bpf__strerror_entry(EINVAL, "%s: Are you root and runing a 
CONFIG_BPF_SYSCALL kernel?",
+   emsg)
+   bpf__strerror_end(buf, size);
+   return 0;
+}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index b819622..b091ceb 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -23,6 +23,9 @@ int bpf__unprobe(struct bpf_object *obj);
 int bpf__strerror_probe(struct bpf_object *obj, int err,
char *buf, size_t size);
 
+int bpf__load(struct bpf_object *obj);
+int bpf__strerror_load(struct bpf_object *obj, int err,
+  char *buf, size_t size);
 #else
 static inline struct bpf_object *
 bpf__prepare_load(const char *filename __maybe_unused)
@@ -35,6 +38,7 @@ static inline void bpf__clear(void) { }
 
 static inline int bpf__probe(struct bpf_object *obj __maybe_unused) { return 
0;}
 static inline int bpf__unprobe(struct bpf_object *obj __maybe_unused) { return 
0;}
+static inline int bpf__load(struct bpf_object *obj __maybe_unused) { return 0; 
}
 
 static inline int
 __bpf_strerror(char *buf, size_t size)
@@ -55,5 +59,12 @@ bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
 {
return __bpf_strerror(buf, size);
 }
+
+static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused,
+int err __maybe_unused,
+char *buf, size_t size)
+{
+   return __bpf_strerror(buf, size);
+}
 #endif
 #endif
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 06ff8d6..5b17b88 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -561,6 +561,12 @@ int parse_events_load_bpf_obj(struct parse_events_evlist 
*data,
goto errout;
}
 
+   err = bpf__load(obj);
+   if (err) {
+   bpf__strerror_load(obj, err, errbuf, sizeof(errbuf));
+   goto errout;
+   }
+
/*
 * Temporary add a dummy event here so we can check whether
 * basic bpf loader works. Following patches will replace
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 04/31] perf record, bpf: Create probe points for BPF programs

2015-10-14 Thread Wang Nan

This patch introduces bpf__{un,}probe() functions to enable callers to
create kprobe points based on the section names of a BPF program. It parses
the section names in the program and creates corresponding 'struct
perf_probe_event' structures. The parse_perf_probe_command() function is
used to do the main parsing work. The resulting 'struct perf_probe_event'
is stored into the program's private data for later use.

By utilizing the new probing API, this patch creates probe points during
event parsing.

To ensure probe points are removed correctly, register an atexit hook
so that even if perf quits through exit(), bpf__clear() is still called and
the probe points are cleared. Note that bpf__clear() should be registered
before bpf__probe() is called, so that a failure of bpf__probe() can still
trigger bpf__clear() to remove probe points which have already been created.

strerror style error reporting scaffold is created by this patch.
bpf__strerror_probe() is the first error reporting function in bpf-loader.c.

Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Signed-off-by: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/util/bpf-loader.c   | 222 -
 tools/perf/util/bpf-loader.h   |  30 ++
 tools/perf/util/parse-events.c |  17 
 3 files changed, 268 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index ab56073..56f6fe8 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -10,6 +10,8 @@
 #include "perf.h"
 #include "debug.h"
 #include "bpf-loader.h"
+#include "probe-event.h"
+#include "probe-finder.h" // for MAX_PROBES
 
 #define DEFINE_PRINT_FN(name, level) \
 static int libbpf_##name(const char *fmt, ...) \
@@ -27,6 +29,10 @@ DEFINE_PRINT_FN(warning, 0)
 DEFINE_PRINT_FN(info, 0)
 DEFINE_PRINT_FN(debug, 1)
 
+struct bpf_prog_priv {
+   struct perf_probe_event pev;
+};
+
 struct bpf_object *bpf__prepare_load(const char *filename)
 {
struct bpf_object *obj;
@@ -52,6 +58,220 @@ void bpf__clear(void)
 {
struct bpf_object *obj, *tmp;
 
-   bpf_object__for_each_safe(obj, tmp)
+   bpf_object__for_each_safe(obj, tmp) {
+   bpf__unprobe(obj);
bpf_object__close(obj);
+   }
+}
+
+static void
+bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
+void *_priv)
+{
+   struct bpf_prog_priv *priv = _priv;
+
+   cleanup_perf_probe_events(>pev, 1);
+   free(priv);
+}
+
+static int
+config_bpf_program(struct bpf_program *prog)
+{
+   struct perf_probe_event *pev = NULL;
+   struct bpf_prog_priv *priv = NULL;
+   const char *config_str;
+   int err;
+
+   config_str = bpf_program__title(prog, false);
+   if (!config_str) {
+   pr_debug("bpf: unable to get title for program\n");
+   return -EINVAL;
+   }
+
+   priv = calloc(sizeof(*priv), 1);
+   if (!priv) {
+   pr_debug("bpf: failed to alloc priv\n");
+   return -ENOMEM;
+   }
+   pev = >pev;
+
+   pr_debug("bpf: config program '%s'\n", config_str);
+   err = parse_perf_probe_command(config_str, pev);
+   if (err < 0) {
+   pr_debug("bpf: '%s' is not a valid config string\n",
+config_str);
+   err = -EINVAL;
+   goto errout;
+   }
+
+   if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
+   pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
+config_str, PERF_BPF_PROBE_GROUP);
+   err = -EINVAL;
+   goto errout;
+   } else if (!pev->group)
+   pev->group = strdup(PERF_BPF_PROBE_GROUP);
+
+   if (!pev->group) {
+   pr_debug("bpf: strdup failed\n");
+   err = -ENOMEM;
+   goto errout;
+   }
+
+   if (!pev->event) {
+   pr_debug("bpf: '%s': event name is missing\n",
+config_str);
+   err = -EINVAL;
+   goto errout;
+   }
+   pr_debug("bpf: config '%s' is ok\n", config_str);
+
+   err = bpf_program__set_private(prog, priv, bpf_prog_priv__clear);
+   if (err) {
+   pr_debug("Failed to set priv for program '%s'\n", config_str);
+   goto errout;
+   }
+
+   return 0;
+
+errout:
+   if (pev)
+   clear_perf_probe_event(pev);
+   free(priv);
+   return err;
+}
+
+static int bpf__prepare_probe(void)
+{
+   static int err = 0;
+   static bool initialized = false;
+
+   /*
+* Make err static, so if init failed the first, bpf__prepare_probe()
+* fails

[PATCH 26/31] perf tools: Support perf event alias name

2015-10-14 Thread Wang Nan

From: He Kuang 

This patch adds new bison rules for specifying an alias name for a perf
event, which allows the command line to refer to a previously defined perf
event through its name. With this patch the user can give an alias name to
a perf event using the following command line:

 # perf record -e mypmu=cycles ...

To allow the parser to refer to an existing event selector, pass the event
list to 'struct parse_events_evlist'. perf_evlist__find_evsel_by_alias() is
introduced to get an evsel through its alias.

Signed-off-by: He Kuang 
Signed-off-by: Wang Nan 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-7w1s62o0s6ovqlaqwrmx2...@git.kernel.org
---
 tools/perf/util/evlist.c   | 16 
 tools/perf/util/evlist.h   |  4 
 tools/perf/util/evsel.h|  1 +
 tools/perf/util/parse-events.c | 31 ---
 tools/perf/util/parse-events.h |  5 +
 tools/perf/util/parse-events.y | 15 ++-
 6 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d139219..8dd59aa 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1753,3 +1753,19 @@ void perf_evlist__set_tracking_event(struct perf_evlist 
*evlist,
 
tracking_evsel->tracking = true;
 }
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_alias(struct perf_evlist *evlist,
+const char *alias)
+{
+   struct perf_evsel *evsel;
+
+   evlist__for_each(evlist, evsel) {
+   if (!evsel->alias)
+   continue;
+   if (strcmp(alias, evsel->alias) == 0)
+   return evsel;
+   }
+
+   return NULL;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a459fe7..4e25342 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -292,4 +292,8 @@ void perf_evlist__set_tracking_event(struct perf_evlist 
*evlist,
 struct perf_evsel *tracking_evsel);
 
 void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_alias(struct perf_evlist *evlist, const char 
*alias);
+
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a60b5d5..9a95e73 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -87,6 +87,7 @@ struct perf_evsel {
int idx;
u32 ids;
char*name;
+   char*alias;
double  scale;
const char  *unit;
struct event_format *tp_format;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4849dbd..06ba5a6 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1020,6 +1020,30 @@ int parse_events__modifier_group(struct list_head *list,
return parse_events__modifier_event(list, event_mod, true);
 }
 
+int parse_events__set_event_alias(struct parse_events_evlist *data,
+ struct list_head *list,
+ const char *str,
+ void *loc_alias_)
+{
+   struct perf_evsel *evsel;
+   YYLTYPE *loc_alias = loc_alias_;
+
+   if (!str)
+   return 0;
+
+   if (!list_is_singular(list)) {
+   struct parse_events_error *err = data->error;
+
+   err->idx = loc_alias->first_column;
+   err->str = strdup("One alias can be applied to one event only");
+   return -EINVAL;
+   }
+
+   evsel = list_first_entry(list, struct perf_evsel, node);
+   evsel->alias = strdup(str);
+   return evsel->alias ? 0 : -ENOMEM;
+}
+
 void parse_events__set_leader(char *name, struct list_head *list)
 {
struct perf_evsel *leader;
@@ -1373,9 +1397,10 @@ int parse_events(struct perf_evlist *evlist, const char 
*str,
 struct parse_events_error *err)
 {
struct parse_events_evlist data = {
-   .list  = LIST_HEAD_INIT(data.list),
-   .idx   = evlist->nr_entries,
-   .error = err,
+   .list   = LIST_HEAD_INIT(data.list),
+   .idx= evlist->nr_entries,
+   .error  = err,
+   .evlist = evlist,
};
int ret;
 
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 8f17c83..b525353 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -96,6 +96,7 @@ struct parse_events_evlist {
intidx;
intnr_groups;
struct

[PATCH 02/31] perf ebpf: Add the libbpf glue

2015-10-14 Thread Wang Nan

The 'bpf-loader.[ch]' files are introduced in this patch; they will be
the interface between perf and libbpf. bpf__prepare_load() resides in
bpf-loader.c. Following patches will enrich these two files.

Signed-off-by: Wang Nan 
Signed-off-by: Arnaldo Carvalho de Melo 
Acked-by: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/util/bpf-loader.c | 57 
 tools/perf/util/bpf-loader.h | 29 ++
 2 files changed, 86 insertions(+)
 create mode 100644 tools/perf/util/bpf-loader.c
 create mode 100644 tools/perf/util/bpf-loader.h

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
new file mode 100644
index 000..ab56073
--- /dev/null
+++ b/tools/perf/util/bpf-loader.c
@@ -0,0 +1,57 @@
+/*
+ * bpf-loader.c
+ *
+ * Copyright (C) 2015 Wang Nan 
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include 
+#include 
+#include "perf.h"
+#include "debug.h"
+#include "bpf-loader.h"
+
+#define DEFINE_PRINT_FN(name, level) \
+static int libbpf_##name(const char *fmt, ...) \
+{  \
+   va_list args;   \
+   int ret;\
+   \
+   va_start(args, fmt);\
+   ret = veprintf(level, verbose, pr_fmt(fmt), args);\
+   va_end(args);   \
+   return ret; \
+}
+
+DEFINE_PRINT_FN(warning, 0)
+DEFINE_PRINT_FN(info, 0)
+DEFINE_PRINT_FN(debug, 1)
+
+struct bpf_object *bpf__prepare_load(const char *filename)
+{
+   struct bpf_object *obj;
+   static bool libbpf_initialized;
+
+   if (!libbpf_initialized) {
+   libbpf_set_print(libbpf_warning,
+libbpf_info,
+libbpf_debug);
+   libbpf_initialized = true;
+   }
+
+   obj = bpf_object__open(filename);
+   if (!obj) {
+   pr_debug("bpf: failed to load %s\n", filename);
+   return ERR_PTR(-EINVAL);
+   }
+
+   return obj;
+}
+
+void bpf__clear(void)
+{
+   struct bpf_object *obj, *tmp;
+
+   bpf_object__for_each_safe(obj, tmp)
+   bpf_object__close(obj);
+}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
new file mode 100644
index 000..f402d7c
--- /dev/null
+++ b/tools/perf/util/bpf-loader.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2015, Wang Nan 
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __BPF_LOADER_H
+#define __BPF_LOADER_H
+
+#include 
+#include 
+#include 
+#include "debug.h"
+
+struct bpf_object;
+
+#ifdef HAVE_LIBBPF_SUPPORT
+struct bpf_object *bpf__prepare_load(const char *filename);
+
+void bpf__clear(void);
+#else
+static inline struct bpf_object *
+bpf__prepare_load(const char *filename __maybe_unused)
+{
+   pr_debug("ERROR: eBPF object loading is disabled during compiling.\n");
+   return ERR_PTR(-ENOTSUP);
+}
+
+static inline void bpf__clear(void) { }
+#endif
+#endif
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 24/31] bpf tools: Collect map definition in bpf_object

2015-10-14 Thread Wang Nan

This patch collects more information from the maps sections of BPF object
files into 'struct bpf_object', enabling later patches to access that
information (such as the type and size of the map).

In this patch, a new handler 'struct bpf_map' is extracted in parallel
with bpf_object and bpf_program. Its iterator and accessor are also
created.

Signed-off-by: Wang Nan 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-oewgxlae7jpwqou9hba4e...@git.kernel.org
---
 tools/lib/bpf/libbpf.c | 186 +
 tools/lib/bpf/libbpf.h |  21 ++
 2 files changed, 147 insertions(+), 60 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6a07b26..8ae501b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -109,22 +109,24 @@ struct bpf_program {
bpf_program_clear_priv_t clear_priv;
 };
 
+struct bpf_map {
+   int fd;
+   struct bpf_map_def def;
+   void *priv;
+   bpf_map_clear_priv_t clear_priv;
+};
+
 static LIST_HEAD(bpf_objects_list);
 
 struct bpf_object {
char license[64];
u32 kern_version;
-   void *maps_buf;
-   size_t maps_buf_sz;
 
struct bpf_program *programs;
size_t nr_programs;
-   int *map_fds;
-   /*
-* This field is required because maps_buf will be freed and
-* maps_buf_sz will be set to 0 after loaded.
-*/
-   size_t nr_map_fds;
+   struct bpf_map *maps;
+   size_t nr_maps;
+
bool loaded;
 
/*
@@ -434,21 +436,38 @@ static int
 bpf_object__init_maps(struct bpf_object *obj, void *data,
  size_t size)
 {
-   if (size == 0) {
+   size_t nr_maps;
+   int i;
+
+   nr_maps = size / sizeof(struct bpf_map_def);
+   if (!data || !nr_maps) {
pr_debug("%s doesn't need map definition\n",
 obj->path);
return 0;
}
 
-   obj->maps_buf = malloc(size);
-   if (!obj->maps_buf) {
-   pr_warning("malloc maps failed: %s\n", obj->path);
+   pr_debug("maps in %s: %ld bytes\n", obj->path, (long)size);
+
+   obj->maps = calloc(1, sizeof(obj->maps[0]) * nr_maps);
+   if (!obj->maps) {
+   pr_warning("alloc maps for object failed\n");
return -ENOMEM;
}
+   obj->nr_maps = nr_maps;
 
-   obj->maps_buf_sz = size;
-   memcpy(obj->maps_buf, data, size);
-   pr_debug("maps in %s: %ld bytes\n", obj->path, (long)size);
+   for (i = 0; i < nr_maps; i++) {
+   struct bpf_map_def *def = &obj->maps[i].def;
+
+   /*
+* fill all fd with -1 so won't close incorrect
+* fd (0, stdin) when failure.
+*/
+   obj->maps[i].fd = -1;
+
+   /* Save map definition into obj->maps */
+   *def = *(struct bpf_map_def *)(data +
+   i * sizeof(struct bpf_map_def));
+   }
return 0;
 }
 
@@ -632,37 +651,15 @@ static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
unsigned int i;
-   size_t nr_maps;
-   int *pfd;
-
-   nr_maps = obj->maps_buf_sz / sizeof(struct bpf_map_def);
-   if (!obj->maps_buf || !nr_maps) {
-   pr_debug("don't need create maps for %s\n",
-obj->path);
-   return 0;
-   }
-
-   obj->map_fds = malloc(sizeof(int) * nr_maps);
-   if (!obj->map_fds) {
-   pr_warning("realloc perf_bpf_map_fds failed\n");
-   return -ENOMEM;
-   }
-   obj->nr_map_fds = nr_maps;
 
-   /* fill all fd with -1 */
-   memset(obj->map_fds, -1, sizeof(int) * nr_maps);
+   for (i = 0; i < obj->nr_maps; i++) {
+   struct bpf_map_def *def = &obj->maps[i].def;
+   int *pfd = &obj->maps[i].fd;
 
-   pfd = obj->map_fds;
-   for (i = 0; i < nr_maps; i++) {
-   struct bpf_map_def def;
-
-   def = *(struct bpf_map_def *)(obj->maps_buf +
-   i * sizeof(struct bpf_map_def));
-
-   *pfd = bpf_create_map(def.type,
- def.key_size,
- def.value_size,
- def.max_entries);
+   *pfd = bpf_create_map(def->type,
+ def->key_size,
+ def->value_size,
+ def->max_entries);
if (*pfd < 0) {
size_t j;
int err = *pfd;
@@ -670,22 +667,17 @@ bpf_object__create_maps(struct bpf_object *obj)

[PATCH 01/31] perf tools: Make perf depend on libbpf

2015-10-14 Thread Wang Nan

By adding libbpf into perf's Makefile, this patch enables perf to build
libbpf during building if libelf is found and neither NO_LIBELF nor
NO_LIBBPF is set. The newly introduced code is similar to libapi and
libtraceevent building in Makefile.perf.

MANIFEST is also updated for 'make perf-*-src-pkg'.

Append make_no_libbpf to tools/perf/tests/make.

The 'bpf' feature check is appended to the default FEATURE_TESTS and
FEATURE_DISPLAY, so perf will check the API version of bpf in
/path/to/kernel/include/uapi/linux/bpf.h. This check should not fail
except when we are trying to port this code to an old kernel.

Error messages are also updated to notify users that BPF support in
'perf record' is disabled if libelf is missing or the BPF API check fails.

tools/lib/bpf is added into TAG_FOLDERS to allow us to navigate on
libbpf files when working on perf using tools/perf/tags.

Signed-off-by: Wang Nan 
Acked-by: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/tip-9tj3h70vyoku9rkrb8xaf...@git.kernel.org
[ Document NO_LIBBPF in Makefile.perf, noted by Jiri Olsa ]
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/build/Makefile.feature |  6 --
 tools/perf/MANIFEST  |  3 +++
 tools/perf/Makefile.perf | 21 +++--
 tools/perf/config/Makefile   | 19 ++-
 tools/perf/tests/make|  4 +++-
 5 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 72817e4..37ff4c9 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -53,7 +53,8 @@ FEATURE_TESTS ?=  \
libdw-dwarf-unwind  \
zlib\
lzma\
-   get_cpuid
+   get_cpuid   \
+   bpf
 
 FEATURE_DISPLAY ?= \
dwarf   \
@@ -71,7 +72,8 @@ FEATURE_DISPLAY ?=\
libdw-dwarf-unwind  \
zlib\
lzma\
-   get_cpuid
+   get_cpuid   \
+   bpf
 
 # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
 # If in the future we need per-feature checks/flags for features not
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 9e6bdf5..39c38cb 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -17,6 +17,7 @@ tools/build
 tools/arch/x86/include/asm/atomic.h
 tools/arch/x86/include/asm/rmwcc.h
 tools/lib/traceevent
+tools/lib/bpf
 tools/lib/api
 tools/lib/bpf
 tools/lib/hweight.c
@@ -69,6 +70,8 @@ arch/*/lib/memset*.S
 include/linux/poison.h
 include/linux/hw_breakpoint.h
 include/uapi/linux/perf_event.h
+include/uapi/linux/bpf.h
+include/uapi/linux/bpf_common.h
 include/uapi/linux/const.h
 include/uapi/linux/swab.h
 include/uapi/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 56517d3..1e2e2d1 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -75,6 +75,8 @@ include config/utilities.mak
 # Define NO_LZMA if you do not want to support compressed (xz) kernel modules
 #
 # Define NO_AUXTRACE if you do not want AUX area tracing support
+#
+# Define NO_LIBBPF if you do not want BPF support
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -145,6 +147,7 @@ AWK = awk
 
 LIB_DIR  = $(srctree)/tools/lib/api/
 TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
+BPF_DIR = $(srctree)/tools/lib/bpf/
 
 # include config/Makefile by default and rule out
 # non-config cases
@@ -180,6 +183,7 @@ strip-libs = $(filter-out -l%,$(1))
 
 ifneq ($(OUTPUT),)
   TE_PATH=$(OUTPUT)
+  BPF_PATH=$(OUTPUT)
 ifneq ($(subdir),)
   LIB_PATH=$(OUTPUT)/../lib/api/
 else
@@ -188,6 +192,7 @@ endif
 else
   TE_PATH=$(TRACE_EVENT_DIR)
   LIB_PATH=$(LIB_DIR)
+  BPF_PATH=$(BPF_DIR)
 endif
 
 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
@@ -199,6 +204,8 @@ LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker 
--dynamic-list=$(LIBTRACEEVENT_DYN
 LIBAPI = $(LIB_PATH)libapi.a
 export LIBAPI
 
+LIBBPF = $(BPF_PATH)libbpf.a
+
 # python extension build directories
 PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
 PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
@@ -251,6 +258,9 @@ export PERL_PATH
 LIB_FILE=$(OUTPUT)libperf.a
 
 PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT)
+ifndef NO_LIBBPF
+  PERFLIBS += $(LIBBPF)
+endif
 
 # We choose to avoid "if .. else if .. else .. endif endif"
 # because maintaining the nesting to match is a pain.  If
@@ -420,6 +430,13 @@ $(LIBAPI)-clean:
$(call QUIET_CLEAN, libapi)
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
 
+$(LIBBPF): FORCE
+   $(Q)$(MAKE) -C $(BPF_DIR)

[PATCH 06/31] perf tools: Collect perf_evsel in BPF object files

2015-10-14 Thread Wang Nan

This patch collects a 'struct perf_evsel' for every probing point in the BPF
object file(s) and fills 'struct evlist'. The previously introduced dummy
event is now removed. After this patch, the following command:

 # perf record --event filter.o ls

can trace each probing point defined in filter.o.

The core of this patch is bpf__foreach_tev(), which calls a callback
function for each 'struct probe_trace_event' of a bpf program, passing
the program's file descriptor. The callback function add_bpf_event()
creates evsels by calling parse_events_add_tracepoint().

Since bpf-loader.c will not be built if libbpf is turned off, an empty
bpf__foreach_tev() is defined in bpf-loader.h to avoid a compile
error.

Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/util/bpf-loader.c   | 40 
 tools/perf/util/bpf-loader.h   | 14 
 tools/perf/util/parse-events.c | 52 --
 3 files changed, 99 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 7279558..aa784a4 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -255,6 +255,46 @@ int bpf__load(struct bpf_object *obj)
return 0;
 }
 
+int bpf__foreach_tev(struct bpf_object *obj,
+bpf_prog_iter_callback_t func,
+void *arg)
+{
+   struct bpf_program *prog;
+   int err;
+
+   bpf_object__for_each_program(prog, obj) {
+   struct probe_trace_event *tev;
+   struct perf_probe_event *pev;
+   struct bpf_prog_priv *priv;
+   int i, fd;
+
+   err = bpf_program__get_private(prog,
+   (void **)&priv);
+   if (err || !priv) {
+   pr_debug("bpf: failed to get private field\n");
+   return -EINVAL;
+   }
+
+   pev = &priv->pev;
+   for (i = 0; i < pev->ntevs; i++) {
+   tev = &pev->tevs[i];
+
+   fd = bpf_program__fd(prog);
+   if (fd < 0) {
+   pr_debug("bpf: failed to get file descriptor\n");
+   return fd;
+   }
+
+   err = (*func)(tev, fd, arg);
+   if (err) {
+   pr_debug("bpf: call back failed, stop iterate\n");
+   return err;
+   }
+   }
+   }
+   return 0;
+}
+
 #define bpf__strerror_head(err, buf, size) \
char sbuf[STRERR_BUFSIZE], *emsg;\
if (!size)\
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index b091ceb..a8f25ee 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -8,11 +8,15 @@
 #include 
 #include 
 #include 
+#include "probe-event.h"
 #include "debug.h"
 
 struct bpf_object;
 #define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
 
+typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev,
+   int fd, void *arg);
+
 #ifdef HAVE_LIBBPF_SUPPORT
 struct bpf_object *bpf__prepare_load(const char *filename);
 
@@ -26,6 +30,8 @@ int bpf__strerror_probe(struct bpf_object *obj, int err,
 int bpf__load(struct bpf_object *obj);
 int bpf__strerror_load(struct bpf_object *obj, int err,
   char *buf, size_t size);
+int bpf__foreach_tev(struct bpf_object *obj,
+bpf_prog_iter_callback_t func, void *arg);
 #else
 static inline struct bpf_object *
 bpf__prepare_load(const char *filename __maybe_unused)
@@ -41,6 +47,14 @@ static inline int bpf__unprobe(struct bpf_object *obj 
__maybe_unused) { return 0
 static inline int bpf__load(struct bpf_object *obj __maybe_unused) { return 0; 
}
 
 static inline int
+bpf__foreach_tev(struct bpf_object *obj __maybe_unused,
+bpf_prog_iter_callback_t func __maybe_unused,
+void *arg __maybe_unused)
+{
+   return 0;
+}
+
+static inline int
 __bpf_strerror(char *buf, size_t size)
 {
if (!size)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5b17b88..61c7a47 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -530,12 +530,49 @@ static int add_tracepoint_multi_sys(struct list_head 
*list, int *idx,
return ret;
 }
 
+struct __add_bpf_event_param {
+   struct parse_events_evlist *data;
+   struct list_head *list;
+};
+
+static int add_bpf_event(struct probe_trace_event *tev, int fd,
+void *_param)
+{

[PATCH 27/31] perf tools: Pass available CPU number to clang compiler

2015-10-14 Thread Wang Nan

This patch introduces a new macro "__NR_CPUS__" to perf's embedded
clang compiler, which represents the number of available CPUs in this
system. A BPF program can use this macro to create a map with the same
number of entries as there are CPUs in the system. For example:

 struct bpf_map_def SEC("maps") pmu_map = {
 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 .key_size = sizeof(int),
 .value_size = sizeof(u32),
 .max_entries = __NR_CPUS__,
 };

Signed-off-by: Wang Nan 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-y603iy62s8w4br4t2gxfo...@git.kernel.org
---
 tools/perf/util/llvm-utils.c | 24 ++--
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 4f6a478..80eecef 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -11,10 +11,11 @@
 #include "cache.h"
 
 #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \
-   "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS "  \
-   "$KERNEL_INC_OPTIONS -Wno-unused-value "\
-   "-Wno-pointer-sign -working-directory " \
-   "$WORKING_DIR -c \"$CLANG_SOURCE\" -target bpf -O2 -o -"
+   "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
+   "$CLANG_OPTIONS $KERNEL_INC_OPTIONS "   \
+   "-Wno-unused-value -Wno-pointer-sign "  \
+   "-working-directory $WORKING_DIR "  \
+   "-c \"$CLANG_SOURCE\" -target bpf -O2 -o -"
 
 struct llvm_param llvm_param = {
.clang_path = "clang",
@@ -326,8 +327,8 @@ get_kbuild_opts(char **kbuild_dir, char 
**kbuild_include_opts)
 int llvm__compile_bpf(const char *path, void **p_obj_buf,
  size_t *p_obj_buf_sz)
 {
-   int err;
-   char clang_path[PATH_MAX];
+   int err, nr_cpus_avail;
+   char clang_path[PATH_MAX], nr_cpus_avail_str[64];
const char *clang_opt = llvm_param.clang_opt;
const char *template = llvm_param.clang_bpf_cmd_template;
char *kbuild_dir = NULL, *kbuild_include_opts = NULL;
@@ -354,6 +355,17 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 */
get_kbuild_opts(_dir, _include_opts);
 
+   nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF);
+   if (nr_cpus_avail <= 0) {
+   pr_err(
+"WARNING:\tunable to get available CPUs in this system: %s\n"
+"\tUse 128 instead.\n", strerror(errno));
+   nr_cpus_avail = 128;
+   }
+   snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d",
+nr_cpus_avail);
+
+   force_set_env("NR_CPUS", nr_cpus_avail_str);
force_set_env("CLANG_EXEC", clang_path);
force_set_env("CLANG_OPTIONS", clang_opt);
force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts);
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 08/11] smack: misc cleanups in preparation for a namespace patch

2015-10-14 Thread Lukasz Pawelczyk

This patch does some small miscellaneous cleanups and additions that
should not change the code behaviour in any way. Its only purpose is to
shape the code in a way that the smack namespace patches would be
smaller and easier to understand.

Changes:
- four small helper functions added
- minor code reformatting in several places for readability
- unnecessarily increasing string size has been fixed

This patch should not change the behaviour of the Smack in any way.

Signed-off-by: Lukasz Pawelczyk 
Reviewed-by: Casey Schaufler 
---
 security/smack/smack.h| 47 ++-
 security/smack/smack_access.c | 18 +-
 security/smack/smack_lsm.c| 58 ---
 security/smack/smackfs.c  |  4 +--
 4 files changed, 81 insertions(+), 46 deletions(-)

diff --git a/security/smack/smack.h b/security/smack/smack.h
index 091efc2..98bb676 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -291,7 +291,7 @@ struct smk_audit_info {
 int smk_access_entry(char *, char *, struct list_head *);
 int smk_access(struct smack_known *, struct smack_known *,
   int, struct smk_audit_info *);
-int smk_tskacc(struct task_smack *, struct smack_known *,
+int smk_tskacc(struct task_struct *, struct smack_known *,
   u32, struct smk_audit_info *);
 int smk_curacc(struct smack_known *, u32, struct smk_audit_info *);
 struct smack_known *smack_from_secid(const u32);
@@ -348,6 +348,7 @@ extern struct hlist_head smack_known_hash[SMACK_HASH_SLOTS];
 static inline int smk_inode_transmutable(const struct inode *isp)
 {
struct inode_smack *sip = isp->i_security;
+
return (sip->smk_flags & SMK_INODE_TRANSMUTE) != 0;
 }
 
@@ -357,10 +358,31 @@ static inline int smk_inode_transmutable(const struct 
inode *isp)
 static inline struct smack_known *smk_of_inode(const struct inode *isp)
 {
struct inode_smack *sip = isp->i_security;
+
return sip->smk_inode;
 }
 
 /*
+ * Present a pointer to the smack label entry in an inode blob for an exec.
+ */
+static inline struct smack_known *smk_of_exec(const struct inode *isp)
+{
+   struct inode_smack *sip = isp->i_security;
+
+   return sip->smk_task;
+}
+
+/*
+ * Present a pointer to the smack label entry in an inode blob for an mmap.
+ */
+static inline struct smack_known *smk_of_mmap(const struct inode *isp)
+{
+   struct inode_smack *sip = isp->i_security;
+
+   return sip->smk_mmap;
+}
+
+/*
  * Present a pointer to the smack label entry in an task blob.
  */
 static inline struct smack_known *smk_of_task(const struct task_smack *tsp)
@@ -395,6 +417,29 @@ static inline struct smack_known *smk_of_current(void)
 }
 
 /*
+ * Present a pointer to the user namespace entry in an task blob.
+ */
+static inline
+struct user_namespace *ns_of_task_struct(const struct task_struct *t)
+{
+   struct user_namespace *ns;
+
+   rcu_read_lock();
+   ns = __task_cred(t)->user_ns;
+   rcu_read_unlock();
+
+   return ns;
+}
+
+/*
+ * Present a pointer to the user namespace entry in the current task blob.
+ */
+static inline struct user_namespace *ns_of_current(void)
+{
+   return current_user_ns();
+}
+
+/*
  * logging functions
  */
 #define SMACK_AUDIT_DENIED 0x1
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 131c742..750aa9c 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -167,6 +167,7 @@ int smk_access(struct smack_known *subject, struct 
smack_known *object,
if (subject == _known_hat)
goto out_audit;
}
+
/*
 * Beyond here an explicit relationship is required.
 * If the requested access is contained in the available
@@ -183,6 +184,7 @@ int smk_access(struct smack_known *subject, struct 
smack_known *object,
rc = -EACCES;
goto out_audit;
}
+
 #ifdef CONFIG_SECURITY_SMACK_BRINGUP
/*
 * Return a positive value if using bringup mode.
@@ -225,10 +227,10 @@ out_audit:
  * non zero otherwise. It allows that the task may have the capability
  * to override the rules.
  */
-int smk_tskacc(struct task_smack *tsp, struct smack_known *obj_known,
+int smk_tskacc(struct task_struct *task, struct smack_known *obj_known,
   u32 mode, struct smk_audit_info *a)
 {
-   struct smack_known *sbj_known = smk_of_task(tsp);
+   struct smack_known *sbj_known = smk_of_task_struct(task);
int may;
int rc;
 
@@ -237,13 +239,19 @@ int smk_tskacc(struct task_smack *tsp, struct smack_known 
*obj_known,
 */
rc = smk_access(sbj_known, obj_known, mode, NULL);
if (rc >= 0) {
+   struct task_smack *tsp;
+
/*
 * If there is an entry in the task's rule list
 * it can further restrict access.
 */
+   rcu_read_lock();
+

[PATCH 12/31] perf probe: Reset args and nargs for probe_trace_event when failure

2015-10-14 Thread Wang Nan

When a failure occurs in add_probe_trace_event(), args in
probe_trace_event is incomplete. Since the information in it may be used
later, this patch frees the allocated memory and sets it to NULL
to avoid a dangling pointer.

Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/util/probe-finder.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index bd8f03d..b1581d7 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1235,6 +1235,10 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, 
struct probe_finder *pf)
 
 end:
free(args);
+   if (ret) {
+   tev->nargs = 0;
+   zfree(&tev->args);
+   }
return ret;
 }
 
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 13/31] bpf tools: Load a program with different instances using preprocessor

2015-10-14 Thread Wang Nan

In this patch, caller of libbpf is able to control the loaded programs
by installing a preprocessor callback for a BPF program. With
preprocessor, different instances can be created from one BPF program.

This patch will be used by perf to generate different prologue for
different 'struct probe_trace_event' instances matched by one
'struct perf_probe_event'.

bpf_program__set_prep() is added to support this feature. The caller
should pass libbpf the number of instances that should be created and a
preprocessor function which will be called when doing the real loading.
The callback should return an instruction array for each instance.

The fd field in bpf_program is replaced by instance, which has an nr field
and an fds array. bpf_program__nth_fd() is introduced for reading the fd of
an instance. The old interface bpf_program__fd() is reimplemented by
returning the first fd.

Signed-off-by: Wang Nan 
Signed-off-by: He Kuang 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/lib/bpf/libbpf.c | 143 +
 tools/lib/bpf/libbpf.h |  22 
 2 files changed, 156 insertions(+), 9 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4252fc2..6a07b26 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -98,7 +98,11 @@ struct bpf_program {
} *reloc_desc;
int nr_reloc;
 
-   int fd;
+   struct {
+   int nr;
+   int *fds;
+   } instance;
+   bpf_program_prep_t preprocessor;
 
struct bpf_object *obj;
void *priv;
@@ -152,10 +156,24 @@ struct bpf_object {
 
 static void bpf_program__unload(struct bpf_program *prog)
 {
+   int i;
+
if (!prog)
return;
 
-   zclose(prog->fd);
+   /*
+* If the object is opened but the program is never loaded,
+* it is possible that prog->instance.nr == -1.
+*/
+   if (prog->instance.nr > 0) {
+   for (i = 0; i < prog->instance.nr; i++)
+   zclose(prog->instance.fds[i]);
+   } else if (prog->instance.nr != -1)
+   pr_warning("Internal error: instance.nr is %d\n",
+  prog->instance.nr);
+
+   prog->instance.nr = -1;
+   zfree(&prog->instance.fds);
 }
 
 static void bpf_program__exit(struct bpf_program *prog)
@@ -206,7 +224,8 @@ bpf_program__init(void *data, size_t size, char *name, int 
idx,
memcpy(prog->insns, data,
   prog->insns_cnt * sizeof(struct bpf_insn));
prog->idx = idx;
-   prog->fd = -1;
+   prog->instance.fds = NULL;
+   prog->instance.nr = -1;
 
return 0;
 errout:
@@ -795,13 +814,71 @@ static int
 bpf_program__load(struct bpf_program *prog,
  char *license, u32 kern_version)
 {
-   int err, fd;
+   int err = 0, fd, i;
+
+   if (prog->instance.nr < 0 || !prog->instance.fds) {
+   if (prog->preprocessor) {
+   pr_warning("Internal error: can't load program '%s'\n",
+  prog->section_name);
+   return -EINVAL;
+   }
+
+   prog->instance.fds = malloc(sizeof(int));
+   if (!prog->instance.fds) {
+   pr_warning("No enough memory for fds\n");
+   return -ENOMEM;
+   }
+   prog->instance.nr = 1;
+   prog->instance.fds[0] = -1;
+   }
+
+   if (!prog->preprocessor) {
+   if (prog->instance.nr != 1)
+   pr_warning("Program '%s' inconsistent: nr(%d) not 1\n",
+  prog->section_name, prog->instance.nr);
 
-   err = load_program(prog->insns, prog->insns_cnt,
-  license, kern_version, );
-   if (!err)
-   prog->fd = fd;
+   err = load_program(prog->insns, prog->insns_cnt,
+  license, kern_version, );
+   if (!err)
+   prog->instance.fds[0] = fd;
+   goto out;
+   }
+
+   for (i = 0; i < prog->instance.nr; i++) {
+   struct bpf_prog_prep_result result;
+   bpf_program_prep_t preprocessor = prog->preprocessor;
+
+   bzero(&result, sizeof(result));
+   err = preprocessor(prog, i, prog->insns,
+  prog->insns_cnt, &result);
+   if (err) {
+   pr_warning("Preprocessing %dth instance of program '%s' 
failed\n",
+   i, prog->section_name);
+   goto out;
+   }
+
+   if (!result.new_insn_ptr

Re: [PATCH v2] ARM: fix vdsomunge not to depend on glibc specific byteswap.h

2015-10-14 Thread H. Nikolaus Schaller

ping.

Am 03.10.2015 um 22:46 schrieb H. Nikolaus Schaller :

> If the host toolchain is not glibc based then the arm kernel build
> fails with
> 
> HOSTCC  arch/arm/vdso/vdsomunge
> arch/arm/vdso/vdsomunge.c:48:22: fatal error: byteswap.h: No such file or 
> directory
> 
> Observed: with omap2plus_defconfig and compile on Mac OS X with arm ELF
> cross-compiler.
> 
> Reason: byteswap.h is a glibc only header.
> 
> Solution: replace by private byte-swapping macros (taken from
> arch/mips/boot/elf2ecoff.c)
> 
> Tested to compile on Mac OS X 10.9.5 host.
> 
> Signed-off-by: H. Nikolaus Schaller 
> ---
> arch/arm/vdso/vdsomunge.c | 19 +++
> 1 file changed, 15 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c
> index aedec81..27a9a0b 100644
> --- a/arch/arm/vdso/vdsomunge.c
> +++ b/arch/arm/vdso/vdsomunge.c
> @@ -45,7 +45,18 @@
> * it does.
> */
> 
> -#include 
> +#define swab16(x) \
> + ((unsigned short)( \
> + (((unsigned short)(x) & (unsigned short)0x00ffU) << 8) | \
> + (((unsigned short)(x) & (unsigned short)0xff00U) >> 8) ))
> +
> +#define swab32(x) \
> + ((unsigned int)( \
> + (((unsigned int)(x) & (unsigned int)0x00ffUL) << 24) | \
> + (((unsigned int)(x) & (unsigned int)0xff00UL) <<  8) | \
> + (((unsigned int)(x) & (unsigned int)0x00ffUL) >>  8) | \
> + (((unsigned int)(x) & (unsigned int)0xff00UL) >> 24) ))
> +
> #include 
> #include 
> #include 
> @@ -104,17 +115,17 @@ static void cleanup(void)
> 
> static Elf32_Word read_elf_word(Elf32_Word word, bool swap)
> {
> - return swap ? bswap_32(word) : word;
> + return swap ? swab32(word) : word;
> }
> 
> static Elf32_Half read_elf_half(Elf32_Half half, bool swap)
> {
> - return swap ? bswap_16(half) : half;
> + return swap ? swab16(half) : half;
> }
> 
> static void write_elf_word(Elf32_Word val, Elf32_Word *dst, bool swap)
> {
> - *dst = swap ? bswap_32(val) : val;
> + *dst = swap ? swab32(val) : val;
> }
> 
> int main(int argc, char **argv)
> -- 
> 2.5.1
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 17/31] perf tools: Generate prologue for BPF programs

2015-10-14 Thread Wang Nan

This patch generates prologue for each 'struct probe_trace_event' for
fetching arguments for BPF programs.

After bpf__probe(), iterate over each program to check whether a
prologue is required. If none of the 'struct perf_probe_event' a program
will attach to has at least one argument, simply skip preprocessor
hooking. For those for which a prologue is required, call
bpf__gen_prologue() and paste the original instructions after the prologue.

Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/util/bpf-loader.c | 120 ++-
 1 file changed, 119 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f4c690f..c363907 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -5,11 +5,14 @@
  * Copyright (C) 2015 Huawei Inc.
  */
 
+#include 
 #include 
 #include 
 #include "perf.h"
 #include "debug.h"
 #include "bpf-loader.h"
+#include "bpf-prologue.h"
+#include "llvm-utils.h"
 #include "probe-event.h"
 #include "probe-finder.h" // for MAX_PROBES
 #include "llvm-utils.h"
@@ -32,6 +35,8 @@ DEFINE_PRINT_FN(debug, 1)
 
 struct bpf_prog_priv {
struct perf_probe_event pev;
+   bool need_prologue;
+   struct bpf_insn *insns_buf;
 };
 
 struct bpf_object *
@@ -98,6 +103,7 @@ bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
struct bpf_prog_priv *priv = _priv;
 
cleanup_perf_probe_events(>pev, 1);
+   zfree(&priv->insns_buf);
free(priv);
 }
 
@@ -189,6 +195,102 @@ static int bpf__prepare_probe(void)
return err;
 }
 
+static int
+preproc_gen_prologue(struct bpf_program *prog, int n,
+struct bpf_insn *orig_insns, int orig_insns_cnt,
+struct bpf_prog_prep_result *res)
+{
+   struct probe_trace_event *tev;
+   struct perf_probe_event *pev;
+   struct bpf_prog_priv *priv;
+   struct bpf_insn *buf;
+   size_t prologue_cnt = 0;
+   int err;
+
+   err = bpf_program__get_private(prog, (void **));
+   if (err || !priv)
+   goto errout;
+
+   pev = >pev;
+
+   if (n < 0 || n >= pev->ntevs)
+   goto errout;
+
+   tev = >tevs[n];
+
+   buf = priv->insns_buf;
+   err = bpf__gen_prologue(tev->args, tev->nargs,
+   buf, _cnt,
+   BPF_MAXINSNS - orig_insns_cnt);
+   if (err) {
+   const char *title;
+
+   title = bpf_program__title(prog, false);
+   if (!title)
+   title = "[unknown]";
+
+   pr_debug("Failed to generate prologue for program %s\n",
+title);
+   return err;
+   }
+
+   memcpy([prologue_cnt], orig_insns,
+  sizeof(struct bpf_insn) * orig_insns_cnt);
+
+   res->new_insn_ptr = buf;
+   res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
+   res->pfd = NULL;
+   return 0;
+
+errout:
+   pr_debug("Internal error in preproc_gen_prologue\n");
+   return -EINVAL;
+}
+
+static int hook_load_preprocessor(struct bpf_program *prog)
+{
+   struct perf_probe_event *pev;
+   struct bpf_prog_priv *priv;
+   bool need_prologue = false;
+   int err, i;
+
+   err = bpf_program__get_private(prog, (void **));
+   if (err || !priv) {
+   pr_debug("Internal error when hook preprocessor\n");
+   return -EINVAL;
+   }
+
+   pev = >pev;
+   for (i = 0; i < pev->ntevs; i++) {
+   struct probe_trace_event *tev = >tevs[i];
+
+   if (tev->nargs > 0) {
+   need_prologue = true;
+   break;
+   }
+   }
+
+   /*
+* Since all tev doesn't have argument, we don't need generate
+* prologue.
+*/
+   if (!need_prologue) {
+   priv->need_prologue = false;
+   return 0;
+   }
+
+   priv->need_prologue = true;
+   priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS);
+   if (!priv->insns_buf) {
+   pr_debug("No enough memory: alloc insns_buf failed\n");
+   return -ENOMEM;
+   }
+
+   err = bpf_program__set_prep(prog, pev->ntevs,
+   preproc_gen_prologue);
+   return err;
+}
+
 int bpf__probe(struct bpf_object *obj)
 {
int err = 0;
@@ -223,6 +325,18 @@ int bpf__probe(struct bpf_object *obj)
pr_debug("bpf_probe: failed to apply perf probe 
events");
goto out;
}
+
+   /*
+* After

[PATCH 10/31] perf test: Enforce LLVM test for BPF test

2015-10-14 Thread Wang Nan

This patch replaces the original toy BPF program with the previously
introduced bpf-script-example.c, which is dynamically embedded into
'llvm-src.c'.

The newly introduced BPF program attaches at 'sys_epoll_pwait()' and
collects half of the samples from it. perf itself never uses that
syscall, so further tests can verify their results with it.

Since BPF programs require the LINUX_VERSION_CODE of the running kernel,
this patch computes that code from uname.

Since the resulting BPF object is useful for further testcases, this patch
introduces 'prepare' and 'cleanup' methods to tests, and makes test__llvm()
create a MAP_SHARED memory array to hold the resulting object.

Signed-off-by: He Kuang 
Signed-off-by: Wang Nan 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/tests/Build  |   9 +++-
 tools/perf/tests/builtin-test.c |   6 +++
 tools/perf/tests/llvm.c | 104 +++-
 tools/perf/tests/llvm.h |  14 ++
 tools/perf/tests/tests.h|   4 ++
 5 files changed, 123 insertions(+), 14 deletions(-)
 create mode 100644 tools/perf/tests/llvm.h

diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 50de225..4afc8c8 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -31,9 +31,16 @@ perf-y += sample-parsing.o
 perf-y += parse-no-sample-id-all.o
 perf-y += kmod-path.o
 perf-y += thread-map.o
-perf-y += llvm.o
+perf-y += llvm.o llvm-src.o
 perf-y += topology.o
 
+$(OUTPUT)tests/llvm-src.c: tests/bpf-script-example.c
+   $(call rule_mkdir)
+   $(Q)echo '#include ' > $@
+   $(Q)echo 'const char test_llvm__bpf_prog[] =' >> $@
+   $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+   $(Q)echo ';' >> $@
+
 ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 66f72d3..e812a0c 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -160,6 +160,8 @@ static struct test generic_tests[] = {
{
.desc = "Test LLVM searching and compiling",
.func = test__llvm,
+   .prepare = test__llvm_prepare,
+   .cleanup = test__llvm_cleanup,
},
{
.desc = "Test topology in session",
@@ -261,7 +263,11 @@ static int __cmd_test(int argc, const char *argv[], struct 
intlist *skiplist)
}
 
pr_debug("\n--- start ---\n");
+   if (t->prepare)
+   t->prepare();
err = run_test(t);
+   if (t->cleanup)
+   t->cleanup();
pr_debug(" end \n%s:", t->desc);
 
switch (err) {
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index 52d5597..236bf39 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -1,9 +1,13 @@
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
+#include 
 #include "tests.h"
 #include "debug.h"
+#include "llvm.h"
 
 static int perf_config_cb(const char *var, const char *val,
  void *arg __maybe_unused)
@@ -11,16 +15,6 @@ static int perf_config_cb(const char *var, const char *val,
return perf_default_config(var, val, arg);
 }
 
-/*
- * Randomly give it a "version" section since we don't really load it
- * into kernel
- */
-static const char test_bpf_prog[] =
-   "__attribute__((section(\"do_fork\"), used)) "
-   "int fork(void *ctx) {return 0;} "
-   "char _license[] __attribute__((section(\"license\"), used)) = \"GPL\";"
-   "int _version __attribute__((section(\"version\"), used)) = 0x40100;";
-
 #ifdef HAVE_LIBBPF_SUPPORT
 static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
 {
@@ -41,12 +35,44 @@ static int test__bpf_parsing(void *obj_buf __maybe_unused,
 }
 #endif
 
+static char *
+compose_source(void)
+{
+   struct utsname utsname;
+   int version, patchlevel, sublevel, err;
+   unsigned long version_code;
+   char *code;
+
+   if (uname())
+   return NULL;
+
+   err = sscanf(utsname.release, "%d.%d.%d",
+, , );
+   if (err != 3) {
+   fprintf(stderr, " (Can't get kernel version from uname '%s')",
+   utsname.release);
+   return NULL;
+   }
+
+   version_code = (version << 16) + (patchlevel << 8) + sublevel;
+   err = asprintf(, "#define LINUX_VERSION_CODE 0x%08lx;\n%s",
+  version_code, test_llvm__bpf_prog);
+   if (err < 0)
+   return NULL;
+
+   return code;
+}
+
+#define SHARED_BUF_INIT_SIZE   (1 << 20)

[PATCH 16/31] perf tools: Add prologue for BPF programs for fetching arguments

2015-10-14 Thread Wang Nan

From: He Kuang 

This patch generates a prologue for a BPF program which fetches arguments
for it. With this patch, the program can have arguments as follows:

 SEC("lock_page=__lock_page page->flags")
 int lock_page(struct pt_regs *ctx, int err, unsigned long flags)
 {
 return 1;
 }

This patch passes at most 3 arguments from r3, r4 and r5. r1 is still
the ctx pointer. r2 is used to indicate the successfulness of
dereferencing.

This patch uses r6 to hold ctx (struct pt_regs) and r7 to hold the stack
pointer for the result. The result of each argument is first stored on
the stack:

 low address
 BPF_REG_FP - 24  ARG3
 BPF_REG_FP - 16  ARG2
 BPF_REG_FP - 8   ARG1
 BPF_REG_FP
 high address

Then loaded into r3, r4 and r5.

The output prologue for offn(...off2(off1(reg))) should be:

 r6 <- r1   // save ctx into a callee saved register
 r7 <- fp
 r7 <- r7 - stack_offset// pointer to result slot
 /* load r3 with the offset in pt_regs of 'reg' */
 (r7) <- r3 // make slot valid
 r3 <- r3 + off1// prepare to read unsafe pointer
 r2 <- 8
 r1 <- r7   // result put onto stack
 call probe_read// read unsafe pointer
 jnei r0, 0, err// error checking
 r3 <- (r7) // read result
 r3 <- r3 + off2// prepare to read unsafe pointer
 r2 <- 8
 r1 <- r7
 call probe_read
 jnei r0, 0, err
 ...
 /* load r3, r4, r5 from stack */
 goto success
err:
 r2 <- 1
 /* load r3, r4, r5 with 0 */
 goto usercode
success:
 r2 <- 0
usercode:
 r1 <- r6   // restore ctx
 // original user code

If all of the arguments reside in registers (dereferencing is not
required), gen_prologue_fastpath() will be used to create a
fast prologue:

 r3 <- (r1 + offset of reg1)
 r4 <- (r1 + offset of reg2)
 r5 <- (r1 + offset of reg3)
 r2 <- 0

P.S.

eBPF calling convention is defined as:

* r0- return value from in-kernel function, and exit value
  for eBPF program
* r1 - r5   - arguments from eBPF program to in-kernel function
* r6 - r9   - callee saved registers that in-kernel function will
  preserve
* r10   - read-only frame pointer to access stack

Signed-off-by: He Kuang 
Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/util/Build  |   1 +
 tools/perf/util/bpf-prologue.c | 443 +
 tools/perf/util/bpf-prologue.h |  34 
 3 files changed, 478 insertions(+)
 create mode 100644 tools/perf/util/bpf-prologue.c
 create mode 100644 tools/perf/util/bpf-prologue.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 591b3fe..b9d56f2 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -88,6 +88,7 @@ libperf-y += parse-branch-options.o
 libperf-y += parse-regs-options.o
 
 libperf-$(CONFIG_LIBBPF) += bpf-loader.o
+libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
 libperf-$(CONFIG_LIBELF) += symbol-elf.o
 libperf-$(CONFIG_LIBELF) += probe-file.o
 libperf-$(CONFIG_LIBELF) += probe-event.o
diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c
new file mode 100644
index 000..e4adb18
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.c
@@ -0,0 +1,443 @@
+/*
+ * bpf-prologue.c
+ *
+ * Copyright (C) 2015 He Kuang 
+ * Copyright (C) 2015 Wang Nan 
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include 
+#include "perf.h"
+#include "debug.h"
+#include "bpf-prologue.h"
+#include "probe-finder.h"
+#include 
+#include 
+
+#define BPF_REG_SIZE   8
+
+#define JMP_TO_ERROR_CODE  -1
+#define JMP_TO_SUCCESS_CODE-2
+#define JMP_TO_USER_CODE   -3
+
+struct bpf_insn_pos {
+   struct bpf_insn *begin;
+   struct bpf_insn *end;
+   struct bpf_insn *pos;
+};
+
+static inline int
+pos_get_cnt(struct bpf_insn_pos *pos)
+{
+   return pos->pos - pos->begin;
+}
+
+static int
+append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos)
+{
+   if (!pos->pos)
+   return -ERANGE;
+
+   if (pos->pos + 1 >= pos->end) {
+   pr_err("bpf prologue: prologue too long\n");
+   pos->pos = NULL;
+   return -ERANGE;
+   }
+
+   *(pos->pos)++ = new_insn;
+   return 0;
+}
+
+static int
+check_pos(struct bpf_insn_pos *pos)
+{
+   if (!pos->pos || pos->pos >= pos->end)
+   return -ERANGE;
+   return 0;
+}
+
+/* Give it a shorter name */
+#define ins(i, p) append_insn((i), (p))
+
+/*
+ * Give a register name (in 'reg'), generate instruction to
+ * load register into an eBPF register rd:
+ *

[PATCH 09/31] perf tools: Compile scriptlets to BPF objects when passing '.c' to --event

2015-10-14 Thread Wang Nan

This patch provides infrastructure for passing source files to --event
directly using:

 # perf record --event bpf-file.c command

This patch does the following:

 1) Allow passing a '.c' file to '--event'. parse_events_load_bpf() is
expanded to allow the caller to tell it whether the passed file is a
source file or an object.

 2) llvm__compile_bpf() is called to compile the '.c' file, the result
is saved into memory. Use bpf_object__open_buffer() to load the
in-memory object.

Introduces a bpf-script-example.c so we can manually test it:

 # perf record --clang-opt "-DLINUX_VERSION_CODE=0x40200" --event 
./bpf-script-example.c sleep 1

Note that '--clang-opt' must be put before '--event'.

Further patches will merge it into a testcase so it can be tested automatically.

Signed-off-by: Wang Nan 
Signed-off-by: He Kuang 
Acked-by: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Signed-off-by: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/tests/bpf-script-example.c | 44 +++
 tools/perf/util/bpf-loader.c  | 17 --
 tools/perf/util/bpf-loader.h  |  5 ++--
 tools/perf/util/parse-events.c|  5 ++--
 tools/perf/util/parse-events.h|  3 ++-
 tools/perf/util/parse-events.l|  3 +++
 tools/perf/util/parse-events.y| 15 ++--
 7 files changed, 83 insertions(+), 9 deletions(-)
 create mode 100644 tools/perf/tests/bpf-script-example.c

diff --git a/tools/perf/tests/bpf-script-example.c 
b/tools/perf/tests/bpf-script-example.c
new file mode 100644
index 000..410a70b
--- /dev/null
+++ b/tools/perf/tests/bpf-script-example.c
@@ -0,0 +1,44 @@
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" 
into llvm section of ~/.perfconfig'
+#endif
+#define BPF_ANY 0
+#define BPF_MAP_TYPE_ARRAY 2
+#define BPF_FUNC_map_lookup_elem 1
+#define BPF_FUNC_map_update_elem 2
+
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+   (void *) BPF_FUNC_map_lookup_elem;
+static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int 
flags) =
+   (void *) BPF_FUNC_map_update_elem;
+
+struct bpf_map_def {
+   unsigned int type;
+   unsigned int key_size;
+   unsigned int value_size;
+   unsigned int max_entries;
+};
+
+#define SEC(NAME) __attribute__((section(NAME), used))
+struct bpf_map_def SEC("maps") flip_table = {
+   .type = BPF_MAP_TYPE_ARRAY,
+   .key_size = sizeof(int),
+   .value_size = sizeof(int),
+   .max_entries = 1,
+};
+
+SEC("func=sys_epoll_pwait")
+int bpf_func__sys_epoll_pwait(void *ctx)
+{
+   int ind =0;
+   int *flag = bpf_map_lookup_elem(_table, );
+   int new_flag;
+   if (!flag)
+   return 0;
+   /* flip flag and store back */
+   new_flag = !*flag;
+   bpf_map_update_elem(_table, , _flag, BPF_ANY);
+   return new_flag;
+}
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index aa784a4..ba6f752 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -12,6 +12,7 @@
 #include "bpf-loader.h"
 #include "probe-event.h"
 #include "probe-finder.h" // for MAX_PROBES
+#include "llvm-utils.h"
 
 #define DEFINE_PRINT_FN(name, level) \
 static int libbpf_##name(const char *fmt, ...) \
@@ -33,7 +34,7 @@ struct bpf_prog_priv {
struct perf_probe_event pev;
 };
 
-struct bpf_object *bpf__prepare_load(const char *filename)
+struct bpf_object *bpf__prepare_load(const char *filename, bool source)
 {
struct bpf_object *obj;
static bool libbpf_initialized;
@@ -45,7 +46,19 @@ struct bpf_object *bpf__prepare_load(const char *filename)
libbpf_initialized = true;
}
 
-   obj = bpf_object__open(filename);
+   if (source) {
+   int err;
+   void *obj_buf;
+   size_t obj_buf_sz;
+
+   err = llvm__compile_bpf(filename, _buf, _buf_sz);
+   if (err)
+   return ERR_PTR(err);
+   obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
+   free(obj_buf);
+   } else
+   obj = bpf_object__open(filename);
+
if (!obj) {
pr_debug("bpf: failed to load %s\n", filename);
return ERR_PTR(-EINVAL);
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index a8f25ee..ccd8d7f 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -18,7 +18,7 @@ typedef int (*bpf_prog_iter_callback_t)(struct 
probe_trace_event *tev,
int fd,

[PATCH 19/31] perf record: Support custom vmlinux path

2015-10-14 Thread Wang Nan

From: He Kuang 

Make perf-record command support --vmlinux option if BPF_PROLOGUE is on.

'perf record' needs vmlinux as the source of DWARF info to generate
prologue for BPF programs, so path of vmlinux should be specified.

Short name 'k' has been taken by 'clockid'. This patch skips the short
option name and uses '--vmlinux' for the vmlinux path.

Signed-off-by: He Kuang 
Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/builtin-record.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 847cc67..200f221 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1123,6 +1123,10 @@ struct option __record_options[] = {
   "clang binary to use for compiling BPF scriptlets"),
OPT_STRING(0, "clang-opt", _param.clang_opt, "clang options",
   "options passed to clang when compiling BPF scriptlets"),
+#ifdef HAVE_BPF_PROLOGUE
+   OPT_STRING(0, "vmlinux", _conf.vmlinux_name,
+  "file", "vmlinux pathname"),
+#endif
 #endif
OPT_END()
 };
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT PULL 00/31] perf tools: filtering events using eBPF programs

2015-10-14 Thread Wang Nan

Hi Arnaldo,

   I know you don't have enough time to review my code. I am sending
this patchset to let you and others know what we are working on.

   In this new patchset, we create a new perf cmdline syntax so
perf users are able to pass perf events created by perf to BPF
maps, which makes bpf_perf_event_read() usable. Compared with our
previous solution[1], which embedded 'struct perf_event_attr' in the
"maps" section, this solution is easier to use.

   If you or anyone else has a different view on this solution,
please let us know so we can stop our further development based
on it as soon as possible.

Thank you.

 [1] 
http://lkml.kernel.org/r/1440672142-89311-1-git-send-email-xiaka...@huawei.com

The following changes since commit 31eb4360546b4bd890f349db01295a173c09b0fb:

  perf hists browser: Add 'm' key for context menu display (2015-10-12 23:29:14 
-0300)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/pi3orama/linux.git 
tags/perf-ebpf-for-acme-20151014

for you to fetch changes up to 6df036cb7d42a2e0ebf312e127b02425bd57bc55:

  perf tools: Enable BPF object configure syntax (2015-10-14 10:09:17 +)


EBPF support for perf

 - Rebase to newest perf/core

 - Bugfix: kprobe events are not removed if a bpf__probe() failure occurs
   after it creates some kprobe points successfully.

 - Bugfix: when multiple BPF functions reside in one BPF object,
   the last BPF program would be attached to all kprobe events.
   This bug is introduced by removal of dummy event placeholder.

 - New function: support BPF program reading counter through
   bpf_perf_event_read() by adding new syntax and support code
   in event selector. The new BPF object configuration mechanism
   can be extended to support BPF data output.

   In this patchset, the following BPF functions can be used to measure
   the cycles a kernel function costs:

   = BPF program bpf_program.c =

   struct bpf_map_def SEC("maps") pmu_map = {
   .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
   .key_size = sizeof(int),
   .value_size = sizeof(u32),
   .max_entries = __NR_CPUS__,
   };

   SEC("func_write=sys_write")
   int func_write(void *ctx)
   {
   unsigned long long val;
   char fmt[] = "sys_write:pmu=%llu\n";
   val = bpf_perf_event_read(_map, bpf_get_smp_processor_id());
   bpf_trace_printk(fmt, sizeof(fmt), val);
   return 0;
   }

   SEC("func_write_return=sys_write%return")
   int func_write_return(void *ctx)
   {
   unsigned long long val = 0;
   char fmt[] = "sys_write_return: pmu=%llu\n";
   val = bpf_perf_event_read(_map, bpf_get_smp_processor_id());
   bpf_trace_printk(fmt, sizeof(fmt), val);
   return 0;
   }

   With cmdline like this:

   = cmdline =
   # echo "" > /sys/kernel/debug/tracing/trace
   # perf record -e evt=cycles/period=0x7fff/ \
 -e bpf_program.c/maps.pmu_map.event=evt/
 -a ls
   # cat /sys/kernel/debug/tracing/trace | grep ls
ls-3363  [003] d... 75475.056190: : sys_write:
pmu=3961415
ls-3363  [003] dN.. 75475.056212: : sys_write_return: 
pmu=4051390
ls-3363  [003] d... 75475.056216: : sys_write:
pmu=4065447
ls-3363  [003] dN.. 75475.056227: : sys_write_return: 
pmu=4109760
ls-3363  [003] d... 75475.056230: : sys_write:
pmu=4120776
ls-3363  [003] dN.. 75475.056245: : sys_write_return: 
pmu=4178441
...
   # perf report --stdio
   Error:
   The perf.data file has no samples!

Signed-off-by: Wang Nan 


He Kuang (5):
  perf tools: Add prologue for BPF programs for fetching arguments
  perf record: Support custom vmlinux path
  bpf tools: Add helper function for updating bpf maps elements
  perf tools: Support perf event alias name
  perf record: Apply config to BPF objects before recording

Wang Nan (26):
  perf tools: Make perf depend on libbpf
  perf ebpf: Add the libbpf glue
  perf tools: Enable passing bpf object file to --event
  perf record, bpf: Create probe points for BPF programs
  perf record: Load eBPF object into kernel
  perf tools: Collect perf_evsel in BPF object files
  perf tools: Attach eBPF program to perf event
  perf record: Add clang options for compiling BPF scripts
  perf tools: Compile scriptlets to BPF objects when passing '.c' to --event
  perf test: Enforce LLVM test for BPF test
  perf test: Add 'perf test BPF'
  perf probe: Reset args and nargs for probe_trace_event when failure
  bpf tools: Load a program with different instances using preprocessor
  perf tools: Add BPF_PROLOGUE config options for further patches
  perf tools: Compile dwarf-regs.c

[PATCH v4 10/11] smack: namespace implementation

2015-10-14 Thread Lukasz Pawelczyk

This commit uses all the changes introduced in "namespace groundwork"
and previous preparation patches and makes smack aware of its namespace
and mapped labels.

It modifies the following functions to be namespace aware:
- smk_access
- smk_find_label_name
- smk_get_label

And all functions that use them (e.g. smk_tskacc).

It also adds another function that is used throughout Smack LSM hooks:
- smk_labels_valid - it checks whether both, subject and object labels
  are properly mapped in a namespace where they are to be used. This
  function is used mostly together with a capability check when there is
  no proper access check that usually checks for that.

All the Smack LSM hooks have been adapted to be namespace aware.

The capabilities (CAP_MAC_ADMIN, CAP_MAC_OVERRIDE) have been allowed in
the namespace for a few cases. Check the documentation for the details.

Signed-off-by: Lukasz Pawelczyk 
Reviewed-by: Casey Schaufler 
---
 security/smack/smack.h|  29 +++-
 security/smack/smack_access.c | 109 ++--
 security/smack/smack_lsm.c| 390 ++
 security/smack/smack_ns.c |  39 +
 security/smack/smackfs.c  |  63 ---
 5 files changed, 483 insertions(+), 147 deletions(-)

diff --git a/security/smack/smack.h b/security/smack/smack.h
index 4b7489f..3d432f4 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -119,6 +119,7 @@ struct superblock_smack {
struct smack_known  *smk_floor;
struct smack_known  *smk_hat;
struct smack_known  *smk_default;
+   struct user_namespace   *smk_ns;
int smk_initialized;
 };
 
@@ -126,6 +127,7 @@ struct socket_smack {
struct smack_known  *smk_out;   /* outbound label */
struct smack_known  *smk_in;/* inbound label */
struct smack_known  *smk_packet;/* TCP peer label */
+   struct user_namespace   *smk_ns;/* user namespace */
 };
 
 /*
@@ -146,6 +148,14 @@ struct task_smack {
struct mutexsmk_rules_lock; /* lock for the rules */
 };
 
+/*
+ * Used for IPC objects (sem, shm, etc)
+ */
+struct ipc_smack {
+   struct smack_known  *smk_known; /* label for access control */
+   struct user_namespace   *smk_ns;/* user namespace */
+};
+
 #defineSMK_INODE_INSTANT   0x01/* inode is instantiated */
 #defineSMK_INODE_TRANSMUTE 0x02/* directory is transmuting */
 #defineSMK_INODE_CHANGED   0x04/* smack was transmuted */
@@ -319,10 +329,11 @@ struct smk_audit_info {
  */
 int smk_access_entry(char *, char *, struct list_head *);
 int smk_access(struct smack_known *, struct smack_known *,
-  int, struct smk_audit_info *);
+  struct user_namespace *, int, struct smk_audit_info *);
 int smk_tskacc(struct task_struct *, struct smack_known *,
+  struct user_namespace *, u32, struct smk_audit_info *);
+int smk_curacc(struct smack_known *, struct user_namespace *,
   u32, struct smk_audit_info *);
-int smk_curacc(struct smack_known *, u32, struct smk_audit_info *);
 struct smack_known *smack_from_secid(const u32);
 char *smk_parse_smack(const char *string, int len, bool *allocated);
 int smk_netlbl_mls(int, char *, struct netlbl_lsm_secattr *, int);
@@ -335,8 +346,9 @@ int smack_has_ns_privilege(struct task_struct *task,
 int smack_has_privilege(struct task_struct *task, int cap);
 int smack_ns_privileged(struct user_namespace *user_ns, int cap);
 int smack_privileged(int cap);
-char *smk_find_label_name(struct smack_known *skp);
-struct smack_known *smk_get_label(const char *string, int len, bool import);
+char *smk_find_label_name(struct smack_known *skp, struct user_namespace *ns);
+struct smack_known *smk_get_label(const char *string, int len, bool import,
+ struct user_namespace *ns);
 
 /*
  * These functions are in smack_ns.c
@@ -350,6 +362,15 @@ struct smack_known *smk_find_unmapped(const char *string, 
int len,
 extern const struct seq_operations proc_label_map_seq_operations;
 ssize_t proc_label_map_write(struct task_struct *p, const struct cred *f_cred,
 void *value, size_t size);
+bool smk_labels_valid(struct smack_known *sbj, struct smack_known *obj,
+ struct user_namespace *ns);
+#else
+static inline bool smk_labels_valid(struct smack_known *sbj,
+   struct smack_known *obj,
+   struct user_namespace *ns)
+{
+   return true;
+}
 #endif /* CONFIG_SECURITY_SMACK_NS */
 
 /*
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 17b7e2c..e230948 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "smack.h"
 
 struct smack_known smack_known_huh = {
@@ -113,6 +114,7 @@ int

[PATCH 20/31] perf tools: Allow BPF program attach to uprobe events

2015-10-14 Thread Wang Nan

This patch appends new syntax to BPF object section name to support
probing at uprobe event. Now we can use BPF program like this:

 SEC(
 "target=/lib64/libc.so.6\n"
 "libcwrite=__write"
 )
 int libcwrite(void *ctx)
 {
 return 1;
 }

Where, in section name of a program, before the main config string,
we can use 'key=value' style options. Now the only option key "target"
is for uprobe probing.

Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/util/bpf-loader.c | 86 
 1 file changed, 80 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index af549ea..73ff9a9 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -111,6 +111,84 @@ bpf_prog_priv__clear(struct bpf_program *prog 
__maybe_unused,
 }
 
 static int
+do_config(const char *key, const char *value,
+ struct perf_probe_event *pev)
+{
+   pr_debug("config bpf program: %s=%s\n", key, value);
+   if (strcmp(key, "target") == 0) {
+   pev->uprobes = true;
+   pev->target = strdup(value);
+   return 0;
+   }
+
+   pr_warning("BPF: WARNING: invalid config option in object: %s=%s\n",
+  key, value);
+   pr_warning("\tHint: Currently only valid option is 'target='\n");
+   return 0;
+}
+
+static const char *
+parse_config_kvpair(const char *config_str, struct perf_probe_event *pev)
+{
+   char *text = strdup(config_str);
+   char *sep, *line;
+   const char *main_str = NULL;
+   int err = 0;
+
+   if (!text) {
+   pr_debug("No enough memory: dup config_str failed\n");
+   return NULL;
+   }
+
+   line = text;
+   while ((sep = strchr(line, '\n'))) {
+   char *equ;
+
+   *sep = '\0';
+   equ = strchr(line, '=');
+   if (!equ) {
+   pr_warning("WARNING: invalid config in BPF object: 
%s\n",
+  line);
+   pr_warning("\tShould be 'key=value'.\n");
+   goto nextline;
+   }
+   *equ = '\0';
+
+   err = do_config(line, equ + 1, pev);
+   if (err)
+   break;
+nextline:
+   line = sep + 1;
+   }
+
+   if (!err)
+   main_str = config_str + (line - text);
+   free(text);
+
+   return main_str;
+}
+
+static int
+parse_config(const char *config_str, struct perf_probe_event *pev)
+{
+   const char *main_str;
+   int err;
+
+   main_str = parse_config_kvpair(config_str, pev);
+   if (!main_str)
+   return -EINVAL;
+
+   err = parse_perf_probe_command(main_str, pev);
+   if (err < 0) {
+   pr_debug("bpf: '%s' is not a valid config string\n",
+config_str);
+   /* parse failed, don't need clear pev. */
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static int
 config_bpf_program(struct bpf_program *prog)
 {
struct perf_probe_event *pev = NULL;
@@ -132,13 +210,9 @@ config_bpf_program(struct bpf_program *prog)
pev = >pev;
 
pr_debug("bpf: config program '%s'\n", config_str);
-   err = parse_perf_probe_command(config_str, pev);
-   if (err < 0) {
-   pr_debug("bpf: '%s' is not a valid config string\n",
-config_str);
-   err = -EINVAL;
+   err = parse_config(config_str, pev);
+   if (err)
goto errout;
-   }
 
if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 14/31] perf tools: Add BPF_PROLOGUE config options for further patches

2015-10-14 Thread Wang Nan

If both LIBBPF and DWARF are detected, it is possible to create a prologue
for eBPF programs to help them access kernel data. HAVE_BPF_PROLOGUE
and CONFIG_BPF_PROLOGUE are added as flags for this feature.

PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET indicates an architecture
supports converting name of a register to its offset in
'struct pt_regs'. Without this support, BPF_PROLOGUE should be turned off.

HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET is introduced as the corresponding
CFLAGS of PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET.

Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/config/Makefile | 12 
 1 file changed, 12 insertions(+)

diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index de89ec5..6eb9a95 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -318,6 +318,18 @@ ifndef NO_LIBELF
   CFLAGS += -DHAVE_LIBBPF_SUPPORT
   $(call detected,CONFIG_LIBBPF)
 endif
+
+ifndef NO_DWARF
+  ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+CFLAGS += -DHAVE_BPF_PROLOGUE
+$(call detected,CONFIG_BPF_PROLOGUE)
+  else
+msg := $(warning BPF prologue is not supported by architecture 
$(ARCH), missing regs_query_register_offset());
+  endif
+else
+  msg := $(warning DWARF support is off, BPF prologue is disabled);
+endif
+
   endif # NO_LIBBPF
 endif # NO_LIBELF
 
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 15/31] perf tools: Compile dwarf-regs.c if CONFIG_BPF_PROLOGUE is on

2015-10-14 Thread Wang Nan

regs_query_register_offset() in dwarf-regs.c is required by BPF prologue.
Compile it if CONFIG_BPF_PROLOGUE is on, to avoid a build failure
when CONFIG_BPF_PROLOGUE is on but CONFIG_DWARF is not set.

Signed-off-by: Wang Nan 
Signed-off-by: He Kuang 
Acked-by: Masami Hiramatsu 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/arch/x86/util/Build | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index ff63649..4659703 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -5,6 +5,7 @@ libperf-y += kvm-stat.o
 libperf-y += perf_regs.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
 
 libperf-$(CONFIG_LIBUNWIND)  += unwind-libunwind.o
 libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 06/11] smack: don't use implicit star to display smackfs/syslog

2015-10-14 Thread Lukasz Pawelczyk

Smackfs/syslog is analogous to onlycap and unconfined. When not filled
they don't do anything. In such cases onlycap and unconfined displayed
nothing when read, but syslog unconditionally displayed star. This
doesn't work well with namespaces where the star could have been
unmapped. Besides, the meaning of this star was different than a star
that could be written to this file. This was misleading.

This also brings syslog read/write functions on par with onlycap and
unconfined where it is possible to reset the value to NULL as should be
possible according to comment in smackfs.c describing smack_syslog_label
variable.

Before that the initial state was to allow (smack_syslog_label was
NULL), but after writing star to it the current had to be labeled star
as well to have access, even though reading the smackfs/syslog
returned the same result in both cases.

Signed-off-by: Lukasz Pawelczyk 
Acked-by: Serge Hallyn 
---
 security/smack/smackfs.c | 42 +++---
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index ce8d503..05e09ee2 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -2634,23 +2634,20 @@ static const struct file_operations smk_change_rule_ops 
= {
 static ssize_t smk_read_syslog(struct file *filp, char __user *buf,
size_t cn, loff_t *ppos)
 {
-   struct smack_known *skp;
+   char *smack = "";
ssize_t rc = -EINVAL;
int asize;
 
if (*ppos != 0)
return 0;
 
-   if (smack_syslog_label == NULL)
-   skp = _known_star;
-   else
-   skp = smack_syslog_label;
+   if (smack_syslog_label != NULL)
+   smack = smack_syslog_label->smk_known;
 
-   asize = strlen(skp->smk_known) + 1;
+   asize = strlen(smack) + 1;
 
if (cn >= asize)
-   rc = simple_read_from_buffer(buf, cn, ppos, skp->smk_known,
-   asize);
+   rc = simple_read_from_buffer(buf, cn, ppos, smack, asize);
 
return rc;
 }
@@ -2678,16 +2675,31 @@ static ssize_t smk_write_syslog(struct file *file, 
const char __user *buf,
if (data == NULL)
return -ENOMEM;
 
-   if (copy_from_user(data, buf, count) != 0)
+   if (copy_from_user(data, buf, count) != 0) {
rc = -EFAULT;
-   else {
-   skp = smk_import_entry(data, count);
-   if (IS_ERR(skp))
-   rc = PTR_ERR(skp);
-   else
-   smack_syslog_label = skp;
+   goto freeout;
}
 
+   /*
+* Clear the smack_syslog_label on invalid label errors. This means
+* that we can pass a null string to unset the syslog value.
+*
+* Importing will also reject a label beginning with '-',
+* so "-syslog" will also work.
+*
+* But do so only on invalid label, not on system errors.
+*/
+   skp = smk_import_entry(data, count);
+   if (PTR_ERR(skp) == -EINVAL)
+   skp = NULL;
+   else if (IS_ERR(skp)) {
+   rc = PTR_ERR(skp);
+   goto freeout;
+   }
+
+   smack_syslog_label = skp;
+
+freeout:
kfree(data);
return rc;
 }
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 30/31] perf record: Apply config to BPF objects before recording

2015-10-14 Thread Wang Nan

From: He Kuang 

In perf record, before recording starts, call bpf__apply_config() to
turn on all BPF config options.

Signed-off-by: Wang Nan 
Signed-off-by: He Kuang 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-ziazd5s4t9j96d01t5bdb...@git.kernel.org
---
 tools/perf/builtin-record.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 200f221..a47ce9e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -32,6 +32,7 @@
 #include "util/parse-branch-options.h"
 #include "util/parse-regs-options.h"
 #include "util/llvm-utils.h"
+#include "util/bpf-loader.h"
 
 #include 
 #include 
@@ -524,6 +525,15 @@ static int __cmd_record(struct record *rec, int argc, 
const char **argv)
goto out_child;
}
 
+   err = bpf__apply_config();
+   if (err) {
+   char errbuf[BUFSIZ];
+
+   bpf__strerror_apply_config(err, errbuf, sizeof(errbuf));
+   pr_warning("WARNING: Apply config to BPF failed: %s\n",
+  errbuf);
+   }
+
/*
 * Normally perf_session__new would do this, but it doesn't have the
 * evlist.
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 23/31] bpf tools: Add helper function for updating bpf maps elements

2015-10-14 Thread Wang Nan

From: He Kuang 

Add helper function bpf_map_update_elem() which calls sys_bpf syscall
to update elements in bpf maps.

Signed-off-by: Wang Nan 
Signed-off-by: He Kuang 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-yflv9p2x75ht7okavpk97...@git.kernel.org
---
 tools/lib/bpf/bpf.c | 14 ++
 tools/lib/bpf/bpf.h |  2 ++
 2 files changed, 16 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index a633105..5bdc6ea 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -83,3 +83,17 @@ int bpf_load_program(enum bpf_prog_type type, struct 
bpf_insn *insns,
log_buf[0] = 0;
return sys_bpf(BPF_PROG_LOAD, , sizeof(attr));
 }
+
+int bpf_map_update_elem(int fd, void *key, void *value,
+   u64 flags)
+{
+   union bpf_attr attr;
+
+   bzero(, sizeof(attr));
+   attr.map_fd = fd;
+   attr.key = ptr_to_u64(key);
+   attr.value = ptr_to_u64(value);
+   attr.flags = flags;
+
+   return sys_bpf(BPF_MAP_UPDATE_ELEM, , sizeof(attr));
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 854b736..a764655 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -20,4 +20,6 @@ int bpf_load_program(enum bpf_prog_type type, struct bpf_insn 
*insns,
 u32 kern_version, char *log_buf,
 size_t log_buf_sz);
 
+int bpf_map_update_elem(int fd, void *key, void *value,
+   u64 flags);
 #endif
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 11/31] perf test: Add 'perf test BPF'

2015-10-14 Thread Wang Nan

This patch adds BPF testcase for testing BPF event filtering.

By utilizing the result of 'perf test LLVM', this patch compiles the
eBPF sample program and then tests its ability. The BPF script in 'perf test
LLVM' collects half of the executions of epoll_pwait(). This patch runs it
111 times, so the result should contain 56 samples.

Signed-off-by: Wang Nan 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/tests/Build  |   1 +
 tools/perf/tests/bpf.c  | 171 
 tools/perf/tests/builtin-test.c |   4 +
 tools/perf/tests/llvm.c |  19 +
 tools/perf/tests/llvm.h |   1 +
 tools/perf/tests/tests.h|   1 +
 tools/perf/util/bpf-loader.c|  14 
 tools/perf/util/bpf-loader.h|   9 +++
 8 files changed, 220 insertions(+)
 create mode 100644 tools/perf/tests/bpf.c

diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 4afc8c8..d0278a9 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -32,6 +32,7 @@ perf-y += parse-no-sample-id-all.o
 perf-y += kmod-path.o
 perf-y += thread-map.o
 perf-y += llvm.o llvm-src.o
+perf-y += bpf.o
 perf-y += topology.o
 
 $(OUTPUT)tests/llvm-src.c: tests/bpf-script-example.c
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
new file mode 100644
index 000..d7cdc84
--- /dev/null
+++ b/tools/perf/tests/bpf.c
@@ -0,0 +1,171 @@
+#include 
+#include 
+#include 
+#include 
+#include "tests.h"
+#include "llvm.h"
+#include "debug.h"
+#define NR_ITERS   111
+
+#ifdef HAVE_LIBBPF_SUPPORT
+
+static int epoll_pwait_loop(void)
+{
+   int i;
+
+   /* Should fail NR_ITERS times */
+   for (i = 0; i < NR_ITERS; i++)
+   epoll_pwait(-(i + 1), NULL, 0, 0, NULL);
+   return 0;
+}
+
+static struct bpf_object *prepare_bpf(void *obj_buf, size_t obj_buf_sz)
+{
+   struct bpf_object *obj;
+
+   obj = bpf__prepare_load_buffer(obj_buf, obj_buf_sz, "[buffer]");
+   if (IS_ERR(obj)) {
+   fprintf(stderr, " (compile failed)");
+   return NULL;
+   }
+   return obj;
+}
+
+static int do_test(struct bpf_object *obj)
+{
+   struct record_opts opts = {
+   .target = {
+   .uid = UINT_MAX,
+   .uses_mmap = true,
+   },
+   .freq = 0,
+   .mmap_pages   = 256,
+   .default_interval = 1,
+   };
+
+   int i, err = 0, count = 0;
+   char pid[16];
+   char sbuf[STRERR_BUFSIZE];
+   struct perf_evlist *evlist;
+
+   struct parse_events_evlist parse_evlist;
+   struct parse_events_error parse_error;
+
+   bzero(_error, sizeof(parse_error));
+   bzero(_evlist, sizeof(parse_evlist));
+   parse_evlist.error = _error;
+   INIT_LIST_HEAD(_evlist.list);
+
+   err = parse_events_load_bpf_obj(_evlist, _evlist.list, obj);
+   if (err || list_empty(_evlist.list)) {
+   fprintf(stderr, " (Failed to add events selected by BPF)");
+   if (!err)
+   err = -EINVAL;
+   goto out;
+   }
+
+   snprintf(pid, sizeof(pid), "%d", getpid());
+   pid[sizeof(pid) - 1] = '\0';
+   opts.target.tid = opts.target.pid = pid;
+
+   /* Instead of perf_evlist__new_default, don't add default events */
+   evlist = perf_evlist__new();
+   if (!evlist) {
+   pr_debug("No ehough memory to create evlist\n");
+   return -ENOMEM;
+   }
+
+   err = perf_evlist__create_maps(evlist, );
+   if (err < 0) {
+   pr_debug("Not enough memory to create thread/cpu maps\n");
+   goto out_delete_evlist;
+   }
+
+   perf_evlist__splice_list_tail(evlist, _evlist.list);
+   evlist->nr_groups = parse_evlist.nr_groups;
+
+   perf_evlist__config(evlist, );
+
+   err = perf_evlist__open(evlist);
+   if (err < 0) {
+   pr_debug("perf_evlist__open: %s\n",
+strerror_r(errno, sbuf, sizeof(sbuf)));
+   goto out_delete_evlist;
+   }
+
+   err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
+   if (err < 0) {
+   pr_debug("perf_evlist__mmap: %s\n",
+strerror_r(errno, sbuf, sizeof(sbuf)));
+   goto out_delete_evlist;
+   }
+
+   perf_evlist__enable(evlist);
+   epoll_pwait_loop();
+   perf_evlist__disable(evlist);
+
+   for (i = 0; i < evlist->nr_mmaps; i++) {
+   union perf_event *event;
+
+   while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+   const u32 type = event->header.type;
+
+

[PATCH 08/31] perf record: Add clang options for compiling BPF scripts

2015-10-14 Thread Wang Nan

Although the previous patch allows setting BPF compiler related options in
perfconfig, in some ad-hoc situations it is still necessary to pass options
through the cmdline. This patch introduces 2 options to 'perf record' for
this purpose: --clang-path and --clang-opt.

Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Cc: Arnaldo Carvalho de Melo 
Link: http://lkml.kernel.org/n/ebpf-6yw9eg0ej3l4jnqhinngk...@git.kernel.org
---
 tools/perf/builtin-record.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 24ace2f..847cc67 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -31,6 +31,7 @@
 #include "util/auxtrace.h"
 #include "util/parse-branch-options.h"
 #include "util/parse-regs-options.h"
+#include "util/llvm-utils.h"
 
 #include 
 #include 
@@ -1117,6 +1118,12 @@ struct option __record_options[] = {
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "switch-events", _switch_events,
"Record context switch events"),
+#ifdef HAVE_LIBBPF_SUPPORT
+   OPT_STRING(0, "clang-path", _param.clang_path, "clang path",
+  "clang binary to use for compiling BPF scriptlets"),
+   OPT_STRING(0, "clang-opt", _param.clang_opt, "clang options",
+  "options passed to clang when compiling BPF scriptlets"),
+#endif
OPT_END()
 };
 
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 29/31] perf tools: Add API to apply config to BPF map

2015-10-14 Thread Wang Nan

bpf__apply_config() is introduced as the core API to apply config
options to all BPF objects. This patch also does the real work of
setting map events for BPF_MAP_TYPE_PERF_EVENT_ARRAY maps by inserting
the file descriptors of an evsel into the BPF map.

This patch is required because we are unable to set all BPF config
during parsing. Events in BPF_MAP_TYPE_PERF_EVENT_ARRAY maps are an
example: during parsing, the fds of events are not ready yet.

Signed-off-by: Wang Nan 
Signed-off-by: He Kuang 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-tmg65cm1zaf1zxs7zmvxm...@git.kernel.org
---
 tools/perf/util/bpf-loader.c | 109 +++
 tools/perf/util/bpf-loader.h |  15 ++
 2 files changed, 124 insertions(+)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index b92c2f7..9d661c0 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -7,6 +7,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include "perf.h"
 #include "debug.h"
@@ -767,6 +768,107 @@ int bpf__config_obj(struct bpf_object *obj,
return -ENODEV;
 }
 
+static int
+bpf__apply_config_map(struct bpf_map *map)
+{
+   struct bpf_map_priv *priv;
+   struct bpf_map_def def;
+   const char *name;
+   int err, map_fd;
+
+   name = bpf_map__get_name(map);
+   err = bpf_map__get_private(map, (void **));
+   if (err) {
+   pr_debug("ERROR: failed to get private field from map %s\n",
+name);
+   return err;
+   }
+   if (!priv) {
+   pr_debug("INFO: nothing to config for map %s\n", name);
+   return 0;
+   }
+
+   map_fd = bpf_map__get_fd(map);
+   if (map_fd < 0) {
+   pr_debug("ERROR: failed to get fd from map %s\n", name);
+   return map_fd;
+   }
+
+   err = bpf_map__get_def(map, );
+   if (err) {
+   pr_debug("ERROR: failed to retrive map def from map %s\n",
+name);
+   return err;
+   }
+
+   if (priv->evsel) {
+   struct xyarray *xy = priv->evsel->fd;
+   unsigned int cpus, i;
+
+   if (!xy) {
+   pr_debug("ERROR: event is not ready for map %s\n", 
name);
+   return -EINVAL;
+   }
+
+   if (xy->row_size / xy->entry_size != 1) {
+   pr_debug("ERROR: Dimension of target event is incorrect 
for map %s\n",
+name);
+   return -EINVAL;
+   }
+
+   cpus = xy->entries / (xy->row_size / xy->entry_size);
+   if (cpus > def.max_entries) {
+   pr_debug("ERROR: map %s needs to be enlarge to %d for 
its event\n",
+name, cpus);
+   return -EINVAL;
+   } else if (cpus < def.max_entries)
+   pr_debug("WARNING: map %s has more entries than 
required\n",
+name);
+
+   for (i = 0; i < cpus; i++) {
+   int *evt_fd = xyarray__entry(xy, i, 0);
+
+   err = bpf_map_update_elem(map_fd, , evt_fd,
+ BPF_ANY);
+
+   if (err) {
+   pr_debug("ERROR: failed to insert fd %d to 
%s[%d]\n",
+*evt_fd, name, i);
+   return -errno;
+   }
+   }
+   }
+   return 0;
+}
+
+static int
+bpf__apply_config_object(struct bpf_object *obj)
+{
+   struct bpf_map *map;
+   int err;
+
+   bpf_map__for_each(map, obj) {
+   err = bpf__apply_config_map(map);
+   if (err)
+   return err;
+   }
+   return 0;
+}
+
+int bpf__apply_config(void)
+{
+   struct bpf_object *obj, *tmp;
+   int err;
+
+   bpf_object__for_each_safe(obj, tmp) {
+   err = bpf__apply_config_object(obj);
+   if (err)
+   return err;
+   }
+
+   return 0;
+}
+
 #define bpf__strerror_head(err, buf, size) \
char sbuf[STRERR_BUFSIZE], *emsg;\
if (!size)\
@@ -822,3 +924,10 @@ int bpf__strerror_config_obj(struct bpf_object *obj 
__maybe_unused,
bpf__strerror_end(buf, size);
return 0;
 }
+
+int bpf__strerror_apply_config(int err, char *buf, size_t size)
+{
+   bpf__strerror_head(err, buf, size);
+   bpf__strerror_end(buf, size);
+   return 0;
+}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 4c99b21..3a93ba3

[PATCH 28/31] perf tools: Add API to config maps in bpf object

2015-10-14 Thread Wang Nan

bpf__config_obj() is introduced as a core API to configure a BPF object
after loading. One configuration option for maps is introduced. After
this patch a BPF object can accept configuration like:

 maps.my_pmy.event=evt

Where evt is a predefined event with alias "evt".

Signed-off-by: Wang Nan 
Signed-off-by: He Kuang 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-36xcrahy9n0ayc05mu7aa...@git.kernel.org
---
 tools/perf/util/bpf-loader.c | 147 +++
 tools/perf/util/bpf-loader.h |  39 
 2 files changed, 186 insertions(+)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 73ff9a9..b92c2f7 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -10,6 +10,7 @@
 #include 
 #include "perf.h"
 #include "debug.h"
+#include "util.h"
 #include "bpf-loader.h"
 #include "bpf-prologue.h"
 #include "llvm-utils.h"
@@ -633,6 +634,139 @@ int bpf__foreach_tev(struct bpf_object *obj,
return 0;
 }
 
+struct bpf_map_priv {
+   struct perf_evsel *evsel;
+};
+
+static void
+bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
+   void *_priv)
+{
+   struct bpf_map_priv *priv = _priv;
+
+   free(priv);
+}
+
+static int
+bpf__config_obj_map_event(struct bpf_map *map, const char *val,
+ struct perf_evlist *evlist)
+{
+   struct bpf_map_priv *priv;
+   struct perf_evsel *evsel;
+   struct bpf_map_def def;
+   const char *map_name;
+   int err;
+
+   map_name = bpf_map__get_name(map);
+
+   evsel = perf_evlist__find_evsel_by_alias(evlist, val);
+   if (!evsel) {
+   pr_debug("Event '%s' doesn't exist\n", val);
+   return -EINVAL;
+   }
+
+   err = bpf_map__get_def(map, );
+   if (err) {
+   pr_debug("Unable to get map definition from '%s'\n",
+map_name);
+   return -EINVAL;
+   }
+
+   if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+   pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+map_name);
+   return -EINVAL;
+   }
+
+   priv = calloc(sizeof(*priv), 1);
+   if (!priv) {
+   pr_debug("No enough memory to alloc map private\n");
+   return -ENOMEM;
+   }
+
+   priv->evsel = evsel;
+   return bpf_map__set_private(map, priv, bpf_map_priv__clear);
+}
+
+struct bpf_config_map_func {
+   const char *config_opt;
+   int (*config_func)(struct bpf_map *, const char *,
+  struct perf_evlist *);
+};
+
+struct bpf_config_map_func bpf_config_map_funcs[] = {
+   {"event", bpf__config_obj_map_event},
+};
+
+static int
+bpf__config_obj_map(struct bpf_object *obj,
+   const char *key,
+   const char *val,
+   struct perf_evlist *evlist)
+{
+   /* key is "maps.." */
+   char *map_name = strdup(key + sizeof("maps.") - 1);
+   struct bpf_map *map;
+   int err = -ENOENT;
+   char *map_opt;
+   size_t i;
+
+   if (!map_name)
+   return -ENOMEM;
+
+   map_opt = strchr(map_name, '.');
+   if (!map_opt) {
+   pr_debug("ERROR: Invalid map config: %s\n", map_name);
+   goto out;
+   }
+
+   *map_opt++ = '\0';
+   if (*map_opt == '\0') {
+   pr_debug("ERROR: Invalid map option: %s\n", key);
+   goto out;
+   }
+
+   map = bpf_object__get_map_by_name(obj, map_name);
+   if (!map) {
+   pr_debug("ERROR: Map %s doesn't exist\n", map_name);
+   goto out;
+   }
+
+   for (i = 0; i < ARRAY_SIZE(bpf_config_map_funcs); i++) {
+   struct bpf_config_map_func *func = _config_map_funcs[i];
+
+   if (strcmp(map_opt, func->config_opt) == 0) {
+   err = func->config_func(map, val, evlist);
+   goto out;
+   }
+   }
+
+   pr_debug("ERROR: invalid config option '%s' for maps\n",
+map_opt);
+   err = -ENOENT;
+out:
+   free(map_name);
+   return err;
+}
+
+int bpf__config_obj(struct bpf_object *obj,
+   const char *key,
+   struct bpf_config_val *val,
+   struct perf_evlist *evlist)
+{
+   if (!obj || !key || !val)
+   return -ENODEV;
+
+   if (!prefixcmp(key, "maps.")) {
+   if (val->type != BPF_CONFIG_VAL_STRING) {
+   pr_debug("ERROR: incorrect value type\n");
+   return -EINVAL;
+   }
+   return bpf__config_obj_map(obj, key, val->string,

[PATCH 31/31] perf tools: Enable BPF object configure syntax

2015-10-14 Thread Wang Nan

This patch adds the final step for BPF map configuration. A new syntax
is appended into parser so user can config BPF objects through '/' '/'
enclosed config terms.

After this patch, BPF programs for perf are finally able to utilize
bpf_perf_event_read() introduced in commit 35578d7984003097af2b1e3
(bpf: Implement function bpf_perf_event_read() that get the selected
hardware PMU conuter) in the following way:

 = BPF program bpf_program.c =

 struct bpf_map_def SEC("maps") pmu_map = {
 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 .key_size = sizeof(int),
 .value_size = sizeof(u32),
 .max_entries = __NR_CPUS__,
 };

 SEC("func_write=sys_write")
 int func_write(void *ctx)
 {
 unsigned long long val;
 char fmt[] = "sys_write:pmu=%llu\n";
 val = bpf_perf_event_read(_map, bpf_get_smp_processor_id());
 bpf_trace_printk(fmt, sizeof(fmt), val);
 return 0;
 }

 SEC("func_write_return=sys_write%return")
 int func_write_return(void *ctx)
 {
 unsigned long long val = 0;
 char fmt[] = "sys_write_return: pmu=%llu\n";
 val = bpf_perf_event_read(_map, bpf_get_smp_processor_id());
 bpf_trace_printk(fmt, sizeof(fmt), val);
 return 0;
 }

 = cmdline =
 # echo "" > /sys/kernel/debug/tracing/trace
 # perf record -e evt=cycles/period=0x7fff/ \
   -e bpf_program.c/maps.pmu_map.event=evt/
   -a ls
 # cat /sys/kernel/debug/tracing/trace | grep ls
  ls-3363  [003] d... 75475.056190: : sys_write:pmu=3961415
  ls-3363  [003] dN.. 75475.056212: : sys_write_return: pmu=4051390
  ls-3363  [003] d... 75475.056216: : sys_write:pmu=4065447
  ls-3363  [003] dN.. 75475.056227: : sys_write_return: pmu=4109760
  ls-3363  [003] d... 75475.056230: : sys_write:pmu=4120776
  ls-3363  [003] dN.. 75475.056245: : sys_write_return: pmu=4178441
  ...
 # perf report --stdio
Error:
The perf.data file has no samples!

Here, a very large value is set as the period of the cycles event
because we want to use this event only as a counter and do not
need sampling.

Signed-off-by: Wang Nan 
Signed-off-by: He Kuang 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/ebpf-2mjd96mowgzslkj8jrwbn...@git.kernel.org
---
 tools/perf/util/parse-events.c | 71 --
 tools/perf/util/parse-events.h |  3 +-
 tools/perf/util/parse-events.y | 19 ---
 3 files changed, 85 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 06ba5a6..f8b2bb8 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -624,17 +624,79 @@ errout:
return err;
 }
 
+static int
+parse_events_config_bpf(struct parse_events_evlist *data,
+  struct bpf_object *obj,
+  struct list_head *head_config)
+{
+   struct parse_events_term *term;
+
+   if (!head_config || list_empty(head_config))
+   return 0;
+
+   list_for_each_entry(term, head_config, list) {
+   struct bpf_config_val val;
+   char errbuf[BUFSIZ];
+   int err;
+
+   if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
+   snprintf(errbuf, sizeof(errbuf),
+"Invalid config term for BPF object");
+   errbuf[BUFSIZ - 1] = '\0';
+
+   data->error->idx = term->err_term;
+   data->error->str = strdup(errbuf);
+   return -EINVAL;
+   }
+
+   switch (term->type_val) {
+   case PARSE_EVENTS__TERM_TYPE_NUM:
+   val.type = BPF_CONFIG_VAL_NUM;
+   val.num = (unsigned long long)term->val.num;
+   break;
+   case PARSE_EVENTS__TERM_TYPE_STR:
+   val.type = BPF_CONFIG_VAL_STRING;
+   val.string = term->val.str;
+   break;
+   default:
+   data->error->idx = term->err_val;
+   data->error->str = strdup("Invalid config value");
+   return -EINVAL;
+   }
+
+   err = bpf__config_obj(obj, term->config, , data->evlist);
+   if (err) {
+   bpf__strerror_config_obj(obj, term->config, ,
+data->evlist, err, errbuf,
+sizeof(errbuf));
+   data->error->help = strdup(
+"Hint:\tValid config term:\n"
+" \tmaps..event\n"
+"

[PATCH v4 00/11] Smack namespace

2015-10-14 Thread Lukasz Pawelczyk

Fourth version of Smack namespace. Rebased for smack-for-4.4 with some
minor cosmetic changes.

Readme from v3 as there were the most significant changes:

https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg899383.html
https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg898638.html

1. the label map should be in /proc/.../attr/label_map and be handled
   generically.
2. The proper file system label (unmapped) should be written only once
   to remove a state where an incorrect label is on the filesystem.

Ad 1: Contrary to what Stephen said this unfortunately required LSM
modifications.

For reading: the map can be long, in principle longer than PAGE_SIZE to
which normal getprocattr hook is limited. So I invented a way for
getprocattr to be handled by seq operations. I think it is generic and
can be reused nicely by other LSMs. Also it doesn't break current LSM
code in any way. This created a new patch.

For writing: the default setprocattr arguments were not enough for me
to securely decide if the write access should be granted. To be in
parallel with user namespace I also needed credentials of the process
that actually opened the map (in addition to current). So I added a new
argument. This is also a new patch.

Ad 2: I really tried to make it work without introducing a new LSM
hook but changing a little semantics behind the current ones. Finally
I just added a simple inode_pre_setxattr hook that can swap the label
before it is written to the filesystem.  Hopefully this is ok. I
couldn't do this in inode_setxattr hook as Stephen suggested as this
hook is called before __vfs_setxattr_noperm which is an exported
symbol and is used sometimes without setxattr hence the logic had to
be inside that one. This is also a new patch.

I also added a new patch that "fixes" smackfs/syslog. I've noticed that
inside a namespace when I cat the file it shows "*". Even when I
remapped the star. After looking at the code it had it implicitly
displayed when it's not set. There were a few problems with it:

1. In a namespace we can see a label that is not mapped.
2. There was no way to actually reset the value to default (NULL)
3. It was inconsistent from user space point of view:

# cat /smack/syslog
*

After the reboot the syslog hook doesn't limit anything, the
smack_syslog_label is NULL, but it displays star.

# echo '*' > /smack/syslog
# cat /smack/syslog
*

From user space POV this is the same, file has star inside, but now for
the hook to pass the current needs to be star as well. And there is no
way to reset it back to NULL. So I treated syslog file the same way
unconfined and onlycap are handled. If it's empty, there is no label
set, hook doesn't limit anything (except for the cap). When it's filled
current needs to be equal for the hook to pass (as was before). But now
it can be reset back to NULL by writing EINVAL value (e.g. -syslog).
The syslog hook itself was not modified, only the file handling.

Changes from v3:
- rebased to the latest smack branch (smack-for-4.4)
- cosmetics in second patch reported by Paul Moore
- Acks from Paul Moore and Serge E. Hallyn on selected patches

Changes from v2:
- fix for config ifdefs in user_ns LSM hooks patch (CONFIG_USER_NS
  should've been used instead of CONFIG_SECURITY in several places)
- new patch for "smack_map" -> "attr/label_map" and new related
  getprocattr_seq lsm hook. With this change the code in further
  patches for handling smack_map has been moved to this new method
- new patch for setprocattr hook new argument, file's opener creds
- new patch for inode_pre_setxattr LSM hook
- new patch related to handling smackfs/syslog

Changes from v1:
- "kernel/exit.c: make sure current's nsproxy != NULL while checking
  caps" patch has been dropped
- fixed the title of the user_ns operations patch

Lukasz Pawelczyk (11):
  user_ns: 3 new LSM hooks for user namespace operations
  lsm: /proc/$PID/attr/label_map file and getprocattr_seq hook
  lsm: add file opener's cred to a setprocattr arguments
  lsm: inode_pre_setxattr hook
  smack: extend capability functions and fix 2 checks
  smack: don't use implicit star to display smackfs/syslog
  smack: abstraction layer for 2 common Smack operations
  smack: misc cleanups in preparation for a namespace patch
  smack: namespace groundwork
  smack: namespace implementation
  smack: documentation for the Smack namespace

 Documentation/security/00-INDEX|   2 +
 Documentation/security/Smack-namespace.txt | 231 +++
 MAINTAINERS|   1 +
 fs/proc/base.c |  83 +++-
 fs/xattr.c |  10 +
 include/linux/lsm_hooks.h  |  70 +++-
 include/linux/security.h   |  49 ++-
 include/linux/user_namespace.h |   4 +
 kernel/user.c  |   3 +
 kernel/user_namespace.c|  18 +
 security/apparmor/lsm.c|   5 +-

[PATCH v4 04/11] lsm: inode_pre_setxattr hook

2015-10-14 Thread Lukasz Pawelczyk

Add a new LSM hook called before inode's setxattr. It is required for
LSM to be able to reliably replace the xattr's value to be set to
filesystem in __vfs_setxattr_noperm(). Useful for mapped values, like in
the upcoming Smack namespace patches.

Signed-off-by: Lukasz Pawelczyk 
Acked-by: Serge Hallyn 
---
 fs/xattr.c| 10 ++
 include/linux/lsm_hooks.h |  9 +
 include/linux/security.h  | 10 ++
 security/security.c   | 12 
 4 files changed, 41 insertions(+)

diff --git a/fs/xattr.c b/fs/xattr.c
index 072fee1..cbc8d19 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -100,12 +100,22 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const 
char *name,
if (issec)
inode->i_flags &= ~S_NOSEC;
if (inode->i_op->setxattr) {
+   bool alloc = false;
+
+   error = security_inode_pre_setxattr(dentry, name, &value,
+   &size, flags, &alloc);
+   if (error)
+   return error;
+
error = inode->i_op->setxattr(dentry, name, value, size, flags);
if (!error) {
fsnotify_xattr(dentry);
security_inode_post_setxattr(dentry, name, value,
 size, flags);
}
+
+   if (alloc)
+   kfree(value);
} else if (issec) {
const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
error = security_inode_setsecurity(inode, suffix, value,
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 4f16640..85bfdde 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -349,6 +349,11 @@
  * Check permission before setting the extended attributes
  * @value identified by @name for @dentry.
  * Return 0 if permission is granted.
+ * @inode_pre_setxattr:
+ * Be able to do some operation before setting the @value identified
+ * by @name on the filesystem. Replacing the @value and its @size is
+ * possible. Useful for mapped values. Set @alloc to true if @value
+ * needs to be kfreed afterwards.
  * @inode_post_setxattr:
  * Update inode security field after successful setxattr operation.
  * @value identified by @name for @dentry.
@@ -1448,6 +1453,9 @@ union security_list_options {
int (*inode_getattr)(const struct path *path);
int (*inode_setxattr)(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
+   int (*inode_pre_setxattr)(struct dentry *dentry, const char *name,
+ const void **value, size_t *size,
+ int flags, bool *alloc);
void (*inode_post_setxattr)(struct dentry *dentry, const char *name,
const void *value, size_t size,
int flags);
@@ -1730,6 +1738,7 @@ struct security_hook_heads {
struct list_head inode_setattr;
struct list_head inode_getattr;
struct list_head inode_setxattr;
+   struct list_head inode_pre_setxattr;
struct list_head inode_post_setxattr;
struct list_head inode_getxattr;
struct list_head inode_listxattr;
diff --git a/include/linux/security.h b/include/linux/security.h
index 12bd011..4de4865 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -263,6 +263,9 @@ int security_inode_setattr(struct dentry *dentry, struct 
iattr *attr);
 int security_inode_getattr(const struct path *path);
 int security_inode_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
+int security_inode_pre_setxattr(struct dentry *dentry, const char *name,
+   const void **value, size_t *size, int flags,
+   bool *alloc);
 void security_inode_post_setxattr(struct dentry *dentry, const char *name,
  const void *value, size_t size, int flags);
 int security_inode_getxattr(struct dentry *dentry, const char *name);
@@ -691,6 +694,13 @@ static inline int security_inode_setxattr(struct dentry 
*dentry,
return cap_inode_setxattr(dentry, name, value, size, flags);
 }
 
+static inline int security_inode_pre_setxattr(struct dentry *dentry,
+   const char *name, const void **value,
+   size_t *size, int flags, bool *alloc)
+{
+   return 0;
+}
+
 static inline void security_inode_post_setxattr(struct dentry *dentry,
const char *name, const void *value, size_t size, int flags)
 { }
diff --git a/security/security.c b/security/security.c
index abfc207..75d46b6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -644,6 +644,16 @@ int security_inode_setxattr(struct dentry

Re: [PATCHv3 01/11] arm64: Move swapper pagetable definitions

2015-10-14 Thread Suzuki K. Poulose


On 14/10/15 12:42, Mark Rutland wrote:

On Wed, Oct 14, 2015 at 12:20:24PM +0100, Suzuki K. Poulose wrote:




Also renames the symbols to prevent conflicts. e.g,
BLOCK_SHIFT => SWAPPER_BLOCK_SHIFT


This sounds sensible to me, so FWIW:

Acked-by: Mark Rutland 

I have a couple of minor nits below, though.



diff --git a/arch/arm64/include/asm/kernel-pgtable.h 
b/arch/arm64/include/asm/kernel-pgtable.h
new file mode 100644
index 000..622929d
--- /dev/null
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -0,0 +1,65 @@
+/*
+ * asm/kernel-pgtable.h : Kernel page table mapping


Drop the filename from the comment. It's redundant and ends up being
painful when refactoring and moving things around.



OK


+
+
+#endif


It would be nice to have the usual comment here for what this is ending,
i.e.

#endif /* __ASM_KERNEL_PGTABLE_H */


Oh yes, will add it.

Thanks
Suzuki

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH V2 1/2] bpf: control the trace data output on current cpu when perf sampling

2015-10-14 Thread Kaixu Xia

This patch adds the flag sample_disable to control the trace data
output process when perf sampling. By setting this flag and
integrating with ebpf, we can control the data output process and
get the samples we are most interested in.

The bpf helper bpf_perf_event_sample_control() can control the
perf_event on current cpu.

Signed-off-by: Kaixu Xia 
---
 include/linux/perf_event.h  |  1 +
 include/uapi/linux/bpf.h|  5 +
 include/uapi/linux/perf_event.h |  3 ++-
 kernel/bpf/verifier.c   |  3 ++-
 kernel/events/core.c| 13 +
 kernel/trace/bpf_trace.c| 32 
 6 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 092a0e8..dcbf7d5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -472,6 +472,7 @@ struct perf_event {
struct irq_work pending;
 
atomic_t event_limit;
+   atomic_t sample_disable;
 
void (*destroy)(struct perf_event *);
struct rcu_head rcu_head;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 564f1f0..e2c99c6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -287,6 +287,11 @@ enum bpf_func_id {
 * Return: realm if != 0
 */
BPF_FUNC_get_route_realm,
+
+   /**
+* u64 bpf_perf_event_sample_control(&map, index, flag)
+*/
+   BPF_FUNC_perf_event_sample_control,
__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 2881145..a2b9dd7 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -331,7 +331,8 @@ struct perf_event_attr {
comm_exec  :  1, /* flag comm events that 
are due to an exec */
use_clockid:  1, /* use @clockid for time 
fields */
context_switch :  1, /* context switch data */
-   __reserved_1   : 37;
+   sample_disable :  1, /* don't output data on 
samples */
+   __reserved_1   : 36;
 
union {
__u32   wakeup_events;/* wakeup every n events */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1d6b97b..3ffe630 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -245,6 +245,7 @@ static const struct {
 } func_limit[] = {
{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
+   {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_sample_control},
 };
 
 static void print_verifier_state(struct verifier_env *env)
@@ -910,7 +911,7 @@ static int check_map_func_compatibility(struct bpf_map 
*map, int func_id)
 * don't allow any other map type to be passed into
 * the special func;
 */
-   if (bool_map != bool_func)
+   if (bool_func && bool_map != bool_func)
return -EINVAL;
}
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b11756f..942351c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6337,6 +6337,9 @@ static int __perf_event_overflow(struct perf_event *event,
irq_work_queue(&event->pending);
}
 
+   if (!atomic_read(&event->sample_disable))
+   return ret;
+
if (event->overflow_handler)
event->overflow_handler(event, data, regs);
else
@@ -7709,6 +7712,14 @@ static void account_event(struct perf_event *event)
account_event_cpu(event, event->cpu);
 }
 
+static void perf_event_check_sample_flag(struct perf_event *event)
+{
+   if (event->attr.sample_disable == 1)
+   atomic_set(&event->sample_disable, 0);
+   else
+   atomic_set(&event->sample_disable, 1);
+}
+
 /*
  * Allocate and initialize a event structure
  */
@@ -7840,6 +7851,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
}
}
 
+   perf_event_check_sample_flag(event);
+
return event;
 
 err_per_task:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0fe96c7..f261333 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -215,6 +215,36 @@ const struct bpf_func_proto bpf_perf_event_read_proto = {
.arg2_type  = ARG_ANYTHING,
 };
 
+static u64 bpf_perf_event_sample_control(u64 r1, u64 index, u64 flag, u64 r4, 
u64 r5)
+{
+   struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
+   struct bpf_array *array = container_of(map, struct bpf_array, map);
+   struct perf_event *event;
+
+   if (unlikely(index >= array->map.max_entries))
+   return -E2BIG;
+
+   event = (struct perf_event *)array->ptrs[index];

[PATCH V2 2/2] bpf: control a set of perf events by creating a new ioctl PERF_EVENT_IOC_SET_ENABLER

2015-10-14 Thread Kaixu Xia

This patch creates a new ioctl PERF_EVENT_IOC_SET_ENABLER to let
perf to select an event as 'enabler'. So we can set this 'enabler'
event to enable/disable a set of events. The event on CPU 0 is
treated as the 'enabler' event by default.

Signed-off-by: Kaixu Xia 
---
 include/linux/perf_event.h  |  1 +
 include/uapi/linux/perf_event.h |  1 +
 kernel/events/core.c| 42 -
 kernel/trace/bpf_trace.c|  5 -
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index dcbf7d5..bc9fe77 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -473,6 +473,7 @@ struct perf_event {
 
atomic_t event_limit;
atomic_t sample_disable;
+   atomic_t *p_sample_disable;
 
void (*destroy)(struct perf_event *);
struct rcu_head rcu_head;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index a2b9dd7..3b4fb90 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -393,6 +393,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_FILTER  _IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID  _IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_SET_ENABLER _IO ('$', 9)
 
 enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 942351c..03d2594 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4152,6 +4152,7 @@ static int perf_event_set_output(struct perf_event *event,
 struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
+static int perf_event_set_sample_enabler(struct perf_event *event, u32 
enabler_fd);
 
 static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned 
long arg)
 {
@@ -4208,6 +4209,9 @@ static long _perf_ioctl(struct perf_event *event, 
unsigned int cmd, unsigned lon
case PERF_EVENT_IOC_SET_BPF:
return perf_event_set_bpf_prog(event, arg);
 
+   case PERF_EVENT_IOC_SET_ENABLER:
+   return perf_event_set_sample_enabler(event, arg);
+
default:
return -ENOTTY;
}
@@ -6337,7 +6341,7 @@ static int __perf_event_overflow(struct perf_event *event,
irq_work_queue(&event->pending);
}
 
-   if (!atomic_read(&event->sample_disable))
+   if (!atomic_read(event->p_sample_disable))
return ret;
 
if (event->overflow_handler)
@@ -6989,6 +6993,35 @@ static int perf_event_set_bpf_prog(struct perf_event 
*event, u32 prog_fd)
return 0;
 }
 
+static int perf_event_set_sample_enabler(struct perf_event *event, u32 
enabler_fd)
+{
+   int ret;
+   struct fd enabler;
+   struct perf_event *enabler_event;
+
+   if (enabler_fd == -1)
+   return 0;
+
+   ret = perf_fget_light(enabler_fd, &enabler);
+   if (ret)
+   return ret;
+   enabler_event = enabler.file->private_data;
+   if (event == enabler_event) {
+   fdput(enabler);
+   return 0;
+   }
+
+   /* they must be on the same PMU*/
+   if (event->pmu != enabler_event->pmu) {
+   fdput(enabler);
+   return -EINVAL;
+   }
+
+   event->p_sample_disable = &enabler_event->sample_disable;
+   fdput(enabler);
+   return 0;
+}
+
 static void perf_event_free_bpf_prog(struct perf_event *event)
 {
struct bpf_prog *prog;
@@ -7023,6 +7056,11 @@ static int perf_event_set_bpf_prog(struct perf_event 
*event, u32 prog_fd)
return -ENOENT;
 }
 
+static int perf_event_set_sample_enabler(struct perf_event *event, u32 
group_fd)
+{
+   return -ENOENT;
+}
+
 static void perf_event_free_bpf_prog(struct perf_event *event)
 {
 }
@@ -7718,6 +7756,8 @@ static void perf_event_check_sample_flag(struct 
perf_event *event)
atomic_set(&event->sample_disable, 0);
else
atomic_set(&event->sample_disable, 1);
+
+   event->p_sample_disable = &event->sample_disable;
 }
 
 /*
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f261333..d012be3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -221,9 +221,12 @@ static u64 bpf_perf_event_sample_control(u64 r1, u64 
index, u64 flag, u64 r4, u6
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct perf_event *event;
 
-   if (unlikely(index >= array->map.max_entries))
+   if (unlikely(index > array->map.max_entries))
return -E2BIG;
 
+   if (index == array->map.max_entries)
+   index = 0;
+
event = (struct

[PATCH V2 0/2] bpf: enable/disable events stored in PERF_EVENT_ARRAY maps trace data output when perf sampling

2015-10-14 Thread Kaixu Xia

Previous RFC patch url:
https://lkml.org/lkml/2015/10/12/135

changes in V2:
 - rebase the whole patch set to net-next tree(4b418bf);
 - remove the added flag perf_sample_disable in bpf_map;
 - move the added fields in structure perf_event to proper place
   to avoid cacheline miss;
 - use counter based flag instead of 0/1 switcher in considering
   of reentering events;
 - use a single helper bpf_perf_event_sample_control() to enable/
   disable events;
 - implement a light-weight solution to control the trace data
   output on current cpu;
 - create a new ioctl PERF_EVENT_IOC_SET_ENABLER to enable/disable
   a set of events;

Before this patch,
   $ ./perf record -e cycles -a sleep 1
   $ ./perf report --stdio
# To display the perf.data header info, please use 
--header/--header-only option
#
#
# Total Lost Samples: 0
#
# Samples: 643  of event 'cycles'
# Event count (approx.): 128313904
...

After this patch,
   $ ./perf record -e pmux=cycles --event perf-bpf.o/my_cycles_map=pmux/ -a 
sleep 1
   $ ./perf report --stdio
# To display the perf.data header info, please use 
--header/--header-only option
#
#
# Total Lost Samples: 0
#
# Samples: 25  of event 'cycles'
# Event count (approx.): 5788400
...

The bpf program example:

  struct bpf_map_def SEC("maps") my_cycles_map = {
  .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
  .key_size = sizeof(int),
  .value_size = sizeof(u32),
  .max_entries = 32, 
  };

  SEC("enter=sys_write")
  int bpf_prog_1(struct pt_regs *ctx)
  {
  bpf_perf_event_sample_control(&my_cycles_map, 32, 0); 
  return 0;
  }

  SEC("exit=sys_write%return")
  int bpf_prog_2(struct pt_regs *ctx)
  {
  bpf_perf_event_sample_control(&my_cycles_map, 32, 1); 
  return 0;
  }

Consider control sampling in function level, if we don't use the
PERF_EVENT_IOC_SET_ENABLER ioctl in perf user side, we must set
a high sample frequency to dump trace data.

Kaixu Xia (2):
  bpf: control the trace data output on current cpu when perf sampling
  bpf: control a set of perf events by creating a new ioctl
PERF_EVENT_IOC_SET_ENABLER

 include/linux/perf_event.h  |  2 ++
 include/uapi/linux/bpf.h|  5 
 include/uapi/linux/perf_event.h |  4 +++-
 kernel/bpf/verifier.c   |  3 ++-
 kernel/events/core.c| 53 +
 kernel/trace/bpf_trace.c| 35 +++
 6 files changed, 100 insertions(+), 2 deletions(-)

-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/4] ARM: berlin: improve multi_v7_defconfig support

2015-10-14 Thread Arnd Bergmann

On Wednesday 14 October 2015 12:34:00 Antoine Tenart wrote:
> Hi all,
> 
> Some drivers used on a Marvell Berlin kernel were missing from
> multi_v7_defconfig. This series add them.

Adding them is great, but we try to have as much as possible
in loadable modules for new additions. Can you do this?

Having a single patch instead of four different ones is fine too,
by the way.

Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] mutex: make mutex_lock_nested an inline function

2015-10-14 Thread Mark Brown

On Wed, Oct 14, 2015 at 01:07:17PM +0200, Peter Zijlstra wrote:
> On Wed, Oct 14, 2015 at 11:27:06AM +0100, Mark Brown wrote:
> > On Wed, Oct 14, 2015 at 10:20:50AM +0200, Peter Zijlstra wrote:

> > > Uuh, I just looked at next and saw this regulator_lock_supply()
> > > function. How is that limited? subclass must be <8 otherwise bad things
> > > happen.

> > Can we please get some more discoverable documentation of the arbitrary
> > limits in the lockdep code? 

> include/linux/lockdep.h:#define MAX_LOCKDEP_SUBCLASSES  8UL

Sure, but I don't really expect to have to trawl the implementation of
an API to find out about this sort of thing (I hadn't even been aware
that the subclasses were required to be small positive integers, never
mind needing to check what the limit was).  I think the main place I'd
have expected to see it was in lockdep-design.txt or somewhere near
that.

> > I seem to keep seeing code that bumps into
> > surprising limits like this and I'm not sure how I'm supposed to know
> > about them except through finding out after the fact or trawling the
> > code every time someone touches locking.

> Not knowing what other limits you've hit, I'm not entirely sure how to
> help out there.

The other big one that came up recently was that lockdep apparently
works out what a class is by looking at the point of allocation which
causes a lot of problems for regmap since it makes all regmap locks look
like a single class.  That's fixed now by explicitly allocating a class
per regmap with some macro magic but it was a bit of a surprise.  The
documentation doesn't make this obvious.

signature.asc
Description: PGP signature

Re: [PATCH v2 4/5] mfd: arizona: Update DT binding documentation for mic detection

2015-10-14 Thread Lee Jones

On Wed, 14 Oct 2015, Mark Brown wrote:

> On Wed, Oct 14, 2015 at 08:28:12AM +0100, Lee Jones wrote:
> > On Tue, 13 Oct 2015, Mark Brown wrote:
> > > On Tue, Oct 13, 2015 at 09:02:18AM +0100, Lee Jones wrote:
> 
> > > > Providing Acks should not (and has not to my knowledge) be a binding
> > > > contract to continue providing Acks.  However, should more bindings be
> > > > submitted which appear as though they are related to a particular
> > > > maintainer, then sure, you'll be asked for your expert eye again.
> 
> > > It's a bit concerning when it seems like my review is becoming a blocker
> > > for something and I don't understand why.
> 
> > Not necessarily _your_ review.  Just someone, other than the
> > submitter, who I trust and knows about this stuff.  You just happen 
> > to tick those boxes this time.  Cross pollination and knowledge sharing
> > is one of the characteristics of MLs which I'm particularly proud of.
> 
> That was what it read like.
> 
> > > When your power budget is in the low double digit microamps and you're
> > > trying to respond promptly and reliably to rapidly changing and variable
> > > physical inputs it gets complicated.
> 
> > This is exactly why I asked you.
> 
> > Thanks for adding structure to my PoV. ;) 
> 
> So, a more specific question would have helped - just a general "any
> thoughts" type question isn't very clear.

To both comments -- this is how I phrased it:

"I either need an Ack from the DT folks, or at least someone who knows
about this stuff.  Mark perhaps."

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c:70:2-8: preceding lock on line 67

2015-10-14 Thread Julia Lawall

Please check whether not releasing the lock is intentional.

julia

On Wed, 14 Oct 2015, kbuild test robot wrote:

> CC: kbuild-...@01.org
> CC: linux-kernel@vger.kernel.org
> TO: Ben Skeggs 
>
> tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
> master
> head:   5b5f1455272e23f4e7889cec37228802d8d01adf
> commit: 344c2d429dd86b1b0113177e18f15adb74e9d936 drm/nouveau/fb: remove 
> dependence on namedb/engctx lookup
> date:   7 weeks ago
> :: branch date: 16 hours ago
> :: commit date: 7 weeks ago
>
> >> drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c:70:2-8: preceding lock on 
> >> line 67
>
> git remote add linus 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
> git remote update linus
> git checkout 344c2d429dd86b1b0113177e18f15adb74e9d936
> vim +70 drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
>
> 344c2d42 Ben Skeggs 2015-08-20  61  }
> 344c2d42 Ben Skeggs 2015-08-20  62
> 344c2d42 Ben Skeggs 2015-08-20  63  struct nvkm_fifo_chan *
> 344c2d42 Ben Skeggs 2015-08-20  64  nvkm_fifo_chan_chid(struct nvkm_fifo 
> *fifo, int chid, unsigned long *rflags)
> 344c2d42 Ben Skeggs 2015-08-20  65  {
> 344c2d42 Ben Skeggs 2015-08-20  66unsigned long flags;
> 344c2d42 Ben Skeggs 2015-08-20 @67spin_lock_irqsave(&fifo->lock, flags);
> 344c2d42 Ben Skeggs 2015-08-20  68if (fifo->channel[chid]) {
> 344c2d42 Ben Skeggs 2015-08-20  69*rflags = flags;
> 344c2d42 Ben Skeggs 2015-08-20 @70return (void 
> *)fifo->channel[chid];
> 344c2d42 Ben Skeggs 2015-08-20  71}
> 344c2d42 Ben Skeggs 2015-08-20  72spin_unlock_irqrestore(&fifo->lock, 
> flags);
> 344c2d42 Ben Skeggs 2015-08-20  73return NULL;
>
> ---
> 0-DAY kernel test infrastructureOpen Source Technology Center
> https://lists.01.org/pipermail/kbuild-all   Intel Corporation
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 9/9] mm/compaction: new threshold for compaction depleted zone

2015-10-14 Thread Vlastimil Babka

On 08/24/2015 04:19 AM, Joonsoo Kim wrote:
> Now, compaction algorithm become powerful. Migration scanner traverses
> whole zone range. So, old threshold for depleted zone which is designed
> to imitate compaction deferring approach isn't appropriate for current
> compaction algorithm. If we adhere to current threshold, 1, we can't
> avoid excessive overhead caused by compaction, because one compaction
> for low order allocation would be easily successful in any situation.
> 
> This patch re-implements threshold calculation based on zone size and
> allocation requested order. We judge whether compaction possibility is
> depleted or not by number of successful compaction. Roughly, 1/100
> of future scanned area should be allocated for high order page during
> one compaction iteration in order to determine whether zone's compaction
> possibility is depleted or not.

Finally finishing my review, sorry it took that long...

> Below is test result with following setup.
> 
> Memory is artificially fragmented to make order 3 allocation hard. And,
> most of pageblocks are changed to movable migratetype.
> 
>   System: 512 MB with 32 MB Zram
>   Memory: 25% memory is allocated to make fragmentation and 200 MB is
>   occupied by memory hogger. Most pageblocks are movable
>   migratetype.
>   Fragmentation: Successful order 3 allocation candidates may be around
>   1500 roughly.
>   Allocation attempts: Roughly 3000 order 3 allocation attempts
>   with GFP_NORETRY. This value is determined to saturate allocation
>   success.
> 
> Test: hogger-frag-movable
> 
> Success(N)94  83
> compact_stall   36424048
> compact_success  144 212
> compact_fail34983835
> pgmigrate_success   15897219  216387
> compact_isolated31899553  487712
> compact_migrate_scanned 59146745 2513245
> compact_free_scanned49566134 4124319

The decrease in scanned/isolated/migrated counts looks definitely nice, but why
did success regress when compact_success improved substantially?

> This change results in greatly decreasing compaction overhead when
> zone's compaction possibility is nearly depleted. But, I should admit
> that it's not perfect because compaction success rate is decreased.
> More precise tuning threshold would restore this regression, but,
> it highly depends on workload so I'm not doing it here.
> 
> Other test doesn't show big regression.
> 
>   System: 512 MB with 32 MB Zram
>   Memory: 25% memory is allocated to make fragmentation and kernel
>   build is running on background. Most pageblocks are movable
>   migratetype.
>   Fragmentation: Successful order 3 allocation candidates may be around
>   1500 roughly.
>   Allocation attempts: Roughly 3000 order 3 allocation attempts
>   with GFP_NORETRY. This value is determined to saturate allocation
>   success.
> 
> Test: build-frag-movable
> 
> Success(N)89  87
> compact_stall   40533642
> compact_success  264 202
> compact_fail37883440
> pgmigrate_success6497642  153413
> compact_isolated13292640  353445
> compact_migrate_scanned 69714502 2307433
> compact_free_scanned20243121 2325295

Here compact_success decreased relatively a lot, while success just barely.
Less counterintuitive than the first result, but still a bit.

> This looks like reasonable trade-off.
> 
> Signed-off-by: Joonsoo Kim 
> ---
>  mm/compaction.c | 19 ---
>  1 file changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/mm/compaction.c b/mm/compaction.c
> index e61ee77..e1b44a5 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -129,19 +129,24 @@ static struct page *pageblock_pfn_to_page(unsigned long 
> start_pfn,
>  
>  /* Do not skip compaction more than 64 times */
>  #define COMPACT_MAX_FAILED 4
> -#define COMPACT_MIN_DEPLETE_THRESHOLD 1UL
> +#define COMPACT_MIN_DEPLETE_THRESHOLD 4UL
>  #define COMPACT_MIN_SCAN_LIMIT (pageblock_nr_pages)
>  
>  static bool compaction_depleted(struct zone *zone)
>  {
> - unsigned long threshold;
> + unsigned long nr_possible;
>   unsigned long success = zone->compact_success;
> + unsigned long threshold;
>  
> - /*
> -  * Now, to imitate current compaction deferring approach,
> -  * choose threshold to 1. It will be changed in the future.
> -  */
> - threshold = COMPACT_MIN_DEPLETE_THRESHOLD;
> + nr_possible = zone->managed_pages >> zone->compact_order_failed;
> +
> + /* Migration scanner normally scans less than 1/4 range of zone */
> + nr_possible >>= 2;
> +
> + /* We hope to succeed more than 1/100 roughly */
> + threshold = nr_possible >> 7;
> +
> + threshold = max(threshold, COMPACT_MIN_DEPLETE_THRESHOLD);
>   if (success >=

Re: [PATCH] pwm-backlight: fix the panel power sequence

2015-10-14 Thread Lee Jones

On Wed, 14 Oct 2015, YH Huang wrote:

> Hi all,
> 
> If you have any suggestion, please let me know.
> Thanks.

Please don't do that.

If you think the patch has fallen through the gaps, please just RESEND
it, like [RESEND vX] $subject.

> On Wed, 2015-09-16 at 22:42 +0800, YH Huang wrote:
> > In order to match the panel power sequence, disable the enable_gpio
> > in the probe function. Also, reorder the code in the power_on and
> > power_off function to match the timing.
> > 
> > Signed-off-by: YH Huang 
> > ---
> >  drivers/video/backlight/pwm_bl.c |   15 +--
> >  1 file changed, 9 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/video/backlight/pwm_bl.c 
> > b/drivers/video/backlight/pwm_bl.c
> > index eff379b..99eca1e 100644
> > --- a/drivers/video/backlight/pwm_bl.c
> > +++ b/drivers/video/backlight/pwm_bl.c
> > @@ -54,10 +54,11 @@ static void pwm_backlight_power_on(struct pwm_bl_data 
> > *pb, int brightness)
> > if (err < 0)
> > dev_err(pb->dev, "failed to enable power supply\n");
> >  
> > +   pwm_enable(pb->pwm);
> > +
> > if (pb->enable_gpio)
> > gpiod_set_value(pb->enable_gpio, 1);
> >  
> > -   pwm_enable(pb->pwm);
> > pb->enabled = true;
> >  }
> >  
> > @@ -66,12 +67,12 @@ static void pwm_backlight_power_off(struct pwm_bl_data 
> > *pb)
> > if (!pb->enabled)
> > return;
> >  
> > -   pwm_config(pb->pwm, 0, pb->period);
> > -   pwm_disable(pb->pwm);
> > -
> > if (pb->enable_gpio)
> > gpiod_set_value(pb->enable_gpio, 0);
> >  
> > +   pwm_config(pb->pwm, 0, pb->period);
> > +   pwm_disable(pb->pwm);
> > +
> > regulator_disable(pb->power_supply);
> > pb->enabled = false;
> >  }
> > @@ -241,8 +242,7 @@ static int pwm_backlight_probe(struct platform_device 
> > *pdev)
> > pb->dev = &pdev->dev;
> > pb->enabled = false;
> >  
> > -   pb->enable_gpio = devm_gpiod_get_optional(&pdev->dev, "enable",
> > - GPIOD_OUT_HIGH);
> > +   pb->enable_gpio = devm_gpiod_get_optional(&pdev->dev, "enable");
> > if (IS_ERR(pb->enable_gpio)) {
> > ret = PTR_ERR(pb->enable_gpio);
> > goto err_alloc;
> > @@ -264,6 +264,9 @@ static int pwm_backlight_probe(struct platform_device 
> > *pdev)
> > pb->enable_gpio = gpio_to_desc(data->enable_gpio);
> > }
> >  
> > +   if (pb->enable_gpio)
> > +   gpiod_direction_output(pb->enable_gpio, 0);
> > +
> > pb->power_supply = devm_regulator_get(&pdev->dev, "power");
> > if (IS_ERR(pb->power_supply)) {
> > ret = PTR_ERR(pb->power_supply);
> 
> 

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4 2/2] arm64: Expand the stack trace feature to support IRQ stack

2015-10-14 Thread Jungseok Lee

On Oct 14, 2015, at 4:13 PM, AKASHI Takahiro wrote:
> On 10/09/2015 11:24 PM, James Morse wrote:
>> Hi Jungseok,
>> 
>> On 07/10/15 16:28, Jungseok Lee wrote:
>>> Currently, a call trace drops a process stack walk when a separate IRQ
>>> stack is used. It makes a call trace information much less useful when
>>> a system gets paniked in interrupt context.
>> 
>> panicked
>> 
>>> This patch addresses the issue with the following schemes:
>>> 
>>>   - Store aborted stack frame data
>>>   - Decide whether another stack walk is needed or not via current sp
>>>   - Loosen the frame pointer upper bound condition
>> 
>> It may be worth merging this patch with its predecessor - anyone trying to
>> bisect a problem could land between these two patches, and spend time
>> debugging the truncated call traces.
>> 
>> 
>>> diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
>>> index 6ea82e8..e5904a1 100644
>>> --- a/arch/arm64/include/asm/irq.h
>>> +++ b/arch/arm64/include/asm/irq.h
>>> @@ -2,13 +2,25 @@
>>>  #define __ASM_IRQ_H
>>> 
>>>  #include 
>>> +#include 
>>> 
>>>  #include 
>>> 
>>>  struct irq_stack {
>>> void *stack;
>>> +   struct stackframe frame;
>>>  };
>>> 
>>> +DECLARE_PER_CPU(struct irq_stack, irq_stacks);
>> 
>> Good idea, storing this in the per-cpu data makes it immune to stack
>> corruption.
> 
> Is this the only reason that you have a dummy stack frame in per-cpu data?
> By placing this frame in an interrupt stack, I think, we will be able to 
> eliminate
> changes in dump_stace(). and
> 
>> 
>>> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
>>> index 407991b..5124649 100644
>>> --- a/arch/arm64/kernel/stacktrace.c
>>> +++ b/arch/arm64/kernel/stacktrace.c
>>> @@ -43,7 +43,27 @@ int notrace unwind_frame(struct stackframe *frame)
>>> low  = frame->sp;
>>> high = ALIGN(low, THREAD_SIZE);
>>> 
>>> -   if (fp < low || fp > high - 0x18 || fp & 0xf)
>>> +   /*
>>> +* A frame pointer would reach an upper bound if a prologue of the
>>> +* first function of call trace looks as follows:
>>> +*
>>> +*  stp x29, x30, [sp,#-16]!
>>> +*  mov x29, sp
>>> +*
>>> +* Thus, the upper bound is (top of stack - 0x20) with consideration
>> 
>> The terms 'top' and 'bottom' of the stack are confusing, your 'top' appears
>> to be the highest address, which is used first, making it the bottom of the
>> stack.
>> 
>> I would try to use the terms low/est and high/est, in keeping with the
>> variable names in use here.
>> 
>> 
>>> +* of a 16-byte empty space in THREAD_START_SP.
>>> +*
>>> +* The value, 0x20, however, does not cover all cases as interrupts
>>> +* are handled using a separate stack. That is, a call trace can start
>>> +* from elx_irq exception vectors. The symbols could not be promoted
>>> +* to candidates for a stack trace under the restriction, 0x20.
>>> +*
>>> +* The scenario is handled without complexity as 1) considering
>>> +* (bottom of stack + THREAD_START_SP) as a dummy frame pointer, the
>>> +* content of which is 0, and 2) allowing the case, which changes
>>> +* the value to 0x10 from 0x20.
>> 
>> Where has 0x20 come from? The old value was 0x18.
>> 
>> My understanding is the highest part of the stack looks like this:
>> high[ off-stack ]
>> high - 0x08 [ left free by THREAD_START_SP ]
>> high - 0x10 [ left free by THREAD_START_SP ]
>> high - 0x18 [#1 x30 ]
>> high - 0x20 [#1 x29 ]
>> 
>> So the condition 'fp > high - 0x18' prevents returning either 'left free'
>> address, or off-stack-value as a frame. Changing it to 'fp > high - 0x10'
>> allows the first half of that reserved area to be a valid stack frame.
>> 
>> This change is breaking perf using incantations [0] and [1]:
>> 
>> Before, with just patch 1/2:
>>   ---__do_softirq
>>  |
>>  |--92.95%-- __handle_domain_irq
>>  |  __irqentry_text_start
>>  |  el1_irq
>>  |
>> 
>> After, with both patches:
>>  ---__do_softirq
>> |
>> |--83.83%-- __handle_domain_irq
>> |  __irqentry_text_start
>> |  el1_irq
>> |  |
>> |  |--99.39%-- 0x48040d0c
>> |   --0.61%-- [...]
>> |
> 
> This also shows that walk_stackframe() doesn't walk through a process stack.
> Now I'm trying the following hack on top of Jungseok's patch.
> (It doesn't traverse from an irq stack to a process stack yet. I need to modify
> unwind_frame().)

I found a difference between perf and dump_backtrace() while reviewing the perf
call chain operation. Perf relies on walk_stackframe(), but dump_backtrace() does
not.
That is, a symbol is printed out *before* unwind_frame() call in case of

Re: [PATCHv4 1/1] SCSI: hosts: update to use ida_simple for host_no management

2015-10-14 Thread Johannes Thumshirn

On Wed, 2015-10-07 at 16:51 -0700, Lee Duncan wrote:
> Update the SCSI hosts module to use the ida_simple*() routines
> to manage its host_no index instead of an ATOMIC integer. This
> means that the SCSI host number will now be reclaimable.
> 
> Signed-off-by: Lee Duncan 
> ---
>  drivers/scsi/hosts.c | 22 ++
>  1 file changed, 14 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
> index 8bb173e01084..b6a5ffa886b7 100644
> --- a/drivers/scsi/hosts.c
> +++ b/drivers/scsi/hosts.c
> @@ -33,7 +33,7 @@
>  #include 
>  #include 
>  #include 
> -
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -42,7 +42,7 @@
>  #include "scsi_logging.h"
>  
>  
> -static atomic_t scsi_host_next_hn = ATOMIC_INIT(0);  /*
> host_no for next new host */
> +static DEFINE_IDA(host_index_ida);
>  
>  
>  static void scsi_host_cls_release(struct device *dev)
> @@ -337,6 +337,8 @@ static void scsi_host_dev_release(struct device
> *dev)
>  
>   kfree(shost->shost_data);
>  
> + ida_simple_remove(_index_ida, shost->host_no);
> +
>   if (parent)
>   put_device(parent);
>   kfree(shost);
> @@ -370,6 +372,7 @@ struct Scsi_Host *scsi_host_alloc(struct
> scsi_host_template *sht, int privsize)
>  {
>   struct Scsi_Host *shost;
>   gfp_t gfp_mask = GFP_KERNEL;
> + int index;
>  
>   if (sht->unchecked_isa_dma && privsize)
>   gfp_mask |= __GFP_DMA;
> @@ -388,11 +391,11 @@ struct Scsi_Host *scsi_host_alloc(struct
> scsi_host_template *sht, int privsize)
>   init_waitqueue_head(>host_wait);
>   mutex_init(>scan_mutex);
>  
> - /*
> -  * subtract one because we increment first then return, but
> we need to
> -  * know what the next host number was before increment
> -  */
> - shost->host_no = atomic_inc_return(_host_next_hn) - 1;
> + index = ida_simple_get(_index_ida, 0, 0, GFP_KERNEL);
> + if (index < 0)
> + goto fail_kfree;
> + shost->host_no = index;
> +
>   shost->dma_channel = 0xff;
>  
>   /* These three are default values which can be overridden */
> @@ -477,7 +480,7 @@ struct Scsi_Host *scsi_host_alloc(struct
> scsi_host_template *sht, int privsize)
>   shost_printk(KERN_WARNING, shost,
>   "error handler thread failed to spawn, error
> = %ld\n",
>   PTR_ERR(shost->ehandler));
> - goto fail_kfree;
> + goto fail_index_remove;
>   }
>  
>   shost->tmf_work_q = alloc_workqueue("scsi_tmf_%d",
> @@ -493,6 +496,8 @@ struct Scsi_Host *scsi_host_alloc(struct
> scsi_host_template *sht, int privsize)
>  
>   fail_kthread:
>   kthread_stop(shost->ehandler);
> + fail_index_remove:
> + ida_simple_remove(_index_ida, shost->host_no);
>   fail_kfree:
>   kfree(shost);
>   return NULL;
> @@ -588,6 +593,7 @@ int scsi_init_hosts(void)
>  void scsi_exit_hosts(void)
>  {
>   class_unregister(_class);
> + ida_destroy(_index_ida);
>  }
>  
>  int scsi_is_host_device(const struct device *dev)

Reviewed-by: Johannes Thumshirn 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Silent hang up caused by pages being not scanned?

2015-10-14 Thread Tetsuo Handa

Linus Torvalds wrote:
> On Tue, Oct 13, 2015 at 5:21 AM, Tetsuo Handa
>  wrote:
> >
> > If I remove
> >
> > /* Any of the zones still reclaimable?  Don't OOM. */
> > if (zones_reclaimable)
> > return 1;
> >
> > the OOM killer is invoked even when there are so much memory which can be
> > reclaimed after written to disk. This is definitely premature invocation of
> > the OOM killer.
> 
> Right. The rest of the code knows that the return value right now
> means "there is no memory at all" rather than "I made progress".
> 
> > Yes. But we can't simply do
> >
> > if (order <= PAGE_ALLOC_COSTLY_ORDER || ..
> >
> > because we won't be able to call out_of_memory(), can we?
> 
> So I think that whole thing is kind of senseless. Not just that
> particular conditional, but what it *does* too.
> 
> What can easily happen is that we are a blocking allocation, but
> because we're __GFP_FS or something, the code doesn't actually start
> writing anything out. Nor is anything congested. So the thing just
> loops.

congestion_wait() sounds like a source of silent hang up.
http://lkml.kernel.org/r/201406052145.cib35534.oqlvmsjfoht...@i-love.sakura.ne.jp

> 
> And looping is stupid, because we may be not able to actually free
> anything exactly because of limitations like __GFP_FS.
> 
> So
> 
>  (a) the looping condition is senseless
> 
>  (b) what we do when looping is senseless
> 
> and we actually do try to wake up kswapd in the loop, but we never
> *wait* for it, so that's largely pointless too.

Aren't we waiting for kswapd forever?
In other words, we never check whether kswapd can make some progress.
http://lkml.kernel.org/r/20150812091104.ga14...@dhcp22.suse.cz

> 
> So *of*course* the direct reclaim code has to set "I made progress",
> because if it doesn't lie and say so, then the code will randomly not
> loop, and will oom, and things go to hell.
> 
> But I hate the "let's tweak the zone_reclaimable" idea, because it
> doesn't actually fix anything. It just perpetuates this "the code
> doesn't make sense, so let's add *more* senseless heuristics to this
> whole loop".

I also don't think that tweaking the current reclaim logic solves the bugs
which have bothered me via unexplained hangups / reboots.
To me, the current memory allocator is so puzzling that it is as if

   if (there_is_much_free_memory() == TRUE)
   goto OK;
   if (do_some_heuristic1() == SUCCESS)
   goto OK;
   if (do_some_heuristic2() == SUCCESS)
   goto OK;
   if (do_some_heuristic3() == SUCCESS)
   goto OK;
   (...snipped...)
   if (do_some_heuristicN() == SUCCESS)
   goto OK;
   while (1);

and we don't know how many heuristics we need to add in order to avoid
reaching the "while (1);". (We are reaching the "while (1);" before

   if (out_of_memory() == SUCCESS)
   goto OK;

is called.)

> 
> So instead of that senseless thing, how about trying something
> *sensible*. Make the code do something that we can actually explain as
> making sense.
> 
> I'd suggest something like:
> 
>  - add a "retry count"
> 
>  - if direct reclaim made no progress, or made less progress than the target:
> 
>   if (order > PAGE_ALLOC_COSTLY_ORDER) goto noretry;

Yes.

> 
>  - regardless of whether we made progress or not:
> 
>   if (retry count < X) goto retry;
> 
>   if (retry count < 2*X) yield/sleep 10ms/wait-for-kswapd and then
> goto retry

I tried sleeping for reducing CPU usage and reporting via SysRq-w.
http://lkml.kernel.org/r/201411231353.bde90173.fqomjtholvf...@i-love.sakura.ne.jp

I complained at 
http://lkml.kernel.org/r/201502162023.gge26089.tjoofqmffhl...@i-love.sakura.ne.jp

| Oh, why every thread trying to allocate memory has to repeat
| the loop that might defer somebody who can make progress if CPU time was
| given? I wish only somebody like kswapd repeats the loop on behalf of all
| threads waiting at memory allocation slowpath...

Direct reclaim can defer termination upon SIGKILL if blocked at unkillable
lock. If performance were not a problem, is direct reclaim mandatory?

Of course, performance is the problem. Thus we would try direct reclaim
for at least once. But I wish memory allocation logic were as simple as

  (1) If there is enough free memory, allocate it.

  (2) If there is not enough free memory, join on the
  waitqueue list

wait_event_timeout(waiter, memory_reclaimed, timeout)

  and wait for reclaiming kernel threads (e.g. kswapd) to wake
  the waiters up. If the caller is willing to give up upon SIGKILL
  (e.g. __GFP_KILLABLE) then

wait_event_killable_timeout(waiter, memory_reclaimed, timeout)

  and return NULL upon SIGKILL.

  (3) Whenever reclaiming kernel threads reclaimed memory and there are
  waiters, wake the waiters up.

  (4) If reclaiming kernel threads cannot reclaim memory,
  the caller will wake up due to timeout, and invoke the OOM
  killer unless the caller does not want (e.g. __GFP_NO_OOMKILL).

> 
>

Re: [PATCH 2/3] iommu/hisilicon: Add hi6220 iommu driver

2015-10-14 Thread Joerg Roedel

On Thu, Oct 08, 2015 at 03:45:47PM +0800, Chen Feng wrote:
> +static int hi6220_smmu_attach_dev(struct iommu_domain *domain,
> +   struct device *dev)
> +{
> + struct hi6220_domain *m_domain = to_hi6220_domain(domain);
> +
> + smmu_domain_prepare(m_domain);
> + dev->archdata.iommu = _domain->smmu_dev->iova_allocator;
> +
> + return 0;
> +}

What happens when you attach devices behind different smmus to one
domain? Will that overwrite the smmu_dev pointer in the domain?

> +static size_t hi6220_smmu_unmap(struct iommu_domain *domain, unsigned long 
> iova,
> + size_t size)
> +{
> + struct hi6220_domain *m_domain = to_hi6220_domain(domain);
> + size_t page_size = m_domain->smmu_dev->page_size;
> + struct hi6220_smmu *smmu_dev = m_domain->smmu_dev;
> + int *page_table = (unsigned int *)smmu_dev->pgtable_virt;
> +
> + if (size != page_size) {
> + pr_err("unmap size error, only support %zd\n", page_size);
> + return 0;
> + }
> +
> + __clear_smmu_pte(page_table + IOVA_PFN(iova));
> +
> + return page_size;
> +}

Don't you need a call to __invalid_smmu_tlb here too?



Joerg

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4 2/2] arm64: Expand the stack trace feature to support IRQ stack

2015-10-14 Thread Jungseok Lee

On Oct 14, 2015, at 12:00 AM, Jungseok Lee wrote:
> On Oct 13, 2015, at 8:00 PM, James Morse wrote:
>> Hi Jungseok,
> 
> Hi James,
> 
>> On 12/10/15 23:13, Jungseok Lee wrote:
>>> On Oct 13, 2015, at 1:34 AM, James Morse wrote:
>>>> Having two kmem_caches for 16K stacks on a 64K page system may be wasteful
>>>> (especially for systems with few cpus)…
>>> 
>>> This would be a single concern. To address this issue, I drop the 'static'
>>> keyword in thread_info_cache. Please refer to the below hunk.
>> 
>> It's only a problem on systems with 64K pages, which don't have a multiple
>> of 4 cpus. I suspect if you turn on 64K pages, you have many cores with
>> plenty of memory…
> 
> Yes, the problem 'two kmem_caches' comes from only 64K page system.
> 
> I don't get the statement 'which don't have a multiple of 4 cpus'.
> Could you point out what I am missing?

You're talking about sl{a|u}b allocator behavior. If so, I got what you meant.

> Since I don't have platforms which have many cores and huge memory,
> I cannot play with this series on them.
> 
>>>> The alternative is to define CONFIG_ARCH_THREAD_INFO_ALLOCATOR and
>>>> allocate all stack memory from arch code. (Largely copied code, prevents
>>>> irq stacks being a different size, and nothing uses that define today!)
>>>>
>>>>
>>>> Thoughts?
>>> 
>>> Almost same story I've been testing.
>>> 
>>> I'm aligned with you regarding CONFIG_ARCH_THREAD_INFO_ALLOCATOR.
>>> 
>>> Another approach I've tried is the following data structure, but it's not
>>> a good fit for this case due to __per_cpu_offset which is page-size aligned,
>>> not thread-size.
>>> 
>>> struct irq_stack {
>>> char stack[THREAD_SIZE];
>>> char *highest;
>>> } __aligned(THREAD_SIZE);
>>> 
>>> DEFINE_PER_CPU(struct irq_stack, irq_stacks);
>> 
>> Yes, x86 does this - but it increases the Image size by 16K, as that space
>> could have some initialisation values. This isn't a problem on x86 as
>> no-one uses the uncompressed image.
>> 
>> I would avoid this approach due to the bloat!
>> 
>>> 
>>> 8<-
>>> diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
>>> index 6ea82e8..d3619b3 100644
>>> --- a/arch/arm64/include/asm/irq.h
>>> +++ b/arch/arm64/include/asm/irq.h
>>> @@ -1,7 +1,9 @@
>>> #ifndef __ASM_IRQ_H
>>> #define __ASM_IRQ_H
>>> 
>>> +#include 
>>> #include 
>>> +#include 
>>> 
>>> #include 
>>> 
>>> @@ -9,6 +11,21 @@ struct irq_stack {
>>>   void *stack;
>>> };
>>> 
>>> +#if THREAD_SIZE >= PAGE_SIZE
>>> +static inline void *__alloc_irq_stack(void)
>>> +{
>>> +   return (void *)__get_free_pages(THREADINFO_GFP | __GFP_ZERO,
>>> +   THREAD_SIZE_ORDER);
>>> +}
>>> +#else
>>> +extern struct kmem_cache *thread_info_cache;
>> 
>> If this has been made a published symbol, it should go in a header file.
> 
> Sure.

I was under the wrong impression that there is room for it under include/linux/*.

IMO, whether an arch relies on thread_info_cache or not is an architectural
option. In other words, it would be clearer to put this extern under
arch/*/include/asm/*.

Thoughts?

Best Regards
Jungseok Lee
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/3] ARM: at91/defconfig: update sama5 defconfig

2015-10-14 Thread Ludovic Desroches

Add SAMA5D2 SoC plus Atmel flexcom and Atmel sdhci devices.

Signed-off-by: Ludovic Desroches 
---
 arch/arm/configs/sama5_defconfig | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
index 31eb951..6e806f5 100644
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -15,7 +15,7 @@ CONFIG_LBDAF=y
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARCH_AT91=y
-CONFIG_SOC_SAM_V7=y
+CONFIG_SOC_SAMA5D2=y
 CONFIG_SOC_SAMA5D3=y
 CONFIG_SOC_SAMA5D4=y
 CONFIG_AEABI=y
@@ -135,6 +135,7 @@ CONFIG_POWER_SUPPLY=y
 CONFIG_POWER_RESET=y
 # CONFIG_HWMON is not set
 CONFIG_SSB=m
+CONFIG_MFD_ATMEL_FLEXCOM=y
 CONFIG_REGULATOR=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_REGULATOR_ACT8865=y
@@ -171,6 +172,9 @@ CONFIG_USB_ATMEL_USBA=y
 CONFIG_USB_G_SERIAL=y
 CONFIG_MMC=y
 # CONFIG_MMC_BLOCK_BOUNCE is not set
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_AT91=y
 CONFIG_MMC_ATMELMCI=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/3] ARM: at91/dt: sama5d2: add missing devices

2015-10-14 Thread Ludovic Desroches

Big update of the sama5d2.dtsi file since many patches have reached the
mainline:
- add generated clocks
- enable crypto devices
- add flexcom devices
- add tdes device
- add sdmmc devices

Signed-off-by: Ludovic Desroches 
Signed-off-by: Nicolas Ferre 
Signed-off-by: Cyrille Pitchen 
---
 arch/arm/boot/dts/sama5d2.dtsi | 152 -
 1 file changed, 150 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
index cc05cde..5afccbc 100644
--- a/arch/arm/boot/dts/sama5d2.dtsi
+++ b/arch/arm/boot/dts/sama5d2.dtsi
@@ -263,6 +263,24 @@
cache-level = <2>;
};
 
+   sdmmc0: sdio-host@a000 {
+   compatible = "atmel,sama5d2-sdhci";
+   reg = <0xa000 0x300>;
+   interrupts = <31 IRQ_TYPE_LEVEL_HIGH 0>;
+   clocks = <_hclk>, <_gclk>, <>;
+   clock-names = "hclock", "multclk", "baseclk";
+   status = "disabled";
+   };
+
+   sdmmc1: sdio-host@b000 {
+   compatible = "atmel,sama5d2-sdhci";
+   reg = <0xb000 0x300>;
+   interrupts = <32 IRQ_TYPE_LEVEL_HIGH 0>;
+   clocks = <_hclk>, <_gclk>, <>;
+   clock-names = "hclock", "multclk", "baseclk";
+   status = "disabled";
+   };
+
apb {
compatible = "simple-bus";
#address-cells = <1>;
@@ -619,6 +637,18 @@
atmel,clk-output-range = <0 
8300>;
};
 
+   i2s0_clk: i2s0_clk {
+   #clock-cells = <0>;
+   reg = <54>;
+   atmel,clk-output-range = <0 
8300>;
+   };
+
+   i2s1_clk: i2s1_clk {
+   #clock-cells = <0>;
+   reg = <55>;
+   atmel,clk-output-range = <0 
8300>;
+   };
+
classd_clk: classd_clk {
#clock-cells = <0>;
reg = <59>;
@@ -697,6 +727,52 @@
reg = <53>;
};
};
+
+   gck {
+   compatible = 
"atmel,sama5d2-clk-generated";
+   #address-cells = <1>;
+   #size-cells = <0>;
+   interrupt-parent = <>;
+   clocks = <>, <>, 
<>, <>, <>;
+
+   sdmmc0_gclk: sdmmc0_gclk {
+   #clock-cells = <0>;
+   reg = <31>;
+   };
+
+   sdmmc1_gclk: sdmmc1_gclk {
+   #clock-cells = <0>;
+   reg = <32>;
+   };
+
+   tcb0_gclk: tcb0_gclk {
+   #clock-cells = <0>;
+   reg = <35>;
+   atmel,clk-output-range = <0 
8300>;
+   };
+
+   tcb1_gclk: tcb1_gclk {
+   #clock-cells = <0>;
+   reg = <36>;
+   atmel,clk-output-range = <0 
8300>;
+   };
+
+   pwm_gclk: pwm_gclk {
+   #clock-cells = <0>;
+   reg = <38>;
+   atmel,clk-output-range = <0 
8300>;
+   };
+
+   i2s0_gclk: i2s0_gclk {
+   #clock-cells = <0>;
+   reg = <54>;
+   };
+
+   i2s1_gclk: i2s1_gclk {
+   #clock-cells = <0>;
+

[PATCH 2/3] ARM: at91/dt: sama5d2 Xplained: add several devices

2015-10-14 Thread Ludovic Desroches

Add sdmmc and flexcom devices

Signed-off-by: Ludovic Desroches 
Signed-off-by: Cyrille Pitchen 
---
 arch/arm/boot/dts/at91-sama5d2_xplained.dts | 106 
 1 file changed, 106 insertions(+)

diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts 
b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
index dc23979..bce9597 100644
--- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
@@ -45,6 +45,7 @@
 /dts-v1/;
 #include "sama5d2.dtsi"
 #include "sama5d2-pinfunc.h"
+#include 
 
 / {
model = "Atmel SAMA5D2 Xplained";
@@ -91,6 +92,22 @@
status = "okay";
};
 
+   sdmmc0: sdio-host@a000 {
+   bus-width = <8>;
+   pinctrl-names = "default";
+   pinctrl-0 = <_sdmmc0_default>;
+   non-removable;
+   mmc-ddr-1_8v;
+   status = "okay";
+   };
+
+   sdmmc1: sdio-host@b000 {
+   bus-width = <4>;
+   pinctrl-names = "default";
+   pinctrl-0 = <_sdmmc1_default>;
+   status = "okay"; /* conflict with qspi0 */
+   };
+
apb {
spi0: spi@f800 {
pinctrl-names = "default";
@@ -124,12 +141,49 @@
status = "okay";
};
 
+   flx0: flexcom@f8034000 {
+   atmel,flexcom-mode = ;
+   status = "disabled"; /* conflict with ISC_D2 & 
ISC_D3 data pins */
+
+   uart5: serial@200 {
+   compatible = "atmel,at91sam9260-usart";
+   reg = <0x200 0x200>;
+   interrupts = <19 IRQ_TYPE_LEVEL_HIGH 7>;
+   clocks = <_clk>;
+   clock-names = "usart";
+   pinctrl-names = "default";
+   pinctrl-0 = <_flx0_default>;
+   atmel,fifo-size = <32>;
+   status = "okay";
+   };
+   };
+
uart3: serial@fc008000 {
pinctrl-names = "default";
pinctrl-0 = <_uart3_default>;
status = "okay";
};
 
+   flx4: flexcom@fc018000 {
+   atmel,flexcom-mode = ;
+   status = "okay";
+
+   i2c2: i2c@600 {
+   compatible = "atmel,sama5d2-i2c";
+   reg = <0x600 0x200>;
+   interrupts = <23 IRQ_TYPE_LEVEL_HIGH 7>;
+   dmas = <0>, <0>;
+   dma-names = "tx", "rx";
+   #address-cells = <1>;
+   #size-cells = <0>;
+   clocks = <_clk>;
+   pinctrl-names = "default";
+   pinctrl-0 = <_flx4_default>;
+   atmel,fifo-size = <16>;
+   status = "okay";
+   };
+   };
+
i2c1: i2c@fc028000 {
dmas = <0>, <0>;
pinctrl-names = "default";
@@ -144,6 +198,18 @@
};
 
pinctrl@fc038000 {
+   pinctrl_flx0_default: flx0_default {
+   pinmux = ,
+;
+   bias-disable;
+   };
+
+   pinctrl_flx4_default: flx4_default {
+   pinmux = ,
+;
+   bias-disable;
+   };
+
pinctrl_i2c0_default: i2c0_default {
pinmux = ,
 ;
@@ -170,6 +236,46 @@
bias-disable;
};
 
+   pinctrl_sdmmc0_default: sdmmc0_default {
+   cmd_data {
+   pinmux = ,
+

[PATCH v3 5/5] KVM: nVMX: expose VPID capability to L1

2015-10-14 Thread Wanpeng Li

Expose VPID capability to L1. For nested guests, we don't do anything
specific for single context invalidation. Hence, only advertise support
for global context invalidation. The major benefit of nested VPID comes
from having separate vpids when switching between L1 and L2, and also
when L2's vCPUs are not scheduled in/out on L1.

Reviewed-by: Wincy Van 
Signed-off-by: Wanpeng Li 
---
 arch/x86/kvm/vmx.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2a54cc7..0b558ae 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2622,7 +2622,11 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx 
*vmx)
} else
vmx->nested.nested_vmx_ept_caps = 0;
 
-   vmx->nested.nested_vmx_vpid_caps = 0;
+   if (enable_vpid)
+   vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
+   VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
+   else
+   vmx->nested.nested_vmx_vpid_caps = 0;
 
if (enable_unrestricted_guest)
vmx->nested.nested_vmx_secondary_ctls_high |=
@@ -2739,7 +2743,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 
msr_index, u64 *pdata)
break;
case MSR_IA32_VMX_EPT_VPID_CAP:
/* Currently, no nested vpid support */
-   *pdata = vmx->nested.nested_vmx_ept_caps;
+   *pdata = vmx->nested.nested_vmx_ept_caps |
+   ((u64)vmx->nested.nested_vmx_vpid_caps << 32);
break;
default:
return 1;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 4/5] KVM: nVMX: nested VPID emulation

2015-10-14 Thread Wanpeng Li

VPID is used to tag address space and avoid a TLB flush. Currently L0 uses
the same VPID to run L1 and all its guests. KVM flushes VPID when switching
between L1 and L2.

This patch advertises VPID to the L1 hypervisor, then address space of L1
and L2 can be separately treated and avoid TLB flush when switching between
L1 and L2. For each nested vmentry, if vpid12 is changed, reuse shadow vpid
w/ an invvpid.

Performance:

run lmbench on L2 w/ 3.5 kernel.

Context switching - times in microseconds - smaller is better
-
Host OS  2p/0K 2p/16K 2p/64K 8p/16K 8p/64K 16p/16K 16p/64K
 ctxsw  ctxsw  ctxsw ctxsw  ctxsw   ctxsw   ctxsw
- - -- -- -- -- -- --- ---
kernelLinux 3.5.0-1 1.2200 1.3700 1.4500 4.7800 2.3300 5.6 2.88000  
nested VPID
kernelLinux 3.5.0-1 1.2600 1.4300 1.5600   12.7   12.9 3.49000 7.46000  
vanilla

Reviewed-by: Jan Kiszka 
Reviewed-by: Wincy Van 
Signed-off-by: Wanpeng Li 
---
 arch/x86/kvm/vmx.c | 39 ---
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ca0b526..2a54cc7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -426,6 +426,9 @@ struct nested_vmx {
/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
u64 vmcs01_debugctl;
 
+   u16 vpid02;
+   u16 last_vpid;
+
u32 nested_vmx_procbased_ctls_low;
u32 nested_vmx_procbased_ctls_high;
u32 nested_vmx_true_procbased_ctls_low;
@@ -1213,6 +1216,11 @@ static inline bool 
nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
 }
 
+static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
+{
+   return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
+}
+
 static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
 {
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
@@ -2590,6 +2598,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx 
*vmx)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+   SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING |
@@ -6832,6 +6841,7 @@ static void free_nested(struct vcpu_vmx *vmx)
return;
 
vmx->nested.vmxon = false;
+   free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx);
if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs);
@@ -7393,7 +7403,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
return 1;
}
-   vmx_flush_tlb(vcpu);
+   __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
nested_vmx_succeed(vcpu);
break;
default:
@@ -8773,8 +8783,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
goto free_vmcs;
}
 
-   if (nested)
+   if (nested) {
nested_vmx_setup_ctls_msrs(vmx);
+   vmx->nested.vpid02 = allocate_vpid();
+   }
 
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
@@ -8795,6 +8807,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
return >vcpu;
 
 free_vmcs:
+   free_vpid(vmx->nested.vpid02);
free_loaded_vmcs(vmx->loaded_vmcs);
 free_msrs:
kfree(vmx->guest_msrs);
@@ -9679,12 +9692,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, 
struct vmcs12 *vmcs12)
 
if (enable_vpid) {
/*
-* Trivially support vpid by letting L2s share their parent
-* L1's vpid. TODO: move to a more elaborate solution, giving
-* each L2 its own vpid and exposing the vpid feature to L1.
+* There is no direct mapping between vpid02 and vpid12, the
+* vpid02 is per-vCPU for L0 and reused while the value of
+* vpid12 is changed w/ one invvpid during nested vmentry.
+* The vpid12 is allocated by L1 for L2, so it will not
+* influence global bitmap(for vpid01 and vpid02 allocation)
+* even if spawn a lot of nested vCPUs.
 */
-   vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
-   vmx_flush_tlb(vcpu);
+   if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
+   vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
+   if

Re: [PATCHv3 02/11] arm64: Handle section maps for swapper/idmap

2015-10-14 Thread Mark Rutland

On Wed, Oct 14, 2015 at 12:20:25PM +0100, Suzuki K. Poulose wrote:
> We use section maps with 4K page size to create the swapper/idmaps.
> So far we have used !64K or 4K checks to handle the case where we
> use the section maps.
> This patch adds a new symbol, ARM64_SWAPPER_USES_SECTION_MAPS, to
> handle cases where we use section maps, instead of using the page size
> symbols.
> 
> Cc: Ard Biesheuvel 
> Cc: Mark Rutland 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Signed-off-by: Suzuki K. Poulose 
> ---
>  arch/arm64/include/asm/kernel-pgtable.h |   31 -
>  arch/arm64/mm/mmu.c |   72 
> ++-
>  2 files changed, 52 insertions(+), 51 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kernel-pgtable.h 
> b/arch/arm64/include/asm/kernel-pgtable.h
> index 622929d..5876a36 100644
> --- a/arch/arm64/include/asm/kernel-pgtable.h
> +++ b/arch/arm64/include/asm/kernel-pgtable.h
> @@ -19,6 +19,13 @@
>  #ifndef __ASM_KERNEL_PGTABLE_H
>  #define __ASM_KERNEL_PGTABLE_H
>  
> +/* With 4K pages, we use section maps. */
> +#ifdef CONFIG_ARM64_4K_PAGES
> +#define ARM64_SWAPPER_USES_SECTION_MAPS 1
> +#else
> +#define ARM64_SWAPPER_USES_SECTION_MAPS 0
> +#endif

The comment is somewhat redundant. It would be better to state why we do
this for 4K and not 64K (or 16K).

> @@ -406,14 +407,11 @@ static void __init map_mem(void)
>* memory addressable from the initial direct kernel mapping.
>*
>* The initial direct kernel mapping, located at swapper_pg_dir, gives
> -  * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
> -  * PHYS_OFFSET (which must be aligned to 2MB as per
> -  * Documentation/arm64/booting.txt).
> +  * us PUD_SIZE (with SECTION maps, i.e, 4K) or PMD_SIZE (without
> +  * SECTION maps, i.e, 64K pages) memory starting from PHYS_OFFSET
> +  * (which must be aligned to 2MB as per 
> Documentation/arm64/booting.txt).

This didn't seem to get updated for 16K later in the series, unless I
missed something.

Perhaps drop the mention of 4K / 64K entirely here?

> @@ -551,7 +552,7 @@ int kern_addr_valid(unsigned long addr)
>   return pfn_valid(pte_pfn(*pte));
>  }
>  #ifdef CONFIG_SPARSEMEM_VMEMMAP
> -#ifdef CONFIG_ARM64_64K_PAGES
> +#if !ARM64_SWAPPER_USES_SECTION_MAPS

This leaves the comments on the #else and #endif stale. Please update
those too.

Otherwise this looks good!

Thanks,
Mark.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 3/5] KVM: nVMX: emulate the INVVPID instruction

2015-10-14 Thread Wanpeng Li

Add the INVVPID instruction emulation.

Reviewed-by: Wincy Van 
Signed-off-by: Wanpeng Li 
---
 arch/x86/include/asm/vmx.h |  1 +
 arch/x86/kvm/vmx.c | 61 +-
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d25f32a..aa336ff 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -416,6 +416,7 @@ enum vmcs_field {
 #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT  (1ull << 26)
 
+#define VMX_VPID_INVVPID_BIT(1ull << 0) /* (32 - 32) */
 #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT  (1ull << 9) /* (41 - 32) */
 #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT  (1ull << 10) /* (42 - 32) */
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d21b9a6..ca0b526 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -442,6 +442,7 @@ struct nested_vmx {
u32 nested_vmx_misc_low;
u32 nested_vmx_misc_high;
u32 nested_vmx_ept_caps;
+   u32 nested_vmx_vpid_caps;
 };
 
 #define POSTED_INTR_ON  0
@@ -2612,6 +2613,8 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx 
*vmx)
} else
vmx->nested.nested_vmx_ept_caps = 0;
 
+   vmx->nested.nested_vmx_vpid_caps = 0;
+
if (enable_unrestricted_guest)
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_UNRESTRICTED_GUEST;
@@ -7343,7 +7346,63 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 
 static int handle_invvpid(struct kvm_vcpu *vcpu)
 {
-   kvm_queue_exception(vcpu, UD_VECTOR);
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   u32 vmx_instruction_info;
+   unsigned long type, types;
+   gva_t gva;
+   struct x86_exception e;
+   int vpid;
+
+   if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+ SECONDARY_EXEC_ENABLE_VPID) ||
+   !(vmx->nested.nested_vmx_vpid_caps & 
VMX_VPID_INVVPID_BIT)) {
+   kvm_queue_exception(vcpu, UD_VECTOR);
+   return 1;
+   }
+
+   if (!nested_vmx_check_permission(vcpu))
+   return 1;
+
+   vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+   type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+   types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
+
+   if (!(types & (1UL << type))) {
+   nested_vmx_failValid(vcpu,
+   VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+   return 1;
+   }
+
+   /* according to the intel vmx instruction reference, the memory
+* operand is read even if it isn't needed (e.g., for type==global)
+*/
+   if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+   vmx_instruction_info, false, ))
+   return 1;
+   if (kvm_read_guest_virt(>arch.emulate_ctxt, gva, ,
+   sizeof(u32), )) {
+   kvm_inject_page_fault(vcpu, );
+   return 1;
+   }
+
+   switch (type) {
+   case VMX_VPID_EXTENT_ALL_CONTEXT:
+   if (get_vmcs12(vcpu)->virtual_processor_id == 0) {
+   nested_vmx_failValid(vcpu,
+   VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+   return 1;
+   }
+   vmx_flush_tlb(vcpu);
+   nested_vmx_succeed(vcpu);
+   break;
+   default:
+   /* Trap single context invalidation invvpid calls */
+   BUG_ON(1);
+   break;
+   }
+
+   skip_emulated_instruction(vcpu);
return 1;
 }
 
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 0/5] KVM: nVMX: nested VPID emulation

2015-10-14 Thread Wanpeng Li

v2 -> v3:
 * separate nested_vmx_vpid_caps and move checks to patch 3/5,
   only rejoin them when reading the MSR.

v1 -> v2:
 * set bit 32 of the VMX_EPT_VPID_CAP MSR
 * check against the supported types in the implementation of 
   the INVVPID instruction
 * the memory operand must always be read even if it isn't needed 
   (e.g., for type==global), similar to INVEPT
 * for single-context invalidation to check that VPID != 0, though in 
   practice that doesn't matter because we don't want to support
   single-context invalidation
 * don't set msr's ept related bits if !enable_ept 


VPID is used to tag address space and avoid a TLB flush. Currently L0 uses
the same VPID to run L1 and all its guests. KVM flushes VPID when switching
between L1 and L2.

This patch advertises VPID to the L1 hypervisor, so that the address spaces
of L1 and L2 can be treated separately and a TLB flush can be avoided when
switching between L1 and L2. For each nested vmentry, if vpid12 is changed,
reuse shadow vpid w/ an invvpid.

Performance:

run lmbench on L2 w/ 3.5 kernel.

Context switching - times in microseconds - smaller is better
-
Host OS  2p/0K 2p/16K 2p/64K 8p/16K 8p/64K 16p/16K 16p/64K
 ctxsw  ctxsw  ctxsw ctxsw  ctxsw   ctxsw   ctxsw
- - -- -- -- -- -- --- ---
kernelLinux 3.5.0-1 1.2200 1.3700 1.4500 4.7800 2.3300 5.6 2.88000  
nested VPID
kernelLinux 3.5.0-1 1.2600 1.4300 1.5600   12.7   12.9 3.49000 7.46000  
vanilla

Wanpeng Li (5):
  KVM: VMX: adjust interface to allocate/free_vpid
  KVM: VMX: introduce __vmx_flush_tlb to handle specific vpid
  KVM: nVMX: emulate the INVVPID instruction
  KVM: nVMX: nested VPID emulation
  KVM: nVMX: expose VPID capability to L1

 arch/x86/include/asm/vmx.h |   1 +
 arch/x86/kvm/vmx.c | 151 -
 2 files changed, 123 insertions(+), 29 deletions(-)

-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 1/5] KVM: VMX: adjust interface to allocate/free_vpid

2015-10-14 Thread Wanpeng Li

Adjust allocate/free_vpid so that they can be reused for the nested vpid.

Reviewed-by: Wincy Van 
Signed-off-by: Wanpeng Li 
---
 arch/x86/kvm/vmx.c | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c5c2283..1a0e336 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4277,29 +4277,28 @@ static int alloc_identity_pagetable(struct kvm *kvm)
return r;
 }
 
-static void allocate_vpid(struct vcpu_vmx *vmx)
+static int allocate_vpid(void)
 {
int vpid;
 
-   vmx->vpid = 0;
if (!enable_vpid)
-   return;
+   return 0;
spin_lock(_vpid_lock);
vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
-   if (vpid < VMX_NR_VPIDS) {
-   vmx->vpid = vpid;
+   if (vpid < VMX_NR_VPIDS)
__set_bit(vpid, vmx_vpid_bitmap);
-   }
+   else
+   vpid = 0;
spin_unlock(_vpid_lock);
+   return vpid;
 }
 
-static void free_vpid(struct vcpu_vmx *vmx)
+static void free_vpid(int vpid)
 {
-   if (!enable_vpid)
+   if (!enable_vpid || vpid == 0)
return;
spin_lock(_vpid_lock);
-   if (vmx->vpid != 0)
-   __clear_bit(vmx->vpid, vmx_vpid_bitmap);
+   __clear_bit(vpid, vmx_vpid_bitmap);
spin_unlock(_vpid_lock);
 }
 
@@ -8643,7 +8642,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 
if (enable_pml)
vmx_disable_pml(vmx);
-   free_vpid(vmx);
+   free_vpid(vmx->vpid);
leave_guest_mode(vcpu);
vmx_load_vmcs01(vcpu);
free_nested(vmx);
@@ -8662,7 +8661,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
if (!vmx)
return ERR_PTR(-ENOMEM);
 
-   allocate_vpid(vmx);
+   vmx->vpid = allocate_vpid();
 
err = kvm_vcpu_init(>vcpu, kvm, id);
if (err)
@@ -8738,7 +8737,7 @@ free_msrs:
 uninit_vcpu:
kvm_vcpu_uninit(>vcpu);
 free_vcpu:
-   free_vpid(vmx);
+   free_vpid(vmx->vpid);
kmem_cache_free(kvm_vcpu_cache, vmx);
return ERR_PTR(err);
 }
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 2/5] KVM: VMX: introduce __vmx_flush_tlb to handle specific vpid

2015-10-14 Thread Wanpeng Li

Introduce __vmx_flush_tlb() to handle a specific vpid. It will be
used by later patches. Note that the "all context" variant can
be mapped to vpid_sync_vcpu_single with vpid02 as the argument
(a nice side effect of the vpid02 design).

Reviewed-by: Wincy Van 
Signed-off-by: Wanpeng Li 
---
 arch/x86/kvm/vmx.c | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1a0e336..d21b9a6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1392,13 +1392,13 @@ static void loaded_vmcs_clear(struct loaded_vmcs 
*loaded_vmcs)
 __loaded_vmcs_clear, loaded_vmcs, 1);
 }
 
-static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
+static inline void vpid_sync_vcpu_single(int vpid)
 {
-   if (vmx->vpid == 0)
+   if (vpid == 0)
return;
 
if (cpu_has_vmx_invvpid_single())
-   __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
+   __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
 }
 
 static inline void vpid_sync_vcpu_global(void)
@@ -1407,10 +1407,10 @@ static inline void vpid_sync_vcpu_global(void)
__invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
 }
 
-static inline void vpid_sync_context(struct vcpu_vmx *vmx)
+static inline void vpid_sync_context(int vpid)
 {
if (cpu_has_vmx_invvpid_single())
-   vpid_sync_vcpu_single(vmx);
+   vpid_sync_vcpu_single(vpid);
else
vpid_sync_vcpu_global();
 }
@@ -3563,9 +3563,9 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 #endif
 
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
 {
-   vpid_sync_context(to_vmx(vcpu));
+   vpid_sync_context(vpid);
if (enable_ept) {
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
@@ -3573,6 +3573,11 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
}
 }
 
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+{
+   __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
+}
+
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 {
ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -4924,7 +4929,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool 
init_event)
vmx_fpu_activate(vcpu);
update_exception_bitmap(vcpu);
 
-   vpid_sync_context(vmx);
+   vpid_sync_context(vmx->vpid);
 }
 
 /*
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] arm64: ioremap: add ioremap_cache macro

2015-10-14 Thread yalin wang


> On Oct 14, 2015, at 16:23, Arnd Bergmann  wrote:
> 
> On Wednesday 14 October 2015 10:41:26 yalin wang wrote:
> 
>>> On Oct 13, 2015, at 23:20, Catalin Marinas  wrote:
 I'm not sure we want this. See:
 
 https://lkml.org/lkml/2015/10/9/699
>>> 
>>> Thanks Will and Arnd, I missed this. Patch reverted.
>>> 
>> i don’t understand why conflict with Dan Williams’ patch.
>> Dan Williams ’s patch also define ioremap_cache  for arch ia64  & arch sh & 
>> arch xtensa ,
>> i see this :
>> # git show   92281dee825f
>> am i miss something?
> 
> I meant the new series, see https://lkml.org/lkml/2015/10/9/716 for the
> patch that removes it again.
> 
>   Arnd
Got it ,
Thanks :)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] fix return value error

2015-10-14 Thread Leon Romanovsky

On Wed, Oct 14, 2015 at 11:17 AM, Heloise NH  wrote:
> Signed-off-by: Heloise NH 
The patch is correct; however, can you update the subject and
description to be more informative?
Please add that the new_inode() function can fail only on allocation failure.

> ---
>  drivers/infiniband/hw/ipath/ipath_fs.c | 2 +-
>  drivers/infiniband/hw/qib/qib_fs.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c 
> b/drivers/infiniband/hw/ipath/ipath_fs.c
> index 25422a3..da753bc 100644
> --- a/drivers/infiniband/hw/ipath/ipath_fs.c
> +++ b/drivers/infiniband/hw/ipath/ipath_fs.c
> @@ -53,7 +53,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry 
> *dentry,
> struct inode *inode = new_inode(dir->i_sb);
>
> if (!inode) {
> -   error = -EPERM;
> +   error = -ENOMEM;
> goto bail;
> }
>
> diff --git a/drivers/infiniband/hw/qib/qib_fs.c 
> b/drivers/infiniband/hw/qib/qib_fs.c
> index 13ef22b..a4c5a6a 100644
> --- a/drivers/infiniband/hw/qib/qib_fs.c
> +++ b/drivers/infiniband/hw/qib/qib_fs.c
> @@ -55,7 +55,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry 
> *dentry,
> struct inode *inode = new_inode(dir->i_sb);
>
> if (!inode) {
> -   error = -EPERM;
> +   error = -ENOMEM;
> goto bail;
> }
>
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] ARM: dts: omap3: keep ssi ports by default

2015-10-14 Thread Roger Quadros

On 14/10/15 14:37, Sebastian Reichel wrote:
> Hi,
> 
> On Wed, Oct 14, 2015 at 02:27:27PM +0300, Roger Quadros wrote:
>> On 14/10/15 14:19, Sebastian Reichel wrote:
>>> On Wed, Oct 14, 2015 at 01:44:16PM +0300, Roger Quadros wrote:
 Let's keep the SSI ports disabled in the omap3.dtsi to avoid
 getting the following noise on the console for boards that don't
 use the SSI ports.

 "omap_ssi_port 4805a000.ssi-port: DT data is missing cawake gpio (err=-2)"

 As omap3-n900 uses one SSI port, mark it enabled there.
>>
>> Would it be preferable to disable the ssi-controller node as well in the
>> omap3.dtsi file?
> 
> If I remember it correctly, existing, but unused IP-Cores
> should not be disabled in DT, so that hwmod picks them up
> for power management.

OK.
> 
> Note, that it actually is disabled in omap3.dtsi and then enabled in
> omap34xx/omap36xx dts files (the other variants do not have an ssi
> module).

Thanks for the info :)

cheers,
-roger



signature.asc
Description: OpenPGP digital signature

Re: [PATCH v6 4/5] devfreq_cooling: add trace information

2015-10-14 Thread Javi Merino

Hi Steve,

On Thu, Sep 10, 2015 at 06:19:28PM +0100, Steven Rostedt wrote:
> On Thu, 10 Sep 2015 18:09:31 +0100
> Javi Merino  wrote:
> 
> > Tracing is useful for debugging and performance tuning.  Add similar
> > traces to what's present in the cpu cooling device.
> > 
> > Cc: Zhang Rui 
> > Cc: Eduardo Valentin 
> > Cc: Steven Rostedt 
> > Cc: Ingo Molnar 
> > Signed-off-by: Javi Merino 
> > ---
> >  drivers/thermal/devfreq_cooling.c |  6 +
> >  include/trace/events/thermal.h| 53 
> > +++
> >  2 files changed, 59 insertions(+)
> > 
> > diff --git a/drivers/thermal/devfreq_cooling.c 
> > b/drivers/thermal/devfreq_cooling.c
> > index a032c5d5c374..a27206815066 100644
> > --- a/drivers/thermal/devfreq_cooling.c
> > +++ b/drivers/thermal/devfreq_cooling.c
> > @@ -25,6 +25,8 @@
> >  #include 
> >  #include 
> >  
> > +#include 
> > +
> >  static DEFINE_MUTEX(devfreq_lock);
> >  static DEFINE_IDR(devfreq_idr);
> >  
> > @@ -293,6 +295,9 @@ static int devfreq_cooling_get_requested_power(struct 
> > thermal_cooling_device *cd
> > /* Get static power */
> > static_power = get_static_power(dfc, freq);
> >  
> > +   trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power,
> > + static_power);
> > +
> > *power = dyn_power + static_power;
> >  
> > return 0;
> > @@ -348,6 +353,7 @@ static int devfreq_cooling_power2state(struct 
> > thermal_cooling_device *cdev,
> > break;
> >  
> > *state = i;
> > +   trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
> 
> I'm curious, does changing the above to:
> 
>   trace_thermal_power_devfreq_limit(cdev, freq, i, power);
> 
> make the compiled code better?
> 
> A tracepoint does some whacky things, and gcc may not optimize this.
> 
> The rest looks fine to me.

Can I treat that last statement as an Acked-by?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/5] mtd: nand: omap2: Support parsing dma channel information from DT

2015-10-14 Thread Roger Quadros

Franklin,

On 14/10/15 14:36, Roger Quadros wrote:
> On 13/10/15 04:38, Franklin S Cooper Jr wrote:
>> Switch from dma_request_channel to allow passing dma channel
>> information from DT rather than hardcoding a value.
>>
>> Signed-off-by: Franklin S Cooper Jr 
> 
> Acked-by: Roger Quadros 
> 
>> ---
>>  drivers/mtd/nand/omap2.c | 4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
>> index d0f2620..957c32f 100644
>> --- a/drivers/mtd/nand/omap2.c
>> +++ b/drivers/mtd/nand/omap2.c
>> @@ -1866,7 +1866,9 @@ static int omap_nand_probe(struct platform_device 
>> *pdev)
>>  dma_cap_zero(mask);
>>  dma_cap_set(DMA_SLAVE, mask);
>>  sig = OMAP24XX_DMA_GPMC;
>> -info->dma = dma_request_channel(mask, omap_dma_filter_fn, );
>> +info->dma = dma_request_slave_channel_compat(mask,
>> +omap_dma_filter_fn, , pdev->dev.parent, "rxtx");
>> +

Just discovered that you are using the parent device node.

How about moving the dma bindings to the nand node instead and using
pdev->dev here?

>>  if (!info->dma) {
>>  dev_err(>dev, "DMA engine request failed\n");
>>  err = -ENXIO;
>>
> 

cheers,
-roger

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] n_tty: Remove reader wakeups for TTY_BREAK/TTY_PARITY chars

2015-10-14 Thread Peter Hurley

Waking the reader immediately upon receipt of TTY_BREAK or TTY_PARITY
chars has no effect on the outcome of read():
1. Only non-canonical/EXTPROC mode applies since canonical mode
   will not return data until a line termination is received anyway
2. EXTPROC mode - the reader will always be woken by the input worker
3. Non-canonical modes
   a. MIN == 0, TIME == 0
   b. MIN == 0, TIME > 0
   c. MIN > 0, TIME > 0
  minimum_to_wake is always 1 in these modes so the reader will always
  be woken by the input worker
   d. MIN > 0, TIME == 0
  although the reader will not be woken by the input worker unless the
  minimum data is received, the reader would not otherwise have
  returned the received data

Signed-off-by: Peter Hurley 
---
 drivers/tty/n_tty.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index bd383c4..53ba0d6 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1180,8 +1180,6 @@ static void n_tty_receive_break(struct tty_struct *tty)
put_tty_queue('\0', ldata);
}
put_tty_queue('\0', ldata);
-   if (waitqueue_active(>read_wait))
-   wake_up_interruptible_poll(>read_wait, POLLIN);
 }
 
 /**
@@ -1238,8 +1236,6 @@ static void n_tty_receive_parity_error(struct tty_struct 
*tty, unsigned char c)
put_tty_queue('\0', ldata);
} else
put_tty_queue(c, ldata);
-   if (waitqueue_active(>read_wait))
-   wake_up_interruptible_poll(>read_wait, POLLIN);
 }
 
 static void
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 02/24] ARM: common: edma: Remove unused functions

2015-10-14 Thread Peter Ujfalusi

We no longer have users for these functions so they can be removed.
Remove also unused enums from the header file.

Signed-off-by: Peter Ujfalusi 
---
 arch/arm/common/edma.c | 376 -
 include/linux/platform_data/edma.h |  33 
 2 files changed, 409 deletions(-)

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 56fc339571f9..e9c4cb16a47e 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -510,62 +510,6 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
return IRQ_HANDLED;
 }
 
-static int reserve_contiguous_slots(int ctlr, unsigned int id,
-unsigned int num_slots,
-unsigned int start_slot)
-{
-   int i, j;
-   unsigned int count = num_slots;
-   int stop_slot = start_slot;
-   DECLARE_BITMAP(tmp_inuse, EDMA_MAX_PARAMENTRY);
-
-   for (i = start_slot; i < edma_cc[ctlr]->num_slots; ++i) {
-   j = EDMA_CHAN_SLOT(i);
-   if (!test_and_set_bit(j, edma_cc[ctlr]->edma_inuse)) {
-   /* Record our current beginning slot */
-   if (count == num_slots)
-   stop_slot = i;
-
-   count--;
-   set_bit(j, tmp_inuse);
-
-   if (count == 0)
-   break;
-   } else {
-   clear_bit(j, tmp_inuse);
-
-   if (id == EDMA_CONT_PARAMS_FIXED_EXACT) {
-   stop_slot = i;
-   break;
-   } else {
-   count = num_slots;
-   }
-   }
-   }
-
-   /*
-* We have to clear any bits that we set
-* if we run out parameter RAM slots, i.e we do find a set
-* of contiguous parameter RAM slots but do not find the exact number
-* requested as we may reach the total number of parameter RAM slots
-*/
-   if (i == edma_cc[ctlr]->num_slots)
-   stop_slot = i;
-
-   j = start_slot;
-   for_each_set_bit_from(j, tmp_inuse, stop_slot)
-   clear_bit(j, edma_cc[ctlr]->edma_inuse);
-
-   if (count)
-   return -EBUSY;
-
-   for (j = i - num_slots + 1; j <= i; ++j)
-   memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(j),
-   _paramset, PARM_SIZE);
-
-   return EDMA_CTLR_CHAN(ctlr, i - num_slots + 1);
-}
-
 static int prepare_unused_channel_list(struct device *dev, void *data)
 {
struct platform_device *pdev = to_platform_device(dev);
@@ -818,186 +762,11 @@ void edma_free_slot(unsigned slot)
 }
 EXPORT_SYMBOL(edma_free_slot);
 
-
-/**
- * edma_alloc_cont_slots- alloc contiguous parameter RAM slots
- * The API will return the starting point of a set of
- * contiguous parameter RAM slots that have been requested
- *
- * @id: can only be EDMA_CONT_PARAMS_ANY or EDMA_CONT_PARAMS_FIXED_EXACT
- * or EDMA_CONT_PARAMS_FIXED_NOT_EXACT
- * @count: number of contiguous Paramter RAM slots
- * @slot  - the start value of Parameter RAM slot that should be passed if id
- * is EDMA_CONT_PARAMS_FIXED_EXACT or EDMA_CONT_PARAMS_FIXED_NOT_EXACT
- *
- * If id is EDMA_CONT_PARAMS_ANY then the API starts looking for a set of
- * contiguous Parameter RAM slots from parameter RAM 64 in the case of
- * DaVinci SOCs and 32 in the case of DA8xx SOCs.
- *
- * If id is EDMA_CONT_PARAMS_FIXED_EXACT then the API starts looking for a
- * set of contiguous parameter RAM slots from the "slot" that is passed as an
- * argument to the API.
- *
- * If id is EDMA_CONT_PARAMS_FIXED_NOT_EXACT then the API initially tries
- * starts looking for a set of contiguous parameter RAMs from the "slot"
- * that is passed as an argument to the API. On failure the API will try to
- * find a set of contiguous Parameter RAM slots from the remaining Parameter
- * RAM slots
- */
-int edma_alloc_cont_slots(unsigned ctlr, unsigned int id, int slot, int count)
-{
-   /*
-* The start slot requested should be greater than
-* the number of channels and lesser than the total number
-* of slots
-*/
-   if ((id != EDMA_CONT_PARAMS_ANY) &&
-   (slot < edma_cc[ctlr]->num_channels ||
-   slot >= edma_cc[ctlr]->num_slots))
-   return -EINVAL;
-
-   /*
-* The number of parameter RAM slots requested cannot be less than 1
-* and cannot be more than the number of slots minus the number of
-* channels
-*/
-   if (count < 1 || count >
-   (edma_cc[ctlr]->num_slots - edma_cc[ctlr]->num_channels))
-   return -EINVAL;
-
-   switch (id) {
-   case EDMA_CONT_PARAMS_ANY:
-   return reserve_contiguous_slots(ctlr, id, count,
-

[PATCH v5 05/24] ARM/dmaengine: edma: Move of_dma_controller_register to the dmaengine driver

2015-10-14 Thread Peter Ujfalusi

If the of_dma_controller is registered in the non-dmaengine driver we could
have a race condition:
the of_dma_controller has been registered, but the dmaengine driver is not
yet probed. Drivers requesting DMA channels during this window will fail
since we do not yet have dmaengine drivers registered.

Signed-off-by: Peter Ujfalusi 
---
 arch/arm/common/edma.c | 10 --
 drivers/dma/edma.c | 16 
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 7c2fe527e53b..d82fceda13a3 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -29,7 +29,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 
@@ -1191,10 +1190,6 @@ static int edma_of_parse_dt(struct device *dev,
return ret;
 }
 
-static struct of_dma_filter_info edma_filter_info = {
-   .filter_fn = edma_filter_fn,
-};
-
 static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev,
  struct device_node *node)
 {
@@ -1209,11 +1204,6 @@ static struct edma_soc_info 
*edma_setup_info_from_dt(struct device *dev,
if (ret)
return ERR_PTR(ret);
 
-   dma_cap_set(DMA_SLAVE, edma_filter_info.dma_cap);
-   dma_cap_set(DMA_CYCLIC, edma_filter_info.dma_cap);
-   of_dma_controller_register(dev->of_node, of_dma_simple_xlate,
-  _filter_info);
-
return info;
 }
 #else
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 19fa49d6f555..fcb4680efed7 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -987,9 +988,14 @@ static void edma_dma_init(struct edma_cc *ecc, struct 
dma_device *dma,
INIT_LIST_HEAD(>channels);
 }
 
+static struct of_dma_filter_info edma_filter_info = {
+   .filter_fn = edma_filter_fn,
+};
+
 static int edma_probe(struct platform_device *pdev)
 {
struct edma_cc *ecc;
+   struct device_node *parent_node = pdev->dev.parent->of_node;
int ret;
 
ret = dma_set_mask_and_coherent(>dev, DMA_BIT_MASK(32));
@@ -1024,6 +1030,13 @@ static int edma_probe(struct platform_device *pdev)
 
platform_set_drvdata(pdev, ecc);
 
+   if (parent_node) {
+   dma_cap_set(DMA_SLAVE, edma_filter_info.dma_cap);
+   dma_cap_set(DMA_CYCLIC, edma_filter_info.dma_cap);
+   of_dma_controller_register(parent_node, of_dma_simple_xlate,
+  _filter_info);
+   }
+
dev_info(>dev, "TI EDMA DMA engine driver\n");
 
return 0;
@@ -1037,7 +1050,10 @@ static int edma_remove(struct platform_device *pdev)
 {
struct device *dev = >dev;
struct edma_cc *ecc = dev_get_drvdata(dev);
+   struct device_node *parent_node = pdev->dev.parent->of_node;
 
+   if (parent_node)
+   of_dma_controller_free(parent_node);
dma_async_device_unregister(>dma_slave);
edma_free_slot(ecc->dummy_slot);
 
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 5/5] ARM: OMAP2+: Update gpmc and nand DT binding documentation

2015-10-14 Thread Roger Quadros

On 13/10/15 04:38, Franklin S Cooper Jr wrote:
> Add additional details to the gpmc and nand documentation to clarify
> what is needed to enable nand dma prefetch.
> 
> Signed-off-by: Franklin S Cooper Jr 
> ---
>  Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt | 7 
> ++-
>  Documentation/devicetree/bindings/mtd/gpmc-nand.txt| 2 ++
>  2 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git 
> a/Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt 
> b/Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
> index 704be93..b1e2802 100644
> --- a/Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
> +++ b/Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
> @@ -33,6 +33,10 @@ Required properties:
>   As this will change in the future, filling correct
>   values here is a requirement.
>  
> +GPMC DMA information. Required only when GPMC nand prefetch is enabled.
> + - dmas  GPMC nand prefetch dma channel

s/nand/NAND

> + - dma-names DMA channel name use as a reference within the Nand 
> driver

s/Nand/NAND

This is inevitably going to be "rxtx". So why not say that it should be "rxtx"?

Should these bindings go in bindings/mtd/gpmc-nand.txt instead?

> +
>  Timing properties for child nodes. All are optional and default to 0.
>  
>   - gpmc,sync-clk-ps: Minimum clock period for synchronous mode, in 
> picoseconds
> @@ -119,7 +123,8 @@ Example for an AM33xx board:
>   ti,hwmods = "gpmc";
>   reg = <0x5000 0x2000>;
>   interrupts = <100>;
> -
> + dmas = < 52>;
> + dma-names = "rxtx";

Why not define these in the NAND node instead of gpmc node?

>   gpmc,num-cs = <8>;
>   gpmc,num-waitpins = <2>;
>   #address-cells = <2>;
> diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt 
> b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
> index 253e6de..4b0c240 100644
> --- a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
> +++ b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
> @@ -61,6 +61,8 @@ Example for an AM33xx board:
>   ti,hwmods = "gpmc";
>   reg = <0x5000 0x36c>;
>   interrupts = <100>;
> + dmas = < 52>;
> + dma-names = "rxtx";
>   gpmc,num-cs = <8>;
>   gpmc,num-waitpins = <2>;
>   #address-cells = <2>;
> 

cheers,
-roger
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 08/24] ARM/dmaengine: edma: Remove limitation on the number of eDMA controllers

2015-10-14 Thread Peter Ujfalusi

Since the driver stack no longer depends on lookup with id number in a
global array of pointers, the limitation on the number of eDMAs is no
longer needed. We can handle as many eDMAs in legacy and DT boot as we have
memory for them to allocate the needed structures.

Signed-off-by: Peter Ujfalusi 
---
 arch/arm/common/edma.c | 22 +-
 drivers/dma/edma.c | 17 -
 2 files changed, 13 insertions(+), 26 deletions(-)

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 03692520812a..5b747f1bc8b5 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -1227,24 +1227,7 @@ static int edma_probe(struct platform_device *pdev)
.parent = >dev,
};
 
-   /* When booting with DT the pdev->id is -1 */
-   if (dev_id < 0)
-   dev_id = arch_num_cc;
-
-   if (dev_id >= EDMA_MAX_CC) {
-   dev_err(dev,
-   "eDMA3 with device id 0 and 1 is supported (id: %d)\n",
-   dev_id);
-   return -EINVAL;
-   }
-
if (node) {
-   /* Check if this is a second instance registered */
-   if (arch_num_cc) {
-   dev_err(dev, "only one EDMA instance is supported via 
DT\n");
-   return -ENODEV;
-   }
-
info = edma_setup_info_from_dt(dev, node);
if (IS_ERR(info)) {
dev_err(dev, "failed to get DT data\n");
@@ -1278,6 +1261,11 @@ static int edma_probe(struct platform_device *pdev)
 
cc->dev = dev;
cc->id = dev_id;
+   /* When booting with DT the pdev->id is -1 */
+   if (dev_id < 0) {
+   cc->id = 0;
+   dev_id = arch_num_cc;
+   }
dev_set_drvdata(dev, cc);
 
cc->base = devm_ioremap_resource(dev, mem);
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 53d48b2a700d..fc91ab9dd1bb 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -991,14 +991,12 @@ static void edma_dma_init(struct edma_cc *ecc, struct 
dma_device *dma,
INIT_LIST_HEAD(>channels);
 }
 
-static struct of_dma_filter_info edma_filter_info = {
-   .filter_fn = edma_filter_fn,
-};
-
 static int edma_probe(struct platform_device *pdev)
 {
struct edma_cc *ecc;
struct device_node *parent_node = pdev->dev.parent->of_node;
+   struct platform_device *parent_pdev =
+   to_platform_device(pdev->dev.parent);
int ret;
 
ret = dma_set_mask_and_coherent(>dev, DMA_BIT_MASK(32));
@@ -1015,7 +1013,10 @@ static int edma_probe(struct platform_device *pdev)
if (!ecc->cc)
return -ENODEV;
 
-   ecc->ctlr = pdev->id;
+   ecc->ctlr = parent_pdev->id;
+   if (ecc->ctlr < 0)
+   ecc->ctlr = 0;
+
ecc->dummy_slot = edma_alloc_slot(ecc->cc, EDMA_SLOT_ANY);
if (ecc->dummy_slot < 0) {
dev_err(>dev, "Can't allocate PaRAM dummy slot\n");
@@ -1038,10 +1039,8 @@ static int edma_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, ecc);
 
if (parent_node) {
-   dma_cap_set(DMA_SLAVE, edma_filter_info.dma_cap);
-   dma_cap_set(DMA_CYCLIC, edma_filter_info.dma_cap);
-   of_dma_controller_register(parent_node, of_dma_simple_xlate,
-  _filter_info);
+   of_dma_controller_register(parent_node, of_dma_xlate_by_chan_id,
+  >dma_slave);
}
 
dev_info(>dev, "TI EDMA DMA engine driver\n");
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 10/24] ARM: davinci: Add dma_mask to eDMA devices

2015-10-14 Thread Peter Ujfalusi

The upcoming change to merge arch/arm/common/edma.c into
drivers/dma/edma.c will need this change when booting daVinci devices in
non-DT mode.

Signed-off-by: Peter Ujfalusi 
Acked-by: Sekhar Nori 
---
 arch/arm/mach-davinci/devices-da8xx.c | 2 ++
 arch/arm/mach-davinci/dm355.c | 1 +
 arch/arm/mach-davinci/dm644x.c| 1 +
 arch/arm/mach-davinci/dm646x.c| 1 +
 4 files changed, 5 insertions(+)

diff --git a/arch/arm/mach-davinci/devices-da8xx.c 
b/arch/arm/mach-davinci/devices-da8xx.c
index 9f7d266faa0c..28c90bc372bd 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -216,6 +216,7 @@ static struct resource da850_edma1_resources[] = {
 static const struct platform_device_info da8xx_edma0_device __initconst = {
.name   = "edma",
.id = 0,
+   .dma_mask   = DMA_BIT_MASK(32),
.res= da8xx_edma0_resources,
.num_res= ARRAY_SIZE(da8xx_edma0_resources),
.data   = _edma0_pdata,
@@ -225,6 +226,7 @@ static const struct platform_device_info da8xx_edma0_device 
__initconst = {
 static const struct platform_device_info da850_edma1_device __initconst = {
.name   = "edma",
.id = 1,
+   .dma_mask   = DMA_BIT_MASK(32),
.res= da850_edma1_resources,
.num_res= ARRAY_SIZE(da850_edma1_resources),
.data   = _edma1_pdata,
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 5f10c6695e31..609950b8c191 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -616,6 +616,7 @@ static struct resource edma_resources[] = {
 static const struct platform_device_info dm355_edma_device __initconst = {
.name   = "edma",
.id = 0,
+   .dma_mask   = DMA_BIT_MASK(32),
.res= edma_resources,
.num_res= ARRAY_SIZE(edma_resources),
.data   = _edma_pdata,
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index aa3453b40d5f..d38f5049d56e 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -545,6 +545,7 @@ static struct resource edma_resources[] = {
 static const struct platform_device_info dm644x_edma_device __initconst = {
.name   = "edma",
.id = 0,
+   .dma_mask   = DMA_BIT_MASK(32),
.res= edma_resources,
.num_res= ARRAY_SIZE(edma_resources),
.data   = _edma_pdata,
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 79c1d8917dd3..70eb42725eec 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -592,6 +592,7 @@ static struct resource edma_resources[] = {
 static const struct platform_device_info dm646x_edma_device __initconst = {
.name   = "edma",
.id = 0,
+   .dma_mask   = DMA_BIT_MASK(32),
.res= edma_resources,
.num_res= ARRAY_SIZE(edma_resources),
.data   = _edma_pdata,
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 12/24] dmaengine: edma: Allocate memory dynamically for bitmaps and structures

2015-10-14 Thread Peter Ujfalusi

Instead of using defines to specify the size of different arrays and
bitmaps, allocate the memory for them based on the information we get from
the HW itself.
Since these defines are set based on the worst case, there are devices
where they are not valid.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 62 ++
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index aeb67e0cc523..d5a76c67f83f 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -113,23 +113,6 @@
 #define CHMAP_EXISTBIT(24)
 
 /*
- * This will go away when the private EDMA API is folded
- * into this driver and the platform device(s) are
- * instantiated in the arch code. We can only get away
- * with this simplification because DA8XX may not be built
- * in the same kernel image with other DaVinci parts. This
- * avoids having to sprinkle dmaengine driver platform devices
- * and data throughout all the existing board files.
- */
-#ifdef CONFIG_ARCH_DAVINCI_DA8XX
-#define EDMA_CTLRS 2
-#define EDMA_CHANS 32
-#else
-#define EDMA_CTLRS 1
-#define EDMA_CHANS 64
-#endif /* CONFIG_ARCH_DAVINCI_DA8XX */
-
-/*
  * Max of 20 segments per channel to conserve PaRAM slots
  * Also note that MAX_NR_SG should be atleast the no.of periods
  * that are required for ASoC, otherwise DMA prep calls will
@@ -140,16 +123,12 @@
 #define EDMA_MAX_SLOTS MAX_NR_SG
 #define EDMA_DESCRIPTORS   16
 
-#define EDMA_MAX_PARAMENTRY 512
-
 #define EDMA_CHANNEL_ANY   -1  /* for edma_alloc_channel() */
 #define EDMA_SLOT_ANY  -1  /* for edma_alloc_slot() */
 #define EDMA_CONT_PARAMS_ANY1001
 #define EDMA_CONT_PARAMS_FIXED_EXACT1002
 #define EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003
 
-#define EDMA_MAX_CC   2
-
 /* PaRAM slots are laid out like this */
 struct edmacc_param {
u32 opt;
@@ -256,22 +235,22 @@ struct edma_cc {
/* The edma_inuse bit for each PaRAM slot is clear unless the
 * channel is in use ... by ARM or DSP, for QDMA, or whatever.
 */
-   DECLARE_BITMAP(edma_inuse, EDMA_MAX_PARAMENTRY);
+   unsigned long *edma_inuse;
 
/* The edma_unused bit for each channel is clear unless
 * it is not being used on this platform. It uses a bit
 * of SOC-specific initialization code.
 */
-   DECLARE_BITMAP(edma_unused, EDMA_CHANS);
+   unsigned long *edma_unused;
 
struct dma_interrupt_data {
void (*callback)(unsigned channel, unsigned short ch_status,
 void *data);
void *data;
-   } intr_data[EDMA_CHANS];
+   } *intr_data;
 
struct dma_device   dma_slave;
-   struct edma_chanslave_chans[EDMA_CHANS];
+   struct edma_chan*slave_chans;
int dummy_slot;
 };
 
@@ -457,6 +436,8 @@ static int prepare_unused_channel_list(struct device *dev, 
void *data)
 {
struct platform_device *pdev = to_platform_device(dev);
struct edma_cc *ecc = data;
+   int dma_req_min = EDMA_CTLR_CHAN(ecc->id, 0);
+   int dma_req_max = dma_req_min + ecc->num_channels;
int i, count;
struct of_phandle_args  dma_spec;
 
@@ -491,11 +472,15 @@ static int prepare_unused_channel_list(struct device 
*dev, void *data)
/* For non-OF case */
for (i = 0; i < pdev->num_resources; i++) {
struct resource *res = >resource[i];
+   int dma_req;
+
+   if (!(res->flags & IORESOURCE_DMA))
+   continue;
 
-   if ((res->flags & IORESOURCE_DMA) && (int)res->start >= 0) {
+   dma_req = (int)res->start;
+   if (dma_req >= dma_req_min && dma_req < dma_req_max)
clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start),
  ecc->edma_unused);
-   }
}
 
return 0;
@@ -1978,7 +1963,7 @@ static void __init edma_chan_init(struct edma_cc *ecc, 
struct dma_device *dma,
 {
int i, j;
 
-   for (i = 0; i < EDMA_CHANS; i++) {
+   for (i = 0; i < ecc->num_channels; i++) {
struct edma_chan *echan = [i];
echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i);
echan->ecc = ecc;
@@ -2247,6 +2232,27 @@ static int edma_probe(struct platform_device *pdev)
if (ret)
return ret;
 
+   /* Allocate memory based on the information we got from the IP */
+   ecc->slave_chans = devm_kcalloc(dev, ecc->num_channels,
+   sizeof(*ecc->slave_chans), GFP_KERNEL);
+   if (!ecc->slave_chans)
+   return -ENOMEM;
+
+   ecc->intr_data = devm_kcalloc(dev, ecc->num_channels,
+ sizeof(*ecc->intr_data),

[PATCH v5 07/24] ARM/dmaengine: edma: Public API to use private struct pointer

2015-10-14 Thread Peter Ujfalusi

Instead of relying on indexes pointing to edma private data in the global
pointer array, pass the private data pointer via the public API.

Signed-off-by: Peter Ujfalusi 
---
 arch/arm/common/edma.c | 305 ++---
 drivers/dma/edma.c |  79 +-
 include/linux/platform_data/edma.h |  38 +++--
 3 files changed, 214 insertions(+), 208 deletions(-)

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 0b4c0ee59ed9..03692520812a 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -130,7 +130,7 @@ struct edma {
 
struct edma_soc_info *info;
int id;
-
+   boolunused_chan_list_done;
/* The edma_inuse bit for each PaRAM slot is clear unless the
 * channel is in use ... by ARM or DSP, for QDMA, or whatever.
 */
@@ -264,7 +264,6 @@ static inline void clear_bits(int offset, int len, unsigned 
long *p)
 }
 
 /*/
-static struct edma *edma_cc[EDMA_MAX_CC];
 static int arch_num_cc;
 
 /* dummy param set used to (re)initialize parameter RAM slots */
@@ -490,14 +489,18 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
 static int prepare_unused_channel_list(struct device *dev, void *data)
 {
struct platform_device *pdev = to_platform_device(dev);
-   int i, count, ctlr;
+   struct edma *cc = data;
+   int i, count;
struct of_phandle_args  dma_spec;
 
if (dev->of_node) {
+   struct platform_device *dma_pdev;
+
count = of_property_count_strings(dev->of_node, "dma-names");
if (count < 0)
return 0;
for (i = 0; i < count; i++) {
+
if (of_parse_phandle_with_args(dev->of_node, "dmas",
   "#dma-cells", i,
   _spec))
@@ -508,8 +511,12 @@ static int prepare_unused_channel_list(struct device *dev, 
void *data)
continue;
}
 
+   dma_pdev = of_find_device_by_node(dma_spec.np);
+   if (_pdev->dev != cc->dev)
+   continue;
+
clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]),
- edma_cc[0]->edma_unused);
+ cc->edma_unused);
of_node_put(dma_spec.np);
}
return 0;
@@ -517,11 +524,11 @@ static int prepare_unused_channel_list(struct device 
*dev, void *data)
 
/* For non-OF case */
for (i = 0; i < pdev->num_resources; i++) {
-   if ((pdev->resource[i].flags & IORESOURCE_DMA) &&
-   (int)pdev->resource[i].start >= 0) {
-   ctlr = EDMA_CTLR(pdev->resource[i].start);
+   struct resource *res = >resource[i];
+
+   if ((res->flags & IORESOURCE_DMA) && (int)res->start >= 0) {
clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start),
- edma_cc[ctlr]->edma_unused);
+ cc->edma_unused);
}
}
 
@@ -530,8 +537,6 @@ static int prepare_unused_channel_list(struct device *dev, 
void *data)
 
 /*---*/
 
-static bool unused_chan_list_done;
-
 /* Resource alloc/free:  dma channels, parameter RAM slots */
 
 /**
@@ -564,77 +569,73 @@ static bool unused_chan_list_done;
  *
  * Returns the number of the channel, else negative errno.
  */
-int edma_alloc_channel(int channel,
+int edma_alloc_channel(struct edma *cc, int channel,
void (*callback)(unsigned channel, u16 ch_status, void *data),
void *data,
enum dma_event_q eventq_no)
 {
-   unsigned i, done = 0, ctlr = 0;
+   unsigned done = 0;
int ret = 0;
 
-   if (!unused_chan_list_done) {
+   if (!cc->unused_chan_list_done) {
/*
 * Scan all the platform devices to find out the EDMA channels
 * used and clear them in the unused list, making the rest
 * available for ARM usage.
 */
-   ret = bus_for_each_dev(_bus_type, NULL, NULL,
-   prepare_unused_channel_list);
+   ret = bus_for_each_dev(_bus_type, NULL, cc,
+  prepare_unused_channel_list);
if (ret < 0)
return ret;
 
-   unused_chan_list_done = true;
+   cc->unused_chan_list_done = true;
}
 
if (channel >= 0) {
-   ctlr = EDMA_CTLR(channel);
+   if (cc->id != EDMA_CTLR(channel)) {
+   dev_err(cc->dev,

[PATCH v5 14/24] dmaengine: edma: Cleanup regarding the use of dev around the code

2015-10-14 Thread Peter Ujfalusi

Be consistent and do not mix the use of dev, &pdev->dev, etc. in the
functions.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 61 +++---
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 95c10373168d..a9fe5c92451d 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -1198,27 +1198,27 @@ static void edma_execute(struct edma_chan *echan)
j = i + edesc->processed;
edma_write_slot(ecc, echan->slot[i], >pset[j].param);
edesc->sg_len += edesc->pset[j].len;
-   dev_vdbg(echan->vchan.chan.device->dev,
-   "\n pset[%d]:\n"
-   "  chnum\t%d\n"
-   "  slot\t%d\n"
-   "  opt\t%08x\n"
-   "  src\t%08x\n"
-   "  dst\t%08x\n"
-   "  abcnt\t%08x\n"
-   "  ccnt\t%08x\n"
-   "  bidx\t%08x\n"
-   "  cidx\t%08x\n"
-   "  lkrld\t%08x\n",
-   j, echan->ch_num, echan->slot[i],
-   edesc->pset[j].param.opt,
-   edesc->pset[j].param.src,
-   edesc->pset[j].param.dst,
-   edesc->pset[j].param.a_b_cnt,
-   edesc->pset[j].param.ccnt,
-   edesc->pset[j].param.src_dst_bidx,
-   edesc->pset[j].param.src_dst_cidx,
-   edesc->pset[j].param.link_bcntrld);
+   dev_vdbg(dev,
+"\n pset[%d]:\n"
+"  chnum\t%d\n"
+"  slot\t%d\n"
+"  opt\t%08x\n"
+"  src\t%08x\n"
+"  dst\t%08x\n"
+"  abcnt\t%08x\n"
+"  ccnt\t%08x\n"
+"  bidx\t%08x\n"
+"  cidx\t%08x\n"
+"  lkrld\t%08x\n",
+j, echan->ch_num, echan->slot[i],
+edesc->pset[j].param.opt,
+edesc->pset[j].param.src,
+edesc->pset[j].param.dst,
+edesc->pset[j].param.a_b_cnt,
+edesc->pset[j].param.ccnt,
+edesc->pset[j].param.src_dst_bidx,
+edesc->pset[j].param.src_dst_cidx,
+edesc->pset[j].param.link_bcntrld);
/* Link to the previous slot if not the last set */
if (i != (nslots - 1))
edma_link(ecc, echan->slot[i], echan->slot[i + 1]);
@@ -1849,7 +1849,6 @@ err_no_chan:
 static void edma_free_chan_resources(struct dma_chan *chan)
 {
struct edma_chan *echan = to_edma_chan(chan);
-   struct device *dev = chan->device->dev;
int i;
 
/* Terminate transfers */
@@ -1871,7 +1870,7 @@ static void edma_free_chan_resources(struct dma_chan 
*chan)
echan->alloced = false;
}
 
-   dev_dbg(dev, "freeing channel for %u\n", echan->ch_num);
+   dev_dbg(chan->device->dev, "freeing channel for %u\n", echan->ch_num);
 }
 
 /* Send pending descriptor to hardware */
@@ -2196,13 +2195,13 @@ static int edma_probe(struct platform_device *pdev)
return ret;
}
 
-   ret = dma_set_mask_and_coherent(>dev, DMA_BIT_MASK(32));
+   ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
if (ret)
return ret;
 
-   ecc = devm_kzalloc(>dev, sizeof(*ecc), GFP_KERNEL);
+   ecc = devm_kzalloc(dev, sizeof(*ecc), GFP_KERNEL);
if (!ecc) {
-   dev_err(>dev, "Can't allocate controller\n");
+   dev_err(dev, "Can't allocate controller\n");
return -ENOMEM;
}
 
@@ -2345,7 +2344,7 @@ static int edma_probe(struct platform_device *pdev)
 
ecc->dummy_slot = edma_alloc_slot(ecc, EDMA_SLOT_ANY);
if (ecc->dummy_slot < 0) {
-   dev_err(>dev, "Can't allocate PaRAM dummy slot\n");
+   dev_err(dev, "Can't allocate PaRAM dummy slot\n");
return ecc->dummy_slot;
}
 
@@ -2354,7 +2353,7 @@ static int edma_probe(struct platform_device *pdev)
dma_cap_set(DMA_CYCLIC, ecc->dma_slave.cap_mask);
dma_cap_set(DMA_MEMCPY, ecc->dma_slave.cap_mask);
 
-   edma_dma_init(ecc, >dma_slave, >dev);
+   edma_dma_init(ecc, >dma_slave, dev);
 
edma_chan_init(ecc, >dma_slave, ecc->slave_chans);
 
@@ -2366,7 +2365,7 @@ static int edma_probe(struct platform_device *pdev)
of_dma_controller_register(node, of_dma_xlate_by_chan_id,
   >dma_slave);
 
-   dev_info(>dev, "TI EDMA DMA engine driver\n");
+   dev_info(dev, "TI EDMA DMA engine driver\n");

[PATCH v5 06/24] ARM: common: edma: Internal API to use pointer to 'struct edma'

2015-10-14 Thread Peter Ujfalusi

Merge the iomem into the 'struct edma' and change the internal (static)
functions to use a pointer to the edma_cc instead of the ctlr number.

Signed-off-by: Peter Ujfalusi 
---
 arch/arm/common/edma.c | 400 -
 1 file changed, 197 insertions(+), 203 deletions(-)

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index d82fceda13a3..0b4c0ee59ed9 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -114,108 +114,141 @@
 #define EDMA_MAX_PARAMENTRY 512
 
 /*/
+struct edma {
+   struct device   *dev;
+   void __iomem *base;
+
+   /* how many dma resources of each type */
+   unsignednum_channels;
+   unsignednum_region;
+   unsignednum_slots;
+   unsignednum_tc;
+   enum dma_event_qdefault_queue;
 
-static void __iomem *edmacc_regs_base[EDMA_MAX_CC];
+   /* list of channels with no even trigger; terminated by "-1" */
+   const s8*noevent;
+
+   struct edma_soc_info *info;
+   int id;
+
+   /* The edma_inuse bit for each PaRAM slot is clear unless the
+* channel is in use ... by ARM or DSP, for QDMA, or whatever.
+*/
+   DECLARE_BITMAP(edma_inuse, EDMA_MAX_PARAMENTRY);
 
-static inline unsigned int edma_read(unsigned ctlr, int offset)
+   /* The edma_unused bit for each channel is clear unless
+* it is not being used on this platform. It uses a bit
+* of SOC-specific initialization code.
+*/
+   DECLARE_BITMAP(edma_unused, EDMA_MAX_DMACH);
+
+   struct dma_interrupt_data {
+   void (*callback)(unsigned channel, unsigned short ch_status,
+   void *data);
+   void *data;
+   } intr_data[EDMA_MAX_DMACH];
+};
+/*/
+
+static inline unsigned int edma_read(struct edma *cc, int offset)
 {
-   return (unsigned int)__raw_readl(edmacc_regs_base[ctlr] + offset);
+   return (unsigned int)__raw_readl(cc->base + offset);
 }
 
-static inline void edma_write(unsigned ctlr, int offset, int val)
+static inline void edma_write(struct edma *cc, int offset, int val)
 {
-   __raw_writel(val, edmacc_regs_base[ctlr] + offset);
+   __raw_writel(val, cc->base + offset);
 }
-static inline void edma_modify(unsigned ctlr, int offset, unsigned and,
-   unsigned or)
+static inline void edma_modify(struct edma *cc, int offset, unsigned and,
+  unsigned or)
 {
-   unsigned val = edma_read(ctlr, offset);
+   unsigned val = edma_read(cc, offset);
val &= and;
val |= or;
-   edma_write(ctlr, offset, val);
+   edma_write(cc, offset, val);
 }
-static inline void edma_and(unsigned ctlr, int offset, unsigned and)
+static inline void edma_and(struct edma *cc, int offset, unsigned and)
 {
-   unsigned val = edma_read(ctlr, offset);
+   unsigned val = edma_read(cc, offset);
val &= and;
-   edma_write(ctlr, offset, val);
+   edma_write(cc, offset, val);
 }
-static inline void edma_or(unsigned ctlr, int offset, unsigned or)
+static inline void edma_or(struct edma *cc, int offset, unsigned or)
 {
-   unsigned val = edma_read(ctlr, offset);
+   unsigned val = edma_read(cc, offset);
val |= or;
-   edma_write(ctlr, offset, val);
+   edma_write(cc, offset, val);
 }
-static inline unsigned int edma_read_array(unsigned ctlr, int offset, int i)
+static inline unsigned int edma_read_array(struct edma *cc, int offset, int i)
 {
-   return edma_read(ctlr, offset + (i << 2));
+   return edma_read(cc, offset + (i << 2));
 }
-static inline void edma_write_array(unsigned ctlr, int offset, int i,
+static inline void edma_write_array(struct edma *cc, int offset, int i,
unsigned val)
 {
-   edma_write(ctlr, offset + (i << 2), val);
+   edma_write(cc, offset + (i << 2), val);
 }
-static inline void edma_modify_array(unsigned ctlr, int offset, int i,
+static inline void edma_modify_array(struct edma *cc, int offset, int i,
unsigned and, unsigned or)
 {
-   edma_modify(ctlr, offset + (i << 2), and, or);
+   edma_modify(cc, offset + (i << 2), and, or);
 }
-static inline void edma_or_array(unsigned ctlr, int offset, int i, unsigned or)
+static inline void edma_or_array(struct edma *cc, int offset, int i, unsigned 
or)
 {
-   edma_or(ctlr, offset + (i << 2), or);
+   edma_or(cc, offset + (i << 2), or);
 }
-static inline void edma_or_array2(unsigned ctlr, int offset, int i, int j,
+static inline void edma_or_array2(struct edma *cc, int offset, int i, int j,
unsigned or)
 {
-   edma_or(ctlr, offset + ((i*2 + j) << 2), or);
+   edma_or(cc, offset + ((i*2 + j) << 2), or);
 }
-static inline void edma_write_array2(unsigned

[PATCH v5 18/24] dmaengine: edma: Consolidate the comments for functions

2015-10-14 Thread Peter Ujfalusi

Remove or rewrite the comments for the internal functions.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 86 +++---
 1 file changed, 11 insertions(+), 75 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index d33ae0b43925..6bcbdceb3dc2 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -508,19 +508,7 @@ static void edma_setup_interrupt(struct edma_cc *ecc, 
unsigned lch,
 }
 
 /*
- * paRAM management functions
- */
-
-/**
- * edma_write_slot - write parameter RAM data for slot
- * @ecc: pointer to edma_cc struct
- * @slot: number of parameter RAM slot being modified
- * @param: data to be written into parameter RAM slot
- *
- * Use this to assign all parameters of a transfer at once.  This
- * allows more efficient setup of transfers than issuing multiple
- * calls to set up those parameters in small pieces, and provides
- * complete control over all transfer options.
+ * paRAM slot management functions
  */
 static void edma_write_slot(struct edma_cc *ecc, unsigned slot,
const struct edmacc_param *param)
@@ -531,15 +519,6 @@ static void edma_write_slot(struct edma_cc *ecc, unsigned 
slot,
memcpy_toio(ecc->base + PARM_OFFSET(slot), param, PARM_SIZE);
 }
 
-/**
- * edma_read_slot - read parameter RAM data from slot
- * @ecc: pointer to edma_cc struct
- * @slot: number of parameter RAM slot being copied
- * @param: where to store copy of parameter RAM data
- *
- * Use this to read data from a parameter RAM slot, perhaps to
- * save them as a template for later reuse.
- */
 static void edma_read_slot(struct edma_cc *ecc, unsigned slot,
   struct edmacc_param *param)
 {
@@ -590,15 +569,6 @@ static int edma_alloc_slot(struct edma_cc *ecc, int slot)
return EDMA_CTLR_CHAN(ecc->id, slot);
 }
 
-/**
- * edma_free_slot - deallocate DMA parameter RAM
- * @ecc: pointer to edma_cc struct
- * @slot: parameter RAM slot returned from edma_alloc_slot()
- *
- * This deallocates the parameter RAM slot allocated by edma_alloc_slot().
- * Callers are responsible for ensuring the slot is inactive, and will
- * not be activated.
- */
 static void edma_free_slot(struct edma_cc *ecc, unsigned slot)
 {
slot = EDMA_CHAN_SLOT(slot);
@@ -707,10 +677,9 @@ static int edma_start(struct edma_cc *ecc, unsigned 
channel)
  * @ecc: pointer to edma_cc struct
  * @channel: channel being deactivated
  *
- * When @lch is a channel, any active transfer is paused and
- * all pending hardware events are cleared.  The current transfer
- * may not be resumed, and the channel's Parameter RAM should be
- * reinitialized before being reused.
+ * Any active transfer is paused and all pending hardware events are cleared.
+ * The current transfer may not be resumed, and the channel's Parameter RAM
+ * should be reinitialized before being reused.
  */
 static void edma_stop(struct edma_cc *ecc, unsigned channel)
 {
@@ -742,13 +711,9 @@ static void edma_stop(struct edma_cc *ecc, unsigned 
channel)
}
 }
 
-/**
- * edma_pause - pause dma on a channel
- * @ecc: pointer to edma_cc struct
- * @channel: on which edma_start() has been called
- *
- * This temporarily disables EDMA hardware events on the specified channel,
- * preventing them from triggering new transfers on its behalf
+/*
+ * Temporarily disable EDMA hardware events on the specified channel,
+ * preventing them from triggering new transfers
  */
 static void edma_pause(struct edma_cc *ecc, unsigned channel)
 {
@@ -766,13 +731,7 @@ static void edma_pause(struct edma_cc *ecc, unsigned 
channel)
}
 }
 
-/**
- * edma_resume - resumes dma on a paused channel
- * @ecc: pointer to edma_cc struct
- * @channel: on which edma_pause() has been called
- *
- * This re-enables EDMA hardware events on the specified channel.
- */
+/* Re-enable EDMA hardware events on the specified channel.  */
 static void edma_resume(struct edma_cc *ecc, unsigned channel)
 {
if (ecc->id != EDMA_CTLR(channel)) {
@@ -808,19 +767,6 @@ static int edma_trigger_channel(struct edma_cc *ecc, 
unsigned channel)
return 0;
 }
 
-/**
- *
- * It cleans ParamEntry qand bring back EDMA to initial state if media has
- * been removed before EDMA has finished.It is usedful for removable media.
- * Arguments:
- *  ch_no - channel no
- *
- * Return: zero on success, or corresponding error no on failure
- *
- * FIXME this should not be needed ... edma_stop() should suffice.
- *
- */
-
 static void edma_clean_channel(struct edma_cc *ecc, unsigned channel)
 {
if (ecc->id != EDMA_CTLR(channel)) {
@@ -975,14 +921,7 @@ static void edma_free_channel(struct edma_cc *ecc, 
unsigned channel)
clear_bit(channel, ecc->edma_inuse);
 }
 
-/*
- * edma_assign_channel_eventq - move given channel to

[PATCH v5 15/24] dmaengine: edma: Use dev_dbg instead pr_debug

2015-10-14 Thread Peter Ujfalusi

We have access to dev, so it is better to use the dev_dbg for debug prints.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index a9fe5c92451d..08f9bd0aa0b3 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -676,23 +676,23 @@ static int edma_start(struct edma_cc *ecc, unsigned 
channel)
 
/* EDMA channels without event association */
if (test_bit(channel, ecc->edma_unused)) {
-   pr_debug("EDMA: ESR%d %08x\n", j,
-edma_shadow0_read_array(ecc, SH_ESR, j));
+   dev_dbg(ecc->dev, "ESR%d %08x\n", j,
+   edma_shadow0_read_array(ecc, SH_ESR, j));
edma_shadow0_write_array(ecc, SH_ESR, j, mask);
return 0;
}
 
/* EDMA channel with event association */
-   pr_debug("EDMA: ER%d %08x\n", j,
-edma_shadow0_read_array(ecc, SH_ER, j));
+   dev_dbg(ecc->dev, "ER%d %08x\n", j,
+   edma_shadow0_read_array(ecc, SH_ER, j));
/* Clear any pending event or error */
edma_write_array(ecc, EDMA_ECR, j, mask);
edma_write_array(ecc, EDMA_EMCR, j, mask);
/* Clear any SER */
edma_shadow0_write_array(ecc, SH_SECR, j, mask);
edma_shadow0_write_array(ecc, SH_EESR, j, mask);
-   pr_debug("EDMA: EER%d %08x\n", j,
-edma_shadow0_read_array(ecc, SH_EER, j));
+   dev_dbg(ecc->dev, "EER%d %08x\n", j,
+   edma_shadow0_read_array(ecc, SH_EER, j));
return 0;
}
 
@@ -730,8 +730,8 @@ static void edma_stop(struct edma_cc *ecc, unsigned channel)
/* clear possibly pending completion interrupt */
edma_shadow0_write_array(ecc, SH_ICR, j, mask);
 
-   pr_debug("EDMA: EER%d %08x\n", j,
-edma_shadow0_read_array(ecc, SH_EER, j));
+   dev_dbg(ecc->dev, "EER%d %08x\n", j,
+   edma_shadow0_read_array(ecc, SH_EER, j));
 
/* REVISIT:  consider guarding against inappropriate event
 * chaining by overwriting with dummy_paramset.
@@ -800,8 +800,8 @@ static int edma_trigger_channel(struct edma_cc *ecc, 
unsigned channel)
 
edma_shadow0_write_array(ecc, SH_ESR, (channel >> 5), mask);
 
-   pr_debug("EDMA: ESR%d %08x\n", (channel >> 5),
-edma_shadow0_read_array(ecc, SH_ESR, (channel >> 5)));
+   dev_dbg(ecc->dev, "ESR%d %08x\n", (channel >> 5),
+   edma_shadow0_read_array(ecc, SH_ESR, (channel >> 5)));
return 0;
 }
 
@@ -831,8 +831,8 @@ static void edma_clean_channel(struct edma_cc *ecc, 
unsigned channel)
int j = (channel >> 5);
unsigned int mask = BIT(channel & 0x1f);
 
-   pr_debug("EDMA: EMR%d %08x\n", j,
-edma_read_array(ecc, EDMA_EMR, j));
+   dev_dbg(ecc->dev, "EMR%d %08x\n", j,
+   edma_read_array(ecc, EDMA_EMR, j));
edma_shadow0_write_array(ecc, SH_ECR, j, mask);
/* Clear the corresponding EMR bits */
edma_write_array(ecc, EDMA_EMCR, j, mask);
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 19/24] dmaengine: edma: Simplify the interrupt handling

2015-10-14 Thread Peter Ujfalusi

With the merger of the arch/arm/common/edma.c code into the dmaengine
driver, there is no longer a need to have per channel callback/data storage
for interrupt events.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 450 -
 1 file changed, 205 insertions(+), 245 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 6bcbdceb3dc2..daa94a4bbe11 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -154,12 +154,6 @@ struct edmacc_param {
 #define TCCHEN BIT(22)
 #define ITCCHENBIT(23)
 
-/*ch_status parameter of callback function possible values*/
-#define EDMA_DMA_COMPLETE 1
-#define EDMA_DMA_CC_ERROR 2
-#define EDMA_DMA_TC1_ERROR 3
-#define EDMA_DMA_TC2_ERROR 4
-
 struct edma_pset {
u32 len;
dma_addr_t  addr;
@@ -243,12 +237,6 @@ struct edma_cc {
 */
unsigned long *edma_unused;
 
-   struct dma_interrupt_data {
-   void (*callback)(unsigned channel, unsigned short ch_status,
-void *data);
-   void *data;
-   } *intr_data;
-
struct dma_device   dma_slave;
struct edma_chan*slave_chans;
int dummy_slot;
@@ -486,24 +474,18 @@ static int prepare_unused_channel_list(struct device 
*dev, void *data)
return 0;
 }
 
-static void edma_setup_interrupt(struct edma_cc *ecc, unsigned lch,
-   void (*callback)(unsigned channel, u16 ch_status, void *data),
-   void *data)
+static void edma_setup_interrupt(struct edma_cc *ecc, unsigned lch, bool 
enable)
 {
lch = EDMA_CHAN_SLOT(lch);
 
-   if (!callback)
-   edma_shadow0_write_array(ecc, SH_IECR, lch >> 5,
-BIT(lch & 0x1f));
-
-   ecc->intr_data[lch].callback = callback;
-   ecc->intr_data[lch].data = data;
-
-   if (callback) {
+   if (enable) {
edma_shadow0_write_array(ecc, SH_ICR, lch >> 5,
 BIT(lch & 0x1f));
edma_shadow0_write_array(ecc, SH_IESR, lch >> 5,
 BIT(lch & 0x1f));
+   } else {
+   edma_shadow0_write_array(ecc, SH_IECR, lch >> 5,
+BIT(lch & 0x1f));
}
 }
 
@@ -795,8 +777,6 @@ static void edma_clean_channel(struct edma_cc *ecc, 
unsigned channel)
  * edma_alloc_channel - allocate DMA channel and paired parameter RAM
  * @ecc: pointer to edma_cc struct
  * @channel: specific channel to allocate; negative for "any unmapped channel"
- * @callback: optional; to be issued on DMA completion or errors
- * @data: passed to callback
  * @eventq_no: an EVENTQ_* constant, used to choose which Transfer
  * Controller (TC) executes requests using this channel.  Use
  * EVENTQ_DEFAULT unless you really need a high priority queue.
@@ -823,9 +803,7 @@ static void edma_clean_channel(struct edma_cc *ecc, 
unsigned channel)
  * Returns the number of the channel, else negative errno.
  */
 static int edma_alloc_channel(struct edma_cc *ecc, int channel,
-   void (*callback)(unsigned channel, u16 ch_status, void *data),
-   void *data,
-   enum dma_event_q eventq_no)
+ enum dma_event_q eventq_no)
 {
unsigned done = 0;
int ret = 0;
@@ -881,9 +859,7 @@ static int edma_alloc_channel(struct edma_cc *ecc, int 
channel,
edma_stop(ecc, EDMA_CTLR_CHAN(ecc->id, channel));
edma_write_slot(ecc, channel, _paramset);
 
-   if (callback)
-   edma_setup_interrupt(ecc, EDMA_CTLR_CHAN(ecc->id, channel),
-callback, data);
+   edma_setup_interrupt(ecc, EDMA_CTLR_CHAN(ecc->id, channel), true);
 
edma_map_dmach_to_queue(ecc, channel, eventq_no);
 
@@ -914,7 +890,7 @@ static void edma_free_channel(struct edma_cc *ecc, unsigned 
channel)
if (channel >= ecc->num_channels)
return;
 
-   edma_setup_interrupt(ecc, channel, NULL, NULL);
+   edma_setup_interrupt(ecc, channel, false);
/* REVISIT should probably take out of shadow region 0 */
 
edma_write_slot(ecc, channel, _paramset);
@@ -944,148 +920,6 @@ static void edma_assign_channel_eventq(struct edma_cc 
*ecc, unsigned channel,
edma_map_dmach_to_queue(ecc, channel, eventq_no);
 }
 
-/* eDMA interrupt handler */
-static irqreturn_t dma_irq_handler(int irq, void *data)
-{
-   struct edma_cc *ecc = data;
-   int ctlr;
-   u32 sh_ier;
-   u32 sh_ipr;
-   u32 bank;
-
-   ctlr = ecc->id;
-   if (ctlr < 0)
-   return IRQ_NONE;
-
-   dev_dbg(ecc->dev, "dma_irq_handler\n");
-
-   sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 0);
-   if (!sh_ipr) {
-   sh_ipr = edma_shadow0_read_array(ecc,

[PATCH v5 17/24] dmaengine: edma: Print warning when linking slots from different eDMA

2015-10-14 Thread Peter Ujfalusi

Print a warning message when linking is requested between paRAM slots that
belong to different eDMA controllers.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index f6653da0ee16..d33ae0b43925 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -619,6 +619,9 @@ static void edma_free_slot(struct edma_cc *ecc, unsigned 
slot)
  */
 static void edma_link(struct edma_cc *ecc, unsigned from, unsigned to)
 {
+   if (unlikely(EDMA_CTLR(from) != EDMA_CTLR(to)))
+   dev_warn(ecc->dev, "Ignoring eDMA instance for linking\n");
+
from = EDMA_CHAN_SLOT(from);
to = EDMA_CHAN_SLOT(to);
if (from >= ecc->num_slots || to >= ecc->num_slots)
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 22/24] dmaengine: edma: Read channel mapping support only once from HW

2015-10-14 Thread Peter Ujfalusi

Instead of directly reading it from CCCFG register take the information out
once when we set up the configuration from the HW.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index d105d1ae0f13..4b2ccc9de0ad 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -223,6 +223,7 @@ struct edma_cc {
unsignednum_region;
unsignednum_slots;
unsignednum_tc;
+   boolchmap_exist;
enum dma_event_qdefault_queue;
 
boolunused_chan_list_done;
@@ -1930,11 +1931,14 @@ static int edma_setup_from_hw(struct device *dev, 
struct edma_soc_info *pdata,
value = GET_NUM_EVQUE(cccfg);
ecc->num_tc = value + 1;
 
+   ecc->chmap_exist = (cccfg & CHMAP_EXIST) ? true : false;
+
dev_dbg(dev, "eDMA3 CC HW configuration (cccfg: 0x%08x):\n", cccfg);
dev_dbg(dev, "num_region: %u\n", ecc->num_region);
dev_dbg(dev, "num_channels: %u\n", ecc->num_channels);
dev_dbg(dev, "num_slots: %u\n", ecc->num_slots);
dev_dbg(dev, "num_tc: %u\n", ecc->num_tc);
+   dev_dbg(dev, "chmap_exist: %s\n", ecc->chmap_exist ? "yes" : "no");
 
/* Nothing need to be done if queue priority is provided */
if (pdata->queue_priority_mapping)
@@ -2223,7 +2227,7 @@ static int edma_probe(struct platform_device *pdev)
  queue_priority_mapping[i][1]);
 
/* Map the channel to param entry if channel mapping logic exist */
-   if (edma_read(ecc, EDMA_CCCFG) & CHMAP_EXIST)
+   if (ecc->chmap_exist)
edma_direct_dmach_to_param_mapping(ecc);
 
for (i = 0; i < ecc->num_region; i++) {
@@ -2293,7 +2297,7 @@ static int edma_pm_resume(struct device *dev)
  queue_priority_mapping[i][1]);
 
/* Map the channel to param entry if channel mapping logic */
-   if (edma_read(ecc, EDMA_CCCFG) & CHMAP_EXIST)
+   if (ecc->chmap_exist)
edma_direct_dmach_to_param_mapping(ecc);
 
for (i = 0; i < ecc->num_channels; i++) {
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC][PATCH] spi: Setup the master controller driver before setting the chipselect

2015-10-14 Thread Heiner Kallweit

On Wed, Oct 14, 2015 at 1:08 PM, Andy Shevchenko
 wrote:
> +Cc: Jarkko to see from spi-pxa2xx perspective
>
> On Wed, Oct 14, 2015 at 12:47 PM, Ivan T. Ivanov  wrote:
>> Adding Andy.
>>
>>
>>> On Oct 13, 2015, at 12:01 AM, Franklin S Cooper Jr  wrote:
>>>
>>> Some devices depend on the master controller driver setup function being
>>> called before calling any chipselect functions.
>>>
>>> Ensure that this is done; otherwise uninitialized structures may be
>>> accessed, causing a kernel panic.
>
> As far as I understand my concern should be about spi-dw driver.
>
> So, I have just tested yesterday's linux-next with and without
> proposed patch. Works for me:
> Tested-by: Andy Shevchenko 
>
>>>
>>> Signed-off-by: Franklin S Cooper Jr 
>>> ---
>>> drivers/spi/spi.c | 4 ++--
>>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
>>> index 38006cc..9374d82 100644
>>> --- a/drivers/spi/spi.c
>>> +++ b/drivers/spi/spi.c
>>> @@ -2053,11 +2053,11 @@ int spi_setup(struct spi_device *spi)
>>>   if (!spi->max_speed_hz)
>>>   spi->max_speed_hz = spi->master->max_speed_hz;
>>>
>>> - spi_set_cs(spi, false);
>>> -
>>>   if (spi->master->setup)
>>>   status = spi->master->setup(spi);
>>>
>>> + spi_set_cs(spi, false);
>>> +
>>>   dev_dbg(>dev, "setup mode %d, %s%s%s%s%u bits/w, %u Hz max --> 
>>> %d\n",
>>>   (int) (spi->mode & (SPI_CPOL | SPI_CPHA)),
>>>   (spi->mode & SPI_CS_HIGH) ? "cs_high, " : "",
>>> --
>>> 2.6.1
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-spi" in
>>> the body of a message to majord...@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>
>
>
> --
> With Best Regards,
> Andy Shevchenko
> --
> To unsubscribe from this list: send the line "unsubscribe linux-spi" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
The recent change to the bitbang driver leads to the set_cs hook
of spi_master being set
now for all drivers using the bitbang layer. This hook is called also
from spi_setup and therefore
one possible side effect is issues with bitbang drivers implementing
the chipselect hook of
spi_bitbang with a dependency on the master being set up before.
The proposed patch looks good to me.
There should be no impact on drivers not using bitbang.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 21/24] dmaengine: edma: Simplify and optimize ccerr interrupt handler

2015-10-14 Thread Peter Ujfalusi

No need to run through the bits in QEMR and CCERR events since they will
not trigger any action, so just clearing the errors there is fine.
In case of the missed event the loop can be optimized so we spend less time
to handle the event.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 82 +++---
 1 file changed, 35 insertions(+), 47 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 84b98a01993a..d105d1ae0f13 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -1640,9 +1640,10 @@ static inline bool edma_error_pending(struct edma_cc 
*ecc)
 static irqreturn_t dma_ccerr_handler(int irq, void *data)
 {
struct edma_cc *ecc = data;
-   int i;
+   int i, j;
int ctlr;
unsigned int cnt = 0;
+   unsigned int val;
 
ctlr = ecc->id;
if (ctlr < 0)
@@ -1654,57 +1655,44 @@ static irqreturn_t dma_ccerr_handler(int irq, void 
*data)
return IRQ_NONE;
 
while (1) {
-   int j = -1;
-
-   if (edma_read_array(ecc, EDMA_EMR, 0))
-   j = 0;
-   else if (edma_read_array(ecc, EDMA_EMR, 1))
-   j = 1;
-   if (j >= 0) {
-   dev_dbg(ecc->dev, "EMR%d %08x\n", j,
-   edma_read_array(ecc, EDMA_EMR, j));
-   for (i = 0; i < 32; i++) {
+   /* Event missed register(s) */
+   for (j = 0; j < 2; j++) {
+   unsigned long emr;
+
+   val = edma_read_array(ecc, EDMA_EMR, j);
+   if (!val)
+   continue;
+
+   dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val);
+   emr = val;
+   for (i = find_next_bit(, 32, 0); i < 32;
+i = find_next_bit(, 32, i + 1)) {
int k = (j << 5) + i;
 
-   if (edma_read_array(ecc, EDMA_EMR, j) &
-   BIT(i)) {
-   /* Clear the corresponding EMR bits */
-   edma_write_array(ecc, EDMA_EMCR, j,
+   /* Clear the corresponding EMR bits */
+   edma_write_array(ecc, EDMA_EMCR, j, BIT(i));
+   /* Clear any SER */
+   edma_shadow0_write_array(ecc, SH_SECR, j,
 BIT(i));
-   /* Clear any SER */
-   edma_shadow0_write_array(ecc, SH_SECR,
-j, BIT(i));
-   
edma_error_handler(>slave_chans[k]);
-   }
-   }
-   } else if (edma_read(ecc, EDMA_QEMR)) {
-   dev_dbg(ecc->dev, "QEMR %02x\n",
-   edma_read(ecc, EDMA_QEMR));
-   for (i = 0; i < 8; i++) {
-   if (edma_read(ecc, EDMA_QEMR) & BIT(i)) {
-   /* Clear the corresponding IPR bits */
-   edma_write(ecc, EDMA_QEMCR, BIT(i));
-   edma_shadow0_write(ecc, SH_QSECR,
-  BIT(i));
-
-   /* NOTE:  not reported!! */
-   }
-   }
-   } else if (edma_read(ecc, EDMA_CCERR)) {
-   dev_dbg(ecc->dev, "CCERR %08x\n",
-   edma_read(ecc, EDMA_CCERR));
-   /* FIXME:  CCERR.BIT(16) ignored!  much better
-* to just write CCERRCLR with CCERR value...
-*/
-   for (i = 0; i < 8; i++) {
-   if (edma_read(ecc, EDMA_CCERR) & BIT(i)) {
-   /* Clear the corresponding IPR bits */
-   edma_write(ecc, EDMA_CCERRCLR, BIT(i));
-
-   /* NOTE:  not reported!! */
-   }
+   edma_error_handler(>slave_chans[k]);
}
}
+
+   val = edma_read(ecc, EDMA_QEMR);
+   if (val) {
+   dev_dbg(ecc->dev, "QEMR 0x%02x\n", val);
+   /* Not reported, just clear the interrupt reason. */
+   edma_write(ecc, EDMA_QEMCR, val);
+   edma_shadow0_write(ecc, SH_QSECR, val);
+   }
+
+   val = edma_read(ecc, EDMA_CCERR);
+   if (val)

Re: [PATCH 4/5] ARM: dts: am437x/am33xx/omap3/dm816x: Add gpmc dma channel

2015-10-14 Thread Roger Quadros

On 13/10/15 04:38, Franklin S Cooper Jr wrote:
> Add dma channel information to the gpmc. Although not enabled by
> default this will allow prefetch-dma to be used.
> 
> Signed-off-by: Franklin S Cooper Jr 
> ---
>  arch/arm/boot/dts/am33xx.dtsi | 2 ++
>  arch/arm/boot/dts/am4372.dtsi | 2 ++
>  arch/arm/boot/dts/dm816x.dtsi | 2 ++
>  arch/arm/boot/dts/omap3.dtsi  | 2 ++

How about fixing up omap4/5 and dra7 as well?

cheers,
-roger

>  4 files changed, 8 insertions(+)
> 
> diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
> index e065f21..f2d8eed 100644
> --- a/arch/arm/boot/dts/am33xx.dtsi
> +++ b/arch/arm/boot/dts/am33xx.dtsi
> @@ -819,6 +819,8 @@
>   ti,no-idle-on-init;
>   reg = <0x5000 0x2000>;
>   interrupts = <100>;
> + dmas = < 52>;
> + dma-names = "rxtx";
>   gpmc,num-cs = <7>;
>   gpmc,num-waitpins = <2>;
>   #address-cells = <2>;
> diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
> index ec8b7a3..c02061b 100644
> --- a/arch/arm/boot/dts/am4372.dtsi
> +++ b/arch/arm/boot/dts/am4372.dtsi
> @@ -841,6 +841,8 @@
>   gpmc: gpmc@5000 {
>   compatible = "ti,am3352-gpmc";
>   ti,hwmods = "gpmc";
> + dmas = < 52>;
> + dma-names = "rxtx";
>   clocks = <_gclk>;
>   clock-names = "fck";
>   reg = <0x5000 0x2000>;
> diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi
> index 68fb444..d2e5d31 100644
> --- a/arch/arm/boot/dts/dm816x.dtsi
> +++ b/arch/arm/boot/dts/dm816x.dtsi
> @@ -180,6 +180,8 @@
>   #address-cells = <2>;
>   #size-cells = <1>;
>   interrupts = <100>;
> + dmas = < 52>;
> + dma-names = "rxtx";
>   gpmc,num-cs = <6>;
>   gpmc,num-waitpins = <2>;
>   gpio-controller;
> diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
> index 7f212b6..9dbbcf6 100644
> --- a/arch/arm/boot/dts/omap3.dtsi
> +++ b/arch/arm/boot/dts/omap3.dtsi
> @@ -717,6 +717,8 @@
>   ti,hwmods = "gpmc";
>   reg = <0x6e00 0x02d0>;
>   interrupts = <20>;
> + dmas = < 4>;
> + dma-names = "rxtx";
>   gpmc,num-cs = <8>;
>   gpmc,num-waitpins = <4>;
>   #address-cells = <2>;
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 24/24] dmaengine: edma: Dynamic paRAM slot handling if HW supports it

2015-10-14 Thread Peter Ujfalusi

If the eDMA3 has support for channel paRAM slot mapping we can utilize it
to allocate slots on demand and save precious slots for real transfers.
On am335x the eDMA has 64 channels which means we can unlock 64 paRAM
slots out from the available 256.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 101 +++--
 1 file changed, 52 insertions(+), 49 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 8d9169b7f208..7eefbf1e1c94 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -413,12 +413,13 @@ static void edma_assign_priority_to_queue(struct edma_cc 
*ecc, int queue_no,
edma_modify(ecc, EDMA_QUEPRI, ~(0x7 << bit), ((priority & 0x7) << bit));
 }
 
-static void edma_direct_dmach_to_param_mapping(struct edma_cc *ecc)
+static void edma_set_chmap(struct edma_cc *ecc, int channel, int slot)
 {
-   int i;
-
-   for (i = 0; i < ecc->num_channels; i++)
-   edma_write_array(ecc, EDMA_DCHMAP, i, (i << 5));
+   if (ecc->chmap_exist) {
+   channel = EDMA_CHAN_SLOT(channel);
+   slot = EDMA_CHAN_SLOT(slot);
+   edma_write_array(ecc, EDMA_DCHMAP, channel, (slot << 5));
+   }
 }
 
 static int prepare_unused_channel_list(struct device *dev, void *data)
@@ -528,10 +529,18 @@ static void edma_read_slot(struct edma_cc *ecc, unsigned 
slot,
  */
 static int edma_alloc_slot(struct edma_cc *ecc, int slot)
 {
-   if (slot > 0)
+   if (slot > 0) {
slot = EDMA_CHAN_SLOT(slot);
+   /* Requesting entry paRAM slot for a HW triggered channel. */
+   if (ecc->chmap_exist && slot < ecc->num_channels)
+   slot = EDMA_SLOT_ANY;
+   }
+
if (slot < 0) {
-   slot = ecc->num_channels;
+   if (ecc->chmap_exist)
+   slot = 0;
+   else
+   slot = ecc->num_channels;
for (;;) {
slot = find_next_zero_bit(ecc->slot_inuse,
  ecc->num_slots,
@@ -541,7 +550,7 @@ static int edma_alloc_slot(struct edma_cc *ecc, int slot)
if (!test_and_set_bit(slot, ecc->slot_inuse))
break;
}
-   } else if (slot < ecc->num_channels || slot >= ecc->num_slots) {
+   } else if (slot >= ecc->num_slots) {
return -EINVAL;
} else if (test_and_set_bit(slot, ecc->slot_inuse)) {
return -EBUSY;
@@ -555,7 +564,7 @@ static int edma_alloc_slot(struct edma_cc *ecc, int slot)
 static void edma_free_slot(struct edma_cc *ecc, unsigned slot)
 {
slot = EDMA_CHAN_SLOT(slot);
-   if (slot < ecc->num_channels || slot >= ecc->num_slots)
+   if (slot >= ecc->num_slots)
return;
 
edma_write_slot(ecc, slot, _paramset);
@@ -806,7 +815,6 @@ static void edma_clean_channel(struct edma_cc *ecc, 
unsigned channel)
 static int edma_alloc_channel(struct edma_cc *ecc, int channel,
  enum dma_event_q eventq_no)
 {
-   unsigned done = 0;
int ret = 0;
 
if (!ecc->unused_chan_list_done) {
@@ -833,24 +841,12 @@ static int edma_alloc_channel(struct edma_cc *ecc, int 
channel,
}
 
if (channel < 0) {
-   channel = 0;
-   for (;;) {
-   channel = find_next_bit(ecc->channel_unused,
-   ecc->num_channels, channel);
-   if (channel == ecc->num_channels)
-   break;
-   if (!test_and_set_bit(channel, ecc->slot_inuse)) {
-   done = 1;
-   break;
-   }
-   channel++;
-   }
-   if (!done)
-   return -ENOMEM;
+   channel = find_next_bit(ecc->channel_unused, ecc->num_channels,
+   0);
+   if (channel == ecc->num_channels)
+   return -EBUSY;
} else if (channel >= ecc->num_channels) {
return -EINVAL;
-   } else if (test_and_set_bit(channel, ecc->slot_inuse)) {
-   return -EBUSY;
}
 
/* ensure access through shadow region 0 */
@@ -858,7 +854,6 @@ static int edma_alloc_channel(struct edma_cc *ecc, int 
channel,
 
/* ensure no events are pending */
edma_stop(ecc, EDMA_CTLR_CHAN(ecc->id, channel));
-   edma_write_slot(ecc, channel, _paramset);
 
edma_setup_interrupt(ecc, EDMA_CTLR_CHAN(ecc->id, channel), true);
 
@@ -891,11 +886,8 @@ static void edma_free_channel(struct edma_cc *ecc, 
unsigned channel)
if (channel >= ecc->num_channels)
return;
 
-   edma_setup_interrupt(ecc, channel, false);
/* REVISIT should probably take out of

[PATCH v5 20/24] dmaengine: edma: Move the pending error check into helper function

2015-10-14 Thread Peter Ujfalusi

In the ccerr interrupt handler the code checks for pending errors in the
error status registers in two different places.
Move the check out to a helper function.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index daa94a4bbe11..84b98a01993a 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -1626,6 +1626,16 @@ static void edma_error_handler(struct edma_chan *echan)
spin_unlock(>vchan.lock);
 }
 
+static inline bool edma_error_pending(struct edma_cc *ecc)
+{
+   if (edma_read_array(ecc, EDMA_EMR, 0) ||
+   edma_read_array(ecc, EDMA_EMR, 1) ||
+   edma_read(ecc, EDMA_QEMR) || edma_read(ecc, EDMA_CCERR))
+   return true;
+
+   return false;
+}
+
 /* eDMA error interrupt handler */
 static irqreturn_t dma_ccerr_handler(int irq, void *data)
 {
@@ -1640,10 +1650,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
 
dev_vdbg(ecc->dev, "dma_ccerr_handler\n");
 
-   if ((edma_read_array(ecc, EDMA_EMR, 0) == 0) &&
-   (edma_read_array(ecc, EDMA_EMR, 1) == 0) &&
-   (edma_read(ecc, EDMA_QEMR) == 0) &&
-   (edma_read(ecc, EDMA_CCERR) == 0))
+   if (!edma_error_pending(ecc))
return IRQ_NONE;
 
while (1) {
@@ -1698,10 +1705,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
}
}
}
-   if ((edma_read_array(ecc, EDMA_EMR, 0) == 0) &&
-   (edma_read_array(ecc, EDMA_EMR, 1) == 0) &&
-   (edma_read(ecc, EDMA_QEMR) == 0) &&
-   (edma_read(ecc, EDMA_CCERR) == 0))
+   if (!edma_error_pending(ecc))
break;
cnt++;
if (cnt > 10)
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 16/24] dmaengine: edma: Use the edma_write_slot instead open coded memcpy_toio

2015-10-14 Thread Peter Ujfalusi

edma_write_slot() is for writing an entire paRAM slot.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 08f9bd0aa0b3..f6653da0ee16 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -968,8 +968,7 @@ static void edma_free_channel(struct edma_cc *ecc, unsigned 
channel)
edma_setup_interrupt(ecc, channel, NULL, NULL);
/* REVISIT should probably take out of shadow region 0 */
 
-   memcpy_toio(ecc->base + PARM_OFFSET(channel), _paramset,
-   PARM_SIZE);
+   edma_write_slot(ecc, channel, _paramset);
clear_bit(channel, ecc->edma_inuse);
 }
 
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 23/24] dmaengine: edma: Rename bitfields for slot and channel usage tracking

2015-10-14 Thread Peter Ujfalusi

The names chosen for the bitfields were quite confusing and gave no real
information on what they are used for...

edma_inuse -> slot_inuse: tracks the slot usage/availability
edma_unused -> channel_unused: tracks the channel usage/availability

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 51 ++-
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 4b2ccc9de0ad..8d9169b7f208 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -227,16 +227,16 @@ struct edma_cc {
enum dma_event_qdefault_queue;
 
boolunused_chan_list_done;
-   /* The edma_inuse bit for each PaRAM slot is clear unless the
+   /* The slot_inuse bit for each PaRAM slot is clear unless the
 * channel is in use ... by ARM or DSP, for QDMA, or whatever.
 */
-   unsigned long *edma_inuse;
+   unsigned long *slot_inuse;
 
-   /* The edma_unused bit for each channel is clear unless
+   /* The channel_unused bit for each channel is clear unless
 * it is not being used on this platform. It uses a bit
 * of SOC-specific initialization code.
 */
-   unsigned long *edma_unused;
+   unsigned long *channel_unused;
 
struct dma_device   dma_slave;
struct edma_chan*slave_chans;
@@ -452,7 +452,7 @@ static int prepare_unused_channel_list(struct device *dev, 
void *data)
continue;
 
clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]),
- ecc->edma_unused);
+ ecc->channel_unused);
of_node_put(dma_spec.np);
}
return 0;
@@ -469,7 +469,7 @@ static int prepare_unused_channel_list(struct device *dev, 
void *data)
dma_req = (int)res->start;
if (dma_req >= dma_req_min && dma_req < dma_req_max)
clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start),
- ecc->edma_unused);
+ ecc->channel_unused);
}
 
return 0;
@@ -533,17 +533,17 @@ static int edma_alloc_slot(struct edma_cc *ecc, int slot)
if (slot < 0) {
slot = ecc->num_channels;
for (;;) {
-   slot = find_next_zero_bit(ecc->edma_inuse,
+   slot = find_next_zero_bit(ecc->slot_inuse,
  ecc->num_slots,
  slot);
if (slot == ecc->num_slots)
return -ENOMEM;
-   if (!test_and_set_bit(slot, ecc->edma_inuse))
+   if (!test_and_set_bit(slot, ecc->slot_inuse))
break;
}
} else if (slot < ecc->num_channels || slot >= ecc->num_slots) {
return -EINVAL;
-   } else if (test_and_set_bit(slot, ecc->edma_inuse)) {
+   } else if (test_and_set_bit(slot, ecc->slot_inuse)) {
return -EBUSY;
}
 
@@ -559,7 +559,7 @@ static void edma_free_slot(struct edma_cc *ecc, unsigned 
slot)
return;
 
edma_write_slot(ecc, slot, _paramset);
-   clear_bit(slot, ecc->edma_inuse);
+   clear_bit(slot, ecc->slot_inuse);
 }
 
 /**
@@ -631,7 +631,7 @@ static int edma_start(struct edma_cc *ecc, unsigned channel)
unsigned int mask = BIT(channel & 0x1f);
 
/* EDMA channels without event association */
-   if (test_bit(channel, ecc->edma_unused)) {
+   if (test_bit(channel, ecc->channel_unused)) {
dev_dbg(ecc->dev, "ESR%d %08x\n", j,
edma_shadow0_read_array(ecc, SH_ESR, j));
edma_shadow0_write_array(ecc, SH_ESR, j, mask);
@@ -835,11 +835,11 @@ static int edma_alloc_channel(struct edma_cc *ecc, int 
channel,
if (channel < 0) {
channel = 0;
for (;;) {
-   channel = find_next_bit(ecc->edma_unused,
+   channel = find_next_bit(ecc->channel_unused,
ecc->num_channels, channel);
if (channel == ecc->num_channels)
break;
-   if (!test_and_set_bit(channel, ecc->edma_inuse)) {
+   if (!test_and_set_bit(channel, ecc->slot_inuse)) {
done = 1;
break;
}
@@ -849,7 +849,7 @@ static int edma_alloc_channel(struct edma_cc *ecc, int 
channel,
return -ENOMEM;
} else if (channel >= ecc->num_channels) {
return -EINVAL;
-

[PATCH v5 11/24] ARM/dmaengine: edma: Merge the two drivers under drivers/dma/

2015-10-14 Thread Peter Ujfalusi

Move the code out from arch/arm/common and merge it inside of the dmaengine
driver.
This change is done with as minimal (if any) functional change to the code
as possible to avoid introducing regressions.

Signed-off-by: Peter Ujfalusi 
Acked-by: Tony Lindgren 
---
 arch/arm/Kconfig   |1 -
 arch/arm/common/Kconfig|3 -
 arch/arm/common/Makefile   |1 -
 arch/arm/common/edma.c | 1431 --
 arch/arm/mach-omap2/Kconfig|1 -
 drivers/dma/Kconfig|1 -
 drivers/dma/edma.c | 1506 ++--
 include/linux/platform_data/edma.h |   74 --
 8 files changed, 1431 insertions(+), 1587 deletions(-)
 delete mode 100644 arch/arm/common/edma.c

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 39d7d4bd4d5a..0365cbbc9179 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -715,7 +715,6 @@ config ARCH_DAVINCI
select GENERIC_CLOCKEVENTS
select GENERIC_IRQ_CHIP
select HAVE_IDE
-   select TI_PRIV_EDMA
select USE_OF
select ZONE_DMA
help
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index c3a4e9ceba34..9353184d730d 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -17,6 +17,3 @@ config SHARP_PARAM
 
 config SHARP_SCOOP
bool
-
-config TI_PRIV_EDMA
-   bool
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 6ee5959a813b..27f23b15b1ea 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -15,6 +15,5 @@ obj-$(CONFIG_MCPM)+= mcpm_head.o mcpm_entry.o 
mcpm_platsmp.o vlock.o
 CFLAGS_REMOVE_mcpm_entry.o = -pg
 AFLAGS_mcpm_head.o := -march=armv7-a
 AFLAGS_vlock.o := -march=armv7-a
-obj-$(CONFIG_TI_PRIV_EDMA) += edma.o
 obj-$(CONFIG_BL_SWITCHER)  += bL_switcher.o
 obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o
diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
deleted file mode 100644
index 5b747f1bc8b5..
--- a/arch/arm/common/edma.c
+++ /dev/null
@@ -1,1431 +0,0 @@
-/*
- * EDMA3 support for DaVinci
- *
- * Copyright (C) 2006-2009 Texas Instruments.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-
-/* Offsets matching "struct edmacc_param" */
-#define PARM_OPT   0x00
-#define PARM_SRC   0x04
-#define PARM_A_B_CNT   0x08
-#define PARM_DST   0x0c
-#define PARM_SRC_DST_BIDX  0x10
-#define PARM_LINK_BCNTRLD  0x14
-#define PARM_SRC_DST_CIDX  0x18
-#define PARM_CCNT  0x1c
-
-#define PARM_SIZE  0x20
-
-/* Offsets for EDMA CC global channel registers and their shadows */
-#define SH_ER  0x00/* 64 bits */
-#define SH_ECR 0x08/* 64 bits */
-#define SH_ESR 0x10/* 64 bits */
-#define SH_CER 0x18/* 64 bits */
-#define SH_EER 0x20/* 64 bits */
-#define SH_EECR0x28/* 64 bits */
-#define SH_EESR0x30/* 64 bits */
-#define SH_SER 0x38/* 64 bits */
-#define SH_SECR0x40/* 64 bits */
-#define SH_IER 0x50/* 64 bits */
-#define SH_IECR0x58/* 64 bits */
-#define SH_IESR0x60/* 64 bits */
-#define SH_IPR 0x68/* 64 bits */
-#define SH_ICR 0x70/* 64 bits */
-#define SH_IEVAL   0x78
-#define SH_QER 0x80
-#define SH_QEER0x84
-#define SH_QEECR   0x88
-#define SH_QEESR   0x8c
-#define SH_QSER0x90
-#define SH_QSECR   0x94
-#define SH_SIZE0x200
-
-/* Offsets for EDMA CC global registers */
-#define EDMA_REV   0x
-#define EDMA_CCCFG 0x0004
-#define EDMA_QCHMAP0x0200  /* 8 registers */
-#define EDMA_DMAQNUM   0x0240  /* 8 registers (4 on OMAP-L1xx) */
-#define EDMA_QDMAQNUM  0x0260
-#define EDMA_QUETCMAP  0x0280
-#define EDMA_QUEPRI0x0284
-#define EDMA_EMR   0x0300  /* 64 bits */
-#define EDMA_EMCR  0x0308  /* 64 bits */
-#define EDMA_QEMR  0x0310
-#define EDMA_QEMCR 0x0314

[PATCH v5 13/24] dmaengine: edma: Use devm_kcalloc when possible

2015-10-14 Thread Peter Ujfalusi

When allocating memory for a number of items it is better (looks better)
to use devm_kcalloc.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index d5a76c67f83f..95c10373168d 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -2055,7 +2055,7 @@ static int edma_setup_from_hw(struct device *dev, struct 
edma_soc_info *pdata,
 * priority. So Q0 is the highest priority queue and the last queue has
 * the lowest priority.
 */
-   queue_priority_map = devm_kzalloc(dev, (ecc->num_tc + 1) * sizeof(s8),
+   queue_priority_map = devm_kcalloc(dev, ecc->num_tc + 1, sizeof(s8),
  GFP_KERNEL);
if (!queue_priority_map)
return -ENOMEM;
@@ -2086,7 +2086,7 @@ static int edma_xbar_event_map(struct device *dev, struct 
edma_soc_info *pdata,
u32 shift, offset, mux;
int ret, i;
 
-   xbar_chans = devm_kzalloc(dev, (nelm + 2) * sizeof(s16), GFP_KERNEL);
+   xbar_chans = devm_kcalloc(dev, nelm + 2, sizeof(s16), GFP_KERNEL);
if (!xbar_chans)
return -ENOMEM;
 
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 01/24] ARM: common: edma: Fix channel parameter for irq callbacks

2015-10-14 Thread Peter Ujfalusi

When the interrupt happened for the second eDMA, the channel
number was incorrectly passed to the client driver.

Signed-off-by: Peter Ujfalusi 
CC: 
---
 arch/arm/common/edma.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 873dbfcc7dc9..56fc339571f9 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -406,7 +406,8 @@ static irqreturn_t dma_irq_handler(int irq, void *data)
BIT(slot));
if (edma_cc[ctlr]->intr_data[channel].callback)
edma_cc[ctlr]->intr_data[channel].callback(
-   channel, EDMA_DMA_COMPLETE,
+   EDMA_CTLR_CHAN(ctlr, channel),
+   EDMA_DMA_COMPLETE,
edma_cc[ctlr]->intr_data[channel].data);
}
} while (sh_ipr);
@@ -460,7 +461,8 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
if (edma_cc[ctlr]->intr_data[k].
callback) {
edma_cc[ctlr]->intr_data[k].
-   callback(k,
+   callback(
+   EDMA_CTLR_CHAN(ctlr, k),
EDMA_DMA_CC_ERROR,
edma_cc[ctlr]->intr_data
[k].data);
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 03/24] dmaengine: edma: Simplify and optimize the edma_execute path

2015-10-14 Thread Peter Ujfalusi

The code path in edma_execute() and edma_callback() can be simplified
and made more optimal.
There is no need to call into edma_execute() when the transfer
has been finished, for example.
Also the handling of missed/first or next batch of paRAMs can
be done in a more optimal way.

Signed-off-by: Peter Ujfalusi 
---
 drivers/dma/edma.c | 76 +-
 1 file changed, 29 insertions(+), 47 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 558b0b4e7536..19fa49d6f555 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -154,15 +154,11 @@ static void edma_execute(struct edma_chan *echan)
struct device *dev = echan->vchan.chan.device->dev;
int i, j, left, nslots;
 
-   /* If either we processed all psets or we're still not started */
-   if (!echan->edesc ||
-   echan->edesc->pset_nr == echan->edesc->processed) {
-   /* Get next vdesc */
+   if (!echan->edesc) {
+   /* Setup is needed for the first transfer */
vdesc = vchan_next_desc(>vchan);
-   if (!vdesc) {
-   echan->edesc = NULL;
+   if (!vdesc)
return;
-   }
list_del(>node);
echan->edesc = to_edma_desc(>tx);
}
@@ -220,28 +216,26 @@ static void edma_execute(struct edma_chan *echan)
  echan->ecc->dummy_slot);
}
 
-   if (edesc->processed <= MAX_NR_SG) {
-   dev_dbg(dev, "first transfer starting on channel %d\n",
-   echan->ch_num);
-   edma_start(echan->ch_num);
-   } else {
-   dev_dbg(dev, "chan: %d: completed %d elements, resuming\n",
-   echan->ch_num, edesc->processed);
-   edma_resume(echan->ch_num);
-   }
-
-   /*
-* This happens due to setup times between intermediate transfers
-* in long SG lists which have to be broken up into transfers of
-* MAX_NR_SG
-*/
if (echan->missed) {
+   /*
+* This happens due to setup times between intermediate
+* transfers in long SG lists which have to be broken up into
+* transfers of MAX_NR_SG
+*/
dev_dbg(dev, "missed event on channel %d\n", echan->ch_num);
edma_clean_channel(echan->ch_num);
edma_stop(echan->ch_num);
edma_start(echan->ch_num);
edma_trigger_channel(echan->ch_num);
echan->missed = 0;
+   } else if (edesc->processed <= MAX_NR_SG) {
+   dev_dbg(dev, "first transfer starting on channel %d\n",
+   echan->ch_num);
+   edma_start(echan->ch_num);
+   } else {
+   dev_dbg(dev, "chan: %d: completed %d elements, resuming\n",
+   echan->ch_num, edesc->processed);
+   edma_resume(echan->ch_num);
}
 }
 
@@ -259,20 +253,17 @@ static int edma_terminate_all(struct dma_chan *chan)
 * echan->edesc is NULL and exit.)
 */
if (echan->edesc) {
-   int cyclic = echan->edesc->cyclic;
-
+   edma_stop(echan->ch_num);
+   /* Move the cyclic channel back to default queue */
+   if (echan->edesc->cyclic)
+   edma_assign_channel_eventq(echan->ch_num,
+  EVENTQ_DEFAULT);
/*
 * free the running request descriptor
 * since it is not in any of the vdesc lists
 */
edma_desc_free(>edesc->vdesc);
-
echan->edesc = NULL;
-   edma_stop(echan->ch_num);
-   /* Move the cyclic channel back to default queue */
-   if (cyclic)
-   edma_assign_channel_eventq(echan->ch_num,
-  EVENTQ_DEFAULT);
}
 
vchan_get_all_descriptors(>vchan, );
@@ -725,41 +716,33 @@ static void edma_callback(unsigned ch_num, u16 ch_status, 
void *data)
 
edesc = echan->edesc;
 
-   /* Pause the channel for non-cyclic */
-   if (!edesc || !edesc->cyclic)
-   edma_pause(echan->ch_num);
-
+   spin_lock(>vchan.lock);
switch (ch_status) {
case EDMA_DMA_COMPLETE:
-   spin_lock(>vchan.lock);
-
if (edesc) {
if (edesc->cyclic) {
vchan_cyclic_callback(>vdesc);
+   goto out;
} else if (edesc->processed == edesc->pset_nr) {
dev_dbg(dev, "Transfer complete, stopping 
channel %d\n", ch_num);
edesc->residue = 0;
edma_stop(echan->ch_num);

[PATCH v5 09/24] ARM: davinci: Use platform_device_register_full() to create pdev for eDMA

2015-10-14 Thread Peter Ujfalusi

Convert the eDMA platform device creation to use
struct platform_device_info XX __initconst and
platform_device_register_full()
This will allow us to cleanly specify the dma_mask for the devices in an
upcoming patch.

Signed-off-by: Peter Ujfalusi 
Acked-by: Sekhar Nori 
---
 arch/arm/mach-davinci/devices-da8xx.c | 38 ++-
 arch/arm/mach-davinci/dm355.c | 20 +++---
 arch/arm/mach-davinci/dm644x.c| 20 +++---
 arch/arm/mach-davinci/dm646x.c| 18 ++---
 4 files changed, 57 insertions(+), 39 deletions(-)

diff --git a/arch/arm/mach-davinci/devices-da8xx.c 
b/arch/arm/mach-davinci/devices-da8xx.c
index 9ae049ae816a..9f7d266faa0c 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -213,48 +213,50 @@ static struct resource da850_edma1_resources[] = {
},
 };
 
-static struct platform_device da8xx_edma0_device = {
+static const struct platform_device_info da8xx_edma0_device __initconst = {
.name   = "edma",
.id = 0,
-   .dev = {
-   .platform_data = &da8xx_edma0_pdata,
-   },
-   .num_resources  = ARRAY_SIZE(da8xx_edma0_resources),
-   .resource   = da8xx_edma0_resources,
+   .res= da8xx_edma0_resources,
+   .num_res= ARRAY_SIZE(da8xx_edma0_resources),
+   .data   = &da8xx_edma0_pdata,
+   .size_data  = sizeof(da8xx_edma0_pdata),
 };
 
-static struct platform_device da850_edma1_device = {
+static const struct platform_device_info da850_edma1_device __initconst = {
.name   = "edma",
.id = 1,
-   .dev = {
-   .platform_data = &da850_edma1_pdata,
-   },
-   .num_resources  = ARRAY_SIZE(da850_edma1_resources),
-   .resource   = da850_edma1_resources,
+   .res= da850_edma1_resources,
+   .num_res= ARRAY_SIZE(da850_edma1_resources),
+   .data   = &da850_edma1_pdata,
+   .size_data  = sizeof(da850_edma1_pdata),
 };
 
 int __init da830_register_edma(struct edma_rsv_info *rsv)
 {
+   struct platform_device *edma_pdev;
+
da8xx_edma0_pdata.rsv = rsv;
 
-   return platform_device_register(&da8xx_edma0_device);
+   edma_pdev = platform_device_register_full(&da8xx_edma0_device);
+   return IS_ERR(edma_pdev) ? PTR_ERR(edma_pdev) : 0;
 }
 
 int __init da850_register_edma(struct edma_rsv_info *rsv[2])
 {
-   int ret;
+   struct platform_device *edma_pdev;
 
if (rsv) {
da8xx_edma0_pdata.rsv = rsv[0];
da850_edma1_pdata.rsv = rsv[1];
}
 
-   ret = platform_device_register(&da8xx_edma0_device);
-   if (ret) {
+   edma_pdev = platform_device_register_full(&da8xx_edma0_device);
+   if (IS_ERR(edma_pdev)) {
pr_warn("%s: Failed to register eDMA0\n", __func__);
-   return ret;
+   return PTR_ERR(edma_pdev);
}
-   return platform_device_register(&da850_edma1_device);
+   edma_pdev = platform_device_register_full(&da850_edma1_device);
+   return IS_ERR(edma_pdev) ? PTR_ERR(edma_pdev) : 0;
 }
 
 static struct resource da8xx_i2c_resources0[] = {
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index a50bb9c66952..5f10c6695e31 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -613,12 +613,13 @@ static struct resource edma_resources[] = {
/* not using (or muxing) TC*_ERR */
 };
 
-static struct platform_device dm355_edma_device = {
-   .name   = "edma",
-   .id = 0,
-   .dev.platform_data  = &dm355_edma_pdata,
-   .num_resources  = ARRAY_SIZE(edma_resources),
-   .resource   = edma_resources,
+static const struct platform_device_info dm355_edma_device __initconst = {
+   .name   = "edma",
+   .id = 0,
+   .res= edma_resources,
+   .num_res= ARRAY_SIZE(edma_resources),
+   .data   = &dm355_edma_pdata,
+   .size_data  = sizeof(dm355_edma_pdata),
 };
 
 static struct resource dm355_asp1_resources[] = {
@@ -1057,13 +1058,18 @@ int __init dm355_init_video(struct vpfe_config 
*vpfe_cfg,
 
 static int __init dm355_init_devices(void)
 {
+   struct platform_device *edma_pdev;
int ret = 0;
 
if (!cpu_is_davinci_dm355())
return 0;
 
davinci_cfg_reg(DM355_INT_EDMA_CC);
-   platform_device_register(&dm355_edma_device);
+   edma_pdev = platform_device_register_full(&dm355_edma_device);
+   if (IS_ERR(edma_pdev)) {
+   pr_warn("%s: Failed to register eDMA\n", __func__);
+   return PTR_ERR(edma_pdev);
+   }
 
ret = davinci_init_wdt();
if (ret)
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index d759ca8e58e8..aa3453b40d5f 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++

[PATCH v5 04/24] ARM: davinci/common: Convert edma driver to handle one eDMA instance per driver

2015-10-14 Thread Peter Ujfalusi

Currently we have one device created to handle all (maximum 2) eDMAs in the
system.
With this change each eDMA instance will have its own device/driver.
This change is needed for further cleanups in the eDMA driver stack, since
having one device/driver handle all eDMAs in the system is not flexible
enough and prevents the upcoming work.

Signed-off-by: Peter Ujfalusi 
Acked-by: Sekhar Nori 
---
 arch/arm/common/edma.c| 356 +++---
 arch/arm/mach-davinci/devices-da8xx.c | 110 ---
 arch/arm/mach-davinci/dm355.c |  21 +-
 arch/arm/mach-davinci/dm365.c |  25 +--
 arch/arm/mach-davinci/dm644x.c|  21 +-
 arch/arm/mach-davinci/dm646x.c|  27 ++-
 6 files changed, 234 insertions(+), 326 deletions(-)

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index e9c4cb16a47e..7c2fe527e53b 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -235,6 +235,7 @@ static inline void clear_bits(int offset, int len, unsigned 
long *p)
 
 /* actual number of DMA channels and slots on this silicon */
 struct edma {
+   struct device   *dev;
/* how many dma resources of each type */
unsignednum_channels;
unsignednum_region;
@@ -246,6 +247,7 @@ struct edma {
const s8*noevent;
 
struct edma_soc_info *info;
+   int id;
 
/* The edma_inuse bit for each PaRAM slot is clear unless the
 * channel is in use ... by ARM or DSP, for QDMA, or whatever.
@@ -258,9 +260,6 @@ struct edma {
 */
DECLARE_BITMAP(edma_unused, EDMA_MAX_DMACH);
 
-   unsignedirq_res_start;
-   unsignedirq_res_end;
-
struct dma_interrupt_data {
void (*callback)(unsigned channel, unsigned short ch_status,
void *data);
@@ -349,17 +348,6 @@ setup_dma_interrupt(unsigned lch,
}
 }
 
-static int irq2ctlr(int irq)
-{
-   if (irq >= edma_cc[0]->irq_res_start && irq <= edma_cc[0]->irq_res_end)
-   return 0;
-   else if (irq >= edma_cc[1]->irq_res_start &&
-   irq <= edma_cc[1]->irq_res_end)
-   return 1;
-
-   return -1;
-}
-
 /**
  *
  * DMA interrupt handler
@@ -367,16 +355,17 @@ static int irq2ctlr(int irq)
  */
 static irqreturn_t dma_irq_handler(int irq, void *data)
 {
+   struct edma *cc = data;
int ctlr;
u32 sh_ier;
u32 sh_ipr;
u32 bank;
 
-   ctlr = irq2ctlr(irq);
+   ctlr = cc->id;
if (ctlr < 0)
return IRQ_NONE;
 
-   dev_dbg(data, "dma_irq_handler\n");
+   dev_dbg(cc->dev, "dma_irq_handler\n");
 
sh_ipr = edma_shadow0_read_array(ctlr, SH_IPR, 0);
if (!sh_ipr) {
@@ -394,7 +383,7 @@ static irqreturn_t dma_irq_handler(int irq, void *data)
u32 slot;
u32 channel;
 
-   dev_dbg(data, "IPR%d %08x\n", bank, sh_ipr);
+   dev_dbg(cc->dev, "IPR%d %08x\n", bank, sh_ipr);
 
slot = __ffs(sh_ipr);
sh_ipr &= ~(BIT(slot));
@@ -404,11 +393,11 @@ static irqreturn_t dma_irq_handler(int irq, void *data)
/* Clear the corresponding IPR bits */
edma_shadow0_write_array(ctlr, SH_ICR, bank,
BIT(slot));
-   if (edma_cc[ctlr]->intr_data[channel].callback)
-   edma_cc[ctlr]->intr_data[channel].callback(
+   if (cc->intr_data[channel].callback)
+   cc->intr_data[channel].callback(
EDMA_CTLR_CHAN(ctlr, channel),
EDMA_DMA_COMPLETE,
-   edma_cc[ctlr]->intr_data[channel].data);
+   cc->intr_data[channel].data);
}
} while (sh_ipr);
 
@@ -423,15 +412,16 @@ static irqreturn_t dma_irq_handler(int irq, void *data)
  */
 static irqreturn_t dma_ccerr_handler(int irq, void *data)
 {
+   struct edma *cc = data;
int i;
int ctlr;
unsigned int cnt = 0;
 
-   ctlr = irq2ctlr(irq);
+   ctlr = cc->id;
if (ctlr < 0)
return IRQ_NONE;
 
-   dev_dbg(data, "dma_ccerr_handler\n");
+   dev_dbg(cc->dev, "dma_ccerr_handler\n");
 
if ((edma_read_array(ctlr, EDMA_EMR, 0) == 0) &&
(edma_read_array(ctlr, EDMA_EMR, 1) == 0) &&
@@ -446,8 +436,8 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
else if (edma_read_array(ctlr, EDMA_EMR, 1))
j = 1;
if (j >= 0) {
-

[PATCH v5 00/24] dmaengine/ARM: Merge the edma drivers into one

2015-10-14 Thread Peter Ujfalusi

Hi,

Changes since v4:
- checkpatch errors/warnings/checks have been fixed in place and not in a
  follow-up patch.
- Sekhar's Acked-by added to patches touching arch/arm/mach-davinci/
- Other comments for v4 have been addressed

Changes since v3:
- Separated the two patches (patches 10 and 11 in v2, patch 10 in v3) which got
  squashed by accident for v3
- Added Tony's Acked-by to patch 11 (for mach-omap2 part)

Changes since v2:
- devm_kasprintf format string fixed
- Additional patch to enable dynamic paRAM slot usage when the channel mapping
  is supported by the eDMA module.
  On am335x we have 256 paRAM slots and 64 DMA channels, this means that we had
  64 slots 'locked away' all the time. The dynamic paRAM slot logic will allow
  us to use all 256 slots freely for any purpose.

Changes since v1:
- Convert edma platform device registration to use platform_device_register_full
- Moved the PM callback also to the dmaengine driver - missed in v1
- Commit message added to:
  ARM/dmaengine: edma: Remove limitation on the number of eDMA controllers
- New patch which reads the flag for the channel mapping support in one place

Cover letter:

With this series the two-driver eDMA setup will be changed to have only one
driver to support eDMA3. The legacy edma interface will be removed and eDMA can
only be used via the dmaengine API from this point on.
In order to do the merge the following improvements have been made:
- One driver instance per eDMA:
 - Any number of eDMA instances are supported (both legacy and DT boot)
- Not relying on global variables, arrays, etc
- Code simplification and optimizations in several places

This change will also help us to do bigger changes in the eDMA driver,
since now we have only one driver to work with.

The series has been tested on:
da850-evm (OMAP-L138)
- with legacy and DT boot (both eDMA0 and eDMA1 are enabled)
- In code swapping the eDMA instances in legacy mode to make sure the second
  instance is handled correctly.

am335x-evmsk
- DT boot

I think this series could go via the dmaengine tree. Changes are trivial under
arch/arm/

Regards,
Peter
---
Peter Ujfalusi (24):
  ARM: common: edma: Fix channel parameter for irq callbacks
  ARM: common: edma: Remove unused functions
  dmaengine: edma: Simplify and optimize the edma_execute path
  ARM: davinci/common: Convert edma driver to handle one eDMA instance
per driver
  ARM/dmaengine: edma: Move of_dma_controller_register to the dmaengine
driver
  ARM: common: edma: Internal API to use pointer to 'struct edma'
  ARM/dmaengine: edma: Public API to use private struct pointer
  ARM/dmaengine: edma: Remove limitation on the number of eDMA
controllers
  ARM: davinci: Use platform_device_register_full() to create pdev for
eDMA
  ARM: davinci: Add dma_mask to eDMA devices
  ARM/dmaengine: edma: Merge the two drivers under drivers/dma/
  dmaengine: edma: Allocate memory dynamically for bitmaps and
structures
  dmaengine: edma: Use devm_kcalloc when possible
  dmaengine: edma: Cleanup regarding the use of dev around the code
  dmaengine: edma: Use dev_dbg instead pr_debug
  dmaengine: edma: Use the edma_write_slot instead open coded
memcpy_toio
  dmaengine: edma: Print warning when linking slots from different eDMA
  dmaengine: edma: Consolidate the comments for functions
  dmaengine: edma: Simplify the interrupt handling
  dmaengine: edma: Move the pending error check into helper function
  dmaengine: edma: Simplify and optimize ccerr interrupt handler
  dmaengine: edma: Read channel mapping support only once from HW
  dmaengine: edma: Rename bitfields for slot and channel usage tracking
  dmaengine: edma: Dynamic paRAM slot handling if HW supports it

 arch/arm/Kconfig  |1 -
 arch/arm/common/Kconfig   |3 -
 arch/arm/common/Makefile  |1 -
 arch/arm/common/edma.c| 1876 -
 arch/arm/mach-davinci/devices-da8xx.c |  122 +--
 arch/arm/mach-davinci/dm355.c |   40 +-
 arch/arm/mach-davinci/dm365.c |   25 +-
 arch/arm/mach-davinci/dm644x.c|   40 +-
 arch/arm/mach-davinci/dm646x.c|   44 +-
 arch/arm/mach-omap2/Kconfig   |1 -
 drivers/dma/Kconfig   |1 -
 drivers/dma/edma.c| 1631 
 include/linux/platform_data/edma.h|  101 --
 13 files changed, 1566 insertions(+), 2320 deletions(-)
 delete mode 100644 arch/arm/common/edma.c

-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCHv3 01/11] arm64: Move swapper pagetable definitions

2015-10-14 Thread Mark Rutland

On Wed, Oct 14, 2015 at 12:20:24PM +0100, Suzuki K. Poulose wrote:
> Move the kernel pagetable (both swapper and idmap) definitions
> from the generic asm/page.h to a new file, asm/kernel-pgtable.h.
> 
> This is mostly a cosmetic change, to clean up the asm/page.h to
> get rid of the arch specific details which are not needed by the
> generic code.
> 
> Also renames the symbols to prevent conflicts. e.g,
>   BLOCK_SHIFT => SWAPPER_BLOCK_SHIFT

This sounds sensible to me, so FWIW:

Acked-by: Mark Rutland 

I have a couple of minor nits below, though.

> Cc: Ard Biesheuvel 
> Cc: Mark Rutland 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Signed-off-by: Suzuki K. Poulose 
> Reviewed-by: Ard Biesheuvel 
> Tested-by: Ard Biesheuvel 
> ---
>  arch/arm64/include/asm/kernel-pgtable.h |   65 
> +++
>  arch/arm64/include/asm/page.h   |   18 -
>  arch/arm64/kernel/head.S|   37 --
>  arch/arm64/kernel/vmlinux.lds.S |1 +
>  4 files changed, 74 insertions(+), 47 deletions(-)
>  create mode 100644 arch/arm64/include/asm/kernel-pgtable.h
> 
> diff --git a/arch/arm64/include/asm/kernel-pgtable.h 
> b/arch/arm64/include/asm/kernel-pgtable.h
> new file mode 100644
> index 000..622929d
> --- /dev/null
> +++ b/arch/arm64/include/asm/kernel-pgtable.h
> @@ -0,0 +1,65 @@
> +/*
> + * asm/kernel-pgtable.h : Kernel page table mapping

Drop the filename from the comment. It's redundant and ends up being
painful when refactoring and moving things around.

> + * Copyright (C) 2015 ARM Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef __ASM_KERNEL_PGTABLE_H
> +#define __ASM_KERNEL_PGTABLE_H
> +
> +/*
> + * The idmap and swapper page tables need some space reserved in the kernel
> + * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
> + * map the kernel. With the 64K page configuration, swapper and idmap need to
> + * map to pte level. The swapper also maps the FDT (see __create_page_tables
> + * for more information). Note that the number of ID map translation levels
> + * could be increased on the fly if system RAM is out of reach for the 
> default
> + * VA range, so 3 pages are reserved in all cases.
> + */
> +#ifdef CONFIG_ARM64_64K_PAGES
> +#define SWAPPER_PGTABLE_LEVELS   (CONFIG_PGTABLE_LEVELS)
> +#else
> +#define SWAPPER_PGTABLE_LEVELS   (CONFIG_PGTABLE_LEVELS - 1)
> +#endif
> +
> +#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
> +#define IDMAP_DIR_SIZE   (3 * PAGE_SIZE)
> +
> +/* Initial memory map size */
> +#ifdef CONFIG_ARM64_64K_PAGES
> +#define SWAPPER_BLOCK_SHIFT  PAGE_SHIFT
> +#define SWAPPER_BLOCK_SIZE   PAGE_SIZE
> +#define SWAPPER_TABLE_SHIFT  PMD_SHIFT
> +#else
> +#define SWAPPER_BLOCK_SHIFT  SECTION_SHIFT
> +#define SWAPPER_BLOCK_SIZE   SECTION_SIZE
> +#define SWAPPER_TABLE_SHIFT  PUD_SHIFT
> +#endif
> +
> +
> +/*
> + * Initial memory map attributes.
> + */
> +#define SWAPPER_PTE_FLAGSPTE_TYPE_PAGE | PTE_AF | PTE_SHARED
> +#define SWAPPER_PMD_FLAGSPMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S
> +
> +#ifdef CONFIG_ARM64_64K_PAGES
> +#define SWAPPER_MM_MMUFLAGS  PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS
> +#else
> +#define SWAPPER_MM_MMUFLAGS  PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS
> +#endif
> +
> +
> +#endif

It would be nice to have the usual comment here for what this is ending,
i.e.

#endif /* __ASM_KERNEL_PGTABLE_H */

Thanks,
Mark.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] cpufreq, intel_pstate, Fix intel_pstate powersave min_perf_pct value

2015-10-14 Thread Prarit Bhargava

Systems that initialize the intel_pstate driver with the performance
governor, and then switch to the powersave governor, will not transition to
lower cpu frequencies until /sys/devices/system/cpu/intel_pstate/min_perf_pct
is set to a low value.

The behavior of governor switching changed after commit a04759924e25
("[cpufreq] intel_pstate: honor user space min_perf_pct override on
 resume").  The commit introduced tracking of performance percentage
changes via sysfs in order to restore userspace changes during
suspend/resume.  The problem occurs because the global values of the newly
introduced max_sysfs_pct and min_sysfs_pct are not lowered on the governor
change and this causes the powersave governor to inherit the performance
governor's settings.

A simple change would have been to reset max_sysfs_pct to 100 and
min_sysfs_pct to 0 on a governor change, which fixes the problem with
governor switching.  However, since we cannot break userspace[1] the fix
is now to give each governor its own limits storage area so that governor
specific changes are tracked.

I successfully tested this by booting with both the performance governor
and the powersave governor by default, and switching between the two
governors (while monitoring /sys/devices/system/cpu/intel_pstate/ values,
and looking at the output of cpupower frequency-info).  Suspend/Resume
testing was performed by Doug Smythies.

[1] Systems which suspend/resume using the unmaintained pm-utils package
will always transition to the performance governor before the suspend and
after the resume.  This means a system using the powersave governor will
go from powersave to performance, then suspend/resume, performance to
powersave.  The simple change during governor changes would have been
overwritten when the governor changed before and after the suspend/resume.
I have submitted https://bugzilla.redhat.com/show_bug.cgi?id=1271225
against Fedora to remove the 94cpufreq file that causes the problem.  It
should be noted that pm-utils is obsoleted with newer versions of systemd.

Cc: Kristen Carlson Accardi 
Cc: "Rafael J. Wysocki" 
Cc: Viresh Kumar 
Cc: linux...@vger.kernel.org
Cc: Doug Smythies 
Signed-off-by: Prarit Bhargava 
---
 drivers/cpufreq/intel_pstate.c |  120 +---
 1 file changed, 75 insertions(+), 45 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3af9dd7..78b4be5 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -156,7 +156,20 @@ struct perf_limits {
int min_sysfs_pct;
 };
 
-static struct perf_limits limits = {
+static struct perf_limits performance_limits = {
+   .no_turbo = 0,
+   .turbo_disabled = 0,
+   .max_perf_pct = 100,
+   .max_perf = int_tofp(1),
+   .min_perf_pct = 100,
+   .min_perf = int_tofp(1),
+   .max_policy_pct = 100,
+   .max_sysfs_pct = 100,
+   .min_policy_pct = 0,
+   .min_sysfs_pct = 0,
+};
+
+static struct perf_limits powersave_limits = {
.no_turbo = 0,
.turbo_disabled = 0,
.max_perf_pct = 100,
@@ -169,6 +182,12 @@ static struct perf_limits limits = {
.min_sysfs_pct = 0,
 };
 
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
+static struct perf_limits *limits = &performance_limits;
+#else
+static struct perf_limits *limits = &powersave_limits;
+#endif
+
 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
 int deadband, int integral) {
pid->setpoint = setpoint;
@@ -255,7 +274,7 @@ static inline void update_turbo_state(void)
 
cpu = all_cpu_data[0];
rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
-   limits.turbo_disabled =
+   limits->turbo_disabled =
(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
 }
@@ -274,14 +293,14 @@ static void intel_pstate_hwp_set(void)
 
for_each_online_cpu(cpu) {
rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, );
-   adj_range = limits.min_perf_pct * range / 100;
+   adj_range = limits->min_perf_pct * range / 100;
min = hw_min + adj_range;
value &= ~HWP_MIN_PERF(~0L);
value |= HWP_MIN_PERF(min);
 
-   adj_range = limits.max_perf_pct * range / 100;
+   adj_range = limits->max_perf_pct * range / 100;
max = hw_min + adj_range;
-   if (limits.no_turbo) {
+   if (limits->no_turbo) {
hw_max = HWP_GUARANTEED_PERF(cap);
if (hw_max < max)
max = hw_max;
@@ -350,7 +369,7 @@ static void __init intel_pstate_debug_expose_params(void)
static ssize_t show_##file_name \
(struct kobject *kobj, struct attribute *attr, char *buf)   \
{   \
-

Re: [PATCH 2/5] mtd: nand: omap2: Start dma request before enabling prefetch

2015-10-14 Thread Roger Quadros

On 13/10/15 04:38, Franklin S Cooper Jr wrote:
> The prefetch engine sends a dma request once a FIFO threshold has
> been met. No other requests are received until the previous request
> is handled.
> 
> Starting an edma transfer (dma_async_issue_pending) results in any
> previous event for the dma channel to be cleared. Therefore, starting
> the prefetch engine before initiating the dma transfer may result in
> the prefetch triggering a dma request but instead of it being handled
> it can end up being cleared. This will result in a hang since the code
> will continue to wait for the dma request to complete.
> 
> By initiating the dma request before enabling the prefetch engine this
> race condition is avoided and no dma request are missed/cleared.
> 
> Signed-off-by: Franklin S Cooper Jr 
> ---
>  drivers/mtd/nand/omap2.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
> index 957c32f..94d11de 100644
> --- a/drivers/mtd/nand/omap2.c
> +++ b/drivers/mtd/nand/omap2.c
> @@ -509,6 +509,9 @@ static inline int omap_nand_dma_transfer(struct mtd_info 
> *mtd, void *addr,
>   tx->callback_param = >comp;
>   dmaengine_submit(tx);
>  
> + init_completion(>comp);
> + dma_async_issue_pending(info->dma);
> +
>   /*  configure and start prefetch transfer */
>   ret = omap_prefetch_enable(info->gpmc_cs,
>   PREFETCH_FIFOTHRESHOLD_MAX, 0x1, len, is_write, info);
> @@ -516,9 +519,6 @@ static inline int omap_nand_dma_transfer(struct mtd_info 
> *mtd, void *addr,
>   /* PFPW engine is busy, use cpu copy method */
>   goto out_copy_unmap;
>  
> - init_completion(>comp);
> - dma_async_issue_pending(info->dma);
> -
>   /* setup and start DMA using dma_addr */

Is the above comment misplaced after this change?

>   wait_for_completion(>comp);
>   tim = 0;
> 

cheers,
-roger
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/2] hsi: omap_ssi_port: Prevent warning if cawake_gpio is not defined.

2015-10-14 Thread Sebastian Reichel

Hi,

On Wed, Oct 14, 2015 at 01:44:15PM +0300, Roger Quadros wrote:
> The error handling path is broken as cawake_gpio was defined as
> unsigned integer causing the following warnings on boards that don't
> use SSI port and so don't have cawake_gpio defined. e.g. beagleboard C4.
>
> [...]

Thanks, I queued this one together with a

Fixes: b209e047bc743 ("HSI: Introduce OMAP SSI driver")

-- Sebastian


signature.asc
Description: PGP signature

Re: [PATCH] ARM: dts: am437x-gp-evm: Add wakeup interrupt source for pixcir_i2c_ts

2015-10-14 Thread Vignesh R



On 10/14/2015 04:34 PM, Roger Quadros wrote:
> Vignesh,
> 
> On 14/10/15 12:12, Vignesh R wrote:
>>
>>
>> On 10/14/2015 02:16 PM, Roger Quadros wrote:
>>
>>>
>>> On 14/10/15 08:52, Vignesh R wrote:
 On am437x-gp-evm, pixcir_i2c_ts can wakeup the system from lower power
 state via pinctrl and IO daisy chain using generic wakeirq framework.
 With commit 3fffd1283927 ("i2c: allow specifying separate wakeup
 interrupt in device tree") i2c core allows optional wakeirq to be
 specified via device tree. Add wakeup irq entry to enable pixcir_i2c_ts
 to wake the system from low power state.

 Signed-off-by: Vignesh R 
 ---
  arch/arm/boot/dts/am437x-gp-evm.dts | 5 +
  1 file changed, 5 insertions(+)

 diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts 
 b/arch/arm/boot/dts/am437x-gp-evm.dts
 index 22038f21f228..69e93af7df0d 100644
 --- a/arch/arm/boot/dts/am437x-gp-evm.dts
 +++ b/arch/arm/boot/dts/am437x-gp-evm.dts
 @@ -581,8 +581,13 @@
  
attb-gpio = < 22 GPIO_ACTIVE_HIGH>;
  
 +  interrupts-extended = < 22 GPIO_ACTIVE_HIGH>,
 +<_pinmux 0x264>;
>>>
>>> How does this work?
>>>
>>> interrupts-extended property must have
>>> 1) interrupt parent
>>> 2) interrupt number
>>> 3) interrupt flags
>>>
>>> Your change doesn't seem to comply with those requirements.
>>
>> AFAIU, interrupts-extended has two parts: interrupt parent phandle and
>> interrupt specifier.
>> The number of cells in interrupt specifier is determined by
>> interrupt-cells property of interrupt parent node.
> 
> Got it.
> 
>> In above case, gpio3 has interrupt-cells = 2 hence interrupt specifier
>> has interrupt number and interrupt flag field.
> 
> But is GPIO_ACTIVE_HIGH an interrupt flag?

Oops.. I will change it to IRQ_TYPE_NONE as represented in interrupts
property.

> 
>> But in case am43xx_pinmux node, interrupt-cells is 1 hence has no
>> interrupt flag field.
>>
> Understood, thanks. Might be worth adding a comment as to what 0x264 means 
> though.
> 

Will add a one-line comment indicating it's the offset of the gpio3_22
padconf register from the am43xx_pinmux base.

> cheers,
> -roger
> 

-- 
Regards
Vignesh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

< 4 5 6 7 8 9 10 11 12 13 >

801 - 900 of 2330 matches

Mail list logo