Re: [PATCH v3 6/6] coresight: etm4x: Support panic kdump

2018-01-09 Thread Leo Yan
On Tue, Jan 09, 2018 at 01:21:28PM -0700, Mathieu Poirier wrote:
> On Thu, Dec 21, 2017 at 04:20:15PM +0800, Leo Yan wrote:
> > ETMv4 hardware information and configuration needs to be saved as
> > metadata; these metadata should be compatible with tool 'perf' and
> > can be used for tracing data analysis.  ETMv4 usually works as tracer
> > per CPU, we cannot wait to gather ETM info after the CPU has panicked
> > and cannot execute dump operations for itself; so should gather
> > metadata when the corresponding CPU is alive.
> > 
> > Since values in TRCIDR{0, 1, 2, 8} and TRCAUTHSTATUS are read-only and
> > won't change at runtime, those register values are filled in when
> > tracers are instantiated.
> > 
> > The configuration and control registers TRCCONFIGR and TRCTRACEIDR are
> > dynamically configured, we record their value when enabling coresight
> > path.  When operating from sysFS tracer these two registers are recorded
> > in etm4_enable_sysfs() and add kdump node into list, and remove the
> > kdump node in etm4_disable_sysfs().  When operating from perf,
> > etm_setup_aux() adds all tracers to the dump list and etm4_enable_perf()
> > is used to record configuration registers and update dump buffer info,
> > this can avoid unnecessary list addition and deletion operations.
> > Removal of the tracers from the dump list is done in function
> > free_event_data().
> > 
> > Suggested-by: Mathieu Poirier 
> > Signed-off-by: Leo Yan 
> > ---
> >  drivers/hwtracing/coresight/coresight-etm-perf.c | 12 +++-
> >  drivers/hwtracing/coresight/coresight-etm4x.c| 23 
> > +++
> >  drivers/hwtracing/coresight/coresight-etm4x.h| 15 +++
> >  3 files changed, 49 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c 
> > b/drivers/hwtracing/coresight/coresight-etm-perf.c
> > index 8a0ad77..fec779b 100644
> > --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
> > +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
> > @@ -137,6 +137,12 @@ static void free_event_data(struct work_struct *work)
> > }
> >  
> > for_each_cpu(cpu, mask) {
> > +   struct coresight_device *csdev;
> > +
> > +   csdev = per_cpu(csdev_src, cpu);
> > +   if (csdev)
> > +   coresight_kdump_del(csdev);
> > +
> > if (!(IS_ERR_OR_NULL(event_data->path[cpu])))
> > coresight_release_path(event_data->path[cpu]);
> > }
> > @@ -195,7 +201,7 @@ static void etm_free_aux(void *data)
> >  static void *etm_setup_aux(int event_cpu, void **pages,
> >int nr_pages, bool overwrite)
> >  {
> > -   int cpu;
> > +   int cpu, ret;
> > cpumask_t *mask;
> > struct coresight_device *sink;
> > struct etm_event_data *event_data = NULL;
> > @@ -238,6 +244,10 @@ static void *etm_setup_aux(int event_cpu, void **pages,
> > event_data->path[cpu] = coresight_build_path(csdev, sink);
> > if (IS_ERR(event_data->path[cpu]))
> > goto err;
> > +
> > +   ret = coresight_kdump_add(csdev, cpu);
> 
> Aren't you missing the configuration for trcconfigr and trctraceidr?

Ah, should update these two configurations in function
etm4_enable_perf()?

> > +   if (ret)
> > +   goto err;
> > }
> >  
> > if (!sink_ops(sink)->alloc_buffer)
> > diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c 
> > b/drivers/hwtracing/coresight/coresight-etm4x.c
> > index cf364a5..cbde398 100644
> > --- a/drivers/hwtracing/coresight/coresight-etm4x.c
> > +++ b/drivers/hwtracing/coresight/coresight-etm4x.c
> > @@ -258,10 +258,19 @@ static int etm4_enable_perf(struct coresight_device 
> > *csdev,
> >  static int etm4_enable_sysfs(struct coresight_device *csdev)
> >  {
> > struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
> > +   struct etmv4_config *config = >config;
> > +   struct etmv4_metadata *metadata = >metadata;
> > int ret;
> >  
> > spin_lock(>spinlock);
> >  
> > +   /* Update meta data and add into kdump list */
> > +   metadata->trcconfigr = config->cfg;
> > +   metadata->trctraceidr = drvdata->trcid;
> > +
> > +   coresight_kdump_add(csdev, drvdata->cpu);
> > +   coresight_kdump_update(csdev, (char *)metadata, sizeof(*metadata));
> > +
> > /*
> >  * Executing etm4_enable_hw on the cpu whose ETM is being enabled
> >  * ensures that register writes occur when cpu is powered.
> > @@ -384,6 +393,9 @@ static void etm4_disable_sysfs(struct coresight_device 
> > *csdev)
> >  */
> > smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1);
> >  
> > +   /* Delete from kdump list */
> > +   coresight_kdump_del(csdev);
> > +
> > spin_unlock(>spinlock);
> > cpus_read_unlock();
> >  
> > @@ -438,6 +450,7 @@ static void etm4_init_arch_data(void *info)
> > u32 etmidr4;
> > u32 etmidr5;

Re: [PATCH v3 3/6] coresight: Support panic kdump functionality

2018-01-09 Thread Leo Yan
On Tue, Jan 09, 2018 at 11:41:26AM -0700, Mathieu Poirier wrote:
> On Thu, Dec 21, 2017 at 04:20:12PM +0800, Leo Yan wrote:
> > After kernel panic happens, coresight has many useful info can be used
> > for analysis.  For example, the trace info from ETB RAM can be used to
> > check the CPU execution flows before crash.  So we can save the tracing
> > data from sink devices, and rely on kdump to save DDR content and uses
> > "crash" tool to extract coresight dumping from vmcore file.
> > 
> > This patch is to add a simple framework to support panic dump
> > functionality; it registers panic notifier, and provide the general APIs
> > {coresight_kdump_add|coresight_kdump_del} as helper functions so any
> > coresight device can add itself into dump list or delete as needed.
> > 
> > This driver provides helper function coresight_kdump_update() to update
> > the dump buffer base address and buffer size.  This function can be used
> > by coresight driver, e.g. it can be used to save ETM meta data info at
> > runtime and these info can be prepared pre panic happening.
> > 
> > When kernel panic happens, the notifier iterates dump list and calls
> > callback function to dump device specific info.  The panic dump is
> > mainly used to dump trace data so we can get to know the execution flow
> > before the panic happens.
> > 
> > Signed-off-by: Leo Yan 
> > ---
> >  drivers/hwtracing/coresight/Kconfig|   9 ++
> >  drivers/hwtracing/coresight/Makefile   |   1 +
> >  .../hwtracing/coresight/coresight-panic-kdump.c| 154 
> > +
> >  drivers/hwtracing/coresight/coresight-priv.h   |  13 ++
> >  include/linux/coresight.h  |   7 +
> >  5 files changed, 184 insertions(+)
> >  create mode 100644 drivers/hwtracing/coresight/coresight-panic-kdump.c
> > 
> > diff --git a/drivers/hwtracing/coresight/Kconfig 
> > b/drivers/hwtracing/coresight/Kconfig
> > index ef9cb3c..4812529 100644
> > --- a/drivers/hwtracing/coresight/Kconfig
> > +++ b/drivers/hwtracing/coresight/Kconfig
> > @@ -103,4 +103,13 @@ config CORESIGHT_CPU_DEBUG
> >   properly, please refer Documentation/trace/coresight-cpu-debug.txt
> >   for detailed description and the example for usage.
> >  
> > +config CORESIGHT_PANIC_KDUMP
> > +   bool "CoreSight Panic Kdump driver"
> > +   depends on ARM || ARM64
> 
> At this time only ETMv4 supports the feature, so it is only ARM64.

Thanks for reviewing, Mathieu.

Will change to only for ARM64.

> > +   help
> > + This driver provides panic kdump functionality for CoreSight
> > + devices.  When a kernel panic happens a device supplied callback 
> > function
> > + is used to save trace data to memory. From there we rely on kdump to 
> > extract
> > + the trace data from kernel dump file.
> > +
> >  endif
> > diff --git a/drivers/hwtracing/coresight/Makefile 
> > b/drivers/hwtracing/coresight/Makefile
> > index 61db9dd..946fe19 100644
> > --- a/drivers/hwtracing/coresight/Makefile
> > +++ b/drivers/hwtracing/coresight/Makefile
> > @@ -18,3 +18,4 @@ obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o 
> > \
> >  obj-$(CONFIG_CORESIGHT_DYNAMIC_REPLICATOR) += 
> > coresight-dynamic-replicator.o
> >  obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o
> >  obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o
> > +obj-$(CONFIG_CORESIGHT_PANIC_KDUMP) += coresight-panic-kdump.o
> > diff --git a/drivers/hwtracing/coresight/coresight-panic-kdump.c 
> > b/drivers/hwtracing/coresight/coresight-panic-kdump.c
> > new file mode 100644
> > index 000..c21d20b
> > --- /dev/null
> > +++ b/drivers/hwtracing/coresight/coresight-panic-kdump.c
> > @@ -0,0 +1,154 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +// Copyright (c) 2017 Linaro Limited.
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include "coresight-priv.h"
> > +
> > +typedef void (*coresight_cb_t)(void *data);
> > +
> > +/**
> > + * struct coresight_kdump_node - Node information for dump
> > + * @cpu:   The cpu this node is affined to.
> > + * @csdev: Handler for coresight device.
> > + * @buf:   Pointer for dump buffer.
> > + * @buf_size:  Length of dump buffer.
> > + * @list:  Hook to the list.
> > + */
> > +struct coresight_kdump_node {
> > +   int cpu;
> > +   struct coresight_device *csdev;
> > +   char *buf;
> > +   unsigned int buf_size;
> > +   struct list_head list;
> > +};
> > +
> > +static DEFINE_SPINLOCK(coresight_kdump_lock);
> > +static LIST_HEAD(coresight_kdump_list);
> > +static struct notifier_block coresight_kdump_nb;
> > +
> > +int coresight_kdump_update(struct coresight_device *csdev, char *buf,
> > +  unsigned int buf_size)
> > +{
> > +   struct coresight_kdump_node *node = csdev->dump_node;
> > +
> > +   if (!node) {
> > +   dev_err(>dev, "Failed to update dump node.\n");
> > + 

[RFC] doc: fix code snippet build warnings

2018-01-09 Thread Tobin C. Harding
Posting as RFC in the hope that someone knows how to massage sphinx
correctly to fix this patch.

Currently function kernel-doc contains a multi-line code snippet. This
is causing sphinx to emit 5 build warnings

WARNING: Unexpected indentation.
WARNING: Unexpected indentation.
WARNING: Block quote ends without a blank line; unexpected unindent.
WARNING: Block quote ends without a blank line; unexpected unindent.
WARNING: Inline literal start-string without end-string.

And the snippet is not rendering correctly in HTML.

We can stop sphinx complaining by using '::' instead of the currently
used '``' however this still does not render correctly in HTML. The
rendering is [arguably] better but still incorrect. Sphinx renders two
function calls thus:

:c:func:`rcu_read_lock()`;

The rest of the snippet does however have correct spacing.

Use '::' to pre-fix code snippet. Clears build warnings but does not
render correctly.

Signed-off-by: Tobin C. Harding 
---

To view current broken rendering see

https://www.kernel.org/doc/html/latest/core-api/kernel-api.html?highlight=rcu_pointer_handoff#c.rcu_pointer_handoff

 include/linux/rcupdate.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a6ddc42f87a5..cc10e772e3e9 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -568,7 +568,8 @@ static inline void rcu_preempt_sleep_check(void) { }
  * is handed off from RCU to some other synchronization mechanism, for
  * example, reference counting or locking.  In C11, it would map to
  * kill_dependency().  It could be used as follows:
- * ``
+ * ::
+ *
  * rcu_read_lock();
  * p = rcu_dereference(gp);
  * long_lived = is_long_lived(p);
@@ -579,7 +580,6 @@ static inline void rcu_preempt_sleep_check(void) { }
  * p = rcu_pointer_handoff(p);
  * }
  * rcu_read_unlock();
- *``
  */
 #define rcu_pointer_handoff(p) (p)
 
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: CRNG acronym

2018-01-09 Thread Theodore Ts'o
On Wed, Jan 10, 2018 at 02:47:19AM +0200, Bob Bib wrote:
> Hello,
> 
> just curious,
> what's the official meaning of the "CRNG" acronym (e. g., in [1])?
> 
> Some searching suggests that "C[S]RNG"
> means "cryptographic[-strength] random number generator".
> [1] 
> https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/tree/drivers/char/random.c
> [2] https://lwn.net/Articles/691071/
> [3] https://lwn.net/Articles/660452/
> [4] https://marc.info/?l=linux-kernel&m=110247024805208&w=2

CRNG is an acronym for Cryptographic Random Number Generator.

Some people will use CSRNG, where the "S" can be "strong" or
"strength".  I prefer CRNG.

- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


CRNG acronym

2018-01-09 Thread Bob Bib

Hello,

just curious,
what's the official meaning of the "CRNG" acronym (e. g., in [1])?

Some searching suggests that "C[S]RNG"
means "cryptographic[-strength] random number generator".
 
[1] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/tree/drivers/char/random.c

[2] https://lwn.net/Articles/691071/
[3] https://lwn.net/Articles/660452/
[4] https://marc.info/?l=linux-kernel&m=110247024805208&w=2

--
Best wishes,
Bob
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 0/7] cgroup-aware OOM killer

2018-01-09 Thread David Rientjes
On Thu, 30 Nov 2017, Andrew Morton wrote:

> > This patchset makes the OOM killer cgroup-aware.
> 
> Thanks, I'll grab these.
> 
> There has been controversy over this patchset, to say the least.  I
> can't say that I followed it closely!  Could those who still have
> reservations please summarise their concerns and hopefully suggest a
> way forward?
> 

Yes, I'll summarize what my concerns have been in the past and what they 
are wrt the patchset as it stands in -mm.  None of them originate from my 
current usecase or anticipated future usecase of the oom killer for 
system-wide or memcg-constrained oom conditions.  They are based purely on 
the patchset's use of an incomplete and unfair heuristic for deciding 
which cgroup to target.

I'll also suggest simple changes to the patchset, which I have in the 
past, that can be made to address all of these concerns.

1. The unfair comparison of the root mem cgroup vs leaf mem cgroups

The patchset uses two different heuristics to compare root and leaf mem 
cgroups and scores them based on number of pages.  For the root mem 
cgroup, it totals the /proc/pid/oom_score of all processes attached: 
that's based on rss, swap, pgtables, and, most importantly, oom_score_adj.  
For leaf mem cgroups, it's based on that memcg's anonymous, unevictable, 
unreclaimable slab, kernel stack, and swap counters.  These can be wildly 
different independent of /proc/pid/oom_score_adj, but the most obvious 
unfairness comes from users who tune oom_score_adj.

An example: start a process that faults 1GB of anonymous memory and leave 
it attached to the root mem cgroup.  Start six more processes that each 
fault 1GB of anonymous memory and attach them to a leaf mem cgroup.  Set 
all processes to have /proc/pid/oom_score_adj of 1000.  System oom kill 
will always kill the 1GB process attached to the root mem cgroup.  It's 
because oom_badness() relies on /proc/pid/oom_score_adj, which is used to 
evaluate the root mem cgroup, and leaf mem cgroups completely disregard 
it.

In this example, the leaf mem cgroup's score is 1,573,044, the number of 
pages for the 6GB of faulted memory.  The root mem cgroup's score is 
12,652,907, eight times larger even though its usage is six times smaller.

This is caused by the patchset disregarding oom_score_adj entirely for 
leaf mem cgroups and relying on it heavily for the root mem cgroup.  It's 
the complete opposite result of what the cgroup aware oom killer 
advertises.

It also works the other way, if a large memory hog is attached to the root 
mem cgroup but has a negative oom_score_adj it is never killed and random 
processes are nuked solely because they happened to be attached to a leaf 
mem cgroup.  This behavior wrt oom_score_adj is completely undocumented, 
so I can't presume that it is either known or tested.

Solution: compare the root mem cgroup and leaf mem cgroups equally with 
the same criteria by doing hierarchical accounting of usage and 
subtracting from total system usage to find root usage.

2. Evading the oom killer by attaching processes to child cgroups

Any cgroup on the system can attach all their processes to individual 
child cgroups.  This is functionally the same as doing

for i in $(cat cgroup.procs); do mkdir $i; echo $i > $i/cgroup.procs; 
done

without the no internal process constraint introduced with cgroup v2.  All 
child cgroups are evaluated based on their own usage: all anon, 
unevictable, and unreclaimable slab as described previously.  It requires 
an individual cgroup to be the single largest consumer to be targeted by 
the oom killer.

An example: allow users to manage two different mem cgroup hierarchies 
limited to 100GB each.  User A uses 10GB of memory and user B uses 90GB of 
memory in their respective hierarchies.  On a system oom condition, we'd 
expect at least one process from user B's hierarchy would always be oom 
killed with the cgroup aware oom killer.  In fact, the changelog 
explicitly states it solves an issue where "1) There is no fairness 
between containers. A small container with few large processes will be 
chosen over a large one with huge number of small processes."

The opposite becomes true, however, if user B creates child cgroups and 
distributes its processes such that each child cgroup's usage never 
exceeds 10GB of memory.  This can either be done intentionally to 
purposefully have a low cgroup memory footprint to evade the oom killer or 
unintentionally with cgroup v2 to allow those individual processes to be 
constrained by other cgroups in a single hierarchy model.  User A, using 
10% of his memory limit, is always oom killed instead of user B, using 90% 
of his memory limit.

Others have commented it's still possible to do this with a per-process 
model if users split their processes into many subprocesses with small 
memory footprints.

Solution: comparing cgroups must be done hierarchically.  Neither user A 
nor user B can evade the oom killer because 

[PATCH linux dev-4.10 0/6] Add support PECI and PECI hwmon drivers

2018-01-09 Thread Jae Hyun Yoo
From: Jae Hyun Yoo 

Hello,

This patch set provides support for PECI of AST2400/2500 which can give us PECI
functionalities such as temperature monitoring, platform manageability,
processor diagnostics and failure analysis. Also provides generic peci.h and
peci_ioctl.h headers to provide compatibility to peci drivers that can be
implemented later e.g. Nuvoton's BMC SoC family.

The misc peci driver can be used as a multi-purpose PECI controller driver which
serializes all PECI transactions coming from user space and from other
kernel modules. This misc peci driver could be replaced with other BMC chipsets'
implementation if the implementation provides a compatible 'peci_ioctl'
inter-module call and ioctl scheme defined in peci.h and peci_ioctl.h files.

The hwmon peci driver implements a generic PECI hwmon feature which runs
with a PECI misc driver that supports the compatible native PECI command suite for
retrieving temperatures of the CPU package, CPU cores and DIMM components.

Please review.

-Jae

Jae Hyun Yoo (6):
  Documentation: dt-bindings: Add Aspeed PECI
  ARM: dts: aspeed: peci: Add Aspeed PECI
  drivers/misc: Add driver for Aspeed PECI and generic PECI headers
  Documentation: dt-bindings: Add a generic PECI hwmon
  Documentation: hwmon: Add a generic PECI hwmon
  drivers/hwmon: Add a driver for a generic PECI hwmon

 .../devicetree/bindings/hwmon/peci-hwmon.txt   |   33 +
 .../devicetree/bindings/misc/aspeed-peci.txt   |   55 +
 Documentation/hwmon/peci-hwmon |   74 ++
 arch/arm/boot/dts/aspeed-g4.dtsi   |   14 +
 arch/arm/boot/dts/aspeed-g5.dtsi   |   14 +
 drivers/hwmon/Kconfig  |6 +
 drivers/hwmon/Makefile |1 +
 drivers/hwmon/peci-hwmon.c |  953 +
 drivers/misc/Kconfig   |9 +
 drivers/misc/Makefile  |1 +
 drivers/misc/aspeed-peci.c | 1130 
 include/misc/peci.h|   11 +
 include/uapi/linux/Kbuild  |1 +
 include/uapi/linux/peci_ioctl.h|  270 +
 14 files changed, 2572 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/hwmon/peci-hwmon.txt
 create mode 100644 Documentation/devicetree/bindings/misc/aspeed-peci.txt
 create mode 100644 Documentation/hwmon/peci-hwmon
 create mode 100644 drivers/hwmon/peci-hwmon.c
 create mode 100644 drivers/misc/aspeed-peci.c
 create mode 100644 include/misc/peci.h
 create mode 100644 include/uapi/linux/peci_ioctl.h

-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH linux dev-4.10 3/6] drivers/misc: Add driver for Aspeed PECI and generic PECI headers

2018-01-09 Thread Jae Hyun Yoo
This commit adds driver implementation for Aspeed PECI. Also adds
generic peci.h and peci_ioctl.h files to provide compatibility
to peci drivers that can be implemented later e.g. Nuvoton's BMC
SoC family.

Signed-off-by: Jae Hyun Yoo 
---
 drivers/misc/Kconfig|9 +
 drivers/misc/Makefile   |1 +
 drivers/misc/aspeed-peci.c  | 1130 +++
 include/misc/peci.h |   11 +
 include/uapi/linux/Kbuild   |1 +
 include/uapi/linux/peci_ioctl.h |  270 ++
 6 files changed, 1422 insertions(+)
 create mode 100644 drivers/misc/aspeed-peci.c
 create mode 100644 include/misc/peci.h
 create mode 100644 include/uapi/linux/peci_ioctl.h

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 02ffdd1..96e1e04 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -782,6 +782,15 @@ config ASPEED_LPC_SNOOP
  allows the BMC to listen on and save the data written by
  the host to an arbitrary LPC I/O port.
 
+config ASPEED_PECI
+   tristate "Aspeed AST2400/AST2500 PECI support"
+   select CRC8
+   select REGMAP_MMIO
+   depends on ARCH_ASPEED || COMPILE_TEST
+   help
+ Provides a driver for Platform Environment Control Interface (PECI)
+ controller on Aspeed AST2400/AST2500 SoC.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index ab8af76..8a22455 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_CXL_BASE)+= cxl/
 obj-$(CONFIG_PANEL) += panel.o
 obj-$(CONFIG_ASPEED_LPC_CTRL)  += aspeed-lpc-ctrl.o
 obj-$(CONFIG_ASPEED_LPC_SNOOP) += aspeed-lpc-snoop.o
+obj-$(CONFIG_ASPEED_PECI)   += aspeed-peci.o
 
 lkdtm-$(CONFIG_LKDTM)  += lkdtm_core.o
 lkdtm-$(CONFIG_LKDTM)  += lkdtm_bugs.o
diff --git a/drivers/misc/aspeed-peci.c b/drivers/misc/aspeed-peci.c
new file mode 100644
index 000..04fb794
--- /dev/null
+++ b/drivers/misc/aspeed-peci.c
@@ -0,0 +1,1130 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2012-2020 ASPEED Technology Inc.
+// Copyright (c) 2017 Intel Corporation
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define SOC_NAME "aspeed"
+#define DEVICE_NAME "peci"
+
+#define DUMP_DEBUG 0
+
+/* Aspeed PECI Registers */
+#define AST_PECI_CTRL 0x00
+#define AST_PECI_TIMING   0x04
+#define AST_PECI_CMD  0x08
+#define AST_PECI_CMD_CTRL 0x0c
+#define AST_PECI_EXP_FCS  0x10
+#define AST_PECI_CAP_FCS  0x14
+#define AST_PECI_INT_CTRL 0x18
+#define AST_PECI_INT_STS  0x1c
+#define AST_PECI_W_DATA0  0x20
+#define AST_PECI_W_DATA1  0x24
+#define AST_PECI_W_DATA2  0x28
+#define AST_PECI_W_DATA3  0x2c
+#define AST_PECI_R_DATA0  0x30
+#define AST_PECI_R_DATA1  0x34
+#define AST_PECI_R_DATA2  0x38
+#define AST_PECI_R_DATA3  0x3c
+#define AST_PECI_W_DATA4  0x40
+#define AST_PECI_W_DATA5  0x44
+#define AST_PECI_W_DATA6  0x48
+#define AST_PECI_W_DATA7  0x4c
+#define AST_PECI_R_DATA4  0x50
+#define AST_PECI_R_DATA5  0x54
+#define AST_PECI_R_DATA6  0x58
+#define AST_PECI_R_DATA7  0x5c
+
+/* AST_PECI_CTRL - 0x00 : Control Register */
+#define PECI_CTRL_SAMPLING_MASK GENMASK(19, 16)
+#define PECI_CTRL_SAMPLING(x)   ((x << 16) & PECI_CTRL_SAMPLING_MASK)
+#define PECI_CTRL_SAMPLING_GET(x)   ((x & PECI_CTRL_SAMPLING_MASK) >> 16)
+#define PECI_CTRL_READ_MODE_MASKGENMASK(13, 12)
+#define PECI_CTRL_READ_MODE(x)  ((x << 12) & PECI_CTRL_READ_MODE_MASK)
+#define PECI_CTRL_READ_MODE_GET(x)  ((x & PECI_CTRL_READ_MODE_MASK) >> 12)
+#define PECI_CTRL_READ_MODE_COUNT   BIT(12)
+#define PECI_CTRL_READ_MODE_DBG BIT(13)
+#define PECI_CTRL_CLK_SOURCE_MASK   BIT(11)
+#define PECI_CTRL_CLK_SOURCE(x) ((x << 11) & PECI_CTRL_CLK_SOURCE_MASK)
+#define PECI_CTRL_CLK_SOURCE_GET(x) ((x & PECI_CTRL_CLK_SOURCE_MASK) >> 11)
+#define PECI_CTRL_CLK_DIV_MASK  GENMASK(10, 8)
+#define PECI_CTRL_CLK_DIV(x)((x << 8) & PECI_CTRL_CLK_DIV_MASK)
+#define PECI_CTRL_CLK_DIV_GET(x)((x & PECI_CTRL_CLK_DIV_MASK) >> 8)
+#define PECI_CTRL_INVERT_OUTBIT(7)
+#define PECI_CTRL_INVERT_IN BIT(6)
+#define PECI_CTRL_BUS_CONTENT_ENBIT(5)
+#define PECI_CTRL_PECI_EN   BIT(4)
+#define PECI_CTRL_PECI_CLK_EN   BIT(0)
+
+/* AST_PECI_TIMING - 0x04 : Timing Negotiation Register */
+#define PECI_TIMING_MESSAGE_MASK   GENMASK(15, 8)
+#define PECI_TIMING_MESSAGE(x) ((x << 8) & PECI_TIMING_MESSAGE_MASK)
+#define PECI_TIMING_MESSAGE_GET(x) ((x & PECI_TIMING_MESSAGE_MASK) >> 8)
+#define PECI_TIMING_ADDRESS_MASK   GENMASK(7, 0)
+#define PECI_TIMING_ADDRESS(x) (x & PECI_TIMING_ADDRESS_MASK)
+#define PECI_TIMING_ADDRESS_GET(x) (x & PECI_TIMING_ADDRESS_MASK)
+
+/* AST_PECI_CMD - 0x08 : Command 

[PATCH linux dev-4.10 1/6] Documentation: dt-bindings: Add Aspeed PECI

2018-01-09 Thread Jae Hyun Yoo
This commit adds a dt-bindings document for Aspeed PECI.

Signed-off-by: Jae Hyun Yoo 
---
 .../devicetree/bindings/misc/aspeed-peci.txt   | 55 ++
 1 file changed, 55 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/misc/aspeed-peci.txt

diff --git a/Documentation/devicetree/bindings/misc/aspeed-peci.txt 
b/Documentation/devicetree/bindings/misc/aspeed-peci.txt
new file mode 100644
index 000..d277c73
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/aspeed-peci.txt
@@ -0,0 +1,55 @@
+* ASPEED PECI (Platform Environment Control Interface) misc driver.
+
+Hardware Interfaces:
+- This driver implements support for the ASPEED AST2400/2500 PECI which has the
+  following features:
+   - Directly connected to APB bus
+   - Intel PECI 3.1 compliant (PECI 3.0 for AST2400)
+   - Maximum packet length is 256 bytes (Baseline transmission unit)
+   - Support up to 8 CPUs and 2 domains per CPU
+   - Integrate PECI compliant I/O buffers, can connect to PECI bus directly
+   - Transmit buffer 32 bytes and receive buffer 32 bytes
+
+Required properties:
+- compatible: "aspeed,ast2400-peci" or "aspeed,ast2500-peci"
+   - aspeed,ast2400-peci: Aspeed AST2400 family PECI control interface
+   - aspeed,ast2500-peci: Aspeed AST2500 family PECI control interface
+- reg: Should contain PECI registers location and length
+- interrupts: Should contain PECI interrupt
+- clocks: Should contain clock source. = <_clkin>;
+- clock_frequency: Should contain the operation frequency of PECI controller.
+   187500 ~ 2400
+
+Optional properties:
+- msg-timing-nego: Message timing negotiation period.
+   This value will determine the period of message timing negotiation to be
+   issued by PECI controller. The unit of the programmed value is four
+   times of PECI clock period.
+   0 ~ 255 (default: 1)
+- addr-timing-nego: Address timing negotiation period.
+   This value will determine the period of address timing negotiation to be
+   issued by PECI controller. The unit of the programmed value is four
+   times of PECI clock period.
+   0 ~ 255 (default: 1)
+- rd-sampling-point: Read sampling point selection.
+   The whole period of a bit time will be divided into 16 time frames.
+   This value will determine which time frame this controller will sample
+   PECI signal for data read back. Usually in the middle of a bit time is
+   the best.
+   0 ~ 15 (default: 8)
+- cmd_timeout_ms: Command timeout in units of ms
+   1 ~ 6 (default: 1000)
+
+Example:
+   peci: peci@1e78b000 {
+   compatible = "aspeed,ast2500-peci";
+   reg = <0x1e78b000 0x60>;
+   interrupt-controller;
+   interrupts = <15>;
+   clocks = <_clkin>;
+   clock-frequency = <2400>;
+   msg-timing-nego = <1>;
+   addr-timing-nego = <1>;
+   rd-sampling-point = <8>;
+   cmd-timeout-ms = <1000>;
+   };
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH linux dev-4.10 5/6] Documentation: hwmon: Add a generic PECI hwmon

2018-01-09 Thread Jae Hyun Yoo
From: Jae Hyun Yoo 

This commit adds a document for a generic PECI hwmon driver.

Signed-off-by: Jae Hyun Yoo 
---
 Documentation/hwmon/peci-hwmon | 74 ++
 1 file changed, 74 insertions(+)
 create mode 100644 Documentation/hwmon/peci-hwmon

diff --git a/Documentation/hwmon/peci-hwmon b/Documentation/hwmon/peci-hwmon
new file mode 100644
index 000..e0155b5
--- /dev/null
+++ b/Documentation/hwmon/peci-hwmon
@@ -0,0 +1,74 @@
+Kernel driver peci-hwmon
+===
+
+Supported chips:
+   Generic BMC chips provide PECI controller
+
+Author:
+   Jae Hyun Yoo 
+
+
+Hardware Interfaces
+---
+
+This driver uses a PECI misc driver as a controller interface so one of PECI
+misc drivers which provides compatible ioctls has to be enabled.
+
+
+Description
+---
+
+This driver implements a generic PECI hwmon feature which is running with a 
PECI
+controller driver supports native PECI Client Command Suite for retrieving
+temperatures of the CPU package, CPU cores and DIMM components.
+
+All temperature values are given in millidegree Celsius and will be measurable
+only when the target CPU is powered on.
+
+
+sysfs files
+---
+
+temp1_inputProvides current die temperature of the CPU package.
+temp1_max  Provides thermal control temperature of the CPU package
+   which is also known as Tcontrol.
+temp1_crit Provides shutdown temperature of the CPU package which
+   is also known as the maximum processor junction
+   temperature, Tjmax or Tprochot.
+temp1_crit_hystProvides the hysteresis value from Tcontrol to 
Tjmax of
+   the CPU package.
+
+temp2_inputProvides current DTS thermal margin to Tcontrol of the
+   CPU package. Value 0 means it reaches to Tcontrol
+   temperature. Sub-zero value means the die temperature
+   goes across Tcontrol to Tjmax.
+temp2_min  Provides the minimum DTS thermal margin to Tcontrol of
+   the CPU package.
+temp2_lcritProvides the value when the CPU package temperature
+   reaches to Tjmax.
+
+temp3_inputProvides current Tcontrol temperature of the CPU
+   package which is also known as Fan Temperature target.
+   Indicates the relative value from thermal monitor trip
+   temperature at which fans should be engaged.
+temp3_crit Provides Tcontrol critical value of the CPU package
+   which is same to Tjmax.
+
+temp4_inputProvides current Tthrottle temperature of the CPU
+   package. Used for throttling temperature. If this value
+   is allowed and lower than Tjmax - the throttle will
+   occur and reported at lower than Tjmax.
+
+temp[100-127]_inputProvides current core temperature.
+temp[100-127]_max  Provides thermal control temperature of the core.
+temp[100-127]_crit Provides shutdown temperature of the core.
+temp[100-127]_crit_hystProvides the hysteresis value from Tcontrol to 
Tjmax of
+   the core.
+
+Note:
+   Core temperature group will be appeared when probing the driver if CPU
+   is online or when the first reading on other attr happens because it
+   needs cpu info reading. The number of generated core attrs depends on
+   the number of cores of the cpu package.
+
+temp[200-215]_inputProvides current temperature of the DDR DIMM.
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH linux dev-4.10 4/6] Documentation: dt-bindings: Add a generic PECI hwmon

2018-01-09 Thread Jae Hyun Yoo
This commit adds a dt-bindings document for a generic PECI hwmon
driver.

Signed-off-by: Jae Hyun Yoo 
---
 .../devicetree/bindings/hwmon/peci-hwmon.txt   | 33 ++
 1 file changed, 33 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/hwmon/peci-hwmon.txt

diff --git a/Documentation/devicetree/bindings/hwmon/peci-hwmon.txt 
b/Documentation/devicetree/bindings/hwmon/peci-hwmon.txt
new file mode 100644
index 000..20b86f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/peci-hwmon.txt
@@ -0,0 +1,33 @@
+* Generic PECI (Platform Environment Control Interface) hwmon driver.
+
+Dependency:
+- This driver uses a PECI misc driver as a controller interface so one of PECI
+  misc drivers which provides compatible ioctls has to be enabled.
+
+Required properties:
+- compatible: "peci-hwmon"
+- cpu-id: Should contain CPU socket ID
+   - 0 ~ 7
+
+Optional properties:
+- show-core: If this property is defined, core temperature attributes will be
+enumerated.
+- dimm-nums: Should contain the number of DIMM slots that attached to each CPU
+which is indicated by cpu-id.
+   0 ~ 16 (default: 16)
+In case of 0, DIMM temperature attributes will not be enumerated.
+
+Example:
+   peci-hwmon0 {
+   compatible = "peci-hwmon";
+   cpu-id = <0>;
+   show-core;
+   dimm-nums = <16>;
+   };
+
+   peci-hwmon1 {
+   compatible = "peci-hwmon";
+   cpu-id = <1>;
+   show-core;
+   dimm-nums = <16>;
+   };
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH linux dev-4.10 6/6] drivers/hwmon: Add a driver for a generic PECI hwmon

2018-01-09 Thread Jae Hyun Yoo
This commit adds driver implementation for a generic PECI hwmon.

Signed-off-by: Jae Hyun Yoo 
---
 drivers/hwmon/Kconfig  |   6 +
 drivers/hwmon/Makefile |   1 +
 drivers/hwmon/peci-hwmon.c | 953 +
 3 files changed, 960 insertions(+)
 create mode 100644 drivers/hwmon/peci-hwmon.c

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 9256dd0..3a62c60 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1234,6 +1234,12 @@ config SENSORS_NCT7904
  This driver can also be built as a module.  If so, the module
  will be called nct7904.
 
+config SENSORS_PECI_HWMON
+   tristate "PECI hwmon support"
+   depends on ASPEED_PECI
+   help
+ If you say yes here you get support for the generic PECI hwmon driver.
+
 config SENSORS_NSA320
tristate "ZyXEL NSA320 and compatible fan speed and temperature sensors"
depends on GPIOLIB && OF
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 98000fc..41d43a5 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -131,6 +131,7 @@ obj-$(CONFIG_SENSORS_NCT7802)   += nct7802.o
 obj-$(CONFIG_SENSORS_NCT7904)  += nct7904.o
 obj-$(CONFIG_SENSORS_NSA320)   += nsa320-hwmon.o
 obj-$(CONFIG_SENSORS_NTC_THERMISTOR)   += ntc_thermistor.o
+obj-$(CONFIG_SENSORS_PECI_HWMON)   += peci-hwmon.o
 obj-$(CONFIG_SENSORS_PC87360)  += pc87360.o
 obj-$(CONFIG_SENSORS_PC87427)  += pc87427.o
 obj-$(CONFIG_SENSORS_PCF8591)  += pcf8591.o
diff --git a/drivers/hwmon/peci-hwmon.c b/drivers/hwmon/peci-hwmon.c
new file mode 100644
index 000..2d2a288
--- /dev/null
+++ b/drivers/hwmon/peci-hwmon.c
@@ -0,0 +1,953 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Intel Corporation
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define DEVICE_NAME "peci-hwmon"
+#define HWMON_NAME "peci_hwmon"
+
+#define CPU_ID_MAX   8   /* Max CPU number configured by socket ID */
+#define DIMM_NUMS_MAX16  /* Max DIMM numbers (channel ranks x 2) */
+#define CORE_NUMS_MAX28  /* Max core numbers (max on SKX Platinum) */
+#define TEMP_TYPE_PECI   6   /* Sensor type 6: Intel PECI */
+#define CORE_INDEX_OFFSET100 /* sysfs filename start offset for core temp 
*/
+#define DIMM_INDEX_OFFSET200 /* sysfs filename start offset for DIMM temp 
*/
+#define TEMP_NAME_HEADER_LEN 4   /* sysfs temp type header length */
+#define OF_DIMM_NUMS_DEFAULT 16  /* default dimm-nums setting */
+
+#define CORE_TEMP_ATTRS  5
+#define DIMM_TEMP_ATTRS  2
+#define ATTR_NAME_LEN24
+
+#define UPDATE_INTERVAL_MIN  HZ
+
+enum sign_t {
+   POS,
+   NEG
+};
+
+struct cpuinfo_t {
+   bool valid;
+   u32  dib;
+   u8   cpuid;
+   u8   platform_id;
+   u32  microcode;
+   u8   logical_thread_nums;
+};
+
+struct temp_data_t {
+   bool valid;
+   s32  value;
+   unsigned long last_updated;
+};
+
+struct temp_group_t {
+   struct temp_data_t tjmax;
+   struct temp_data_t tcontrol;
+   struct temp_data_t tthrottle;
+   struct temp_data_t dts_margin;
+   struct temp_data_t die;
+   struct temp_data_t core[CORE_NUMS_MAX];
+   struct temp_data_t dimm[DIMM_NUMS_MAX];
+};
+
+struct core_temp_attr_group_t {
+   struct sensor_device_attribute sd_attrs[CORE_NUMS_MAX][CORE_TEMP_ATTRS];
+   char attr_name[CORE_NUMS_MAX][CORE_TEMP_ATTRS][ATTR_NAME_LEN];
+   struct attribute *attrs[CORE_NUMS_MAX][CORE_TEMP_ATTRS + 1];
+   struct attribute_group attr_group[CORE_NUMS_MAX];
+};
+
+struct dimm_temp_attr_group_t {
+   struct sensor_device_attribute sd_attrs[DIMM_NUMS_MAX][DIMM_TEMP_ATTRS];
+   char attr_name[DIMM_NUMS_MAX][DIMM_TEMP_ATTRS][ATTR_NAME_LEN];
+   struct attribute *attrs[DIMM_NUMS_MAX][DIMM_TEMP_ATTRS + 1];
+   struct attribute_group attr_group[DIMM_NUMS_MAX];
+};
+
+struct peci_hwmon {
+   struct device *dev;
+   struct device *hwmon_dev;
+   char name[NAME_MAX];
+   const struct attribute_group **groups;
+   struct cpuinfo_t cpuinfo;
+   struct temp_group_t temp;
+   u32 cpu_id;
+   bool show_core;
+   u32 core_nums;
+   u32 dimm_nums;
+   atomic_t core_group_created;
+   struct core_temp_attr_group_t core;
+   struct dimm_temp_attr_group_t dimm;
+};
+
+enum label_t {
+   L_DIE,
+   L_DTS,
+   L_TCONTROL,
+   L_TTHROTTLE,
+   L_MAX
+};
+
+static const char *peci_label[L_MAX] = {
+   "Die temperature\n",
+   "DTS thermal margin to Tcontrol\n",
+   "Tcontrol temperature\n",
+   "Tthrottle temperature\n",
+};
+
+static DEFINE_MUTEX(peci_hwmon_lock);
+
+static int create_core_temp_group(struct peci_hwmon *priv, int core_no);
+
+
+static int xfer_peci_msg(int cmd, void *pmsg)
+{
+   int rc;
+
+   mutex_lock(_hwmon_lock);
+   rc = 

[PATCH linux dev-4.10 2/6] ARM: dts: aspeed: peci: Add Aspeed PECI

2018-01-09 Thread Jae Hyun Yoo
This commit adds Aspeed PECI node into aspeed-g4 and aspeed-g5.

Signed-off-by: Jae Hyun Yoo 
---
 arch/arm/boot/dts/aspeed-g4.dtsi | 14 ++
 arch/arm/boot/dts/aspeed-g5.dtsi | 14 ++
 2 files changed, 28 insertions(+)

diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi
index b82ebef..7ecc7b2 100644
--- a/arch/arm/boot/dts/aspeed-g4.dtsi
+++ b/arch/arm/boot/dts/aspeed-g4.dtsi
@@ -238,6 +238,20 @@
clocks = <_hpll>;
};
 
+   peci: peci@1e78b000 {
+   compatible = "aspeed,ast2400-peci";
+   reg = <0x1e78b000 0x60>;
+   interrupt-controller;
+   interrupts = <15>;
+   clocks = <_clkin>;
+   clock-frequency = <2400>;
+   msg-timing-nego = <1>;
+   addr-timing-nego = <1>;
+   rd-sampling-point = <8>;
+   cmd-timeout-ms = <1000>;
+   status = "disabled";
+   };
+
sgpio: gpio@0x1e780200 {
#gpio-cells = <2>;
gpio-controller;
diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi
index ba3607c..b4e8d51 100644
--- a/arch/arm/boot/dts/aspeed-g5.dtsi
+++ b/arch/arm/boot/dts/aspeed-g5.dtsi
@@ -289,6 +289,20 @@
clocks = <_hpll>;
};
 
+   peci: peci@1e78b000 {
+   compatible = "aspeed,ast2500-peci";
+   reg = <0x1e78b000 0x60>;
+   interrupt-controller;
+   interrupts = <15>;
+   clocks = <_clkin>;
+   clock-frequency = <2400>;
+   msg-timing-nego = <1>;
+   addr-timing-nego = <1>;
+   rd-sampling-point = <8>;
+   cmd-timeout-ms = <1000>;
+   status = "disabled";
+   };
+
timer: timer@1e782000 {
compatible = "aspeed,ast2400-timer";
reg = <0x1e782000 0x90>;
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v6 00/11] Intel SGX Driver

2018-01-09 Thread Dr. Greg Wettstein
On Jan 9,  4:25pm, Jarkko Sakkinen wrote:
} Subject: Re: [PATCH v6 00/11] Intel SGX Driver

Good afternoon, I hope the week is going well for everyone.

In order to minimize spamming mailboxes with two mails I'm
incorporating a reply to Jarkko's second e-mail on the Memory
Encryption Engine below as well, since the issues are all related.

> On Thu, Jan 04, 2018 at 03:06:43AM -0600, Dr. Greg Wettstein wrote:
> > If we are talking about the issues motivating the KPTI work I don't
> > have any useful information beyond what is raging through the industry
> > right now.
> > 
> > With respect to SGX, the issues giving rise to KPTI are characteristic
> > of what this technology is designed to address.  The technical 'news'
> > sites, which are even more of an abomination than usual with this
> > issue, are talking about privileged information such as credentials,
> > passwords et.al being leaked by this vulnerability.
> > 
> > Data committed to enclaves are only accessible by the enclave, even
> > the kernel, by definition, can't access the memory.  Given current
> > events that is an arguably useful behavior.

> Exactly. You could think adversary using meltdown leak utilizing
> malware as having same capabilities as peripheral connected to a
> bus, which we can defend against with SGX.

I believe caution needs to be applied to these statements.

Since we design high assurance computing devices that use SGX to
protect our autonomous introspection engine, we obviously have very
significant concerns regarding whether the SGX security guarantees are
still operative in the face of these micro-architectural probing
attacks.  Absent official guidance, we have been poring over the SGX
architectural documents for a week in order to develop risk guidance.

Based on that review, our conclusion was that there was nothing
inherent in the SGX architectural model that implies protection
against confidentiality losses through micro-architectural side
channel inspection.  Our conclusion was reinforced by a group in
London which has reportedly demonstrated the effectiveness of the
conditional branch misprediction exploit against data processed inside
of an enclave.

We have not yet verified the exploit in our lab, but given our
architectural review there would seem to be no reason why it shouldn't
work.  I posted a note to the SGX developer's forum early this morning
with a summary of our analysis but haven't received any responses.

To wit, in summary:

In this attack scenario, the potential lack of confidentiality inside
of an enclave is the same as if the code was running in unprotected
memory space.  The MM{U,E} infrastructure is servicing micro-op
resource requests for instructions inside of an enclave, just as it
would normally do in untrusted space.  As a result, code running in an
enclave induces cache state changes which can be externally probed,
ie. the effects of a forced branch mispredict on cache state are the
same if the code executes inside of an enclave as if it were in
untrusted memory.

As I noted in my post to the SGX forum, this would be really
interesting if it could be done by an arbitrary process against an
enclave.  As the sample code demonstrates however, the exploit binary
has to be able to invoke at least two ECALLs (invocation of functions
in trusted space) in order to carry out the attack.  This is somewhat
analogous to an exploit where a process is able to attack its own
memory map.

With respect to the other mail:

> Everything going out of L1 gets encrypted. This is done to defend
> against peripheral like adversaries and should work also against
> meltdown.

I don't believe this is an architecturally correct assertion.  The
encryption/decryption occurs at the 'bottom' of the cache hierarchy.

Based on Shay Gueron's paper, which describes the Memory Encryption
Engine (MEE) and its security characteristics and proofs, the MEE acts
as an extension of the memory controller and mediates CACHE<->DRAM
traffic to the Enclave Page Cache (EPC), ie, the protected data
region.  It is responsible for encrypting and decrypting page data as
well as the generation of the tags which are used to populate the
Merkle integrity tree.

As I mentioned in a previous mail, the MEE is responsible for emitting
the 'drop and lock' verification signal which locks the memory
controller if a memory integrity check fails.  This is to support a
fundamental design tenet of the architecture that no unverified data
reaches the caches.

Based on this I believe all of the data in the caches is in plaintext,
not just from L1 upward.  So by inference, speculative execution is
able to induce the population of the caches with unencrypted data and
act on those results.  If this were not the case it would be difficult
to understand how the demonstrated branch mispredict attack could be
successful.

With respect to protecting access to memory, the SGX modified Page
Miss Handler (PMH) is designed to deny the final population of a TLB
slot 

Protecting code integrity with PGP (kernel developer version)

2018-01-09 Thread Konstantin Ryabitsev

Hi, all:

I would like to adapt this document to be more specific to kernel 
development:


 https://github.com/lfit/itpol/blob/master/protecting-code-integrity.md

This stems from many back-and-forth conversations with kernel devs, and 
I believe many would benefit from such a guide, especially since the more 
generic version in the above repository does not directly apply.


Question is:

Does such a document belong with the rest of the kernel docs in the tree, 
or should it remain fully external? I'll be happy to port it to RST if 
you think it should live alongside other documents like coding 
standards.



-K'
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] docs: add index entry for networking/msg_zerocopy

2018-01-09 Thread Tobin C. Harding
On Tue, Jan 09, 2018 at 10:58:23AM -0700, Jonathan Corbet wrote:
> On Sat,  6 Jan 2018 12:30:37 +1100
> "Tobin C. Harding"  wrote:
> 
> > Currently msg_zerocopy is not included in any toctree. Sphinx emits a
> > build warning to this effect. The other three rst files in
> > Documentation/networking are all indexed. We can add msg_zerocopy to the
> > toctree to enable navigation of the document via HTML kernel docs.
> > 
> > Add msg_zerocopy to the networking/ toctree.
> 
> We had a bit of a race condition here, and this fix was sent in by two
> people.  Dave Miller took the other one, so I'll drop this one.  Sorry
> about that - thanks for the fix!

No worries, just good to see it fixed - one less docs build warning.

thanks,
Tobin.
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 6/6] coresight: etm4x: Support panic kdump

2018-01-09 Thread Mathieu Poirier
On Thu, Dec 21, 2017 at 04:20:15PM +0800, Leo Yan wrote:
> ETMv4 hardware information and configuration needs to be saved as
> metadata; these metadata should be compatible with tool 'perf' and
> can be used for tracing data analysis.  ETMv4 usually works as tracer
> per CPU, we cannot wait to gather ETM info after the CPU has been panic
> and cannot execute dump operations for itself; so should gather
> metadata when the corresponding CPU is alive.
> 
> Since values in TRCIDR{0, 1, 2, 8} and TRCAUTHSTATUS are read-only and
> won't change at the runtime.  Those registers value are filled when
> tracers are instantiated.
> 
> The configuration and control registers TRCCONFIGR and TRCTRACEIDR are
> dynamically configured, we record their value when enabling coresight
> path.  When operating from sysFS tracer these two registers are recorded
> in etm4_enable_sysfs() and add kdump node into list, and remove the
> kdump node in etm4_disable_sysfs().  When operating from perf,
> etm_setup_aux() adds all tracers to the dump list and etm4_enable_perf()
> is used to record configuration registers and update dump buffer info,
> this can avoid unnecessary list addition and deletion operations.
> Removal of the tracers from the dump list is done in function
> free_event_data().
> 
> Suggested-by: Mathieu Poirier 
> Signed-off-by: Leo Yan 
> ---
>  drivers/hwtracing/coresight/coresight-etm-perf.c | 12 +++-
>  drivers/hwtracing/coresight/coresight-etm4x.c| 23 +++
>  drivers/hwtracing/coresight/coresight-etm4x.h| 15 +++
>  3 files changed, 49 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c 
> b/drivers/hwtracing/coresight/coresight-etm-perf.c
> index 8a0ad77..fec779b 100644
> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
> @@ -137,6 +137,12 @@ static void free_event_data(struct work_struct *work)
>   }
>  
>   for_each_cpu(cpu, mask) {
> + struct coresight_device *csdev;
> +
> + csdev = per_cpu(csdev_src, cpu);
> + if (csdev)
> + coresight_kdump_del(csdev);
> +
>   if (!(IS_ERR_OR_NULL(event_data->path[cpu])))
>   coresight_release_path(event_data->path[cpu]);
>   }
> @@ -195,7 +201,7 @@ static void etm_free_aux(void *data)
>  static void *etm_setup_aux(int event_cpu, void **pages,
>  int nr_pages, bool overwrite)
>  {
> - int cpu;
> + int cpu, ret;
>   cpumask_t *mask;
>   struct coresight_device *sink;
>   struct etm_event_data *event_data = NULL;
> @@ -238,6 +244,10 @@ static void *etm_setup_aux(int event_cpu, void **pages,
>   event_data->path[cpu] = coresight_build_path(csdev, sink);
>   if (IS_ERR(event_data->path[cpu]))
>   goto err;
> +
> + ret = coresight_kdump_add(csdev, cpu);

Aren't you missing the configuration for trcconfigr and trctraceidr?

> + if (ret)
> + goto err;
>   }
>  
>   if (!sink_ops(sink)->alloc_buffer)
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c 
> b/drivers/hwtracing/coresight/coresight-etm4x.c
> index cf364a5..cbde398 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x.c
> +++ b/drivers/hwtracing/coresight/coresight-etm4x.c
> @@ -258,10 +258,19 @@ static int etm4_enable_perf(struct coresight_device 
> *csdev,
>  static int etm4_enable_sysfs(struct coresight_device *csdev)
>  {
>   struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
> + struct etmv4_config *config = >config;
> + struct etmv4_metadata *metadata = >metadata;
>   int ret;
>  
>   spin_lock(>spinlock);
>  
> + /* Update meta data and add into kdump list */
> + metadata->trcconfigr = config->cfg;
> + metadata->trctraceidr = drvdata->trcid;
> +
> + coresight_kdump_add(csdev, drvdata->cpu);
> + coresight_kdump_update(csdev, (char *)metadata, sizeof(*metadata));
> +
>   /*
>* Executing etm4_enable_hw on the cpu whose ETM is being enabled
>* ensures that register writes occur when cpu is powered.
> @@ -384,6 +393,9 @@ static void etm4_disable_sysfs(struct coresight_device 
> *csdev)
>*/
>   smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1);
>  
> + /* Delete from kdump list */
> + coresight_kdump_del(csdev);
> +
>   spin_unlock(>spinlock);
>   cpus_read_unlock();
>  
> @@ -438,6 +450,7 @@ static void etm4_init_arch_data(void *info)
>   u32 etmidr4;
>   u32 etmidr5;
>   struct etmv4_drvdata *drvdata = info;
> + struct etmv4_metadata *metadata = >metadata;
>  
>   /* Make sure all registers are accessible */
>   etm4_os_unlock(drvdata);
> @@ -590,6 +603,16 @@ static void etm4_init_arch_data(void *info)
>   

Re: [PATCH v3 3/6] coresight: Support panic kdump functionality

2018-01-09 Thread Mathieu Poirier
On Thu, Dec 21, 2017 at 04:20:12PM +0800, Leo Yan wrote:
> After kernel panic happens, coresight has many useful info can be used
> for analysis.  For example, the trace info from ETB RAM can be used to
> check the CPU execution flows before crash.  So we can save the tracing
> data from sink devices, and rely on kdump to save DDR content and uses
> "crash" tool to extract coresight dumping from vmcore file.
> 
> This patch is to add a simple framework to support panic dump
> functionality; it registers panic notifier, and provide the general APIs
> {coresight_kdump_add|coresight_kdump_del} as helper functions so any
> coresight device can add itself into dump list or delete as needed.
> 
> This driver provides helper function coresight_kdump_update() to update
> the dump buffer base address and buffer size.  This function can be used
> by coresight driver, e.g. it can be used to save ETM meta data info at
> runtime and these info can be prepared pre panic happening.
> 
> When kernel panic happens, the notifier iterates dump list and calls
> callback function to dump device specific info.  The panic dump is
> mainly used to dump trace data so we can get to know the execution flow
> before the panic happens.
> 
> Signed-off-by: Leo Yan 
> ---
>  drivers/hwtracing/coresight/Kconfig|   9 ++
>  drivers/hwtracing/coresight/Makefile   |   1 +
>  .../hwtracing/coresight/coresight-panic-kdump.c| 154 
> +
>  drivers/hwtracing/coresight/coresight-priv.h   |  13 ++
>  include/linux/coresight.h  |   7 +
>  5 files changed, 184 insertions(+)
>  create mode 100644 drivers/hwtracing/coresight/coresight-panic-kdump.c
> 
> diff --git a/drivers/hwtracing/coresight/Kconfig 
> b/drivers/hwtracing/coresight/Kconfig
> index ef9cb3c..4812529 100644
> --- a/drivers/hwtracing/coresight/Kconfig
> +++ b/drivers/hwtracing/coresight/Kconfig
> @@ -103,4 +103,13 @@ config CORESIGHT_CPU_DEBUG
> properly, please refer Documentation/trace/coresight-cpu-debug.txt
> for detailed description and the example for usage.
>  
> +config CORESIGHT_PANIC_KDUMP
> + bool "CoreSight Panic Kdump driver"
> + depends on ARM || ARM64

At this time only ETMv4 supports the feature, so it is only ARM64.

> + help
> +   This driver provides panic kdump functionality for CoreSight
> +   devices.  When a kernel panic happen a device supplied callback 
> function
> +   is used to save trace data to memory. From there we rely on kdump to 
> extract
> +   the trace data from kernel dump file.
> +
>  endif
> diff --git a/drivers/hwtracing/coresight/Makefile 
> b/drivers/hwtracing/coresight/Makefile
> index 61db9dd..946fe19 100644
> --- a/drivers/hwtracing/coresight/Makefile
> +++ b/drivers/hwtracing/coresight/Makefile
> @@ -18,3 +18,4 @@ obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o \
>  obj-$(CONFIG_CORESIGHT_DYNAMIC_REPLICATOR) += coresight-dynamic-replicator.o
>  obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o
>  obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o
> +obj-$(CONFIG_CORESIGHT_PANIC_KDUMP) += coresight-panic-kdump.o
> diff --git a/drivers/hwtracing/coresight/coresight-panic-kdump.c 
> b/drivers/hwtracing/coresight/coresight-panic-kdump.c
> new file mode 100644
> index 000..c21d20b
> --- /dev/null
> +++ b/drivers/hwtracing/coresight/coresight-panic-kdump.c
> @@ -0,0 +1,154 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (c) 2017 Linaro Limited.
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "coresight-priv.h"
> +
> +typedef void (*coresight_cb_t)(void *data);
> +
> +/**
> + * struct coresight_kdump_node - Node information for dump
> + * @cpu: The cpu this node is affined to.
> + * @csdev:   Handler for coresight device.
> + * @buf: Pointer for dump buffer.
> + * @buf_size:Length of dump buffer.
> + * @list:Hook to the list.
> + */
> +struct coresight_kdump_node {
> + int cpu;
> + struct coresight_device *csdev;
> + char *buf;
> + unsigned int buf_size;
> + struct list_head list;
> +};
> +
> +static DEFINE_SPINLOCK(coresight_kdump_lock);
> +static LIST_HEAD(coresight_kdump_list);
> +static struct notifier_block coresight_kdump_nb;
> +
> +int coresight_kdump_update(struct coresight_device *csdev, char *buf,
> +unsigned int buf_size)
> +{
> + struct coresight_kdump_node *node = csdev->dump_node;
> +
> + if (!node) {
> + dev_err(>dev, "Failed to update dump node.\n");
> + return -EINVAL;
> + }
> +
> + node->buf = buf;
> + node->buf_size = buf_size;
> + return 0;
> +}
> +
> +int coresight_kdump_add(struct coresight_device *csdev, int cpu)
> +{
> + struct coresight_kdump_node *node;
> + unsigned long flags;
> +
> + node = kzalloc(sizeof(*node), GFP_KERNEL);
> + 

Re: [PATCH RFC 6/7] serial: Add device tree bindings for GENI based UART Controller

2018-01-09 Thread Karthik Ramasubramanian



On 1/2/2018 8:55 AM, Rob Herring wrote:

On Wed, Dec 27, 2017 at 09:27:25AM -0700, Karthikeyan Ramasubramanian wrote:

Add device tree binding support for GENI based UART Controller in the
QUP Wrapper.

Signed-off-by: Karthikeyan Ramasubramanian 
Signed-off-by: Girish Mahadevan 
---
  .../devicetree/bindings/serial/qcom,geni-uart.txt  | 31 ++
  1 file changed, 31 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/serial/qcom,geni-uart.txt

diff --git a/Documentation/devicetree/bindings/serial/qcom,geni-uart.txt 
b/Documentation/devicetree/bindings/serial/qcom,geni-uart.txt
new file mode 100644
index 000..e60ec6a
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/qcom,geni-uart.txt
@@ -0,0 +1,31 @@
+Qualcomm Technologies Inc. GENI based Serial UART Controller driver
+
+This serial UART driver supports console use-cases. This driver is meant
+only for Generic Interface (GENI) based Qualcomm Universal Peripheral (QUP)
+cores and isn't backwards compatible.
+
+Required properties:
+- compatible: should contain "qcom,geni-uart, qcom,geni-console"


Is console different programming model or just how you are using the
h/w? for the latter, drop it as we have stdout-path to select a console.
The console programming model is different from a regular UART port and 
hence the compatible field contains console in it.



+- reg: Should contain UART register location and length.
+- interrupts: Should contain UART core interrupts.
+- clocks: clocks needed for UART, includes the core and AHB clock.
+- pinctrl-names/pinctrl-0/1: The GPIOs assigned to this core. The names
+  Should be "active" and "sleep" for the pin configuration when core is active
+  or when entering sleep state.
+- qcom,wrapper-core: Wrapper QUP core containing this UART controller.
+
+Example:
+qup_uart11: qcom,qup_uart@0xa88000 {


Use generic node names and no '0x':

serial@a88000

I will update as per the recommendation.



+   compatible = "qcom,geni-uart";
+   reg = <0xa88000 0x7000>;
+   reg-names = "se_phys";
+   clock-names = "se-clk", "m-ahb", "s-ahb";


Not documented.

I will add the documentation for the missing element.



+   clocks = <_gcc GCC_QUPV3_WRAP0_S0_CLK>,
+   <_gcc GCC_QUPV3_WRAP_0_M_AHB_CLK>,
+   <_gcc GCC_QUPV3_WRAP_0_S_AHB_CLK>;
+   pinctrl-names = "default", "sleep";
+   pinctrl-0 = <_1_uart_3_active>;
+   pinctrl-1 = <_1_uart_3_sleep>;
+   interrupts = <0 355 0>;
+   qcom,wrapper-core = <_0>;
+};
--
Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


--
To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Regards,
Karthik.
--
Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] docs: add index entry for networking/msg_zerocopy

2018-01-09 Thread Jonathan Corbet
On Sat,  6 Jan 2018 12:30:37 +1100
"Tobin C. Harding"  wrote:

> Currently msg_zerocopy is not included in any toctree. Sphinx emits a
> build warning to this effect. The other three rst files in
> Documentation/networking are all indexed. We can add msg_zerocopy to the
> toctree to enable navigation of the document via HTML kernel docs.
> 
> Add msg_zerocopy to the networking/ toctree.

We had a bit of a race condition here, and this fix was sent in by two
people.  Dave Miller took the other one, so I'll drop this one.  Sorry
about that - thanks for the fix!

jon
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Documentation: infiniband: move sysfs interface to ABI

2018-01-09 Thread Aishwarya Pant
On Tue, Jan 09, 2018 at 10:28:20AM -0700, Jason Gunthorpe wrote:
> On Mon, Jan 08, 2018 at 02:23:42PM +0530, Aishwarya Pant wrote:
> > Hi
> > 
> > In Documentation/infiniband/sysfs.txt, there is a description of the 
> > infiniband
> > sysfs interface and there also exists
> > Documentation/ABI/testing/sysfs-class-infiniband which is out of date.
> > 
> > Would it be useful to move out the interface completely from
> > Documentation/infiniband/sysfs.txt to the ABI?
> 
> It would be good, but I fear all the documentation files are variously
> out of date, so it would be harder to do this task reliably without
> having various rdma devices to inspect?
> 
> But if you want to take it on, I could probably get you some sysfs
> dumps from some systems to help?

Yes, I think that would be useful. I might not be able to fill in the
descriptions completely for all attributes but I can create a documentation
skeleton for them.

Aishwarya

> 
> Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Documentation: infiniband: move sysfs interface to ABI

2018-01-09 Thread Jason Gunthorpe
On Mon, Jan 08, 2018 at 02:23:42PM +0530, Aishwarya Pant wrote:
> Hi
> 
> In Documentation/infiniband/sysfs.txt, there is a description of the 
> infiniband
> sysfs interface and there also exists
> Documentation/ABI/testing/sysfs-class-infiniband which is out of date.
> 
> Would it be useful to move out the interface completely from
> Documentation/infiniband/sysfs.txt to the ABI?

It would be good, but I fear all the documentation files are variously
out of date, so it would be harder to do this task reliably without
having various rdma devices to inspect?

But if you want to take it on, I could probably get you some sysfs
dumps from some systems to help?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] docs-rst: networking: wire up msg_zerocopy

2018-01-09 Thread David Miller
From: Mike Rapoport 
Date: Mon,  8 Jan 2018 08:50:17 +0200

> Fix the following 'make htmldocs' complaint:
> 
> Documentation/networking/msg_zerocopy.rst:: WARNING: document isn't included 
> in any toctree.
> 
> Signed-off-by: Mike Rapoport 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] docs-rst: networking: wire up msg_zerocopy

2018-01-09 Thread David Miller
From: Jonathan Corbet 
Date: Tue, 9 Jan 2018 09:55:27 -0700

> On Tue, 09 Jan 2018 11:50:49 -0500 (EST)
> David Miller  wrote:
> 
>> From: Mike Rapoport 
>> Date: Mon,  8 Jan 2018 08:50:17 +0200
>> 
>> > Fix the following 'make htmldocs' complaint:
>> > 
>> > Documentation/networking/msg_zerocopy.rst:: WARNING: document isn't 
>> > included in any toctree.
>> > 
>> > Signed-off-by: Mike Rapoport   
>> 
>> Does someone else want to take this?
>> 
>> Otherwise I can.
> 
> I can certainly take it (or the equivalent patch posted by Tobin a few
> days earlier) through the docs tree.  I've been holding off, though,
> under the impression that you'd rather take networking docs patches
> yourself.  Either is fine.  Unless you say you've grabbed it, I'll do so
> in the near future.

Oh then I'll take it.

Thanks Jon!
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] docs-rst: networking: wire up msg_zerocopy

2018-01-09 Thread Jonathan Corbet
On Tue, 09 Jan 2018 11:50:49 -0500 (EST)
David Miller  wrote:

> From: Mike Rapoport 
> Date: Mon,  8 Jan 2018 08:50:17 +0200
> 
> > Fix the following 'make htmldocs' complaint:
> > 
> > Documentation/networking/msg_zerocopy.rst:: WARNING: document isn't 
> > included in any toctree.
> > 
> > Signed-off-by: Mike Rapoport   
> 
> Does someone else want to take this?
> 
> Otherwise I can.

I can certainly take it (or the equivalent patch posted by Tobin a few
days earlier) through the docs tree.  I've been holding off, though,
under the impression that you'd rather take networking docs patches
yourself.  Either is fine.  Unless you say you've grabbed it, I'll do so
in the near future.

Thanks,

jon
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] docs-rst: networking: wire up msg_zerocopy

2018-01-09 Thread David Miller
From: Mike Rapoport 
Date: Mon,  8 Jan 2018 08:50:17 +0200

> Fix the following 'make htmldocs' complaint:
> 
> Documentation/networking/msg_zerocopy.rst:: WARNING: document isn't included 
> in any toctree.
> 
> Signed-off-by: Mike Rapoport 

Does someone else want to take this?

Otherwise I can.
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC tip/locking/lockdep v4 16/17] lockdep: Documention for recursive read lock detection reasoning

2018-01-09 Thread Boqun Feng
As now we support recursive read lock deadlock detection, add related
explanation in the Documentation/locking/lockdep-design.txt:

*   Definition of recursive read locks, non-recursive locks, strong
dependency path and notions of -(**)->.

*   Lockdep's assumption.

*   Informal proof of recursive read lock deadlock detection.

Signed-off-by: Boqun Feng 
---
 Documentation/locking/lockdep-design.txt | 170 +++
 1 file changed, 170 insertions(+)

diff --git a/Documentation/locking/lockdep-design.txt 
b/Documentation/locking/lockdep-design.txt
index 382bc25589c2..0e674305f96a 100644
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.txt
@@ -284,3 +284,173 @@ Run the command and save the output, then compare against 
the output from
 a later run of this command to identify the leakers.  This same output
 can also help you find situations where runtime lock initialization has
 been omitted.
+
+Recursive Read Deadlock Detection:
+--
+Lockdep now is equipped with deadlock detection for recursive read locks.
+
+Recursive read locks, as their name indicates, are locks that can be
+acquired recursively. Unlike non-recursive read locks, recursive read locks
+only get blocked by current write lock *holders*, not by write lock
+*waiters*. For example:
+
+   TASK A: TASK B:
+
+   read_lock(X);
+
+   write_lock(X);
+
+   read_lock(X);
+
+is not a deadlock for recursive read locks, as while the task B is waiting for
+the lock X, the second read_lock() doesn't need to wait because it's a 
recursive
+read lock.
+
+Note that a lock can be a write lock(exclusive lock), a non-recursive read lock
+(non-recursive shared lock) or a recursive read lock(recursive shared lock),
+depending on the API used to acquire it(more specifically, the value of the
+'read' parameter for lock_acquire(...)). In other words, a single lock instance
+has three types of acquisition depending on the acquisition functions:
+exclusive, non-recursive read, and recursive read.
+
+That said, recursive read locks could introduce deadlocks too, considering the
+following:
+
+   TASK A: TASK B:
+
+   read_lock(X);
+   read_lock(Y);
+   write_lock(Y);
+   write_lock(X);
+
+, neither task could get the write locks because the corresponding read locks
+are held by each other.
+
+Lockdep could detect recursive read lock related deadlocks. The 
dependencies(edges)
+in the lockdep graph are classified into four categories:
+
+1) -(NN)->: non-recursive to non-recursive dependency, non-recursive locks 
include
+non-recursive read locks, write locks and exclusive locks(e.g. 
spinlock_t),
+   they are treated equally in deadlock detection. "X -(NN)-> Y" means
+X -> Y and both X and Y are non-recursive locks.
+
+2) -(RN)->: recursive to non-recursive dependency, recursive locks means 
recursive read
+   locks. "X -(RN)-> Y" means X -> Y and X is recursive read lock and
+Y is non-recursive lock.
+
+3) -(NR)->: non-recursive to recursive dependency, "X -(NR)-> Y" means X -> Y 
and X is
+non-recursive lock and Y is recursive lock.
+
+4) -(RR)->: recursive to recursive dependency, "X -(RR)-> Y" means X -> Y and 
both X
+and Y are recursive locks.
+
+Note that given two locks, they may have multiple dependencies between them, 
for example:
+
+   TASK A:
+
+   read_lock(X);
+   write_lock(Y);
+   ...
+
+   TASK B:
+
+   write_lock(X);
+   write_lock(Y);
+
+, we have both X -(RN)-> Y and X -(NN)-> Y in the dependency graph.
+
+And obviously a non-recursive lock can block the corresponding recursive lock,
+and vice versa. Besides a non-recursive lock may block the other non-recursive
+lock of the same instance(e.g. a write lock may block a corresponding
+non-recursive read lock and vice versa).
+
+We use -(*N)-> for edges that are either -(RN)-> or -(NN)->, and similarly for 
-(N*)->,
+-(*R)-> and -(R*)->.
+
+A "path" is a series of conjunct dependency edges in the graph. And we define a
+"strong" path, which indicates the strong dependency throughout each dependency
+in the path, as the path that doesn't have two conjunct edges(dependencies) as
+-(*R)-> and -(R*)->. IOW, a "strong" path is a path from a lock walking to 
another
+through the lock dependencies, and if X -> Y -> Z in the path(where X, Y, Z are
+locks), if the walk from X to Y is through a -(NR)-> or -(RR)-> dependency, the
+walk from Y to Z must not be through a -(RN)-> or -(RR)-> dependency, otherwise
+it's not a strong path.
+
+We now prove that if a strong path forms a circle, then we have a potential 
deadlock.
+By "forms a circle", it means for a set of locks A0,A1...An, there is a path 
from
+A0 to An:
+
+   A0 -> A1 -> 

[RFC tip/locking/lockdep v4 03/17] lockdep: Redefine LOCK_*_STATE* bits

2018-01-09 Thread Boqun Feng
There are three types of lock acquisitions: write, non-recursive read
and recursive read, among which write locks and non-recursive read locks
have no difference from a viewpoint for deadlock detections, because a
write acquisition of the corresponding lock on an independent CPU or
task makes a non-recursive read lock act as a write lock in the sense of
deadlock. So we could treat them as the same type(named as
"non-recursive lock") in lockdep.

In the irq lock inversion detection (safe->unsafe deadlock detection),
we used to distinguish write locks from read locks (both non-recursive and
recursive ones). That classification can be improved, because a
non-recursive read lock behaves the same as a write lock, so this patch
redefines the meanings of LOCK_{USED_IN, ENABLED}_STATE*.

old:
LOCK_* : stands for write lock
LOCK_*_READ: stands for read lock(non-recursive and recursive)
new:
LOCK_* : stands for non-recursive(write lock and non-recursive
read lock)
LOCK_*_RR: stands for recursive read lock

Such a change is needed for a future improvement on recursive read
related irq inversion deadlock detection.

Signed-off-by: Boqun Feng 
---
 Documentation/locking/lockdep-design.txt |  6 +++---
 kernel/locking/lockdep.c | 28 ++--
 kernel/locking/lockdep_internals.h   | 16 
 kernel/locking/lockdep_proc.c| 12 ++--
 4 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/Documentation/locking/lockdep-design.txt 
b/Documentation/locking/lockdep-design.txt
index 9de1c158d44c..382bc25589c2 100644
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.txt
@@ -30,9 +30,9 @@ State
 The validator tracks lock-class usage history into 4n + 1 separate state bits:
 
 - 'ever held in STATE context'
-- 'ever held as readlock in STATE context'
+- 'ever held as recursive readlock in STATE context'
 - 'ever held with STATE enabled'
-- 'ever held as readlock with STATE enabled'
+- 'ever held as recursive readlock with STATE enabled'
 
 Where STATE can be either one of (kernel/locking/lockdep_states.h)
  - hardirq
@@ -51,7 +51,7 @@ locking error messages, inside curlies. A contrived example:
 (_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24
 
 
-The bit position indicates STATE, STATE-read, for each of the states listed
+The bit position indicates STATE, STATE-RR, for each of the states listed
 above, and the character displayed in each indicates:
 
'.'  acquired while irqs disabled and not in irq context
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 0f2bba043a83..8767830664aa 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -447,10 +447,10 @@ DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
  */
 
 #define __USAGE(__STATE)   \
-   [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE)"-W",   \
-   [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON-W", \
-   [LOCK_USED_IN_##__STATE##_READ] = "IN-"__stringify(__STATE)"-R",\
-   [LOCK_ENABLED_##__STATE##_READ] = __stringify(__STATE)"-ON-R",
+   [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE),   \
+   [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON",   \
+   [LOCK_USED_IN_##__STATE##_RR] = "IN-"__stringify(__STATE)"-RR", \
+   [LOCK_ENABLED_##__STATE##_RR] = __stringify(__STATE)"-ON-RR",
 
 static const char *usage_str[] =
 {
@@ -491,7 +491,7 @@ void get_usage_chars(struct lock_class *class, char 
usage[LOCK_USAGE_CHARS])
 
 #define LOCKDEP_STATE(__STATE) 
\
usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE); \
-   usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_READ);
+   usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_RR);
 #include "lockdep_states.h"
 #undef LOCKDEP_STATE
 
@@ -1640,7 +1640,7 @@ static const char *state_names[] = {
 
 static const char *state_rnames[] = {
 #define LOCKDEP_STATE(__STATE) \
-   __stringify(__STATE)"-READ",
+   __stringify(__STATE)"-RR",
 #include "lockdep_states.h"
 #undef LOCKDEP_STATE
 };
@@ -3034,14 +3034,14 @@ static int mark_irqflags(struct task_struct *curr, 
struct held_lock *hlock)
 * mark the lock as used in these contexts:
 */
if (!hlock->trylock) {
-   if (hlock->read) {
+   if (hlock->read == 2) {
if (curr->hardirq_context)
if (!mark_lock(curr, hlock,
-   LOCK_USED_IN_HARDIRQ_READ))
+   LOCK_USED_IN_HARDIRQ_RR))
return 0;
if (curr->softirq_context)
if (!mark_lock(curr, hlock,
-

Re: [PATCH v6 00/11] Intel SGX Driver

2018-01-09 Thread Jarkko Sakkinen
On Thu, Jan 04, 2018 at 03:17:24PM +0100, Cedric Blancher wrote:
> So how does this protect against the MELTDOWN attack (CVE-2017-5754)
> and the MELTATOMBOMBA4 worm which uses this exploit?
> 
> Ced

Everything going out of L1 gets encrypted. This is done to defend
against peripheral-like adversaries and should also work against
Meltdown.

/Jarkko
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v6 00/11] Intel SGX Driver

2018-01-09 Thread Jarkko Sakkinen
On Thu, Jan 04, 2018 at 03:06:43AM -0600, Dr. Greg Wettstein wrote:
> If we are talking about the issues motivating the KPTI work I don't
> have any useful information beyond what is raging through the industry
> right now.
> 
> With respect to SGX, the issues giving rise to KPTI are characteristic
> of what this technology is designed to address.  The technical 'news'
> sites, which are even more of an abomination than usual with this
> issue, are talking about privileged information such as credentials,
> passwords et al. being leaked by this vulnerability.
> 
> Data committed to enclaves are only accessible by the enclave, even
> the kernel, by definition, can't access the memory.  Given current
> events that is an arguably useful behavior.

Exactly. You could think of an adversary exploiting the Meltdown leak via
malware as having the same capabilities as a peripheral connected to a bus,
which we can defend against with SGX.

/Jarkko
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/5] Documentation/pktgen: Clarify how to use pktgen samples

2018-01-09 Thread Dmitry Safonov
o Change process name in ps output: it looks like, these days, the process
  is named kpktgend_<cpu>, rather than pktgen/<cpu>.
o Use pg_ctrl for start/stop as it can work well with pgset without
  changes to $(PGDEV) variable.
o Clarify a bit needed $(PGDEV) definition for sample scripts and that
  one needs to `source functions.sh`.
o Document how-to unset a behaviour flag, note about history expansion.
o Fix pgset spi parameter value.

Cc: Jonathan Corbet 
Cc: linux-doc@vger.kernel.org
Signed-off-by: Dmitry Safonov 
---
 Documentation/networking/pktgen.txt | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/Documentation/networking/pktgen.txt 
b/Documentation/networking/pktgen.txt
index 2c4e3354e128..d2fd78f85aa4 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -12,8 +12,8 @@ suitable sample script and configure that.
 On a dual CPU:
 
 ps aux | grep pkt
-root   129  0.3  0.0 00 ?SW2003 523:20 [pktgen/0]
-root   130  0.3  0.0 00 ?SW2003 509:50 [pktgen/1]
+root   129  0.3  0.0 00 ?SW2003 523:20 [kpktgend_0]
+root   130  0.3  0.0 00 ?SW2003 509:50 [kpktgend_1]
 
 
 For monitoring and control pktgen creates:
@@ -113,9 +113,16 @@ Configuring devices
 ===
 This is done via the /proc interface, and most easily done via pgset
 as defined in the sample scripts.
+You need to specify the PGDEV environment variable to use functions from the
+sample scripts, e.g.:
+export PGDEV=/proc/net/pktgen/eth4@0
+source samples/pktgen/functions.sh
 
 Examples:
 
+ pg_ctrl start   starts injection.
+ pg_ctrl stopaborts injection. Also, ^C aborts generator.
+
  pgset "clone_skb 1" sets the number of copies of the same packet
  pgset "clone_skb 0" use single SKB for all transmits
  pgset "burst 8" uses xmit_more API to queue 8 copies of the same
@@ -165,8 +172,12 @@ Examples:
   IPSEC # IPsec encapsulation (needs CONFIG_XFRM)
   NODE_ALLOC # node specific memory allocation
   NO_TIMESTAMP # disable timestamping
+ pgset 'flag ![name]'Clear a flag to determine behaviour.
+ Note that you might need to use single quote in
+ interactive mode, so that your shell wouldn't expand
+ the specified flag as a history command.
 
- pgset spi SPI_VALUE Set specific SA used to transform packet.
+ pgset "spi [SPI_VALUE]" Set specific SA used to transform packet.
 
  pgset "udp_src_min 9"   set UDP source port min, If < udp_src_max, then
  cycle through the port range.
@@ -207,8 +218,6 @@ Examples:
  pgset "tos XX"   set former IPv4 TOS field (e.g. "tos 28" for AF11 no 
ECN, default 00)
  pgset "traffic_class XX" set former IPv6 TRAFFIC CLASS (e.g. "traffic_class 
B8" for EF no ECN, default 00)
 
- pgset stop  aborts injection. Also, ^C aborts generator.
-
  pgset "rate 300M"set rate to 300 Mb/s
  pgset "ratep 100"set rate to 1Mpps
 
-- 
2.13.6

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/5] pktgen: Behavior flags fixes

2018-01-09 Thread Dmitry Safonov
This series is a bunch of fixes, cleanups and documentation updates.
The diffstat speaks for itself, even with the added docs and the missing
flag parameters.

Cc: Arnd Bergmann 
Cc: "David S. Miller" 
Cc: David Windsor 
Cc: Eric Dumazet 
Cc: Ingo Molnar 
Cc: Johannes Berg 
Cc: Mark Rutland 
Cc: Radu Rendec 
Cc: "Reshetova, Elena" 
Cc: net...@vger.kernel.org

Dmitry Safonov (5):
  Documentation/pktgen: Clarify how to use pktgen samples
  pktgen: Add missing !flag parameters
  pktgen: Add behavior flag names array - pkt_flag_names
  pktgen: Remove brute-force printing of flags
  pktgen: Clean read user supplied flag mess

 Documentation/networking/pktgen.txt |  19 ++-
 net/core/pktgen.c   | 267 
 2 files changed, 104 insertions(+), 182 deletions(-)

-- 
2.13.6

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5] MIPS: Add noexec=on|off kernel parameter

2018-01-09 Thread Aleksandar Markovic
From: Miodrag Dinic 

Add a new kernel parameter to override the default behavior related to
the decision whether to indicate stack as non-executable or executable
(regardless of PT_GNU_STACK entry or CPU RIXI support) in function
mips_elf_read_implies_exec().

Allowed values:

noexec=on:  force indicating non-exec stack & heap
noexec=off: force indicating executable stack & heap

If this parameter is omitted, kernel behavior remains the same as it
was before this patch is applied.

This functionality is convenient during debugging and is especially
useful for Android development where indication of non-executable
stack is required.

NOTE: Using noexec=on on a system without CPU XI support is not
recommended since there is no actual HW support that provides
non-executable stack and heap. Use only for debugging purposes and
not in a production environment.

Signed-off-by: Miodrag Dinic 
Signed-off-by: Aleksandar Markovic 
---
In the last version, code comments, documentation, and commit message
are modified to better explain the purpose and nature of this option.
A precautionary note is added as well.
---
 Documentation/admin-guide/kernel-parameters.txt | 19 ++
 arch/mips/kernel/elf.c  | 48 +
 2 files changed, 67 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index af7104a..64c562a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2600,6 +2600,25 @@
noexec=on: enable non-executable mappings (default)
noexec=off: disable non-executable mappings
 
+   noexec  [MIPS]
+   Force indicating stack and heap as non-executable or
+   executable regardless of PT_GNU_STACK entry or CPU XI
+   (execute inhibit) support. Valid values are: on, off.
+   noexec=on:  force indicating non-executable
+   stack and heap
+   noexec=off: force indicating executable
+   stack and heap
+   If this parameter is omitted, stack and heap will be
+   indicated non-executable or executable as they are
+   actually set up, which depends on PT_GNU_STACK entry
+   and possibly other factors (for instance, CPU XI
+   support).
+   NOTE: Using noexec=on on a system without CPU XI
+   support is not recommended since there is no actual
+   HW support that provides non-executable stack/heap.
+   Use only for debugging purposes and not in a
+   production environment.
+
nosmap  [X86]
Disable SMAP (Supervisor Mode Access Prevention)
even if it is supported by processor.
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index 731325a..9bb40cc 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -326,8 +326,56 @@ void mips_set_personality_nan(struct arch_elf_state *state)
}
 }
 
+static int noexec = EXSTACK_DEFAULT;
+
+/*
+ * kernel parameter: noexec=on|off
+ *
+ * Force indicating stack and heap as non-executable or
+ * executable regardless of PT_GNU_STACK entry or CPU XI
+ * (execute inhibit) support. Valid values are: on, off.
+ *
+ * noexec=on:  force indicating non-executable
+ * stack and heap
+ * noexec=off: force indicating executable
+ * stack and heap
+ *
+ * If this parameter is omitted, stack and heap will be
+ * indicated non-executable or executable as they are
+ * actually set up, which depends on PT_GNU_STACK entry
+ * and possibly other factors (for instance, CPU XI
+ * support).
+ *
+ * NOTE: Using noexec=on on a system without CPU XI
+ * support is not recommended since there is no actual
+ * HW support that provides non-executable stack/heap.
+ * Use only for debugging purposes and not in a
+ * production environment.
+ */
+static int __init noexec_setup(char *str)
+{
+   if (!strcmp(str, "on"))
+   noexec = EXSTACK_DISABLE_X;
+   else if (!strcmp(str, "off"))
+   noexec = EXSTACK_ENABLE_X;
+   else
+   pr_err("Malformed noexec format! noexec=on|off\n");
+
+   return 1;
+}
+__setup("noexec=", noexec_setup);
+
 int mips_elf_read_implies_exec(void *elf_ex, int exstack)
 {
+   switch (noexec) {
+   case EXSTACK_DISABLE_X:
+   return 0;
+   case EXSTACK_ENABLE_X:
+   return 1;
+   default:
+   break;
+   }
+
if (exstack != EXSTACK_DISABLE_X) {
/* The binary doesn't request a