Re: [RFC 2/2] net: Add support for NTB virtual ethernet device

2012-07-13 Thread Jon Mason
On Fri, Jul 13, 2012 at 05:08:26PM -0700, Stephen Hemminger wrote:
> On Fri, 13 Jul 2012 14:45:00 -0700
> Jon Mason  wrote:
> 
> > A virtual ethernet device that uses the NTB transport API to send/receive 
> > data.
> > 
> > Signed-off-by: Jon Mason 
> > ---
> >  drivers/net/Kconfig  |4 +
> >  drivers/net/Makefile |1 +
> >  drivers/net/ntb_netdev.c |  411 
> > ++
> >  3 files changed, 416 insertions(+), 0 deletions(-)
> >  create mode 100644 drivers/net/ntb_netdev.c
> 
> 
> > +static void ntb_get_drvinfo(__attribute__((unused)) struct net_device *dev,
> > +   struct ethtool_drvinfo *info)
> > +{
> > +   strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
> > +   strlcpy(info->version, NTB_NETDEV_VER, sizeof(info->version));
> > +}
> > +
> > +static const char ntb_nic_stats[][ETH_GSTRING_LEN] = {
> > +   "rx_packets", "rx_bytes", "rx_errors", "rx_dropped", "rx_length_errors",
> > +   "rx_frame_errors", "rx_fifo_errors",
> > +   "tx_packets", "tx_bytes", "tx_errors", "tx_dropped",
> > +};
> > +
> > +static int ntb_get_stats_count(__attribute__((unused)) struct net_device 
> > *dev)
> > +{
> > +   return ARRAY_SIZE(ntb_nic_stats);
> > +}
> > +
> > +static int ntb_get_sset_count(struct net_device *dev, int sset)
> > +{
> > +   switch (sset) {
> > +   case ETH_SS_STATS:
> > +   return ntb_get_stats_count(dev);
> > +   default:
> > +   return -EOPNOTSUPP;
> > +   }
> > +}
> > +
> > +static void ntb_get_strings(__attribute__((unused)) struct net_device *dev,
> > +   u32 sset, u8 *data)
> > +{
> > +   switch (sset) {
> > +   case ETH_SS_STATS:
> > +   memcpy(data, *ntb_nic_stats, sizeof(ntb_nic_stats));
> > +   }
> > +}
> > +
> > +static void
> > +ntb_get_ethtool_stats(struct net_device *dev,
> > + __attribute__((unused)) struct ethtool_stats *stats,
> > + u64 *data)
> > +{
> > +   int i = 0;
> > +
> > +   data[i++] = dev->stats.rx_packets;
> > +   data[i++] = dev->stats.rx_bytes;
> > +   data[i++] = dev->stats.rx_errors;
> > +   data[i++] = dev->stats.rx_dropped;
> > +   data[i++] = dev->stats.rx_length_errors;
> > +   data[i++] = dev->stats.rx_frame_errors;
> > +   data[i++] = dev->stats.rx_fifo_errors;
> > +   data[i++] = dev->stats.tx_packets;
> > +   data[i++] = dev->stats.tx_bytes;
> > +   data[i++] = dev->stats.tx_errors;
> > +   data[i++] = dev->stats.tx_dropped;
> > +}
> 
> These statistics add no value over existing network stats.
> Don't implement ethtool stats unless device has something more
> interesting to say.

Fair enough

> 
> > +static const struct ethtool_ops ntb_ethtool_ops = {
> > +   .get_drvinfo = ntb_get_drvinfo,
> > +   .get_sset_count = ntb_get_sset_count,
> > +   .get_strings = ntb_get_strings,
> > +   .get_ethtool_stats = ntb_get_ethtool_stats,
> > +   .get_link = ethtool_op_get_link,
> > +};
> 
> If you want to implement bonding or bridging then implementing
> get_settings would help.

Will do.

> > +static int __init ntb_netdev_init_module(void)
> > +{
> > +   struct ntb_netdev *dev;
> > +   int rc;
> > +
> > +   pr_info("%s: Probe\n", KBUILD_MODNAME);
> 
> Useless message

True, will remove.

Thanks for the comments!
 
> > +   netdev = alloc_etherdev(sizeof(struct ntb_netdev));
> > +   if (!netdev)
> > +   return -ENOMEM;
> > +
> > +   dev = netdev_priv(netdev);
> > +   dev->ndev = netdev;
> > +   netdev->features = NETIF_F_HIGHDMA;
> > +
> > +   netdev->hw_features = netdev->features;
> > +   netdev->watchdog_timeo = msecs_to_jiffies(NTB_TX_TIMEOUT_MS);
> > +
> > +   random_ether_addr(netdev->perm_addr);
> > +   memcpy(netdev->dev_addr, netdev->perm_addr, netdev->addr_len);
> > +
> > +   netdev->netdev_ops = _netdev_ops;
> > +   SET_ETHTOOL_OPS(netdev, _ethtool_ops);
> > +
> > +   dev->qp = ntb_transport_create_queue(ntb_netdev_rx_handler,
> > +ntb_netdev_tx_handler,
> > +ntb_netdev_event_handler);
> > +   if (!dev->qp) {
> > +   rc = -EIO;
> > +   goto err;
> > +   }
> > +
> > +   netdev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN;
> > +
> > +   rc = register_netdev(netdev);
> > +   if (rc)
> > +   goto err1;
> > +
> > +   pr_info("%s: %s created\n", KBUILD_MODNAME, netdev->name);
> > +   return 0;
> > +
> > +err1:
> > +   ntb_transport_free_queue(dev->qp);
> > +err:
> > +   free_netdev(netdev);
> > +   return rc;
> > +}
> > +module_init(ntb_netdev_init_module);
> > +
> > +static void __exit ntb_netdev_exit_module(void)
> > +{
> > +   struct ntb_netdev *dev = netdev_priv(netdev);
> > +
> > +   unregister_netdev(netdev);
> > +   ntb_transport_free_queue(dev->qp);
> > +   free_netdev(netdev);
> > +
> > +   pr_info("%s: Driver removed\n", KBUILD_MODNAME);
> > +}
> > +module_exit(ntb_netdev_exit_module);
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message 

Re: [RFC 2/2] net: Add support for NTB virtual ethernet device

2012-07-13 Thread Jon Mason
On Sat, Jul 14, 2012 at 01:14:03AM +0200, Jiri Pirko wrote:
> Fri, Jul 13, 2012 at 11:45:00PM CEST, jon.ma...@intel.com wrote:
> >A virtual ethernet device that uses the NTB transport API to send/receive 
> >data.
> >
> >Signed-off-by: Jon Mason 
> >---
> > drivers/net/Kconfig  |4 +
> > drivers/net/Makefile |1 +
> > drivers/net/ntb_netdev.c |  411 
> > ++
> > 3 files changed, 416 insertions(+), 0 deletions(-)
> > create mode 100644 drivers/net/ntb_netdev.c
> >
> >diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> >index 0c2bd80..9bf8a71 100644
> >--- a/drivers/net/Kconfig
> >+++ b/drivers/net/Kconfig
> >@@ -178,6 +178,10 @@ config NETPOLL_TRAP
> > config NET_POLL_CONTROLLER
> > def_bool NETPOLL
> > 
> >+config NTB_NETDEV
> >+tristate "Virtual Ethernet over NTB"
> >+depends on NTB
> >+
> > config RIONET
> > tristate "RapidIO Ethernet over messaging driver support"
> > depends on RAPIDIO
> >diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> >index 3d375ca..9890148 100644
> >--- a/drivers/net/Makefile
> >+++ b/drivers/net/Makefile
> >@@ -69,3 +69,4 @@ obj-$(CONFIG_USB_IPHETH)+= usb/
> > obj-$(CONFIG_USB_CDC_PHONET)   += usb/
> > 
> > obj-$(CONFIG_HYPERV_NET) += hyperv/
> >+obj-$(CONFIG_NTB_NETDEV) += ntb_netdev.o
> >diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
> >new file mode 100644
> >index 000..bcbd9d4
> >--- /dev/null
> >+++ b/drivers/net/ntb_netdev.c
> >@@ -0,0 +1,411 @@
> >+/*
> >+ * This file is provided under a dual BSD/GPLv2 license.  When using or
> >+ *   redistributing this file, you may do so under either license.
> >+ *
> >+ *   GPL LICENSE SUMMARY
> >+ *
> >+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
> >+ *
> >+ *   This program is free software; you can redistribute it and/or modify
> >+ *   it under the terms of version 2 of the GNU General Public License as
> >+ *   published by the Free Software Foundation.
> >+ *
> >+ *   This program is distributed in the hope that it will be useful, but
> >+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
> >+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >+ *   General Public License for more details.
> >+ *
> >+ *   You should have received a copy of the GNU General Public License
> >+ *   along with this program; if not, write to the Free Software
> >+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 
> >USA.
> >+ *   The full GNU General Public License is included in this distribution
> >+ *   in the file called LICENSE.GPL.
> >+ *
> >+ *   BSD LICENSE
> >+ *
> >+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
> >+ *
> >+ *   Redistribution and use in source and binary forms, with or without
> >+ *   modification, are permitted provided that the following conditions
> >+ *   are met:
> >+ *
> >+ * * Redistributions of source code must retain the above copyright
> >+ *   notice, this list of conditions and the following disclaimer.
> >+ * * Redistributions in binary form must reproduce the above copy
> >+ *   notice, this list of conditions and the following disclaimer in
> >+ *   the documentation and/or other materials provided with the
> >+ *   distribution.
> >+ * * Neither the name of Intel Corporation nor the names of its
> >+ *   contributors may be used to endorse or promote products derived
> >+ *   from this software without specific prior written permission.
> >+ *
> >+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >+ *
> >+ * Intel PCIe NTB Network Linux driver
> >+ *
> >+ * Contact Information:
> >+ * Jon Mason 
> >+ */
> >+#include 
> >+#include 
> >+#include 
> >+#include 
> >+
> >+#define NTB_NETDEV_VER  "0.4"
> 
> Is it really necessary to provide this in-file versioning? Doesn't
> kernel version itself do the trick?

Not necessarily.  This may be distributed as a package outside of the kernel 
and the version is useful for debug.

> 
> >+
> >+MODULE_DESCRIPTION(KBUILD_MODNAME);
> >+MODULE_VERSION(NTB_NETDEV_VER);
> >+MODULE_LICENSE("Dual BSD/GPL");
> >+MODULE_AUTHOR("Intel 

[PATCH RFT 2/2] regulator: twl: Convert twlsmps_ops to get_voltage_sel and map_voltage

2012-07-13 Thread Axel Lin
Signed-off-by: Axel Lin 
---
 drivers/regulator/twl-regulator.c |   24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/regulator/twl-regulator.c 
b/drivers/regulator/twl-regulator.c
index 03d0bea..8dae1e3 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -755,12 +755,11 @@ static int twl6030smps_list_voltage(struct regulator_dev 
*rdev, unsigned index)
return voltage;
 }
 
-static int
-twl6030smps_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV,
-   unsigned int *selector)
+static int twl6030smps_map_voltage(struct regulator_dev *rdev, int min_uV,
+  int max_uV)
 {
-   struct twlreg_info  *info = rdev_get_drvdata(rdev);
-   int vsel = 0, calc_uV;
+   struct twlreg_info *info = rdev_get_drvdata(rdev);
+   int vsel = 0;
 
switch (info->flags) {
case 0:
@@ -827,14 +826,16 @@ twl6030smps_set_voltage(struct regulator_dev *rdev, int 
min_uV, int max_uV,
break;
}
 
-   calc_uV = twl6030smps_list_voltage(rdev, vsel);
-   if (calc_uV > max_uV)
-   return -EINVAL;
+   return vsel;
+}
 
-   *selector = vsel;
+static int twl6030smps_set_voltage_sel(struct regulator_dev *rdev,
+  unsigned int selector)
+{
+   struct twlreg_info *info = rdev_get_drvdata(rdev);
 
return twlreg_write(info, TWL_MODULE_PM_RECEIVER, VREG_VOLTAGE_SMPS,
-   vsel);
+   selector);
 }
 
 static int twl6030smps_get_voltage_sel(struct regulator_dev *rdev)
@@ -846,8 +847,9 @@ static int twl6030smps_get_voltage_sel(struct regulator_dev 
*rdev)
 
 static struct regulator_ops twlsmps_ops = {
.list_voltage   = twl6030smps_list_voltage,
+   .map_voltage= twl6030smps_map_voltage,
 
-   .set_voltage= twl6030smps_set_voltage,
+   .set_voltage_sel= twl6030smps_set_voltage_sel,
.get_voltage_sel= twl6030smps_get_voltage_sel,
 
.enable = twl6030reg_enable,
-- 
1.7.9.5



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RFT 1/2] regulator: twl: Fix checking voltage range in twl6030smps_set_voltage()

2012-07-13 Thread Axel Lin
The voltage selection logic is supposed to find the smallest voltage that falls
within the specified range. When using the equation to calculate vsel, we need
to ensure the requested min_uV is within the range the equation covers.
Otherwise we may select a voltage that is out of specified range.

For example, in the case vsel = 62 means select voltage of 210uV.
What we want is to ensure the requested min_uV <= 210 rather than checking
max_uV >= 210. And this also means in the case min_uV > 210, vsel = 62
does not meet the request.

Also calling twl6030smps_list_voltage() for all cases to ensure the selected
voltage still in bounds.

Signed-off-by: Axel Lin 
---
 drivers/regulator/twl-regulator.c |   36 
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/drivers/regulator/twl-regulator.c 
b/drivers/regulator/twl-regulator.c
index 8f0bd56..03d0bea 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -760,32 +760,28 @@ twl6030smps_set_voltage(struct regulator_dev *rdev, int 
min_uV, int max_uV,
unsigned int *selector)
 {
struct twlreg_info  *info = rdev_get_drvdata(rdev);
-   int vsel = 0;
+   int vsel = 0, calc_uV;
 
switch (info->flags) {
case 0:
if (min_uV == 0)
vsel = 0;
else if ((min_uV >= 60) && (min_uV <= 130)) {
-   int calc_uV;
vsel = DIV_ROUND_UP(min_uV - 60, 12500);
vsel++;
-   calc_uV = twl6030smps_list_voltage(rdev, vsel);
-   if (calc_uV > max_uV)
-   return -EINVAL;
}
/* Values 1..57 for vsel are linear and can be calculated
 * values 58..62 are non linear.
 */
-   else if ((min_uV > 190) && (max_uV >= 210))
+   else if ((min_uV > 190) && (min_uV <= 210))
vsel = 62;
-   else if ((min_uV > 180) && (max_uV >= 190))
+   else if ((min_uV > 180) && (min_uV <= 190))
vsel = 61;
-   else if ((min_uV > 150) && (max_uV >= 180))
+   else if ((min_uV > 150) && (min_uV <= 180))
vsel = 60;
-   else if ((min_uV > 135) && (max_uV >= 150))
+   else if ((min_uV > 135) && (min_uV <= 150))
vsel = 59;
-   else if ((min_uV > 130) && (max_uV >= 135))
+   else if ((min_uV > 130) && (min_uV <= 135))
vsel = 58;
else
return -EINVAL;
@@ -794,25 +790,21 @@ twl6030smps_set_voltage(struct regulator_dev *rdev, int 
min_uV, int max_uV,
if (min_uV == 0)
vsel = 0;
else if ((min_uV >= 70) && (min_uV <= 142)) {
-   int calc_uV;
vsel = DIV_ROUND_UP(min_uV - 70, 12500);
vsel++;
-   calc_uV = twl6030smps_list_voltage(rdev, vsel);
-   if (calc_uV > max_uV)
-   return -EINVAL;
}
/* Values 1..57 for vsel are linear and can be calculated
 * values 58..62 are non linear.
 */
-   else if ((min_uV > 190) && (max_uV >= 210))
+   else if ((min_uV > 190) && (min_uV <= 210))
vsel = 62;
-   else if ((min_uV > 180) && (max_uV >= 190))
+   else if ((min_uV > 180) && (min_uV <= 190))
vsel = 61;
-   else if ((min_uV > 135) && (max_uV >= 180))
+   else if ((min_uV > 135) && (min_uV <= 180))
vsel = 60;
-   else if ((min_uV > 135) && (max_uV >= 150))
+   else if ((min_uV > 135) && (min_uV <= 150))
vsel = 59;
-   else if ((min_uV > 130) && (max_uV >= 135))
+   else if ((min_uV > 130) && (min_uV <= 135))
vsel = 58;
else
return -EINVAL;
@@ -828,13 +820,17 @@ twl6030smps_set_voltage(struct regulator_dev *rdev, int 
min_uV, int max_uV,
case SMPS_OFFSET_EN|SMPS_EXTENDED_EN:
if (min_uV == 0) {
vsel = 0;
-   } else if ((min_uV >= 2161000) && (max_uV <= 4321000)) {
+   } else if ((min_uV >= 2161000) && (min_uV <= 4321000)) {
vsel = DIV_ROUND_UP(min_uV - 2161000, 38600);
vsel++;
}
break;
}
 
+   calc_uV = 

[PATCH UPDATED v3 6/6] workqueue: reimplement WQ_HIGHPRI using a separate worker_pool

2012-07-13 Thread Tejun Heo
>From a465fcee388d62d22e390b57c81ca8411f25a1da Mon Sep 17 00:00:00 2001
From: Tejun Heo 
Date: Fri, 13 Jul 2012 22:16:45 -0700

WQ_HIGHPRI was implemented by queueing highpri work items at the head
of the global worklist.  Other than queueing at the head, they weren't
handled differently; unfortunately, this could lead to execution
latency of a few seconds on heavily loaded systems.

Now that workqueue code has been updated to deal with multiple
worker_pools per global_cwq, this patch reimplements WQ_HIGHPRI using
a separate worker_pool.  NR_WORKER_POOLS is bumped to two and
gcwq->pools[0] is used for normal pri work items and ->pools[1] for
highpri.  Highpri workers get -20 nice level and have an 'H' suffix in
their names.  Note that this change increases the number of kworkers
per cpu.

POOL_HIGHPRI_PENDING, pool_determine_ins_pos() and highpri chain
wakeup code in process_one_work() are no longer used and removed.

This allows proper prioritization of highpri work items and removes
high execution latency of highpri work items.

v2: nr_running indexing bug in get_pool_nr_running() fixed.

v3: Refreshed for the get_pool_nr_running() update in the previous
patch.

Signed-off-by: Tejun Heo 
Reported-by: Josh Hunt 
LKML-Reference: 

Cc: Tony Luck 
Cc: Fengguang Wu 
---
 Documentation/workqueue.txt |  103 ---
 kernel/workqueue.c  |  100 +++--
 2 files changed, 65 insertions(+), 138 deletions(-)

diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt
index a0b577d..a6ab4b6 100644
--- a/Documentation/workqueue.txt
+++ b/Documentation/workqueue.txt
@@ -89,25 +89,28 @@ called thread-pools.
 
 The cmwq design differentiates between the user-facing workqueues that
 subsystems and drivers queue work items on and the backend mechanism
-which manages thread-pool and processes the queued work items.
+which manages thread-pools and processes the queued work items.
 
 The backend is called gcwq.  There is one gcwq for each possible CPU
-and one gcwq to serve work items queued on unbound workqueues.
+and one gcwq to serve work items queued on unbound workqueues.  Each
+gcwq has two thread-pools - one for normal work items and the other
+for high priority ones.
 
 Subsystems and drivers can create and queue work items through special
 workqueue API functions as they see fit. They can influence some
 aspects of the way the work items are executed by setting flags on the
 workqueue they are putting the work item on. These flags include
-things like CPU locality, reentrancy, concurrency limits and more. To
-get a detailed overview refer to the API description of
+things like CPU locality, reentrancy, concurrency limits, priority and
+more.  To get a detailed overview refer to the API description of
 alloc_workqueue() below.
 
-When a work item is queued to a workqueue, the target gcwq is
-determined according to the queue parameters and workqueue attributes
-and appended on the shared worklist of the gcwq.  For example, unless
-specifically overridden, a work item of a bound workqueue will be
-queued on the worklist of exactly that gcwq that is associated to the
-CPU the issuer is running on.
+When a work item is queued to a workqueue, the target gcwq and
+thread-pool is determined according to the queue parameters and
+workqueue attributes and appended on the shared worklist of the
+thread-pool.  For example, unless specifically overridden, a work item
+of a bound workqueue will be queued on the worklist of either normal
+or highpri thread-pool of the gcwq that is associated to the CPU the
+issuer is running on.
 
 For any worker pool implementation, managing the concurrency level
 (how many execution contexts are active) is an important issue.  cmwq
@@ -115,26 +118,26 @@ tries to keep the concurrency at a minimal but sufficient 
level.
 Minimal to save resources and sufficient in that the system is used at
 its full capacity.
 
-Each gcwq bound to an actual CPU implements concurrency management by
-hooking into the scheduler.  The gcwq is notified whenever an active
-worker wakes up or sleeps and keeps track of the number of the
-currently runnable workers.  Generally, work items are not expected to
-hog a CPU and consume many cycles.  That means maintaining just enough
-concurrency to prevent work processing from stalling should be
-optimal.  As long as there are one or more runnable workers on the
-CPU, the gcwq doesn't start execution of a new work, but, when the
-last running worker goes to sleep, it immediately schedules a new
-worker so that the CPU doesn't sit idle while there are pending work
-items.  This allows using a minimal number of workers without losing
-execution bandwidth.
+Each thread-pool bound to an actual CPU implements concurrency
+management by hooking into the scheduler.  The thread-pool is notified
+whenever an active worker wakes up or sleeps and keeps track of the
+number of the currently runnable 

[PATCH UPDATED 5/6] workqueue: introduce NR_WORKER_POOLS and for_each_worker_pool()

2012-07-13 Thread Tejun Heo
>From 4ce62e9e30cacc26885cab133ad1de358dd79f21 Mon Sep 17 00:00:00 2001
From: Tejun Heo 
Date: Fri, 13 Jul 2012 22:16:44 -0700

Introduce NR_WORKER_POOLS and for_each_worker_pool() and convert code
paths which need to manipulate all pools in a gcwq to use them.
NR_WORKER_POOLS is currently one and for_each_worker_pool() iterates
over only @gcwq->pool.

Note that nr_running is per-pool property and converted to an array
with NR_WORKER_POOLS elements and renamed to pool_nr_running.  Note
that get_pool_nr_running() currently assumes 0 index.  The next patch
will make use of non-zero index.

The changes in this patch are mechanical and don't cause any
functional difference.  This is to prepare for multiple pools per
gcwq.

v2: nr_running indexing bug in get_pool_nr_running() fixed.

v3: Pointer to array is stupid.  Don't use it in get_pool_nr_running()
as suggested by Linus.

Signed-off-by: Tejun Heo 
Cc: Tony Luck 
Cc: Fengguang Wu 
Cc: Linus Torvalds 
---
So, the same 0 index silliness but this shouldn't be as fugly.

Thanks.

 kernel/workqueue.c |  223 +++
 1 files changed, 153 insertions(+), 70 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7a98bae..b0daaea 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -74,6 +74,8 @@ enum {
TRUSTEE_RELEASE = 3,/* release workers */
TRUSTEE_DONE= 4,/* trustee is done */
 
+   NR_WORKER_POOLS = 1,/* # worker pools per gcwq */
+
BUSY_WORKER_HASH_ORDER  = 6,/* 64 pointers */
BUSY_WORKER_HASH_SIZE   = 1 << BUSY_WORKER_HASH_ORDER,
BUSY_WORKER_HASH_MASK   = BUSY_WORKER_HASH_SIZE - 1,
@@ -274,6 +276,9 @@ EXPORT_SYMBOL_GPL(system_nrt_freezable_wq);
 #define CREATE_TRACE_POINTS
 #include 
 
+#define for_each_worker_pool(pool, gcwq)   \
+   for ((pool) = &(gcwq)->pool; (pool); (pool) = NULL)
+
 #define for_each_busy_worker(worker, i, pos, gcwq) \
for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \
hlist_for_each_entry(worker, pos, >busy_hash[i], hentry)
@@ -454,7 +459,7 @@ static bool workqueue_freezing; /* W: have wqs 
started freezing? */
  * try_to_wake_up().  Put it in a separate cacheline.
  */
 static DEFINE_PER_CPU(struct global_cwq, global_cwq);
-static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running);
+static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, 
pool_nr_running[NR_WORKER_POOLS]);
 
 /*
  * Global cpu workqueue and nr_running counter for unbound gcwq.  The
@@ -462,7 +467,9 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, 
gcwq_nr_running);
  * workers have WORKER_UNBOUND set.
  */
 static struct global_cwq unbound_global_cwq;
-static atomic_t unbound_gcwq_nr_running = ATOMIC_INIT(0);  /* always 0 */
+static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = {
+   [0 ... NR_WORKER_POOLS - 1] = ATOMIC_INIT(0),   /* always 0 */
+};
 
 static int worker_thread(void *__worker);
 
@@ -477,11 +484,12 @@ static struct global_cwq *get_gcwq(unsigned int cpu)
 static atomic_t *get_pool_nr_running(struct worker_pool *pool)
 {
int cpu = pool->gcwq->cpu;
+   int idx = 0;
 
if (cpu != WORK_CPU_UNBOUND)
-   return _cpu(gcwq_nr_running, cpu);
+   return _cpu(pool_nr_running, cpu)[idx];
else
-   return _gcwq_nr_running;
+   return _pool_nr_running[idx];
 }
 
 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
@@ -3345,9 +3353,30 @@ EXPORT_SYMBOL_GPL(work_busy);
__ret1 < 0 ? -1 : 0;\
 })
 
+static bool gcwq_is_managing_workers(struct global_cwq *gcwq)
+{
+   struct worker_pool *pool;
+
+   for_each_worker_pool(pool, gcwq)
+   if (pool->flags & POOL_MANAGING_WORKERS)
+   return true;
+   return false;
+}
+
+static bool gcwq_has_idle_workers(struct global_cwq *gcwq)
+{
+   struct worker_pool *pool;
+
+   for_each_worker_pool(pool, gcwq)
+   if (!list_empty(>idle_list))
+   return true;
+   return false;
+}
+
 static int __cpuinit trustee_thread(void *__gcwq)
 {
struct global_cwq *gcwq = __gcwq;
+   struct worker_pool *pool;
struct worker *worker;
struct work_struct *work;
struct hlist_node *pos;
@@ -3363,13 +3392,15 @@ static int __cpuinit trustee_thread(void *__gcwq)
 * cancelled.
 */
BUG_ON(gcwq->cpu != smp_processor_id());
-   rc = trustee_wait_event(!(gcwq->pool.flags & POOL_MANAGING_WORKERS));
+   rc = trustee_wait_event(!gcwq_is_managing_workers(gcwq));
BUG_ON(rc < 0);
 
-   gcwq->pool.flags |= POOL_MANAGING_WORKERS;
+   for_each_worker_pool(pool, gcwq) {
+   pool->flags |= POOL_MANAGING_WORKERS;
 
-   list_for_each_entry(worker, 

Re: general protection fault on ttm_init()

2012-07-13 Thread Fengguang Wu
Hi Dave,

On Sat, Jul 14, 2012 at 01:33:45PM +1000, Dave Airlie wrote:
> Can you try this patch on top of the previous one?
> 
> I think it should fix it.

You are right, it works!  Thank you very much! :-)

Thanks,
Fengguang
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/6] workqueue: introduce NR_WORKER_POOLS and for_each_worker_pool()

2012-07-13 Thread Tejun Heo
Hey, Linus.

On Fri, Jul 13, 2012 at 10:00:10PM -0700, Linus Torvalds wrote:
> On Fri, Jul 13, 2012 at 9:44 PM, Tejun Heo  wrote:
> >
> > nr_running is atomic_t (*nr_running)[2].  Ignoring the pointer to
> > array part, it's just returning the address of N'th element of the
> > array.  ARRAY + N == &ARRAY[N].
> 
> None of this matters one whit.
> 
> You did "&(x)[0]".
> 
> That's insane. It's crazy. It doesn't even matter what "x" is in
> between, it's crazy regardless.

Eh, from my previous reply.

| Ah okay, you're looking at the fifth patch in isolation.  Up to this
| point, the index is always 0.  I'm putting it in as a placeholder for
| the next patch which makes use of non-zero index.  This patch is
| supposed to prepare everything for multiple pools and thus non-zero
| index.

The patch is about converting stuff to handle size-1 array without
introducing any actual behavior change so that the next patch can bump
the array size and just change the index.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Xen-devel] incorrect layout of globals from head_64.S during kexec boot

2012-07-13 Thread Keir Fraser
On 13/07/2012 21:20, "Olaf Hering"  wrote:

> On Tue, Jul 10, Keir Fraser wrote:
> 
>> On 10/07/2012 19:09, "Olaf Hering"  wrote:
>>> I'm not sure, most likely the gfn will just disappear from the guest,
>>> like a ballooned page disappears. Accessing it will likely cause a
>>> crash.
>> 
>> Best thing to do, if possible, is map the shared-info page in the
>> xen-platform pci device's BAR memory range. Then it will not conflict with
>> any RAM.
>> 
>> If you do map it over the top of an existing RAM page, you will have to
>> repopulate that RAM page before kexec, using populate_physmap hypercall. The
>> good news is that the populate_physmap hypercall will have the side effect
>> of unmapping the shared-info page, ready to be mapped wherever the new
>> kernel would like it to reside :)
> 
> Keir,
> 
> is this a safe thing to do in a SMP guest?
> If arch/x86/xen/enlighten.c:xen_hvm_init_shared_info() allocates a page
> (backed by mfn M and pfn A) and assigns *HYPERVISOR_shared_info and
> *xen_vcpu then everything will reference these pointers.

So pfn A now points at shared_info, and mfn M is lost (freed back to Xen).
Xen_vcpu doesn't come into it, you'd have that mapped at yet another pfn.

> If drivers/xen/platform-pci.c:platform_pci_init would also do a
> XENMAPSPACE_shared_info call with pfn B, isn't there a small window where
> pfn A is not backed by a mfn because mfn M is now connected to pfn C? As
> a result other code paths which access *HYPERVISOR_shared_info and
> *xen_vcpu between the hypercall and the update of the pointers will read
> 0xff.

Don't really understand this. After the XENMAPSPACE_shared_info_call:
 * PFN B points at shared_info, mfn M_B it previously mapped is lost (freed
back to Xen).
 * PFN A maps nothing, reads return all-1s.

Yes, obviously you can't atomically update the mapping of shinfo from A->B,
and update your pointer in the kernel at exactly the same time. Presumably
you do this early during boot, or late during kexec, or otherwise at a time
when other processors are not expected to touch shinfo.

> 
> If I read the hypercall code of XENMEM_add_to_physmap correctly the mfn
> backing *HYPERVISOR_shared_info will remain the same, so there is no need
> to copy data from the old to the new *HYPERVISOR_shared_info.

That is correct.

> What do you think, is that race real?

I suppose it is. I didn't imagine it would be a troublesome one though.

 -- Keir

> Olaf


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/6] workqueue: introduce NR_WORKER_POOLS and for_each_worker_pool()

2012-07-13 Thread Linus Torvalds
On Fri, Jul 13, 2012 at 9:44 PM, Tejun Heo  wrote:
>
> nr_running is atomic_t (*nr_running)[2].  Ignoring the pointer to
> array part, it's just returning the address of N'th element of the
> array.  ARRAY + N == &ARRAY[N].

None of this matters one whit.

You did "&(x)[0]".

That's insane. It's crazy. It doesn't even matter what "x" is in
between, it's crazy regardless.

It's just a really confused way of saying "x" (*). Except it makes the
code look like an insane monkey on crack got a-hold of your keyboard
when you weren't looking.

And to make it worse, "x" itself was the result of doing "*&y". Which
was probably written by the insane monkey's older brother, Max, who
has been chewing Quaaludes for a few years, and as a result _his_
brain really isn't doing too well either. Even for a monkey. And now
you're letting *him* at your keyboard too?

So you had two separately (but similarly) insane ways of complicating
the code so that it was really obfuscated. When it really just
computed "y" to begin with, it just added all those "x=*&y" and
"&(x)[0]" games around it to make it look complicated.

Linus

(*) Technically, "&(x)[0]" is actually a really confused way of saying
"(x+0)" while making sure that "x" was a valid pointer. It basically
guarantees that if "x" started out as an array, it has now been
demoted to a pointer - but since arrays will be demoted to pointers by
pretty much any subsequent operation except for "sizeof()" and a
couple of other special cases anyway, you can pretty much just say
that "&(x)[0]" is "(x+0)" is "x".

And "*&y" really is exactly the same as "y", except for again some
syntactic checking (ie it is basically an odd way to verify that "y"
is an lvalue, since you cannot do an address-of of a non-lvalue).
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RESEND PATCH 1/1] clk: add DT support for clock gating control

2012-07-13 Thread Rob Herring
On 07/13/2012 04:42 AM, Sebastian Hesselbarh wrote:
> On 07/13/2012 05:19 AM, Rob Herring wrote:
>> What's implemented in Linux should not define the binding. The binding
>> should describe the hardware.
>> [...]
>> True, but not your problem to implement. A binding doesn't necessarily
>> mean there is a full Linux implementation. We just don't want to create
>> something only to find others need something completely different.
> 
> Ok, what about a DT describing the following for a simple register-based
> clock gating controller and the corresponding gated-clock independent of
> the controller. I am sure there are a bunch of SoCs out there that
> control their clock gates by writing some bits to a register. If that
> DT description matches your expectations, I'll prepare patches with
> documentation and implementation for common clock framework.
> 

Clock gates are just 1 part. There's muxes, dividers, plls, etc. I'm not
convinced that it makes sense to define clocks at this level. For
complex chips, I think it is better to just define the chip's clock
controller module as a single node with lots of clock outputs. The primary need is to
describe board specific changes not SOC level clock tree. Much of it is
static and generally only a few clocks may change config board to board.

> Sebastian
> 
> -- 
>  /* Simple clock gating controller based on bitmasks and register */
> cgc: clock-gating-control@f100 {
>   compatible = "clock-gating-control-register";
>   reg = <0xf100 0x4>;
> 
>   /* Clock gating control with one bit at bit position 0
>  enable with (1<<0), disable with (0<<0) */
>   cgctrl_usb0: cgc_usb0 {
> clock-gating-control,shift = <0>;
> clock-gating-control,mask = <1>;
> clock-gating-control,enable = <1>;
> clock-gating-control,disable = <0>;
>   };
> 
>   /* Clock gating control with two bits at bit position 1-2
>  enable with (2<<1), disable with (0<<1) */
>   cgctrl_sata: cgc_sata {
> clock-gating-control,shift = <1>;
> clock-gating-control,mask = <3>;
> clock-gating-control,enable = <2>;
> clock-gating-control,disable = <0>;
>   };
> };
> 
> /* Generic clock gate description that can be used with
>any clock gating controller */
> cg_usb0: clockgate@0 {
>   compatible = "gated-clock";
>   #clock-cells = <0>;
>   clocks = <>;
>   clock-gate-control = <&cgctrl_usb0>;
> };

I don't see this scaling to ~50 clocks.

Rob
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/6] workqueue: introduce NR_WORKER_POOLS and for_each_worker_pool()

2012-07-13 Thread Tejun Heo
Hello, Linus.

On Fri, Jul 13, 2012 at 09:27:03PM -0700, Linus Torvalds wrote:
> Seeing code like this
> 
> +   return &(*nr_running)[0];
> 
> just makes me go "WTF?"

I was going WTF too.  This was the smallest fix and I wanted to make
it minimal because there's another stack of patches on top of it.
Planning to just fold nr_running into worker_pool afterwards which
will remove the whole function.

> Why are you taking the address of something you just dereferenced (the
> "& [0]" part).

nr_running is atomic_t (*nr_running)[2].  Ignoring the pointer to
array part, it's just returning the address of N'th element of the
array.  ARRAY + N == &ARRAY[N].

> And you actually do that *twice*, except the inner one is more
> complicated. When you assign nr_running, you take the address of it, so
> the "*nr_running" is actually just the same kind of odd thing (except
> in reverse - you dereference something you just took the
> address-of).
> 
> Seriously, this to me is a sign of *deeply* confused code. And the
> fact that your first version of that code was buggy *EXACTLY* due to
> this confusion should have made you take a step back.

Type-wise, I don't think it's confused.  Ah okay, you're looking at
the fifth patch in isolation.  Up to this point, the index is always 0.
I'm putting it in as a placeholder for the next patch which makes use
of non-zero index.  This patch is supposed to prepare everything for
multiple pools and thus non-zero index.

> As far as I can tell, what you actually want that function to do is:
> 
>   static atomic_t *get_pool_nr_running(struct worker_pool *pool)
>   {
> int cpu = pool->gcwq->cpu;
> 
> if (cpu != WORK_CPU_UNBOUND)
> return per_cpu(pool_nr_running, cpu);
> 
> return unbound_pool_nr_running;
>   }

More like the following in the end.

static atomic_t *get_pool_nr_running(struct worker_pool *pool)
{
int cpu = pool->gcwq->cpu;
int is_highpri = pool_is_highpri(pool);

if (cpu != WORK_CPU_UNBOUND)
return &per_cpu(pool_nr_running, cpu)[is_highpri];

return &unbound_pool_nr_running[is_highpri];
}

> I didn't test the code, btw. I just looked at the patch and went WTF.

Eh... yeah, with or without [2], this is WTF.  I'll just refresh it
with the above version.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/6] workqueue: introduce NR_WORKER_POOLS and for_each_worker_pool()

2012-07-13 Thread Linus Torvalds
Seeing code like this

+   return &(*nr_running)[0];

just makes me go "WTF?"

Why are you taking the address of something you just dereferenced (the
"& [0]" part).

And you actually do that *twice*, except the inner one is more
complicated. When you assign nr_running, you take the address of it, so
the "*nr_running" is actually just the same kind of odd thing (except
in reverse - you dereference something you just took the
address-of).

Seriously, this to me is a sign of *deeply* confused code. And the
fact that your first version of that code was buggy *EXACTLY* due to
this confusion should have made you take a step back.

As far as I can tell, what you actually want that function to do is:

  static atomic_t *get_pool_nr_running(struct worker_pool *pool)
  {
int cpu = pool->gcwq->cpu;

if (cpu != WORK_CPU_UNBOUND)
return per_cpu(pool_nr_running, cpu);

return unbound_pool_nr_running;
  }

Notice how there isn't an 'address-of' operator anywhere in sight
there. Those things are arrays, they get turned into "atomic_t *"
automatically. And there isn't a single dereference (not a '*', and
not a "[0]" - they are the exact same thing, btw) in sight either.

What am I missing? Are there some new drugs that all the cool kids
chew that I should be trying? Because I really don't think the kinds
of insane "take the address of a dereference" games are a good idea.
They really look to me like somebody is having a really bad drug
experience.

I didn't test the code, btw. I just looked at the patch and went WTF.

Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] bnx2: update bnx2-mips-09 firmware to bnx2-mips-09-6.2.1b

2012-07-13 Thread Ben Hutchings
On Fri, 2012-07-13 at 15:09 +0100, Chris Webb wrote:
> Eric Dumazet  writes:
> 
> > Have you read firmware/README.AddingFirmware ?
> 
> I hadn't, but now I have, and if firmware upgrades are considered 'adding
> new firmware', I agree this patch is wrong, and should have just removed the
> obsolete bnx2-mips-09-6.2.1a file that is no longer used by the bnx2 driver.
> 
> However, not having dealt with cards that need these kinds of horrible
> binary blobs before, I'm a little uncertain how I should be building
> upstream kernels with CONFIG_FIRMWARE_IN_KERNEL and pulling in the correct
> blobs from a linux-firmware checkout.
> 
> Is there a more automatic method than going through the source for each
> configured driver and setting CONFIG_EXTRA_FIRMWARE manually to list the
> relevant firmwares? Is there any way to give the kernel the location of
> linux-firmware and have it compile in everything needed for the selected
> drivers, as used to happen with the firmware/ subdirectory?
> CONFIG_EXTRA_FIRMWARE_DIR doesn't seem to do anything with
> CONFIG_EXTRA_FIRMWARE empty, so I don't think it does what I'm hoping?

I had a go at making CONFIG_FIRMWARE_IN_KERNEL pick the right blobs
automatically , but
I couldn't get the Kbuild rules quite right.  No-one responded and I
haven't had another try since.

Ben.

-- 
Ben Hutchings
The generation of random numbers is too important to be left to chance.
- Robert Coveyou


signature.asc
Description: This is a digitally signed message part


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Ben Hutchings
On Fri, 2012-07-13 at 13:37 -0700, Linus Torvalds wrote:
> So this has long been one of my pet configuration peeves: as a user I
> am perfectly happy answering the questions about what kinds of
> hardware I want the kernel to support (I kind of know that), but many
> of the "support infrastructure" questions are very opaque, and I have
> no idea which of them any particular distribution actually depends
> on.
[...]
> The point I'm slowly getting to is that I would actually love to have
> *distro* Kconfig-files, where the distribution would be able to say
> "These are the minimums I *require* to work". So we'd have a "Distro"
> submenu, where you could pick the distro(s) you use, and then pick
> which release, and we'd have something like

I like this idea in principle.

[...]
>  - distro/Kconfig:
> 
> config DISTRO_REQUIREMENTS
> bool "Pick minimal distribution requirements"
> 
> choice DISTRO
> prompt "Distribution"
> depends on DISTRO_REQUIREMENTS
> 
> config FEDORA
> config OPENSUSE
> config UBUNTU
> ...
> 
> endchoice
> 
> and then depending on the DISTRO config, we'd include one of the
> distro-specific ones with lists of supported distro versions and then
> the random config settings for that version:

You might also want to *un*select some options like
CONFIG_SYSFS_DEPRECATED and CONFIG_SYSFS_DEPRECATED_V2 that need to be
set one way or the other depending on the version of udev.  (I think
it's possible to kluge this with the addition of a hidden negative
config option.)

How about stuff like NET and INET, that every distro will need and yet
is configurable even without EXPERT?

[...]
> Sure, you can copy the config file that came with the distro, but it
> has tons of stuff that really isn't required. Not just in hardware,
> but all the debug choices etc that are really a user choice. And it's
> really hard to figure out - even for somebody like me - what a minimal
> usable kernel is.
[...]

And it's still hard for me as kernel packager: just because an option
was requested and enabled to support some bit of userland, doesn't mean
I know what's using or depending on it now.  (I think Dave Jones made
this point already.)  I'm not usually concerned with *minimal* config.

Ben.

-- 
Ben Hutchings
The generation of random numbers is too important to be left to chance.
- Robert Coveyou


signature.asc
Description: This is a digitally signed message part


[PATCH v3 2/2] Documentation: add a caveat for seccomp filter and vsyscall emulation

2012-07-13 Thread Will Drewry
With the addition of seccomp support to vsyscall emulation:
  http://permalink.gmane.org/gmane.linux.kernel/1327732
with some minor changes in the first patch in this series.

Update the documentation to indicate quirky behaviors when the 'ip' is
in the vsyscall page and vsyscall emulation is in effect.

If v2 of the first patch is preferred, then this patch will need to
be changed to indicate that SECCOMP_RET_TRACE does not allow
system calls to be remapped _or_ skipped.

Signed-off-by: Will Drewry 
---
 Documentation/prctl/seccomp_filter.txt |   22 ++
 1 file changed, 22 insertions(+)

diff --git a/Documentation/prctl/seccomp_filter.txt 
b/Documentation/prctl/seccomp_filter.txt
index 597c3c5..67ed88b 100644
--- a/Documentation/prctl/seccomp_filter.txt
+++ b/Documentation/prctl/seccomp_filter.txt
@@ -161,3 +161,25 @@ architecture supports both ptrace_event and seccomp, it 
will be able to
 support seccomp filter with minor fixup: SIGSYS support and seccomp return
 value checking.  Then it must just add CONFIG_HAVE_ARCH_SECCOMP_FILTER
 to its arch-specific Kconfig.
+
+
+Caveats
+---
+
+On x86-64 with vsyscall emulation enabled and while servicing a
+vsyscall-emulated system call:
+- A return value of SECCOMP_RET_TRAP will set a si_call_addr pointing to
+  the vsyscall entry for the given call and not the address after the
+  'syscall' instruction.  Any code which wants to restart the call
+  should return to that address and code wishing to return simulating
+  completion may either sigreturn normally or simulate a ret instruction
+  and use the return address from the stack.
+- A return value of SECCOMP_RET_TRACE will signal the tracer as usual,
+  but the syscall may not be changed to another system call using the
+  orig_rax register. It may only be changed to a different value in
+  order to skip the currently emulated call and any change will result
+  in that behavior.  The remainder of the registers may be altered as
+  usual.
+- Detection of this quirky behavior may be done by checking for getcpu,
+  time, or gettimeofday and if the si_call_addr or rip is in the
+  vsyscall page, specifically at the start of the specific entry call.
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 1/2] vsyscall: allow seccomp in vsyscall=emulate

2012-07-13 Thread Will Drewry
If a seccomp filter program is installed, older static binaries and
distributions with older libc implementations (glibc 2.13 and earlier)
that rely on vsyscall use will be terminated regardless of the filter
program policy when executing time, gettimeofday, or getcpu.  This is
only the case when vsyscall emulation is in use (vsyscall=emulate is the
default).

This patch emulates system call entry inside a vsyscall=emulate by
populating regs->ax and regs->orig_ax with the system call number prior
to calling into seccomp such that all seccomp-dependencies function
normally.  Additionally, system call return behavior is emulated in line
with other vsyscall entrypoints for the trace/trap cases.

Note, v3 adds support for a ptracer to skip and emulate vsyscalls. This
is not required behavior but the documentation should reflect the behavior
for whichever is preferred (v2 or v3).

Reported-by: Owen Kibel 
Signed-off-by: Will Drewry 

v3: - allow ptrace orig_ax changes to skip the syscall since changing it is not
  an option. (result of discussions with luto)
- ensure ptrace register modification doesn't change return behavior taking
  the "normal" return path
- add some comments
v2: - fixed ip and sp on SECCOMP_RET_TRAP/ERRNO (thanks to l...@mit.edu)
---
 arch/x86/kernel/vsyscall_64.c |   42 +
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 7515cf0..c56a8dc 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -139,6 +139,22 @@ static int addr_to_vsyscall_nr(unsigned long addr)
return nr;
 }
 
+static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
+{
+   int ret;
+   struct pt_regs *regs;
+   if (!seccomp_mode(&tsk->seccomp))
+   return 0;
+   regs = task_pt_regs(tsk);
+   regs->orig_ax = syscall_nr;
+   regs->ax = syscall_nr;  /* ensure consistency */
+   /* 0 if allowed, -1 on SECCOMP_RET_ERRNO and SECCOMP_RET_TRAP */
+   ret = __secure_computing(syscall_nr);
+   if (regs->orig_ax != syscall_nr)
+   return 1; /* ptrace requested skip */
+   return ret;
+}
+
 static bool write_ok_or_segv(unsigned long ptr, size_t size)
 {
/*
@@ -174,6 +190,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long 
address)
int vsyscall_nr;
int prev_sig_on_uaccess_error;
long ret;
+   int skip;
 
/*
 * No point in checking CS -- the only way to get here is a user mode
@@ -205,9 +222,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long 
address)
}
 
tsk = current;
-   if (seccomp_mode(&tsk->seccomp))
-   do_exit(SIGKILL);
-
/*
 * With a real vsyscall, page faults cause SIGSEGV.  We want to
 * preserve that behavior to make writing exploits harder.
@@ -222,8 +236,13 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long 
address)
 * address 0".
 */
ret = -EFAULT;
+   skip = 0;
switch (vsyscall_nr) {
case 0:
+   skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
+   if (skip)
+   break;
+
if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
!write_ok_or_segv(regs->si, sizeof(struct timezone)))
break;
@@ -234,6 +253,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long 
address)
break;
 
case 1:
+   skip = vsyscall_seccomp(tsk, __NR_time);
+   if (skip)
+   break;
+
if (!write_ok_or_segv(regs->di, sizeof(time_t)))
break;
 
@@ -241,6 +264,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long 
address)
break;
 
case 2:
+   skip = vsyscall_seccomp(tsk, __NR_getcpu);
+   if (skip)
+   break;
+
if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
!write_ok_or_segv(regs->si, sizeof(unsigned)))
break;
@@ -253,6 +280,12 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long 
address)
 
current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
 
+   if (skip) {
+   if ((long)regs->ax <= 0L || skip == 1) /* seccomp errno/trace */
+   goto do_ret;
+   goto done; /* seccomp trap */
+   }
+
if (ret == -EFAULT) {
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
@@ -271,10 +304,11 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long 
address)
 
regs->ax = ret;
 
+do_ret:
/* Emulate a ret instruction. */
regs->ip = caller;
regs->sp += 8;
-
+done:
return true;
 
 

Re: [PATCH 0/3] fs/sysv: stop using write_supers and s_dirt

2012-07-13 Thread Artem Bityutskiy
On Fri, 2012-07-13 at 14:42 -0700, Andrew Morton wrote:
> 
> The issue Alan raised around the superblock timestamp is still up in
> the air.  I guess he's a slow typist ;)
> 
> My take is "no, we don't need to do that any more" - surely all Linux
> systems have a functional hardware clock.  But the changelog should be
> updated to describe and justify the decision.
> 
While I do trust such systems existed and may even still exist, I
doubt that the Linux sysv FS implementation is of any help for them because
it updates the superblock time-stamp _only_ if there was write activity,
otherwise it does not. So you cannot rely on our time-stamps at all
anyway. My patches just make it update the time-stamp more rarely.

-- 
Best Regards,
Artem Bityutskiy


signature.asc
Description: This is a digitally signed message part


Re: [PATCH v2] x86/vsyscall: allow seccomp filter in vsyscall=emulate

2012-07-13 Thread Will Drewry
On Fri, Jul 13, 2012 at 7:48 PM, Will Drewry  wrote:
> On Fri, Jul 13, 2012 at 6:00 PM, Andrew Lutomirski  wrote:
>> On Fri, Jul 13, 2012 at 10:06 AM, Will Drewry  wrote:
>>> If a seccomp filter program is installed, older static binaries and
>>> distributions with older libc implementations (glibc 2.13 and earlier)
>>> that rely on vsyscall use will be terminated regardless of the filter
>>> program policy when executing time, gettimeofday, or getcpu.  This is
>>> only the case when vsyscall emulation is in use (vsyscall=emulate is the
>>> default).
>>>
>>> This patch emulates system call entry inside a vsyscall=emulate by
>>> populating regs->ax and regs->orig_ax with the system call number prior
>>> to calling into seccomp such that all seccomp-dependencies function
>>> normally.  Additionally, system call return behavior is emulated in line
>>> with other vsyscall entrypoints for the trace/trap cases.
>>>
>>> Reported-by: Owen Kibel 
>>> Signed-off-by: Will Drewry 
>>>
>>> v2: - fixed ip and sp on SECCOMP_RET_TRAP/TRACE (thanks to l...@mit.edu)
>>
>>> @@ -253,6 +273,12 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned 
>>> long address)
>>>
>>> current_thread_info()->sig_on_uaccess_error = 
>>> prev_sig_on_uaccess_error;
>>>
>>> +   if (skip) {
>>> +   if ((long)regs->ax <= 0L) /* seccomp errno emulation */
>>> +   goto do_ret;
>>> +   goto done; /* seccomp trace/trap */
>>> +   }
>>> +
>>> if (ret == -EFAULT) {
>>> /* Bad news -- userspace fed a bad pointer to a vsyscall. */
>>> warn_bad_vsyscall(KERN_INFO, regs,
>>> @@ -271,10 +297,11 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned 
>>> long address)
>>>
>>> regs->ax = ret;
>>>
>>> +do_ret:
>>> /* Emulate a ret instruction. */
>>> regs->ip = caller;
>>> regs->sp += 8;
>>> -
>>> +done:
>>> return true;
>>>
>>>  sigsegv:
>>> --
>>> 1.7.9.5
>>>
>>
>> This has the same odd property as the sigsegv path that the faulting
>> instruction will appear to be the mov, not the syscall.  That seems to
>> be okay, though -- various pieces of code that try to restart the segv
>> are okay with that.
>
> Yeah - I would otherwise do
>   regs->ip += 9;
> but I wanted to match the code that was there for SIGSEGV.  If regs->ip
> += 9 _just_ for the SIGSYS case is fine, then I'll make that change
> shortly.  Since any code that sees the vsyscall address should be wise
> enough to avoid it, perhaps that's why the SIGSEGV hasn't had a
> problem so far.

I dashed this off without more thought. It's best to leave it as is
because any return to the emulated page will cause a vsyscall fault
event.

>> Is there any code that assumes that changing rax (i.e. the syscall
>> number) and restarting a syscall after SIGSYS will invoke the new
>> syscall?  (The RET_TRACE path might be similar -- does the
>> ptrace_event(PTRACE_EVENT_SECCOMP, data) in seccomp.c give a debugger
>> a chance to synchronously cancel or change the syscall?
>
> Unfortunately, it does in normal interception. I don't see any way out
> of that quirk with vsyscall=emulate.  As is without seccomp,
> vsyscall=emulate doesn't allow ptrace interception (or syscall
> auditing for that matter) while vsyscall=native does.   So the option
> here is to document the quirky interaction in
> Documentation/prctl/seccomp_filter.txt.  In particular, if the tracer
> sees either (time|gettimeofday|getcpu) and rip in the vsyscall page,
> it will know it can't rewrite or bypass the call.  Is there a better
> option?
>
> Given that, I will include a tweak to the documentation to indicate
> that behavior so that userspace authors of BPF programs that use
> SECCOMP_RET_TRACE will be aware of the behavior.
>
>> If those issues aren't problems, then:
>>
>> Reviewed-by: Andy Lutomirski 
>>
>> (If the syscall number needs to change after the fact in the
>> SECCOMP_RET_TRAP case, it'll be a mess.)
>
> Nah - traps are delivered like the forced sigsegv path.
>
> I'll spin a v3 soon including the documentation tweak and the ip
> offset to match vsyscall=native behavior (regs->ip += 9 _just_ for the
> skip case).  Of course, any better ideas for the trace-case will be
> more than welcome, but it seems to me to be an acceptable tradeoff - I
> hope others agree.
>
> I'll make the changes and then put it through its paces to see if any
> other little idiosyncrasies emerge.

I've written up a documentation patch to accompany this one. It
reflects one more change I've made in a v3 of the patch, but it is
optional.  I've added support for SECCOMP_RET_TRACE to still
skip/emulate the system call if it desires.  In v2 it can't.  Either
way is fine in practice, but I'd need to change the accompanying
documentation.

thanks again!
will
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  

[PATCH UPDATED 6/6] workqueue: reimplement WQ_HIGHPRI using a separate worker_pool

2012-07-13 Thread Tejun Heo
>From 12f804d130d966f2a094e8037e9f163215d13f23 Mon Sep 17 00:00:00 2001
From: Tejun Heo 
Date: Fri, 13 Jul 2012 20:50:50 -0700

WQ_HIGHPRI was implemented by queueing highpri work items at the head
of the global worklist.  Other than queueing at the head, they weren't
handled differently; unfortunately, this could lead to execution
latency of a few seconds on heavily loaded systems.

Now that workqueue code has been updated to deal with multiple
worker_pools per global_cwq, this patch reimplements WQ_HIGHPRI using
a separate worker_pool.  NR_WORKER_POOLS is bumped to two and
gcwq->pools[0] is used for normal pri work items and ->pools[1] for
highpri.  Highpri workers get -20 nice level and have 'H' suffix in
their names.  Note that this change increases the number of kworkers
per cpu.

POOL_HIGHPRI_PENDING, pool_determine_ins_pos() and highpri chain
wakeup code in process_one_work() are no longer used and removed.

This allows proper prioritization of highpri work items and removes
high execution latency of highpri work items.

v2: nr_running indexing bug in get_pool_nr_running() fixed.

Signed-off-by: Tejun Heo 
Reported-by: Josh Hunt 
LKML-Reference: 

Cc: Tony Luck 
Cc: Fengguang Wu 
---
git branch updated accordingly.  Thanks.

 Documentation/workqueue.txt |  103 ---
 kernel/workqueue.c  |  100 +++--
 2 files changed, 65 insertions(+), 138 deletions(-)

diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt
index a0b577d..a6ab4b6 100644
--- a/Documentation/workqueue.txt
+++ b/Documentation/workqueue.txt
@@ -89,25 +89,28 @@ called thread-pools.
 
 The cmwq design differentiates between the user-facing workqueues that
 subsystems and drivers queue work items on and the backend mechanism
-which manages thread-pool and processes the queued work items.
+which manages thread-pools and processes the queued work items.
 
 The backend is called gcwq.  There is one gcwq for each possible CPU
-and one gcwq to serve work items queued on unbound workqueues.
+and one gcwq to serve work items queued on unbound workqueues.  Each
+gcwq has two thread-pools - one for normal work items and the other
+for high priority ones.
 
 Subsystems and drivers can create and queue work items through special
 workqueue API functions as they see fit. They can influence some
 aspects of the way the work items are executed by setting flags on the
 workqueue they are putting the work item on. These flags include
-things like CPU locality, reentrancy, concurrency limits and more. To
-get a detailed overview refer to the API description of
+things like CPU locality, reentrancy, concurrency limits, priority and
+more.  To get a detailed overview refer to the API description of
 alloc_workqueue() below.
 
-When a work item is queued to a workqueue, the target gcwq is
-determined according to the queue parameters and workqueue attributes
-and appended on the shared worklist of the gcwq.  For example, unless
-specifically overridden, a work item of a bound workqueue will be
-queued on the worklist of exactly that gcwq that is associated to the
-CPU the issuer is running on.
+When a work item is queued to a workqueue, the target gcwq and
+thread-pool is determined according to the queue parameters and
+workqueue attributes and appended on the shared worklist of the
+thread-pool.  For example, unless specifically overridden, a work item
+of a bound workqueue will be queued on the worklist of either normal
+or highpri thread-pool of the gcwq that is associated to the CPU the
+issuer is running on.
 
 For any worker pool implementation, managing the concurrency level
 (how many execution contexts are active) is an important issue.  cmwq
@@ -115,26 +118,26 @@ tries to keep the concurrency at a minimal but sufficient 
level.
 Minimal to save resources and sufficient in that the system is used at
 its full capacity.
 
-Each gcwq bound to an actual CPU implements concurrency management by
-hooking into the scheduler.  The gcwq is notified whenever an active
-worker wakes up or sleeps and keeps track of the number of the
-currently runnable workers.  Generally, work items are not expected to
-hog a CPU and consume many cycles.  That means maintaining just enough
-concurrency to prevent work processing from stalling should be
-optimal.  As long as there are one or more runnable workers on the
-CPU, the gcwq doesn't start execution of a new work, but, when the
-last running worker goes to sleep, it immediately schedules a new
-worker so that the CPU doesn't sit idle while there are pending work
-items.  This allows using a minimal number of workers without losing
-execution bandwidth.
+Each thread-pool bound to an actual CPU implements concurrency
+management by hooking into the scheduler.  The thread-pool is notified
+whenever an active worker wakes up or sleeps and keeps track of the
+number of the currently runnable workers.  Generally, work items are

Re: [PATCH 5/6] workqueue: introduce NR_WORKER_POOLS and for_each_worker_pool()

2012-07-13 Thread Tejun Heo
>From 8a0597bf9939d50039d4a6f446db51cf920daaad Mon Sep 17 00:00:00 2001
From: Tejun Heo 
Date: Fri, 13 Jul 2012 20:50:50 -0700

Introduce NR_WORKER_POOLS and for_each_worker_pool() and convert code
paths which need to manipulate all pools in a gcwq to use them.
NR_WORKER_POOLS is currently one and for_each_worker_pool() iterates
over only @gcwq->pool.

Note that nr_running is per-pool property and converted to an array
with NR_WORKER_POOLS elements and renamed to pool_nr_running.

The changes in this patch are mechanical and don't cause any
functional difference.  This is to prepare for multiple pools per
gcwq.

v2: nr_running indexing bug in get_pool_nr_running() fixed.

Signed-off-by: Tejun Heo 
Cc: Tony Luck 
Cc: Fengguang Wu 
---
git branch updated accordingly.  Thanks!

 kernel/workqueue.c |  225 
 1 files changed, 155 insertions(+), 70 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7a98bae..82eee34 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -74,6 +74,8 @@ enum {
TRUSTEE_RELEASE = 3,/* release workers */
TRUSTEE_DONE= 4,/* trustee is done */
 
+   NR_WORKER_POOLS = 1,/* # worker pools per gcwq */
+
BUSY_WORKER_HASH_ORDER  = 6,/* 64 pointers */
BUSY_WORKER_HASH_SIZE   = 1 << BUSY_WORKER_HASH_ORDER,
BUSY_WORKER_HASH_MASK   = BUSY_WORKER_HASH_SIZE - 1,
@@ -274,6 +276,9 @@ EXPORT_SYMBOL_GPL(system_nrt_freezable_wq);
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
 
+#define for_each_worker_pool(pool, gcwq)   \
+   for ((pool) = &(gcwq)->pool; (pool); (pool) = NULL)
+
 #define for_each_busy_worker(worker, i, pos, gcwq) \
for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \
hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
@@ -454,7 +459,7 @@ static bool workqueue_freezing; /* W: have wqs 
started freezing? */
  * try_to_wake_up().  Put it in a separate cacheline.
  */
 static DEFINE_PER_CPU(struct global_cwq, global_cwq);
-static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running);
+static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, 
pool_nr_running[NR_WORKER_POOLS]);
 
 /*
  * Global cpu workqueue and nr_running counter for unbound gcwq.  The
@@ -462,7 +467,9 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, 
gcwq_nr_running);
  * workers have WORKER_UNBOUND set.
  */
 static struct global_cwq unbound_global_cwq;
-static atomic_t unbound_gcwq_nr_running = ATOMIC_INIT(0);  /* always 0 */
+static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = {
+   [0 ... NR_WORKER_POOLS - 1] = ATOMIC_INIT(0),   /* always 0 */
+};
 
 static int worker_thread(void *__worker);
 
@@ -477,11 +484,14 @@ static struct global_cwq *get_gcwq(unsigned int cpu)
 static atomic_t *get_pool_nr_running(struct worker_pool *pool)
 {
int cpu = pool->gcwq->cpu;
+   atomic_t (*nr_running)[NR_WORKER_POOLS];
 
if (cpu != WORK_CPU_UNBOUND)
-   return &per_cpu(gcwq_nr_running, cpu);
+   nr_running = &per_cpu(pool_nr_running, cpu);
else
-   return &unbound_gcwq_nr_running;
+   nr_running = &unbound_pool_nr_running;
+
+   return &(*nr_running)[0];
 }
 
 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
@@ -3345,9 +3355,30 @@ EXPORT_SYMBOL_GPL(work_busy);
__ret1 < 0 ? -1 : 0;\
 })
 
+static bool gcwq_is_managing_workers(struct global_cwq *gcwq)
+{
+   struct worker_pool *pool;
+
+   for_each_worker_pool(pool, gcwq)
+   if (pool->flags & POOL_MANAGING_WORKERS)
+   return true;
+   return false;
+}
+
+static bool gcwq_has_idle_workers(struct global_cwq *gcwq)
+{
+   struct worker_pool *pool;
+
+   for_each_worker_pool(pool, gcwq)
+   if (!list_empty(&pool->idle_list))
+   return true;
+   return false;
+}
+
 static int __cpuinit trustee_thread(void *__gcwq)
 {
struct global_cwq *gcwq = __gcwq;
+   struct worker_pool *pool;
struct worker *worker;
struct work_struct *work;
struct hlist_node *pos;
@@ -3363,13 +3394,15 @@ static int __cpuinit trustee_thread(void *__gcwq)
 * cancelled.
 */
BUG_ON(gcwq->cpu != smp_processor_id());
-   rc = trustee_wait_event(!(gcwq->pool.flags & POOL_MANAGING_WORKERS));
+   rc = trustee_wait_event(!gcwq_is_managing_workers(gcwq));
BUG_ON(rc < 0);
 
-   gcwq->pool.flags |= POOL_MANAGING_WORKERS;
+   for_each_worker_pool(pool, gcwq) {
+   pool->flags |= POOL_MANAGING_WORKERS;
 
-   list_for_each_entry(worker, &gcwq->pool.idle_list, entry)
-   worker->flags |= WORKER_ROGUE;
+   list_for_each_entry(worker, &pool->idle_list, entry)
+   worker->flags |= WORKER_ROGUE;
+

Re: [PATCH] power_supply: Add min/max alert properties for CAPACITY, TEMP, TEMP_AMBIENT

2012-07-13 Thread Anton Vorontsov
On Thu, Jul 05, 2012 at 04:59:12PM +0530, Ramakrishna Pallala wrote:
> Minimum and maximum alerts on power supply properties will help or allow the
> user space to "proactively" create policies like connect/disconnect charger
> or stop/start the user apps based on capacity or temperature parameters.
> 
> These parameters can be used to avoid unnecessary polling from user space and
> even from kernel space if the underlying HW can support INT triggers(ex: 
> max17042/47).
> 
> This patch adds the following power supply alert type properties:
> CAPACITY_ALERT_MIN
> CAPACITY_ALERT_MAX
> TEMP_ALERT_MIN
> TEMP_ALERT_MAX
> TEMP_AMBIENT_ALERT_MIN
> TEMP_AMBIENT_ALERT_MAX
> 
> Signed-off-by: Ramakrishna Pallala 

Looks nice, applied. Thank you!

-- 
Anton Vorontsov
Email: cbouatmai...@gmail.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] mac802154: fix sparse warning for mac802154_slave_get_priv

2012-07-13 Thread Alexander Smirnov

> Make sparse happy by fixing the following error:
>* symbol 'mac802154_slave_get_priv' was not declared. Should it be static?
> 
> Signed-off-by: Silviu-Mihai Popescu 
> ---
> net/mac802154/mib.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
> 

Should be already fixed, please try the latest net-next tree.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/6] workqueue: reimplement WQ_HIGHPRI using a separate worker_pool

2012-07-13 Thread Tejun Heo
Hello,

On Fri, Jul 13, 2012 at 10:08:00AM +0800, Fengguang Wu wrote:
> [0.165669] Performance Events: unsupported Netburst CPU model 6 no PMU 
> driver, software events only.
> [0.167001] XXX cpu=0 gcwq=88000dc0cfc0 base=88000dc11e80
> [0.167989] XXX cpu=0 nr_running=0 @ 88000dc11e80
> [0.168988] XXX cpu=0 nr_running=0 @ 88000dc11e88
> [0.169988] XXX cpu=1 gcwq=88000dd0cfc0 base=88000dd11e80
> [0.170988] XXX cpu=1 nr_running=0 @ 88000dd11e80
> [0.171987] XXX cpu=1 nr_running=0 @ 88000dd11e88
> [0.172988] XXX cpu=8 nr_running=0 @ 81d7c430
> [0.173987] XXX cpu=8 nr_running=12 @ 81d7c438

Heh, I found it.  get_pool_nr_running() stores the nr_running array to
use in a local pointer to array and then returns pointer to the
specific element from there depending on the priority.

atomic_t (*nr_running)[NR_WORKER_POOLS];

/* set @nr_running to the array to use */
return nr_running[worker_pool_pri(pool)];

The [] operator in the return statement is indexing to the arrays
instead of the array elements, so if the index is 1, the above
statement offsets nr_running by sizeof(atomic_t [NR_WORKER_POOLS])
instead of sizeof(atomic_t).  This should have been
&(*nr_running)[worker_pool_pri(pool)] instead.

So, highpri ends up dereferencing out-of-bounds and depending on
variable layout, it may see garbage value from the beginning (what you
were seeing) or get interfered afterwards (what Tony was seeing).
This also explains why I didn't see it and Tony can no longer
reproduce it after debug patch.

Will post updated patches.

Thank you.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] xfs: fix comment typo of struct xfs_da_blkinfo.

2012-07-13 Thread Chen Baozi
Fix a trivial typo in the comment, where "It" was written as "Is".

Signed-off-by: Chen Baozi 
---
 fs/xfs/xfs_da_btree.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index dbf7c07..be30bd4 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -32,7 +32,7 @@ struct zone;
 /*
  * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
  *
- * Is is used to manage a doubly linked list of all blocks at the same
+ * It is used to manage a doubly linked list of all blocks at the same
  * level in the Btree, and to identify which type of block this is.
  */
 #define XFS_DA_NODE_MAGIC  0xfebe  /* magic number: non-leaf blocks */
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: general protection fault on ttm_init()

2012-07-13 Thread Dave Airlie
Can you try this patch on top of the previous one?

I think it should fix it.

Dave.


0001-drm-set-drm_class-to-NULL-after-removing-it.patch
Description: Binary data


Re: [PATCHv4] bq27x00_battery: Add support for BQ27425 chip

2012-07-13 Thread Anton Vorontsov
On Fri, Jul 06, 2012 at 10:06:32AM +0530, Saranya Gopal wrote:
> This patch adds support for BQ27425 (TI) chip. This chip is same as
> BQ27500 with few registers removed and register address map changed.
> The data sheet for this chip is publicly available at
>  http://www.ti.com/product/bq27425-g1
> 
> Changes since v3:
>   Add bq27xxx as prefix for the newly added function.
>   Define SOC register address plus offset for bq27425 instead
>  of register address alone to align with other register reads.
> 
> Signed-off-by: Saranya Gopal 
> ---

Applied w/ Reviewed-by: Lars-Peter Clausen  tag.

Saranya, thanks for the patch! And much thanks to Lars-Peter for the
review work!

p.s.
Saranya, one thing though: I had to fixup a small conflict in the
patch since you seem to have prepared the patch against Linus' tree, but
there are some changes in the battery tree for this driver. Please
check if I fixed the conflict in a proper way. :-)

-- 
Anton Vorontsov
Email: cbouatmai...@gmail.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 3.5-rc6 fb hw inteldrmfb

2012-07-13 Thread Dave Airlie
cc'ing Daniel,

On Sat, Jul 14, 2012 at 12:18 PM, werner  wrote:
> On a computer of someone else, on which today I tried to install 3.5-rc6
> (what on my computer and on an HP pavillon laptop runs without problem - on
> the laptop however needs ircpoll otherwhise don't find the harddisk), few
> seconds after start booting it sticks and don't continue longer (reboot not
> possible with Ctr-Alt-C but only pressing the reboot button), the last
> message is:   "conflicting fb hw usage inteldrmfb vs VESA VGA - removing
> generic driver"then the cursor stops blinking, and the computer
> sticks.

weird, are you passing a vga= to the kernel? or is grub2 going graphical?

also the dmesg from 3.4 might be good.

Dave.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] New capability CAP_RND_ADD for solely allowing addition of entropy

2012-07-13 Thread Aaron Jones

I was interested in modifying haveged to drop privileges after opening
/dev/random but discovered this was not possible because it uses the
ioctl RNDADDENTROPY which requires CAP_SYS_ADMIN.

Retaining CAP_SYS_ADMIN after dropping GID/UID would defeat the point
of doing so, so this program must always run with UID 0 and/or
CAP_SYS_ADMIN, which is undesirable.

I attach a patch to add a new capability CAP_RND_ADD, which allows the
use of ioctls RNDADDENTROPY and RNDADDTOENTCNT. It further modifies
drivers/char/random.c to also check for this capability before returning
-EPERM.

==

--- a/drivers/char/random.c2012-07-14 02:52:10.781202854 +0100
+++ b/drivers/char/random.c2012-07-14 02:52:55.369201089 +0100
@@ -1154,14 +1154,14 @@
 return -EFAULT;
 return 0;
 case RNDADDTOENTCNT:
-if (!capable(CAP_SYS_ADMIN))
+if (!capable(CAP_SYS_ADMIN) && !capable(CAP_RND_ADD))
 return -EPERM;
 if (get_user(ent_count, p))
 return -EFAULT;
 credit_entropy_bits(&input_pool, ent_count);
 return 0;
 case RNDADDENTROPY:
-if (!capable(CAP_SYS_ADMIN))
+if (!capable(CAP_SYS_ADMIN) && !capable(CAP_RND_ADD))
 return -EPERM;
 if (get_user(ent_count, p++))
 return -EFAULT;
--- a/include/linux/capability.h2012-07-14 03:15:52.378624902 +0100
+++ b/include/linux/capability.h2012-07-14 03:16:47.508624928 +0100
@@ -364,7 +364,18 @@

 #define CAP_EPOLLWAKEUP  36

-#define CAP_LAST_CAP CAP_EPOLLWAKEUP
+/* Allow adding of random entropy and updating entropy estimate,
+   but not clearing the entropy pool (see drivers/char/random.c)
+   Introduced so that software like haveged can drop gid/uid
+   on startup and drop all capabilities except this one.
+   Otherwise it would require CAP_SYS_ADMIN, which would
+   defeat the point of dropping gid/uid. */
+
+#define CAP_RND_ADD  37
+
+
+
+#define CAP_LAST_CAP CAP_RND_ADD

 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Small bug: Wrong return check from idr_pre_get in loop.c

2012-07-13 Thread Silva Paulo
idr_pre_get never returns a value < 0. It returns 0 (no memory) or 1 (OK).

Regards


My_linux-3.5-rc6_patches
Description: Binary data


3.5-rc6 fb hw inteldrmfb

2012-07-13 Thread werner
On a computer of someone else, on which today I tried to 
install 3.5-rc6 (what on my computer and on an HP pavillon 
laptop runs without problem - on the laptop however needs 
ircpoll otherwhise don't find the harddisk), few seconds 
after start booting it sticks and don't continue longer 
(reboot not possible with Ctr-Alt-C but only pressing the 
reboot button), the last message is:   "conflicting fb hw 
usage inteldrmfb vs VESA VGA - removing generic driver" 
   then the cursor stops blinking, and the computer 
sticks.
With 3.4 the same computer runs since weeks without 
problems.  So, that's a regression.


Inspection of syslog, messages, debug don't show anything 
from this boot, it seems before the crash it hasn't nor 
time to log something.


Below is the lspci of that computer.


wl




00:00.0 Host bridge: Intel Corporation 82945G/GZ/P/PL 
Memory Controller Hub (rev 02)
00:02.0 VGA compatible controller: Intel Corporation 
82945G/GZ Integrated Graphics Controller (rev 02)
00:1b.0 Audio device: Intel Corporation 82801G (ICH7 
Family) High Definition Audio Controller (rev 01)
00:1c.0 PCI bridge: Intel Corporation 82801G (ICH7 Family) 
PCI Express Port 1 (rev 01)
00:1c.1 PCI bridge: Intel Corporation 82801G (ICH7 Family) 
PCI Express Port 2 (rev 01)
00:1c.2 PCI bridge: Intel Corporation 82801G (ICH7 Family) 
PCI Express Port 3 (rev 01)
00:1c.3 PCI bridge: Intel Corporation 82801G (ICH7 Family) 
PCI Express Port 4 (rev 01)
00:1d.0 USB Controller: Intel Corporation 82801G (ICH7 
Family) USB UHCI Controller #1 (rev 01)
00:1d.1 USB Controller: Intel Corporation 82801G (ICH7 
Family) USB UHCI Controller #2 (rev 01)
00:1d.2 USB Controller: Intel Corporation 82801G (ICH7 
Family) USB UHCI Controller #3 (rev 01)
00:1d.3 USB Controller: Intel Corporation 82801G (ICH7 
Family) USB UHCI Controller #4 (rev 01)
00:1d.7 USB Controller: Intel Corporation 82801G (ICH7 
Family) USB2 EHCI Controller (rev 01)
00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge 
(rev e1)
00:1f.0 ISA bridge: Intel Corporation 82801GB/GR (ICH7 
Family) LPC Interface Bridge (rev 01)
00:1f.1 IDE interface: Intel Corporation 82801G (ICH7 
Family) IDE Controller (rev 01)
00:1f.2 IDE interface: Intel Corporation 82801GB/GR/GH 
(ICH7 Family) SATA IDE Controller (rev 01)
00:1f.3 SMBus: Intel Corporation 82801G (ICH7 Family) 
SMBus Controller (rev 01)
05:00.0 Ethernet controller: Broadcom Corporation 
NetXtreme BCM5751 Gigabit Ethernet PCI Express (rev 01)


---
Professional hosting for everyone - http://www.host.ru
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: RE: Charger Manager Proposal.

2012-07-13 Thread Anton Vorontsov
On Fri, Jul 06, 2012 at 11:20:22AM +, Tc, Jenny wrote:
> Since modifying the charger manager for the below requirements would be more 
> complex and would not fit inside the charger manager we are thinking of 
> implementing new framework under power_supply subsystem with following 
> features.

I'm not an expert in charger-manager sub-subsystem; if you guys want
to, I can dig into it, but I'd rather prefer if you come to some
agreement w/o my intervention.

Quoting Myungjoo from the previous email:

  "I think the features mentioned above are good to be included in
   charger manager as they look quite compatible with current structure
   with some modifications."

Hm. So Myungjoo thinks that some of the features are compatible. Which do
you guys think are not compatible? Is this because charger manager does
everything using a regulator framework? That is, quoting you:

  "The challenge I see in implementing the above requirements in charger
   manager is, some of the above requirements are not supported by
   the regulator framework."

So, maybe the part of the solution would be enhancing the regulators
framework?..

> The outcome of all the above changes would be that, a set of charging 
> algorithms would be available in the mainline and chargers can make use of 
> the algorithms without making much modifications to the charger driver code. 
> Also this would give a standard framework for monitoring and controlling 
> battery charging.

The idea of plug-in charging algorithms sounds great. So that we
could choose the algo based on the battery type, charger type etc.
This is awesome. But do you think you really need a new subsystem
for that? And if so, will it complement charger manager, compete
or substitute it?

I would have no problem with complementary subsystem, or just
evolutionary/incrementally changing the charger-manger (this is
surely preferred). If you think there is no way for incrementally
modifying charger-manager for your needs, and you want a "from
scratch" solution, this is also doable but following requirements
are must-have:

1. You can prove (on technical merits) that your new charger manager
   is a complete superset of the old one, and adds some vital features
   not available before (and these would be hard to implement in
   terms of the old subsystem);
2. You'll have a defined roadmap to convert all charger-manager
   users to a new subsystem (preferably w/ patches ready).

From past experience, I can tell you that modifying an existing
subsystem is a much easier way. :-) And the biggest advantage of the
current code is that it is already well-tested, and incremental
changes are easy to bisect.

There were precedents of rewriting drivers and subsystems completely,
so it is nothing new as well. But I urge you to think about it twice.

Thanks,


p.s.

Btw, frankly speaking I'm not so much happy about charger-manager
nowadays, not from the design point of view (and not because it
seems quite complex -- I presume there is a reason for that), but
I'm somewhat unhappy about implementation details, i.e. I complained[1]
about its uevents implementation, but no one seems to bother to fix
that. I see a good flow of new features and interest for the charger
manager (which is great), but the long standing pesky issues are still
there.

So, if you'll have a somewhat more clean uevents implementation, that
would be surely a good point for the new subsystem. :-D

[1] http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/02398.html

-- 
Anton Vorontsov
Email: cbouatmai...@gmail.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] charger-manager: Use EXTCON Subsystem to control charger cable

2012-07-13 Thread Anton Vorontsov
On Thu, Jul 12, 2012 at 03:03:16PM +0900, Chanwoo Choi wrote:
> This patchset add support EXTCON Subsystem in which charger-manager identify
> the type of external connector and enable/disable charger(regulator) according
> to the state of charger cable(external connector).

Applied, thanks a lot!

-- 
Anton Vorontsov
Email: cbouatmai...@gmail.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v5 5/5] ARM: exynos: add thermal sensor driver platform data support

2012-07-13 Thread Kyungmin Park
On Fri, Jul 13, 2012 at 8:10 PM, Amit Daniel Kachhap
 wrote:
> Add necessary default platform data support needed for TMU driver.  This
> dt/non-dt values are tested for origen exynos4210 and smdk exynos5250
> platforms.
Looks good to me.
just nitpicks below.

Thank you,
Kyungmin Park
>
> Signed-off-by: Amit Daniel Kachhap 
> Cc: Donggeun Kim 
> Acked-by: Guenter Roeck 
> Cc: SangWook Ju 
> Cc: Durgadoss 
> Cc: Len Brown 
> Cc: Jean Delvare 
> Signed-off-by: Andrew Morton 
> ---
>  drivers/thermal/exynos_thermal.c |  111 
> +-
>  1 files changed, 110 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/thermal/exynos_thermal.c 
> b/drivers/thermal/exynos_thermal.c
> index 9ef8c37..07736ea 100644
> --- a/drivers/thermal/exynos_thermal.c
> +++ b/drivers/thermal/exynos_thermal.c
> @@ -662,14 +662,121 @@ static irqreturn_t exynos_tmu_irq(int irq, void *id)
>  static struct thermal_sensor_conf exynos_sensor_conf = {
> .name   = "exynos-therm",
> .read_temperature   = (int (*)(void *))exynos_tmu_read,
> +};
> +
> +#if defined(CONFIG_CPU_EXYNOS4210)
BTW, isn't it the same as exynos4412? Or is it different from exynos4412?
If it's the same, it's better to use CONFIG_SOC_EXYNOS4?
> +static struct exynos_tmu_platform_data const exynos4_default_tmu_data = {
> +   .threshold = 80,
> +   .trigger_levels[0] = 5,
> +   .trigger_levels[1] = 20,
> +   .trigger_levels[2] = 30,
> +   .trigger_level0_en = 1,
> +   .trigger_level1_en = 1,
> +   .trigger_level2_en = 1,
> +   .trigger_level3_en = 0,
> +   .gain = 15,
> +   .reference_voltage = 7,
> +   .cal_type = TYPE_ONE_POINT_TRIMMING,
> +   .freq_tab[0] = {
> +   .freq_clip_max = 800 * 1000,
> +   .temp_level = 85,
> +   },
> +   .freq_tab[1] = {
> +   .freq_clip_max = 200 * 1000,
> +   .temp_level = 100,
> +   },
> +   .freq_tab_count = 2,
> +   .type = SOC_ARCH_EXYNOS4,
> +};
> +#define EXYNOS4_TMU_DRV_DATA (&exynos4_default_tmu_data)
> +#else
> +#define EXYNOS4_TMU_DRV_DATA (NULL)
> +#endif
> +
> +#if defined(CONFIG_SOC_EXYNOS5250)
similar.
> +static struct exynos_tmu_platform_data const exynos5_default_tmu_data = {
> +   .trigger_levels[0] = 85,
> +   .trigger_levels[1] = 103,
> +   .trigger_levels[2] = 110,
> +   .trigger_level0_en = 1,
> +   .trigger_level1_en = 1,
> +   .trigger_level2_en = 1,
> +   .trigger_level3_en = 0,
> +   .gain = 8,
> +   .reference_voltage = 16,
> +   .noise_cancel_mode = 4,
> +   .cal_type = TYPE_ONE_POINT_TRIMMING,
> +   .efuse_value = 55,
> +   .freq_tab[0] = {
> +   .freq_clip_max = 800 * 1000,
> +   .temp_level = 85,
> +   },
> +   .freq_tab[1] = {
> +   .freq_clip_max = 200 * 1000,
> +   .temp_level = 103,
> +   },
> +   .freq_tab_count = 2,
> +   .type = SOC_ARCH_EXYNOS5,
> +};
> +#define EXYNOS5_TMU_DRV_DATA (&exynos5_default_tmu_data)
> +#else
> +#define EXYNOS5_TMU_DRV_DATA (NULL)
> +#endif
> +
> +#ifdef CONFIG_OF
> +static const struct of_device_id exynos_tmu_match[] = {
> +   {
> +   .compatible = "samsung,exynos4-tmu",
> +   .data = (void *)EXYNOS4_TMU_DRV_DATA,
> +   },
> +   {
> +   .compatible = "samsung,exynos5-tmu",
> +   .data = (void *)EXYNOS5_TMU_DRV_DATA,
> +   },
> +   {},
> +};
> +MODULE_DEVICE_TABLE(of, exynos_tmu_match);
> +#else
> +#define  exynos_tmu_match NULL
> +#endif
> +
> +static struct platform_device_id exynos_tmu_driver_ids[] = {
> +   {
> +   .name   = "exynos4-tmu",
> +   .driver_data= (kernel_ulong_t)EXYNOS4_TMU_DRV_DATA,
> +   },
> +   {
> +   .name   = "exynos5-tmu",
> +   .driver_data= (kernel_ulong_t)EXYNOS5_TMU_DRV_DATA,
> +   },
> +   { },
> +};
> +MODULE_DEVICE_TABLE(platform, exynos4_tmu_driver_ids);
> +
> +static inline struct  exynos_tmu_platform_data *exynos_get_driver_data(
> +   struct platform_device *pdev)
> +{
> +#ifdef CONFIG_OF
> +   if (pdev->dev.of_node) {
> +   const struct of_device_id *match;
> +   match = of_match_node(exynos_tmu_match, pdev->dev.of_node);
> +   if (!match)
> +   return NULL;
> +   return (struct exynos_tmu_platform_data *) match->data;
> +   }
> +#endif
> +   return (struct exynos_tmu_platform_data *)
> +   platform_get_device_id(pdev)->driver_data;
>  }
> -;
>  static int __devinit exynos_tmu_probe(struct platform_device *pdev)
>  {
> struct exynos_tmu_data *data;
> struct exynos_tmu_platform_data *pdata = pdev->dev.platform_data;
> int ret, i;
>
> +   if (!pdata)
> +   pdata = exynos_get_driver_data(pdev);
> +
> if (!pdata) {
> dev_err(>dev, 

Re: resurrecting tcphealth

2012-07-13 Thread valdis . kletnieks
On Fri, 13 Jul 2012 16:55:44 -0700, Stephen Hemminger said:

> >+/* Course retransmit inefficiency- this packet has been 
> >received twice. */
> >+tp->dup_pkts_recv++;
>
> I don't understand that comment, could you use a better sentence please?

I think what was intended was:

/* Curse you, retransmit inefficiency! This packet has been received at least 
twice */


pgpmQHKFPJVPx.pgp
Description: PGP signature


Sandy Bridge pebs fix fixes

2012-07-13 Thread Andi Kleen
- Fix Peter's rewrite of the pebs microcode patches to actually work
- Readd the early microcode update for onlined CPUs to close
the "early schedule user space" race that Peter pointed out originally.

-Andi

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] x86: Do microcode updates at CPU_STARTING, not CPU_ONLINE v2

2012-07-13 Thread Andi Kleen
From: Andi Kleen 

Do microcode updates of resuming or newly plugged CPUs earlier
in CPU_STARTING instead of later when ONLINE. This prevents races
with parallel users who may need a microcode update to avoid some
problem.

Since we cannot request the microcode from udev at this stage,
try to grab the microcode from another CPU. This is also more efficient
because it avoids redundant loads. In addition to that
it avoids the need for separate paths for resume and CPU bootup.

This requires invalidating the microcodes on other CPUs on free.
Each CPU does this in parallel, so it's not a big problem. Each
CPU touches at most NR_CPUs memory locations.

When there is no good microcode available the update is delayed
until the update can be requested. In the normal cases it should
be available.

v2: Review updates
Signed-off-by: Andi Kleen 
---
 arch/x86/kernel/microcode_core.c  |   65 +
 arch/x86/kernel/microcode_intel.c |   13 +++-
 2 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index d4f4d31..5bb4b7e 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -371,20 +371,7 @@ static void microcode_fini_cpu(int cpu)
uci->valid = 0;
 }
 
-static enum ucode_state microcode_resume_cpu(int cpu)
-{
-   struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-   if (!uci->mc)
-   return UCODE_NFOUND;
-
-   pr_debug("CPU%d updated upon resume\n", cpu);
-   apply_microcode_on_target(cpu);
-
-   return UCODE_OK;
-}
-
-static enum ucode_state microcode_init_cpu(int cpu)
+static enum ucode_state microcode_init_cpu_late(int cpu)
 {
enum ucode_state ustate;
 
@@ -405,15 +392,44 @@ static enum ucode_state microcode_init_cpu(int cpu)
return ustate;
 }
 
-static enum ucode_state microcode_update_cpu(int cpu)
+/* Grab ucode from another CPU */
+
+static void clone_ucode_data(void)
+{
+   int cpu = smp_processor_id();
+   int i;
+
+   for_each_online_cpu (i) {
+   if (ucode_cpu_info[i].mc &&
+   ucode_cpu_info[i].valid &&
+   cpu_data(i).x86 == cpu_data(cpu).x86 &&
+   cpu_data(i).x86_model == cpu_data(cpu).x86_model) {
+   ucode_cpu_info[cpu].mc = ucode_cpu_info[i].mc;
+   break;
+   }
+   }
+}
+
+static void microcode_init_cpu_early(int cpu)
+{
+   clone_ucode_data();
+   /* We can request later when the CPU is online */
+   if (ucode_cpu_info[cpu].mc == NULL)
+   return;
+   if (microcode_ops->collect_cpu_info(cpu, &ucode_cpu_info[cpu].cpu_sig))
+   return;
+   if (microcode_ops->apply_microcode(smp_processor_id()))
+   pr_warn("CPU%d microcode update failed\n", cpu);
+}
+
+static enum ucode_state microcode_update_cpu_late(int cpu)
 {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
enum ucode_state ustate;
 
-   if (uci->valid)
-   ustate = microcode_resume_cpu(cpu);
-   else
-   ustate = microcode_init_cpu(cpu);
+   /* Resume already done early */
+   if (!uci->valid)
+   ustate = microcode_init_cpu_late(cpu);
 
return ustate;
 }
@@ -431,7 +447,7 @@ static int mc_device_add(struct device *dev, struct 
subsys_interface *sif)
if (err)
return err;
 
-   if (microcode_init_cpu(cpu) == UCODE_ERROR)
+   if (microcode_init_cpu_late(cpu) == UCODE_ERROR)
return -EINVAL;
 
return err;
@@ -481,9 +497,16 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long 
action, void *hcpu)
 
dev = get_cpu_device(cpu);
switch (action) {
+   case CPU_STARTING:
+   case CPU_STARTING_FROZEN:
+   microcode_init_cpu_early(cpu);
+   break;
+
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
-   microcode_update_cpu(cpu);
+   /* Retry again in case we couldn't request early */
+   if (cpu_data(cpu).microcode != ucode_cpu_info[cpu].cpu_sig.rev)
+   microcode_update_cpu_late(cpu);
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
pr_debug("CPU%d added\n", cpu);
diff --git a/arch/x86/kernel/microcode_intel.c 
b/arch/x86/kernel/microcode_intel.c
index 0327e2b..899057b 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -329,6 +329,16 @@ static int apply_microcode(int cpu)
return 0;
 }
 
+static void invalidate_microcode(void *data)
+{
+   int i;
+
+   for_each_possible_cpu (i) {
+   if (ucode_cpu_info[i].mc == data)
+   ucode_cpu_info[i].mc = NULL;
+   }
+}
+
 static enum ucode_state generic_load_microcode(int cpu, void *data, size_t 
size,
int (*get_ucode_data)(void 

[PATCH 1/2] Fix Sandy Bridge microcode check to actually work

2012-07-13 Thread Andi Kleen
From: Andi Kleen 

- The old style microcode interface used by microcode_ctl didn't call
the perf callback.
- The pebs_broken bitfield needs to be unsigned, otherwise the one-bit
signed bitfield gets sign extended and any microcode revision on an update
passes, and later updates are ignored.

Signed-off-by: Andi Kleen 
---
 arch/x86/kernel/cpu/perf_event.h |2 +-
 arch/x86/kernel/microcode_core.c |3 +++
 2 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a15df4b..1b12fac 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -374,7 +374,7 @@ struct x86_pmu {
/*
 * Intel DebugStore bits
 */
-   int bts :1,
+   unsignedbts :1,
bts_active  :1,
pebs:1,
pebs_active :1,
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 4873e62..d4f4d31 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -228,6 +228,9 @@ static ssize_t microcode_write(struct file *file, const 
char __user *buf,
mutex_unlock(&microcode_mutex);
put_online_cpus();
 
+   if (ret >= 0)
+   perf_check_microcode();
+
return ret;
 }
 
-- 
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] x86/vsyscall: allow seccomp filter in vsyscall=emulate

2012-07-13 Thread Will Drewry
On Fri, Jul 13, 2012 at 6:00 PM, Andrew Lutomirski  wrote:
> On Fri, Jul 13, 2012 at 10:06 AM, Will Drewry  wrote:
>> If a seccomp filter program is installed, older static binaries and
>> distributions with older libc implementations (glibc 2.13 and earlier)
>> that rely on vsyscall use will be terminated regardless of the filter
>> program policy when executing time, gettimeofday, or getcpu.  This is
>> only the case when vsyscall emulation is in use (vsyscall=emulate is the
>> default).
>>
>> This patch emulates system call entry inside a vsyscall=emulate by
>> populating regs->ax and regs->orig_ax with the system call number prior
>> to calling into seccomp such that all seccomp-dependencies function
>> normally.  Additionally, system call return behavior is emulated in line
>> with other vsyscall entrypoints for the trace/trap cases.
>>
>> Reported-by: Owen Kibel 
>> Signed-off-by: Will Drewry 
>>
>> v2: - fixed ip and sp on SECCOMP_RET_TRAP/TRACE (thanks to l...@mit.edu)
>
>> @@ -253,6 +273,12 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned 
>> long address)
>>
>> current_thread_info()->sig_on_uaccess_error = 
>> prev_sig_on_uaccess_error;
>>
>> +   if (skip) {
>> +   if ((long)regs->ax <= 0L) /* seccomp errno emulation */
>> +   goto do_ret;
>> +   goto done; /* seccomp trace/trap */
>> +   }
>> +
>> if (ret == -EFAULT) {
>> /* Bad news -- userspace fed a bad pointer to a vsyscall. */
>> warn_bad_vsyscall(KERN_INFO, regs,
>> @@ -271,10 +297,11 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned 
>> long address)
>>
>> regs->ax = ret;
>>
>> +do_ret:
>> /* Emulate a ret instruction. */
>> regs->ip = caller;
>> regs->sp += 8;
>> -
>> +done:
>> return true;
>>
>>  sigsegv:
>> --
>> 1.7.9.5
>>
>
> This has the same odd property as the sigsegv path that the faulting
> instruction will appear to be the mov, not the syscall.  That seems to
> be okay, though -- various pieces of code that try to restart the segv
> are okay with that.

Yeah - I would otherwise do
  regs->ip += 9;
but I wanted to match the code that was there for SIGSEGV.  If regs->ip
+= 9 _just_ for the SIGSYS case is fine, then I'll make that change
shortly.  Since any code that sees the vsyscall address should be wise
enough to avoid it, perhaps that's why the SIGSEGV hasn't had a
problem so far.

> Is there any code that assumes that changing rax (i.e. the syscall
> number) and restarting a syscall after SIGSYS will invoke the new
> syscall?  (The RET_TRACE path might be similar -- does the
> ptrace_event(PTRACE_EVENT_SECCOMP, data) in seccomp.c give a debugger
> a chance to synchronously cancel or change the syscall?

Unfortunately, it does in normal interception. I don't see any way out
of that quirk with vsyscall=emulate.  As is without seccomp,
vsyscall=emulate doesn't allow ptrace interception (or syscall
auditing for that matter) while vsyscall=native does.   So the option
here is to document the quirky interaction in
Documentation/prctl/seccomp_filter.txt.  In particular, if the tracer
sees either (time|gettimeofday|getcpu) and rip in the vsyscall page,
it will know it can't rewrite or bypass the call.  Is there a better
option?

Given that, I will include a tweak to the documentation to indicate
that behavior so that userspace authors of BPF programs that use
SECCOMP_RET_TRACE will be aware of the behavior.

> If those issues aren't problems, then:
>
> Reviewed-by: Andy Lutomirski 
>
> (If the syscall number needs to change after the fact in the
> SECCOMP_RET_TRAP case, it'll be a mess.)

Nah - traps are delivered like the forced sigsegv path.

I'll spin a v3 soon including the documentation tweak and the ip
offset to match vsyscall=native behavior (regs->ip += 9 _just_ for the
skip case).  Of course, any better ideas for the trace-case will be
more than welcome, but it seems to me to be an acceptable tradeoff - I
hope others agree.

I'll make the changes and then put it through its paces to see if any
other little idiosyncrasies emerge.

Thanks for the close review!
will
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/1] eCryptfs: check for eCryptfs cipher support at mount

2012-07-13 Thread Tyler Hicks
On 2012-07-12 19:10:24, Tim Sally wrote:
> The issue occurs when eCryptfs is mounted with a cipher supported by
> the crypto subsystem but not by eCryptfs. The mount succeeds and an
> error does not occur until a write. This change checks for eCryptfs
> cipher support at mount time.
> 
> Resolves Launchpad issue #338914, reported by Tyler Hicks in 03/2009.
> https://bugs.launchpad.net/ecryptfs/+bug/338914
> 
> Signed-off-by: Tim Sally 

Looks good! I've pushed it to the eCryptfs -next branch and it will go
in during the 3.6 merge window.

I'm looking forward to more eCryptfs patches from you. Thanks!

Tyler

> ---
>  fs/ecryptfs/main.c |   13 +
>  1 file changed, 13 insertions(+)
> 
> diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
> index df217dc..aee998d 100644
> --- a/fs/ecryptfs/main.c
> +++ b/fs/ecryptfs/main.c
> @@ -279,6 +279,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info 
> *sbi, char *options,
>   char *fnek_src;
>   char *cipher_key_bytes_src;
>   char *fn_cipher_key_bytes_src;
> + u8 cipher_code;
>  
>   *check_ruid = 0;
>  
> @@ -420,6 +421,18 @@ static int ecryptfs_parse_options(struct 
> ecryptfs_sb_info *sbi, char *options,
>   && !fn_cipher_key_bytes_set)
>   mount_crypt_stat->global_default_fn_cipher_key_bytes =
>   mount_crypt_stat->global_default_cipher_key_size;
> +
> + cipher_code = ecryptfs_code_for_cipher_string(
> + mount_crypt_stat->global_default_cipher_name,
> + mount_crypt_stat->global_default_cipher_key_size);
> + if (!cipher_code) {
> + ecryptfs_printk(KERN_ERR,
> + "eCryptfs doesn't support cipher: %s.",
> + mount_crypt_stat->global_default_cipher_name);
> + rc = -EINVAL;
> + goto out;
> + }
> +
>   mutex_lock(_tfm_list_mutex);
>   if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name,
>NULL)) {
> -- 
> 1.7.10.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ecryptfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


signature.asc
Description: Digital signature


[PATCH 1/2] [RFC] clk: reentrancy via per-clock locking

2012-07-13 Thread Mike Turquette
This patch, while incomplete, implements a per-clock locking scheme
intended to enable two specific use cases for the clock framework.  The
changelog is really long; it describes what the patch does, how I came
to this design, a few implementation notes for anyone that wants to try
these patches and some possible different ways to solve the problems.
Any advice or design ideas would be greatly appreciated.

TL;DR

This patch implements per-clock locking, which is horrible and I need
some new ideas.  Please help me mailing list peoples.

The two use cases:

1) nested calls to the clock framework in clk_prepare/clk_unprepare

Use case #1 is mandatory for preparing clocks on i2c devices.  Calling
clk_prepare on these clocks results in an i2c transaction which will
result in clk_prepare being called for the i2c controller's clocks.
This results in a deadlock.

2) reentrant calls to the clock framework from the rate change notifier
handlers in clk_set_rate

Use case #2 is needed to perform dvfs transitions via clk_set_rate.  It
is common to scale voltage with frequency and it is also common for
voltage to be scaled by an off-chip device.  Similar to use case #1 this
may result in an i2c transaction that calls clk_prepare from within
clk_set_rate.  This results in a deadlock.

The new locking scheme:

A per-clock locking scheme at first seems like a solution, so long as
reentrant calls do not need to operate on the same clock.  Lockdep uses
the same lock class key for every per-clock lock that was dynamically
initialized in __clk_init() via mutex_init().  To work around this I
stuffed both a struct mutex and a struct lock_class_key into struct clk
and cheated the initialization by directly calling __mutex_init.

@@ -41,6 +41,11 @@ struct clk {
struct hlist_head   children;
struct hlist_node   child_node;
unsigned intnotifier_count;
+   struct mutexlock;
+   struct lock_class_key   lock_key;
+   struct clk  *top;
+   int depth;
+   struct clk  **parent_chain;

...

@@ -1296,6 +1392,16 @@ int __clk_init(struct device *dev, struct clk *clk)
break;
}

+   /* XXX initialize per-clock mutex */
+   __mutex_init(&clk->lock, clk->name, &clk->lock_key);

Note that this also warns us if the struct clk is not statically
allocated (mine are, odds are that yours are not).  This is gross but it
does generate per-clock locks which lockdep is able to evaluate
independently.

However lock direction must be taken into account; clk_prepare locks
upwards and clk_set_rate locks downwards.  I was unsuccessful in finding
a way to make those operations lock in the same direction without
creating circular locking dependencies.

The circular lock dependency issues above led me to try
mutex_lock_nested.  One way to make lockdep happy was to use separate
lock subclasses for clk_prepare and clk_set_rate.  The synaptics
touchscreen guys faced similar issues:
https://lkml.org/lkml/2006/9/14/133

This prevented lockdep from warning about circular dependencies, but
with a nasty side effect: clk_prepare() and clk_set_rate() became
essentially unsynchronized.  This is completely analogous to the racy
mess we have today between clk_prepare and clk_enable (independent mutex
& spinlock, respectively), but now applies to clk_prepare and
clk_set_rate.

Quick aside: one interesting observation here is that if we consider
only use case #2 (i.e. dvfs via reentrant calls to clk framework from
rate change notifier handlers) then the same result could have been
achieved without per-clock locks and instead a new global mutex (e.g.
DEFINE_MUTEX(topology_lock)).  Such a solution does NOT solve nested
calls to clk_prepare (use case #1).

Some implementation notes:

Many functions in this patch have NOT been updated to use the per-clock
locks, notably clk_set_parent and the clock registration/initialization
path, as well as the debugfs stuff.  Races abound but my platform hasn't
been bitten by any of them in testing.

As mentioned above those with dynamically initialized clocks will get a
warning that the lock class key is not static data.  Since clk_put is
not implemented and we never get rid of clocks (or their locks) then
this is relatively safe to ignore.

Alternative solutions:

For use case #1: Continue to use the single global clk mutex, but create
a separate clk_prepare_nested or provide a clk flag CLK_PREPARE_NESTED,
or something similar which applies to the i2c controller's struct clk.
Calling clk_prepare_nested or calling clk_prepare on a clk with the
CLK_PREPARE_NESTED flag results in mutex_lock_nested being used on the
global clk mutex.  This is not bomb-proof since there is still missing
context, such as, "I want to nest this call to clk_prepare(x) because it
is a direct dependency of clk_prepare(y)".

For 

[PATCH 2/2] [RFC] cpufreq: omap: scale regulator from clk notifier

2012-07-13 Thread Mike Turquette
This patch moves direct control of the MPU voltage regulator out of the
cpufreq driver .target callback and instead puts that logic into a clock
rate change notifier callback.

The same frequency/voltage lookup via the OPP library is present, except
that the calls to regulator_set_voltage are done from the clock
framework instead of cpufreq.

Ideally it would be nice to reduce the .target callback for OMAP's
cpufreq driver to a simple call to clk_set_rate.  For now there is still
some other stuff needed there (jiffies per loop, rounding the rate, etc
etc).

Not-signed-off-by: Mike Turquette 
---
 drivers/cpufreq/omap-cpufreq.c |  154 +---
 1 file changed, 96 insertions(+), 58 deletions(-)

diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index 17fa04d..bf15c49 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -80,10 +80,9 @@ static int omap_target(struct cpufreq_policy *policy,
   unsigned int relation)
 {
unsigned int i;
-   int r, ret = 0;
+   int ret = 0;
struct cpufreq_freqs freqs;
-   struct opp *opp;
-   unsigned long freq, volt = 0, volt_old = 0, tol = 0;
+   unsigned long freq;
 
if (!freq_table) {
dev_err(mpu_dev, "%s: cpu%d: no freq table!\n", __func__,
@@ -119,47 +118,11 @@ static int omap_target(struct cpufreq_policy *policy,
 
freq = freqs.new * 1000;
 
-   if (mpu_reg) {
-   opp = opp_find_freq_ceil(mpu_dev, &freq);
-   if (IS_ERR(opp)) {
-   dev_err(mpu_dev, "%s: unable to find MPU OPP for %d\n",
-   __func__, freqs.new);
-   return -EINVAL;
-   }
-   volt = opp_get_voltage(opp);
-   tol = volt * OPP_TOLERANCE / 100;
-   volt_old = regulator_get_voltage(mpu_reg);
-   }
-
-   dev_dbg(mpu_dev, "cpufreq-omap: %u MHz, %ld mV --> %u MHz, %ld mV\n", 
-   freqs.old / 1000, volt_old ? volt_old / 1000 : -1,
-   freqs.new / 1000, volt ? volt / 1000 : -1);
-
-   /* scaling up?  scale voltage before frequency */
-   if (mpu_reg && (freqs.new > freqs.old)) {
-   r = regulator_set_voltage(mpu_reg, volt - tol, volt + tol);
-   if (r < 0) {
-   dev_warn(mpu_dev, "%s: unable to scale voltage up.\n",
-__func__);
-   freqs.new = freqs.old;
-   goto done;
-   }
-   }
+   dev_dbg(mpu_dev, "cpufreq-omap: %u MHz --> %u MHz\n",
+   freqs.old / 1000, freqs.new / 1000); 
 
ret = clk_set_rate(mpu_clk, freqs.new * 1000);
 
-   /* scaling down?  scale voltage after frequency */
-   if (mpu_reg && (freqs.new < freqs.old)) {
-   r = regulator_set_voltage(mpu_reg, volt - tol, volt + tol);
-   if (r < 0) {
-   dev_warn(mpu_dev, "%s: unable to scale voltage down.\n",
-__func__);
-   ret = clk_set_rate(mpu_clk, freqs.old * 1000);
-   freqs.new = freqs.old;
-   goto done;
-   }
-   }
-
freqs.new = omap_getspeed(policy->cpu);
 #ifdef CONFIG_SMP
/*
@@ -187,7 +150,6 @@ static int omap_target(struct cpufreq_policy *policy,
freqs.new);
 #endif
 
-done:
/* notifiers */
for_each_cpu(i, policy->cpus) {
freqs.cpu = i;
@@ -207,10 +169,6 @@ static int __cpuinit omap_cpu_init(struct cpufreq_policy 
*policy)
 {
int result = 0;
 
-   mpu_clk = clk_get(NULL, mpu_clk_name);
-   if (IS_ERR(mpu_clk))
-   return PTR_ERR(mpu_clk);
-
if (policy->cpu >= NR_CPUS) {
result = -EINVAL;
goto fail_ck;
@@ -284,32 +242,74 @@ static struct cpufreq_driver omap_driver = {
.attr   = omap_cpufreq_attr,
 };
 
-static int __init omap_cpufreq_init(void)
+static int mpu_clk_volt_scale_handler(struct notifier_block *nb,
+   unsigned long flags, void *data)
 {
-   if (cpu_is_omap24xx())
-   mpu_clk_name = "virt_prcm_set";
-   else if (cpu_is_omap34xx())
-   mpu_clk_name = "dpll1_ck";
-   else if (cpu_is_omap44xx())
-   mpu_clk_name = "dpll_mpu_ck";
+   struct clk_notifier_data *cnd = data;
+   unsigned long tol;
+   int ret, volt_new, volt_old;
+   struct opp *opp;
 
-   if (!mpu_clk_name) {
-   pr_err("%s: unsupported Silicon?\n", __func__);
-   return -EINVAL;
+   volt_old = regulator_get_voltage(mpu_reg);
+   opp = opp_find_freq_exact(mpu_dev, cnd->new_rate, true);
+   volt_new = opp_get_voltage(opp);
+
+   tol = volt_new * OPP_TOLERANCE / 100;
+
+   /* scaling up?  scale voltage before frequency */
+ 

[PATCH 0/2] [RFC] reentrancy in the common clk framework

2012-07-13 Thread Mike Turquette
Hi all,

This RFC series is meant to kick off some discussion around two related
problems in the current clk framework implementation.

First, clk_prepare for i2c devices might result in nested calls to
clk_prepare (for preparing the clocks of the i2c controller).  So
basically we need to make clk_prepare reentrant for these cases.  Due to
the global prepare_lock mutex this currently results in a deadlock.

Second, dynamic voltage and frequency scaling (dvfs) through the clock
framework suffers from a similar issue as described above.  To date
several folks have expressed the desire to put voltage scaling logic
into the clk rate change notifier handlers as a way to implement dvfs
without creating a new driver-level api.  There are many benefits to
this approach, but on many platforms it is likely that calling
regulator_set_voltage within a rate change notifier handler will
generate a call to clk_prepare while clk_set_rate is holding the global
prepare_lock mutex.  This also results in a deadlock.

The first patch in this series is an attempt to solve the locking
problem via per-clock locks.  I do not like per-clock locks, but after
some experimentation it held more promise than other approaches.  The
implementation is only partially complete.  If you have any alternative
ideas to that sort of approach please let me know as per-clock locks are
really painful.

The second patch in this series simply demonstrates dvfs via clk rate
change notifiers.  The patch modifies the .target callback in OMAP's
cpufreq driver by removing direct calls to regulator_set_voltage and
instead registers a clk rate change notifier handler to do the same.
And whaddaya know it works!  In a perfect world any cpufreq or devfreq
driver would only need to call clk_set_rate within the .target callback
and everything would Just Work(tm).

Thanks in advance for any feedback, ideas or flames about how I don't
understand lockdep and broke everything and per-clock locking is stupid,
etc.

Mike Turquette (2):
  [RFC] clk: reentrancy via per-clock locking
  [RFC] cpufreq: omap: scale regulator from clk notifier

 drivers/clk/clk.c  |  202 +---
 drivers/cpufreq/omap-cpufreq.c |  154 ++
 include/linux/clk-private.h|5 +
 3 files changed, 250 insertions(+), 111 deletions(-)

-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC 1/2] PCI-Express Non-Transparent Bridge Support

2012-07-13 Thread Stephen Hemminger
On Fri, 13 Jul 2012 14:44:59 -0700
Jon Mason  wrote:

> A PCI-Express non-transparent bridge (NTB) is a point-to-point PCIe bus
> connecting 2 systems, providing electrical isolation between the two 
> subsystems.
> A non-transparent bridge is functionally similar to a transparent bridge 
> except
> that both sides of the bridge have their own independent address domains.  The
> host on one side of the bridge will not have the visibility of the complete
> memory or I/O space on the other side of the bridge.  To communicate across 
> the
> non-transparent bridge, each NTB endpoint has one (or more) apertures exposed 
> to
> the local system.  Writes to these apertures are mirrored to memory on the
> remote system.  Communications can also occur through the use of doorbell
> registers that initiate interrupts to the alternate domain, and scratch-pad
> registers accessible from both sides.
> 
> The NTB device driver is needed to configure these memory windows, doorbell, 
> and
> scratch-pad registers as well as use them in such a way as they can be turned
> into a viable communication channel to the remote system.  ntb_hw.[ch]
> determines the usage model (NTB to NTB or NTB to Root Port) and abstracts away
> the underlying hardware to provide access and a common interface to the 
> doorbell
> registers, scratch pads, and memory windows.  These hardware interfaces are
> exported so that other, non-mainlined kernel drivers can access these.
> ntb_transport.[ch] also uses the exported interfaces in ntb_hw.[ch] to setup a
> communication channel(s) and provide a reliable way of transferring data from
> one side to the other, which it then exports so that "client" drivers can 
> access
> them.  These client drivers are used to provide a standard kernel interface
> (i.e., Ethernet device) to NTB, such that Linux can transfer data from one
> system to the other in a standard way.
> 
> Signed-off-by: Jon Mason 

This driver does some reimplementing of standard type operations. Is this
because you are trying to use the same code on multiple platforms?

Example:
+
+static void ntb_list_add_head(spinlock_t *lock, struct list_head *entry,
+ struct list_head *list)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(lock, flags);
+   list_add(entry, list);
+   spin_unlock_irqrestore(lock, flags);
+}
+
+static void ntb_list_add_tail(spinlock_t *lock, struct list_head *entry,
+ struct list_head *list)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(lock, flags);
+   list_add_tail(entry, list);
+   spin_unlock_irqrestore(lock, flags);
+}

Which are used on skb's and yet we already have sk_buff_head with locking?

I know you probably are committed to this API, but is there some way to
reuse existing shared memory used by virtio-net between two ports?


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC 2/2] net: Add support for NTB virtual ethernet device

2012-07-13 Thread Stephen Hemminger
On Fri, 13 Jul 2012 14:45:00 -0700
Jon Mason  wrote:

> A virtual ethernet device that uses the NTB transport API to send/receive 
> data.
> 
> Signed-off-by: Jon Mason 
> ---
>  drivers/net/Kconfig  |4 +
>  drivers/net/Makefile |1 +
>  drivers/net/ntb_netdev.c |  411 
> ++
>  3 files changed, 416 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/net/ntb_netdev.c


> +static void ntb_get_drvinfo(__attribute__((unused)) struct net_device *dev,
> + struct ethtool_drvinfo *info)
> +{
> + strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
> + strlcpy(info->version, NTB_NETDEV_VER, sizeof(info->version));
> +}
> +
> +static const char ntb_nic_stats[][ETH_GSTRING_LEN] = {
> + "rx_packets", "rx_bytes", "rx_errors", "rx_dropped", "rx_length_errors",
> + "rx_frame_errors", "rx_fifo_errors",
> + "tx_packets", "tx_bytes", "tx_errors", "tx_dropped",
> +};
> +
> +static int ntb_get_stats_count(__attribute__((unused)) struct net_device 
> *dev)
> +{
> + return ARRAY_SIZE(ntb_nic_stats);
> +}
> +
> +static int ntb_get_sset_count(struct net_device *dev, int sset)
> +{
> + switch (sset) {
> + case ETH_SS_STATS:
> + return ntb_get_stats_count(dev);
> + default:
> + return -EOPNOTSUPP;
> + }
> +}
> +
> +static void ntb_get_strings(__attribute__((unused)) struct net_device *dev,
> + u32 sset, u8 *data)
> +{
> + switch (sset) {
> + case ETH_SS_STATS:
> + memcpy(data, *ntb_nic_stats, sizeof(ntb_nic_stats));
> + }
> +}
> +
> +static void
> +ntb_get_ethtool_stats(struct net_device *dev,
> +   __attribute__((unused)) struct ethtool_stats *stats,
> +   u64 *data)
> +{
> + int i = 0;
> +
> + data[i++] = dev->stats.rx_packets;
> + data[i++] = dev->stats.rx_bytes;
> + data[i++] = dev->stats.rx_errors;
> + data[i++] = dev->stats.rx_dropped;
> + data[i++] = dev->stats.rx_length_errors;
> + data[i++] = dev->stats.rx_frame_errors;
> + data[i++] = dev->stats.rx_fifo_errors;
> + data[i++] = dev->stats.tx_packets;
> + data[i++] = dev->stats.tx_bytes;
> + data[i++] = dev->stats.tx_errors;
> + data[i++] = dev->stats.tx_dropped;
> +}

These statistics add no value over existing network stats.
Don't implement ethtool stats unless device has something more
interesting to say.

> +static const struct ethtool_ops ntb_ethtool_ops = {
> + .get_drvinfo = ntb_get_drvinfo,
> + .get_sset_count = ntb_get_sset_count,
> + .get_strings = ntb_get_strings,
> + .get_ethtool_stats = ntb_get_ethtool_stats,
> + .get_link = ethtool_op_get_link,
> +};

If you want to implement bonding or bridging then implementing
get_settings would help.

> +static int __init ntb_netdev_init_module(void)
> +{
> + struct ntb_netdev *dev;
> + int rc;
> +
> + pr_info("%s: Probe\n", KBUILD_MODNAME);

Useless message

> + netdev = alloc_etherdev(sizeof(struct ntb_netdev));
> + if (!netdev)
> + return -ENOMEM;
> +
> + dev = netdev_priv(netdev);
> + dev->ndev = netdev;
> + netdev->features = NETIF_F_HIGHDMA;
> +
> + netdev->hw_features = netdev->features;
> + netdev->watchdog_timeo = msecs_to_jiffies(NTB_TX_TIMEOUT_MS);
> +
> + random_ether_addr(netdev->perm_addr);
> + memcpy(netdev->dev_addr, netdev->perm_addr, netdev->addr_len);
> +
> + netdev->netdev_ops = _netdev_ops;
> + SET_ETHTOOL_OPS(netdev, _ethtool_ops);
> +
> + dev->qp = ntb_transport_create_queue(ntb_netdev_rx_handler,
> +  ntb_netdev_tx_handler,
> +  ntb_netdev_event_handler);
> + if (!dev->qp) {
> + rc = -EIO;
> + goto err;
> + }
> +
> + netdev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN;
> +
> + rc = register_netdev(netdev);
> + if (rc)
> + goto err1;
> +
> + pr_info("%s: %s created\n", KBUILD_MODNAME, netdev->name);
> + return 0;
> +
> +err1:
> + ntb_transport_free_queue(dev->qp);
> +err:
> + free_netdev(netdev);
> + return rc;
> +}
> +module_init(ntb_netdev_init_module);
> +
> +static void __exit ntb_netdev_exit_module(void)
> +{
> + struct ntb_netdev *dev = netdev_priv(netdev);
> +
> + unregister_netdev(netdev);
> + ntb_transport_free_queue(dev->qp);
> + free_netdev(netdev);
> +
> + pr_info("%s: Driver removed\n", KBUILD_MODNAME);
> +}
> +module_exit(ntb_netdev_exit_module);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] staging: comedi: addi_data: remove pr_TTLRangelist

2012-07-13 Thread H Hartley Sweeten
All the addi driver ttl subdevices use the range table
'range_digital' provided by the comedi core. The boardinfo
value 'pr_TTLRangelist' is not used by the drivers. Remove
the unused range tables and the boardinfo pointer.

The unused range tables don't make sense anyway...

Signed-off-by: H Hartley Sweeten 
Cc: Ian Abbott 
Cc: Greg Kroah-Hartman 
---
 .../staging/comedi/drivers/addi-data/addi_common.c | 23 --
 .../staging/comedi/drivers/addi-data/addi_common.h |  1 -
 .../comedi/drivers/addi-data/hwdrv_apci16xx.h  | 15 --
 .../comedi/drivers/addi-data/hwdrv_apci3xxx.h  | 14 -
 4 files changed, 53 deletions(-)

diff --git a/drivers/staging/comedi/drivers/addi-data/addi_common.c 
b/drivers/staging/comedi/drivers/addi-data/addi_common.c
index 1e62f33..d7c1e1d 100644
--- a/drivers/staging/comedi/drivers/addi-data/addi_common.c
+++ b/drivers/staging/comedi/drivers/addi-data/addi_common.c
@@ -610,7 +610,6 @@ static const struct addi_board boardtypes[] = {
.i_IorangeBase0 = 128,
.i_PCIEeprom= ADDIDATA_NO_EEPROM,
.i_NbrTTLChannel= 48,
-   .pr_TTLRangelist= _apci16xx_ttl,
.reset  = i_APCI16XX_Reset,
.ttl_config = i_APCI16XX_InsnConfigInitTTLIO,
.ttl_bits   = i_APCI16XX_InsnBitsReadTTLIO,
@@ -623,7 +622,6 @@ static const struct addi_board boardtypes[] = {
.i_IorangeBase0 = 128,
.i_PCIEeprom= ADDIDATA_NO_EEPROM,
.i_NbrTTLChannel= 96,
-   .pr_TTLRangelist= _apci16xx_ttl,
.reset  = i_APCI16XX_Reset,
.ttl_config = i_APCI16XX_InsnConfigInitTTLIO,
.ttl_bits   = i_APCI16XX_InsnBitsReadTTLIO,
@@ -648,7 +646,6 @@ static const struct addi_board boardtypes[] = {
.i_AiMaxdata= 4095,
.pr_AiRangelist = _apci3XXX_ai,
.i_NbrTTLChannel= 24,
-   .pr_TTLRangelist= _apci3XXX_ttl,
.b_AvailableConvertUnit = 6,
.ui_MinAcquisitiontimeNs = 1,
.interrupt  = v_APCI3XXX_Interrupt,
@@ -675,7 +672,6 @@ static const struct addi_board boardtypes[] = {
.i_AiMaxdata= 4095,
.pr_AiRangelist = _apci3XXX_ai,
.i_NbrTTLChannel= 24,
-   .pr_TTLRangelist= _apci3XXX_ttl,
.b_AvailableConvertUnit = 6,
.ui_MinAcquisitiontimeNs = 1,
.interrupt  = v_APCI3XXX_Interrupt,
@@ -702,7 +698,6 @@ static const struct addi_board boardtypes[] = {
.i_AiMaxdata= 4095,
.pr_AiRangelist = _apci3XXX_ai,
.i_NbrTTLChannel= 24,
-   .pr_TTLRangelist= _apci3XXX_ttl,
.b_AvailableConvertUnit = 6,
.ui_MinAcquisitiontimeNs = 1,
.interrupt  = v_APCI3XXX_Interrupt,
@@ -729,7 +724,6 @@ static const struct addi_board boardtypes[] = {
.i_AiMaxdata= 65535,
.pr_AiRangelist = _apci3XXX_ai,
.i_NbrTTLChannel= 24,
-   .pr_TTLRangelist= _apci3XXX_ttl,
.b_AvailableConvertUnit = 6,
.ui_MinAcquisitiontimeNs = 1,
.interrupt  = v_APCI3XXX_Interrupt,
@@ -756,7 +750,6 @@ static const struct addi_board boardtypes[] = {
.i_AiMaxdata= 65535,
.pr_AiRangelist = _apci3XXX_ai,
.i_NbrTTLChannel= 24,
-   .pr_TTLRangelist= _apci3XXX_ttl,
.b_AvailableConvertUnit = 6,
.ui_MinAcquisitiontimeNs = 1,
.interrupt  = v_APCI3XXX_Interrupt,
@@ -783,7 +776,6 @@ static const struct addi_board boardtypes[] = {
.i_AiMaxdata= 65535,
.pr_AiRangelist = _apci3XXX_ai,
.i_NbrTTLChannel= 24,
-   .pr_TTLRangelist= _apci3XXX_ttl,
.b_AvailableConvertUnit = 6,
.ui_MinAcquisitiontimeNs = 1,
.interrupt  = v_APCI3XXX_Interrupt,
@@ -813,7 +805,6 @@ static const struct addi_board boardtypes[] = {
.i_NbrDoChannel = 4,
.i_DoMaxdata= 1,
.i_NbrTTLChannel= 24,
-   .pr_TTLRangelist= _apci3XXX_ttl,
.b_AvailableConvertUnit = 6,
.ui_MinAcquisitiontimeNs = 5000,
.interrupt  = v_APCI3XXX_Interrupt,
@@ -848,7 +839,6 @@ static const struct 

Re: [RFC 1/2] PCI-Express Non-Transparent Bridge Support

2012-07-13 Thread Stephen Hemminger
On Fri, 13 Jul 2012 14:44:59 -0700
Jon Mason  wrote:

> A PCI-Express non-transparent bridge (NTB) is a point-to-point PCIe bus
> connecting 2 systems, providing electrical isolation between the two 
> subsystems.
> A non-transparent bridge is functionally similar to a transparent bridge 
> except
> that both sides of the bridge have their own independent address domains.  The
> host on one side of the bridge will not have the visibility of the complete
> memory or I/O space on the other side of the bridge.  To communicate across 
> the
> non-transparent bridge, each NTB endpoint has one (or more) apertures exposed 
> to
> the local system.  Writes to these apertures are mirrored to memory on the
> remote system.  Communications can also occur through the use of doorbell
> registers that initiate interrupts to the alternate domain, and scratch-pad
> registers accessible from both sides.
> 
> The NTB device driver is needed to configure these memory windows, doorbell, 
> and
> scratch-pad registers as well as use them in such a way as they can be turned
> into a viable communication channel to the remote system.  ntb_hw.[ch]
> determines the usage model (NTB to NTB or NTB to Root Port) and abstracts away
> the underlying hardware to provide access and a common interface to the 
> doorbell
> registers, scratch pads, and memory windows.  These hardware interfaces are
> exported so that other, non-mainlined kernel drivers can access these.
> ntb_transport.[ch] also uses the exported interfaces in ntb_hw.[ch] to setup a
> communication channel(s) and provide a reliable way of transferring data from
> one side to the other, which it then exports so that "client" drivers can 
> access
> them.  These client drivers are used to provide a standard kernel interface
> (i.e., Ethernet device) to NTB, such that Linux can transfer data from one
> system to the other in a standard way.
> 
> Signed-off-by: Jon Mason 

> +
> +static int max_num_cbs = 2;
> +module_param(max_num_cbs, uint, 0644);
> +MODULE_PARM_DESC(max_num_cbs, "Maximum number of NTB transport connections");

Rather than making it a fixed size, could you dynamically set these up
with rtnl_link_ops?

> +static struct ntb_device *ntbdev;

What about multiple boards in system?

> +/**
> + * ntb_hw_link_status() - return the hardware link status
> + * @ndev: pointer to ntb_device instance
> + *
> + * Returns true if the hardware is connected to the remote system
> + *
> + * RETURNS: true or false based on the hardware link state
> + */
> +bool ntb_hw_link_status(struct ntb_device *ndev)
> +{
> + return ndev->link_status == NTB_LINK_UP;
> +}
> +EXPORT_SYMBOL(ntb_hw_link_status);

Why isn't this inline in some header?

> +/**
> + * ntb_query_pdev() - return the pci_dev pointer
> + * @ndev: pointer to ntb_device instance
> + *
> + * Given the ntb pointer return the pci_dev pointerfor the NTB hardware 
> device
> + *
> + * RETURNS: a pointer to the ntb pci_dev
> + */
> +struct pci_dev *ntb_query_pdev(struct ntb_device *ndev)
> +{
> + return ndev->pdev;
> +}
> +EXPORT_SYMBOL(ntb_query_pdev);
> +
> +/**
> + * ntb_query_max_cbs() - return the maximum number of callback tuples
> + * @ndev: pointer to ntb_device instance
> + *
> + * The number of callbacks can vary depending on the platform and MSI-X/MSI
> + * enablement
> + *
> + * RETURNS: the maximum number of callback tuples (3, 15, or 33)
> + */
> +unsigned int ntb_query_max_cbs(struct ntb_device *ndev)
> +{
> + return ndev->max_cbs > max_num_cbs ? max_num_cbs : ndev->max_cbs;
> +}
> +EXPORT_SYMBOL(ntb_query_max_cbs);
> +
> +/**
> + * ntb_register_event_callback() - register event callback
> + * @ndev: pointer to ntb_device instance
> + * @func: callback function to register
> + *
> + * This function registers a callback for any HW driver events such as link
> + * up/down, power management notices and etc.
> + *
> + * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
> + */
> +int ntb_register_event_callback(struct ntb_device *ndev,
> + void (*func)(void *handle, unsigned int event))
> +{
> + if (ndev->event_cb)
> + return -EINVAL;
> +
> + ndev->event_cb = func;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(ntb_register_event_callback);
> +
> +/**
> + * ntb_unregister_event_callback() - unregisters the event callback
> + * @ndev: pointer to ntb_device instance
> + *
> + * This function unregisters the existing callback from transport
> + */
> +void ntb_unregister_event_callback(struct ntb_device *ndev)
> +{
> + ndev->event_cb = NULL;
> +}
> +EXPORT_SYMBOL(ntb_unregister_event_callback);
> +

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[KORG] lkml.kernel.org up and running

2012-07-13 Thread John 'Warthog9' Hawley
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

Afternoon everyone,

Just a quick update, lkml.kernel.org (the re-director), is back up and
running.  It should also be slightly better about dealing with the
messed up escaping that some clients seem to incur.

If you have any issues, as always, give us a shout and let us know.

- - John 'Warthog9' Hawley
Chief Kernel.org Administrator
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.12 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQIcBAEBAgAGBQJQALXKAAoJEC6na5wrRm2d1TMP/1JnDUny2fOFwYgLhB3FzCtZ
fVgz8sLX0EZOcFEBP1JCttlmNQRj74bEy32opY2rrM4IOWsn+hjyNOGKCUE/pFBC
Vx9R7chAkyKBalpqq9gPXRSycHz2P4iWfOoptJtoivph6SiyBB3+jete7b8e6Dr4
Lk2N5CRgpK+dhM1HObbk3N06Bg15rCn5GuBWSK5HYHy6Di72199S0vcU7MzzPUPW
PEQ7esllK4JOLFSbDbRV4FZBxCROoCTbVIBSR6R1CNjpUJb8FvpGNiGQT6T2DRQX
j32dENXxFV7Z2Fy74ueMx0KN47IsjNZtMZBBrLNKYbmLkPQXS78FXBO4wlsPo+wk
eR/SFfDYIJJA1MjwjZoLo8XaMSZgxwOc2WHtgssMeTc0EyVsI+cy1LIZc6S7dPta
oE6dq4Ne0y4noZI2BmE1smHBume4Ur5Xcgd7oANFGWZEipHjOO8CCmGWRWWPOvCk
u/FO058zDOKmzP2ecs8rqJ8knMXyhMyyQgHhbhQAGdx/AIw5bqMe+TGF3Q4pgSBy
PlUE7XBdl0UavbxeGZ1dZ/nwTrQ76eAJB1eyxCQvuq+fuRWaRn74rfRXdZz8JSMe
SKZhUMKG82OO4mKJtUTY+FJa7U8iMA7VW6N2AVuO67hNcp/9F9BoJurYeP+QLmGP
sEJoeMbMxi3KJs0R9y3k
=a6Bh
-END PGP SIGNATURE-
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: resurrecting tcphealth

2012-07-13 Thread Stephen Hemminger
I am not sure if this is really necessary since most
of the stats are available elsewhere.

Here are some comments on getting the code simplified to match
the kernel style.

>
> static inline struct tcp_sock *tcp_sk(const struct sock *sk)
>diff -rub A/net/ipv4/tcp_input.c B/net/ipv4/tcp_input.c
>--- A/net/ipv4/tcp_input.c 2012-06-22 20:37:50.0 +0200
>+++ B/net/ipv4/tcp_input.c 2012-07-06 10:12:12.0 +0200
>@@ -4414,6 +4415,8 @@
>   }
>
>   if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
>+  /* Course retransmit inefficiency- this packet has been 
>received twice. */
>+  tp->dup_pkts_recv++;

I don't understand that comment, could you use a better sentence please?

>
>   tp->rx_opt.saw_tstamp = 0;
>
>+  /*
>+   *  Tcp health monitoring is interested in
>+   *  total per-connection packet arrivals.
>+   *  This is in the fast path, but is quick.
>+   */
>+  tp->pkts_recv++;
>+

Comment seems bigger justification than necessary for simple
operation.

>diff -rub A/net/ipv4/tcp_ipv4.c B/net/ipv4/tcp_ipv4.c
>--- A/net/ipv4/tcp_ipv4.c  2012-06-22 20:37:50.0 +0200
>+++ B/net/ipv4/tcp_ipv4.c  2012-07-11 09:34:22.0 +0200
>@@ -2533,6 +2533,82 @@
>   return 0;
> }
>
>+
>+/*
>+ *Output /proc/net/tcphealth
>+ */
>+#define LINESZ 128
>+
>+int tcp_health_seq_show(struct seq_file *seq, void *v)
>+{
>+  int len, num;
>+  char srcIP[32], destIP[32];
Unnecessary see below

>+
>+  unsigned long  SmoothedRttEstimate,
>+  AcksSent, DupAcksSent, PktsRecv, DupPktsRecv;

Do not use CamelCase in kernel code.

>+  struct tcp_iter_state *st;
>+
>+  if (v == SEQ_START_TOKEN) {
>+  seq_printf(seq,
>+  "TCP Health Monitoring (established connections only)\n"
>+  " -Duplicate ACKs indicate lost or reordered packets on the
>connection.\n"
>+  " -Duplicate Packets Received signal a slow and badly 
>inefficient
>connection.\n"
>+  " -RttEst estimates how long future packets will take on a 
>round trip
>over the connection.\n"
>+  "id   Local AddressRemote Address   RttEst(ms) 
>AcksSent "

Header seems excessive, just put one line of header please.


>+  "DupAcksSent PktsRecv DupPktsRecv\n");
>+  goto out;
>+  }
>+
>+  /* Loop through established TCP connections */
>+  st = seq->private;
>+
>+
>+  if (st->state == TCP_SEQ_STATE_ESTABLISHED)
>+  {
>+/*; //insert read-lock here */

Don't think you need read-lock

>+  const struct tcp_sock *tp = tcp_sk(v);
>+  const struct inet_sock *inet = inet_sk(v);
>+  __be32 dest = inet->inet_daddr;
>+  __be32 src = inet->inet_rcv_saddr;
>+  __u16 destp = ntohs(inet->inet_dport);
>+  __u16 srcp = ntohs(inet->inet_sport);
>+

These temp variables are redundant.

>+  num = st->num;
>+  SmoothedRttEstimate = (tp->srtt >> 3);
>+  AcksSent = tp->acks_sent;
>+  DupAcksSent = tp->dup_acks_sent;
>+  PktsRecv = tp->pkts_recv;
>+  DupPktsRecv = tp->dup_pkts_recv;
>+
>+  sprintf(srcIP, "%lu.%lu.%lu.%lu:%u",
>+  ((src >> 24) & 0xFF), ((src >> 16) & 0xFF), ((src >> 8) 
>& 0xFF), (src &
>0xFF),
>+  srcp);
>+  sprintf(destIP, "%3d.%3d.%3d.%3d:%u",
>+  ((dest >> 24) & 0xFF), ((dest >> 16) & 0xFF), ((dest >> 
>8) & 0xFF),
>(dest & 0xFF),
>+  destp);
>+
>+  seq_printf(seq, "%d: %-21s %-21s "
>+  "%8lu %8lu %8lu %8lu %8lu%n",
>+  num,
>+  srcIP,
>+  destIP,
>+  SmoothedRttEstimate,
>+  AcksSent,
>+  DupAcksSent,
>+  PktsRecv,
>+  DupPktsRecv,
>+
>+  
>+  );
>+

Kernel has %pI4 to print IP addresses. 

seq_printf(seq, "%d: %-21pI4 %-21pI4 "
"%8lu %8lu %8lu %8lu %8lu\n",
num,
&inet->inet_rcv_saddr,
&inet->inet_daddr,
tp->srtt >> 3,
tp->acks_sent,
tp->dup_acks_sent,
tp->pkts_recv,
tp->dup_pkts_recv);

>+  seq_printf(seq, "%*s\n", LINESZ - 1 - len, "");

This padding of line is bogus, just print variable length line.
Are you trying to make it fixed length record file?


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  

[PULL REQUEST] one new bugfix for md/RAID1

2012-07-13 Thread NeilBrown

The following changes since commit 10684112c9d154172ac34e48a2ab68649e8f63ac:

  md/raid10: fix careless build error (2012-07-04 09:35:35 +1000)

are available in the git repository at:

  git://neil.brown.name/md/ tags/md-3.5-fixes

for you to fetch changes up to 2d4f4f3384d4ef4f7c571448e803a1ce721113d5:

  md/raid1: fix use-after-free bug in RAID1 data-check code. (2012-07-09 
11:34:13 +1000)


md: One use-after-free bugfix for RAID1


NeilBrown (1):
  md/raid1: fix use-after-free bug in RAID1 data-check code.

 drivers/md/raid1.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


Thanks,
NeilBrown


signature.asc
Description: PGP signature


Re: kptr_restrict: pK-error in SysRq show-all-timers(Q)

2012-07-13 Thread Dan Rosenberg
On 06/26/2012 05:20 PM, Stevie Trujillo wrote:
> Hello,
>
> if I press ALT+SysRq+Q all the pointers are replaced with "pK-error" like 
> this:
> [23153.208033]   .base:   pK-error
>
> with echo h > /proc/sysrq-trigger it works:
> [23107.776363]   .base:   88023e60d540
>
> --
> Stevie Trujillo

The intent behind this behavior was to return "pK-error" in cases where the %pK
format specifier was used in interrupt context, because the CAP_SYSLOG check
wouldn't be meaningful.  Clearly this should only apply when kptr_restrict is
actually enabled though.

Signed-off-by: Dan Rosenberg 
Cc: sta...@vger.kernel.org
---
 lib/vsprintf.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index c3f36d41..598a73e 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1030,7 +1030,8 @@ char *pointer(const char *fmt, char *buf, char *end, void 
*ptr,
 * %pK cannot be used in IRQ context because its test
 * for CAP_SYSLOG would be meaningless.
 */
-   if (in_irq() || in_serving_softirq() || in_nmi()) {
+   if (kptr_restrict && (in_irq() || in_serving_softirq() ||
+ in_nmi())) {
if (spec.field_width == -1)
spec.field_width = default_width;
return string(buf, end, "pK-error", spec);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] staging: wlan-ng: Fix problem with wrong arguments

2012-07-13 Thread Emil Goode
The function pointer scan in struct cfg80211_ops is not
supposed to be assigned a function with a struct net_device
pointer as an argument. Instead access the net_device struct
in the following way:

struct net_device *dev = request->wdev->netdev;

sparse gives these warnings:

drivers/staging/wlan-ng/cfg80211.c:726:17: warning:
incorrect type in initializer (incompatible argument 2
(different base types))
expected int ( *scan )( ... )
got int ( extern [toplevel] * )( ... )

drivers/staging/wlan-ng/cfg80211.c:726:2: warning:
initialization from incompatible pointer type [enabled by default]

drivers/staging/wlan-ng/cfg80211.c:726:2: warning:
(near initialization for ‘prism2_usb_cfg_ops.scan’)
[enabled by default]

Signed-off-by: Emil Goode 
---
 drivers/staging/wlan-ng/cfg80211.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/wlan-ng/cfg80211.c 
b/drivers/staging/wlan-ng/cfg80211.c
index fabff4d..0970127 100644
--- a/drivers/staging/wlan-ng/cfg80211.c
+++ b/drivers/staging/wlan-ng/cfg80211.c
@@ -327,9 +327,9 @@ int prism2_get_station(struct wiphy *wiphy, struct 
net_device *dev,
return result;
 }
 
-int prism2_scan(struct wiphy *wiphy, struct net_device *dev,
-   struct cfg80211_scan_request *request)
+int prism2_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request)
 {
+   struct net_device *dev = request->wdev->netdev;
struct prism2_wiphy_private *priv = wiphy_priv(wiphy);
wlandevice_t *wlandev = dev->ml_priv;
struct p80211msg_dot11req_scan msg1;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC 2/2] net: Add support for NTB virtual ethernet device

2012-07-13 Thread Jiri Pirko
Fri, Jul 13, 2012 at 11:45:00PM CEST, jon.ma...@intel.com wrote:
>A virtual ethernet device that uses the NTB transport API to send/receive data.
>
>Signed-off-by: Jon Mason 
>---
> drivers/net/Kconfig  |4 +
> drivers/net/Makefile |1 +
> drivers/net/ntb_netdev.c |  411 ++
> 3 files changed, 416 insertions(+), 0 deletions(-)
> create mode 100644 drivers/net/ntb_netdev.c
>
>diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
>index 0c2bd80..9bf8a71 100644
>--- a/drivers/net/Kconfig
>+++ b/drivers/net/Kconfig
>@@ -178,6 +178,10 @@ config NETPOLL_TRAP
> config NET_POLL_CONTROLLER
>   def_bool NETPOLL
> 
>+config NTB_NETDEV
>+  tristate "Virtual Ethernet over NTB"
>+  depends on NTB
>+
> config RIONET
>   tristate "RapidIO Ethernet over messaging driver support"
>   depends on RAPIDIO
>diff --git a/drivers/net/Makefile b/drivers/net/Makefile
>index 3d375ca..9890148 100644
>--- a/drivers/net/Makefile
>+++ b/drivers/net/Makefile
>@@ -69,3 +69,4 @@ obj-$(CONFIG_USB_IPHETH)+= usb/
> obj-$(CONFIG_USB_CDC_PHONET)   += usb/
> 
> obj-$(CONFIG_HYPERV_NET) += hyperv/
>+obj-$(CONFIG_NTB_NETDEV) += ntb_netdev.o
>diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
>new file mode 100644
>index 000..bcbd9d4
>--- /dev/null
>+++ b/drivers/net/ntb_netdev.c
>@@ -0,0 +1,411 @@
>+/*
>+ * This file is provided under a dual BSD/GPLv2 license.  When using or
>+ *   redistributing this file, you may do so under either license.
>+ *
>+ *   GPL LICENSE SUMMARY
>+ *
>+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
>+ *
>+ *   This program is free software; you can redistribute it and/or modify
>+ *   it under the terms of version 2 of the GNU General Public License as
>+ *   published by the Free Software Foundation.
>+ *
>+ *   This program is distributed in the hope that it will be useful, but
>+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
>+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>+ *   General Public License for more details.
>+ *
>+ *   You should have received a copy of the GNU General Public License
>+ *   along with this program; if not, write to the Free Software
>+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 
>USA.
>+ *   The full GNU General Public License is included in this distribution
>+ *   in the file called LICENSE.GPL.
>+ *
>+ *   BSD LICENSE
>+ *
>+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
>+ *
>+ *   Redistribution and use in source and binary forms, with or without
>+ *   modification, are permitted provided that the following conditions
>+ *   are met:
>+ *
>+ * * Redistributions of source code must retain the above copyright
>+ *   notice, this list of conditions and the following disclaimer.
>+ * * Redistributions in binary form must reproduce the above copy
>+ *   notice, this list of conditions and the following disclaimer in
>+ *   the documentation and/or other materials provided with the
>+ *   distribution.
>+ * * Neither the name of Intel Corporation nor the names of its
>+ *   contributors may be used to endorse or promote products derived
>+ *   from this software without specific prior written permission.
>+ *
>+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>+ *
>+ * Intel PCIe NTB Network Linux driver
>+ *
>+ * Contact Information:
>+ * Jon Mason 
>+ */
>+#include 
>+#include 
>+#include 
>+#include 
>+
>+#define NTB_NETDEV_VER"0.4"

Is it really necessary to provide this in-file versioning? Doesn't
kernel version itself do the trick?

>+
>+MODULE_DESCRIPTION(KBUILD_MODNAME);
>+MODULE_VERSION(NTB_NETDEV_VER);
>+MODULE_LICENSE("Dual BSD/GPL");
>+MODULE_AUTHOR("Intel Corporation");
>+
>+struct ntb_netdev {
>+  struct net_device *ndev;
>+  struct ntb_transport_qp *qp;
>+};
>+
>+#define   NTB_TX_TIMEOUT_MS   1000
>+#define   NTB_RXQ_SIZE100
>+
>+static struct net_device *netdev;
>+
>+static void ntb_netdev_event_handler(int status)
>+{
>+  struct ntb_netdev *dev = netdev_priv(netdev);
>+
>+  pr_debug("%s: Event %x, Link %x\n", 

[PATCH TRIVIAL] mm: Fix build warning in kmem_cache_create()

2012-07-13 Thread Shuah Khan
The label oops is used in CONFIG_DEBUG_VM ifdef block and is defined
outside ifdef CONFIG_DEBUG_VM block. This results in the following
build warning when built with CONFIG_DEBUG_VM disabled. Fix to move 
label oops definition to inside a CONFIG_DEBUG_VM block.

mm/slab_common.c: In function ‘kmem_cache_create’:
mm/slab_common.c:101:1: warning: label ‘oops’ defined but not used
[-Wunused-label]

Signed-off-by: Shuah Khan 
---
 mm/slab_common.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 12637ce..aa3ca5b 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -98,7 +98,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t 
size, size_t align
 
s = __kmem_cache_create(name, size, align, flags, ctor);
 
+#ifdef CONFIG_DEBUG_VM
 oops:
+#endif
mutex_unlock(&slab_mutex);
put_online_cpus();
 
-- 
1.7.9.5



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 05/12] rbtree: performance and correctness test

2012-07-13 Thread Michel Lespinasse
This small module helps measure the performance of rbtree insert and erase.

Additionally, we run a few correctness tests to check that the rbtrees have
all desired properties:
- contains the right number of nodes in the order desired,
- never two consecutive red nodes on any path,
- all paths to leaf nodes have the same number of black nodes,
- root node is black

Signed-off-by: Michel Lespinasse 
---
 lib/Kconfig.debug |7 +++
 lib/Makefile  |2 +
 lib/rbtree_test.c |  135 +
 3 files changed, 144 insertions(+), 0 deletions(-)
 create mode 100644 lib/rbtree_test.c

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 6777153..736f564 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1145,6 +1145,13 @@ config LATENCYTOP
 source mm/Kconfig.debug
 source kernel/trace/Kconfig
 
+config RBTREE_TEST
+   tristate "Red-Black tree test"
+   depends on m && DEBUG_KERNEL
+   help
+ A benchmark measuring the performance of the rbtree library.
+ Also includes rbtree invariant checks.
+
 config PROVIDE_OHCI1394_DMA_INIT
bool "Remote debugging over FireWire early on boot"
depends on PCI && X86
diff --git a/lib/Makefile b/lib/Makefile
index 18515f0..4899899 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -123,6 +123,8 @@ obj-$(CONFIG_SIGNATURE) += digsig.o
 
 obj-$(CONFIG_CLZ_TAB) += clz_tab.o
 
+obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o
+
 hostprogs-y:= gen_crc32table
 clean-files:= crc32table.h
 
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
new file mode 100644
index 000..4c6d250
--- /dev/null
+++ b/lib/rbtree_test.c
@@ -0,0 +1,135 @@
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/random.h>
+#include <asm/timex.h>
+
+#define NODES   100
+#define PERF_LOOPS  10
+#define CHECK_LOOPS 100
+
+struct test_node {
+   struct rb_node rb;
+   u32 key;
+};
+
+static struct rb_root root = RB_ROOT;
+static struct test_node nodes[NODES];
+
+static struct rnd_state rnd;
+
+static void insert(struct test_node *node, struct rb_root *root)
+{
+   struct rb_node **new = &root->rb_node, *parent = NULL;
+
+   while (*new) {
+   parent = *new;
+   if (node->key < rb_entry(parent, struct test_node, rb)->key)
+   new = &parent->rb_left;
+   else
+   new = &parent->rb_right;
+   }
+
+   rb_link_node(&node->rb, parent, new);
+   rb_insert_color(&node->rb, root);
+}
+
+static inline void erase(struct test_node *node, struct rb_root *root)
+{
+   rb_erase(&node->rb, root);
+}
+
+static void init(void)
+{
+   int i;
+   for (i = 0; i < NODES; i++)
+   nodes[i].key = prandom32();
+}
+
+static bool is_red(struct rb_node *rb)
+{
+   return !(rb->__rb_parent_color & 1);
+}
+
+static int black_path_count(struct rb_node *rb)
+{
+   int count;
+   for (count = 0; rb; rb = rb_parent(rb))
+   count += !is_red(rb);
+   return count;
+}
+
+static void check(int nr_nodes)
+{
+   struct rb_node *rb;
+   int count = 0;
+   int blacks;
+   u32 prev_key = 0;
+
+   for (rb = rb_first(&root); rb; rb = rb_next(rb)) {
+   struct test_node *node = rb_entry(rb, struct test_node, rb);
+   WARN_ON_ONCE(node->key < prev_key);
+   WARN_ON_ONCE(is_red(rb) &&
+(!rb_parent(rb) || is_red(rb_parent(rb))));
+   if (!count)
+   blacks = black_path_count(rb);
+   else
+   WARN_ON_ONCE((!rb->rb_left || !rb->rb_right) &&
+blacks != black_path_count(rb));
+   prev_key = node->key;
+   count++;
+   }
+   WARN_ON_ONCE(count != nr_nodes);
+}
+
+static int rbtree_test_init(void)
+{
+   int i, j;
+   cycles_t time1, time2, time;
+
+   printk(KERN_ALERT "rbtree testing");
+
+   prandom32_seed(&rnd, 3141592653589793238);
+   init();
+
+   time1 = get_cycles();
+
+   for (i = 0; i < PERF_LOOPS; i++) {
+   for (j = 0; j < NODES; j++)
+   insert(nodes + j, &root);
+   for (j = 0; j < NODES; j++)
+   erase(nodes + j, &root);
+   }
+
+   time2 = get_cycles();
+   time = time2 - time1;
+
+   time = div_u64(time, PERF_LOOPS);
+   printk(" -> %llu cycles\n", time);
+
+   for (i = 0; i < CHECK_LOOPS; i++) {
+   init();
+   for (j = 0; j < NODES; j++) {
+   check(j);
+   insert(nodes + j, &root);
+   }
+   for (j = 0; j < NODES; j++) {
+   check(NODES - j);
+   erase(nodes + j, &root);
+   }
+   check(0);
+   }
+
+   return -EAGAIN; /* Fail will directly unload the module */
+}
+
+static void rbtree_test_exit(void)
+{
+   printk(KERN_ALERT "test exit\n");
+}
+
+module_init(rbtree_test_init)
+module_exit(rbtree_test_exit)

Re: [PATCH v2 05/12] rbtree: performance and correctness test

2012-07-13 Thread Michel Lespinasse
On Fri, Jul 13, 2012 at 3:45 PM, Andrew Morton
 wrote:
> On Fri, 13 Jul 2012 15:33:35 -0700 Michel Lespinasse  
> wrote:
>> Ah, I did not realize we had a precedent for in-tree kernel test modules.
>
> hm, well, just because that's what we do now doesn't mean that it was a
> good idea ;) These things arrive as a result of individual developers
> doing stuff in their little directories and no particular thought was
> put into overall structure.
>
> It could be that it would be better to put all these tests into a
> central place, rather than sprinkling them around the tree.  If so,
> then your patch can lead the way, and we (ie: I) prod past and future
> developers into getting with the program.
>
> otoh, perhaps in-kernel test modules will rely on headers and constants
> which are private to the implementation directory.  So perhaps
> sprinkled-everywhere is the best approach.

I think it is at least reasonable. Where we could improve, however,
would be on the Kconfig side of things.

>> I don't think my proposal was significantly better than this
>> precedent, so I'll just adjust my patch to conform to it:
>> - move rbtree_test.c to lib/
>> - modify just lib/Makefile and lib/Kconfig.debug to get the module built.
>>
>> Will send a replacement patch for this (so you can drop that one patch
>> from the stack and replace it with)
>
> OK, you could do that too.  That way you avoid the problem and we can
> worry about it later (if ever), as a separate activity.

Going to attach as a reply to this email.

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Frank Rowand
On 07/13/12 14:55, Dave Jones wrote:
> On Fri, Jul 13, 2012 at 11:50:25PM +0200, Paul Bolle wrote:
> 
>  > But just removing all the certainly unused macros probably wouldn't have
>  > made a noticeable difference to anyone using those defconfig files
>  > anyway.
> 
> My point is that I don't think there's many people actually using them.
> (maybe more on the niche platforms, but x86[64] ? I'm sceptical they're used 
> at all)

I'm one of those people who use default configs.  I build a given kernel 
version for
many different embedded boards and expect the default config to work for them.  
It
makes life much easier.

I also share Linus' pain when building for my host x86 system and try to
remove the cruft from my distro config.

-Frank

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] x86/vsyscall: allow seccomp filter in vsyscall=emulate

2012-07-13 Thread Andrew Lutomirski
On Fri, Jul 13, 2012 at 10:06 AM, Will Drewry  wrote:
> If a seccomp filter program is installed, older static binaries and
> distributions with older libc implementations (glibc 2.13 and earlier)
> that rely on vsyscall use will be terminated regardless of the filter
> program policy when executing time, gettimeofday, or getcpu.  This is
> only the case when vsyscall emulation is in use (vsyscall=emulate is the
> default).
>
> This patch emulates system call entry inside a vsyscall=emulate by
> populating regs->ax and regs->orig_ax with the system call number prior
> to calling into seccomp such that all seccomp-dependencies function
> normally.  Additionally, system call return behavior is emulated in line
> with other vsyscall entrypoints for the trace/trap cases.
>
> Reported-by: Owen Kibel 
> Signed-off-by: Will Drewry 
>
> v2: - fixed ip and sp on SECCOMP_RET_TRAP/TRACE (thanks to l...@mit.edu)

> @@ -253,6 +273,12 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned 
> long address)
>
> current_thread_info()->sig_on_uaccess_error = 
> prev_sig_on_uaccess_error;
>
> +   if (skip) {
> +   if ((long)regs->ax <= 0L) /* seccomp errno emulation */
> +   goto do_ret;
> +   goto done; /* seccomp trace/trap */
> +   }
> +
> if (ret == -EFAULT) {
> /* Bad news -- userspace fed a bad pointer to a vsyscall. */
> warn_bad_vsyscall(KERN_INFO, regs,
> @@ -271,10 +297,11 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned 
> long address)
>
> regs->ax = ret;
>
> +do_ret:
> /* Emulate a ret instruction. */
> regs->ip = caller;
> regs->sp += 8;
> -
> +done:
> return true;
>
>  sigsegv:
> --
> 1.7.9.5
>

This has the same odd property as the sigsegv path that the faulting
instruction will appear to be the mov, not the syscall.  That seems to
be okay, though -- various pieces of code that try to restart the segv
are okay with that.

Is there any code that assumes that changing rax (i.e. the syscall
number) and restarting a syscall after SIGSYS will invoke the new
syscall?  (The RET_TRACE path might be similar -- does the
ptrace_event(PTRACE_EVENT_SECCOMP, data) in seccomp.c give a debugger
a chance to synchronously cancel or change the syscall?)

If those issues aren't problems, then:

Reviewed-by: Andy Lutomirski 

(If the syscall number needs to change after the fact in the
SECCOMP_RET_TRAP case, it'll be a mess.)

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] PCI: Fix undefined reference to 'pci_fixup_final_inited'

2012-07-13 Thread Randy Dunlap
On 07/13/2012 01:29 PM, Myron Stowe wrote:

> My "PCI: Integrate 'pci_fixup_final' quirks into hot-plug paths" patch
> introduced an undefined reference to 'pci_fixup_final_inited' when
> CONFIG_PCI_QUIRKS is not enabled (on x86_64):
>   drivers/built-in.o: In function `pci_bus_add_device':
>   (.text+0x4f62): undefined reference to `pci_fixup_final_inited'
> 
> This patch removes the external reference ending up with a result closer
> to what we ultimately want when the boot path issues described in the
> original patch are resolved.
> 
> References:
>   https://lkml.org/lkml/2012/7/9/542Original, offending, patch
>   https://lkml.org/lkml/2012/7/12/338 Randy's catch
> 
> Reported-by: rdun...@xenotime.net
> Signed-off-by: Myron Stowe 


Acked-by: Randy Dunlap 

Thanks.


> ---
> 
>  drivers/pci/bus.c|4 +---
>  drivers/pci/quirks.c |   20 
>  2 files changed, 5 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
> index b511bd4..4b0970b 100644
> --- a/drivers/pci/bus.c
> +++ b/drivers/pci/bus.c
> @@ -164,10 +164,8 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct 
> resource *res,
>  int pci_bus_add_device(struct pci_dev *dev)
>  {
>   int retval;
> - extern bool pci_fixup_final_inited;
>  
> - if (pci_fixup_final_inited)
> - pci_fixup_device(pci_fixup_final, dev);
> + pci_fixup_device(pci_fixup_final, dev);
>   retval = device_add(&dev->dev);
>   if (retval)
>   return retval;
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index 52f44b5..003f356 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -2956,6 +2956,7 @@ extern struct pci_fixup __end_pci_fixups_resume_early[];
>  extern struct pci_fixup __start_pci_fixups_suspend[];
>  extern struct pci_fixup __end_pci_fixups_suspend[];
>  
> +static bool pci_apply_fixup_final_quirks;
>  
>  void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
>  {
> @@ -2973,6 +2974,8 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct 
> pci_dev *dev)
>   break;
>  
>   case pci_fixup_final:
> + if (!pci_apply_fixup_final_quirks)
> + return;
>   start = __start_pci_fixups_final;
>   end = __end_pci_fixups_final;
>   break;
> @@ -3006,21 +3009,6 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct 
> pci_dev *dev)
>  EXPORT_SYMBOL(pci_fixup_device);
>  
>  
> -/*
> - * The global variable 'pci_fixup_final_inited' is being used as a interim
> - * solution for calling the final quirks only during hot-plug events (not
> - * during boot processing).
> - *
> - * When the boot path's PCI device setup sequencing is addressed, we can
> - * remove the instance, and usages of, 'pci_fixup_final_inited' along with
> - * removing 'fs_initcall_sync(pci_apply_final_quirks);' and end up with a
> - * single, uniform, solution that satisfies both the boot path and the
> - * various hot-plug event paths.
> - *
> - * ToDo: Remove 'pci_fixup_final_inited'
> - */
> -bool pci_fixup_final_inited;
> -
>  static int __init pci_apply_final_quirks(void)
>  {
>   struct pci_dev *dev = NULL;
> @@ -3031,6 +3019,7 @@ static int __init pci_apply_final_quirks(void)
>   printk(KERN_DEBUG "PCI: CLS %u bytes\n",
>  pci_cache_line_size << 2);
>  
> + pci_apply_fixup_final_quirks = true;
>   for_each_pci_dev(dev) {
>   pci_fixup_device(pci_fixup_final, dev);
>   /*
> @@ -3051,7 +3040,6 @@ static int __init pci_apply_final_quirks(void)
>   pci_cache_line_size = pci_dfl_cache_line_size;
>   }
>   }
> - pci_fixup_final_inited = 1;
>  
>   if (!pci_cache_line_size) {
>   printk(KERN_DEBUG "PCI: CLS %u bytes, default %u\n",
> 



-- 
~Randy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread david

On Sat, 14 Jul 2012, Jesper Juhl wrote:


We are going to end up with a million+ (or something like that) "config
<distro>_<version>" options that are going to have to be kept up-to-date
regularly...
Do we really want that?
Maybe we do, maybe we don't - I'm not saying anything either way - just
pointing it out.

I like the general idea - let a user pick the "make my distro work" option
and then tweak from there. But, with hundreds (thousands?) of distros out
there, is it really doable? Will we be able to keep things updated
properly?


this needs to be more like 'make install' where the build system doesn't 
have specifics for every distro, but instead references a separate file 
that's provided in the same place by every distro, ideally separate from 
the kernel itself.


David Lang
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 05/12] rbtree: performance and correctness test

2012-07-13 Thread Andrew Morton
On Fri, 13 Jul 2012 15:33:35 -0700
Michel Lespinasse  wrote:

> On Fri, Jul 13, 2012 at 1:15 PM, Andrew Morton
>  wrote:
> > On Thu, 12 Jul 2012 17:31:50 -0700 Michel Lespinasse  
> > wrote:
> >>  Makefile|2 +-
> >>  lib/Kconfig.debug   |1 +
> >>  tests/Kconfig   |   18 +++
> >>  tests/Makefile  |1 +
> >>  tests/rbtree_test.c |  135 
> >> +++
> >
> > This patch does a new thing: adds a kernel self-test module into
> > lib/tests/ and sets up the infrastructure to add new kernel self-test
> > modules in that directory.
> >
> > I don't see a problem with this per-se, but it is a new thing which we
> > should think about.
> >
> > In previous such cases (eg, kernel/rcutorture.c) we put those modules
> > into the same directory as the code which is being tested.  So to
> > follow that pattern, this new code would have gone into lib/.
> >
> > If we adopt your new proposal then we should perhaps also move tests
> > such as rcutorture over into tests/.  And that makes one wonder whether
> > we should have a standalone directory for kernel selftest modules.  eg
> > tests/self-test-nmodules/.
> 
> Ah, I did not realize we had a precedent for in-tree kernel test modules.

hm, well, just because that's what we do now doesn't mean that it was a
good idea ;) These things arrive as a result of individual developers
doing stuff in their little directories and no particular thought was
put into overall structure.

It could be that it would be better to put all these tests into a
central place, rather than sprinkling them around the tree.  If so,
then your patch can lead the way, and we (ie: I) prod past and future
developers into getting with the program.

otoh, perhaps in-kernel test modules will rely on headers and constants
which are private to the implementation directory.  So perhaps
sprinkled-everywhere is the best approach.

> I don't think my proposal was significantly better than this
> precedent, so I'll just adjust my patch to conform to it:
> - move rbtree_test.c to lib/
> - modify just lib/Makefile and lib/Kconfig.debug to get the module built.
> 
> Will send a replacement patch for this (so you can drop that one patch
> from the stack and replace it with)

OK, you could do that too.  That way you avoid the problem and we can
worry about it later (if ever), as a separate activity.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] dma-fence: dma-buf synchronization (v2)

2012-07-13 Thread Rob Clark
On Fri, Jul 13, 2012 at 4:44 PM, Maarten Lankhorst
 wrote:
> Hey,
>
> Op 13-07-12 20:52, Rob Clark schreef:
>> On Fri, Jul 13, 2012 at 12:35 PM, Tom Cooksey  wrote:
>>> My other thought is around atomicity. Could this be extended to
>>> (safely) allow for hardware devices which might want to access
>>> multiple buffers simultaneously? I think it probably can with
>>> some tweaks to the interface? An atomic function which does
>>> something like "give me all the fences for all these buffers
>>> and add this fence to each instead/as-well-as"?
>> fwiw, what I'm leaning towards right now is combining dma-fence w/
>> Maarten's idea of dma-buf-mgr (not sure if you saw his patches?).  And
>> let dmabufmgr handle the multi-buffer reservation stuff.  And possibly
>> the read vs write access, although this I'm not 100% sure on... the
>> other option being the concept of read vs write (or
>> exclusive/non-exclusive) fences.
> Agreed, dmabufmgr is meant for reserving multiple buffers without deadlocks.
> The underlying mechanism for synchronization can be dma-fences, it wouldn't
> really change dmabufmgr much.
>> In the current state, the fence is quite simple, and doesn't care
>> *what* it is fencing, which seems advantageous when you get into
>> trying to deal with combinations of devices sharing buffers, some of
>> whom can do hw sync, and some who can't.  So having a bit of
>> partitioning from the code dealing w/ sequencing who can access the
>> buffers when and for what purpose seems like it might not be a bad
>> idea.  Although I'm still working through the different alternatives.
>>
> Yeah, I managed to get nouveau hooked up with generating irqs on
> completion today using an invalid command. It's also no longer a
> performance regression, so software syncing is no longer a problem
> for nouveau. i915 already generates irqs and r600 presumably too.
>
> Monday I'll take a better look at your patch, end of day now. :)

let me send you a slightly updated version.. I fixed locally some
locking fail in attach_fence() and get_fence() that I managed to
introduce when converting from global spinlock to using the
waitqueue's spinlock.

BR,
-R

> ~Maarten
> --
> To unsubscribe from this list: send the line "unsubscribe linux-media" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] cpumask: add a few comments of cpumask functions

2012-07-13 Thread Andrew Morton
On Mon, 28 May 2012 22:23:51 +0800
Alex Shi  wrote:

> Current few cpumask functions' purposes are not quite clear. Stupid
> user like myself need to dig into details for clear function
> purpose and return value.
> Add few explanation for them is helpful.
> 

It appears that Rusty has applied at least some of this patch to
linux-next.  Without reading it ;)

> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -271,6 +271,7 @@ static inline void cpumask_clear_cpu(int cpu, struct 
> cpumask *dstp)
>   * cpumask_test_cpu - test for a cpu in a cpumask
>   * @cpu: cpu number (< nr_cpu_ids)
>   * @cpumask: the cpumask pointer
> + * Returns 1 if the 'cpu' is in the old bitmap of 'cpumask', otherwise 
> returns 0

In kerneldoc we refer to function arguments by prefixing them with a
'@', not by surrounding them with single quotes.  So this should be

* Returns 1 if @cpu is in the old bitmap of @cpumask, otherwise returns 0


And the same applies to the other comments.  So can you please grab the
latest linux-next, prepare a fixup patch and also check that the patch
is complete - not all of your changes have been applied.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Jesper Juhl
On Fri, 13 Jul 2012, Linus Torvalds wrote:

> So this has long been one of my pet configuration peeves: as a user I
> am perfectly happy answering the questions about what kinds of
> hardware I want the kernel to support (I kind of know that), but many
> of the "support infrastructure" questions are very opaque, and I have
> no idea which of them any particular distribution actually depends
> on.
> 
> And it tends to change over time. For example, F14 (iirc) started
> using TMPFS and TMPFS_POSIX_ACL/XATTR for /dev. And starting in F16,
> the initrd setup requires DEVTMPFS and DEVTMPFS_MOUNT. There's been
> several times when I started with my old minimal config, and the
> resulting kernel would boot, but something wouldn't quite work right,
> and it can be very subtle indeed.
> 
> Similarly, the distro ends up having very particular requirements for
> exactly *which* security models it uses and needs, and they tend to
> change over time. And now with systemd, CGROUPS suddenly aren't just
> esoteric things that no normal person would want to use, but are used
> for basic infrastructure. And I remember being surprised by OpenSUSE
> suddenly needing the RAW table support for netfilter, because it had a
> NOTRACK rule or something.
> 
> The point I'm slowly getting to is that I would actually love to have
> *distro* Kconfig-files, where the distribution would be able to say
> "These are the minimums I *require* to work". So we'd have a "Distro"
> submenu, where you could pick the distro(s) you use, and then pick
> which release, and we'd have something like
> 
>  - distro/Kconfig:
> 
> config DISTRO_REQUIREMENTS
> bool "Pick minimal distribution requirements"
> 
> choice DISTRO
> prompt "Distribution"
> depends on DISTRO_REQUIREMENTS
> 
> config FEDORA
> config OPENSUSE
> config UBUNTU
> ...
> 
> endchoice
> 
[...]

We are going to end up with a million+ (or something like that) "config 
<distro>_<version>" options that are going to have to be kept up-to-date 
regularly...
Do we really want that?
Maybe we do, maybe we don't - I'm not saying anything either way - just 
pointing it out.

I like the general idea - let a user pick the "make my distro work" option 
and then tweak from there. But, with hundreds (thousands?) of distros out 
there, is it really doable? Will we be able to keep things updated 
properly?

Perhaps a better approach (and this is going to be controversial, so I'll 
put on my flame-repelling underwear now) would be to severely limit the 
number of available options.
KConfig is a mess (IMHO) - there's no telling what a given Linux kernel 
will support on any given distro on any given arch - there's no known 
minimum.
How about we start cutting down on the options and start saying "a Linux 
system will provide feature x and y - always ...".
Stuff like (and I'm just pulling random stuff out here) - ASLR, seccomp, 
250HZ minimum etc etc.. We could cut the KConfig options down to 10% of 
what they are now if we just made a few (hard) choices about some things 
that would always be there that everyone could count on.  If people want 
to deviate from the default minimum, sure, let them, but put it under 
*custom*, *embedded*, *specialized distro*, *you know what you are doing* 
menu options.
Configurability is good, but only to a certain degree - I think we could 
benefit from removing a *lot* of options and instead just decreeing that 
"a linux system has this"..


-- 
Jesper Juhl       http://www.chaosbits.net/
Don't top-post http://www.catb.org/jargon/html/T/top-post.html
Plain text mails only, please.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 05/12] rbtree: performance and correctness test

2012-07-13 Thread Michel Lespinasse
On Fri, Jul 13, 2012 at 1:15 PM, Andrew Morton
 wrote:
> On Thu, 12 Jul 2012 17:31:50 -0700 Michel Lespinasse  
> wrote:
>>  Makefile|2 +-
>>  lib/Kconfig.debug   |1 +
>>  tests/Kconfig   |   18 +++
>>  tests/Makefile  |1 +
>>  tests/rbtree_test.c |  135 
>> +++
>
> This patch does a new thing: adds a kernel self-test module into
> lib/tests/ and sets up the infrastructure to add new kernel self-test
> modules in that directory.
>
> I don't see a problem with this per-se, but it is a new thing which we
> should think about.
>
> In previous such cases (eg, kernel/rcutorture.c) we put those modules
> into the same directory as the code which is being tested.  So to
> follow that pattern, this new code would have gone into lib/.
>
> If we adopt your new proposal then we should perhaps also move tests
> such as rcutorture over into tests/.  And that makes one wonder whether
> we should have a standalone directory for kernel selftest modules.  eg
> tests/self-test-nmodules/.

Ah, I did not realize we had a precedent for in-tree kernel test modules.

I don't think my proposal was significantly better than this
precedent, so I'll just adjust my patch to conform to it:
- move rbtree_test.c to lib/
- modify just lib/Makefile and lib/Kconfig.debug to get the module built.

Will send a replacement patch for this (so you can drop that one patch
from the stack and replace it with)

Thanks,

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Hans de Bruin

On 07/13/2012 10:37 PM, Linus Torvalds wrote:
> So this has long been one of my pet configuration peeves: as a user I
> am perfectly happy answering the questions about what kinds of
> hardware I want the kernel to support (I kind of know that), but many
> of the "support infrastructure" questions are very opaque, and I have
> no idea which of them any particular distribution actually depends
> on.
>
> And it tends to change over time. For example, F14 (iirc) started
> using TMPFS and TMPFS_POSIX_ACL/XATTR for /dev. And starting in F16,
> the initrd setup requires DEVTMPFS and DEVTMPFS_MOUNT. There's been
> several times when I started with my old minimal config, and the
> resulting kernel would boot, but something wouldn't quite work right,
> and it can be very subtle indeed.
>
> Similarly, the distro ends up having very particular requirements for
> exactly *which* security models it uses and needs, and they tend to
> change over time. And now with systemd, CGROUPS suddenly aren't just
> esoteric things that no normal person would want to use, but are used
> for basic infrastructure. And I remember being surprised by OpenSUSE
> suddenly needing the RAW table support for netfilter, because it had a
> NOTRACK rule or something.
>
> The point I'm slowly getting to is that I would actually love to have
> *distro* Kconfig-files, where the distribution would be able to say
> "These are the minimums I *require* to work". So we'd have a "Distro"
> submenu, where you could pick the distro(s) you use, and then pick
> which release, and we'd have something like
>
>   - distro/Kconfig:
>
>  config DISTRO_REQUIREMENTS
>  bool "Pick minimal distribution requirements"
>
>  choice DISTRO
>  prompt "Distribution"
>  depends on DISTRO_REQUIREMENTS
>
>  config FEDORA
>  config OPENSUSE
>  config UBUNTU
>  ...
>
>  endchoice
>
> and then depending on the DISTRO config, we'd include one of the
> distro-specific ones with lists of supported distro versions and then
> the random config settings for that version:
>
>   - distro/Kconfig.suse:
>
>  config OPENSUSE_121
>  select OPENSUSE_11
>  select IP_NF_RAW  # ..
>
>   - distro/Kconfig.Fedora:
>
>  config FEDORA_16
>  select FEDORA_15
>  select DEVTMPFS   # F16 initrd needs this
>  select DEVTMPFS_MOUNT  # .. and expects the kernel to mount
> DEVTMPFS automatically
>  ...
>
>  config FEDORA_17
>  select FEDORA_16
>  select CGROUP_xyzzy
>  ...

Could this be made more dynamic? I would like to download a minimal 
config file from my distro's website and perhaps add my own minimal 
config for the hardware I own and put both downloads somewhere in my 
local tree, or have makemenuconfig ask me for a location of my minimal 
config files?


--
Hans
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Josh Boyer
On Fri, Jul 13, 2012 at 02:17:30PM -0700, Linus Torvalds wrote:
> On Fri, Jul 13, 2012 at 2:02 PM, Dave Jones  wrote:
> >
> > As long as you don't mind these being added after the fact, I suppose
> > it would be workable.  The reason I say that is sometimes, it even catches 
> > *us*
> > by surprise.  We recently found out our virtualisation guys started
> > using sch_htb for example, and we inadvertently broke it when we moved
> > its module to a 'not always installed' kernel subpackage. (and before that, 
> > 9PFS..)
> >
> > People don't tell us anything, but somehow expect things to keep working.
> 
> I think even a "educated guess" config file is better than what we have now.
> 
> The *two* requirements (and they're really the same theme) I
> personally think we should have for this are
> 
>  -  I think every single "select" for these things should come with a
> comment about what it is about and why the distro needs it (to show
> there was some thought involved and not just a blind "took it from the
> distro config")
> 
>  - It should be about *minimal* settings. I'd rather have too few
> things and the occasional complaint about "oh, it didn't work because
> it missed XYZ" than have it grow to contain all the options just
> because somebody decided to just add random things until things
> worked.

I'd agree that should be the goal.  It seems like something worth at
least trying to get to.  Even if we don't wind up merging them into the
kernel, it will at least lead to a better documented distro config for
every one that tries it.

josh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ns: move free_nsproxy() out of do_exit() path

2012-07-13 Thread Kirill A. Shutemov
On Fri, Jul 13, 2012 at 02:08:06PM -0700, Andrew Morton wrote:
> On Fri, 13 Jul 2012 14:48:08 +0300
> "Kirill A. Shutemov"  wrote:
> 
> > From: "Kirill A. Shutemov" 
> > 
> > free_nsproxy() is too heavy to be on exit path. Let's free namespaces
> > asynchronously to not block exit_group() syscall.
> 
> Please be specific, and complete.
> 
> Why is it "too heavy"?  Where is the time being spent?  Is it spent in
> D state or is it spent burning CPU cycles?  Does the patch simply
> offload the work into kernel threads, providing no net gain?

Unpatched switch_task_namespaces() takes 0.010 - 0.011 seconds on my
machine. About 0.008 of the time is synchronize_rcu().

So it's mostly waiting with wait_for_completion() in wait_rcu_gp().
It means D state.

> > The patch also fixes bug with free namespace without synchronize_rcu() 
> > through
> > put_nsproxy().
> 
> I just don't understand this description.

IIUC current locking model requires synchronize_rcu() before
free_nsproxy(). put_nsproxy() calls free_nsproxy() without
synchronize_rcu(). So it's racy.

I guess it was missed during switch to RCU (see cf7b708).
Pavel, am I right?

> Please send a new one which
> includes all details about the bug, including a description of
> the user-visible effects of the bug.

Okay, I will.

-- 
 Kirill A. Shutemov
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread david

On Fri, 13 Jul 2012, Linus Torvalds wrote:


On Fri, Jul 13, 2012 at 2:17 PM, Casey Schaufler  wrote:


Oh dear. I would expect Fedora to say that they require SELinux,
thereby making it unusable by anyone doing LSM development.


Oh, *absolutely*.

These options would *not* be meant for people doing odd things and
experimenting with configs.

If you do that, then you might *start* by saying "I want this distro"
to get the initial guesstimate of the config file you want, but then
edit the .config by hand later (and remove the "I want all the Fedora
requirements" option, of course).

This is explicitly and exclusively for normal users. The whole point
of "expert configurator for special cases" should not be given any
thought at all - those kinds of people should simply answer "No" to
the "Do you want the distro basic kconfig requirements" question.


hopefully this can be made a little easier.

more of a 'enable anything set in this file, then give me control again so 
I can turn things off' rather than having to manually edit the .config 
file.


If this is done as a hard set of dependency settings, it will be very 
annoying for people who for any reason want to disable something that the 
distro considers 'essential'.


I also _really_ like the idea of being able to have a vmware option that 
enables the minimum devices that are needed to run.


Having these be hard dependencies also seems like it would make 
interactions between these sorts of things much more likely to cause 
problems.


If however they are one-shot "go through this file and enable anything 
that it says to turn on" things that then let you turn anything off, it 
seems much less likely to cause problems.


and if we can then get some of the big hardware vendors to create such 
files to enable all the drivers needed for their hardware (the big 
things are easy, it's when you get into the internal monitoring busses and 
so on that things get messy)


David Lang
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [GIT pull] leap second fixes for 3.5

2012-07-13 Thread Thomas Gleixner
On Fri, 13 Jul 2012, Thomas Gleixner wrote:

> Linus,
> 
> please pull the latest timers-urgent-for-linus git tree from:
> 
>git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
> timers-urgent-for-linus
> 
> It's a rather large series, but well discussed, refined and
> reviewed. It got a massive testing by John, Prarit and tip.
> 
> In theory we could split it into two parts. The first two patches
> 
>   5baefd6: hrtimer: Update hrtimer base offsets each hrtimer_interrupt
>   f6c06ab: timekeeping: Provide hrtimer update function
> 
> are merely preventing the stuff loops forever issues, which people
> have observed.

I'm a moron. Copied from teh wrong direction. That should be:

  f55a6fa: hrtimer: Provide clock_was_set_delayed()
  4873fa0: timekeeping: Fix leapsecond triggered load spike issue

That are the two commits which do the basic damage containment.

Gah, I explicitly asked some involved person to look over that mail
before I sent it. :(
 
Grumbling out of a brown paperbag

  tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Paul Bolle
On Fri, 2012-07-13 at 17:55 -0400, Dave Jones wrote:
> My point is that I don't think there's many people actually using them.
> (maybe more on the niche platforms, but x86[64] ? I'm sceptical they're used 
> at all)

I guess you're right. Personally, I tend to start my journeys in self
compiled kernel land by using something I know that works as a starting
point. Ie, I use some distribution's kernel, notice that things seem to
mostly work, and use the .config of that kernel to start exploring
whatever it is I'm interested in. I can't remember ever feeling the urge
to use some defconfig to start these journeys. 


Paul Bolle

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Tony Luck
I always thought that the x86 defconfig file was the one that Linus
used for his primary machine.

-Tony
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/6] KGDB/KDB FIQ (NMI) debugger

2012-07-13 Thread Anton Vorontsov
On Fri, Jul 13, 2012 at 09:43:45AM -0700, Colin Cross wrote:
[...]
> I was referring to the security implications, not size.  Leaving KDB
> on is effectively instant root access over the serial console.

Oh, I see. Yes, for this we'd need to disable all modification commands.

[...]
> > The thing is, we even have a standard sequence for entering KDB,
> > it is GDB-protocol command $3#33, so it actually makes sense to
> > implement this. This would be the only async command, and it doesn't
> > affect anything but the new code. I prepared a separate patch for this.
> 
> I would suggest making the sequence longer than just return.  A single
> character is not that unlikely to be generated by random noise - I've
> seen multiple devices reboot when the serial console was connected
> because it received a SysRq-Crash (a break is all zeroes, which is
> very common while shorting the lines as the console is plugged in, and
> then random noise sent a 'c').

No no, it's not just return. It is either return or the longer '$3#33'
escape sequence. Default is $3#33, so it should be pretty safe (but of
course we can make it even longer, or even configurable).

Thanks,

-- 
Anton Vorontsov
Email: cbouatmai...@gmail.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] Moved core dump functionality into its own file

2012-07-13 Thread Andrew Morton
On Tue, 3 Jul 2012 12:53:34 +0800
Cong Wang  wrote:

> On Tue, Jul 3, 2012 at 8:38 AM, Alex Kelly  wrote:
> > From: Alex 
> >
> > This was done in preparation for making core dump functionality optional.
> >
> > The variable "suid_dumpable" and associated functions are left in fs/exec.c
> > because they're used elsewhere, such as in ptrace.
> >
> > Signed-off-by: Alex Kelly 
> > Reviewed-by: Josh Triplett 
> 
> Looks good to me:

Me too, but the first patch conflicts somewhat with a few pending
changes in linux-next.  I could fix them up, but would prefer that the
result be tested.  Alex, could you please redo the patches against
linux-next or mmotm?

Also, the patch titles could be improved.  I suggest

coredump: move core dump functionality into its own file
coredump: make core dump functionality optional

Documentation/SubmittingPatches section 15 describes the thinking here.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Dave Jones
On Fri, Jul 13, 2012 at 11:50:25PM +0200, Paul Bolle wrote:

 > But just removing all the certainly unused macros probably wouldn't have
 > made a noticeable difference to anyone using those defconfig files
 > anyway.

My point is that I don't think there's many people actually using them.
(maybe more on the niche platforms, but x86[64] ? I'm sceptical they're used at 
all)

Dave
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC 2/2] net: Add support for NTB virtual ethernet device

2012-07-13 Thread Jon Mason
A virtual ethernet device that uses the NTB transport API to send/receive data.

Signed-off-by: Jon Mason 
---
 drivers/net/Kconfig  |4 +
 drivers/net/Makefile |1 +
 drivers/net/ntb_netdev.c |  411 ++
 3 files changed, 416 insertions(+), 0 deletions(-)
 create mode 100644 drivers/net/ntb_netdev.c

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 0c2bd80..9bf8a71 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -178,6 +178,10 @@ config NETPOLL_TRAP
 config NET_POLL_CONTROLLER
def_bool NETPOLL
 
+config NTB_NETDEV
+   tristate "Virtual Ethernet over NTB"
+   depends on NTB
+
 config RIONET
tristate "RapidIO Ethernet over messaging driver support"
depends on RAPIDIO
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 3d375ca..9890148 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -69,3 +69,4 @@ obj-$(CONFIG_USB_IPHETH)+= usb/
 obj-$(CONFIG_USB_CDC_PHONET)   += usb/
 
 obj-$(CONFIG_HYPERV_NET) += hyperv/
+obj-$(CONFIG_NTB_NETDEV) += ntb_netdev.o
diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
new file mode 100644
index 000..bcbd9d4
--- /dev/null
+++ b/drivers/net/ntb_netdev.c
@@ -0,0 +1,411 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copy
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel PCIe NTB Network Linux driver
+ *
+ * Contact Information:
+ * Jon Mason 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#define NTB_NETDEV_VER "0.4"
+
+MODULE_DESCRIPTION(KBUILD_MODNAME);
+MODULE_VERSION(NTB_NETDEV_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+struct ntb_netdev {
+   struct net_device *ndev;
+   struct ntb_transport_qp *qp;
+};
+
+#defineNTB_TX_TIMEOUT_MS   1000
+#defineNTB_RXQ_SIZE100
+
+static struct net_device *netdev;
+
+static void ntb_netdev_event_handler(int status)
+{
+   struct ntb_netdev *dev = netdev_priv(netdev);
+
+   pr_debug("%s: Event %x, Link %x\n", KBUILD_MODNAME, status,
+ntb_transport_link_query(dev->qp));
+
+   /* Currently, only link status event is supported */
+   if (status)
+   netif_carrier_on(netdev);
+   else
+   netif_carrier_off(netdev);
+}
+
+static void 

Re: [RFC] dma-fence: dma-buf synchronization (v2)

2012-07-13 Thread Maarten Lankhorst
Hey,

Op 13-07-12 20:52, Rob Clark schreef:
> On Fri, Jul 13, 2012 at 12:35 PM, Tom Cooksey  wrote:
>> My other thought is around atomicity. Could this be extended to
>> (safely) allow for hardware devices which might want to access
>> multiple buffers simultaneously? I think it probably can with
>> some tweaks to the interface? An atomic function which does
>> something like "give me all the fences for all these buffers
>> and add this fence to each instead/as-well-as"?
> fwiw, what I'm leaning towards right now is combining dma-fence w/
> Maarten's idea of dma-buf-mgr (not sure if you saw his patches?).  And
> let dmabufmgr handle the multi-buffer reservation stuff.  And possibly
> the read vs write access, although this I'm not 100% sure on... the
> other option being the concept of read vs write (or
> exclusive/non-exclusive) fences.
Agreed, dmabufmgr is meant for reserving multiple buffers without deadlocks.
The underlying mechanism for synchronization can be dma-fences, it wouldn't
really change dmabufmgr much.
> In the current state, the fence is quite simple, and doesn't care
> *what* it is fencing, which seems advantageous when you get into
> trying to deal with combinations of devices sharing buffers, some of
> whom can do hw sync, and some who can't.  So having a bit of
> partitioning from the code dealing w/ sequencing who can access the
> buffers when and for what purpose seems like it might not be a bad
> idea.  Although I'm still working through the different alternatives.
>
Yeah, I managed to get nouveau hooked up with generating irqs on
completion today using an invalid command. It's also no longer a
performance regression, so software syncing is no longer a problem
for nouveau. i915 already generates irqs and r600 presumably too.

Monday I'll take a better look at your patch, end of day now. :)

~Maarten
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/3] fs/sysv: stop using write_supers and s_dirt

2012-07-13 Thread Andrew Morton
On Thu, 12 Jul 2012 17:37:58 +0300
Artem Bityutskiy  wrote:

> On Tue, 2012-07-03 at 16:43 +0300, Artem Bityutskiy wrote:
> > This patch-set makes sysv file-system stop using the VFS '->write_supers()'
> > call-back and the '->s_dirt' superblock field because I plan to remove them
> > once all users are gone.
> 
> Hi Andrew,
> 
> would you please pick this patch-set as well? I also sent you an update
> for hfs and hfsplus file-systems today - found a bug while doing some
> more testing. Also, I sent UFS changes today, very similar. Most
> probably no one will reply and I hope you would pick them as well.
> 

The issue Alan raised around the superblock timestamp is still up in
the air.  I guess he's a slow typist ;)

My take is "no, we don't need to do that any more" - surely all Linux
systems have a functional hardware clock.  But the changelog should be
updated to describe and justify the decision.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [opensuse-kernel] Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread richard -rw- weinberger
On Fri, Jul 13, 2012 at 10:54 PM, Myklebust, Trond
 wrote:
> We could at least make selection of a minimal set of drivers for the
> more common virtualised platforms a lot easier.
> Right now, you need to hunt through 30+ different menus in order to find
> what you need to run in a basic KVM virtual machine...

Yes, every time I build a kernel to be used on KVM I forget something. :-\

We could introduce a section in Kconfig which contains selections for
common use cases.
E.g. as Linus requested for minimal distro requirements but also
selections for various common
guest configurations.

-- 
Thanks,
//richard
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Geert Uytterhoeven
On Fri, Jul 13, 2012 at 11:02 PM, Dave Jones  wrote:
> I wish defconfig was actually something useful like this, instead of..
> what the hell is it exactly ? No-one even seems to agree, other than
> "random selection of options, many of which were removed n years ago"

It's just too difficult to update them in a sane way.

I mean, I have my own set of defconfigs for all supported m68k-platforms,
but getting them in sync and ready for submitting an update seems to be a
multi-year project, and there are always more important (and more fun) things
to do. So that's why I haven't gotten to updating them since the defconfig
reduction.

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/6] m32r: Fix 'fix breakage from "m32r: use generic ptrace_resume code"' fallout

2012-07-13 Thread Geert Uytterhoeven
commit acdc0d5ef9dd74534fe8df77a2056fa1d911abe5 ('m32r: fix breakage from
"m32r: use generic ptrace_resume code"') tried to fix commit
e34112e3966fc466ced2698e6c196bb50b1ee20e ("m32r: use generic ptrace_resume
code") by returning values in a function returning void, causing:

arch/m32r/kernel/ptrace.c: In function 'user_enable_single_step':
arch/m32r/kernel/ptrace.c:594:3: warning: 'return' with a value, in function 
returning void [enabled by default]
arch/m32r/kernel/ptrace.c:598:3: warning: 'return' with a value, in function 
returning void [enabled by default]
arch/m32r/kernel/ptrace.c:601:3: warning: 'return' with a value, in function 
returning void [enabled by default]
arch/m32r/kernel/ptrace.c:604:2: warning: 'return' with a value, in function 
returning void [enabled by default]

Remove the unneeded return values.

Signed-off-by: Geert Uytterhoeven 
Cc: Al Viro 
Cc: Christoph Hellwig 
--
Is this the correct fix? There's no way to propagate failures
---
 arch/m32r/kernel/ptrace.c |7 +++
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index 4c03361..51f5e9a 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -591,17 +591,16 @@ void user_enable_single_step(struct task_struct *child)
 
if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0)
!= sizeof(insn))
-   return -EIO;
+   return;
 
compute_next_pc(insn, pc, &next_pc, child);
if (next_pc & 0x8000)
-   return -EIO;
+   return;
 
if (embed_debug_trap(child, next_pc))
-   return -EIO;
+   return;
 
invalidate_cache();
-   return 0;
 }
 
 void user_disable_single_step(struct task_struct *child)
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Linus Torvalds
On Fri, Jul 13, 2012 at 2:17 PM, Casey Schaufler  wrote:
>
> Oh dear. I would expect Fedora to say that they require SELinux,
> thereby making it unusable by anyone doing LSM development.

Oh, *absolutely*.

These options would *not* be meant for people doing odd things and
experimenting with configs.

If you do that, then you might *start* by saying "I want this distro"
to get the initial guesstimate of the config file you want, but then
edit the .config by hand later (and remove the "I want all the Fedora
requirements" option, of course).

This is explicitly and exclusively for normal users. The whole point
of "expert configurator for special cases" should not be given any
thought at all - those kinds of people should simply answer "No" to
the "Do you want the distro basic kconfig requirements" question.

Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 6/6] m32r: Make memset() global for CONFIG_KERNEL_BZIP2=y

2012-07-13 Thread Geert Uytterhoeven
arch/m32r/boot/compressed/misc.c:31:14: error: static declaration of 'memset' 
follows non-static declaration
make[5]: *** [arch/m32r/boot/compressed/misc.o] Error 1
make[4]: *** [arch/m32r/boot/compressed/vmlinux] Error 2

Remove the static keyword to fix this.

Signed-off-by: Geert Uytterhoeven 
---
 arch/m32r/boot/compressed/misc.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 3147aa2..28a0952 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -28,7 +28,7 @@ static unsigned long free_mem_ptr;
 static unsigned long free_mem_end_ptr;
 
 #ifdef CONFIG_KERNEL_BZIP2
-static void *memset(void *s, int c, size_t n)
+void *memset(void *s, int c, size_t n)
 {
char *ss = s;
 
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 5/6] m32r: Add memcpy() for CONFIG_KERNEL_GZIP=y

2012-07-13 Thread Geert Uytterhoeven
  LD  arch/m32r/boot/compressed/vmlinux
arch/m32r/boot/compressed/misc.o: In function `zlib_updatewindow':
misc.c:(.text+0x190): undefined reference to `memcpy'
misc.c:(.text+0x190): relocation truncated to fit: R_M32R_26_PLTREL against 
undefined symbol `memcpy'
misc.c:(.text+0x1e0): undefined reference to `memcpy'
misc.c:(.text+0x1e0): relocation truncated to fit: R_M32R_26_PLTREL against 
undefined symbol `memcpy'
misc.c:(.text+0x218): undefined reference to `memcpy'
misc.c:(.text+0x218): relocation truncated to fit: R_M32R_26_PLTREL against 
undefined symbol `memcpy'
arch/m32r/boot/compressed/misc.o: In function `zlib_inflate':
misc.c:(.text+0x171c): undefined reference to `memcpy'
misc.c:(.text+0x171c): relocation truncated to fit: R_M32R_26_PLTREL against 
undefined symbol `memcpy'
make[5]: *** [arch/m32r/boot/compressed/vmlinux] Error 1

Add our own implementation of memcpy() to fix this.

Signed-off-by: Geert Uytterhoeven 
---
 arch/m32r/boot/compressed/misc.c |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 370d608..3147aa2 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -39,6 +39,16 @@ static void *memset(void *s, int c, size_t n)
 #endif
 
 #ifdef CONFIG_KERNEL_GZIP
+void *memcpy(void *dest, const void *src, size_t n)
+{
+   char *d = dest;
+   const char *s = src;
+   while (n--)
+   *d++ = *s++;
+
+   return dest;
+}
+
 #define BOOT_HEAP_SIZE 0x1
 #include "../../../../lib/decompress_inflate.c"
 #endif
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/6] m32r: Fix pull clearing RESTORE_SIGMASK into block_sigmask() fallout

2012-07-13 Thread Geert Uytterhoeven
commit a610d6e672d6d3723e8da257ad4a8a288a8f2f89 ("pull clearing
RESTORE_SIGMASK into block_sigmask()") caused:

arch/m32r/kernel/signal.c: In function 'handle_signal':
arch/m32r/kernel/signal.c:289:6: warning: 'return' with a value, in function 
returning void [enabled by default]

Remove the return value it forgot to remove.

Signed-off-by: Geert Uytterhoeven 
Cc: Al Viro 
---
 arch/m32r/kernel/signal.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index f3fb2c0..d0f60b9 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -286,7 +286,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, 
siginfo_t *info,
case -ERESTARTNOINTR:
regs->r0 = regs->orig_r0;
if (prev_insn(regs) < 0)
-   return -EFAULT;
+   return;
}
}
 
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/6] m32r: Consistently use "suffix-$(...)"

2012-07-13 Thread Geert Uytterhoeven
commit a556bec9955c8e47b40a87dbfeef6f24d3b2228f ("m32r: fix
arch/m32r/boot/compressed/Makefile") changed "$(suffix_y)" to
"$(suffix-y)", but didn't update any location where "suffix_y" is set,
causing:

make[5]: *** No rule to make target `arch/m32r/boot/compressed/vmlinux.bin.', 
needed by `arch/m32r/boot/compressed/piggy.o'.  Stop.
make[4]: *** [arch/m32r/boot/compressed/vmlinux] Error 2
make[3]: *** [zImage] Error 2

Correct the other locations to fix this.

Signed-off-by: Geert Uytterhoeven 
---
 arch/m32r/boot/compressed/Makefile |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/m32r/boot/compressed/Makefile 
b/arch/m32r/boot/compressed/Makefile
index 177716b..01729c2 100644
--- a/arch/m32r/boot/compressed/Makefile
+++ b/arch/m32r/boot/compressed/Makefile
@@ -43,9 +43,9 @@ endif
 
 OBJCOPYFLAGS += -R .empty_zero_page
 
-suffix_$(CONFIG_KERNEL_GZIP)   = gz
-suffix_$(CONFIG_KERNEL_BZIP2)  = bz2
-suffix_$(CONFIG_KERNEL_LZMA)   = lzma
+suffix-$(CONFIG_KERNEL_GZIP)   = gz
+suffix-$(CONFIG_KERNEL_BZIP2)  = bz2
+suffix-$(CONFIG_KERNEL_LZMA)   = lzma
 
 $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE
$(call if_changed,ld)
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/6] m32r: Remove duplicate definition of PTRACE_O_TRACESYSGOOD

2012-07-13 Thread Geert Uytterhoeven
include/linux/ptrace.h:66:0: warning: "PTRACE_O_TRACESYSGOOD" redefined 
[enabled by default]
arch/m32r/include/asm/ptrace.h:117:0: note: this is the location of the 
previous definition

We already have it in <linux/ptrace.h>, so remove it from <asm/ptrace.h>

Signed-off-by: Geert Uytterhoeven 
---
 arch/m32r/include/asm/ptrace.h |3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/arch/m32r/include/asm/ptrace.h b/arch/m32r/include/asm/ptrace.h
index 5275275..4313aa6 100644
--- a/arch/m32r/include/asm/ptrace.h
+++ b/arch/m32r/include/asm/ptrace.h
@@ -113,9 +113,6 @@ struct pt_regs {
 
 #define PTRACE_OLDSETOPTIONS   21
 
-/* options set using PTRACE_SETOPTIONS */
-#define PTRACE_O_TRACESYSGOOD  0x0001
-
 #ifdef __KERNEL__
 
 #include   /* M32R_PSW_BSM, M32R_PSW_BPM */
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Linus Torvalds
On Fri, Jul 13, 2012 at 2:02 PM, Dave Jones  wrote:
>
> As long as you don't mind these being added after the fact, I suppose
> it would be workable.  The reason I say that is sometimes, it even catches 
> *us*
> by surprise.  We recently found out our virtualisation guys started
> using sch_htb for example, and we inadvertently broke it when we moved
> its module to a 'not always installed' kernel subpackage. (and before that, 
> 9PFS..)
>
> People don't tell us anything, but somehow expect things to keep working.

I think even a "educated guess" config file is better than what we have now.

The *two* requirements (and they're really the same theme) I
personally think we should have for this are

 -  I think every single "select" for these things should come with a
comment about what it is about and why the distro needs it (to show
there was some thought involved and not just a blind "took it from the
distro config")

 - It should be about *minimal* settings. I'd rather have too few
things and the occasional complaint about "oh, it didn't work because
it missed XYZ" than have it grow to contain all the options just
because somebody decided to just add random things until things
worked.

Other than that, even if it only gets you *closer* to a kernel that
works with that distro, I think it doesn't have to be all that
perfect. Because the alternative is what we have now.

   Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Casey Schaufler
On 7/13/2012 1:37 PM, Linus Torvalds wrote:
> So this has long been one of my pet configuration peeves: as a user I
> am perfectly happy answering the questions about what kinds of
> hardware I want the kernel to support (I kind of know that), but many
> of the "support infrastructure" questions are very opaque, and I have
> no idea which of them any particular distribution actually depends
> on.
>
> And it tends to change over time. For example, F14 (iirc) started
> using TMPFS and TMPFS_POSIX_ACL/XATTR for /dev. And starting in F16,
> the initrd setup requires DEVTMPFS and DEVTMPFS_MOUNT. There's been
> several times when I started with my old minimal config, and the
> resulting kernel would boot, but something wouldn't quite work right,
> and it can be very subtle indeed.
>
> Similarly, the distro ends up having very particular requirements for
> exactly *which* security models it uses and needs, and they tend to
> change over time. And now with systemd, CGROUPS suddenly aren't just
> esoteric things that no normal person would want to use, but are used
> for basic infrastructure. And I remember being surprised by OpenSUSE
> suddenly needing the RAW table support for netfilter, because it had a
> NOTRACK rule or something.
>
> The point I'm slowly getting to is that I would actually love to have
> *distro* Kconfig-files, where the distribution would be able to say
> "These are the minimums I *require* to work".

Oh dear. I would expect Fedora to say that they require SELinux,
thereby making it unusable by anyone doing LSM development. It
would also make it more difficult for the people who don't want
any LSM (e.g. everyone sane) to configure the kernel they want.

This is the example that I see because of my particular biases.
I expect that there are similar things that distros do in other
areas that will have the same effect. The distro developers may
have decided that a feature is too cool to live without and
include it in their configuration even when it's not really
necessary. Plus, do you really think that they're going to
clean things out of their configuration when they decide that
they no longer need them?


>  So we'd have a "Distro"
> submenu, where you could pick the distro(s) you use, and then pick
> which release, and we'd have something like
>
>  - distro/Kconfig:
>
> config DISTRO_REQUIREMENTS
> bool "Pick minimal distribution requirements"
>
> choice DISTRO
> prompt "Distribution"
> depends on DISTRO_REQUIREMENTS
>
> config FEDORA
> config OPENSUSE
> config UBUNTU
> ...
>
> endchoice
>
> and then depending on the DISTRO config, we'd include one of the
> distro-specific ones with lists of supported distro versions and then
> the random config settings for that version:
>
>  - distro/Kconfig.suse:
>
> config OPENSUSE_121
> select OPENSUSE_11
> select IP_NF_RAW  # ..
>
>  - distro/Kconfig.Fedora:
>
> config FEDORA_16
> select FEDORA_15
> select DEVTMPFS   # F16 initrd needs this
> select DEVTMPFS_MOUNT  # .. and expects the kernel to mount
> DEVTMPFS automatically
> ...
>
> config FEDORA_17
> select FEDORA_16
> select CGROUP_xyzzy
> ...
>
> and the point would be that it would make it much easier for a normal
> user (and quite frankly, I want to put myself in that group too) to
> make a kernel config that "just works".
>
> Sure, you can copy the config file that came with the distro, but it
> has tons of stuff that really isn't required. Not just in hardware,
> but all the debug choices etc that are really a user choice. And it's
> really hard to figure out - even for somebody like me - what a minimal
> usable kernel is.
>
> And yes, I know about "make localmodconfig". That's missing the point
> for the same reason the distro config is missing the point.
>
> Comments? It doesn't have to start out perfect, but I think it would
> *really* help make the kernel configuration much easier for people.
>
> In addition to the "minimal distro settings", we might also have a few
> "common platform" settings, so that you could basically do a "hey, I
> have a modern PC laptop, make it pick the obvious stuff that a normal
> person needs, like USB storage, FAT/VFAT support, the core power
> management etc". The silly stuff that you need, and that
> "localyesconfig" actually misses because if you haven't inserted a USB
> thumb drive, you won't necessarily have the FAT module loaded, but we
> all know you do want it in real life. But that's really independent
> issue, so let's keep it to just distro core things at first, ok?
>
> Would something like this make sense to people? I really think that
> "How do I generate a kernel config file" is one of those things that
> keeps normal people from compiling their own kernel. And we *want*
> people to compile their own kernel so that they can help with things
> like bisecting etc. The more, the merrier.
>
>

Re: [PATCH] add blockconsole version 1.1

2012-07-13 Thread Borislav Petkov
On Fri, Jul 13, 2012 at 12:20:09PM -0400, Jörn Engel wrote:
> > > Logging to partitions is not supported.
> > 
> > That could be useful though. We have a setup here where we create a
> > partition on the block device and install the OS there for testing
> > purposes while leaving room on the device after it for other OS installs
> > and other people to test stuff.
> > 
> > If blockconsole could log to partitions, one could create an additional
> > small partition exactly for such logs.
> > 
> > I don't know how much work adding logging to partitions is though.
> 
> The actual logging shouldn't care one bit.  But abusing the
> partitioning code to detect a blockconsole device would no longer
> work, so some alternative for that is needed.
> 
> What I like about abusing the partitioning code is that blockconsole
> just works, without any command line parameters or other setup, either
> on boot or by plugging in a new device.  And because our particular use
> case is a dedicated usb stick, we don't mind the drawbacks much.

Ok, actually using a dedicated usb stick obviates the need to log to
partitions - el cheapo usb sticks are ubiquitous. And I didn't realize
the usb stick use case when talking about the partitions example above
so forget what I said, logging to a dedicated usb stick is the easiest.

You probably could mention this in the docs as the most natural use case
for blockconsole if you haven't done so.

[ … ]

> Thanks!  The patch below should do that - provided my brain is slightly
> less broken than it must have been yesterday.

Thanks, will run it next week and let you know.

-- 
Regards/Gruss,
Boris.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] bnx2: update bnx2-mips-09 firmware to bnx2-mips-09-6.2.1b

2012-07-13 Thread Michael Chan
On Fri, 2012-07-13 at 15:09 +0100, Chris Webb wrote: 
> Is there a more automatic method than going through the source for each
> configured driver and setting CONFIG_EXTRA_FIRMWARE manually to list the
> relevant firmwares? Is there any way to give the kernel the location of
> linux-firmware and have it compile in everything needed for the selected
> drivers, as used to happen with the firmware/ subdirectory?
> CONFIG_EXTRA_FIRMWARE_DIR doesn't seem to do anything with
> CONFIG_EXTRA_FIRMWARE empty, so I don't think it does what I'm hoping?
> 

Most users will just download the linux-firmware tree:

//git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git

and copy all the firmware files to /lib/firmware.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kconfig: Print errors to stderr in the Makefile

2012-07-13 Thread Michal Marek
Dne 7.7.2012 23:42, Michal Marek napsal(a):
> Signed-off-by: Michal Marek 
> ---
>  scripts/kconfig/Makefile |   28 ++--
>  1 files changed, 14 insertions(+), 14 deletions(-)

Pushed to kbuild.git#kconfig.

Michal
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT pull] leap second fixes for 3.5

2012-07-13 Thread Thomas Gleixner
Linus,

please pull the latest timers-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
timers-urgent-for-linus

It's a rather large series, but well discussed, refined and
reviewed. It got massive testing by John, Prarit and tip.

In theory we could split it into two parts. The first two patches

  5baefd6: hrtimer: Update hrtimer base offsets each hrtimer_interrupt
  f6c06ab: timekeeping: Provide hrtimer update function

are merely preventing the stuff loops forever issues, which people
have observed.

But there is no point in delaying the other 4 commits which achieve
full correctness into 3.6 as they are tagged for stable anyway. And I
rather prefer to have the full fixes merged in bulk than a "prevent
the observable wreckage and deal with the hidden fallout later"
approach.

Thanks,

tglx

-->
John Stultz (3):
  hrtimer: Provide clock_was_set_delayed()
  timekeeping: Fix leapsecond triggered load spike issue
  hrtimer: Update hrtimer base offsets each hrtimer_interrupt

Thomas Gleixner (3):
  timekeeping: Maintain ktime_t based offsets for hrtimers
  hrtimers: Move lock held region in hrtimer_interrupt()
  timekeeping: Provide hrtimer update function


 include/linux/hrtimer.h   |   10 ++-
 kernel/hrtimer.c  |   53 ++---
 kernel/time/timekeeping.c |   63 +++-
 3 files changed, 107 insertions(+), 19 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index fd0dc30..cc07d27 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -165,6 +165,7 @@ enum  hrtimer_base_type {
  * @lock:  lock protecting the base and associated clock bases
  * and timers
  * @active_bases:  Bitfield to mark bases with active timers
+ * @clock_was_set: Indicates that clock was set from irq context.
  * @expires_next:  absolute time of the next event which was scheduled
  * via clock_set_next_event()
  * @hres_active:   State of high resolution mode
@@ -177,7 +178,8 @@ enum  hrtimer_base_type {
  */
 struct hrtimer_cpu_base {
raw_spinlock_t  lock;
-   unsigned long   active_bases;
+   unsigned int    active_bases;
+   unsigned int    clock_was_set;
 #ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires_next;
int hres_active;
@@ -286,6 +288,8 @@ extern void hrtimer_peek_ahead_timers(void);
 # define MONOTONIC_RES_NSEC    HIGH_RES_NSEC
 # define KTIME_MONOTONIC_RES   KTIME_HIGH_RES
 
+extern void clock_was_set_delayed(void);
+
 #else
 
 # define MONOTONIC_RES_NSEC    LOW_RES_NSEC
@@ -306,6 +310,9 @@ static inline int hrtimer_is_hres_active(struct hrtimer 
*timer)
 {
return 0;
 }
+
+static inline void clock_was_set_delayed(void) { }
+
 #endif
 
 extern void clock_was_set(void);
@@ -320,6 +327,7 @@ extern ktime_t ktime_get(void);
 extern ktime_t ktime_get_real(void);
 extern ktime_t ktime_get_boottime(void);
 extern ktime_t ktime_get_monotonic_offset(void);
+extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t 
*offs_boot);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ae34bf5..6db7a5e 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -657,6 +657,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer 
*timer,
return 0;
 }
 
+static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
+{
+   ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
+   ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
+
+   return ktime_get_update_offsets(offs_real, offs_boot);
+}
+
 /*
  * Retrigger next event is called after clock was set
  *
@@ -665,22 +673,12 @@ static inline int hrtimer_enqueue_reprogram(struct 
hrtimer *timer,
 static void retrigger_next_event(void *arg)
 {
struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
-   struct timespec realtime_offset, xtim, wtm, sleep;
 
if (!hrtimer_hres_active())
return;
 
-   /* Optimized out for !HIGH_RES */
-   get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
-   set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
-
-   /* Adjust CLOCK_REALTIME offset */
raw_spin_lock(&base->lock);
-   base->clock_base[HRTIMER_BASE_REALTIME].offset =
-   timespec_to_ktime(realtime_offset);
-   base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
-   timespec_to_ktime(sleep);
-
+   hrtimer_update_base(base);
hrtimer_force_reprogram(base, 0);
raw_spin_unlock(&base->lock);
 }
@@ -710,13 +708,25 @@ static int hrtimer_switch_to_hres(void)
base->clock_base[i].resolution = KTIME_HIGH_RES;
 

Re: [PATCH] scripts/coccinelle/iterators/use_after_iter.cocci: list iterator variable semantic patch

2012-07-13 Thread Michal Marek
Dne 9.7.2012 22:40, Julia Lawall napsal(a):
> From: Julia Lawall 
> 
> If list_for_each_entry, etc complete a traversal of the list, the iterator
> variable ends up pointing to an address at an offset from the list head,
> and not a meaningful structure.  Thus this value should not be used after
> the end of the iterator.

Applied to kbuild.git#misc, thanks.

Michal
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ns: move free_nsproxy() out of do_exit() path

2012-07-13 Thread Andrew Morton
On Fri, 13 Jul 2012 14:48:08 +0300
"Kirill A. Shutemov"  wrote:

> From: "Kirill A. Shutemov" 
> 
> free_nsproxy() is too heavy to be on exit path. Let's free namespaces
> asynchronously to not block exit_group() syscall.

Please be specific, and complete.

Why is it "too heavy"?  Where is the time being spent?  Is it spent in
D state or is it spent burning CPU cycles?  Does the patch simply
offload the work into kernel threads, providing no net gain?

> The patch also fixes bug with free namespace without synchronize_rcu() through
> put_nsproxy().

I just don't understand this description.  Please send a new one which
includes all details about the bug, including a description of
the user-visible effects of the bug.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Khalid Aziz

On 07/13/2012 02:37 PM, Linus Torvalds wrote:



Would something like this make sense to people? I really think that
"How do I generate a kernel config file" is one of those things that
keeps normal people from compiling their own kernel. And we *want*
people to compile their own kernel so that they can help with things
like bisecting etc. The more, the merrier.


This is a great idea. 7-8 years ago I used to be able to create a
minimally configured kernel from upstream and run my Debian/Ubuntu/...
install with it. It got much harder in a hurry and now it takes too much
work to figure out how to configure upstream kernel to make it work with
distro. It is a 3-5 hour compile to start with distro config file and that
is just too painful. I will help with testing configs or helping sort
through the config options.

--
Khalid Aziz
khalid.a...@hp.com

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Dave Jones
On Fri, Jul 13, 2012 at 01:37:41PM -0700, Linus Torvalds wrote:

 > The point I'm slowly getting to is that I would actually love to have
 > *distro* Kconfig-files, where the distribution would be able to say
 > "These are the minimums I *require* to work".

As long as you don't mind these being added after the fact, I suppose
it would be workable.  The reason I say that is sometimes, it even catches *us*
by surprise.  We recently found out our virtualisation guys started
using sch_htb for example, and we inadvertently broke it when we moved
its module to a 'not always installed' kernel subpackage. (and before that, 
9PFS..)

People don't tell us anything, but somehow expect things to keep working.

 > In addition to the "minimal distro settings", we might also have a few
 > "common platform" settings, so that you could basically do a "hey, I
 > have a modern PC laptop, make it pick the obvious stuff that a normal
 > person needs, like USB storage, FAT/VFAT support, the core power
 > management etc".

I wish defconfig was actually something useful like this, instead of..
what the hell is it exactly ? No-one even seems to agree, other than
"random selection of options, many of which were removed n years ago"

Dave

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 3.5-rc6 futex_wait_requeue_pi oops.

2012-07-13 Thread Dave Jones
On Fri, Jul 13, 2012 at 01:27:41PM -0700, Darren Hart wrote:
 > I'm returning from a family vacation just now, I'll have a closer look on
 > Monday. It seems to me we recently had some futex lockdep annotations go
 > in, any chance those are somehow involved?
 > 
 > So we have a real user of the futex requeue pi code? Is this via pthread
 > condvars? Is this test available for me to run?

I wouldn't call it a "real user" per se.
details (including git checkout) at http://codemonkey.org.uk/projects/trinity/

Dave
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Simplifying kernel configuration for distro issues

2012-07-13 Thread Myklebust, Trond
On Fri, 2012-07-13 at 13:37 -0700, Linus Torvalds wrote:
> So this has long been one of my pet configuration peeves: as a user I
> am perfectly happy answering the questions about what kinds of
> hardware I want the kernel to support (I kind of know that), but many
> of the "support infrastructure" questions are very opaque, and I have
> no idea which of them any particular distribution actually depends
> on.
> 
> And it tends to change over time. For example, F14 (iirc) started
> using TMPFS and TMPFS_POSIX_ACL/XATTR for /dev. And starting in F16,
> the initrd setup requires DEVTMPFS and DEVTMPFS_MOUNT. There's been
> several times when I started with my old minimal config, and the
> resulting kernel would boot, but something wouldn't quite work right,
> and it can be very subtle indeed.
> 
> Similarly, the distro ends up having very particular requirements for
> exactly *which* security models it uses and needs, and they tend to
> change over time. And now with systemd, CGROUPS suddenly aren't just
> esoteric things that no normal person would want to use, but are used
> for basic infrastructure. And I remember being surprised by OpenSUSE
> suddenly needing the RAW table support for netfilter, because it had a
> NOTRACK rule or something.
> 
> The point I'm slowly getting to is that I would actually love to have
> *distro* Kconfig-files, where the distribution would be able to say
> "These are the minimums I *require* to work". So we'd have a "Distro"
> submenu, where you could pick the distro(s) you use, and then pick
> which release, and we'd have something like
> 
>  - distro/Kconfig:
> 
> config DISTRO_REQUIREMENTS
> bool "Pick minimal distribution requirements"
> 
> choice DISTRO
> prompt "Distribution"
> depends on DISTRO_REQUIREMENTS
> 
> config FEDORA
> config OPENSUSE
> config UBUNTU
> ...
> 
> endchoice
> 
> and then depending on the DISTRO config, we'd include one of the
> distro-specific ones with lists of supported distro versions and then
> the random config settings for that version:
> 
>  - distro/Kconfig.suse:
> 
> config OPENSUSE_121
> select OPENSUSE_11
> select IP_NF_RAW  # ..
> 
>  - distro/Kconfig.Fedora:
> 
> config FEDORA_16
> select FEDORA_15
> select DEVTMPFS   # F16 initrd needs this
> select DEVTMPFS_MOUNT  # .. and expects the kernel to mount
> DEVTMPFS automatically
> ...
> 
> config FEDORA_17
> select FEDORA_16
> select CGROUP_xyzzy
> ...
> 
> and the point would be that it would make it much easier for a normal
> user (and quite frankly, I want to put myself in that group too) to
> make a kernel config that "just works".
> 
> Sure, you can copy the config file that came with the distro, but it
> has tons of stuff that really isn't required. Not just in hardware,
> but all the debug choices etc that are really a user choice. And it's
> really hard to figure out - even for somebody like me - what a minimal
> usable kernel is.
> 
> And yes, I know about "make localmodconfig". That's missing the point
> for the same reason the distro config is missing the point.
> 
> Comments? It doesn't have to start out perfect, but I think it would
> *really* help make the kernel configuration much easier for people.
> 
> In addition to the "minimal distro settings", we might also have a few
> "common platform" settings, so that you could basically do a "hey, I
> have a modern PC laptop, make it pick the obvious stuff that a normal
> person needs, like USB storage, FAT/VFAT support, the core power
> management etc". The silly stuff that you need, and that
> "localyesconfig" actually misses because if you haven't inserted a USB
> thumb drive, you won't necessarily have the FAT module loaded, but we
> all know you do want it in real life. But that's really independent
> issue, so let's keep it to just distro core things at first, ok?
> 
> Would something like this make sense to people? I really think that
> "How do I generate a kernel config file" is one of those things that
> keeps normal people from compiling their own kernel. And we *want*
> people to compile their own kernel so that they can help with things
> like bisecting etc. The more, the merrier.
> 
> Linus

We could at least make selection of a minimal set of drivers for the
more common virtualised platforms a lot easier.
Right now, you need to hunt through 30+ different menus in order to find
what you need to run in a basic KVM virtual machine...

Cheers
  Trond
-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
trond.mykleb...@netapp.com
www.netapp.com

N�r��yb�X��ǧv�^�)޺{.n�+{zX����ܨ}���Ơz�:+v���zZ+��+zf���h���~i���z��w���?�&�)ߢf��^jǫy�m��@A�a���
0��h���i

Re: [PATCH v2] kconfig: allow long lines in config file

2012-07-13 Thread Michal Marek
On Fri, Jul 13, 2012 at 11:27:12AM -0700, c...@linux.vnet.ibm.com wrote:
> From: Cody Schafer 
> 
> For some config options (CONFIG_EXTRA_FIRMWARE, for example), the length
> of a config file line can exceed the 1024 byte buffer.
> 
> Switch from fgets to compat_getline to fix. compat_getline is an
> internally implemented getline work-alike for portability purposes.
> 
> Signed-off-by: Cody Schafer 

Applied to kbuild.git#kconfig, thanks.

Michal
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


  1   2   3   4   5   6   7   8   9   >