Re: [PATCH 04/11] posix timers:Introduce the 64bit methods with timespec64 type for k_clock structure

2015-04-20 Thread Thomas Gleixner
On Mon, 20 Apr 2015, Baolin Wang wrote:
 @@ -771,6 +771,7 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
   struct itimerspec __user *, setting)
  {
   struct itimerspec cur_setting;
 + struct itimerspec64 cur_setting64;
   struct k_itimer *timr;
   struct k_clock *kc;
   unsigned long flags;
 @@ -781,10 +782,16 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
   return -EINVAL;
  
   kc = clockid_to_kclock(timr-it_clock);
 - if (WARN_ON_ONCE(!kc || !kc-timer_get))
 + if (WARN_ON_ONCE(!kc || (!kc-timer_get  !kc-timer_get64))) {
   ret = -EINVAL;
 - else
 - kc-timer_get(timr, cur_setting);
 + } else {
 + if (kc-timer_get64) {
 + kc-timer_get64(timr, cur_setting64);
 + cur_setting = itimerspec64_to_itimerspec(cur_setting64);
 + } else {
 + kc-timer_get(timr, cur_setting);
 + }
 + }

This is really horrible. You add a metric ton of conditionals to every
syscall just to remove them later again. I have not yet checked the
end result, but this approach is error prone as hell and just
introduces completely useless code churn.

It's useless because you do not factor out the guts of the syscall
functions so we can reuse the very same logic for the future 2038 safe
syscalls which we need to introduce for 32bit machines.

Take a look at the compat syscalls. They do the right thing.

COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
   struct compat_itimerspec __user *, setting)
{
long err;
mm_segment_t oldfs;
struct itimerspec ts;

oldfs = get_fs();
set_fs(KERNEL_DS);
err = sys_timer_gettime(timer_id,
(struct itimerspec __user *) &ts);
set_fs(oldfs);
if (!err && put_compat_itimerspec(setting, &ts))
return -EFAULT;
return err;
}

So we can be clever and do the following:

1) Preparatory work in posix-timer.c (Patch #1)

- Split out the guts of the syscall and change the syscall
  implementation

static int __timer_gettime(timer_t timer_id, struct itimerspec *cur_setting)
{
struct k_itimer *timr;
struct k_clock *kc;
unsigned long flags;
int ret = 0;

timr = lock_timer(timer_id, flags);
if (!timr)
return -EINVAL;

kc = clockid_to_kclock(timr-it_clock);
if (WARN_ON_ONCE(!kc || !kc-timer_get))
ret = -EINVAL;
else
kc-timer_get(timr, cur_setting);

unlock_timer(timr, flags);
return ret;
}

/* Get the time remaining on a POSIX.1b interval timer. */
SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
struct itimerspec __user *, setting)
{
struct itimerspec cur_setting;
int ret = __timer_gettime(timer_id, cur_setting);

if (!ret  copy_to_user(setting, cur_setting, sizeof (cur_setting)))
return -EFAULT;

return ret;
}


2) Do the 64bit infrastructure work in posix-timer.c (Patch #2)

- Introduce k_clock-timer_get64() and provide a stub function

static int default_timer_get64(struct k_clock *kc, struct k_itimer *timr,
   struct itimerspec64 *cur_setting64)
{
struct itimerspec cur_setting;

kc-timer_get(timer, cur_setting);
return 0;
}

- Add the following to posix_timers_register_clock()

   if (kc-timer_get  !kc-timer_get64)
kc-timer_get64 = default_timer_get64;


- Convert __timer_gettime to 64bit

-static int __timer_gettime(timer_t timer_id, struct itimerspec *cur_setting)
+static int __timer_gettime(timer_t timer_id, struct itimerspec64 *cur_setting)
{
...
kc = clockid_to_kclock(timr->it_clock);
-   if (WARN_ON_ONCE(!kc || !kc->timer_get))
+   if (WARN_ON_ONCE(!kc || !kc->timer_get64))
ret = -EINVAL;
else
-   kc->timer_get(timr, cur_setting);
+   kc->timer_get64(timr, cur_setting);

unlock_timer(timr, flags);
return ret;
}

- Change the syscall implementation in the following way:

/* Get the time remaining on a POSIX.1b interval timer. */
SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
struct itimerspec __user *, setting)
{
#ifdef CONFIG_64BIT
struct itimerspec64 cur_setting;
int ret = __timer_gettime(timer_id, cur_setting);
#else
struct itimerspec64 cur_setting64;
struct itimerspec cur_setting;
int ret = __timer_gettime(timer_id, cur_setting64);

if (!ret)
cur_setting = itimerspec64_to_itimerspec(cur_setting64);
#endif
if (!ret  copy_to_user(setting, cur_setting, sizeof (cur_setting)))
return -EFAULT;
return ret;
}

The result is two simple to review patches with minimal code churn.

The nice thing is that once we introduce new syscalls 

[PATCH] net: dsa: mv88e6xxx: fix setup of port control 1

2015-04-20 Thread Vivien Didelot
mv88e6xxx_setup_port_common was writing to PORT_DEFAULT_VLAN (port
offset 0x07) instead of PORT_CONTROL_1 (port offset 0x05).

Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com
---
 drivers/net/dsa/mv88e6xxx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 9f0c2b9..48712bd 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1251,8 +1251,7 @@ int mv88e6xxx_setup_port_common(struct dsa_switch *ds, 
int port)
/* Port Control 1: disable trunking, disable sending
 * learning messages to this port.
 */
-   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_DEFAULT_VLAN,
-  0x);
+   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_1, 0x);
if (ret)
goto abort;
 
-- 
2.3.5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 02/11] IB/addr: Pass network namespace as a parameter

2015-04-20 Thread Doug Ledford
On Mon, 2015-04-20 at 12:03 +0300, Haggai Eran wrote:
 From: Guy Shapiro gu...@mellanox.com
 
 Add network namespace support to the ib_addr module. For that, all the address
 resolution and matching should be done using the appropriate namespace instead
 of init_net.
 
 This is achieved by:
 
 1. Adding an explicit network namespace argument to exported function that
require a namespace.
 2. Saving the namespace in the rdma_addr_client structure.
 3. Using it when calling networking functions.
 
 In order to preserve the behavior of calling modules, init_net is
 passed as the parameter in calls from other modules. This is modified as
 namespace support is added on more levels.
 
 Signed-off-by: Haggai Eran hagg...@mellanox.com
 Signed-off-by: Yotam Kenneth yota...@mellanox.com
 Signed-off-by: Shachar Raindel rain...@mellanox.com
 Signed-off-by: Guy Shapiro gu...@mellanox.com
 ---
  drivers/infiniband/core/addr.c   | 31 --
  drivers/infiniband/core/cma.c|  4 ++-
  drivers/infiniband/core/verbs.c  | 14 +++---
  drivers/infiniband/hw/ocrdma/ocrdma_ah.c |  3 ++-
  include/rdma/ib_addr.h   | 44 
 
  5 files changed, 72 insertions(+), 24 deletions(-)
 
 diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
 index f80da50d84a5..95beaef6b66d 100644
 --- a/drivers/infiniband/core/addr.c
 +++ b/drivers/infiniband/core/addr.c
 @@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct 
 rdma_dev_addr *dev_addr,
   int ret = -EADDRNOTAVAIL;
  
   if (dev_addr-bound_dev_if) {
 - dev = dev_get_by_index(init_net, dev_addr-bound_dev_if);
 + dev = dev_get_by_index(dev_addr-net, dev_addr-bound_dev_if);
   if (!dev)
   return -ENODEV;
   ret = rdma_copy_addr(dev_addr, dev, NULL);
 @@ -137,9 +137,10 @@ int rdma_translate_ip(struct sockaddr *addr, struct 
 rdma_dev_addr *dev_addr,
   }
  
   switch (addr-sa_family) {
 - case AF_INET:
 - dev = ip_dev_find(init_net,
 - ((struct sockaddr_in *) addr)-sin_addr.s_addr);
 + case AF_INET: {
   ^ Please don't add brackets just so you can
convert a cast into a variable declaration that's unnecessary

 + struct sockaddr_in *addr_in = (struct sockaddr_in *)addr;
 +
 + dev = ip_dev_find(dev_addr-net, addr_in-sin_addr.s_addr);
  
   if (!dev)
   return ret;
 @@ -149,12 +150,12 @@ int rdma_translate_ip(struct sockaddr *addr, struct 
 rdma_dev_addr *dev_addr,
   *vlan_id = rdma_vlan_dev_vlan_id(dev);
   dev_put(dev);
   break;
 -
 + }
  #if IS_ENABLED(CONFIG_IPV6)
   case AF_INET6:
   rcu_read_lock();
 - for_each_netdev_rcu(init_net, dev) {
 - if (ipv6_chk_addr(init_net,
 + for_each_netdev_rcu(dev_addr-net, dev) {
 + if (ipv6_chk_addr(dev_addr-net,
 ((struct sockaddr_in6 *) 
 addr)-sin6_addr,
 dev, 1)) {
   ret = rdma_copy_addr(dev_addr, dev, NULL);
 @@ -236,7 +237,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
   fl4.daddr = dst_ip;
   fl4.saddr = src_ip;
   fl4.flowi4_oif = addr-bound_dev_if;
 - rt = ip_route_output_key(init_net, fl4);
 + rt = ip_route_output_key(addr-net, fl4);
   if (IS_ERR(rt)) {
   ret = PTR_ERR(rt);
   goto out;
 @@ -278,12 +279,13 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
   fl6.saddr = src_in-sin6_addr;
   fl6.flowi6_oif = addr-bound_dev_if;
  
 - dst = ip6_route_output(init_net, NULL, fl6);
 + dst = ip6_route_output(addr-net, NULL, fl6);
   if ((ret = dst-error))
   goto put;
  
   if (ipv6_addr_any(fl6.saddr)) {
 - ret = ipv6_dev_get_saddr(init_net, ip6_dst_idev(dst)-dev,
 + ret = ipv6_dev_get_saddr(addr-net,
 +  ip6_dst_idev(dst)-dev,
fl6.daddr, 0, fl6.saddr);
   if (ret)
   goto put;
 @@ -458,7 +460,7 @@ static void resolve_cb(int status, struct sockaddr 
 *src_addr,
  }
  
  int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 
 *dmac,
 -u16 *vlan_id)
 +u16 *vlan_id, struct net *net)

In the core networking code, the net namespace is always first.  Please
stick with that paradigm.


-- 
Doug Ledford dledf...@redhat.com
  GPG KeyID: 0E572FDD




signature.asc
Description: This is a digitally signed message part


Re: [PATCH 05/11] time/posix-timers:Convert to the 64bit methods for k_clock callback functions

2015-04-20 Thread Thomas Gleixner
On Mon, 20 Apr 2015, Baolin Wang wrote:
  /* Set clock_realtime */
  static int posix_clock_realtime_set(const clockid_t which_clock,
 - const struct timespec *tp)
 + const struct timespec64 *tp)
  {
 - return do_sys_settimeofday(tp, NULL);
 + struct timespec ts = timespec64_to_timespec(*tp);
 +
 + return do_sys_settimeofday(ts, NULL);

Sigh. No. We first provide a proper function for this, which takes a
timespec64, i.e. do_sys_settimeofday64() instead of having this
wrapper mess all over the place.

   /* SIGEV_NONE timers are not queued ! See common_timer_get */
   if (((timr-it_sigev_notify  ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
 diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
 index 1d91416..144af14 100644
 --- a/kernel/time/timekeeping.h
 +++ b/kernel/time/timekeeping.h
 @@ -15,7 +15,7 @@ extern u64 timekeeping_max_deferment(void);
  extern int timekeeping_inject_offset(struct timespec *ts);
  extern s32 timekeeping_get_tai_offset(void);
  extern void timekeeping_set_tai_offset(s32 tai_offset);
 -extern void timekeeping_clocktai(struct timespec *ts);
 +extern void timekeeping_clocktai(struct timespec64 *ts);

# git grep timekeeping_clocktai() is your friend.

Thanks,

tglx
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/1] altera tse: Error-Bit on tx-avalon-stream always set.

2015-04-20 Thread Andreas Oetken
From: Andreas Oetken ennoerlan...@gmail.com

The Error-Bit on the avalon streaming interface of the
tx-dma-channel was always set. In SGMII configurations
this leads to error-symbols on the PCS and packet-rejection
on the receiver side (e.g. SGMII/1000Base-X connected switch).

This only applies to the tse-configuration with MSGDMA.

This issue was detected and fixed on a custom board with
a direct connection to a Marvell switch in SGMII-PHY-Mode.
(incl. custom patches for SGMII-PCS).

According to the datasheet if ff_tx_err (avalon-streaming)
is set it is forwarded to gm_tx_err. As a result the PCS
is forwarding the error by sending a /V/-character.

Signed-off-by: Andreas Oetken ennoerlan...@gmail.com
---
 drivers/net/ethernet/altera/altera_msgdmahw.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/altera/altera_msgdmahw.h 
b/drivers/net/ethernet/altera/altera_msgdmahw.h
index e335626..eba070f 100644
--- a/drivers/net/ethernet/altera/altera_msgdmahw.h
+++ b/drivers/net/ethernet/altera/altera_msgdmahw.h
@@ -72,7 +72,6 @@ struct msgdma_extended_desc {
 #define MSGDMA_DESC_CTL_TX_SINGLE  (MSGDMA_DESC_CTL_GEN_SOP |  \
 MSGDMA_DESC_CTL_GEN_EOP |  \
 MSGDMA_DESC_CTL_TR_COMP_IRQ |  \
-MSGDMA_DESC_CTL_TR_ERR_IRQ |   \
 MSGDMA_DESC_CTL_GO)
 
 #define MSGDMA_DESC_CTL_RX_SINGLE  (MSGDMA_DESC_CTL_END_ON_EOP |   \
-- 
2.1.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 11/11] IB/ucm: Add partial support for network namespaces

2015-04-20 Thread ira.weiny
On Mon, Apr 20, 2015 at 12:03:42PM +0300, Haggai Eran wrote:
 From: Shachar Raindel rain...@mellanox.com
 
 It is impossible to completely support network namespaces for UCM, as
 we cannot identify the target IPoIB device.


As Jason said, it seems like the use of namespaces should be limited to the
RDMA CM layer.  If so, I _think_ this patch would not be needed?

Ira



 However, we add support
 which will work if the user is following the IB-Spec Annex 11 (RDMA IP
 CM Services) with the service ID and private data formatting.
 
 Signed-off-by: Haggai Eran hagg...@mellanox.com
 Signed-off-by: Yotam Kenneth yota...@mellanox.com
 Signed-off-by: Shachar Raindel rain...@mellanox.com
 Signed-off-by: Guy Shapiro gu...@mellanox.com
 ---
  drivers/infiniband/core/ucm.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
 index 9604ab068984..424421091dae 100644
 --- a/drivers/infiniband/core/ucm.c
 +++ b/drivers/infiniband/core/ucm.c
 @@ -45,6 +45,7 @@
  #include linux/idr.h
  #include linux/mutex.h
  #include linux/slab.h
 +#include linux/nsproxy.h
  
  #include asm/uaccess.h
  
 @@ -490,7 +491,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
   ctx-uid = cmd.uid;
   ctx-cm_id = ib_create_cm_id(file-device-ib_dev,
ib_ucm_event_handler, ctx,
 -  init_net);
 +  current-nsproxy-net_ns);
   if (IS_ERR(ctx-cm_id)) {
   result = PTR_ERR(ctx-cm_id);
   goto err1;
 -- 
 1.7.11.2
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-rdma in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] net: dsa: mv88e6xxx: fix setup of port control 1

2015-04-20 Thread Andrew Lunn
On Mon, Apr 20, 2015 at 05:19:23PM -0400, Vivien Didelot wrote:
 mv88e6xxx_setup_port_common was writing to PORT_DEFAULT_VLAN (port
 offset 0x07) instead of PORT_CONTROL_1 (port offset 0x05).

Hi Vivien

Good catch.
 
 Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com

Fixes: cca8b1337541 (net: dsa: Use mnemonics rather than register numbers)
Acked-by: Andrew Lunn and...@lunn.ch

Thanks
Andrew

 ---
  drivers/net/dsa/mv88e6xxx.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)
 
 diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
 index 9f0c2b9..48712bd 100644
 --- a/drivers/net/dsa/mv88e6xxx.c
 +++ b/drivers/net/dsa/mv88e6xxx.c
 @@ -1251,8 +1251,7 @@ int mv88e6xxx_setup_port_common(struct dsa_switch *ds, 
 int port)
   /* Port Control 1: disable trunking, disable sending
* learning messages to this port.
*/
 - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_DEFAULT_VLAN,
 -0x);
 + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_1, 0x);
   if (ret)
   goto abort;
  
 -- 
 2.3.5
 
 --
 To unsubscribe from this list: send the line unsubscribe netdev in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 05/11] IB/ipoib: Return IPoIB devices as possible matches to get_net_device_by_port_pkey_ip

2015-04-20 Thread ira.weiny
On Mon, Apr 20, 2015 at 12:03:36PM +0300, Haggai Eran wrote:
 From: Guy Shapiro gu...@mellanox.com
 
 Implement callback that returns network device to ib_core according to
 connection parameters. Check the ipoib device and iterate over all child
 devices to look for a match.
 
 For each ipoib device we iterate through all upper devices when searching for
 a matching IP, in order to support bonding.
 
 Signed-off-by: Guy Shapiro gu...@mellanox.com
 Signed-off-by: Haggai Eran hagg...@mellanox.com
 Signed-off-by: Yotam Kenneth yota...@mellanox.com
 Signed-off-by: Shachar Raindel rain...@mellanox.com
 ---
  drivers/infiniband/ulp/ipoib/ipoib_main.c | 122 
 +-
  1 file changed, 121 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c 
 b/drivers/infiniband/ulp/ipoib/ipoib_main.c
 index 7cad4dd87469..89a59a0e17e6 100644
 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
 +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
 @@ -48,6 +48,9 @@
  
  #include linux/jhash.h
  #include net/arp.h
 +#include net/addrconf.h
 +#include linux/inetdevice.h
 +#include rdma/ib_cache.h
  
  #define DRV_VERSION 1.0.0
  
 @@ -91,11 +94,15 @@ struct ib_sa_client ipoib_sa_client;
  static void ipoib_add_one(struct ib_device *device);
  static void ipoib_remove_one(struct ib_device *device);
  static void ipoib_neigh_reclaim(struct rcu_head *rp);
 +static struct net_device *ipoib_get_net_device_by_port_pkey_ip(
 + struct ib_device *dev, u8 port, u16 pkey,
 + struct sockaddr *addr);
  
  static struct ib_client ipoib_client = {
   .name   = ipoib,
   .add= ipoib_add_one,
 - .remove = ipoib_remove_one
 + .remove = ipoib_remove_one,
 + .get_net_device_by_port_pkey_ip = ipoib_get_net_device_by_port_pkey_ip,
  };
  
  int ipoib_open(struct net_device *dev)
 @@ -222,6 +229,119 @@ static int ipoib_change_mtu(struct net_device *dev, int 
 new_mtu)
   return 0;
  }
  
 +static bool ipoib_is_dev_match_addr(struct sockaddr *addr,
 + struct net_device *dev)
 +{
 + struct net *net = dev_net(dev);
 +
 + if (addr-sa_family == AF_INET) {
 + struct in_device *in_dev = in_dev_get(dev);
 + struct sockaddr_in *addr_in = (struct sockaddr_in *)addr;
 + __be32 ret_addr;
 +
 + if (!in_dev)
 + return false;
 +
 + ret_addr = inet_confirm_addr(net, in_dev, 0,
 +  addr_in-sin_addr.s_addr,
 +  RT_SCOPE_HOST);
 + in_dev_put(in_dev);
 + if (ret_addr)
 + return true;
 + }
 +#if IS_ENABLED(CONFIG_IPV6)
 + else if (addr-sa_family == AF_INET6) {
 + struct sockaddr_in6 *addr_in6 = (struct sockaddr_in6 *)addr;
 +
 + if (ipv6_chk_addr(net, addr_in6-sin6_addr, dev, 1))
 + return true;
 + }
 +#endif
 + return false;
 +}
 +
 +/**
 + * Find a net_device matching the given address, which is an upper device of
 + * the given net_device.
 + * @addr: IP address to look for.
 + * @dev: base IPoIB net_device
 + *
 + * If found, returns the net_device with a reference held. Otherwise return
 + * NULL.
 + */
 +static struct net_device *ipoib_get_net_dev_match_addr(struct sockaddr *addr,
 +struct net_device *dev)
 +{
 + struct net_device *upper,
 +   *result = NULL;
 + struct list_head *iter;
 +
 + if (ipoib_is_dev_match_addr(addr, dev)) {
 + dev_hold(dev);
 + return dev;
 + }
 +
 + rcu_read_lock();
 + netdev_for_each_all_upper_dev_rcu(dev, upper, iter) {
 + if (ipoib_is_dev_match_addr(addr, upper)) {
 + dev_hold(upper);
 + result = upper;
 + break;
 + }
 + }
 + rcu_read_unlock();
 + return result;
 +}
 +
 +static struct net_device *ipoib_get_net_device_by_port_pkey_ip(
 + struct ib_device *dev, u8 port, u16 pkey, struct sockaddr *addr)
 +{
 + struct ipoib_dev_priv *priv;
 + struct list_head *dev_list;
 + u16 pkey_index;
 +
 + ib_find_cached_pkey(dev, port, pkey, pkey_index);
 + if (pkey_index == (u16)-1)
 + return NULL;

Why not check the return value of ib_find_cached_pkey?

 +
 + if (rdma_node_get_transport(dev-node_type) != RDMA_TRANSPORT_IB)
 + return NULL;

The use of Link Layer and Transport in this series will need to be reevaluated
based on Michael's work:

https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg24140.html

Ira

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC,1/8] soc/fman: Add FMan MURAM support

2015-04-20 Thread Scott Wood
On Mon, 2015-04-20 at 03:58 -0500, Liberman Igal-B31950 wrote:
 
 Regards,
 Igal Liberman.
 
  -Original Message-
  From: Kumar Gala [mailto:ga...@kernel.crashing.org]
  Sent: Thursday, March 12, 2015 5:57 PM
  To: Liberman Igal-B31950
  Cc: linuxppc-...@lists.ozlabs.org; netdev@vger.kernel.org; linux-
  ker...@vger.kernel.org; Wood Scott-B07421
  Subject: Re: [RFC,1/8] soc/fman: Add FMan MURAM support
  
  
  On Mar 11, 2015, at 12:07 AM, Igal.Liberman igal.liber...@freescale.com
  wrote:
  
   From: Igal Liberman igal.liber...@freescale.com
  
   Add Frame Manager Multi-User RAM support.
  
   Signed-off-by: Igal Liberman igal.liber...@freescale.com
   ---
   drivers/soc/fsl/fman/Kconfig|1 +
   drivers/soc/fsl/fman/Makefile   |5 +-
   drivers/soc/fsl/fman/fm_muram.c |  174
  +++
   drivers/soc/fsl/fman/inc/fm_muram_ext.h |   98 +
   4 files changed, 276 insertions(+), 2 deletions(-) create mode 100644
   drivers/soc/fsl/fman/fm_muram.c create mode 100644
   drivers/soc/fsl/fman/inc/fm_muram_ext.h
  
  
  use lib/genalloc instead of rheap
  
 
 Hi Kumar,
 I looked into lib/genalloc allocator.
 As far as I see, the genalloc allocator doesn't allow to control the memory 
 alignment when you allocate a chunk of memory.
 Two important notes regarding MURAM memory:
 - The allocated memory chunks should have specific alignment (might be 
 different in each chunk).
 - The allocations must be efficient, we don't want to waste MURAM due to 
 alignment issues.

If the requirement is that allocations must be size-aligned, use
gen_pool_first_fit_order_align.  Otherwise, improve genalloc to do what
you need.

-Scott

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] net: dsa: mv88e6xxx: use PORT_DEFAULT_VLAN

2015-04-20 Thread Vivien Didelot
Minor, use the explicit PORT_DEFAULT_VLAN define instead of 0x07.

Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com
---
 drivers/net/dsa/mv88e6xxx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 48712bd..af639ab 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1274,7 +1274,8 @@ int mv88e6xxx_setup_port_common(struct dsa_switch *ds, 
int port)
/* Default VLAN ID and priority: don't set a default VLAN
 * ID, and set the default packet priority to zero.
 */
-   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x07, 0x);
+   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_DEFAULT_VLAN,
+  0x);
 abort:
mutex_unlock(ps-smi_mutex);
return ret;
-- 
2.3.5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 09/11] cputime:Introduce the cputime_to_timespec64/timespec64_to_cputime function

2015-04-20 Thread Thomas Gleixner
On Mon, 20 Apr 2015, Baolin Wang wrote:

 This patch introduces some functions for converting cputime to timespec64 and 
 back,
 that replace the timespec type with timespec64 type, as well as for arch/s390 
 and
 arch/powerpc architecture.

No. We want a patch which adds the functions and then a patch which
uses them. This stuff is tricky and hard to review. So please split
the patches into smaller chunks.

 +unsigned long
 +timespec64_to_jiffies(const struct timespec64 *value)
 +{
 + return __timespec_to_jiffies(value-tv_sec, value-tv_nsec);
 +}
 +EXPORT_SYMBOL(timespec64_to_jiffies);

So we have now two exports which are doing exactly the same
thing. Copy and paste is wonderful, right?

What about exporting __timespec_to_jiffies() and providing inlines for
timespec_to_jiffies() and timespec64_to_jiffies() ?

EXPORT_SYMBOL is not just a stupid annotation. Its impact on the
resulting kernel size is larger than the actual function
implementation.

 +void
 +jiffies_to_timespec64(const unsigned long jiffies, struct timespec64 *value)
 +{
 + /*
 +  * Convert jiffies to nanoseconds and separate with
 +  * one divide.
 +  */
 + u32 rem;
 + value-tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
 +NSEC_PER_SEC, rem);
 + value-tv_nsec = rem;
 +}
 +EXPORT_SYMBOL(jiffies_to_timespec64);

Sigh.

Thanks,

tglx


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 0/2] net: Fix hw csum failure message flood for ppp tunnel

2015-04-20 Thread Tom Herbert
This patch set addresses Bug 95171 - hw csum failure message
flood for ppp tunnel since upgrade to 3.16. The problem is that pppoe
is being used over UDP with UDP checksums enabled. On receive,
checksum conversion turns checksum-unnecessary into checksum-
complete. The PPP receive functions do not properly pull
the checksum over its headers, so that when an encapsulated
checksum is considered the checksum-complete value is incorrect.

This patch adds skb_checksum_complete_unset which can be called
in the receive path in lieu of pulling checksum complete in
layer. This is useful when the packet is being modified (e.g.
decompressed) and the checksum-complete value is no longer
relevant.

In the ppp_receive_frame we call skb_checksum_complete_unset to toss
out checksum-complete. This should eliminate the reported messages.
Alternatively, we could add skb_postpull_rcsum and probably
special case handling for VJ compression if maintaining the
checksum-complete is needed (not clear to me this is worth the
effort).

I haven't tested this since setting up the failure scenario doesn't
seem trivial to configure.
Tom Herbert (2):
  net: add skb_checksum_complete_unset
  ppp: call skb_checksum_complete_unset in ppp_receive_frame

 drivers/net/ppp/ppp_generic.c |  1 +
 include/linux/skbuff.h| 12 
 2 files changed, 13 insertions(+)

-- 
1.8.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] net: dsa: mv88e6xxx: use PORT_DEFAULT_VLAN

2015-04-20 Thread Andrew Lunn
On Mon, Apr 20, 2015 at 05:43:26PM -0400, Vivien Didelot wrote:
 Minor, use the explicit PORT_DEFAULT_VLAN define instead of 0x07.

Hi Vivien

I would not normally use the word Minor here, since it will end up
in the commit log.

Other than that:

Acked-by: Andrew Lunn and...@lunn.ch

Thanks
Andrew

 
 Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com
 ---
  drivers/net/dsa/mv88e6xxx.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
 index 48712bd..af639ab 100644
 --- a/drivers/net/dsa/mv88e6xxx.c
 +++ b/drivers/net/dsa/mv88e6xxx.c
 @@ -1274,7 +1274,8 @@ int mv88e6xxx_setup_port_common(struct dsa_switch *ds, 
 int port)
   /* Default VLAN ID and priority: don't set a default VLAN
* ID, and set the default packet priority to zero.
*/
 - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x07, 0x);
 + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_DEFAULT_VLAN,
 +0x);
  abort:
   mutex_unlock(ps-smi_mutex);
   return ret;
 -- 
 2.3.5
 
 --
 To unsubscribe from this list: send the line unsubscribe netdev in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 06/11] IB/cm, cma: Move RDMA IP CM private-data parsing code from ib_cma to ib_cm

2015-04-20 Thread ira.weiny
On Mon, Apr 20, 2015 at 12:03:37PM +0300, Haggai Eran wrote:
 From: Guy Shapiro gu...@mellanox.com
 
 When receiving a connection request, ib_cm needs to associate the request with
 a network namespace. To do this, it needs to know the request's destination
 IP. For this the RDMA IP CM packet formatting functionality needs to be
 exposed to ib_cm.
 

[snip]

 +
 +int cm_save_net_info(struct sockaddr *src_addr,
 +  struct sockaddr *dst_addr,
 +  struct ib_cm_event *ib_event)
 +{
 + struct cm_work *work = container_of(ib_event, struct cm_work, cm_event);
 +
 + if ((rdma_port_get_link_layer(work-port-cm_dev-ib_device,
 +   work-port-port_num) ==
 +  IB_LINK_LAYER_INFINIBAND) 
 + (ib_event-event == IB_CM_REQ_RECEIVED)) {

The original code in the RDMA CM had a check for AF_IB.  Isn't that needed here
as well?

Ira

 + cm_save_ib_info(src_addr, dst_addr,
 + ib_event-param.req_rcvd.primary_path);
 + return 0;
 + }
 +
 + return cm_save_ip_info(src_addr, dst_addr, work);
 +}
 +EXPORT_SYMBOL(cm_save_net_info);
 +
  struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
ib_cm_handler cm_handler,
void *context)
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH iproute2 -next] examples: bpf: fix ld offs to have same prog loaded on ingress/egress

2015-04-20 Thread Alexei Starovoitov

On 4/20/15 4:48 AM, Daniel Borkmann wrote:

Fix up the eBPF example program to match our kernel fix in a166151cbe33 (bpf:
fix bpf helpers to use skb-mac_header relative offsets). Tested on ingress
and egress paths.

Signed-off-by: Daniel Borkmann dan...@iogearbox.net
Cc: Alexei Starovoitov a...@plumgrid.com
---
  ( Stephen, this applies on top of tc: built-in eBPF exec proxy:
https://patchwork.ozlabs.org/patch/461837/ )


Looks good.
Acked-by: Alexei Starovoitov a...@plumgrid.com


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 07/11] IB/cm: Add network namespace support

2015-04-20 Thread ira.weiny
On Mon, Apr 20, 2015 at 11:06:59AM -0600, Jason Gunthorpe wrote:
 On Mon, Apr 20, 2015 at 12:03:38PM +0300, Haggai Eran wrote:
  From: Guy Shapiro gu...@mellanox.com
  
  Add namespace support to the IB-CM layer.
 
  - Each CM-ID now has a network namespace it is associated with, assigned at
creation. This namespace is used as needed during subsequent action on the
CM-ID or related objects.
 
 There is really something weird about this layering. At the CM layer
 there should be no concept of an IP address, it only deals with GIDs.
 
 So how can a CM object have a network namespace associated with it?
 
   {
  av-port = port;
  av-pkey_index = wc-pkey_index;
  ib_init_ah_from_wc(port-cm_dev-ib_device, port-port_num, wc,
  -  grh, av-ah_attr, init_net);
  +  grh, av-ah_attr, net);
 
 There is something deeply wrong with adding network namespace
 arguments to verbs.
 
 For rocee the gid index clearly specifies the network namespace
 to use, so much of this should go away and have rocee get the
 namespace from the gid index.
 
 Ie in ib_init_ah_from_wc we have the ib_wc which contains the sgid
 index.
 
 I'm really not excited at how many places are gaining a net when those
 layers shouldn't even need to care about IP layer details. Just acting
 as a pass through for rocee doesn't make sense.
 

I had the same feeling when I saw the addition of the network namespace to the
MAD code, especially the RMPP code.

It seems like there should be a better way to deal with this.  My gut says that
the namespace should be handled separately from ib_init_ah_from_wc.  Perhaps
as a secondary call used only when the namespace is needed?  But I'm not sure
when it is appropriate/needed.

Ira

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 net-next] ip: Add color output option

2015-04-20 Thread Mathias Nyman

Thanks for bearing with my first patch.

On 2015-04-20 10:16-0700, Stephen Hemminger wrote:

On Sat, 18 Apr 2015 13:39:45 +0300
Mathias Nyman m.ny...@iki.fi wrote:


It is hard to quickly find what you are looking for in the output of
the ip
command. Color helps.

This patch adds a '-c' flag to highlight these with individual colors:
   - interface name
   - ip address
   - mac address
   - up/down state

Signed-off-by: Mathias Nyman m.ny...@iki.fi


I like the idea of this, it would be generally good across the board.

But the patch does not apply cleanly to the current version of iproute2.


What is the current version? I used the net-next branch as a base from
here:
http://git.kernel.org/cgit/linux/kernel/git/shemminger/iproute2.git

I thought net-next was used for new features, but master branch now
has newer commits. Should I rebase on top of master?



And there are minor style issues. iproute2 in general tries to follow kernel 
style.

WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#36:
new file mode 100644

ERROR: open brace '{' following enum go on the same line
#45: FILE: include/color.h:5:
+enum color_attr
+{

ERROR: open brace '{' following enum go on the same line
#195: FILE: lib/color.c:7:
+enum color
+{

ERROR: that open brace { should be on the previous line
#207: FILE: lib/color.c:19:
+static const char * const color_codes[] =
+{

ERROR: that open brace { should be on the previous line
#220: FILE: lib/color.c:32:
+static enum color attr_colors[] =
+{

ERROR: do not initialise statics to 0 or NULL
#229: FILE: lib/color.c:41:
+static int color_is_enabled = 0;

WARNING: Missing a blank line after declarations
#240: FILE: lib/color.c:52:
+   va_list args;
+   va_start(args, fmt);


Thanks, so checkpatch.pl applies to iproute2 as well. I'll fix these in v3.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 2/2] ppp: call skb_checksum_complete_unset in ppp_receive_frame

2015-04-20 Thread Tom Herbert
Call checksum_complete_unset in PPP receive to discard checksum-complete
value. PPP does not pull checksum for headers and also modifies packet
as in VJ compression.

Signed-off-by: Tom Herbert t...@herbertland.com
---
 drivers/net/ppp/ppp_generic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index af034db..9d15566 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1716,6 +1716,7 @@ ppp_receive_frame(struct ppp *ppp, struct sk_buff *skb, 
struct channel *pch)
 {
/* note: a 0-length skb is used as an error indication */
if (skb-len  0) {
+   skb_checksum_complete_unset(skb);
 #ifdef CONFIG_PPP_MULTILINK
/* XXX do channel-level decompression here */
if (PPP_PROTO(skb) == PPP_MP)
-- 
1.8.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 1/2] net: add skb_checksum_complete_unset

2015-04-20 Thread Tom Herbert
This function changes ip_summed to CHECKSUM_NONE if CHECKSUM_COMPLETE
is set. This is called to discard checksum-complete when packet
is being modified and checksum is not pulled for headers in a layer.

Signed-off-by: Tom Herbert t...@herbertland.com
---
 include/linux/skbuff.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0991259..06793b5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3016,6 +3016,18 @@ static inline bool __skb_checksum_validate_needed(struct 
sk_buff *skb,
  */
 #define CHECKSUM_BREAK 76
 
+/* Unset checksum-complete
+ *
+ * Unset checksum complete can be done when packet is being modified
+ * (uncompressed for instance) and checksum-complete value is
+ * invalidated.
+ */
+static inline void skb_checksum_complete_unset(struct sk_buff *skb)
+{
+   if (skb-ip_summed == CHECKSUM_COMPLETE)
+   skb-ip_summed = CHECKSUM_NONE;
+}
+
 /* Validate (init) checksum based on checksum complete.
  *
  * Return values:
-- 
1.8.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v3 09/10] lib: libos build scripts and documentation

2015-04-20 Thread Paul Bolle
Some random observations while I'm still trying to wrap my head around
all this (which might take quite some time).

On Sun, 2015-04-19 at 22:28 +0900, Hajime Tazaki wrote:
 --- /dev/null
 +++ b/arch/lib/Kconfig
 @@ -0,0 +1,124 @@
 +menuconfig LIB
 +   bool LibOS-specific options
 +   def_bool n

This is the start of the Kconfig parse for lib. (That would basically
still be true even if you didn't set KBUILD_KCONFIG, see below.) So why
not do something like all arches do:

config LIB
def_bool y
select [...]

Ie, why would someone want to build for ARCH=lib and still not set LIB?

 +   select PROC_FS
 +   select PROC_SYSCTL
 +   select SYSCTL
 +   select SYSFS
 +   help
 +  The 'lib' architecture is a library (user-mode) version of
 +  the linux kernel that includes only its network stack and is
 +   used within the userspace application, and ns-3 simulator.
 +   For more information, about ns-3, see http://www.nsnam.org.
 +
 +config EXPERIMENTAL
 + def_bool y

Unneeded: removed treewide in, I think, 2014.

 +config MMU
 +def_bool n

Add empty line.

 +config FPU
 +def_bool n

Ditto.

 +config SMP
 +def_bool n
 +
 +config ARCH
 + string
 + option env=ARCH
 +
 +config KTIME_SCALAR
 +   def_bool y

This one is unused.

 +config MODULES
 +   def_bool y
 +   option modules
 +
 +config GENERIC_CSUM
 + def_bool y
 +
 +config GENERIC_BUG
 + def_bool y
 + depends on BUG

Add empty line here.

 +config PRINTK
 +   def_bool y
 +
 +config RWSEM_GENERIC_SPINLOCK
 + def_bool y
 +
 +config GENERIC_FIND_NEXT_BIT
 + def_bool y

This one is unused too.

 +config GENERIC_HWEIGHT
 +   def_bool y
 +
 +config TRACE_IRQFLAGS_SUPPORT
 + def_bool y
 +
 +config NO_HZ
 + def_bool y
 +
 +config BASE_FULL
 +   def_bool n
 +
 +config SELECT_MEMORY_MODEL
 +   def_bool n
 +
 +config FLAT_NODE_MEM_MAP
 +   def_bool n
 +
 +config PAGEFLAGS_EXTENDED
 +   def_bool n
 +
 +config VIRT_TO_BUS
 +   def_bool n
 +
 +config HAS_DMA
 +   def_bool n
 +
 +config HZ
 +int
 +default 250
 +
 +config TINY_RCU
 +   def_bool y
 +
 +config HZ_250
 +   def_bool y
 +
 +config BASE_SMALL
 +   int
 +   default 1
 +
 +config SPLIT_PTLOCK_CPUS
 +   int
 +   default 1
 +
 +config FLATMEM
 +   def_bool y
 +
 +config SYSCTL
 +   def_bool y
 +
 +config PROC_FS
 +   def_bool y
 +
 +config SYSFS
 +   def_bool y
 +
 +config PROC_SYSCTL
 +   def_bool y
 +
 +config NETDEVICES
 +   def_bool y
 +
 +config SLIB
 +   def_bool y

You've also added SLIB to init/Kconfig in 02/10. But make ARCH=lib
*config will never visit init/Kconfig, will it? And, apparently, none
of SL[AOU]B are wanted for lib. So I think the entry for config SLIB in
that file can be dropped (as other arches will never see it because it
depends on LIB).

(Note that I haven't actually looked into all the Kconfig entries added
above. Perhaps I might do that. But I'm pretty sure most of the time all
I can say is: I have no idea why this entry defaults to $VALUE.)

 +source net/Kconfig
 +
 +source drivers/base/Kconfig
 +
 +source crypto/Kconfig
 +
 +source lib/Kconfig
 +
 +

Trailing empty lines.

 diff --git a/arch/lib/Makefile b/arch/lib/Makefile
 new file mode 100644
 index 000..d8a0bf9
 --- /dev/null
 +++ b/arch/lib/Makefile
 @@ -0,0 +1,251 @@
 +ARCH_DIR := arch/lib
 +SRCDIR=$(dir $(firstword $(MAKEFILE_LIST)))

Do you use SRCDIR?

 +DCE_TESTDIR=$(srctree)/tools/testing/libos/
 +KBUILD_KCONFIG := arch/$(ARCH)/Kconfig

I think you copied this from arch/um/Makefile. But arch/um/ is, well,
special. Why should lib not start the kconfig parse in the file named
Kconfig? And if you want to start in arch/lib/Kconfig, it would be nice
to add a mainmenu (just like arch/x86/um/Kconfig does).

(I don't read Makefilese well enough to understand the rest of this
file. I think it's scary.)

 +
 +CC = gcc
 +GCCVERSIONGTEQ48 := $(shell expr `gcc -dumpversion` \= 4.8)
 +ifeq $(GCCVERSIONGTEQ48) 1
 +   NO_TREE_LOOP_OPT += -fno-tree-loop-distribute-patterns
 +endif
 +
 +
 +-include $(ARCH_DIR)/objs.mk
 +-include $(srctree)/.config
 +include $(srctree)/scripts/Kbuild.include
 +include $(ARCH_DIR)/processor.mk
 +
 +# targets
 +LIBOS_TOOLS=$(ARCH_DIR)/tools
 +LIBOS_GIT_REPO=git://github.com/libos-nuse/linux-libos-tools
 +KERNEL_LIB=liblinux-$(KERNELVERSION).so
 +
 +ALL_OBJS=$(OBJS) $(KERNEL_LIB) $(modules) $(all-obj-for-clean)
 +
 +# auto generated files
 +AUTOGENS=$(CRC32TABLE) $(COMPILE_H) $(BOUNDS_H) $(ARCH_DIR)/timeconst.h 
 $(ARCH_DIR)/linker.lds
 +COMPILE_H=$(srctree)/include/generated/compile.h
 +BOUNDS_H=$(srctree)/include/generated/bounds.h
 +
 +# from lib/Makefile
 +CRC32TABLE = $(ARCH_DIR)/crc32table.h
 +hostprogs-y  := $(srctree)/lib/gen_crc32table
 +clean-files  := crc32table.h
 +
 +# sources and objects
 +LIB_SRC=\
 +lib.c lib-device.c lib-socket.c random.c softirq.c time.c 

Re: [PATCH] neighbour.c: Avoid GC directly after state change

2015-04-20 Thread YOSHIFUJI Hideaki
Ulf Samuelsson wrote:
 How many neighbors do you want to maintain?
 I guess you have to increase the number of gc_thresh1.
 The current use cases have up to 2048 entries.
 This is expected to grow in the future.
 The 3.4 kernel used in the system today is limited to 1024,
 but that has been raised to about 10k.
 
 The gc_thresh1 test is not implemented in 3.4 but can be backported,
 but still not convinced it is a good idea.

Why?

 To complicate things, one requirement is that for some interfaces
 you always want to keep things alive, if connected, but
 for other interfaces you want things to be removed
 to conserve memory.
 Actually you would want to do this selection on a subnet level.

If you want to introduce per-interface parameter, I am okay with it.

 
 Internal discussions resulted in a proposal to change the patch,
 so that you have a keepalive flag which is tested after
 it has been decided to exit the REACHABLE state.
 
 if the keepalive flag is set, you always go to DELAY state from REACHABLE.

No.

-- 
Hideaki Yoshifuji hideaki.yoshif...@miraclelinux.com
Technical Division, MIRACLE LINUX CORPORATION
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [Intel-wired-lan] [PATCH] e1000e: Do not allow CRC stripping to be disabled on 82579 w/ jumbo frames

2015-04-20 Thread Brown, Aaron F
 From: Intel-wired-lan [mailto:intel-wired-lan-boun...@lists.osuosl.org] On
 Behalf Of Jeff Kirsher
 Sent: Wednesday, April 08, 2015 7:58 PM
 To: Alexander Duyck
 Cc: netdev@vger.kernel.org; intel-wired-...@lists.osuosl.org
 Subject: Re: [Intel-wired-lan] [PATCH] e1000e: Do not allow CRC stripping
 to be disabled on 82579 w/ jumbo frames
 
 On Wed, 2015-04-08 at 18:37 -0700, Alexander Duyck wrote:
  The driver wasn't allowing jumbo frames to be enabled when CRC
  stripping
  was disabled, however it was allowing CRC stripping to be disabled
  while
  jumbo frames were enabled.  This fixes that by making it so that the
  NETIF_F_RXFCS flag cannot be set when jumbo frames are enabled on
  82579 and
  newer parts.
 
  Signed-off-by: Alexander Duyck alexander.h.du...@redhat.com
  ---
   drivers/net/ethernet/intel/e1000e/netdev.c |   14 ++
   1 file changed, 14 insertions(+)
 
 Thanks Alex, I will add your patch to my queue.

Tested-by: Aaron Brown aaron.f.br...@intel.com

 --
 git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git
 dev-queue
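--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html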

Re: [PATCH net-next 0/2] net: Fix hw csum failure message flood for ppp tunnel

2015-04-20 Thread David Miller
From: Tom Herbert t...@herbertland.com
Date: Mon, 20 Apr 2015 14:10:03 -0700

 This patch set addresses bug Bug 95171 - hw csum failure message
 flood for ppp tunnel since upgrade to 3.16. The problem is that pppoe
 is being used over UDP with UDP checksums enabled. On receive
 checksum conversion turns checksum-unnecessary into checksum-
 complete. The PPP receive functions do not properly pull
 the checksum over its headers, so that when an encapsulated
 checksum is considered the checksum-complete value is incorrect.
 
 This patch adds skb_checksum_complete_unset which can be called
 in the receive path in lieu of pulling checksum complete in
 layer. This is useful when the packet is being modified (e.g.
 decompressed) and the checksum-complete value is no longer
 relevant.
 
 In the ppp_receive_frame we call skb_checksum_complete_unset to toss
 out checksum-complete. This should eliminate the reported messages.
 Alternatively, we could add skb_postpull_rcsum and probably
 special case handling for VJ compression if maintaining the
 checksum-complete is needed (not clear to me this is worth the
 effort).
 
 I haven't tested this since setting up the failure scenario doesn't
 seem trivial to configure.

I'm preemptively applying this, but it's really important for
folks to give this some good testing.

Thanks Tom.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] altera tse: Error-Bit on tx-avalon-stream always set.

2015-04-20 Thread David Miller
From: Andreas Oetken ennoerlan...@googlemail.com
Date: Tue, 21 Apr 2015 00:16:38 +0200

 From: Andreas Oetken ennoerlan...@gmail.com
 
 The Error-Bit on the avalon streaming interface of the
 tx-dma-channel was always set. In SGMII configurations
 this leads to error-symbols on the PCS and packet-rejection
 on the receiver side (e.g. SGMII/1000Base-X connected switch).
 
 This only applies to the tse-configuration with MSGDMA.
 
 This issue was detected and fixed on a custom board with
 a direct connection to a Marvell switch in SGMII-PHY-Mode.
 (incl. custom patches for SGMII-PCS).
 
 According to the datasheet if ff_tx_err (avalon-streaming)
 is set it is forwarded to gm_tx_err. As a result the PCS
 is forwarding the error by sending a /V/-character.
 
 Signed-off-by: Andreas Oetken ennoerlan...@gmail.com

Applied.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] net: dsa: mv88e6xxx: fix setup of port control 1

2015-04-20 Thread David Miller
From: Andrew Lunn and...@lunn.ch
Date: Tue, 21 Apr 2015 01:05:07 +0200

 On Mon, Apr 20, 2015 at 05:19:23PM -0400, Vivien Didelot wrote:
 mv88e6xxx_setup_port_common was writing to PORT_DEFAULT_VLAN (port
 offset 0x07) instead of PORT_CONTROL_1 (port offset 0x05).
 
 Hi Vivien
 
 Good catch.
  
 Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com
 
 Fixes: cca8b1337541 (net: dsa: Use mnemonics rather than register numbers)
 Acked-by: Andrew Lunn and...@lunn.ch

Applied.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] net: dsa: mv88e6xxx: use PORT_DEFAULT_VLAN

2015-04-20 Thread David Miller
From: Vivien Didelot vivien.dide...@savoirfairelinux.com
Date: Mon, 20 Apr 2015 17:43:26 -0400

 Minor, use the explicit PORT_DEFAULT_VLAN define instead of 0x07.
 
 Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com

Applied.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [Intel-wired-lan] [PATCH] e1000e: Cleanup handling of VLAN_HLEN as a part of max frame size

2015-04-20 Thread Brown, Aaron F
 From: Intel-wired-lan [mailto:intel-wired-lan-boun...@lists.osuosl.org] On
 Behalf Of Alexander Duyck
 Sent: Wednesday, April 08, 2015 2:03 PM
 To: intel-wired-...@lists.osuosl.org; Kirsher, Jeffrey T
 Cc: netdev@vger.kernel.org; m...@cchtml.com; ht...@twofifty.com
 Subject: [Intel-wired-lan] [PATCH] e1000e: Cleanup handling of VLAN_HLEN
 as a part of max frame size
 
 When the VLAN_HLEN was added to the calculation for the maximum frame size
 there seems to have been a number of issues added to the driver.
 
 The first issue is that in some cases the maximum frame size for a device
 never really reached the actual maximum frame size as the VLAN header
 length was not included in the calculation for that value.  As a result some
 parts only supported a maximum frame size of either 1496 in the case of
 parts that didn't support jumbo frames, and 8996 in the case of the parts
 that do.
 
 The second issue is the fact that there were several checks that weren't
 updated so as a result setting an MTU of 1500 was treated as enabling
 jumbo
 frames as the calculated value was 1522 instead of 1518.  I have addressed
 those by replacing ETH_FRAME_LEN with VLAN_ETH_FRAME_LEN where
 appropriate.
 
 The final issue was the fact that lowering the MTU below 1500 would cause
 the driver to allocate 2K buffers for the rings.  This is an old issue
 that
 was fixed several years ago in igb/ixgbe and I am addressing now by just
 replacing == with a = so that we always just round up to 1522 for
 anything
 that isn't a jumbo frame.
 
 Fixes: c751a3d58cf2d (e1000e: Correctly include VLAN_HLEN when changing
 interface MTU)
 Signed-off-by: Alexander Duyck alexander.h.du...@redhat.com
 ---
 
 I have only build tested this though I am 99% sure the fixes here are
 correct.  This patch should fix issues on 82573 and ich8 w/ setting an MTU
 of 1500, and for the PCH series w/ setting an MTU of 9000.
 
 I assume I can get away with bumping the max_hw_frame_size for the PCH
 parts from 9018 to 9022 based on the fact that the Windows INF for the
 parts
 lists supporting either 1514, 4088, and 9014 all of which exclude the 8
 bytes for CRC and VLAN header.
 
  drivers/net/ethernet/intel/e1000e/82571.c   |2 +-
  drivers/net/ethernet/intel/e1000e/ich8lan.c |   10 +-
  drivers/net/ethernet/intel/e1000e/netdev.c  |   18 --
  3 files changed, 14 insertions(+), 16 deletions(-)

Tested-by: Aaron Brown aaron.f.br...@intel.com

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v2 01/11] RDMA/CMA: Mark IPv4 addresses correctly when the listener is IPv6

2015-04-20 Thread Shachar Raindel


 -Original Message-
 From: Or Gerlitz [mailto:gerlitz...@gmail.com]
 Sent: Monday, April 20, 2015 9:38 PM
 
 On Mon, Apr 20, 2015 at 7:41 PM, Jason Gunthorpe
 jguntho...@obsidianresearch.com wrote:
  On Mon, Apr 20, 2015 at 12:03:32PM +0300, Haggai Eran wrote:
  From: Yotam Kenneth yota...@mellanox.com
 
  When accepting a new connection with the listener being IPv6, the
  family of the new connection is set as IPv6. This causes
 cma_zero_addr
  function to return true on a non-zero address. As a result, the
 wrong
  code path is taken. This causes the connection request to be
 rejected,
  as the RDMA-CM code looks for the wrong type of device.
 
  This description doesn't really make sense as to what the problem is.
 
  @@ -866,12 +866,12 @@ static void cma_save_ip4_info(struct rdma_cm_id
 *id, struct rdma_cm_id *listen_i
 
listen4 = (struct sockaddr_in *) listen_id-
 route.addr.src_addr;
ip4 = (struct sockaddr_in *) id-route.addr.src_addr;
  - ip4-sin_family = listen4-sin_family;
  + ip4-sin_family = AF_INET;
 
  If listen_id-route.addr.src_addr.ss_family != AF_INET then it is
  invalid to cast to sockaddr_in.
 
  So listen4-sin_family MUST be AF_INET or this function MUST NOT be
  called.
 
  Forcing to AF_INET cannot be correct here.
 
 Jason, could you take a look @ this thread
 http://marc.info/?t=14158939504r=1w=2 where the authors
 addressed some comments from Sean and he eventually Acked the patch?
 
  What does this patch have to do with this series?
 
 I believe it's either a pre-patch to address some assumption or
 something they stepped on while testing
 

We stepped upon this issue while testing the containers support we are
submitting here. When creating a new network namespace, the kernel sets 
net-ipv6.sysctl.bindv6only to 0. As a result, we got the IPv6 listening
ID accepting IPv4 connection. This is fixed by the above patch.

Thanks,
--Shachar



IT-Service Desk

2015-04-20 Thread Maestracci Dominique
IT-Service Desk behöver du uppgradera till den senaste e-post Outlook Web Apps 
2015, vänligen klicka på IT-Service Deskhttp://sweden2.wix.com/sweden-upgrade 
att uppgradera till den senaste e-post Outlook Web Apps 2015

I samband med Microsoft Exchange
© 2015 Microsoft Corporation. Alla rättigheter reserverade
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH] net: stmmac: use msleep instead of udelay for gpio reset

2015-04-20 Thread Giuseppe CAVALLARO

On 4/19/2015 6:11 AM, Michael Trimarchi wrote:

Hi

On Apr 19, 2015 1:37 AM, Fabio Estevam feste...@gmail.com
mailto:feste...@gmail.com wrote:
 
  On Sat, Apr 18, 2015 at 12:02 PM, Michael Trimarchi
  mich...@amarulasolutions.com mailto:mich...@amarulasolutions.com
wrote:
 
   reset_gpio = data-reset_gpio;
   -   active_low = data-active_low;
   +   active_low = !!data-active_low;
 
  This is an unrelated change.

I have already tried to minimize the change. Anyway I will repost it if
necessary


yes send V2 w/ related changes for ms reset time only

thanks

Peppe


Michael



--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/16] printk: guard the amount written per line by devkmsg_read()

2015-04-20 Thread Petr Mladek
On Thu 2015-04-16 19:03:38, Tejun Heo wrote:
 devkmsg_read() uses 8k buffer and assumes that the formatted output
 message won't overrun which seems safe given LOG_LINE_MAX, the current
 use of dict and the escaping method being used; however, we're
 planning to use devkmsg formatting wider and accounting for the buffer
 size properly isn't that complicated.
 
 This patch defines CONSOLE_EXT_LOG_MAX as 8192 and updates
 devkmsg_read() so that it limits output accordingly.
 
 Signed-off-by: Tejun Heo t...@kernel.org

Reviewed-by: Petr Mladek pmla...@suse.cz

It is just a refactoring and does not modify the current behavior.


 Cc: Kay Sievers k...@vrfy.org
 Cc: Petr Mladek pmla...@suse.cz
 ---
  include/linux/printk.h |  2 ++
  kernel/printk/printk.c | 35 +++
  2 files changed, 25 insertions(+), 12 deletions(-)
 
 diff --git a/include/linux/printk.h b/include/linux/printk.h
 index 9b30871..58b1fec 100644
 --- a/include/linux/printk.h
 +++ b/include/linux/printk.h
 @@ -30,6 +30,8 @@ static inline const char *printk_skip_level(const char 
 *buffer)
   return buffer;
  }
  
 +#define CONSOLE_EXT_LOG_MAX  8192

If you do a respin for some reason, I would suggest removing
CONSOLE_ because it is also used for devkmsg.

Best Regards,
Petr

 +
  /* printk's without a loglevel use this.. */
  #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
  
 diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
 index 879edfc..b6e24af 100644
 --- a/kernel/printk/printk.c
 +++ b/kernel/printk/printk.c
 @@ -512,7 +512,7 @@ struct devkmsg_user {
   u32 idx;
   enum log_flags prev;
   struct mutex lock;
 - char buf[8192];
 + char buf[CONSOLE_EXT_LOG_MAX];
  };
  
  static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
 @@ -565,11 +565,18 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct 
 iov_iter *from)
   return ret;
  }
  
 +static void append_char(char **pp, char *e, char c)
 +{
 + if (*pp  e)
 + *(*pp)++ = c;
 +}
 +
  static ssize_t devkmsg_read(struct file *file, char __user *buf,
   size_t count, loff_t *ppos)
  {
   struct devkmsg_user *user = file-private_data;
   struct printk_log *msg;
 + char *p, *e;
   u64 ts_usec;
   size_t i;
   char cont = '-';
 @@ -579,6 +586,9 @@ static ssize_t devkmsg_read(struct file *file, char 
 __user *buf,
   if (!user)
   return -EBADF;
  
 + p = user-buf;
 + e = user-buf + sizeof(user-buf);
 +
   ret = mutex_lock_interruptible(user-lock);
   if (ret)
   return ret;
 @@ -625,9 +635,9 @@ static ssize_t devkmsg_read(struct file *file, char 
 __user *buf,
((user-prev  LOG_CONT)  !(msg-flags  LOG_PREFIX)))
   cont = '+';
  
 - len = sprintf(user-buf, %u,%llu,%llu,%c;,
 -   (msg-facility  3) | msg-level,
 -   user-seq, ts_usec, cont);
 + p += scnprintf(p, e - p, %u,%llu,%llu,%c;,
 +(msg-facility  3) | msg-level,
 +user-seq, ts_usec, cont);
   user-prev = msg-flags;
  
   /* escape non-printable characters */
 @@ -635,11 +645,11 @@ static ssize_t devkmsg_read(struct file *file, char 
 __user *buf,
   unsigned char c = log_text(msg)[i];
  
   if (c  ' ' || c = 127 || c == '\\')
 - len += sprintf(user-buf + len, \\x%02x, c);
 + p += scnprintf(p, e - p, \\x%02x, c);
   else
 - user-buf[len++] = c;
 + append_char(p, e, c);
   }
 - user-buf[len++] = '\n';
 + append_char(p, e, '\n');
  
   if (msg-dict_len) {
   bool line = true;
 @@ -648,30 +658,31 @@ static ssize_t devkmsg_read(struct file *file, char 
 __user *buf,
   unsigned char c = log_dict(msg)[i];
  
   if (line) {
 - user-buf[len++] = ' ';
 + append_char(p, e, ' ');
   line = false;
   }
  
   if (c == '\0') {
 - user-buf[len++] = '\n';
 + append_char(p, e, '\n');
   line = true;
   continue;
   }
  
   if (c  ' ' || c = 127 || c == '\\') {
 - len += sprintf(user-buf + len, \\x%02x, c);
 + p += scnprintf(p, e - p, \\x%02x, c);
   continue;
   }
  
 - user-buf[len++] = c;
 + append_char(p, e, c);
   }
 - user-buf[len++] = '\n';
 + append_char(p, e, '\n');
   }
  
   user-idx = log_next(user-idx);
   user-seq++;
   raw_spin_unlock_irq(logbuf_lock);
  
 + len = p - 

Re: [PATCH 01/16] printk: guard the amount written per line by devkmsg_read()

2015-04-20 Thread Petr Mladek
On Mon 2015-04-20 14:11:36, Petr Mladek wrote:
 On Thu 2015-04-16 19:03:38, Tejun Heo wrote:
  devkmsg_read() uses 8k buffer and assumes that the formatted output
  message won't overrun which seems safe given LOG_LINE_MAX, the current
  use of dict and the escaping method being used; however, we're
  planning to use devkmsg formatting wider and accounting for the buffer
  size properly isn't that complicated.
  
  This patch defines CONSOLE_EXT_LOG_MAX as 8192 and updates
  devkmsg_read() so that it limits output accordingly.
  
  Signed-off-by: Tejun Heo t...@kernel.org
 
 Reviewed-by: Petr Mladek pmla...@suse.cz
 
 It is just a refactoring and does not modify the current behavior.

Ah, to make it clear. It did not modify the behavior except for
adding the check for potential buffer overflow.

Best Regards,
Petr

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] neighbour.c: Avoid GC directly after state change

2015-04-20 Thread Ulf Samuelsson


On 04/20/2015 04:33 AM, YOSHIFUJI Hideaki wrote:

Hi,

Ulf Samuelsson wrote:

  From RFC2461:

|  REACHABLE   Roughly speaking, the neighbor is known to have been
|  reachable recently (within tens of seconds ago).
:
|  STALE   The neighbor is no longer known to be reachable but
|  until traffic is sent to the neighbor, no attempt
|  should be made to verify its reachability.
|  DELAY   The neighbor is no longer known to be reachable, and
|  traffic has recently been sent to the neighbor.
|  Rather than probe the neighbor immediately, however,
|  delay sending probes for a short while in order to
|  give upper layer protocols a chance to provide
|  reachability confirmation.



It is all depending on the meaning of the word recently.
You imply, that if timeouts have been triggered, then it is no longer recent,
but that is not the only interpretation, it is up to the implementer to decide
what is recently.

That quoted text is just a brief description.  The document has detailed
state machine.



It is not *mandatory* to follow the state machine strictly, Page 85:

   This appendix contains a summary of the rules specified in Sections
   7.2 and 7.3.  This document does not mandate that implementations
   adhere to this model as long as their external behavior is consistent
   with that described in this document.

The kernel does not follow the state machine today.
The kernel already has a test which compares

neigh-used + timeout with current time,
and move the entry to DELAY.

This is not documented in the state machine so there is already
a precedent to compare

neigh-compared + timeout with current time
and move the entry into DELAY state.

Obviously, some people would not want you to send probes before going STALE,
so it needs to be configurable.

Therefore, if a timeout occurs due to no traffic, they must be probed before
they are garbage collected.

It is what we do in PROBE state.
Yes, but you have to start by moving it into DELAY state first, to init 
the probe counter.

If you move the entry from REACHABLE to DELAY, then the probe counter
may be any value.




If this is not acceptable, how do you propose to solve the problem that you 
cannot
make remote units inaccessible for more than a fraction of a second?

How many neighbors do you want to maintain?
I guess you have to increase the number of gc_thresh1.

The current use cases have up to 2048 entries.
This is expected to grow in the future.
The 3.4 kernel used in the system today is limited to 1024,
but that has been raised to about 10k.

The gc_thresh1 test is not implemented in 3.4 but can be backported,
but still not convinced it is a good idea.

To complicate things, one requirement is that for some interfaces
you always want to keep things alive, if connected, but
for other interfaces you want things to be removed
to conserve memory.
Actually you would want to do this selection on a subnet level.

Internal discussions resulted in a proposal to change the patch,
so that you have a keepalive flag which is tested after
it has been decided to exit the REACHABLE state.

if the keepalive flag is set, you always go to DELAY state from REACHABLE.


Best Regards,
Ulf Samuelsson




--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iproute2 -next] examples: bpf: fix ld offs to have same prog loaded on ingress/egress

2015-04-20 Thread Daniel Borkmann
Fix up the eBPF example program to match our kernel fix in a166151cbe33 (bpf:
fix bpf helpers to use skb-mac_header relative offsets). Tested on ingress
and egress paths.

Signed-off-by: Daniel Borkmann dan...@iogearbox.net
Cc: Alexei Starovoitov a...@plumgrid.com
---
 ( Stephen, this applies on top of tc: built-in eBPF exec proxy:
   https://patchwork.ozlabs.org/patch/461837/ )

 examples/bpf/bpf_prog.c | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/examples/bpf/bpf_prog.c b/examples/bpf/bpf_prog.c
index 4dc00c3..009febd 100644
--- a/examples/bpf/bpf_prog.c
+++ b/examples/bpf/bpf_prog.c
@@ -58,6 +58,12 @@
  *random type none pass val 0
  *index 38 ref 1 bind 1
  *
+ * The same program can also be installed on ingress side (as opposed to above
+ * egress configuration), e.g.:
+ *
+ * # tc qdisc add dev em1 handle : ingress
+ * # tc filter add dev em1 parent : bpf obj ...
+ *
  * Notes on BPF agent:
  *
  * In the above example, the bpf_agent creates the unix domain socket
@@ -157,6 +163,7 @@
 #include linux/ip.h
 #include linux/ipv6.h
 #include linux/if_tunnel.h
+#include linux/filter.h
 #include linux/bpf.h
 
 /* Common, shared definitions with ebpf_agent.c. */
@@ -222,7 +229,7 @@ struct flow_keys {
__u32 ports;
__u16 port16[2];
};
-   __u16 th_off;
+   __s32 th_off;
__u8 ip_proto;
 };
 
@@ -242,14 +249,14 @@ static inline int flow_ports_offset(__u8 ip_proto)
}
 }
 
-static inline bool flow_is_frag(struct __sk_buff *skb, __u32 nh_off)
+static inline bool flow_is_frag(struct __sk_buff *skb, int nh_off)
 {
return !!(load_half(skb, nh_off + offsetof(struct iphdr, frag_off)) 
  (IP_MF | IP_OFFSET));
 }
 
-static inline __u32 flow_parse_ipv4(struct __sk_buff *skb, __u32 nh_off,
-   __u8 *ip_proto, struct flow_keys *flow)
+static inline int flow_parse_ipv4(struct __sk_buff *skb, int nh_off,
+ __u8 *ip_proto, struct flow_keys *flow)
 {
__u8 ip_ver_len;
 
@@ -272,18 +279,18 @@ static inline __u32 flow_parse_ipv4(struct __sk_buff 
*skb, __u32 nh_off,
return nh_off;
 }
 
-static inline __u32 flow_addr_hash_ipv6(struct __sk_buff *skb, __u32 off)
+static inline __u32 flow_addr_hash_ipv6(struct __sk_buff *skb, int off)
 {
__u32 w0 = load_word(skb, off);
__u32 w1 = load_word(skb, off + sizeof(w0));
__u32 w2 = load_word(skb, off + sizeof(w0) * 2);
__u32 w3 = load_word(skb, off + sizeof(w0) * 3);
 
-   return (__u32)(w0 ^ w1 ^ w2 ^ w3);
+   return w0 ^ w1 ^ w2 ^ w3;
 }
 
-static inline __u32 flow_parse_ipv6(struct __sk_buff *skb, __u32 nh_off,
-   __u8 *ip_proto, struct flow_keys *flow)
+static inline int flow_parse_ipv6(struct __sk_buff *skb, int nh_off,
+ __u8 *ip_proto, struct flow_keys *flow)
 {
*ip_proto = load_byte(skb, nh_off + offsetof(struct ipv6hdr, nexthdr));
 
@@ -296,10 +303,9 @@ static inline __u32 flow_parse_ipv6(struct __sk_buff *skb, 
__u32 nh_off,
 static inline bool flow_dissector(struct __sk_buff *skb,
  struct flow_keys *flow)
 {
+   int poff, nh_off = BPF_LL_OFF + ETH_HLEN;
__be16 proto = skb-protocol;
-   __u32 nh_off = ETH_HLEN;
__u8 ip_proto;
-   int poff;
 
/* TODO: check for skb-vlan_tci, skb-vlan_proto first */
if (proto == htons(ETH_P_8021AD)) {
@@ -369,7 +375,7 @@ static inline bool flow_dissector(struct __sk_buff *skb,
nh_off += flow_ports_offset(ip_proto);
 
flow-ports = load_word(skb, nh_off);
-   flow-th_off = (__u16)nh_off;
+   flow-th_off = nh_off;
flow-ip_proto = ip_proto;
 
return true;
-- 
1.9.3

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V1 net-next] IB/ipoib: Fix ndo_get_iflink

2015-04-20 Thread Haggai Eran
On 17/04/2015 22:21, David Miller wrote:
 From: Erez Shitrit ere...@mellanox.com
 Date: Thu, 16 Apr 2015 16:34:34 +0300
 
 Currently, iflink of the parent interface was always accessed, even 
 when interface didn't have a parent and hence we crashed there.

 Handle the interface types properly: for a child interface, return
 the ifindex of the parent, for parent interface, return its ifindex.

 For child devices, make sure to set the parent pointer prior to
 invoking register_netdevice(), this allows the new ndo to be called
 by the stack immediately after the child device is registered.

 Fixes: 5aa7add8f14b ('infiniband/ipoib: implement ndo_get_iflink')
 Reported-by: Honggang Li ho...@redhat.com
 Signed-off-by: Erez Shitrit ere...@mellanox.com
 Signed-off-by: Honggang Li ho...@redhat.com
 
 Applied, thanks.

Doug, Roland,

You might want to include this patch in your for-next / for-4.1 trees,
or merge net-next again. Currently they contain the issue it fixes, and
it can prevent some systems with IPoIB from booting.

Regards,
Haggai

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCHSET] printk, netconsole: implement reliable netconsole

2015-04-20 Thread David Laight
From: Of Rob Landley
 Sent: 19 April 2015 08:25
 On Thu, Apr 16, 2015 at 6:03 PM, Tejun Heo t...@kernel.org wrote:
  In a lot of configurations, netconsole is a useful way to collect
  system logs; however, all netconsole does is simply emitting UDP
  packets for the raw messages and there's no way for the receiver to
  find out whether the packets were lost and/or reordered in flight.
 
 Except a modern nonsaturated LAN shouldn't do that.
 
 If you have two machines plugged into a hub, and that's _all_ that's
 plugged in, packets should never get dropped. This was the original
 use case of netconsole was that the sender and the receiver were
 plugged into the same router.
 
 However, even on a quite active LAN the days of ethernet doing CDMA
 requiring retransmits are long gone, even 100baseT routers have been
 caching and retransmitting data internally so each connection can go
 at the full 11 megabytes/second with the backplane running fast enough
 to keep them all active at the same time. (That's why it's so hard to
 find a _hub_ anymore, it's all routers
...

Most machines are plugged into switches (not routers), many of them
will send 'pause' frames which the host mac may act on.
In which case packet loss is not expected (unless you have broadcast storms
when all bets are off).

Additionally, within a local network you shouldn't really get any packet
loss since no segments should be 100% loaded.
So for testing it is not unreasonable to expect no lost packets in netconsole
traffic.

David




Re: [PATCH 02/16] printk: factor out message formatting from devkmsg_read()

2015-04-20 Thread Petr Mladek
On Thu 2015-04-16 19:03:39, Tejun Heo wrote:
 The extended message formatting used for /dev/kmsg will be used to
 implement extended consoles.  Factor out msg_print_ext_header() and
 msg_print_ext_body() from devkmsg_read().
 
 This is pure restructuring.
 
 Signed-off-by: Tejun Heo t...@kernel.org

Reviewed-by: Petr Mladek pmla...@suse.cz

I like the split of the long function.

Best Regards,
Petr

 Cc: Kay Sievers k...@vrfy.org
 Cc: Petr Mladek pmla...@suse.cz
 ---
  kernel/printk/printk.c | 157 
 ++---
  1 file changed, 85 insertions(+), 72 deletions(-)
 
 diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
 index b6e24af..5ea6709 100644
 --- a/kernel/printk/printk.c
 +++ b/kernel/printk/printk.c
 @@ -505,6 +505,86 @@ int check_syslog_permissions(int type, bool from_file)
   return security_syslog(type);
  }
  
 +static void append_char(char **pp, char *e, char c)
 +{
 + if (*pp  e)
 + *(*pp)++ = c;
 +}
 +
 +static ssize_t msg_print_ext_header(char *buf, size_t size,
 + struct printk_log *msg, u64 seq,
 + enum log_flags prev_flags)
 +{
 + u64 ts_usec = msg-ts_nsec;
 + char cont = '-';
 +
 + do_div(ts_usec, 1000);
 +
 + /*
 +  * If we couldn't merge continuation line fragments during the print,
 +  * export the stored flags to allow an optional external merge of the
 +  * records. Merging the records isn't always neccessarily correct, like
 +  * when we hit a race during printing. In most cases though, it produces
 +  * better readable output. 'c' in the record flags mark the first
 +  * fragment of a line, '+' the following.
 +  */
 + if (msg-flags  LOG_CONT  !(prev_flags  LOG_CONT))
 + cont = 'c';
 + else if ((msg-flags  LOG_CONT) ||
 +  ((prev_flags  LOG_CONT)  !(msg-flags  LOG_PREFIX)))
 + cont = '+';
 +
 + return scnprintf(buf, size, %u,%llu,%llu,%c;,
 +(msg-facility  3) | msg-level, seq, ts_usec, cont);
 +}
 +
 +static ssize_t msg_print_ext_body(char *buf, size_t size,
 +   char *dict, size_t dict_len,
 +   char *text, size_t text_len)
 +{
 + char *p = buf, *e = buf + size;
 + size_t i;
 +
 + /* escape non-printable characters */
 + for (i = 0; i  text_len; i++) {
 + unsigned char c = text[i];
 +
 + if (c  ' ' || c = 127 || c == '\\')
 + p += scnprintf(p, e - p, \\x%02x, c);
 + else
 + append_char(p, e, c);
 + }
 + append_char(p, e, '\n');
 +
 + if (dict_len) {
 + bool line = true;
 +
 + for (i = 0; i  dict_len; i++) {
 + unsigned char c = dict[i];
 +
 + if (line) {
 + append_char(p, e, ' ');
 + line = false;
 + }
 +
 + if (c == '\0') {
 + append_char(p, e, '\n');
 + line = true;
 + continue;
 + }
 +
 + if (c  ' ' || c = 127 || c == '\\') {
 + p += scnprintf(p, e - p, \\x%02x, c);
 + continue;
 + }
 +
 + append_char(p, e, c);
 + }
 + append_char(p, e, '\n');
 + }
 +
 + return p - buf;
 +}
  
  /* /dev/kmsg - userspace message inject/listen interface */
  struct devkmsg_user {
 @@ -565,30 +645,17 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct 
 iov_iter *from)
   return ret;
  }
  
 -static void append_char(char **pp, char *e, char c)
 -{
 - if (*pp  e)
 - *(*pp)++ = c;
 -}
 -
  static ssize_t devkmsg_read(struct file *file, char __user *buf,
   size_t count, loff_t *ppos)
  {
   struct devkmsg_user *user = file-private_data;
   struct printk_log *msg;
 - char *p, *e;
 - u64 ts_usec;
 - size_t i;
 - char cont = '-';
   size_t len;
   ssize_t ret;
  
   if (!user)
   return -EBADF;
  
 - p = user-buf;
 - e = user-buf + sizeof(user-buf);
 -
   ret = mutex_lock_interruptible(user-lock);
   if (ret)
   return ret;
 @@ -618,71 +685,17 @@ static ssize_t devkmsg_read(struct file *file, char 
 __user *buf,
   }
  
   msg = log_from_idx(user-idx);
 - ts_usec = msg-ts_nsec;
 - do_div(ts_usec, 1000);
 -
 - /*
 -  * If we couldn't merge continuation line fragments during the print,
 -  * export the stored flags to allow an optional external merge of the
 -  * records. Merging the records isn't always neccessarily correct, like
 -  * when we hit a race during printing. In most cases though, it produces
 -  * better readable output. 'c' in 

Re: [PATCH 03/16] printk: move LOG_NOCONS skipping into call_console_drivers()

2015-04-20 Thread Petr Mladek
On Thu 2015-04-16 19:03:40, Tejun Heo wrote:
 When a line is printed by multiple printk invocations, each chunk is
 directly sent out to console drivers so that they don't get lost.
 When the line is completed and stored in the log buffer, the line is
 suppressed from going out to consoles as that'd lead to duplicate
 outputs.  This is tracked with LOG_NOCONS flag.
 
 The suppression is currently implemented in console_unlock() which
 skips invoking call_console_drivers() for LOG_NOCONS messages.  This
 patch moves the filtering into call_console_drivers() in preparation
 of the planned extended console drivers which will deal with the
 duplicate messages themselves.
 
 While this makes call_console_drivers() iterate over LOG_NOCONS
 messages, this is extremely unlikely to matter especially given that
 continuation lines aren't that common and also simplifies
 console_unlock() a bit.
 
 Signed-off-by: Tejun Heo t...@kernel.org
 Cc: Kay Sievers k...@vrfy.org
 Cc: Petr Mladek pmla...@suse.cz
 ---
  kernel/printk/printk.c | 46 --
  1 file changed, 24 insertions(+), 22 deletions(-)
 
 diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
 index 5ea6709..0175c46 100644
 --- a/kernel/printk/printk.c
 +++ b/kernel/printk/printk.c
 @@ -1417,7 +1417,8 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, 
 int, len)
   * log_buf[start] to log_buf[end - 1].
   * The console_lock must be held.
   */
 -static void call_console_drivers(int level, const char *text, size_t len)
 +static void call_console_drivers(int level, bool nocons,
 +  const char *text, size_t len)
  {
   struct console *con;
  
 @@ -1438,6 +1439,13 @@ static void call_console_drivers(int level, const char 
 *text, size_t len)
   if (!cpu_online(smp_processor_id()) 
   !(con-flags  CON_ANYTIME))
   continue;
 + /*
 +  * Skip record we have buffered and already printed
 +  * directly to the console when we received it.
 +  */
 + if (nocons)
 + continue;
 +
   con-write(con, text, len);
   }
  }
 @@ -1919,7 +1927,8 @@ static struct cont {
  } cont;
  static struct printk_log *log_from_idx(u32 idx) { return NULL; }
  static u32 log_next(u32 idx) { return 0; }
 -static void call_console_drivers(int level, const char *text, size_t len) {}
 +static void call_console_drivers(int level, bool nocons,
 +  const char *text, size_t len) {}
  static size_t msg_print_text(const struct printk_log *msg, enum log_flags 
 prev,
bool syslog, char *buf, size_t size) { return 0; }
  static size_t cont_print_text(char *text, size_t size) { return 0; }
 @@ -2190,7 +2199,7 @@ static void console_cont_flush(char *text, size_t size)
   len = cont_print_text(text, size);
   raw_spin_unlock(logbuf_lock);
   stop_critical_timings();
 - call_console_drivers(cont.level, text, len);
 + call_console_drivers(cont.level, false, text, len);
   start_critical_timings();
   local_irq_restore(flags);
   return;
 @@ -2234,6 +2243,7 @@ again:
   struct printk_log *msg;
   size_t len;
   int level;
 + bool nocons;
  
   raw_spin_lock_irqsave(logbuf_lock, flags);
   if (seen_seq != log_next_seq) {
 @@ -2252,38 +2262,30 @@ again:
   } else {
   len = 0;
   }
 -skip:
 +
   if (console_seq == log_next_seq)
   break;
  
   msg = log_from_idx(console_idx);
 - if (msg-flags  LOG_NOCONS) {
 - /*
 -  * Skip record we have buffered and already printed
 -  * directly to the console when we received it.
 -  */
 - console_idx = log_next(console_idx);
 - console_seq++;
 - /*
 -  * We will get here again when we register a new
 -  * CON_PRINTBUFFER console. Clear the flag so we
 -  * will properly dump everything later.
 -  */
 - msg-flags = ~LOG_NOCONS;
 - console_prev = msg-flags;
 - goto skip;
 - }
 -
   level = msg-level;
 + nocons = msg-flags  LOG_NOCONS;
   len += msg_print_text(msg, console_prev, false,
 text + len, sizeof(text) - len);
   console_idx = log_next(console_idx);
   console_seq++;
   console_prev = msg-flags;
 +
 + /*
 +  * The log will be processed again when we register a new
 +  * CON_PRINTBUFFER console. Clear the flag so we will
 +  * properly dump everything 

Re: [PATCH 11/11] k_clock:Remove the 32bit methods with timespec type

2015-04-20 Thread Richard Cochran
On Mon, Apr 20, 2015 at 01:57:39PM +0800, Baolin Wang wrote:

 @@ -911,18 +907,14 @@ retry:
   return -EINVAL;
  
   kc = clockid_to_kclock(timr-it_clock);
 - if (WARN_ON_ONCE(!kc || (!kc-timer_set  !kc-timer_set64))) {
 + if (WARN_ON_ONCE(!kc || !kc-timer_set64)) {
   error = -EINVAL;
   } else {
 - if (kc-timer_set64) {
 - new_spec64 = itimerspec_to_itimerspec64(new_spec);
 - error = kc-timer_set64(timr, flags, new_spec64,
 - old_spec64);
 - if (old_setting)
 - old_spec = 
 itimerspec64_to_itimerspec(old_spec64);
 - } else {
 - error = kc-timer_set(timr, flags, new_spec, rtn);
 - }
 + new_spec64 = itimerspec_to_itimerspec64(new_spec);
 + error = kc-timer_set64(timr, flags, new_spec64,
 + old_spec64);

This statement can fit on one line.

 + if (old_setting)
 + old_spec = itimerspec64_to_itimerspec(old_spec64);
   }
  
   unlock_timer(timr, flag);

 @@ -1057,14 +1045,13 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, 
 which_clock,
   if (!kc)
   return -EINVAL;
  
 - if (kc-clock_get64) {
 - error = kc-clock_get64(which_clock, kernel_tp64);
 - kernel_tp = timespec64_to_timespec(kernel_tp64);
 - } else {
 - error = kc-clock_get(which_clock, kernel_tp);
 - }
 + error = kc-clock_get64(which_clock, kernel_tp64);
 + if (!error)
 + return error;

Wrong test, should be: if (error) ...

 +
 + kernel_tp = timespec64_to_timespec(kernel_tp64);
  
 - if (!error  copy_to_user(tp, kernel_tp, sizeof (kernel_tp)))

The (!error && ...) was correct here!

 + if (copy_to_user(tp, kernel_tp, sizeof (kernel_tp)))
   error = -EFAULT;
  
   return error;

You can simplify this like so:

return copy_to_user(tp, kernel_tp, sizeof(kernel_tp)) ? -EFAULT : 0;

 @@ -1104,14 +1091,13 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, 
 which_clock,
   if (!kc)
   return -EINVAL;
  
 - if (kc-clock_getres64) {
 - error = kc-clock_getres64(which_clock, rtn_tp64);
 - rtn_tp = timespec64_to_timespec(rtn_tp64);
 - } else {
 - error = kc-clock_getres(which_clock, rtn_tp);
 - }
 + error = kc-clock_getres64(which_clock, rtn_tp64);
 + if (!error)
 + return error;

Also wrong.

 +
 + rtn_tp = timespec64_to_timespec(rtn_tp64);
  
 - if (!error  tp  copy_to_user(tp, rtn_tp, sizeof (rtn_tp)))
 + if (tp  copy_to_user(tp, rtn_tp, sizeof (rtn_tp)))
   error = -EFAULT;
  
   return error;
 -- 
 1.7.9.5
 

Thanks,
Richard
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 02/11] IB/addr: Pass network namespace as a parameter

2015-04-20 Thread Haggai Eran
From: Guy Shapiro gu...@mellanox.com

Add network namespace support to the ib_addr module. For that, all the address
resolution and matching should be done using the appropriate namespace instead
of init_net.

This is achieved by:

1. Adding an explicit network namespace argument to exported functions that
   require a namespace.
2. Saving the namespace in the rdma_addr_client structure.
3. Using it when calling networking functions.

In order to preserve the behavior of calling modules, init_net is
passed as the parameter in calls from other modules. This is modified as
namespace support is added on more levels.

Signed-off-by: Haggai Eran hagg...@mellanox.com
Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Shachar Raindel rain...@mellanox.com
Signed-off-by: Guy Shapiro gu...@mellanox.com
---
 drivers/infiniband/core/addr.c   | 31 --
 drivers/infiniband/core/cma.c|  4 ++-
 drivers/infiniband/core/verbs.c  | 14 +++---
 drivers/infiniband/hw/ocrdma/ocrdma_ah.c |  3 ++-
 include/rdma/ib_addr.h   | 44 
 5 files changed, 72 insertions(+), 24 deletions(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index f80da50d84a5..95beaef6b66d 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct 
rdma_dev_addr *dev_addr,
int ret = -EADDRNOTAVAIL;
 
if (dev_addr-bound_dev_if) {
-   dev = dev_get_by_index(init_net, dev_addr-bound_dev_if);
+   dev = dev_get_by_index(dev_addr-net, dev_addr-bound_dev_if);
if (!dev)
return -ENODEV;
ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -137,9 +137,10 @@ int rdma_translate_ip(struct sockaddr *addr, struct 
rdma_dev_addr *dev_addr,
}
 
switch (addr-sa_family) {
-   case AF_INET:
-   dev = ip_dev_find(init_net,
-   ((struct sockaddr_in *) addr)-sin_addr.s_addr);
+   case AF_INET: {
+   struct sockaddr_in *addr_in = (struct sockaddr_in *)addr;
+
+   dev = ip_dev_find(dev_addr-net, addr_in-sin_addr.s_addr);
 
if (!dev)
return ret;
@@ -149,12 +150,12 @@ int rdma_translate_ip(struct sockaddr *addr, struct 
rdma_dev_addr *dev_addr,
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
-
+   }
 #if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
rcu_read_lock();
-   for_each_netdev_rcu(init_net, dev) {
-   if (ipv6_chk_addr(init_net,
+   for_each_netdev_rcu(dev_addr-net, dev) {
+   if (ipv6_chk_addr(dev_addr-net,
  ((struct sockaddr_in6 *) 
addr)-sin6_addr,
  dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -236,7 +237,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
fl4.daddr = dst_ip;
fl4.saddr = src_ip;
fl4.flowi4_oif = addr-bound_dev_if;
-   rt = ip_route_output_key(init_net, fl4);
+   rt = ip_route_output_key(addr-net, fl4);
if (IS_ERR(rt)) {
ret = PTR_ERR(rt);
goto out;
@@ -278,12 +279,13 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
fl6.saddr = src_in-sin6_addr;
fl6.flowi6_oif = addr-bound_dev_if;
 
-   dst = ip6_route_output(init_net, NULL, fl6);
+   dst = ip6_route_output(addr-net, NULL, fl6);
if ((ret = dst-error))
goto put;
 
if (ipv6_addr_any(fl6.saddr)) {
-   ret = ipv6_dev_get_saddr(init_net, ip6_dst_idev(dst)-dev,
+   ret = ipv6_dev_get_saddr(addr-net,
+ip6_dst_idev(dst)-dev,
 fl6.daddr, 0, fl6.saddr);
if (ret)
goto put;
@@ -458,7 +460,7 @@ static void resolve_cb(int status, struct sockaddr 
*src_addr,
 }
 
 int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 
*dmac,
-  u16 *vlan_id)
+  u16 *vlan_id, struct net *net)
 {
int ret = 0;
struct rdma_dev_addr dev_addr;
@@ -481,6 +483,7 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union 
ib_gid *dgid, u8 *dmac,
return ret;
 
memset(dev_addr, 0, sizeof(dev_addr));
+   dev_addr.net = net;
 
ctx.addr = dev_addr;
init_completion(ctx.comp);
@@ -492,7 +495,7 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union 
ib_gid *dgid, u8 *dmac,
wait_for_completion(ctx.comp);
 
memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
-   dev = dev_get_by_index(init_net, 

[PATCH v2 03/11] IB/core: Pass network namespace as a parameter to relevant functions

2015-04-20 Thread Haggai Eran
From: Guy Shapiro gu...@mellanox.com

Add network namespace parameters for the address related ib_core
functions. The parameter is passed to lower level function, instead of
init_net, so things are done in the correct namespace.

For now pass init_net on every caller.
Callers that will pass init_net permanently are marked with an
appropriate comment.

Signed-off-by: Haggai Eran hagg...@mellanox.com
Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Shachar Raindel rain...@mellanox.com
Signed-off-by: Guy Shapiro gu...@mellanox.com
---
 drivers/infiniband/core/agent.c   |  4 +++-
 drivers/infiniband/core/cm.c  |  9 +++--
 drivers/infiniband/core/mad_rmpp.c| 10 --
 drivers/infiniband/core/user_mad.c|  4 +++-
 drivers/infiniband/core/verbs.c   | 10 ++
 drivers/infiniband/ulp/srpt/ib_srpt.c |  3 ++-
 include/rdma/ib_verbs.h   | 15 +--
 7 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index f6d29614cb01..539378d64041 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -99,7 +99,9 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh 
*grh,
}
 
agent = port_priv-agent[qpn];
-   ah = ib_create_ah_from_wc(agent-qp-pd, wc, grh, port_num);
+   /* Physical devices (and their MAD replies) always reside in the host
+* network namespace */
+   ah = ib_create_ah_from_wc(agent-qp-pd, wc, grh, port_num, init_net);
if (IS_ERR(ah)) {
dev_err(device-dev, ib_create_ah_from_wc error %ld\n,
PTR_ERR(ah));
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index e28a494e2a3a..5a45cb76c43e 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -290,8 +290,13 @@ static int cm_alloc_response_msg(struct cm_port *port,
struct ib_mad_send_buf *m;
struct ib_ah *ah;
 
+   /* For IB, the network namespace doesn't affect the created address
+* handle, so we use init_net. In the future, RoCE support will
+* require finding a specific network namespace to send the response
+* from. */
ah = ib_create_ah_from_wc(port-mad_agent-qp-pd, mad_recv_wc-wc,
- mad_recv_wc-recv_buf.grh, port-port_num);
+ mad_recv_wc-recv_buf.grh, port-port_num,
+ init_net);
if (IS_ERR(ah))
return PTR_ERR(ah);
 
@@ -346,7 +351,7 @@ static void cm_init_av_for_response(struct cm_port *port, 
struct ib_wc *wc,
av-port = port;
av-pkey_index = wc-pkey_index;
ib_init_ah_from_wc(port-cm_dev-ib_device, port-port_num, wc,
-  grh, av-ah_attr);
+  grh, av-ah_attr, init_net);
 }
 
 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
diff --git a/drivers/infiniband/core/mad_rmpp.c 
b/drivers/infiniband/core/mad_rmpp.c
index f37878c9c06e..6c1576202965 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -157,8 +157,11 @@ static struct ib_mad_send_buf *alloc_response_msg(struct 
ib_mad_agent *agent,
struct ib_ah *ah;
int hdr_len;
 
+   /* Physical devices (and their MAD replies) always reside in the host
+* network namespace */
ah = ib_create_ah_from_wc(agent-qp-pd, recv_wc-wc,
- recv_wc-recv_buf.grh, agent-port_num);
+ recv_wc-recv_buf.grh, agent-port_num,
+ init_net);
if (IS_ERR(ah))
return (void *) ah;
 
@@ -287,10 +290,13 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
if (!rmpp_recv)
return NULL;
 
+   /* Physical devices (and their MAD replies) always reside in the host
+* network namespace */
rmpp_recv-ah = ib_create_ah_from_wc(agent-agent.qp-pd,
 mad_recv_wc-wc,
 mad_recv_wc-recv_buf.grh,
-agent-agent.port_num);
+agent-agent.port_num,
+init_net);
if (IS_ERR(rmpp_recv-ah))
goto error;
 
diff --git a/drivers/infiniband/core/user_mad.c 
b/drivers/infiniband/core/user_mad.c
index 928cdd20e2d1..f34c6077759d 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -239,7 +239,9 @@ static void recv_handler(struct ib_mad_agent *agent,
 
ib_init_ah_from_wc(agent-device, agent-port_num,
   mad_recv_wc-wc, mad_recv_wc-recv_buf.grh,
-  ah_attr);
+  ah_attr, init_net);
+  

[PATCH v2 00/11] Add network namespace support in the RDMA-CM

2015-04-20 Thread Haggai Eran
On 4/15/2015 3:39 PM, Doug Ledford wrote:
 For instance, the namespace patches aren't included, and that's at least 
 partially because they didn't apply cleanly any more.

Here's an updated series on top of your tree. I've also included the fix for
IPv4 connections to IPv6 listeners.

Regards,
Haggai

Changes from v1:
- Include patch 1 in this series.
- Rebase for v4.1.

Changes from v0:
- Fix code review comments by Yann
- Rebase on top of linux-3.19

RDMA-CM uses IP based addressing and routing to setup RDMA connections between
hosts. Currently, all of the IP interfaces and addresses used by the RDMA-CM
must reside in the init_net namespace. This restricts the usage of containers
with RDMA to only work with host network namespace (aka the kernel init_net NS
instance).

This patchset allows using network namespaces with the RDMA-CM.

Each RDMA-CM and CM id is keeping a reference to a network namespace.

This reference is based on the process network namespace at the time of the
creation of the object or inherited from the listener.

This network namespace is used to perform all IP and network related
operations. Specifically, the local device lookup, as well as the remote GID
address resolution are done in the context of the RDMA-CM object's namespace.
This allows outgoing connections to reach the right target, even if the same
IP address exists in multiple network namespaces. This can happen if each
network namespace resides on a different pkey.

Additionally, the network namespace is used to split the listener service ID
table. From the user point of view, each network namespace has a unique,
completely independent table of service IDs. This allows running multiple
instances of a single service on the same machine, using containers. To
implement this, the CM layer now parses the IP address from the CM connect
requests, and searches for the matching networking device. The namespace of
the device found is used when looking up the service ID in the listener table.

The functionality introduced by this series would come into play when the
transport is InfiniBand and IPoIB interfaces are assigned to each namespace.
Multiple IPoIB interfaces can be created and assigned to different RDMA-CM
capable containers, for example using pipework [1].

Full support for RoCE will be introduced in a later stage.

The patches apply against Roland's/Doug's tree for v4.1.

The patchset is structured as follows:

Patch 1 is a resend of the patch to fix IPv4 connections to an IPv4/IPv6 listener.

Patches 2 and 3 are relatively trivial API extensions, requiring the callers
of certain ib_addr and ib_core functions to provide a network namespace, as
needed.

Patches 4 and 5 add the ability to look up a network namespace according to
the IP address, device and pkey. It finds the matching IPoIB interfaces, and
safely takes a reference on the network namespace before returning to the
caller.

Patch 6 moves the logic that extracts the IP address from a connect request
into the CM layer. This is needed for the upcoming listener lookup by
namespace.

Patch 7 adds support for network namespaces in the CM layer. All callers are
still passing init_net as the namespace, to maintain backward compatibility.
For incoming requests, the namespace of the relevant IPoIB device is used.

Patches 8 and 9 add proper namespace support to the RDMA-CM module.

Patches 10 and 11 add namespace support to the relevant user facing modules in
the IB stack.

[1] https://github.com/jpetazzo/pipework/pull/108

Guy Shapiro (7):
  IB/addr: Pass network namespace as a parameter
  IB/core: Pass network namespace as a parameter to relevant functions
  IB/ipoib: Return IPoIB devices as possible matches to
get_net_device_by_port_pkey_ip
  IB/cm, cma: Move RDMA IP CM private-data parsing code from ib_cma to
ib_cm
  IB/cm: Add network namespace support
  IB/cma: Add support for network namespaces
  IB/ucma: Take the network namespace from the process

Shachar Raindel (1):
  IB/ucm: Add partial support for network namespaces

Yotam Kenneth (3):
  RDMA/CMA: Mark IPv4 addresses correctly when the listener is IPv6
  IB/core: Find the network namespace matching connection parameters
  IB/cma: Separate port allocation to network namespaces

 drivers/infiniband/core/addr.c |  31 +-
 drivers/infiniband/core/agent.c|   4 +-
 drivers/infiniband/core/cm.c   | 287 --
 drivers/infiniband/core/cma.c  | 332 +
 drivers/infiniband/core/device.c   |  57 
 drivers/infiniband/core/mad_rmpp.c |  10 +-
 drivers/infiniband/core/ucm.c  |   4 +-
 drivers/infiniband/core/ucma.c |   4 +-
 drivers/infiniband/core/user_mad.c |   4 +-
 drivers/infiniband/core/verbs.c|  22 +-
 drivers/infiniband/hw/ocrdma/ocrdma_ah.c   |   3 +-
 

[PATCH v2 01/11] RDMA/CMA: Mark IPv4 addresses correctly when the listener is IPv6

2015-04-20 Thread Haggai Eran
From: Yotam Kenneth yota...@mellanox.com

When accepting a new connection with the listener being IPv6, the
family of the new connection is set as IPv6. This causes cma_zero_addr
function to return true on a non-zero address. As a result, the wrong
code path is taken. This causes the connection request to be rejected,
as the RDMA-CM code looks for the wrong type of device.

Since copying the ip address is done in a different function depending
on the family (cma_save_ip4_info/cma_save_ip6_info) this is fixed by
hard coding the family of the IP address according to the function.

Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Or Gerlitz ogerl...@mellanox.com
---
 drivers/infiniband/core/cma.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d570030d899c..6e5e11ca7702 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -866,12 +866,12 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, 
struct rdma_cm_id *listen_i
 
listen4 = (struct sockaddr_in *) listen_id-route.addr.src_addr;
ip4 = (struct sockaddr_in *) id-route.addr.src_addr;
-   ip4-sin_family = listen4-sin_family;
+   ip4-sin_family = AF_INET;
ip4-sin_addr.s_addr = hdr-dst_addr.ip4.addr;
ip4-sin_port = listen4-sin_port;
 
ip4 = (struct sockaddr_in *) id-route.addr.dst_addr;
-   ip4-sin_family = listen4-sin_family;
+   ip4-sin_family = AF_INET;
ip4-sin_addr.s_addr = hdr-src_addr.ip4.addr;
ip4-sin_port = hdr-port;
 }
@@ -883,12 +883,12 @@ static void cma_save_ip6_info(struct rdma_cm_id *id, 
struct rdma_cm_id *listen_i
 
listen6 = (struct sockaddr_in6 *) listen_id-route.addr.src_addr;
ip6 = (struct sockaddr_in6 *) id-route.addr.src_addr;
-   ip6-sin6_family = listen6-sin6_family;
+   ip6-sin6_family = AF_INET6;
ip6-sin6_addr = hdr-dst_addr.ip6;
ip6-sin6_port = listen6-sin6_port;
 
ip6 = (struct sockaddr_in6 *) id-route.addr.dst_addr;
-   ip6-sin6_family = listen6-sin6_family;
+   ip6-sin6_family = AF_INET6;
ip6-sin6_addr = hdr-src_addr.ip6;
ip6-sin6_port = hdr-port;
 }
-- 
1.7.11.2

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 10/11] IB/ucma: Take the network namespace from the process

2015-04-20 Thread Haggai Eran
From: Guy Shapiro gu...@mellanox.com

Add support for network namespaces from user space. This is done by passing
the network namespace of the process instead of init_net.

Signed-off-by: Haggai Eran hagg...@mellanox.com
Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Shachar Raindel rain...@mellanox.com
Signed-off-by: Guy Shapiro gu...@mellanox.com
---
 drivers/infiniband/core/ucma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 2f7fad84f933..0ccdf2b05153 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -42,6 +42,7 @@
 #include linux/slab.h
 #include linux/sysctl.h
 #include linux/module.h
+#include linux/nsproxy.h
 
 #include rdma/rdma_user_cm.h
 #include rdma/ib_marshall.h
@@ -392,7 +393,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const 
char __user *inbuf,
 
ctx-uid = cmd.uid;
ctx-cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type,
-   init_net);
+   current-nsproxy-net_ns);
if (IS_ERR(ctx-cm_id)) {
ret = PTR_ERR(ctx-cm_id);
goto err1;
-- 
1.7.11.2

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [RFC,1/8] soc/fman: Add FMan MURAM support

2015-04-20 Thread igal.liber...@freescale.com


Regards,
Igal Liberman.

 -Original Message-
 From: Kumar Gala [mailto:ga...@kernel.crashing.org]
 Sent: Thursday, March 12, 2015 5:57 PM
 To: Liberman Igal-B31950
 Cc: linuxppc-...@lists.ozlabs.org; netdev@vger.kernel.org; linux-
 ker...@vger.kernel.org; Wood Scott-B07421
 Subject: Re: [RFC,1/8] soc/fman: Add FMan MURAM support
 
 
 On Mar 11, 2015, at 12:07 AM, Igal.Liberman igal.liber...@freescale.com
 wrote:
 
  From: Igal Liberman igal.liber...@freescale.com
 
  Add Frame Manager Multi-User RAM support.
 
  Signed-off-by: Igal Liberman igal.liber...@freescale.com
  ---
  drivers/soc/fsl/fman/Kconfig|1 +
  drivers/soc/fsl/fman/Makefile   |5 +-
  drivers/soc/fsl/fman/fm_muram.c |  174
 +++
  drivers/soc/fsl/fman/inc/fm_muram_ext.h |   98 +
  4 files changed, 276 insertions(+), 2 deletions(-) create mode 100644
  drivers/soc/fsl/fman/fm_muram.c create mode 100644
  drivers/soc/fsl/fman/inc/fm_muram_ext.h
 
 
 use lib/genalloc instead of rheap
 

Hi Kumar,
I looked into lib/genalloc allocator.
As far as I can see, the genalloc allocator doesn't allow controlling the
memory alignment when you allocate a chunk of memory.
Two important notes regarding MURAM memory:
- The allocated memory chunks should have specific alignment (might be 
different in each chunk).
- The allocations must be efficient, we don't want to waste MURAM due to 
alignment issues.

 - k
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/11] linux/time64.h:Introduce the 'struct itimerspec64' for 64bit

2015-04-20 Thread Sergei Shtylyov

Hello.

On 4/20/2015 8:57 AM, Baolin Wang wrote:


This patch introduces the 'struct itimerspec64' for 64bit to replace itimerspec,
and also introduces the conversion methods: itimerspec64_to_itimerspec() and
itimerspec_to_itimerspec64(), that makes itimerspec to ready for 2038 year.


   To not needed here.


Signed-off-by: Baolin Wang baolin.w...@linaro.org


[...]

WBR, Sergei

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Xen-devel] tcp: refine TSO autosizing causes performance regression on Xen

2015-04-20 Thread George Dunlap
On Thu, Apr 16, 2015 at 1:42 PM, Eric Dumazet eric.duma...@gmail.com wrote:
 On Thu, 2015-04-16 at 11:01 +0100, George Dunlap wrote:

 He suggested that after he'd been prodded by 4 more e-mails in which two
 of us guessed what he was trying to get at.  That's what I was
 complaining about.

 My big complain is that I suggested to test to double the sysctl, which
 gave good results.

 Then you provided a patch using a 8x factor. How does that sound ?

 Next time I ask a raise, I should try a 8x factor as well, who knows,
 it might be accepted.

I see.  I chose the value that Stefano had determined had completely
eliminated the overhead.  Doubling the value reduces the overhead to
8%, which should be fine for a short-term fix while we get a proper
mid/long-term fix.

 -George
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] ip_forward: Drop frames with attached skb-sk

2015-04-20 Thread Sebastian Poehn
Initial discussion was:
[FYI] xfrm: Don't lookup sk_policy for timewait sockets

Forwarded frames should not have a socket attached. Especially
tw sockets will lead to panics later-on in the stack.

This was observed with TPROXY assigning a tw socket and broken
(misconfigured) policy routing. As a result, the frame enters the
forwarding path instead of the input path. We cannot solve this in
TPROXY as it cannot know that policy routing is broken.

v2:
Remove useless comment

Signed-off-by: Sebastian Poehn sebastian.po...@gmail.com
---
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 939992c..3674484 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -82,6 +82,9 @@ int ip_forward(struct sk_buff *skb)
if (skb-pkt_type != PACKET_HOST)
goto drop;
 
+   if (unlikely(skb-sk))
+   goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
 
--

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH linux-next v5] mlx5: wrong page mask if CONFIG_ARCH_DMA_ADDR_T_64BIT enabled for 32Bit architectures

2015-04-20 Thread Eli Cohen
On Wed, Apr 15, 2015 at 04:36:15PM +0800, Honggang Li wrote:
 If CONFIG_ARCH_DMA_ADDR_T_64BIT enabled for x86 systems and physical
 memory is more than 4GB, dma_map_page may return a valid memory
 address which is greater than 0xffffffff. As a result, the mlx5 device page
 allocator RB tree will be initialized with valid addresses greater than
 0xffffffff.
 
 However, (addr & PAGE_MASK) sets the high four bytes to zeros. So, it's
 impossible for the function, free_4k, to release the pages whose
 addresses greater than 4GB. Memory leaks. And mlx5_ib module can't
 release the pages when user try to remove the module, as a result,
 system hang.
 
 [root@rdma05 root]# dmesg  | grep addr | head
 addr = 3fe384000
 addr & PAGE_MASK =  fe384000
 [root@rdma05 root]# rmmod mlx5_ib    hang on
 
 -- console log -
 mlx5_ib :04:00.0: irq 138 for MSI/MSI-X
   alloc irq_desc for 139 on node -1
   alloc kstat_irqs on node -1
 mlx5_ib :04:00.0: irq 139 for MSI/MSI-X
 :04:00.0:free_4k:221:(pid 1519): page not found
 :04:00.0:free_4k:221:(pid 1519): page not found
 :04:00.0:free_4k:221:(pid 1519): page not found
 :04:00.0:free_4k:221:(pid 1519): page not found
 -- console log -
 
 Fixes: bf0bf77f6519 ('mlx5: Support communicating arbitrary host page size to 
 firmware')
 Signed-off-by: Honggang Li ho...@redhat.com
 ---

Acked-by: Eli Cohen e...@mellanox.com
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V1 net-next] IB/ipoib: Fix ndo_get_iflink

2015-04-20 Thread Or Gerlitz
On Mon, Apr 20, 2015 at 11:16 AM, Haggai Eran hagg...@mellanox.com wrote:
 On 17/04/2015 22:21, David Miller wrote:
 From: Erez Shitrit ere...@mellanox.com
 Date: Thu, 16 Apr 2015 16:34:34 +0300

 Currently, iflink of the parent interface was always accessed, even
 when interface didn't have a parent and hence we crashed there.

 Handle the interface types properly: for a child interface, return
 the ifindex of the parent, for parent interface, return its ifindex.

 For child devices, make sure to set the parent pointer prior to
 invoking register_netdevice(), this allows the new ndo to be called
 by the stack immediately after the child device is registered.

 Fixes: 5aa7add8f14b ('infiniband/ipoib: implement ndo_get_iflink')
 Reported-by: Honggang Li ho...@redhat.com
 Signed-off-by: Erez Shitrit ere...@mellanox.com
 Signed-off-by: Honggang Li ho...@redhat.com

 Applied, thanks.

 Doug, Roland,
 You might want to include this patch in your for-next / for-4.1 trees,
 or merge net-next again. Currently they contain the issue it fixes, and
 it can prevent some systems with IPoIB from booting.

Haggai,


It's upstream by now, pull Linus tree.

Or.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC] [PATCH] FUJITSU Extended Socket network device driver

2015-04-20 Thread Izumi, Taku
This patch adds support for FUJITSU Extended Socket network
device. Extended Socket network device is a shared memory
based high-speed network interface between Extended Partitions of
PRIMEQUEST 2000 series.

#
I know this code needs more refactoring, but I wanted to post
it as soon as possible because this is my first time posting
driver code from scratch.
#

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/Kconfig |8 +
 drivers/platform/x86/Makefile|2 +
 drivers/platform/x86/fjes/Makefile   |   31 +
 drivers/platform/x86/fjes/fjes.h |   87 ++
 drivers/platform/x86/fjes/fjes_ethtool.c |  135 +++
 drivers/platform/x86/fjes/fjes_hw.c  | 1148 ++
 drivers/platform/x86/fjes/fjes_hw.h  |  353 +++
 drivers/platform/x86/fjes/fjes_main.c| 1525 ++
 drivers/platform/x86/fjes/fjes_regs.h|  139 +++
 9 files changed, 3428 insertions(+)
 create mode 100644 drivers/platform/x86/fjes/Makefile
 create mode 100755 drivers/platform/x86/fjes/fjes.h
 create mode 100755 drivers/platform/x86/fjes/fjes_ethtool.c
 create mode 100755 drivers/platform/x86/fjes/fjes_hw.c
 create mode 100755 drivers/platform/x86/fjes/fjes_hw.h
 create mode 100755 drivers/platform/x86/fjes/fjes_main.c
 create mode 100755 drivers/platform/x86/fjes/fjes_regs.h

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 9752761..268c7495 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -884,4 +884,12 @@ config PVPANIC
  a paravirtualized device provided by QEMU; it lets a virtual machine
  (guest) communicate panic events to the host.
 
+config FUJITSU_ES
+   tristate FUJITSU Extended Socket Network Device driver
+   depends on ACPI
+   ---help---
+ This driver provides support for Extended Socket network device on
+ Extended Partitioning of FUJITSU PRIMEQUEST 2000 series.
+
+
 endif # X86_PLATFORM_DEVICES
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index f82232b..319eb20 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -58,3 +58,5 @@ obj-$(CONFIG_INTEL_SMARTCONNECT)  += intel-smartconnect.o
 
 obj-$(CONFIG_PVPANIC)   += pvpanic.o
 obj-$(CONFIG_ALIENWARE_WMI)+= alienware-wmi.o
+
+obj-$(CONFIG_FUJITSU_ES)   += fjes/
diff --git a/drivers/platform/x86/fjes/Makefile 
b/drivers/platform/x86/fjes/Makefile
new file mode 100644
index 000..45dc9d3
--- /dev/null
+++ b/drivers/platform/x86/fjes/Makefile
@@ -0,0 +1,31 @@
+
+# 
+# FUJITSU Extended Socket Network Device driver
+# Copyright (c) 2015 FUJITSU LIMITED
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, see http://www.gnu.org/licenses/.
+#
+# The full GNU General Public License is included in this distribution in
+# the file called COPYING.
+#
+
+
+
+#
+# Makefile for the FUJITSU Extended Socket network device driver
+#
+
+obj-$(CONFIG_FUJITSU_ES) += fjes.o
+
+fjes-objs := fjes_main.o fjes_ethtool.o fjes_hw.o
+
diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h
new file mode 100755
index 000..890f16f
--- /dev/null
+++ b/drivers/platform/x86/fjes/fjes.h
@@ -0,0 +1,87 @@
+/*
+ *  FUJITSU Extended Socket Network Device driver
+ *  Copyright (c) 2015 FUJITSU LIMITED
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see http://www.gnu.org/licenses/.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called COPYING.
+ *
+ */
+
+
+#ifndef FJES_H_
+#define FJES_H_
+
+#include linux/acpi.h
+#include linux/workqueue.h
+
+#include fjes_hw.h
+
+struct fjes_adapter;
+
+#define FJES_ACPI_SYMBOL   Extended Socket
+
+#define FJES_MAX_QUEUES

Re: [PATCH] Bluetooth: Pre-initialize variables in read_local_oob_ext_data_complete()

2015-04-20 Thread Geert Uytterhoeven
Hi Marcel,

On Fri, Apr 17, 2015 at 10:38 PM, Marcel Holtmann mar...@holtmann.org wrote:
 net/bluetooth/mgmt.c: In function ‘read_local_oob_ext_data_complete’:
 net/bluetooth/mgmt.c:6474: warning: ‘r256’ may be used uninitialized in 
 this function
 net/bluetooth/mgmt.c:6474: warning: ‘h256’ may be used uninitialized in 
 this function
 net/bluetooth/mgmt.c:6474: warning: ‘r192’ may be used uninitialized in 
 this function
 net/bluetooth/mgmt.c:6474: warning: ‘h192’ may be used uninitialized in 
 this function

 While these are false positives, the code can be shortened by
 pre-initializing the hash table pointers and eir_len. This has the side
 effect of killing the compiler warnings.

 can you be a bit specific on which compiler version is this. I fixed one 
 occurrence that seemed valid. However in this case the compiler seems to be 
 just plain stupid. On a gcc 4.9, I am not seeing these for example.

 gcc 4.1.2. As there were too many false positives, these warnings were
 disabled in later versions (throwing away the children with the bad water).

 If you don't like my patch, just drop it. I only look at newly
 introduced warnings
 of this kind anyway.

 I really do not know what is the best solution here. This is a false 
 positive. And I have been looking at this particular code for a warning that 
 was valid, but we missed initially. But these warnings that you are fixing 
 are clearly false positive.

I only send patches to fix false positives if I think the patches improve the
code. As this is a subjective matter, it's up to you as the maintainer to
decide.

 If this only happens with an old compiler version, I would tend to leave the 
 code as is. Then again, what is the general preferred approach here?

As this is a false positive, it's clearly up to the maintainer to
decide if the patch
improves the code or not.

Thanks!

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say programmer or something like that.
-- Linus Torvalds
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 05/11] IB/ipoib: Return IPoIB devices as possible matches to get_net_device_by_port_pkey_ip

2015-04-20 Thread Haggai Eran
From: Guy Shapiro gu...@mellanox.com

Implement callback that returns network device to ib_core according to
connection parameters. Check the ipoib device and iterate over all child
devices to look for a match.

For each ipoib device we iterate through all upper devices when searching for
a matching IP, in order to support bonding.

Signed-off-by: Guy Shapiro gu...@mellanox.com
Signed-off-by: Haggai Eran hagg...@mellanox.com
Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Shachar Raindel rain...@mellanox.com
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 122 +-
 1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c 
b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 7cad4dd87469..89a59a0e17e6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -48,6 +48,9 @@
 
 #include linux/jhash.h
 #include net/arp.h
+#include net/addrconf.h
+#include linux/inetdevice.h
+#include rdma/ib_cache.h
 
 #define DRV_VERSION 1.0.0
 
@@ -91,11 +94,15 @@ struct ib_sa_client ipoib_sa_client;
 static void ipoib_add_one(struct ib_device *device);
 static void ipoib_remove_one(struct ib_device *device);
 static void ipoib_neigh_reclaim(struct rcu_head *rp);
+static struct net_device *ipoib_get_net_device_by_port_pkey_ip(
+   struct ib_device *dev, u8 port, u16 pkey,
+   struct sockaddr *addr);
 
 static struct ib_client ipoib_client = {
.name   = ipoib,
.add= ipoib_add_one,
-   .remove = ipoib_remove_one
+   .remove = ipoib_remove_one,
+   .get_net_device_by_port_pkey_ip = ipoib_get_net_device_by_port_pkey_ip,
 };
 
 int ipoib_open(struct net_device *dev)
@@ -222,6 +229,119 @@ static int ipoib_change_mtu(struct net_device *dev, int 
new_mtu)
return 0;
 }
 
+static bool ipoib_is_dev_match_addr(struct sockaddr *addr,
+   struct net_device *dev)
+{
+   struct net *net = dev_net(dev);
+
+   if (addr-sa_family == AF_INET) {
+   struct in_device *in_dev = in_dev_get(dev);
+   struct sockaddr_in *addr_in = (struct sockaddr_in *)addr;
+   __be32 ret_addr;
+
+   if (!in_dev)
+   return false;
+
+   ret_addr = inet_confirm_addr(net, in_dev, 0,
+addr_in-sin_addr.s_addr,
+RT_SCOPE_HOST);
+   in_dev_put(in_dev);
+   if (ret_addr)
+   return true;
+   }
+#if IS_ENABLED(CONFIG_IPV6)
+   else if (addr-sa_family == AF_INET6) {
+   struct sockaddr_in6 *addr_in6 = (struct sockaddr_in6 *)addr;
+
+   if (ipv6_chk_addr(net, addr_in6-sin6_addr, dev, 1))
+   return true;
+   }
+#endif
+   return false;
+}
+
+/**
+ * Find a net_device matching the given address, which is an upper device of
+ * the given net_device.
+ * @addr: IP address to look for.
+ * @dev: base IPoIB net_device
+ *
+ * If found, returns the net_device with a reference held. Otherwise return
+ * NULL.
+ */
+static struct net_device *ipoib_get_net_dev_match_addr(struct sockaddr *addr,
+  struct net_device *dev)
+{
+   struct net_device *upper,
+ *result = NULL;
+   struct list_head *iter;
+
+   if (ipoib_is_dev_match_addr(addr, dev)) {
+   dev_hold(dev);
+   return dev;
+   }
+
+   rcu_read_lock();
+   netdev_for_each_all_upper_dev_rcu(dev, upper, iter) {
+   if (ipoib_is_dev_match_addr(addr, upper)) {
+   dev_hold(upper);
+   result = upper;
+   break;
+   }
+   }
+   rcu_read_unlock();
+   return result;
+}
+
+static struct net_device *ipoib_get_net_device_by_port_pkey_ip(
+   struct ib_device *dev, u8 port, u16 pkey, struct sockaddr *addr)
+{
+   struct ipoib_dev_priv *priv;
+   struct list_head *dev_list;
+   u16 pkey_index;
+
+   ib_find_cached_pkey(dev, port, pkey, pkey_index);
+   if (pkey_index == (u16)-1)
+   return NULL;
+
+   if (rdma_node_get_transport(dev-node_type) != RDMA_TRANSPORT_IB)
+   return NULL;
+
+   dev_list = ib_get_client_data(dev, ipoib_client);
+   if (!dev_list)
+   return NULL;
+
+   list_for_each_entry(priv, dev_list, list) {
+   struct net_device *net_dev = NULL;
+   struct ipoib_dev_priv *child_priv;
+
+   if (priv-port != port)
+   continue;
+
+   if (priv-pkey_index == pkey_index) {
+   net_dev = ipoib_get_net_dev_match_addr(addr, priv-dev);
+   if (net_dev)
+   return net_dev;
+   }
+
+

[PATCH v2 08/11] IB/cma: Separate port allocation to network namespaces

2015-04-20 Thread Haggai Eran
From: Yotam Kenneth yota...@mellanox.com

Keep a radix-tree for the network namespaces we support for each port-space.
Dynamically allocate idr for network namespace upon first bind request for a
port in the (ps, net) tuple.
Destroy the idr when the (ps, net) tuple does not contain any bound ports.

This patch is internal infrastructure work for the following patch. In
this patch, init_net is statically used as the network namespace for
the new port-space API.

The radix-tree is protected under the same locking that protects the
rest of the port space data. This locking is practically a big, static
mutex lock for the entire module.

Signed-off-by: Haggai Eran hagg...@mellanox.com
Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Shachar Raindel rain...@mellanox.com
Signed-off-by: Guy Shapiro gu...@mellanox.com
---
 drivers/infiniband/core/cma.c | 122 ++
 1 file changed, 99 insertions(+), 23 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 1ce84a03c883..022b0d0a51cc 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -39,11 +39,13 @@
 #include linux/mutex.h
 #include linux/random.h
 #include linux/idr.h
+#include linux/radix-tree.h
 #include linux/inetdevice.h
 #include linux/slab.h
 #include linux/module.h
 #include net/route.h
 
+#include net/netns/hash.h
 #include net/tcp.h
 #include net/ipv6.h
 
@@ -80,10 +82,83 @@ static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
 static struct workqueue_struct *cma_wq;
-static DEFINE_IDR(tcp_ps);
-static DEFINE_IDR(udp_ps);
-static DEFINE_IDR(ipoib_ps);
-static DEFINE_IDR(ib_ps);
+static RADIX_TREE(tcp_ps, GFP_KERNEL);
+static RADIX_TREE(udp_ps, GFP_KERNEL);
+static RADIX_TREE(ipoib_ps, GFP_KERNEL);
+static RADIX_TREE(ib_ps, GFP_KERNEL);
+
+static LIST_HEAD(idrs_list);
+
+struct idr_ll {
+   unsigned net_val;
+   struct net *net;
+   struct radix_tree_root *ps;
+   struct idr idr;
+};
+
+static void zap_ps_idr(struct idr_ll *idr_ll)
+{
+   radix_tree_delete(idr_ll-ps, idr_ll-net_val);
+   idr_destroy(idr_ll-idr);
+   kfree(idr_ll);
+}
+
+static int cma_ps_alloc(struct radix_tree_root *ps, struct net *net, void *ptr,
+   int snum)
+{
+   struct idr_ll *idr_ll;
+   int err;
+   int res;
+
+   idr_ll = radix_tree_lookup(ps, net_hash_mix(net));
+   if (!idr_ll) {
+   idr_ll = kmalloc(sizeof(*idr_ll), GFP_KERNEL);
+   if (!idr_ll)
+   return -ENOMEM;
+   idr_init(idr_ll-idr);
+   idr_ll-net_val = net_hash_mix(net);
+   idr_ll-net = net;
+   idr_ll-ps = ps;
+   err = radix_tree_insert(ps, idr_ll-net_val, idr_ll);
+   if (err) {
+   idr_destroy(idr_ll-idr);
+   kfree(idr_ll);
+   return err;
+   }
+   }
+   res = idr_alloc(idr_ll-idr, ptr, snum, snum + 1, GFP_KERNEL);
+   if (unlikely((res  0)  idr_is_empty(idr_ll-idr))) {
+   zap_ps_idr(idr_ll);
+   return res;
+   }
+   return res;
+}
+
+static void *cma_ps_find(struct radix_tree_root *ps, struct net *net, int snum)
+{
+   struct idr_ll *idr_ll;
+
+   idr_ll = radix_tree_lookup(ps, net_hash_mix(net));
+   if (!idr_ll)
+   return NULL;
+   return idr_find(idr_ll-idr, snum);
+}
+
+static void cma_ps_remove(struct radix_tree_root *ps, struct net *net, int 
snum)
+{
+   struct idr_ll *idr_ll;
+
+   idr_ll = radix_tree_lookup(ps, net_hash_mix(net));
+   if (unlikely(!idr_ll)) {
+   WARN(1, cma_ps_removed can't find expected net ns 0x%lx\n,
+(unsigned long)net);
+   return;
+   }
+   idr_remove(idr_ll-idr, snum);
+   if (idr_is_empty(idr_ll-idr)) {
+   zap_ps_idr(idr_ll);
+   }
+}
 
 struct cma_device {
struct list_headlist;
@@ -94,9 +169,9 @@ struct cma_device {
 };
 
 struct rdma_bind_list {
-   struct idr  *ps;
-   struct hlist_head   owners;
-   unsigned short  port;
+   struct radix_tree_root  *ps;
+   struct hlist_head   owners;
+   unsigned short  port;
 };
 
 enum {
@@ -885,7 +960,7 @@ static void cma_release_port(struct rdma_id_private 
*id_priv)
mutex_lock(lock);
hlist_del(id_priv-node);
if (hlist_empty(bind_list-owners)) {
-   idr_remove(bind_list-ps, bind_list-port);
+   cma_ps_remove(bind_list-ps, init_net, bind_list-port);
kfree(bind_list);
}
mutex_unlock(lock);
@@ -2198,8 +2273,8 @@ static void cma_bind_port(struct rdma_bind_list 
*bind_list,
hlist_add_head(id_priv-node, bind_list-owners);
 }
 
-static int cma_alloc_port(struct idr *ps, struct rdma_id_private 

[PATCH v2 06/11] IB/cm, cma: Move RDMA IP CM private-data parsing code from ib_cma to ib_cm

2015-04-20 Thread Haggai Eran
From: Guy Shapiro gu...@mellanox.com

When receiving a connection request, ib_cm needs to associate the request with
a network namespace. To do this, it needs to know the request's destination
IP. For this the RDMA IP CM packet formatting functionality needs to be
exposed to ib_cm.

This patch merely moves the RDMA IP CM data formatting and parsing functions
to be part of ib_cm. The following patch will utilize the new knowledge to
look-up the appropriate namespace. Each namespace maintains an independent
table of RDMA CM service IDs, allowing isolation and separation between the
network namespaces.

When creating a new incoming connection ID, the code in cm_save_ip_info can no
longer rely on the listener's private data to find the port number, so it
reads it from the requested service ID. This required saving the service ID in
cm_format_paths_from_req.

Signed-off-by: Guy Shapiro gu...@mellanox.com
Signed-off-by: Haggai Eran hagg...@mellanox.com
Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Shachar Raindel rain...@mellanox.com
---
 drivers/infiniband/core/cm.c  | 156 +++
 drivers/infiniband/core/cma.c | 166 +-
 include/rdma/ib_cm.h  |  56 ++
 3 files changed, 230 insertions(+), 148 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 5a45cb76c43e..efc5cffb675a 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -51,6 +51,7 @@
 
 #include rdma/ib_cache.h
 #include rdma/ib_cm.h
+#include rdma/ib.h
 #include cm_msgs.h
 
 MODULE_AUTHOR(Sean Hefty);
@@ -701,6 +702,159 @@ static void cm_reject_sidr_req(struct cm_id_private 
*cm_id_priv,
ib_send_cm_sidr_rep(cm_id_priv-id, param);
 }
 
+int cm_format_hdr(void *hdr, int family,
+ struct sockaddr *src_addr,
+ struct sockaddr *dst_addr)
+{
+   struct cm_hdr *cm_hdr;
+
+   cm_hdr = hdr;
+   cm_hdr-cm_version = RDMA_IP_CM_VERSION;
+   if (family == AF_INET) {
+   struct sockaddr_in *src4, *dst4;
+
+   src4 = (struct sockaddr_in *)src_addr;
+   dst4 = (struct sockaddr_in *)dst_addr;
+
+   cm_set_ip_ver(cm_hdr, 4);
+   cm_hdr-src_addr.ip4.addr = src4-sin_addr.s_addr;
+   cm_hdr-dst_addr.ip4.addr = dst4-sin_addr.s_addr;
+   cm_hdr-port = src4-sin_port;
+   } else if (family == AF_INET6) {
+   struct sockaddr_in6 *src6, *dst6;
+
+   src6 = (struct sockaddr_in6 *)src_addr;
+   dst6 = (struct sockaddr_in6 *)dst_addr;
+
+   cm_set_ip_ver(cm_hdr, 6);
+   cm_hdr-src_addr.ip6 = src6-sin6_addr;
+   cm_hdr-dst_addr.ip6 = dst6-sin6_addr;
+   cm_hdr-port = src6-sin6_port;
+   }
+   return 0;
+}
+EXPORT_SYMBOL(cm_format_hdr);
+
+static void cm_save_ib_info(struct sockaddr *src_addr,
+   struct sockaddr *dst_addr,
+   struct ib_sa_path_rec *path)
+{
+   struct sockaddr_ib  *ib;
+
+   if (src_addr) {
+   ib = (struct sockaddr_ib *)src_addr;
+   ib-sib_family = AF_IB;
+   ib-sib_pkey = path-pkey;
+   ib-sib_flowinfo = path-flow_label;
+   memcpy(ib-sib_addr, path-sgid, 16);
+   ib-sib_sid = path-service_id;
+   ib-sib_sid_mask = cpu_to_be64(0xULL);
+   ib-sib_scope_id = 0;
+   }
+   if (dst_addr) {
+   ib = (struct sockaddr_ib *)dst_addr;
+   ib-sib_family = AF_IB;
+   ib-sib_pkey = path-pkey;
+   ib-sib_flowinfo = path-flow_label;
+   memcpy(ib-sib_addr, path-dgid, 16);
+   }
+}
+
+static void cm_save_ip6_info(struct sockaddr *src_addr,
+struct sockaddr *dst_addr,
+struct cm_hdr *hdr,
+__be16 local_port)
+{
+   struct sockaddr_in6 *ip6;
+
+   if (src_addr) {
+   ip6 = (struct sockaddr_in6 *)src_addr;
+   ip6-sin6_family = AF_INET6;
+   ip6-sin6_addr = hdr-dst_addr.ip6;
+   ip6-sin6_port = local_port;
+   }
+
+   if (dst_addr) {
+   ip6 = (struct sockaddr_in6 *)dst_addr;
+   ip6-sin6_family = AF_INET6;
+   ip6-sin6_addr = hdr-src_addr.ip6;
+   ip6-sin6_port = hdr-port;
+   }
+}
+
+static void cm_save_ip4_info(struct sockaddr *src_addr,
+struct sockaddr *dst_addr,
+struct cm_hdr *hdr,
+__be16 local_port)
+{
+   struct sockaddr_in *ip4;
+
+   if (src_addr) {
+   ip4 = (struct sockaddr_in *)src_addr;
+   ip4-sin_family = AF_INET;
+   ip4-sin_addr.s_addr = hdr-dst_addr.ip4.addr;
+   ip4-sin_port = local_port;

[PATCH v2 07/11] IB/cm: Add network namespace support

2015-04-20 Thread Haggai Eran
From: Guy Shapiro gu...@mellanox.com

Add namespace support to the IB-CM layer.

- Each CM-ID now has a network namespace it is associated with, assigned at
  creation. This namespace is used as needed during subsequent action on the
  CM-ID or related objects.

- All of the relevant calls to ib_addr and ib_core were updated to use the
  namespace from the CM-ID. External APIs were extended as needed to allow
  specifying the namespace where relevant.

- The listening service ID table is now also indexed by the CM-ID namespace.

- For incoming connection requests, we use the connection parameters to select
  namespace. The namespace is matched when looking for listening service ID.

To preserve current behavior pass init_net to ib_cm wherever network namespace
function parameters were added.

The ib_cm_create_id interface now takes a reference to the relevant network
namespace. CM-IDs created by accepting a connection for a listening CM-ID will
also take a reference to the namespace. When the ID is destroyed, the
namespace reference is released.

Signed-off-by: Guy Shapiro gu...@mellanox.com
Signed-off-by: Haggai Eran hagg...@mellanox.com
Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Shachar Raindel rain...@mellanox.com
---
 drivers/infiniband/core/cm.c| 124 
 drivers/infiniband/core/cma.c   |   8 ++-
 drivers/infiniband/core/ucm.c   |   3 +-
 drivers/infiniband/ulp/ipoib/ipoib_cm.c |  21 +-
 drivers/infiniband/ulp/srp/ib_srp.c |   2 +-
 drivers/infiniband/ulp/srpt/ib_srpt.c   |   2 +-
 include/rdma/ib_cm.h|   7 +-
 7 files changed, 130 insertions(+), 37 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index efc5cffb675a..75c6ac9a4aee 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -241,6 +241,8 @@ struct cm_id_private {
u8 service_timeout;
u8 target_ack_delay;
 
+   struct net *net; /* A network namespace that the ID belongs to */
+
struct list_head work_list;
atomic_t work_count;
 };
@@ -347,12 +349,13 @@ static void cm_set_private_data(struct cm_id_private 
*cm_id_priv,
 }
 
 static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
-   struct ib_grh *grh, struct cm_av *av)
+   struct ib_grh *grh, struct cm_av *av,
+   struct net *net)
 {
av-port = port;
av-pkey_index = wc-pkey_index;
ib_init_ah_from_wc(port-cm_dev-ib_device, port-port_num, wc,
-  grh, av-ah_attr, init_net);
+  grh, av-ah_attr, net);
 }
 
 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
@@ -521,10 +524,15 @@ static struct cm_id_private * cm_insert_listen(struct 
cm_id_private *cm_id_priv)
if ((cur_cm_id_priv-id.service_mask  service_id) ==
(service_mask  cur_cm_id_priv-id.service_id) 
(cm_id_priv-id.device == cur_cm_id_priv-id.device) 
-   !data_cmp)
+   !data_cmp 
+   net_eq(cm_id_priv-net, cur_cm_id_priv-net))
return cur_cm_id_priv;
 
-   if (cm_id_priv-id.device  cur_cm_id_priv-id.device)
+   if (cm_id_priv-net  cur_cm_id_priv-net)
+   link = (*link)-rb_left;
+   else if (cm_id_priv-net  cur_cm_id_priv-net)
+   link = (*link)-rb_right;
+   else if (cm_id_priv-id.device  cur_cm_id_priv-id.device)
link = (*link)-rb_left;
else if (cm_id_priv-id.device  cur_cm_id_priv-id.device)
link = (*link)-rb_right;
@@ -544,7 +552,8 @@ static struct cm_id_private * cm_insert_listen(struct 
cm_id_private *cm_id_priv)
 
 static struct cm_id_private * cm_find_listen(struct ib_device *device,
 __be64 service_id,
-u8 *private_data)
+u8 *private_data,
+struct net *net)
 {
struct rb_node *node = cm.listen_service_table.rb_node;
struct cm_id_private *cm_id_priv;
@@ -556,10 +565,14 @@ static struct cm_id_private * cm_find_listen(struct 
ib_device *device,
   cm_id_priv-compare_data);
if ((cm_id_priv-id.service_mask  service_id) ==
 cm_id_priv-id.service_id 
-   (cm_id_priv-id.device == device)  !data_cmp)
+   (cm_id_priv-id.device == device)  !data_cmp 
+   net_eq(cm_id_priv-net, net))
return cm_id_priv;
-
-   if (device  cm_id_priv-id.device)
+   if (net  cm_id_priv-net)
+   

[PATCH v2 09/11] IB/cma: Add support for network namespaces

2015-04-20 Thread Haggai Eran
From: Guy Shapiro gu...@mellanox.com

Add support for network namespaces in the ib_cma module. This is
accomplished by:

1. Adding network namespace parameter for rdma_create_id. This parameter is used
   to populate the network namespace field in rdma_id_private. rdma_create_id
   keeps a reference on the network namespace.
2. Using the network namespace from the rdma_id instead of init_net inside of
   ib_cma.
3. Decrementing the reference count for the appropriate network namespace when
   calling rdma_destroy_id.

In order to preserve the current behavior init_net is passed when calling from
other modules.

Signed-off-by: Guy Shapiro gu...@mellanox.com
Signed-off-by: Haggai Eran hagg...@mellanox.com
Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Shachar Raindel rain...@mellanox.com
---
 drivers/infiniband/core/cma.c  | 52 +-
 drivers/infiniband/core/ucma.c |  3 +-
 drivers/infiniband/ulp/iser/iser_verbs.c   |  2 +-
 drivers/infiniband/ulp/isert/ib_isert.c|  2 +-
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h|  4 +-
 include/rdma/rdma_cm.h |  6 ++-
 net/9p/trans_rdma.c|  2 +-
 net/rds/ib.c   |  2 +-
 net/rds/ib_cm.c|  2 +-
 net/rds/iw.c   |  2 +-
 net/rds/iw_cm.c|  2 +-
 net/rds/rdma_transport.c   |  2 +-
 net/sunrpc/xprtrdma/svc_rdma_transport.c   |  2 +-
 net/sunrpc/xprtrdma/verbs.c|  3 +-
 14 files changed, 52 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 022b0d0a51cc..9ea42fe2853b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -540,7 +540,8 @@ static int cma_disable_callback(struct rdma_id_private 
*id_priv,
 
 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
  void *context, enum rdma_port_space ps,
- enum ib_qp_type qp_type)
+ enum ib_qp_type qp_type,
+ struct net *net)
 {
struct rdma_id_private *id_priv;
 
@@ -562,7 +563,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler 
event_handler,
INIT_LIST_HEAD(id_priv-listen_list);
INIT_LIST_HEAD(id_priv-mc_list);
get_random_bytes(id_priv-seq_num, sizeof id_priv-seq_num);
-   id_priv-id.route.addr.dev_addr.net = init_net;
+   id_priv-id.route.addr.dev_addr.net = get_net(net);
 
return id_priv-id;
 }
@@ -689,7 +690,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private 
*id_priv,
rdma_port_get_link_layer(id_priv-id.device, id_priv-id.port_num)
== IB_LINK_LAYER_ETHERNET) {
ret = rdma_addr_find_smac_by_sgid(sgid, qp_attr.smac, NULL,
- init_net);
+   id_priv-id.route.addr.dev_addr.net);
 
if (ret)
goto out;
@@ -953,6 +954,7 @@ static void cma_cancel_operation(struct rdma_id_private 
*id_priv,
 static void cma_release_port(struct rdma_id_private *id_priv)
 {
struct rdma_bind_list *bind_list = id_priv-bind_list;
+   struct net *net = id_priv-id.route.addr.dev_addr.net;
 
if (!bind_list)
return;
@@ -960,7 +962,7 @@ static void cma_release_port(struct rdma_id_private 
*id_priv)
mutex_lock(lock);
hlist_del(id_priv-node);
if (hlist_empty(bind_list-owners)) {
-   cma_ps_remove(bind_list-ps, init_net, bind_list-port);
+   cma_ps_remove(bind_list-ps, net, bind_list-port);
kfree(bind_list);
}
mutex_unlock(lock);
@@ -1029,6 +1031,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
cma_deref_id(id_priv-id.context);
 
kfree(id_priv-id.route.path_rec);
+   put_net(id_priv-id.route.addr.dev_addr.net);
kfree(id_priv);
 }
 EXPORT_SYMBOL(rdma_destroy_id);
@@ -1156,7 +1159,8 @@ static struct rdma_id_private *cma_new_conn_id(struct 
rdma_cm_id *listen_id,
int ret;
 
id = rdma_create_id(listen_id-event_handler, listen_id-context,
-   listen_id-ps, ib_event-param.req_rcvd.qp_type);
+   listen_id-ps, ib_event-param.req_rcvd.qp_type,
+   listen_id-route.addr.dev_addr.net);
if (IS_ERR(id))
return NULL;
 
@@ -1201,10 +1205,11 @@ static struct rdma_id_private *cma_new_udp_id(struct 
rdma_cm_id *listen_id,
 {
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
+   struct net *net = listen_id-route.addr.dev_addr.net;
int ret;
 
id = rdma_create_id(listen_id-event_handler, 

[PATCH v2 11/11] IB/ucm: Add partial support for network namespaces

2015-04-20 Thread Haggai Eran
From: Shachar Raindel rain...@mellanox.com

It is impossible to completely support network namespaces for UCM, as
we cannot identify the target IPoIB device. However, we add support
which will work if the user is following the IB-Spec Annex 11 (RDMA IP
CM Services) with the service ID and private data formatting.

Signed-off-by: Haggai Eran hagg...@mellanox.com
Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Shachar Raindel rain...@mellanox.com
Signed-off-by: Guy Shapiro gu...@mellanox.com
---
 drivers/infiniband/core/ucm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 9604ab068984..424421091dae 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -45,6 +45,7 @@
 #include linux/idr.h
 #include linux/mutex.h
 #include linux/slab.h
+#include linux/nsproxy.h
 
 #include asm/uaccess.h
 
@@ -490,7 +491,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
ctx-uid = cmd.uid;
ctx-cm_id = ib_create_cm_id(file-device-ib_dev,
 ib_ucm_event_handler, ctx,
-init_net);
+current-nsproxy-net_ns);
if (IS_ERR(ctx-cm_id)) {
result = PTR_ERR(ctx-cm_id);
goto err1;
-- 
1.7.11.2

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 04/11] IB/core: Find the network namespace matching connection parameters

2015-04-20 Thread Haggai Eran
From: Yotam Kenneth yota...@mellanox.com

In the case of IPoIB, and maybe in other cases, the network device is
managed by an upper-layer protocol (ULP). In order to expose this
network device to other users of the IB device, let ULPs implement
a callback that returns network device according to connection parameters.

The IB device and port, together with the P_Key and the IP address should be
enough to uniquely identify the ULP net device.

This function is passed to ib_core as part of the ib_client
registration.

Using this functionality, add a way to get the network namespace
corresponding to a work completion. This is needed so that responses to CM
requests can be sent from the same network namespace as the request.

Signed-off-by: Haggai Eran hagg...@mellanox.com
Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Shachar Raindel rain...@mellanox.com
Signed-off-by: Guy Shapiro gu...@mellanox.com
---
 drivers/infiniband/core/device.c | 57 
 include/rdma/ib_verbs.h  | 29 
 2 files changed, 86 insertions(+)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 18c1ece765f2..2f06be5b0b59 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -38,6 +38,7 @@
 #include linux/slab.h
 #include linux/init.h
 #include linux/mutex.h
+#include linux/netdevice.h
 #include rdma/rdma_netlink.h
 
 #include core_priv.h
@@ -733,6 +734,62 @@ int ib_find_pkey(struct ib_device *device,
 }
 EXPORT_SYMBOL(ib_find_pkey);
 
+static struct net_device *ib_get_net_dev_by_port_pkey_ip(struct ib_device *dev,
+u8 port,
+u16 pkey,
+struct sockaddr *addr)
+{
+   struct net_device *ret = NULL;
+   struct ib_client *client;
+
+   mutex_lock(device_mutex);
+   list_for_each_entry(client, client_list, list)
+   if (client-get_net_device_by_port_pkey_ip) {
+   ret = client-get_net_device_by_port_pkey_ip(dev, port,
+pkey,
+addr);
+   if (ret)
+   break;
+   }
+
+   mutex_unlock(device_mutex);
+   return ret;
+}
+
+struct net *ib_get_net_ns_by_port_pkey_ip(struct ib_device *dev,
+ u8 port,
+ u16 pkey,
+ struct sockaddr *addr)
+{
+   struct net_device *ndev = NULL;
+   struct net *ns;
+
+   switch (rdma_port_get_link_layer(dev, port)) {
+   case IB_LINK_LAYER_INFINIBAND:
+   if (!addr)
+   goto not_found;
+   ndev = ib_get_net_dev_by_port_pkey_ip(dev, port, pkey, addr);
+   break;
+   default:
+   goto not_found;
+   }
+
+   if (!ndev)
+   goto not_found;
+
+   rcu_read_lock();
+   ns = maybe_get_net(dev_net(ndev));
+   dev_put(ndev);
+   rcu_read_unlock();
+   if (!ns)
+   goto not_found;
+   return ns;
+
+not_found:
+   return get_net(init_net);
+}
+EXPORT_SYMBOL(ib_get_net_ns_by_port_pkey_ip);
+
 static int __init ib_core_init(void)
 {
int ret;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index f4a85decc60f..74b239410562 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1683,6 +1683,21 @@ struct ib_client {
void (*add)   (struct ib_device *);
void (*remove)(struct ib_device *);
 
+   /* Returns the net_dev belonging to this ib_client and matching the
+* given parameters.
+* @dev:An RDMA device that the net_dev use for communication.
+* @port:   A physical port number on the RDMA device.
+* @pkey:   P_Key that the net_dev uses if applicable.
+* @addr:   An IP address the net_dev is configured with.
+*
+* An ib_client that implements a net_dev on top of RDMA devices
+* (such as IP over IB) should implement this callback, allowing the
+* rdma_cm module to find the right net_dev for a given request. */
+   struct net_device *(*get_net_device_by_port_pkey_ip)(
+   struct ib_device *dev,
+   u8 port,
+   u16 pkey,
+   struct sockaddr *addr);
struct list_head list;
 };
 
@@ -2679,4 +2694,18 @@ static inline int ib_check_mr_access(int flags)
 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
   struct ib_mr_status *mr_status);
 
+/**
+ * ib_get_net_ns_by_port_pkey_ip() - Return the appropriate net namespace
+ * for a received CM request
+ * @dev:   

Re: [PATCH v2 00/11] Add network namespace support in the RDMA-CM

2015-04-20 Thread Steve Wise


Hey Haggai,

Did you check for changes needed in drivers/infiniband/core/iwcm.c? I 
notice that it uses init_net here:


static int __init iw_cm_init(void)
{
iwcm_wq = create_singlethread_workqueue(iw_cm_wq);
if (!iwcm_wq)
return -ENOMEM;

iwcm_ctl_table_hdr = register_net_sysctl(init_net, net/iw_cm,
 iwcm_ctl_table);
if (!iwcm_ctl_table_hdr) {
pr_err(iw_cm: couldn't register sysctl paths\n);
destroy_workqueue(iwcm_wq);
return -ENOMEM;
}

return 0;
}

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHSET] printk, netconsole: implement reliable netconsole

2015-04-20 Thread Tejun Heo
Hello, Rob.

On Sun, Apr 19, 2015 at 02:25:09AM -0500, Rob Landley wrote:
 If you have two machines plugged into a hub, and that's _all_ that's
 plugged in, packets should never get dropped. This was the original
 use case of netconsole was that the sender and the receiver were
 plugged into the same router.

Development aid on local network hasn't been the only use case for a
very long time now.  I haven't seen too many large scale setups and
two of them were using netconsole as a way to collect kernel messages
cluster-wide and having issues with lost messages.  One was running it
over a separate lower speed network from the main one which they used
for most managerial tasks including deployment and packet losses
weren't that unusual.

The other is running on the same network but the log collector isn't
per-rack so the packets end up getting routed through congested parts
of the network again experiencing messages losses.

 So are you trying to program around a problem you've actually _seen_,
 or are you attempting to reinvent TCP/IP yet again based on top of UDP
 (Drink!) because of a purely theoretical issue?

At larger scale, the problem is very real.  Let's forget about the
reliability part.  The main thing is being able to identify message
sequences so that the receiver can put the message streams back
together.

That said, once that's there, whether the reliability part is done
with TCP doesn't make that much of a difference as it'd still need to
put back the two message streams together, but again this doesn't
matter.  Let's just ignore this part.

  printk already keeps log metadata which contains enough information to
  make netconsole reliable.  This patchset does the followings.
 
 Adds a giant amount of complexity without quite explaining why.

The only significant complexity is on the receiver side and it doesn't
even have to be in the kernel.  CON_EXTENDED and emitting extended
messages are pretty straight-forward changes.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Problem with patch make nlmsg_end() and genlmsg_end() void

2015-04-20 Thread David Woodhouse
On Wed, 2015-04-08 at 15:12 +0100, David Woodhouse wrote:
 On Wed, 2015-04-08 at 15:08 +0200, Johannes Berg wrote:
  Additionally, the failure mode of this was the process running out of
  memory due to receiving the same results over and over again - does that
  happen for you? It seems it was stuck in recvmsg(), but that may just be
  a side effect of happening to interrupt at that point?
 
 No, strace shows it's just sitting in recvmsg().
 
 As I said, I'm not *sure* it's caused by the same commit; bisecting is
 distinctly non-trivial. It just seemed likely.

FWIW I went back to the Fedora 3.19 kernel for a week and it didn't
show up again. After rebooting to 4.0 earlier today, it's happened
already.

I'll see if I can find a more reliable way of reproducing it, which
will make it slightly saner to try bisecting.


-- 
dwmw2


smime.p7s
Description: S/MIME cryptographic signature


Re: [PATCH] etherdevice: Add ether_addr_copy_unaligned

2015-04-20 Thread David Miller
From: Mateusz Kulikowski mateusz.kulikow...@gmail.com
Date: Sun, 19 Apr 2015 23:39:37 +0200

 Some drivers require copying unaligned ethernet addresses.
 Using memcpy() causes checkpatch warnings and may cause
 regressions (someone will fix alignment of packed structure)
 
 Signed-off-by: Mateusz Kulikowski mateusz.kulikow...@gmail.com

I'd rather see something like this submitted in a patch series alongside
some actual uses.

So I'm tossing this for now.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next 0/2] net: Fix hw csum failure message flood for ppp tunnel

2015-04-20 Thread David Miller

Your postings seem to have trouble reaching the list, and therefore patchwork
as well.

Can you try one more time?

Otherwise I'm the only person seeing these patches, which is kinda pointless,
especially since you would like this patch series to get some testing.

Thanks.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] ip_forward: Drop frames with attached skb->sk

2015-04-20 Thread David Miller
From: Sebastian Poehn sebastian.po...@gmail.com
Date: Mon, 20 Apr 2015 09:19:20 +0200

 Initial discussion was:
 [FYI] xfrm: Don't lookup sk_policy for timewait sockets
 
 Forwarded frames should not have a socket attached. Especially
 tw sockets will lead to panics later-on in the stack.
 
 This was observed with TPROXY assigning a tw socket and broken
 policy routing (misconfigured). As a result frame enters
 forwarding path instead of input. We cannot solve this in
 TPROXY as it cannot know that policy routing is broken.
 
 v2:
 Remove useless comment
 
 Signed-off-by: Sebastian Poehn sebastian.po...@gmail.com

Applied and queued up for -stable, thanks Sebastian.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] pppoe: Lacks DST MAC address check

2015-04-20 Thread David Miller
From: Joakim Tjernlund joakim.tjernl...@transmode.se
Date: Sat, 18 Apr 2015 11:53:14 +0200

 A pppoe session is identified by its session ID and MAC address.
 Currently pppoe does not check if the received pkg has the correct
 MAC address. This is a problem when the eth I/F is in promisc mode
 as then any DST MAC address is accepted.

Please read Documentation/SubmittingPatches in the source tree, you
need to provide a proper signoff.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/7] turn Makefile more distribution friendly

2015-04-20 Thread Stephen Hemminger
On Mon, 13 Apr 2015 16:00:56 +0200
Pavel Šimerda pav...@pavlix.net wrote:

 From: Pavel Šimerda psime...@redhat.com
 
 Changes:
 
  * Accept directory settings from environment.
  * Remove redundant ROOTDIR variable.
  * Set KERNEL_INCLUDE default to '/usr/include'.
  * Use CFLAGS from environment.
 
 Note: In the long term it might be better to improve the configure
 script to generate those parts of the Makefile in a manner similar
 to autoconf. It might be even practical to autotoolize the package.
 
 Signed-off-by: Pavel Šimerda psime...@redhat.com

I will take this part.
But don't want to start iproute2 down the autoconf/autotool sink hole.

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH iproute2] ipxfrm: wrong nl msg sent on deleteall cmd

2015-04-20 Thread Stephen Hemminger
On Wed, 15 Apr 2015 14:00:53 +0200
Nicolas Dichtel nicolas.dich...@6wind.com wrote:

 XFRM netlink family is independent from the route netlink family. It's wrong
 to call rtnl_wilddump_request(), because it will add a 'struct ifinfomsg' into
 the header and the kernel will complain (at least for xfrm state):
 
 netlink: 24 bytes leftover after parsing attributes in process `ip'.
 
 Reported-by: Gregory Hoggarth gregory.hogga...@alliedtelesis.co.nz
 Signed-off-by: Nicolas Dichtel nicolas.dich...@6wind.com

Applied thanks

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 02/11] IB/addr: Pass network namespace as a parameter

2015-04-20 Thread Jason Gunthorpe
On Mon, Apr 20, 2015 at 12:03:33PM +0300, Haggai Eran wrote:
 +/** rdma_addr_find_smac_by_sgid() - Find the src MAC and VLAN ID for a src 
 GID
 + * @sgid:Source GID to find the MAC and VLAN for.
 + * @smac:A buffer to contain the resulting MAC address.
 + * @vlan_id: Will contain the resulting VLAN ID.
 + * @net: Network namespace to use for the address resolution.
 + *
 + * It is the caller's responsibility to keep the network namespace alive 
 until
 + * the function returns.
 + */
 +int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id,
 + struct net *net);

kdocs are typically placed with the body of the function, not at the
prototype.

Jason
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 01/11] RDMA/CMA: Mark IPv4 addresses correctly when the listener is IPv6

2015-04-20 Thread Jason Gunthorpe
On Mon, Apr 20, 2015 at 12:03:32PM +0300, Haggai Eran wrote:
 From: Yotam Kenneth yota...@mellanox.com
 
 When accepting a new connection with the listener being IPv6, the
 family of the new connection is set as IPv6. This causes cma_zero_addr
 function to return true on an non-zero address. As a result, the wrong
 code path is taken. This causes the connection request to be rejected,
 as the RDMA-CM code looks for the wrong type of device.

This description doesn't really make sense as to what the problem is.

 @@ -866,12 +866,12 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, 
 struct rdma_cm_id *listen_i
  
   listen4 = (struct sockaddr_in *) listen_id-route.addr.src_addr;
   ip4 = (struct sockaddr_in *) id-route.addr.src_addr;
 - ip4-sin_family = listen4-sin_family;
 + ip4-sin_family = AF_INET;

If listen_id->route.addr.src_addr.ss_family != AF_INET then it is
invalid to cast to sockaddr_in.

So listen4->sin_family MUST be AF_INET or this function MUST NOT be
called.

Forcing to AF_INET cannot be correct here.

What does this patch have to do with this series?

Jason
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH iproute2-next 2/2] netns: allow to dump and monitor nsid

2015-04-20 Thread Stephen Hemminger
On Thu, 9 Apr 2015 08:30:14 +
Nicolas Dichtel nicolas.dich...@6wind.com wrote:

 Two commands are added:
  - ip netns list-id
  - ip monitor nsid
 
 A cache is also added to remember the association between the iproute2 netns
 name (from /var/run/netns/) and the nsid.
 To avoid interfering with the rth socket, a new rtnl socket (rtnsh) is used to
 get nsid (we may send rtnl request during listing on rth).
 
 Example:
 $ ip netns list-id
 nsid 0 (iproute2 netns name: foo)
 $ ip monitor nsid
 Deleted nsid 0 (iproute2 netns name: foo)
 nsid 16 (iproute2 netns name: bar)
 
 Signed-off-by: Nicolas Dichtel nicolas.dich...@6wind.com

Applied with a couple of little style cleanups.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH iproute2] tc util: Fix possible buffer overflow when print class id

2015-04-20 Thread Stephen Hemminger
On Mon, 20 Apr 2015 08:33:32 +0300
Vadim Kochan vadi...@gmail.com wrote:

 From: Vadim Kochan vadi...@gmail.com
 
 Use correct handle buffer length.
 
 Signed-off-by: Vadim Kochan vadi...@gmail.com

Looks fine, applied.

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 net-next] ip: Add color output option

2015-04-20 Thread Stephen Hemminger
On Sat, 18 Apr 2015 13:39:45 +0300
Mathias Nyman m.ny...@iki.fi wrote:

 It is hard to quickly find what you are looking for in the output of
 the ip
 command. Color helps.
 
 This patch adds a '-c' flag to highlight these with individual colors:
- interface name
- ip address
- mac address
- up/down state
 
 Signed-off-by: Mathias Nyman m.ny...@iki.fi

I like the idea of this, it would be generally good across the board.

But the patch does not apply cleanly to the current version of iproute2.

And there are minor style issues. iproute2 in general tries to follow kernel
style.

WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#36: 
new file mode 100644

ERROR: open brace '{' following enum go on the same line
#45: FILE: include/color.h:5:
+enum color_attr
+{

ERROR: open brace '{' following enum go on the same line
#195: FILE: lib/color.c:7:
+enum color
+{

ERROR: that open brace { should be on the previous line
#207: FILE: lib/color.c:19:
+static const char * const color_codes[] =
+{

ERROR: that open brace { should be on the previous line
#220: FILE: lib/color.c:32:
+static enum color attr_colors[] =
+{

ERROR: do not initialise statics to 0 or NULL
#229: FILE: lib/color.c:41:
+static int color_is_enabled = 0;

WARNING: Missing a blank line after declarations
#240: FILE: lib/color.c:52:
+   va_list args;
+   va_start(args, fmt);


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


YOUR MONEY IS APPROVED.

2015-04-20 Thread HEAD OFFICE
YOUR MONEY IS APPROVED.
FROM ALBERTA / CANADIAN OIL GAS
VIEW THE ATTACHMENT.


YOUR MONEY IS APPROVED..pdf
Description: Adobe PDF document


YOUR MONEY IS APPROVED..pdf
Description: Adobe PDF document


Re: [PATCH v2 07/11] IB/cm: Add network namespace support

2015-04-20 Thread Jason Gunthorpe
On Mon, Apr 20, 2015 at 12:03:38PM +0300, Haggai Eran wrote:
 From: Guy Shapiro gu...@mellanox.com
 
 Add namespace support to the IB-CM layer.

 - Each CM-ID now has a network namespace it is associated with, assigned at
   creation. This namespace is used as needed during subsequent action on the
   CM-ID or related objects.

There is really something weird about this layering. At the CM layer
there should be no concept of an IP address, it only deals with GIDs.

So how can a CM object have a network namespace associated with it?

  {
   av-port = port;
   av-pkey_index = wc-pkey_index;
   ib_init_ah_from_wc(port-cm_dev-ib_device, port-port_num, wc,
 -grh, av-ah_attr, init_net);
 +grh, av-ah_attr, net);

There is something deeply wrong with adding network namespace
arguments to verbs.

For rocee the gid index clearly specifies the network namespace
to use, so much of this should go away and have rocee get the
namespace from the gid index.

Ie in ib_init_ah_from_wc we have the ib_wc which contains the sgid
index.

I'm really not excited at how many places are gaining a net when those
layers shouldn't even need to care about IP layer details. Just acting
as a pass through for rocee doesn't make sense.

Jason
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 04/16] printk: implement support for extended console drivers

2015-04-20 Thread Petr Mladek
On Thu 2015-04-16 19:03:41, Tejun Heo wrote:
 printk log_buf keeps various metadata for each message including its
 sequence number and timestamp.  The metadata is currently available
 only through /dev/kmsg and stripped out before passed onto console
 drivers.  We want this metadata to be available to console drivers
 too.  Immediately, it's to implement reliable netconsole but may be
 useful for other console devices too.
 
 This patch implements support for extended console drivers.  Consoles
 can indicate that they process extended messages by setting the new
 CON_EXTENDED flag and they'll be fed messages formatted the same way as
 /dev/kmsg output as follows.
 
  level,sequnum,timestamp,contflag;message text
 
 One special case is fragments.  Message fragments are output
 immediately to consoles to avoid losing them in case of crashes.  For
 normal consoles, this is handled by later suppressing the assembled
 result and /dev/kmsg only shows fully assembled message; however,
 extended consoles would need both the fragments, to avoid losing
 message in case of a crash, and the assembled result, to tell how the
 fragments are assembled and which sequence number got assigned to it.
 
 To help matching up the fragments with the resulting message,
 fragments are emitted in the following format.
 
  level,-,timestamp,-,fragid=fragid;message fragment
 
 And later when the assembly is complete, the following is transmitted,
 
  level,sequnum,timestamp,contflag,fragid=fragid;message text
 
 * Extended message formatting for console drivers is enabled iff there
   ^^^

s/iff/if/

   are registered extended consoles.
 
 * Comment describing extended message formats updated to help
   distinguishing variable with verbatim terms.
 
 Signed-off-by: Tejun Heo t...@kernel.org
 Cc: Kay Sievers k...@vrfy.org
 Cc: Petr Mladek pmla...@suse.cz
 ---
  include/linux/console.h |   1 +
  kernel/printk/printk.c  | 141 
 +---
  2 files changed, 123 insertions(+), 19 deletions(-)
 
 diff --git a/include/linux/console.h b/include/linux/console.h
 index 7571a16..04bbd09 100644
 --- a/include/linux/console.h
 +++ b/include/linux/console.h
 @@ -115,6 +115,7 @@ static inline int con_debug_leave(void)
  #define CON_BOOT (8)
  #define CON_ANYTIME  (16) /* Safe to call when cpu is offline */
  #define CON_BRL  (32) /* Used for a braille device */
 +#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */
  
  struct console {
   charname[16];
 diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
 index 0175c46..349a37b 100644
 --- a/kernel/printk/printk.c
 +++ b/kernel/printk/printk.c
 @@ -84,6 +84,8 @@ static struct lockdep_map console_lock_dep_map = {
  };
  #endif
  
 +static int nr_ext_console_drivers;
 +
  /*
   * Helper macros to handle lockdep when locking/unlocking console_sem. We use
   * macros instead of functions so that _RET_IP_ contains useful information.
 @@ -195,14 +197,28 @@ static int console_may_schedule;
   * need to be changed in the future, when the requirements change.
   *
   * /dev/kmsg exports the structured data in the following line format:
 - *   level,sequnum,timestamp;message text\n
 + *   level,sequnum,timestamp,contflag;message text\n
   *
   * The optional key/value pairs are attached as continuation lines starting
   * with a space character and terminated by a newline. All possible
   * non-prinatable characters are escaped in the \xff notation.
   *
   * Users of the export format should ignore possible additional values
 - * separated by ',', and find the message after the ';' character.
 + * separated by ',', and find the message after the ';' character. All
 + * optional header fields should have the form key=value.
 + *
 + * For consoles with CON_EXTENDED set, a message formatted like the
 + * following may also be printed. This is a continuation fragment which are
 + * being assembled and will be re-transmitted with a normal header once
 + * assembly finishes. The fragments are sent out immediately to avoid
 + * losing them over a crash.
 + *   level,-,timestamp,-,fragid=fragid;message fragment\n
 + *
 + * On completion of assembly, the following is transmitted.
 + *   level,sequnum,timestamp,contflag,fragid=fragid;message 
 text\n
 + *
 + * Extended consoles should identify and handle duplicates by matching the
 + * fragids of the fragments and assembled messages.
   */
  
  enum log_flags {
 @@ -210,6 +226,7 @@ enum log_flags {
   LOG_NEWLINE = 2,/* text ended with a newline */
   LOG_PREFIX  = 4,/* text started with a prefix */
   LOG_CONT= 8,/* text is a fragment of a continuation line */
 + LOG_DICT_META   = 16,   /* dict contains console meta information */
  };
  
  struct printk_log {
 @@ -292,6 +309,12 @@ static char *log_dict(const struct printk_log *msg)
   return (char *)msg + 

Re: [PATCH V1 net-next] IB/ipoib: Fix ndo_get_iflink

2015-04-20 Thread Doug Ledford
On Mon, 2015-04-20 at 12:21 +0300, Or Gerlitz wrote:
 On Mon, Apr 20, 2015 at 11:16 AM, Haggai Eran hagg...@mellanox.com wrote:
  On 17/04/2015 22:21, David Miller wrote:
  From: Erez Shitrit ere...@mellanox.com
  Date: Thu, 16 Apr 2015 16:34:34 +0300
 
  Currently, iflink of the parent interface was always accessed, even
  when interface didn't have a parent and hence we crashed there.
 
  Handle the interface types properly: for a child interface, return
  the ifindex of the parent, for parent interface, return its ifindex.
 
  For child devices, make sure to set the parent pointer prior to
  invoking register_netdevice(), this allows the new ndo to be called
  by the stack immediately after the child device is registered.
 
  Fixes: 5aa7add8f14b ('infiniband/ipoib: implement ndo_get_iflink')
  Reported-by: Honggang Li ho...@redhat.com
  Signed-off-by: Erez Shitrit ere...@mellanox.com
  Signed-off-by: Honggang Li ho...@redhat.com
 
  Applied, thanks.
 
  Doug, Roland,
  You might want to include this patch in your for-next / for-4.1 trees,
  or merge net-next again. Currently they contain the issue it fixes, and
  it can prevent some systems with IPoIB from booting.
 
 Haggai,
 
 
 It's upstream by now, pull Linus tree.
 
 Or.

Right, it already went via net-next.  I skipped it because of that.

-- 
Doug Ledford dledf...@redhat.com
  GPG KeyID: 0E572FDD




signature.asc
Description: This is a digitally signed message part


Re: [PATCH V1 net-next] IB/ipoib: Fix ndo_get_iflink

2015-04-20 Thread David Miller
From: Haggai Eran hagg...@mellanox.com
Date: Mon, 20 Apr 2015 11:16:34 +0300

 On 17/04/2015 22:21, David Miller wrote:
 From: Erez Shitrit ere...@mellanox.com
 Date: Thu, 16 Apr 2015 16:34:34 +0300
 
 Currently, iflink of the parent interface was always accessed, even 
 when interface didn't have a parent and hence we crashed there.

 Handle the interface types properly: for a child interface, return
 the ifindex of the parent, for parent interface, return its ifindex.

 For child devices, make sure to set the parent pointer prior to
 invoking register_netdevice(), this allows the new ndo to be called
 by the stack immediately after the child device is registered.

 Fixes: 5aa7add8f14b ('infiniband/ipoib: implement ndo_get_iflink')
 Reported-by: Honggang Li ho...@redhat.com
 Signed-off-by: Erez Shitrit ere...@mellanox.com
 Signed-off-by: Honggang Li ho...@redhat.com
 
 Applied, thanks.
 
 Doug, Roland,
 
 You might want to include this patch in your for-next / for-4.1 trees,
 or merge net-next again. Currently they contain the issue it fixes, and
 it can prevent some systems with IPoIB from booting.

I put this into 'net', not 'net-next'.  'net-next' is dormant after I do
my first push to Linus of the merge window.  After that everything goes
via 'net' until the merge window closes and I open 'net-next' up again.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 06/10] ipv6: Avoid deleting RTF_CACHE route from ip6_route_del()

2015-04-20 Thread David Miller
From: Martin KaFai Lau ka...@fb.com
Date: Fri, 10 Apr 2015 18:54:09 -0700

 Before patch 'Allow pmtu update on /128 via gateway route',
 RTF_CACHE route was not created for DST_HOST.  It also requires changes on
 both delete code path and rt6_select() code path.
 
 This patch fixes the delete code path to avoid deleting the RTF_CACHE
 route by 'ip -6 r del...'
 
 Signed-off-by: Martin KaFai Lau ka...@fb.com
 Reviewed-by: Hannes Frederic Sowa han...@stressinduktion.org

If a cached route was created in response to say a PMTU event, and
it's a clone/copy/cow of the route we are being asked to delete,
it absolutely should be removed.

In fact this is a critically important aspect of removing routes
from the table.

So this change does not seem correct.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 04/10] ipv6: Only create RTF_CACHE routes after encountering pmtu exception

2015-04-20 Thread David Miller
From: Martin KaFai Lau ka...@fb.com
Date: Fri, 10 Apr 2015 18:54:07 -0700

 @@ -1171,8 +1170,15 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net 
 *net, __be32 mtu,
   fl6.flowlabel = ip6_flowinfo(iph);
  
   dst = ip6_route_output(net, NULL, fl6);
 - if (!dst-error)
 + if (!dst-error) {
 + unsigned char *outer_network_header = skb_network_header(skb);
 + int offset;
 +
 + skb_reset_network_header(skb);
 + offset = outer_network_header - skb_network_header(skb);
   ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
 + skb_set_network_header(skb, offset);
 + }

I seriously object to adjusting then restoring the location of the SKB
network header in this kind of code path.

Instead, adjust the interfaces to the code doing the packet header
inspection so that it can accommodate an offset or something like that
instead.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv3] pppoe: Lacks DST MAC address check

2015-04-20 Thread Joakim Tjernlund
From: Joakim Tjernlund joakim.tjernl...@transmode.se

A pppoe session is identified by its session ID and MAC address.
Currently pppoe does not check if the received pkg has the correct
MAC address. This is a problem when the eth I/F is in promisc mode
as then any DST MAC address is accepted.

Signed-off-by: Joakim Tjernlund joakim.tjernl...@transmode.se
---
 drivers/net/ppp/pppoe.c | 3 +++
 1 file changed, 3 insertions(+)

v2 - The MAC address check should encompass all pppoe pkgs,
 not only the relay type.

v3 - Add signoff

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index ff059e1..aa1dd92 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -380,6 +380,9 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff 
*skb)
 * can't change.
 */
 
+   if (skb-pkt_type == PACKET_OTHERHOST)
+   goto abort_kfree;
+
if (sk-sk_state  PPPOX_BOUND) {
ppp_input(po-chan, skb);
} else if (sk-sk_state  PPPOX_RELAY) {
-- 
2.0.5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Bug 61651 - [regression] Wake-on-LAN broken in alx (AR8161)

2015-04-20 Thread Christ-Jan Wijtmans
Apologies if this is the wrong place or wrong format but since a while
multiple people want this functionality back into the ALX driver and
also claim that the reason for removing the functionality no longer
applies in new kernels. Also I believe that it could at least be made
an option in the .config file.

https://bugzilla.kernel.org/show_bug.cgi?id=61651

Live long and prosper,

Christ-Jan Wijtmans
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 04/10] ipv6: Only create RTF_CACHE routes after encountering pmtu exception

2015-04-20 Thread David Miller
From: Martin KaFai Lau ka...@fb.com
Date: Fri, 10 Apr 2015 18:54:07 -0700

 + if (!(rt6-rt6i_flags  RTF_CACHE) 
 + (!(rt6-rt6i_flags  (RTF_NONEXTHOP | RTF_GATEWAY)) ||
 +  !(rt6-dst.flags  DST_HOST))) {

These big convoluted tests are tiring to read over and over again.

At the very least, (rt6-rt6i_flags  (RTF_NONEXTHOP | RTF_GATEWAY)
deserves to be a descriptively named inline function.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 00/10] ipv6: Only create RTF_CACHE route after encountering pmtu exception

2015-04-20 Thread David Miller
From: Martin KaFai Lau ka...@fb.com
Date: Fri, 10 Apr 2015 18:54:03 -0700

 This series is to avoid creating a RTF_CACHE route whenever we are consulting
 the fib6 tree with a new destination.  Instead, only create RTF_CACHE route
 when we see a pmtu exception.

Please separate out the pure bug fixes from this series and submit them for
inclusion into 'net', thanks.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 01/11] RDMA/CMA: Mark IPv4 addresses correctly when the listener is IPv6

2015-04-20 Thread Or Gerlitz
On Mon, Apr 20, 2015 at 7:41 PM, Jason Gunthorpe
jguntho...@obsidianresearch.com wrote:
 On Mon, Apr 20, 2015 at 12:03:32PM +0300, Haggai Eran wrote:
 From: Yotam Kenneth yota...@mellanox.com

 When accepting a new connection with the listener being IPv6, the
 family of the new connection is set as IPv6. This causes cma_zero_addr
 function to return true on a non-zero address. As a result, the wrong
 code path is taken. This causes the connection request to be rejected,
 as the RDMA-CM code looks for the wrong type of device.

 This description doesn't really make sense as to what the problem is.

 @@ -866,12 +866,12 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, 
 struct rdma_cm_id *listen_i

   listen4 = (struct sockaddr_in *) listen_id-route.addr.src_addr;
   ip4 = (struct sockaddr_in *) id-route.addr.src_addr;
 - ip4-sin_family = listen4-sin_family;
 + ip4-sin_family = AF_INET;

 If listen_id-route.addr.src_addr.ss_family != AF_INET then it is
 invalid to cast to sockaddr_in.

 So listen4-sin_family MUST be AF_INET or this function MUST NOT be
 called.

 Forcing to AF_INET cannot be correct here.

Jason, could you take a look @ this thread
http://marc.info/?t=14158939504r=1w=2 where the authors
addressed some comments from Sean and he eventually Acked the patch?

 What does this patch have to do with this series?

I believe it's either a pre-patch to address some assumption or
something they stepped on while testing

Or.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] pppoe: Lacks DST MAC address check

2015-04-20 Thread Joakim Tjernlund
On Mon, 2015-04-20 at 14:11 -0400, David Miller wrote:
 From: Joakim Tjernlund joakim.tjernl...@transmode.se
 Date: Sat, 18 Apr 2015 11:53:14 +0200
 
  A pppoe session is identified by its session ID and MAC address.
  Currently pppoe does not check if the received pkg has the correct
  MAC address. This is a problem when the eth I/F is in promisc mode
  as then any DST MAC address is accepted.
 
 Please read Documentation/SubmittingPatches in the source tree, you
 need to provide a proper signoff.

Arg! I know this but this time it slipped my mind :(
So sorry, I will send a v3 soon.

 Jocke
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 03/11] time/hrtimer:Introduce hrtimer_get_res64() with timespec64 type for getting the timer resolution

2015-04-20 Thread Thomas Gleixner
On Mon, 20 Apr 2015, Baolin Wang wrote:
 This patch introduces hrtimer_get_res64() function to get the timer resolution
 with timespec64 type, and moves the hrtimer_get_res() function into

FYI, That function is about to go away, but it's not a big deal to
sort that out once I applied the hrtimer rework to the tip tree.

Thanks,

tglx
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/11] timekeeping:Introduce the current_kernel_time64() function with timespec64 type

2015-04-20 Thread Baolin Wang
This patch adds current_kernel_time64() function with timespec64 type,
and makes current_kernel_time() 'static inline' and moves it to timekeeping.h
file.

This makes it convenient for users to get the current kernel time as a timespec64,
and makes it easy to later delete the current_kernel_time() function from timekeeping.h.
That makes getting the current time ready for 2038.

Signed-off-by: Baolin Wang baolin.w...@linaro.org
---
 include/linux/timekeeping.h |   10 +-
 kernel/time/timekeeping.c   |6 +++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 3eaae47..c6d5ae9 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -18,10 +18,18 @@ extern int do_sys_settimeofday(const struct timespec *tv,
  * Kernel time accessors
  */
 unsigned long get_seconds(void);
-struct timespec current_kernel_time(void);
+struct timespec64 current_kernel_time64(void);
 /* does not take xtime_lock */
 struct timespec __current_kernel_time(void);
 
+static inline struct timespec current_kernel_time(void)
+{
+   struct timespec64 now;
+
+   now = current_kernel_time64();
+   return timespec64_to_timespec(now);
+}
+
 /*
  * timespec based interfaces
  */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 91db941..8ccc02c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1721,7 +1721,7 @@ struct timespec __current_kernel_time(void)
return timespec64_to_timespec(tk_xtime(tk));
 }
 
-struct timespec current_kernel_time(void)
+struct timespec64 current_kernel_time64(void)
 {
struct timekeeper *tk = tk_core.timekeeper;
struct timespec64 now;
@@ -1733,9 +1733,9 @@ struct timespec current_kernel_time(void)
now = tk_xtime(tk);
} while (read_seqcount_retry(tk_core.seq, seq));
 
-   return timespec64_to_timespec(now);
+   return now;
 }
-EXPORT_SYMBOL(current_kernel_time);
+EXPORT_SYMBOL(current_kernel_time64);
 
 struct timespec64 get_monotonic_coarse64(void)
 {
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/11] time/alarmtimer:Convert to the new methods for k_clock structure

2015-04-20 Thread Baolin Wang
This patch changes to the new methods with timespec64/itimerspec64
type of k_clock structure, and converts the timespec/itimerspec type to
timespec64/itimerspec64 type in the alarmtimer.c file.

Signed-off-by: Baolin Wang baolin.w...@linaro.org
---
 kernel/time/alarmtimer.c |   43 ++-
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 1b001ed..68186e1 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -489,35 +489,36 @@ static enum alarmtimer_restart alarm_handle_timer(struct 
alarm *alarm,
 /**
  * alarm_clock_getres - posix getres interface
  * @which_clock: clockid
- * @tp: timespec to fill
+ * @tp: timespec64 to fill
  *
  * Returns the granularity of underlying alarm base clock
  */
-static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp)
+static int alarm_clock_getres(const clockid_t which_clock,
+   struct timespec64 *tp)
 {
clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid;
 
if (!alarmtimer_get_rtcdev())
return -EINVAL;
 
-   return hrtimer_get_res(baseid, tp);
+   return hrtimer_get_res64(baseid, tp);
 }
 
 /**
  * alarm_clock_get - posix clock_get interface
  * @which_clock: clockid
- * @tp: timespec to fill.
+ * @tp: timespec64 to fill.
  *
  * Provides the underlying alarm base time.
  */
-static int alarm_clock_get(clockid_t which_clock, struct timespec *tp)
+static int alarm_clock_get(clockid_t which_clock, struct timespec64 *tp)
 {
struct alarm_base *base = alarm_bases[clock2alarm(which_clock)];
 
if (!alarmtimer_get_rtcdev())
return -EINVAL;
 
-   *tp = ktime_to_timespec(base-gettime());
+   *tp = ktime_to_timespec64(base-gettime());
return 0;
 }
 
@@ -547,24 +548,24 @@ static int alarm_timer_create(struct k_itimer *new_timer)
 /**
  * alarm_timer_get - posix timer_get interface
  * @new_timer: k_itimer pointer
- * @cur_setting: itimerspec data to fill
+ * @cur_setting: itimerspec64 data to fill
  *
  * Copies out the current itimerspec data
  */
 static void alarm_timer_get(struct k_itimer *timr,
-   struct itimerspec *cur_setting)
+   struct itimerspec64 *cur_setting)
 {
ktime_t relative_expiry_time =
alarm_expires_remaining((timr-it.alarm.alarmtimer));
 
if (ktime_to_ns(relative_expiry_time)  0) {
-   cur_setting-it_value = ktime_to_timespec(relative_expiry_time);
+   cur_setting-it_value = 
ktime_to_timespec64(relative_expiry_time);
} else {
cur_setting-it_value.tv_sec = 0;
cur_setting-it_value.tv_nsec = 0;
}
 
-   cur_setting-it_interval = ktime_to_timespec(timr-it.alarm.interval);
+   cur_setting-it_interval = ktime_to_timespec64(timr-it.alarm.interval);
 }
 
 /**
@@ -588,14 +589,14 @@ static int alarm_timer_del(struct k_itimer *timr)
  * alarm_timer_set - posix timer_set interface
  * @timr: k_itimer pointer to be deleted
  * @flags: timer flags
- * @new_setting: itimerspec to be used
- * @old_setting: itimerspec being replaced
+ * @new_setting: itimerspec64 to be used
+ * @old_setting: itimerspec64 being replaced
  *
  * Sets the timer to new_setting, and starts the timer.
  */
 static int alarm_timer_set(struct k_itimer *timr, int flags,
-   struct itimerspec *new_setting,
-   struct itimerspec *old_setting)
+   struct itimerspec64 *new_setting,
+   struct itimerspec64 *old_setting)
 {
ktime_t exp;
 
@@ -613,8 +614,8 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
return TIMER_RETRY;
 
/* start the timer */
-   timr-it.alarm.interval = timespec_to_ktime(new_setting-it_interval);
-   exp = timespec_to_ktime(new_setting-it_value);
+   timr-it.alarm.interval = timespec64_to_ktime(new_setting-it_interval);
+   exp = timespec64_to_ktime(new_setting-it_value);
/* Convert (if necessary) to absolute time */
if (flags != TIMER_ABSTIME) {
ktime_t now;
@@ -670,7 +671,7 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, 
ktime_t absexp)
 
 
 /**
- * update_rmtp - Update remaining timespec value
+ * update_rmtp - Update remaining timespec64 value
  * @exp: expiration time
  * @type: timer type
  * @rmtp: user pointer to remaining timepsec value
@@ -824,12 +825,12 @@ static int __init alarmtimer_init(void)
int error = 0;
int i;
struct k_clock alarm_clock = {
-   .clock_getres   = alarm_clock_getres,
-   .clock_get  = alarm_clock_get,
+   .clock_getres64 = alarm_clock_getres,
+   .clock_get64= alarm_clock_get,
.timer_create   = 

[PATCH 06/11] char/mmtimer:Convert to the 64bit methods for k_clock callback function

2015-04-20 Thread Baolin Wang
This patch converts to the 64bit methods for k_clock callback
function, that converts the timespec type to timespec64 type and
converts the itimerspec type to itimerspec64 type.

Signed-off-by: Baolin Wang baolin.w...@linaro.org
---
 drivers/char/mmtimer.c |   36 +---
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c
index 3d6c067..213d0bb 100644
--- a/drivers/char/mmtimer.c
+++ b/drivers/char/mmtimer.c
@@ -478,18 +478,18 @@ static int sgi_clock_period;
 static struct timespec sgi_clock_offset;
 static int sgi_clock_period;
 
-static int sgi_clock_get(clockid_t clockid, struct timespec *tp)
+static int sgi_clock_get(clockid_t clockid, struct timespec64 *tp)
 {
u64 nsec;
 
nsec = rtc_time() * sgi_clock_period
+ sgi_clock_offset.tv_nsec;
-   *tp = ns_to_timespec(nsec);
+   *tp = ns_to_timespec64(nsec);
tp-tv_sec += sgi_clock_offset.tv_sec;
return 0;
 };
 
-static int sgi_clock_set(const clockid_t clockid, const struct timespec *tp)
+static int sgi_clock_set(const clockid_t clockid, const struct timespec64 *tp)
 {
 
u64 nsec;
@@ -657,7 +657,7 @@ static int sgi_timer_del(struct k_itimer *timr)
 }
 
 /* Assumption: it_lock is already held with irq's disabled */
-static void sgi_timer_get(struct k_itimer *timr, struct itimerspec 
*cur_setting)
+static void sgi_timer_get(struct k_itimer *timr, struct itimerspec64 
*cur_setting)
 {
 
if (timr-it.mmtimer.clock == TIMER_OFF) {
@@ -668,14 +668,14 @@ static void sgi_timer_get(struct k_itimer *timr, struct 
itimerspec *cur_setting)
return;
}
 
-   cur_setting-it_interval = ns_to_timespec(timr-it.mmtimer.incr * 
sgi_clock_period);
-   cur_setting-it_value = ns_to_timespec((timr-it.mmtimer.expires - 
rtc_time()) * sgi_clock_period);
+   cur_setting-it_interval = ns_to_timespec64(timr-it.mmtimer.incr * 
sgi_clock_period);
+   cur_setting-it_value = ns_to_timespec64((timr-it.mmtimer.expires - 
rtc_time()) * sgi_clock_period);
 }
 
 
 static int sgi_timer_set(struct k_itimer *timr, int flags,
-   struct itimerspec * new_setting,
-   struct itimerspec * old_setting)
+   struct itimerspec64 *new_setting,
+   struct itimerspec64 *old_setting)
 {
unsigned long when, period, irqflags;
int err = 0;
@@ -687,8 +687,8 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,
sgi_timer_get(timr, old_setting);
 
sgi_timer_del(timr);
-   when = timespec_to_ns(new_setting-it_value);
-   period = timespec_to_ns(new_setting-it_interval);
+   when = timespec64_to_ns(new_setting-it_value);
+   period = timespec64_to_ns(new_setting-it_interval);
 
if (when == 0)
/* Clear timer */
@@ -699,11 +699,9 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,
return -ENOMEM;
 
if (flags  TIMER_ABSTIME) {
-   struct timespec n;
unsigned long now;
 
-   getnstimeofday(n);
-   now = timespec_to_ns(n);
+   now = ktime_get_real_ns();
if (when  now)
when -= now;
else
@@ -765,7 +763,7 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,
return err;
 }
 
-static int sgi_clock_getres(const clockid_t which_clock, struct timespec *tp)
+static int sgi_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
 {
tp-tv_sec = 0;
tp-tv_nsec = sgi_clock_period;
@@ -773,13 +771,13 @@ static int sgi_clock_getres(const clockid_t which_clock, 
struct timespec *tp)
 }
 
 static struct k_clock sgi_clock = {
-   .clock_set  = sgi_clock_set,
-   .clock_get  = sgi_clock_get,
-   .clock_getres   = sgi_clock_getres,
+   .clock_set64= sgi_clock_set,
+   .clock_get64= sgi_clock_get,
+   .clock_getres64 = sgi_clock_getres,
.timer_create   = sgi_timer_create,
-   .timer_set  = sgi_timer_set,
+   .timer_set64= sgi_timer_set,
.timer_del  = sgi_timer_del,
-   .timer_get  = sgi_timer_get
+   .timer_get64= sgi_timer_get
 };
 
 /**
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/11] time/posix-timers:Convert to the 64bit methods for k_clock callback functions

2015-04-20 Thread Baolin Wang
This patch converts the timespec type to timespec64 type, and converts the
itimerspec type to itimerspec64 type for the k_clock callback functions.

This patch also converts the timespec type to timespec64 type for 
timekeeping_clocktai()
function which is used only in the posix-timers.c file.

Signed-off-by: Baolin Wang baolin.w...@linaro.org
---
 include/linux/timekeeping.h |4 +-
 kernel/time/posix-timers.c  |  102 +++
 kernel/time/timekeeping.h   |2 +-
 3 files changed, 57 insertions(+), 51 deletions(-)

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index c6d5ae9..bd3df93 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -242,9 +242,9 @@ static inline void get_monotonic_boottime64(struct 
timespec64 *ts)
*ts = ktime_to_timespec64(ktime_get_boottime());
 }
 
-static inline void timekeeping_clocktai(struct timespec *ts)
+static inline void timekeeping_clocktai(struct timespec64 *ts)
 {
-   *ts = ktime_to_timespec(ktime_get_clocktai());
+   *ts = ktime_to_timespec64(ktime_get_clocktai());
 }
 
 /*
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 9070387..47d1abf 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -132,9 +132,9 @@ static struct k_clock posix_clocks[MAX_CLOCKS];
 static int common_nsleep(const clockid_t, int flags, struct timespec *t,
 struct timespec __user *rmtp);
 static int common_timer_create(struct k_itimer *new_timer);
-static void common_timer_get(struct k_itimer *, struct itimerspec *);
+static void common_timer_get(struct k_itimer *, struct itimerspec64 *);
 static int common_timer_set(struct k_itimer *, int,
-   struct itimerspec *, struct itimerspec *);
+   struct itimerspec64 *, struct itimerspec64 *);
 static int common_timer_del(struct k_itimer *timer);
 
 static enum hrtimer_restart posix_timer_fn(struct hrtimer *data);
@@ -203,17 +203,20 @@ static inline void unlock_timer(struct k_itimer *timr, 
unsigned long flags)
 }
 
 /* Get clock_realtime */
-static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp)
+static int posix_clock_realtime_get(clockid_t which_clock,
+   struct timespec64 *tp)
 {
-   ktime_get_real_ts(tp);
+   ktime_get_real_ts64(tp);
return 0;
 }
 
 /* Set clock_realtime */
 static int posix_clock_realtime_set(const clockid_t which_clock,
-   const struct timespec *tp)
+   const struct timespec64 *tp)
 {
-   return do_sys_settimeofday(tp, NULL);
+   struct timespec ts = timespec64_to_timespec(*tp);
+
+   return do_sys_settimeofday(ts, NULL);
 }
 
 static int posix_clock_realtime_adj(const clockid_t which_clock,
@@ -225,48 +228,51 @@ static int posix_clock_realtime_adj(const clockid_t 
which_clock,
 /*
  * Get monotonic time for posix timers
  */
-static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
+static int posix_ktime_get_ts(clockid_t which_clock, struct timespec64 *tp)
 {
-   ktime_get_ts(tp);
+   ktime_get_ts64(tp);
return 0;
 }
 
 /*
  * Get monotonic-raw time for posix timers
  */
-static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
+static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 
*tp)
 {
-   getrawmonotonic(tp);
+   getrawmonotonic64(tp);
return 0;
 }
 
 
-static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec 
*tp)
+static int posix_get_realtime_coarse(clockid_t which_clock,
+struct timespec64 *tp)
 {
-   *tp = current_kernel_time();
+   *tp = current_kernel_time64();
return 0;
 }
 
 static int posix_get_monotonic_coarse(clockid_t which_clock,
-   struct timespec *tp)
+   struct timespec64 *tp)
 {
-   *tp = get_monotonic_coarse();
+   *tp = get_monotonic_coarse64();
return 0;
 }
 
-static int posix_get_coarse_res(const clockid_t which_clock, struct timespec 
*tp)
+static int posix_get_coarse_res(const clockid_t which_clock,
+   struct timespec64 *tp)
 {
-   *tp = ktime_to_timespec(KTIME_LOW_RES);
+   *tp = ktime_to_timespec64(KTIME_LOW_RES);
return 0;
 }
 
-static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
+static int posix_get_boottime(const clockid_t which_clock,
+ struct timespec64 *tp)
 {
-   get_monotonic_boottime(tp);
+   get_monotonic_boottime64(tp);
return 0;
 }
 
-static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
+static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)
 {
timekeeping_clocktai(tp);
return 0;
@@ -278,57 

[PATCH 08/11] time/posix-clock:Convert to the 64bit methods for k_clock and posix_clock_operations structure

2015-04-20 Thread Baolin Wang
This patch converts the posix clock operations over to the new methods with
timespec64/itimerspec64 type to making them ready for 2038, and it is based on
the ptp patch series.

And also changes to the 64bit methods for k_clock structure, that
converts the timespec/itimerspec type to timespec64/itimerspec64 type.

Signed-off-by: Baolin Wang baolin.w...@linaro.org
---
 drivers/ptp/ptp_clock.c |   26 --
 include/linux/posix-clock.h |   10 +-
 kernel/time/posix-clock.c   |   20 ++--
 3 files changed, 23 insertions(+), 33 deletions(-)

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index bee8270..8c086e7 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -97,32 +97,24 @@ static s32 scaled_ppm_to_ppb(long ppm)
 
 /* posix clock implementation */
 
-static int ptp_clock_getres(struct posix_clock *pc, struct timespec *tp)
+static int ptp_clock_getres(struct posix_clock *pc, struct timespec64 *tp)
 {
tp-tv_sec = 0;
tp-tv_nsec = 1;
return 0;
 }
 
-static int ptp_clock_settime(struct posix_clock *pc, const struct timespec *tp)
+static int ptp_clock_settime(struct posix_clock *pc,
+   const struct timespec64 *tp)
 {
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
-   struct timespec64 ts = timespec_to_timespec64(*tp);
-
-   return ptp-info-settime64(ptp-info, ts);
+   return ptp-info-settime64(ptp-info, tp);
 }
 
-static int ptp_clock_gettime(struct posix_clock *pc, struct timespec *tp)
+static int ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp)
 {
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
-   struct timespec64 ts;
-   int err;
-
-   err = ptp-info-gettime64(ptp-info, ts);
-   if (!err)
-   *tp = timespec64_to_timespec(ts);
-
-   return err;
+   return ptp-info-gettime64(ptp-info, tp);
 }
 
 static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx)
@@ -134,8 +126,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct 
timex *tx)
ops = ptp-info;
 
if (tx-modes  ADJ_SETOFFSET) {
-   struct timespec ts;
-   ktime_t kt;
+   struct timespec64 ts;
s64 delta;
 
ts.tv_sec  = tx-time.tv_sec;
@@ -147,8 +138,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct 
timex *tx)
if ((unsigned long) ts.tv_nsec = NSEC_PER_SEC)
return -EINVAL;
 
-   kt = timespec_to_ktime(ts);
-   delta = ktime_to_ns(kt);
+   delta = timespec64_to_ns(ts);
err = ops-adjtime(ops, delta);
} else if (tx-modes  ADJ_FREQUENCY) {
s32 ppb = scaled_ppm_to_ppb(tx-freq);
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
index 34c4498..fd7e22c 100644
--- a/include/linux/posix-clock.h
+++ b/include/linux/posix-clock.h
@@ -59,23 +59,23 @@ struct posix_clock_operations {
 
int  (*clock_adjtime)(struct posix_clock *pc, struct timex *tx);
 
-   int  (*clock_gettime)(struct posix_clock *pc, struct timespec *ts);
+   int  (*clock_gettime)(struct posix_clock *pc, struct timespec64 *ts);
 
-   int  (*clock_getres) (struct posix_clock *pc, struct timespec *ts);
+   int  (*clock_getres)(struct posix_clock *pc, struct timespec64 *ts);
 
int  (*clock_settime)(struct posix_clock *pc,
- const struct timespec *ts);
+ const struct timespec64 *ts);
 
int  (*timer_create) (struct posix_clock *pc, struct k_itimer *kit);
 
int  (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit);
 
void (*timer_gettime)(struct posix_clock *pc,
- struct k_itimer *kit, struct itimerspec *tsp);
+ struct k_itimer *kit, struct itimerspec64 *tsp);
 
int  (*timer_settime)(struct posix_clock *pc,
  struct k_itimer *kit, int flags,
- struct itimerspec *tsp, struct itimerspec *old);
+ struct itimerspec64 *tsp, struct itimerspec64 
*old);
/*
 * Optional character device methods:
 */
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index ce033c7..e21e4c1 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -297,7 +297,7 @@ out:
return err;
 }
 
-static int pc_clock_gettime(clockid_t id, struct timespec *ts)
+static int pc_clock_gettime(clockid_t id, struct timespec64 *ts)
 {
struct posix_clock_desc cd;
int err;
@@ -316,7 +316,7 @@ static int pc_clock_gettime(clockid_t id, struct timespec 
*ts)
return err;
 }
 
-static int pc_clock_getres(clockid_t id, struct timespec *ts)
+static int pc_clock_getres(clockid_t id, struct timespec64 *ts)
 {
   

[PATCH 01/11] linux/time64.h:Introduce the 'struct itimerspec64' for 64bit

2015-04-20 Thread Baolin Wang
This patch introduces the 'struct itimerspec64' for 64bit to replace itimerspec,
and also introduces the conversion methods: itimerspec64_to_itimerspec() and
itimerspec_to_itimerspec64(), which makes itimerspec ready for the year 2038.

Signed-off-by: Baolin Wang baolin.w...@linaro.org
---
 include/linux/time64.h |   13 +
 1 file changed, 13 insertions(+)

diff --git a/include/linux/time64.h b/include/linux/time64.h
index a383147..3647bdd 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -18,6 +18,11 @@ struct timespec64 {
 };
 #endif
 
+struct itimerspec64 {
+   struct timespec64 it_interval;  /* timer period */
+   struct timespec64 it_value; /* timer expiration */
+};
+
 /* Parameters used to convert the timespec values: */
 #define MSEC_PER_SEC   1000L
 #define USEC_PER_MSEC  1000L
@@ -187,4 +192,12 @@ static __always_inline void timespec64_add_ns(struct 
timespec64 *a, u64 ns)
 
 #endif
 
+#define itimerspec64_to_itimerspec(its64) \
+   ({ (struct itimerspec){ .it_interval = 
timespec64_to_timespec((its64).it_interval), \
+   .it_value = 
timespec64_to_timespec((its64).it_value) }; })
+
+#define itimerspec_to_itimerspec64(its) \
+   ({ (struct itimerspec64){ .it_interval = 
timespec_to_timespec64((its).it_interval), \
+ .it_value = 
timespec_to_timespec64((its).it_value) }; })
+
 #endif /* _LINUX_TIME64_H */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/11] time/hrtimer:Introduce hrtimer_get_res64() with timespec64 type for getting the timer resolution

2015-04-20 Thread Baolin Wang
This patch introduces hrtimer_get_res64() function to get the timer resolution
with timespec64 type, and moves the hrtimer_get_res() function into
include/linux/hrtimer.h as a 'static inline' helper that just calls 
hrtimer_get_res64.

It is ready for 2038 year when getting the timer resolution by 
hrtimer_get_res64() function
with timespec64 type, and it is convenient to delete the old hrtimer_get_res() 
function
in hrtimer.h file.

Signed-off-by: Baolin Wang baolin.w...@linaro.org
---
 include/linux/hrtimer.h |   12 +++-
 kernel/time/hrtimer.c   |   10 +-
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 05f6df1..ee8ed44 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -383,7 +383,17 @@ static inline int hrtimer_restart(struct hrtimer *timer)
 
 /* Query timers: */
 extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
-extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
+extern int hrtimer_get_res64(const clockid_t which_clock,
+struct timespec64 *tp);
+
+static inline int hrtimer_get_res(const clockid_t which_clock,
+ struct timespec *tp)
+{
+   struct timespec64 *ts64;
+
+   *ts64 = timespec_to_timespec64(*tp);
+   return hrtimer_get_res64(which_clock, ts64);
+}
 
 extern ktime_t hrtimer_get_next_event(void);
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index bee0c1f..508d936 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1175,24 +1175,24 @@ void hrtimer_init(struct hrtimer *timer, clockid_t 
clock_id,
 EXPORT_SYMBOL_GPL(hrtimer_init);
 
 /**
- * hrtimer_get_res - get the timer resolution for a clock
+ * hrtimer_get_res64 - get the timer resolution for a clock
  * @which_clock: which clock to query
- * @tp: pointer to timespec variable to store the resolution
+ * @tp: pointer to timespec64 variable to store the resolution
  *
  * Store the resolution of the clock selected by @which_clock in the
  * variable pointed to by @tp.
  */
-int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
+int hrtimer_get_res64(const clockid_t which_clock, struct timespec64 *tp)
 {
struct hrtimer_cpu_base *cpu_base;
int base = hrtimer_clockid_to_base(which_clock);
 
cpu_base = raw_cpu_ptr(hrtimer_bases);
-   *tp = ktime_to_timespec(cpu_base-clock_base[base].resolution);
+   *tp = ktime_to_timespec64(cpu_base-clock_base[base].resolution);
 
return 0;
 }
-EXPORT_SYMBOL_GPL(hrtimer_get_res);
+EXPORT_SYMBOL_GPL(hrtimer_get_res64);
 
 static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 {
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/11] posix timers:Introduce the 64bit methods with timespec64 type for k_clock structure

2015-04-20 Thread Baolin Wang
This patch introduces new methods with the timespec64 type for the k_clock
structure, converting the timespec type to the timespec64 type and the
itimerspec type to the itimerspec64 type in the k_clock structure to get
ready for the 2038 issue.

And also introduces the 64bit methods with timespec64 type for the framework
functions.

The next step will migrate all the k_clock users to the new methods with the
timespec64 and itimerspec64 types; this covers the files posix-timers.c,
mmtimer.c, alarmtimer.c, posix-clock.c and posix-cpu-timers.c.

Signed-off-by: Baolin Wang baolin.w...@linaro.org
---
 include/linux/posix-timers.h |9 ++
 kernel/time/posix-timers.c   |   65 --
 2 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 907f3fd..35786c5 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -98,9 +98,13 @@ struct k_itimer {
 
 struct k_clock {
int (*clock_getres) (const clockid_t which_clock, struct timespec *tp);
+   int (*clock_getres64) (const clockid_t which_clock, struct timespec64 
*tp);
int (*clock_set) (const clockid_t which_clock,
  const struct timespec *tp);
+   int (*clock_set64) (const clockid_t which_clock,
+   const struct timespec64 *tp);
int (*clock_get) (const clockid_t which_clock, struct timespec * tp);
+   int (*clock_get64) (const clockid_t which_clock, struct timespec64 *tp);
int (*clock_adj) (const clockid_t which_clock, struct timex *tx);
int (*timer_create) (struct k_itimer *timer);
int (*nsleep) (const clockid_t which_clock, int flags,
@@ -109,10 +113,15 @@ struct k_clock {
int (*timer_set) (struct k_itimer * timr, int flags,
  struct itimerspec * new_setting,
  struct itimerspec * old_setting);
+   int (*timer_set64) (struct k_itimer *timr, int flags,
+   struct itimerspec64 *new_setting,
+   struct itimerspec64 *old_setting);
int (*timer_del) (struct k_itimer * timr);
 #define TIMER_RETRY 1
void (*timer_get) (struct k_itimer * timr,
   struct itimerspec * cur_setting);
+   void (*timer_get64) (struct k_itimer *timr,
+struct itimerspec64 *cur_setting);
 };
 
 extern struct k_clock clock_posix_cpu;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 31ea01f..9070387 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -522,13 +522,13 @@ void posix_timers_register_clock(const clockid_t clock_id,
return;
}
 
-   if (!new_clock-clock_get) {
-   printk(KERN_WARNING POSIX clock id %d lacks clock_get()\n,
+   if (!new_clock-clock_get  !new_clock-clock_get64) {
+   printk(KERN_WARNING POSIX clock id %d lacks clock_get() and 
clock_get64()\n,
   clock_id);
return;
}
-   if (!new_clock-clock_getres) {
-   printk(KERN_WARNING POSIX clock id %d lacks clock_getres()\n,
+   if (!new_clock-clock_getres  !new_clock-clock_getres64) {
+   printk(KERN_WARNING POSIX clock id %d lacks clock_getres() and 
clock_getres64()\n,
   clock_id);
return;
}
@@ -579,7 +579,7 @@ static struct k_clock *clockid_to_kclock(const clockid_t id)
return (id  CLOCKFD_MASK) == CLOCKFD ?
clock_posix_dynamic : clock_posix_cpu;
 
-   if (id = MAX_CLOCKS || !posix_clocks[id].clock_getres)
+   if (id = MAX_CLOCKS || (!posix_clocks[id].clock_getres  
!posix_clocks[id].clock_getres64))
return NULL;
return posix_clocks[id];
 }
@@ -771,6 +771,7 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
struct itimerspec __user *, setting)
 {
struct itimerspec cur_setting;
+   struct itimerspec64 cur_setting64;
struct k_itimer *timr;
struct k_clock *kc;
unsigned long flags;
@@ -781,10 +782,16 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
return -EINVAL;
 
kc = clockid_to_kclock(timr-it_clock);
-   if (WARN_ON_ONCE(!kc || !kc-timer_get))
+   if (WARN_ON_ONCE(!kc || (!kc-timer_get  !kc-timer_get64))) {
ret = -EINVAL;
-   else
-   kc-timer_get(timr, cur_setting);
+   } else {
+   if (kc-timer_get64) {
+   kc-timer_get64(timr, cur_setting64);
+   cur_setting = itimerspec64_to_itimerspec(cur_setting64);
+   } else {
+   kc-timer_get(timr, cur_setting);
+   }
+   }
 
unlock_timer(timr, flags);
 
@@ -877,6 +884,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, 
flags,
 {
struct 

[PATCH 09/11] cputime:Introduce the cputime_to_timespec64/timespec64_to_cputime function

2015-04-20 Thread Baolin Wang
This patch introduces some functions for converting cputime to timespec64 and
back, which replace the timespec type with the timespec64 type, including for
the arch/s390 and arch/powerpc architectures.

These new methods will replace the old cputime_to_timespec() and
timespec_to_cputime() functions to get ready for the 2038 issue. The
cputime_to_timespec()/timespec_to_cputime() functions are moved to the
include/linux/cputime.h file so that they can be removed conveniently.

Signed-off-by: Baolin Wang baolin.w...@linaro.org
---
 arch/powerpc/include/asm/cputime.h|6 +++---
 arch/s390/include/asm/cputime.h   |8 
 include/asm-generic/cputime_jiffies.h |   10 +-
 include/linux/cputime.h   |   15 +++
 include/linux/jiffies.h   |3 +++
 kernel/time/time.c|   21 +
 6 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/cputime.h 
b/arch/powerpc/include/asm/cputime.h
index e245255..5dda5c0 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -154,9 +154,9 @@ static inline cputime_t secs_to_cputime(const unsigned long 
sec)
 }
 
 /*
- * Convert cputime - timespec
+ * Convert cputime - timespec64
  */
-static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
+static inline void cputime_to_timespec64(const cputime_t ct, struct timespec64 
*p)
 {
u64 x = (__force u64) ct;
unsigned int frac;
@@ -168,7 +168,7 @@ static inline void cputime_to_timespec(const cputime_t ct, 
struct timespec *p)
p-tv_nsec = x;
 }
 
-static inline cputime_t timespec_to_cputime(const struct timespec *p)
+static inline cputime_t timespec64_to_cputime(const struct timespec64 *p)
 {
u64 ct;
 
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index b91e960..1266697 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -89,16 +89,16 @@ static inline cputime_t secs_to_cputime(const unsigned int 
s)
 }
 
 /*
- * Convert cputime to timespec and back.
+ * Convert cputime to timespec64 and back.
  */
-static inline cputime_t timespec_to_cputime(const struct timespec *value)
+static inline cputime_t timespec64_to_cputime(const struct timespec64 *value)
 {
unsigned long long ret = value-tv_sec * CPUTIME_PER_SEC;
return (__force cputime_t)(ret + __div(value-tv_nsec * 
CPUTIME_PER_USEC, NSEC_PER_USEC));
 }
 
-static inline void cputime_to_timespec(const cputime_t cputime,
-  struct timespec *value)
+static inline void cputime_to_timespec64(const cputime_t cputime,
+  struct timespec64 *value)
 {
unsigned long long __cputime = (__force unsigned long long) cputime;
 #ifndef CONFIG_64BIT
diff --git a/include/asm-generic/cputime_jiffies.h 
b/include/asm-generic/cputime_jiffies.h
index fe386fc..ec77c0b 100644
--- a/include/asm-generic/cputime_jiffies.h
+++ b/include/asm-generic/cputime_jiffies.h
@@ -44,12 +44,12 @@ typedef u64 __nocast cputime64_t;
 #define secs_to_cputime(sec)   jiffies_to_cputime((sec) * HZ)
 
 /*
- * Convert cputime to timespec and back.
+ * Convert cputime to timespec64 and abck.
  */
-#define timespec_to_cputime(__val) \
-   jiffies_to_cputime(timespec_to_jiffies(__val))
-#define cputime_to_timespec(__ct,__val)\
-   jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
+#define timespec64_to_cputime(__val)  \
+   jiffies_to_cputime(timespec64_to_jiffies(__val))
+#define cputime_to_timespec64(__ct,__val)  \
+   jiffies_to_timespec64(cputime_to_jiffies(__ct),__val)
 
 /*
  * Convert cputime to timeval and back.
diff --git a/include/linux/cputime.h b/include/linux/cputime.h
index f2eb2ee..f01896f 100644
--- a/include/linux/cputime.h
+++ b/include/linux/cputime.h
@@ -13,4 +13,19 @@
usecs_to_cputime((__nsecs) / NSEC_PER_USEC)
 #endif
 
+static inline cputime_t timespec_to_cputime(const struct timespec *ts)
+{
+   struct timespec64 ts64 = timespec_to_timespec64(*ts);
+   return timespec64_to_cputime(ts64);
+}
+
+static inline void cputime_to_timespec(const cputime_t cputime,
+   struct timespec *value)
+{
+   struct timespec64 *ts64;
+
+   *ts64 = timespec_to_timespec64(*value);
+   cputime_to_timespec64(cputime, ts64);
+}
+
 #endif /* __LINUX_CPUTIME_H */
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index c367cbd..dbaa4ee 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -293,6 +293,9 @@ extern unsigned long usecs_to_jiffies(const unsigned int u);
 extern unsigned long timespec_to_jiffies(const struct timespec *value);
 extern void jiffies_to_timespec(const unsigned long jiffies,
struct timespec *value);
+extern unsigned long timespec64_to_jiffies(const struct timespec64 *value);
+extern void jiffies_to_timespec64(const 

[PATCH 11/11] k_clock:Remove the 32bit methods with timespec type

2015-04-20 Thread Baolin Wang
All of the k_clock users have been converted to the new methods. This patch
removes the older methods with the timespec/itimerspec types. As a result, the
k_clock structure is ready for the year 2038.

Signed-off-by: Baolin Wang baolin.w...@linaro.org
---
 include/linux/posix-timers.h |9 --
 kernel/time/posix-timers.c   |   72 +-
 2 files changed, 29 insertions(+), 52 deletions(-)

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 35786c5..7c3dae2 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -97,29 +97,20 @@ struct k_itimer {
 };
 
 struct k_clock {
-   int (*clock_getres) (const clockid_t which_clock, struct timespec *tp);
int (*clock_getres64) (const clockid_t which_clock, struct timespec64 
*tp);
-   int (*clock_set) (const clockid_t which_clock,
- const struct timespec *tp);
int (*clock_set64) (const clockid_t which_clock,
const struct timespec64 *tp);
-   int (*clock_get) (const clockid_t which_clock, struct timespec * tp);
int (*clock_get64) (const clockid_t which_clock, struct timespec64 *tp);
int (*clock_adj) (const clockid_t which_clock, struct timex *tx);
int (*timer_create) (struct k_itimer *timer);
int (*nsleep) (const clockid_t which_clock, int flags,
   struct timespec *, struct timespec __user *);
long (*nsleep_restart) (struct restart_block *restart_block);
-   int (*timer_set) (struct k_itimer * timr, int flags,
- struct itimerspec * new_setting,
- struct itimerspec * old_setting);
int (*timer_set64) (struct k_itimer *timr, int flags,
struct itimerspec64 *new_setting,
struct itimerspec64 *old_setting);
int (*timer_del) (struct k_itimer * timr);
 #define TIMER_RETRY 1
-   void (*timer_get) (struct k_itimer * timr,
-  struct itimerspec * cur_setting);
void (*timer_get64) (struct k_itimer *timr,
 struct itimerspec64 *cur_setting);
 };
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 47d1abf..3196ec0 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -528,13 +528,13 @@ void posix_timers_register_clock(const clockid_t clock_id,
return;
}
 
-   if (!new_clock-clock_get  !new_clock-clock_get64) {
-   printk(KERN_WARNING POSIX clock id %d lacks clock_get() and 
clock_get64()\n,
+   if (!new_clock-clock_get64) {
+   printk(KERN_WARNING POSIX clock id %d lacks clock_get64()\n,
   clock_id);
return;
}
-   if (!new_clock-clock_getres  !new_clock-clock_getres64) {
-   printk(KERN_WARNING POSIX clock id %d lacks clock_getres() and 
clock_getres64()\n,
+   if (!!new_clock-clock_getres64) {
+   printk(KERN_WARNING POSIX clock id %d lacks 
clock_getres64()\n,
   clock_id);
return;
}
@@ -585,7 +585,7 @@ static struct k_clock *clockid_to_kclock(const clockid_t id)
return (id  CLOCKFD_MASK) == CLOCKFD ?
clock_posix_dynamic : clock_posix_cpu;
 
-   if (id = MAX_CLOCKS || (!posix_clocks[id].clock_getres  
!posix_clocks[id].clock_getres64))
+   if (id = MAX_CLOCKS || !posix_clocks[id].clock_getres64)
return NULL;
return posix_clocks[id];
 }
@@ -788,15 +788,11 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
return -EINVAL;
 
kc = clockid_to_kclock(timr-it_clock);
-   if (WARN_ON_ONCE(!kc || (!kc-timer_get  !kc-timer_get64))) {
+   if (WARN_ON_ONCE(!kc || !kc-timer_get64)) {
ret = -EINVAL;
} else {
-   if (kc-timer_get64) {
-   kc-timer_get64(timr, cur_setting64);
-   cur_setting = itimerspec64_to_itimerspec(cur_setting64);
-   } else {
-   kc-timer_get(timr, cur_setting);
-   }
+   kc-timer_get64(timr, cur_setting64);
+   cur_setting = itimerspec64_to_itimerspec(cur_setting64);
}
 
unlock_timer(timr, flags);
@@ -911,18 +907,14 @@ retry:
return -EINVAL;
 
kc = clockid_to_kclock(timr-it_clock);
-   if (WARN_ON_ONCE(!kc || (!kc-timer_set  !kc-timer_set64))) {
+   if (WARN_ON_ONCE(!kc || !kc-timer_set64)) {
error = -EINVAL;
} else {
-   if (kc-timer_set64) {
-   new_spec64 = itimerspec_to_itimerspec64(new_spec);
-   error = kc-timer_set64(timr, flags, new_spec64,
-   old_spec64);
-   if (old_setting)
-   

[PATCH 10/11] time/posix-cpu-timers:Convert to the 64bit methods for k_clock structure

2015-04-20 Thread Baolin Wang
This patch changes to the new methods of k_clock structure with timespec64
type, converts the timespec/itimerspec type to timespec64/itimerspec64 type
for the callback function in posix-cpu-timers.c file.

Signed-off-by: Baolin Wang baolin.w...@linaro.org
---
 kernel/time/posix-cpu-timers.c |   83 +---
 1 file changed, 44 insertions(+), 39 deletions(-)

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 0075da7..51cfead 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -52,7 +52,7 @@ static int check_clock(const clockid_t which_clock)
 }
 
 static inline unsigned long long
-timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
+timespec64_to_sample(const clockid_t which_clock, const struct timespec64 *tp)
 {
unsigned long long ret;
 
@@ -60,19 +60,19 @@ timespec_to_sample(const clockid_t which_clock, const 
struct timespec *tp)
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
ret = (unsigned long long)tp-tv_sec * NSEC_PER_SEC + 
tp-tv_nsec;
} else {
-   ret = cputime_to_expires(timespec_to_cputime(tp));
+   ret = cputime_to_expires(timespec64_to_cputime(tp));
}
return ret;
 }
 
-static void sample_to_timespec(const clockid_t which_clock,
+static void sample_to_timespec64(const clockid_t which_clock,
   unsigned long long expires,
-  struct timespec *tp)
+  struct timespec64 *tp)
 {
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
-   *tp = ns_to_timespec(expires);
+   *tp = ns_to_timespec64(expires);
else
-   cputime_to_timespec((__force cputime_t)expires, tp);
+   cputime_to_timespec64((__force cputime_t)expires, tp);
 }
 
 /*
@@ -141,7 +141,7 @@ static inline unsigned long long virt_ticks(struct 
task_struct *p)
 }
 
 static int
-posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
+posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
 {
int error = check_clock(which_clock);
if (!error) {
@@ -160,7 +160,7 @@ posix_cpu_clock_getres(const clockid_t which_clock, struct 
timespec *tp)
 }
 
 static int
-posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
+posix_cpu_clock_set(const clockid_t which_clock, const struct timespec64 *tp)
 {
/*
 * You can never reset a CPU clock, but we check for other errors
@@ -263,7 +263,7 @@ static int cpu_clock_sample_group(const clockid_t 
which_clock,
 
 static int posix_cpu_clock_get_task(struct task_struct *tsk,
const clockid_t which_clock,
-   struct timespec *tp)
+   struct timespec64 *tp)
 {
int err = -EINVAL;
unsigned long long rtn;
@@ -277,13 +277,14 @@ static int posix_cpu_clock_get_task(struct task_struct 
*tsk,
}
 
if (!err)
-   sample_to_timespec(which_clock, rtn, tp);
+   sample_to_timespec64(which_clock, rtn, tp);
 
return err;
 }
 
 
-static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec 
*tp)
+static int posix_cpu_clock_get(const clockid_t which_clock,
+   struct timespec64 *tp)
 {
const pid_t pid = CPUCLOCK_PID(which_clock);
int err = -EINVAL;
@@ -598,7 +599,7 @@ static inline void posix_cpu_timer_kick_nohz(void) { }
  * and try again.  (This happens when the timer is in the middle of firing.)
  */
 static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
-  struct itimerspec *new, struct itimerspec *old)
+  struct itimerspec64 *new, struct itimerspec64 
*old)
 {
unsigned long flags;
struct sighand_struct *sighand;
@@ -608,7 +609,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int 
timer_flags,
 
WARN_ON_ONCE(p == NULL);
 
-   new_expires = timespec_to_sample(timer-it_clock, new-it_value);
+   new_expires = timespec64_to_sample(timer-it_clock, new-it_value);
 
/*
 * Protect against sighand release/switch in exit/exec and p-cpu_timers
@@ -669,7 +670,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int 
timer_flags,
bump_cpu_timer(timer, val);
if (val  timer-it.cpu.expires) {
old_expires = timer-it.cpu.expires - val;
-   sample_to_timespec(timer-it_clock,
+   sample_to_timespec64(timer-it_clock,
   old_expires,
   old-it_value);
} else {
@@ -709,7 +710,7 @@ static int posix_cpu_timer_set(struct 

[PATCH 00/11] Convert the posix_clock_operations and k_clock structure to ready for 2038

2015-04-20 Thread Baolin Wang
This patch series changes the 32-bit time type (timespec/itimerspec) to the 
64-bit one
(timespec64/itimerspec64), since 32-bit time types will break in the year 2038.

This patch series introduces new methods with timespec64/itimerspec64 type,
and removes the old ones with timespec/itimerspec type for 
posix_clock_operations
and k_clock structure.

Also introduces some new functions with timespec64/itimerspec64 type, like 
current_kernel_time64(),
hrtimer_get_res64(), cputime_to_timespec64() and timespec64_to_cputime().

Baolin Wang (11):
  linux/time64.h:Introduce the 'struct itimerspec64' for 64bit
  timekeeping:Introduce the current_kernel_time64() function with
timespec64 type
  time/hrtimer:Introduce hrtimer_get_res64() with timespec64 type for
getting the timer resolution
  posix timers:Introduce the 64bit methods with timespec64 type for
k_clock structure
  time/posix-timers:Convert to the 64bit methods for k_clock callback
functions
  char/mmtimer:Convert to the 64bit methods for k_clock callback
function
  time/alarmtimer:Convert to the new methods for k_clock structure
  time/posix-clock:Convert to the 64bit methods for k_clock and
posix_clock_operations structure
  cputime:Introduce the cputime_to_timespec64/timespec64_to_cputime
function
  time/posix-cpu-timers:Convert to the 64bit methods for k_clock
structure
  k_clock:Remove the 32bit methods with timespec type

 arch/powerpc/include/asm/cputime.h|6 +-
 arch/s390/include/asm/cputime.h   |8 +-
 drivers/char/mmtimer.c|   36 
 drivers/ptp/ptp_clock.c   |   26 ++
 include/asm-generic/cputime_jiffies.h |   10 +--
 include/linux/cputime.h   |   15 
 include/linux/hrtimer.h   |   12 ++-
 include/linux/jiffies.h   |3 +
 include/linux/posix-clock.h   |   10 +--
 include/linux/posix-timers.h  |   18 ++--
 include/linux/time64.h|   13 +++
 include/linux/timekeeping.h   |   14 ++-
 kernel/time/alarmtimer.c  |   43 -
 kernel/time/hrtimer.c |   10 +--
 kernel/time/posix-clock.c |   20 ++---
 kernel/time/posix-cpu-timers.c|   83 +
 kernel/time/posix-timers.c|  157 +++--
 kernel/time/time.c|   21 +
 kernel/time/timekeeping.c |6 +-
 kernel/time/timekeeping.h |2 +-
 20 files changed, 302 insertions(+), 211 deletions(-)

-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv3] pppoe: Lacks DST MAC address check

2015-04-20 Thread David Miller
From: Joakim Tjernlund joakim.tjernl...@transmode.se
Date: Mon, 20 Apr 2015 21:07:48 +0200

 From: Joakim Tjernlund joakim.tjernl...@transmode.se
 
 A pppoe session is identified by its session ID and MAC address.
 Currently pppoe does not check if the received pkg has the correct
 MAC address. This is a problem when the eth I/F is in promisc mode
 as then any DST MAC address is accepted.
 
 Signed-off-by: Joakim Tjernlund joakim.tjernl...@transmode.se
 ---
  drivers/net/ppp/pppoe.c | 3 +++
  1 file changed, 3 insertions(+)
 
 v2 - The MAC address check should encompass all pppoe pkgs,
  not only the relay type.
 
 v3 - Add signoff

Applied, thanks.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >