Re: [PATCH 04/11] posix timers:Introduce the 64bit methods with timespec64 type for k_clock structure
On Mon, 20 Apr 2015, Baolin Wang wrote: @@ -771,6 +771,7 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct itimerspec __user *, setting) { struct itimerspec cur_setting; + struct itimerspec64 cur_setting64; struct k_itimer *timr; struct k_clock *kc; unsigned long flags; @@ -781,10 +782,16 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, return -EINVAL; kc = clockid_to_kclock(timr-it_clock); - if (WARN_ON_ONCE(!kc || !kc-timer_get)) + if (WARN_ON_ONCE(!kc || (!kc-timer_get !kc-timer_get64))) { ret = -EINVAL; - else - kc-timer_get(timr, cur_setting); + } else { + if (kc-timer_get64) { + kc-timer_get64(timr, cur_setting64); + cur_setting = itimerspec64_to_itimerspec(cur_setting64); + } else { + kc-timer_get(timr, cur_setting); + } + } This is really horrible. You add a metric ton of conditionals to every syscall just to remove them later again. I have not yet checked the end result, but this approach is error prone as hell and just introduces completely useless code churn. It's useless because you do not factor out the guts of the syscall functions so we can reuse the very same logic for the future 2038 safe syscalls which we need to introduce for 32bit machines. Take a look at the compat syscalls. They do the right thing. 
COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct compat_itimerspec __user *, setting) { long err; mm_segment_t oldfs; struct itimerspec ts; oldfs = get_fs(); set_fs(KERNEL_DS); err = sys_timer_gettime(timer_id, (struct itimerspec __user *) ts); set_fs(oldfs); if (!err put_compat_itimerspec(setting, ts)) return -EFAULT; return err; } So we can be clever and do the following: 1) Preparatory work in posix-timer.c (Patch #1) - Split out the guts of the syscall and change the syscall implementation static int __timer_gettime(timer_t timer_id, struct itimerspec *cur_setting) { struct k_itimer *timr; struct k_clock *kc; unsigned long flags; int ret = 0; timr = lock_timer(timer_id, flags); if (!timr) return -EINVAL; kc = clockid_to_kclock(timr-it_clock); if (WARN_ON_ONCE(!kc || !kc-timer_get)) ret = -EINVAL; else kc-timer_get(timr, cur_setting); unlock_timer(timr, flags); return ret; } /* Get the time remaining on a POSIX.1b interval timer. */ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct itimerspec __user *, setting) { struct itimerspec cur_setting; int ret = __timer_gettime(timer_id, cur_setting); if (!ret copy_to_user(setting, cur_setting, sizeof (cur_setting))) return -EFAULT; return ret; } 2) Do the 64bit infrastructure work in posix-timer.c (Patch #2) - Introduce k_clock-timer_get64() and provide a stub function static int default_timer_get64(struct k_clock *kc, struct k_itimer *timr, struct itimerspec64 *cur_setting64) { struct itimerspec cur_setting; kc-timer_get(timer, cur_setting); return 0; } - Add the following to posix_timers_register_clock() if (kc-timer_get !kc-timer_get64) kc-timer_get64 = default_timer_get64; - Convert __timer_gettime to 64bit -static int __timer_gettime(timer_t timer_id, struct itimerspec64 *cur_setting) +static int __timer_gettime(timer_t timer_id, struct itimerspec *cur_setting) { ... 
kc = clockid_to_kclock(timr-it_clock); + if (WARN_ON_ONCE(!kc || !kc-timer)) - if (WARN_ON_ONCE(!kc || !kc-timer_get64)) ret = -EINVAL; else - kc-timer_get(timr, cur_setting); + kc-timer_get64(timr, cur_setting); unlock_timer(timr, flags); return ret; } - Change the syscall implementation in the following way: /* Get the time remaining on a POSIX.1b interval timer. */ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct itimerspec __user *, setting) { #ifdef CONFIG_64BIT struct itimerspec64 cur_setting; int ret = __timer_gettime(timer_id, cur_setting); #else struct itimerspec64 cur_setting64; struct itimerspec cur_setting; int ret = __timer_gettime(timer_id, cur_setting64); if (!ret) cur_setting = itimerspec64_to_itimerspec(cur_setting64); #endif if (!ret copy_to_user(setting, cur_setting, sizeof (cur_setting))) return -EFAULT; return ret; } The result is two simple to review patches with minimal code churn. The nice thing is that once we introduce new syscalls
[PATCH] net: dsa: mv88e6xxx: fix setup of port control 1
mv88e6xxx_setup_port_common was writing to PORT_DEFAULT_VLAN (port offset 0x07) instead of PORT_CONTROL_1 (port offset 0x05). Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com --- drivers/net/dsa/mv88e6xxx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 9f0c2b9..48712bd 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1251,8 +1251,7 @@ int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port) /* Port Control 1: disable trunking, disable sending * learning messages to this port. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_DEFAULT_VLAN, - 0x); + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_1, 0x); if (ret) goto abort; -- 2.3.5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 02/11] IB/addr: Pass network namespace as a parameter
On Mon, 2015-04-20 at 12:03 +0300, Haggai Eran wrote: From: Guy Shapiro gu...@mellanox.com Add network namespace support to the ib_addr module. For that, all the address resolution and matching should be done using the appropriate namespace instead of init_net. This is achieved by: 1. Adding an explicit network namespace argument to exported function that require a namespace. 2. Saving the namespace in the rdma_addr_client structure. 3. Using it when calling networking functions. In order to preserve the behavior of calling modules, init_net is passed as the parameter in calls from other modules. This is modified as namespace support is added on more levels. Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com Signed-off-by: Guy Shapiro gu...@mellanox.com --- drivers/infiniband/core/addr.c | 31 -- drivers/infiniband/core/cma.c| 4 ++- drivers/infiniband/core/verbs.c | 14 +++--- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 3 ++- include/rdma/ib_addr.h | 44 5 files changed, 72 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index f80da50d84a5..95beaef6b66d 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, int ret = -EADDRNOTAVAIL; if (dev_addr-bound_dev_if) { - dev = dev_get_by_index(init_net, dev_addr-bound_dev_if); + dev = dev_get_by_index(dev_addr-net, dev_addr-bound_dev_if); if (!dev) return -ENODEV; ret = rdma_copy_addr(dev_addr, dev, NULL); @@ -137,9 +137,10 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, } switch (addr-sa_family) { - case AF_INET: - dev = ip_dev_find(init_net, - ((struct sockaddr_in *) addr)-sin_addr.s_addr); + case AF_INET: { ^ Please don't add brackets just so you can convert a cast into a variable declaration that's unnecessary + 
struct sockaddr_in *addr_in = (struct sockaddr_in *)addr; + + dev = ip_dev_find(dev_addr-net, addr_in-sin_addr.s_addr); if (!dev) return ret; @@ -149,12 +150,12 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, *vlan_id = rdma_vlan_dev_vlan_id(dev); dev_put(dev); break; - + } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: rcu_read_lock(); - for_each_netdev_rcu(init_net, dev) { - if (ipv6_chk_addr(init_net, + for_each_netdev_rcu(dev_addr-net, dev) { + if (ipv6_chk_addr(dev_addr-net, ((struct sockaddr_in6 *) addr)-sin6_addr, dev, 1)) { ret = rdma_copy_addr(dev_addr, dev, NULL); @@ -236,7 +237,7 @@ static int addr4_resolve(struct sockaddr_in *src_in, fl4.daddr = dst_ip; fl4.saddr = src_ip; fl4.flowi4_oif = addr-bound_dev_if; - rt = ip_route_output_key(init_net, fl4); + rt = ip_route_output_key(addr-net, fl4); if (IS_ERR(rt)) { ret = PTR_ERR(rt); goto out; @@ -278,12 +279,13 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, fl6.saddr = src_in-sin6_addr; fl6.flowi6_oif = addr-bound_dev_if; - dst = ip6_route_output(init_net, NULL, fl6); + dst = ip6_route_output(addr-net, NULL, fl6); if ((ret = dst-error)) goto put; if (ipv6_addr_any(fl6.saddr)) { - ret = ipv6_dev_get_saddr(init_net, ip6_dst_idev(dst)-dev, + ret = ipv6_dev_get_saddr(addr-net, + ip6_dst_idev(dst)-dev, fl6.daddr, 0, fl6.saddr); if (ret) goto put; @@ -458,7 +460,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr, } int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, -u16 *vlan_id) +u16 *vlan_id, struct net *net) In the core networking code, the net namespace is always first. Please stick with that paradigm. -- Doug Ledford dledf...@redhat.com GPG KeyID: 0E572FDD signature.asc Description: This is a digitally signed message part
Re: [PATCH 05/11] time/posix-timers:Convert to the 64bit methods for k_clock callback functions
On Mon, 20 Apr 2015, Baolin Wang wrote: /* Set clock_realtime */ static int posix_clock_realtime_set(const clockid_t which_clock, - const struct timespec *tp) + const struct timespec64 *tp) { - return do_sys_settimeofday(tp, NULL); + struct timespec ts = timespec64_to_timespec(*tp); + + return do_sys_settimeofday(ts, NULL); Sigh. No. We first provide a proper function for this, which takes a timespec64, i.e. do_sys_settimeofday64() instead of having this wrapper mess all over the place. /* SIGEV_NONE timers are not queued ! See common_timer_get */ if (((timr-it_sigev_notify ~SIGEV_THREAD_ID) == SIGEV_NONE)) { diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 1d91416..144af14 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -15,7 +15,7 @@ extern u64 timekeeping_max_deferment(void); extern int timekeeping_inject_offset(struct timespec *ts); extern s32 timekeeping_get_tai_offset(void); extern void timekeeping_set_tai_offset(s32 tai_offset); -extern void timekeeping_clocktai(struct timespec *ts); +extern void timekeeping_clocktai(struct timespec64 *ts); # git grep timekeeping_clocktai() is your friend. Thanks, tglx -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/1] altera tse: Error-Bit on tx-avalon-stream always set.
From: Andreas Oetken ennoerlan...@gmail.com The Error-Bit on the avalon streaming interface of the tx-dma-channel was always set. In SGMII configurations this leads to error-symbols on the PCS and packet-rejection on the receiver side (e.g. SGMII/1000Base-X connected switch). This only applies to the tse-configuration with MSGDMA. This issue was detected and fixed on a custom board with a direct connection to a Marvell switch in SGMII-PHY-Mode (incl. custom patches for SGMII-PCS). According to the datasheet if ff_tx_err (avalon-streaming) is set it is forwarded to gm_tx_err. As a result the PCS is forwarding the error by sending a /V/-character. Signed-off-by: Andreas Oetken ennoerlan...@gmail.com --- drivers/net/ethernet/altera/altera_msgdmahw.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/altera/altera_msgdmahw.h b/drivers/net/ethernet/altera/altera_msgdmahw.h index e335626..eba070f 100644 --- a/drivers/net/ethernet/altera/altera_msgdmahw.h +++ b/drivers/net/ethernet/altera/altera_msgdmahw.h @@ -72,7 +72,6 @@ struct msgdma_extended_desc { #define MSGDMA_DESC_CTL_TX_SINGLE (MSGDMA_DESC_CTL_GEN_SOP | \ MSGDMA_DESC_CTL_GEN_EOP | \ MSGDMA_DESC_CTL_TR_COMP_IRQ | \ -MSGDMA_DESC_CTL_TR_ERR_IRQ | \ MSGDMA_DESC_CTL_GO) #define MSGDMA_DESC_CTL_RX_SINGLE (MSGDMA_DESC_CTL_END_ON_EOP | \ -- 2.1.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 11/11] IB/ucm: Add partial support for network namespaces
On Mon, Apr 20, 2015 at 12:03:42PM +0300, Haggai Eran wrote: From: Shachar Raindel rain...@mellanox.com It is impossible to completely support network namespaces for UCM, as we cannot identify the target IPoIB device. As Jason said, it seems like the use of namespaces should be limited to the RDMA CM layer. If so I _think_ this patch would not be needed? Ira However, we add support which will work if the user is following the IB-Spec Annex 11 (RDMA IP CM Services) with the service ID and private data formatting. Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com Signed-off-by: Guy Shapiro gu...@mellanox.com --- drivers/infiniband/core/ucm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 9604ab068984..424421091dae 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -45,6 +45,7 @@ #include linux/idr.h #include linux/mutex.h #include linux/slab.h +#include linux/nsproxy.h #include asm/uaccess.h @@ -490,7 +491,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, ctx-uid = cmd.uid; ctx-cm_id = ib_create_cm_id(file-device-ib_dev, ib_ucm_event_handler, ctx, - init_net); + current-nsproxy-net_ns); if (IS_ERR(ctx-cm_id)) { result = PTR_ERR(ctx-cm_id); goto err1; -- 1.7.11.2 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] net: dsa: mv88e6xxx: fix setup of port control 1
On Mon, Apr 20, 2015 at 05:19:23PM -0400, Vivien Didelot wrote: mv88e6xxx_setup_port_common was writing to PORT_DEFAULT_VLAN (port offset 0x07) instead of PORT_CONTROL_1 (port offset 0x05). Hi Vivien Good catch. Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com Fixes: cca8b1337541 (net: dsa: Use mnemonics rather than register numbers) Acked-by: Andrew Lunn and...@lunn.ch Thanks Andrew --- drivers/net/dsa/mv88e6xxx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 9f0c2b9..48712bd 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1251,8 +1251,7 @@ int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port) /* Port Control 1: disable trunking, disable sending * learning messages to this port. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_DEFAULT_VLAN, -0x); + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_1, 0x); if (ret) goto abort; -- 2.3.5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 05/11] IB/ipoib: Return IPoIB devices as possible matches to get_net_device_by_port_pkey_ip
On Mon, Apr 20, 2015 at 12:03:36PM +0300, Haggai Eran wrote: From: Guy Shapiro gu...@mellanox.com Implement callback that returns network device to ib_core according to connection parameters. Check the ipoib device and iterate over all child devices to look for a match. For each ipoib device we iterate through all upper devices when searching for a matching IP, in order to support bonding. Signed-off-by: Guy Shapiro gu...@mellanox.com Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 122 +- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 7cad4dd87469..89a59a0e17e6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -48,6 +48,9 @@ #include linux/jhash.h #include net/arp.h +#include net/addrconf.h +#include linux/inetdevice.h +#include rdma/ib_cache.h #define DRV_VERSION 1.0.0 @@ -91,11 +94,15 @@ struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device); static void ipoib_neigh_reclaim(struct rcu_head *rp); +static struct net_device *ipoib_get_net_device_by_port_pkey_ip( + struct ib_device *dev, u8 port, u16 pkey, + struct sockaddr *addr); static struct ib_client ipoib_client = { .name = ipoib, .add= ipoib_add_one, - .remove = ipoib_remove_one + .remove = ipoib_remove_one, + .get_net_device_by_port_pkey_ip = ipoib_get_net_device_by_port_pkey_ip, }; int ipoib_open(struct net_device *dev) @@ -222,6 +229,119 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static bool ipoib_is_dev_match_addr(struct sockaddr *addr, + struct net_device *dev) +{ + struct net *net = dev_net(dev); + + if (addr-sa_family == AF_INET) { + struct in_device *in_dev = 
in_dev_get(dev); + struct sockaddr_in *addr_in = (struct sockaddr_in *)addr; + __be32 ret_addr; + + if (!in_dev) + return false; + + ret_addr = inet_confirm_addr(net, in_dev, 0, + addr_in-sin_addr.s_addr, + RT_SCOPE_HOST); + in_dev_put(in_dev); + if (ret_addr) + return true; + } +#if IS_ENABLED(CONFIG_IPV6) + else if (addr-sa_family == AF_INET6) { + struct sockaddr_in6 *addr_in6 = (struct sockaddr_in6 *)addr; + + if (ipv6_chk_addr(net, addr_in6-sin6_addr, dev, 1)) + return true; + } +#endif + return false; +} + +/** + * Find a net_device matching the given address, which is an upper device of + * the given net_device. + * @addr: IP address to look for. + * @dev: base IPoIB net_device + * + * If found, returns the net_device with a reference held. Otherwise return + * NULL. + */ +static struct net_device *ipoib_get_net_dev_match_addr(struct sockaddr *addr, +struct net_device *dev) +{ + struct net_device *upper, + *result = NULL; + struct list_head *iter; + + if (ipoib_is_dev_match_addr(addr, dev)) { + dev_hold(dev); + return dev; + } + + rcu_read_lock(); + netdev_for_each_all_upper_dev_rcu(dev, upper, iter) { + if (ipoib_is_dev_match_addr(addr, upper)) { + dev_hold(upper); + result = upper; + break; + } + } + rcu_read_unlock(); + return result; +} + +static struct net_device *ipoib_get_net_device_by_port_pkey_ip( + struct ib_device *dev, u8 port, u16 pkey, struct sockaddr *addr) +{ + struct ipoib_dev_priv *priv; + struct list_head *dev_list; + u16 pkey_index; + + ib_find_cached_pkey(dev, port, pkey, pkey_index); + if (pkey_index == (u16)-1) + return NULL; Why not check the return value of ib_find_cached_pkey? 
+ + if (rdma_node_get_transport(dev-node_type) != RDMA_TRANSPORT_IB) + return NULL; The use of Link Layer and Transport in this series will need to be reevaluated based on Michael's work: https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg24140.html Ira -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC,1/8] soc/fman: Add FMan MURAM support
On Mon, 2015-04-20 at 03:58 -0500, Liberman Igal-B31950 wrote: Regards, Igal Liberman. -Original Message- From: Kumar Gala [mailto:ga...@kernel.crashing.org] Sent: Thursday, March 12, 2015 5:57 PM To: Liberman Igal-B31950 Cc: linuxppc-...@lists.ozlabs.org; netdev@vger.kernel.org; linux- ker...@vger.kernel.org; Wood Scott-B07421 Subject: Re: [RFC,1/8] soc/fman: Add FMan MURAM support On Mar 11, 2015, at 12:07 AM, Igal.Liberman igal.liber...@freescale.com wrote: From: Igal Liberman igal.liber...@freescale.com Add Frame Manager Multi-User RAM support. Signed-off-by: Igal Liberman igal.liber...@freescale.com --- drivers/soc/fsl/fman/Kconfig|1 + drivers/soc/fsl/fman/Makefile |5 +- drivers/soc/fsl/fman/fm_muram.c | 174 +++ drivers/soc/fsl/fman/inc/fm_muram_ext.h | 98 + 4 files changed, 276 insertions(+), 2 deletions(-) create mode 100644 drivers/soc/fsl/fman/fm_muram.c create mode 100644 drivers/soc/fsl/fman/inc/fm_muram_ext.h use lib/genalloc instead of rheap Hi Kumar, I looked into lib/genalloc allocator. As far as I see, the genalloc allocator doesn't allow to control the memory alignment when you allocate a chunk of memory. Two important notes regarding MURAM memory: - The allocated memory chunks should have specific alignment (might be different in each chunk). - The allocations must be efficient, we don't want to waste MURAM due to alignment issues. If the requirement is that allocations must be size-aligned, use gen_pool_first_fit_order_align. Otherwise, improve genalloc to do what you need. -Scott -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] net: dsa: mv88e6xxx: use PORT_DEFAULT_VLAN
Minor, use the explicit PORT_DEFAULT_VLAN define instead of 0x07. Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com --- drivers/net/dsa/mv88e6xxx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 48712bd..af639ab 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1274,7 +1274,8 @@ int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port) /* Default VLAN ID and priority: don't set a default VLAN * ID, and set the default packet priority to zero. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x07, 0x); + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_DEFAULT_VLAN, + 0x); abort: mutex_unlock(ps-smi_mutex); return ret; -- 2.3.5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 09/11] cputime:Introduce the cputime_to_timespec64/timespec64_to_cputime function
On Mon, 20 Apr 2015, Baolin Wang wrote: This patch introduces some functions for converting cputime to timespec64 and back, that replace the timespec type with timespec64 type, as well as for arch/s390 and arch/powerpc architecture. No. We want a patch which adds the functions and then a patch which uses them. This stuff is tricky and hard to review. So please split the patches into smaller chunks. +unsigned long +timespec64_to_jiffies(const struct timespec64 *value) +{ + return __timespec_to_jiffies(value-tv_sec, value-tv_nsec); +} +EXPORT_SYMBOL(timespec64_to_jiffies); So we have now two exports which are doing exactly the same thing. Copy and paste is wonderful, right? What about exporting __timespec_to_jiffies() and providing inlines for timespec_to_jiffies() and timespec64_to_jiffies() ? EXPORT_SYMBOL is not just a stupid annotation. Its impact on the resulting kernel size is larger than the actual function implementation. +void +jiffies_to_timespec64(const unsigned long jiffies, struct timespec64 *value) +{ + /* + * Convert jiffies to nanoseconds and separate with + * one divide. + */ + u32 rem; + value-tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC, +NSEC_PER_SEC, rem); + value-tv_nsec = rem; +} +EXPORT_SYMBOL(jiffies_to_timespec64); Sigh. Thanks, tglx -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 0/2] net: Fix hw csum failure message flood for ppp tunnel
This patch set addresses Bug 95171 - hw csum failure message flood for ppp tunnel since upgrade to 3.16. The problem is that pppoe is being used over UDP with UDP checksums enabled. On receive, checksum conversion turns checksum-unnecessary into checksum-complete. The PPP receive functions do not properly pull the checksum over its headers, so that when an encapsulated checksum is considered the checksum-complete value is incorrect. This patch adds skb_checksum_complete_unset which can be called in the receive path in lieu of pulling checksum-complete in a layer. This is useful when the packet is being modified (e.g. decompressed) and the checksum-complete value is no longer relevant. In the ppp_receive_frame we call skb_checksum_complete_unset to toss out checksum-complete. This should eliminate the reported messages. Alternatively, we could add skb_postpull_rcsum and probably special case handling for VJ compression if maintaining the checksum-complete is needed (not clear to me this is worth the effort). I haven't tested this since setting up the failure scenario doesn't seem trivial to configure. Tom Herbert (2): net: add skb_checksum_complete_unset ppp: call skb_checksum_complete_unset in ppp_receive_frame drivers/net/ppp/ppp_generic.c | 1 + include/linux/skbuff.h| 12 2 files changed, 13 insertions(+) -- 1.8.1 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] net: dsa: mv88e6xxx: use PORT_DEFAULT_VLAN
On Mon, Apr 20, 2015 at 05:43:26PM -0400, Vivien Didelot wrote: Minor, use the explicit PORT_DEFAULT_VLAN define instead of 0x07. Hi Vivien I would not normally use the word Minor here, since it will end up in the commit log. Other than that: Acked-by: Andrew Lunn and...@lunn.ch Thanks Andrew Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com --- drivers/net/dsa/mv88e6xxx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 48712bd..af639ab 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1274,7 +1274,8 @@ int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port) /* Default VLAN ID and priority: don't set a default VLAN * ID, and set the default packet priority to zero. */ - ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x07, 0x); + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_DEFAULT_VLAN, +0x); abort: mutex_unlock(ps-smi_mutex); return ret; -- 2.3.5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 06/11] IB/cm, cma: Move RDMA IP CM private-data parsing code from ib_cma to ib_cm
On Mon, Apr 20, 2015 at 12:03:37PM +0300, Haggai Eran wrote: From: Guy Shapiro gu...@mellanox.com When receiving a connection request, ib_cm needs to associate the request with a network namespace. To do this, it needs to know the request's destination IP. For this the RDMA IP CM packet formatting functionality needs to be exposed to ib_cm. [snip] + +int cm_save_net_info(struct sockaddr *src_addr, + struct sockaddr *dst_addr, + struct ib_cm_event *ib_event) +{ + struct cm_work *work = container_of(ib_event, struct cm_work, cm_event); + + if ((rdma_port_get_link_layer(work-port-cm_dev-ib_device, + work-port-port_num) == + IB_LINK_LAYER_INFINIBAND) + (ib_event-event == IB_CM_REQ_RECEIVED)) { The original code in the RDMA CM had a check for AF_IB. Isn't that needed here as well? Ira + cm_save_ib_info(src_addr, dst_addr, + ib_event-param.req_rcvd.primary_path); + return 0; + } + + return cm_save_ip_info(src_addr, dst_addr, work); +} +EXPORT_SYMBOL(cm_save_net_info); + struct ib_cm_id *ib_create_cm_id(struct ib_device *device, ib_cm_handler cm_handler, void *context) -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH iproute2 -next] examples: bpf: fix ld offs to have same prog loaded on ingress/egress
On 4/20/15 4:48 AM, Daniel Borkmann wrote: Fix up the eBPF example program to match our kernel fix in a166151cbe33 (bpf: fix bpf helpers to use skb-mac_header relative offsets). Tested on ingress and egress paths. Signed-off-by: Daniel Borkmann dan...@iogearbox.net Cc: Alexei Starovoitov a...@plumgrid.com --- ( Stephen, this applies on top of tc: built-in eBPF exec proxy: https://patchwork.ozlabs.org/patch/461837/ ) Looks good. Acked-by: Alexei Starovoitov a...@plumgrid.com -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 07/11] IB/cm: Add network namespace support
On Mon, Apr 20, 2015 at 11:06:59AM -0600, Jason Gunthorpe wrote: On Mon, Apr 20, 2015 at 12:03:38PM +0300, Haggai Eran wrote: From: Guy Shapiro gu...@mellanox.com Add namespace support to the IB-CM layer. - Each CM-ID now has a network namespace it is associated with, assigned at creation. This namespace is used as needed during subsequent action on the CM-ID or related objects. There is really something weird about this layering. At the CM layer there should be no concept of an IP address, it only deals with GIDs. So how can a CM object have a network namespace associated with it? { av-port = port; av-pkey_index = wc-pkey_index; ib_init_ah_from_wc(port-cm_dev-ib_device, port-port_num, wc, - grh, av-ah_attr, init_net); + grh, av-ah_attr, net); There is something deeply wrong with adding network namespace arguments to verbs. For rocee the gid index clearly specifies the network namespace to use, so much of this should go away and have rocee get the namespace from the gid index. Ie in ib_init_ah_from_wc we have the ib_wc which contains the sgid index. I'm really not excited at how many places are gaining a net when those layers shouldn't even need to care about IP layer details. Just acting as a pass through for rocee doesn't make sense. I had the same feeling when I saw the addition of the network namespace to the MAD code, especially the RMPP code. It seems like there should be a better way to deal with this. My gut says that the namespace should be handled separate from the ib_init_ah_from_wc. Perhaps as a secondary call used only when the namespace is needed? But I'm not sure when it is appropriate/needed. Ira -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 net-next] ip: Add color output option
Thanks for bearing with my first patch. On 2015-04-20 10:16-0700, Stephen Hemminger wrote: On Sat, 18 Apr 2015 13:39:45 +0300 Mathias Nyman m.ny...@iki.fi wrote: It is hard to quickly find what you are looking for in the output of the ip command. Color helps. This patch adds a '-c' flag to highlight these with individual colors: - interface name - ip address - mac address - up/down state Signed-off-by: Mathias Nyman m.ny...@iki.fi I like the idea of this, it would be generally good across the board. But the patch does not apply cleanly to the current version of iproute2. What is the current version? I used the net-next branch as a base from here: http://git.kernel.org/cgit/linux/kernel/git/shemminger/iproute2.git I thought net-next was used for new features, but master branch now has newer commits. Should I rebase on top of master? And there are minor style issues. iproute2 in general tries to follow kernel style. WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? #36: new file mode 100644 ERROR: open brace '{' following enum go on the same line #45: FILE: include/color.h:5: +enum color_attr +{ ERROR: open brace '{' following enum go on the same line #195: FILE: lib/color.c:7: +enum color +{ ERROR: that open brace { should be on the previous line #207: FILE: lib/color.c:19: +static const char * const color_codes[] = +{ ERROR: that open brace { should be on the previous line #220: FILE: lib/color.c:32: +static enum color attr_colors[] = +{ ERROR: do not initialise statics to 0 or NULL #229: FILE: lib/color.c:41: +static int color_is_enabled = 0; WARNING: Missing a blank line after declarations #240: FILE: lib/color.c:52: + va_list args; + va_start(args, fmt); Thanks, so checkpatch.pl applies to iproute2 as well. I'll fix these in v3. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 2/2] ppp: call skb_checksum_complete_unset in ppp_receive_frame
Call checksum_complete_unset in PPP receive to discard checksum-complete value. PPP does not pull checksum for headers and also modifies packet as in VJ compression. Signed-off-by: Tom Herbert t...@herbertland.com --- drivers/net/ppp/ppp_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index af034db..9d15566 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1716,6 +1716,7 @@ ppp_receive_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch) { /* note: a 0-length skb is used as an error indication */ if (skb->len > 0) { + skb_checksum_complete_unset(skb); #ifdef CONFIG_PPP_MULTILINK /* XXX do channel-level decompression here */ if (PPP_PROTO(skb) == PPP_MP) -- 1.8.1 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 1/2] net: add skb_checksum_complete_unset
This function changes ip_summed to CHECKSUM_NONE if CHECKSUM_COMPLETE is set. This is called to discard checksum-complete when packet is being modified and checksum is not pulled for headers in a layer. Signed-off-by: Tom Herbert t...@herbertland.com --- include/linux/skbuff.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0991259..06793b5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3016,6 +3016,18 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, */ #define CHECKSUM_BREAK 76 +/* Unset checksum-complete + * + * Unset checksum complete can be done when packet is being modified + * (uncompressed for instance) and checksum-complete value is + * invalidated. + */ +static inline void skb_checksum_complete_unset(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; +} + /* Validate (init) checksum based on checksum complete. * * Return values: -- 1.8.1 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v3 09/10] lib: libos build scripts and documentation
Some random observations while I'm still trying to wrap my head around all this (which might take quite some time). On Sun, 2015-04-19 at 22:28 +0900, Hajime Tazaki wrote: --- /dev/null +++ b/arch/lib/Kconfig @@ -0,0 +1,124 @@ +menuconfig LIB + bool LibOS-specific options + def_bool n This is the start of the Kconfig parse for lib. (That would basically still be true even if you didn't set KBUILD_KCONFIG, see below.) So why not do something like all arches do: config LIB def_bool y select [...] Ie, why would someone want to build for ARCH=lib and still not set LIB? + select PROC_FS + select PROC_SYSCTL + select SYSCTL + select SYSFS + help + The 'lib' architecture is a library (user-mode) version of + the linux kernel that includes only its network stack and is + used within the userspace application, and ns-3 simulator. + For more information, about ns-3, see http://www.nsnam.org. + +config EXPERIMENTAL + def_bool y Unneeded: removed treewide in, I think, 2014. +config MMU +def_bool n Add empty line. +config FPU +def_bool n Ditto. +config SMP +def_bool n + +config ARCH + string + option env=ARCH + +config KTIME_SCALAR + def_bool y This one is unused. +config MODULES + def_bool y + option modules + +config GENERIC_CSUM + def_bool y + +config GENERIC_BUG + def_bool y + depends on BUG Add empty line here. +config PRINTK + def_bool y + +config RWSEM_GENERIC_SPINLOCK + def_bool y + +config GENERIC_FIND_NEXT_BIT + def_bool y This one is unused too. 
+config GENERIC_HWEIGHT + def_bool y + +config TRACE_IRQFLAGS_SUPPORT + def_bool y + +config NO_HZ + def_bool y + +config BASE_FULL + def_bool n + +config SELECT_MEMORY_MODEL + def_bool n + +config FLAT_NODE_MEM_MAP + def_bool n + +config PAGEFLAGS_EXTENDED + def_bool n + +config VIRT_TO_BUS + def_bool n + +config HAS_DMA + def_bool n + +config HZ +int +default 250 + +config TINY_RCU + def_bool y + +config HZ_250 + def_bool y + +config BASE_SMALL + int + default 1 + +config SPLIT_PTLOCK_CPUS + int + default 1 + +config FLATMEM + def_bool y + +config SYSCTL + def_bool y + +config PROC_FS + def_bool y + +config SYSFS + def_bool y + +config PROC_SYSCTL + def_bool y + +config NETDEVICES + def_bool y + +config SLIB + def_bool y You've also added SLIB to init/Kconfig in 02/10. But make ARCH=lib *config will never visit init/Kconfig, will it? And, apparently, none of SL[AOU]B are wanted for lib. So I think the entry for config SLIB in that file can be dropped (as other arches will never see it because it depends on LIB). (Note that I haven't actually looked into all the Kconfig entries added above. Perhaps I might do that. But I'm pretty sure most of the time all I can say is: I have no idea why this entry defaults to $VALUE.) +source net/Kconfig + +source drivers/base/Kconfig + +source crypto/Kconfig + +source lib/Kconfig + + Trailing empty lines. diff --git a/arch/lib/Makefile b/arch/lib/Makefile new file mode 100644 index 000..d8a0bf9 --- /dev/null +++ b/arch/lib/Makefile @@ -0,0 +1,251 @@ +ARCH_DIR := arch/lib +SRCDIR=$(dir $(firstword $(MAKEFILE_LIST))) Do you use SRCDIR? +DCE_TESTDIR=$(srctree)/tools/testing/libos/ +KBUILD_KCONFIG := arch/$(ARCH)/Kconfig I think you copied this from arch/um/Makefile. But arch/um/ is, well, special. Why should lib not start the kconfig parse in the file named Kconfig? And if you want to start in arch/lib/Kconfig, it would be nice to add a mainmenu (just like arch/x86/um/Kconfig does). 
(I don't read Makefilese well enough to understand the rest of this file. I think it's scary.) + +CC = gcc +GCCVERSIONGTEQ48 := $(shell expr `gcc -dumpversion` \= 4.8) +ifeq $(GCCVERSIONGTEQ48) 1 + NO_TREE_LOOP_OPT += -fno-tree-loop-distribute-patterns +endif + + +-include $(ARCH_DIR)/objs.mk +-include $(srctree)/.config +include $(srctree)/scripts/Kbuild.include +include $(ARCH_DIR)/processor.mk + +# targets +LIBOS_TOOLS=$(ARCH_DIR)/tools +LIBOS_GIT_REPO=git://github.com/libos-nuse/linux-libos-tools +KERNEL_LIB=liblinux-$(KERNELVERSION).so + +ALL_OBJS=$(OBJS) $(KERNEL_LIB) $(modules) $(all-obj-for-clean) + +# auto generated files +AUTOGENS=$(CRC32TABLE) $(COMPILE_H) $(BOUNDS_H) $(ARCH_DIR)/timeconst.h $(ARCH_DIR)/linker.lds +COMPILE_H=$(srctree)/include/generated/compile.h +BOUNDS_H=$(srctree)/include/generated/bounds.h + +# from lib/Makefile +CRC32TABLE = $(ARCH_DIR)/crc32table.h +hostprogs-y := $(srctree)/lib/gen_crc32table +clean-files := crc32table.h + +# sources and objects +LIB_SRC=\ +lib.c lib-device.c lib-socket.c random.c softirq.c time.c
Re: [PATCH] neighbour.c: Avoid GC directly after state change
Ulf Samuelsson wrote: How many neighbors do you want to maintain? I guess you have to increase the number of gc_thresh1. The current use cases have up to 2048 entries. This is expected to grow in the future. The 3.4 kernel used in the system today is limited to 1024, but that has been raised to about 10k. The gc_thresh1 test is not implemented in 3.4 but can be backported, but still not convinced it is a good idea. Why? To complicate things, one requirement is that for some interfaces you always want to keep things alive, if connected, but for other interfaces you want things to be removed to conserve memory. Actually you would want to do this selection on a subnet level. If you want to introduce per-interface parameter, I am okay with it. Internal discussions resulted in a proposal to change the patch, so that you have a keepalive flag which is tested after it has been decided to exit the REACHABLE state. if the keepalive flag is set, you always go to DELAY state from REACHABLE. No. -- Hideaki Yoshifuji hideaki.yoshif...@miraclelinux.com Technical Division, MIRACLE LINUX CORPORATION -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [Intel-wired-lan] [PATCH] e1000e: Do not allow CRC stripping to be disabled on 82579 w/ jumbo frames
From: Intel-wired-lan [mailto:intel-wired-lan-boun...@lists.osuosl.org] On Behalf Of Jeff Kirsher Sent: Wednesday, April 08, 2015 7:58 PM To: Alexander Duyck Cc: netdev@vger.kernel.org; intel-wired-...@lists.osuosl.org Subject: Re: [Intel-wired-lan] [PATCH] e1000e: Do not allow CRC stripping to be disabled on 82579 w/ jumbo frames On Wed, 2015-04-08 at 18:37 -0700, Alexander Duyck wrote: The driver wasn't allowing jumbo frames to be enabled when CRC stripping was disabled, however it was allowing CRC stripping to be disabled while jumbo frames were enabled. This fixes that by making it so that the NETIF_F_RXFCS flag cannot be set when jumbo frames are enabled on 82579 and newer parts. Signed-off-by: Alexander Duyck alexander.h.du...@redhat.com --- drivers/net/ethernet/intel/e1000e/netdev.c | 14 ++ 1 file changed, 14 insertions(+) Thanks Alex, I will add your patch to my queue. Tested-by: Aaron Brown aaron.f.br...@intel.com -- git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git dev-queue -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next 0/2] net: Fix hw csum failure message flood for ppp tunnel
From: Tom Herbert t...@herbertland.com Date: Mon, 20 Apr 2015 14:10:03 -0700 This patch set addresses bug Bug 95171 - hw csum failure message flood for ppp tunnel since upgrade to 3.16. The problem is that pppoe is being used over UDP with UDP checksums enabled. On receive checksum conversion turns checksum-unnecessary into checksum-complete. The PPP receive functions do not properly pull the checksum over its headers, so that when an encapsulated checksum is considered the checksum-complete value is incorrect. This patch adds skb_checksum_complete_unset which can be called in the receive path in lieu of pulling checksum complete in layer. This is useful when the packet is being modified (e.g. decompressed) and the checksum-complete value is no longer relevant. In the ppp_receive_frame we call skb_checksum_complete_unset to toss out checksum-complete. This should eliminate the reported messages. Alternatively, we could add skb_postpull_rcsum and probably special case handling for VJ compression if maintaining the checksum-complete is needed (not clear to me this is worth the effort). I haven't tested this since setting up the failure scenario doesn't seem trivial to configure. I'm preemptively applying this, but it's really important for folks to give this some good testing. Thanks Tom. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/1] altera tse: Error-Bit on tx-avalon-stream always set.
From: Andreas Oetken ennoerlan...@googlemail.com Date: Tue, 21 Apr 2015 00:16:38 +0200 From: Andreas Oetken ennoerlan...@gmail.com The Error-Bit on the avalon streaming interface of the tx-dma-channel was always set. In SGMII configurations this leads to error-symbols on the PCS and packet-rejection on the receiver side (e.g. SGMII/1000Base-X connected switch). This only applies to the tse-configuration with MSGDMA. This issue was detected and fixed on a custom board with a direct connection to a Marvell switch in SGMII-PHY-Mode. (incl. custom patches for SGMII-PCS). According to the datasheet if ff_tx_err (avalon-streaming) is set it is forwarded to gm_tx_err. As a result the PCS is forwarding the error by sending a /V/-character. Signed-off-by: Andreas Oetken ennoerlan...@gmail.com Applied. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] net: dsa: mv88e6xxx: fix setup of port control 1
From: Andrew Lunn and...@lunn.ch Date: Tue, 21 Apr 2015 01:05:07 +0200 On Mon, Apr 20, 2015 at 05:19:23PM -0400, Vivien Didelot wrote: mv88e6xxx_setup_port_common was writing to PORT_DEFAULT_VLAN (port offset 0x07) instead of PORT_CONTROL_1 (port offset 0x05). Hi Vivien Good catch. Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com Fixes: cca8b1337541 (net: dsa: Use mnemonics rather than register numbers) Acked-by: Andrew Lunn and...@lunn.ch Applied. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] net: dsa: mv88e6xxx: use PORT_DEFAULT_VLAN
From: Vivien Didelot vivien.dide...@savoirfairelinux.com Date: Mon, 20 Apr 2015 17:43:26 -0400 Minor, use the explicit PORT_DEFAULT_VLAN define instead of 0x07. Signed-off-by: Vivien Didelot vivien.dide...@savoirfairelinux.com Applied. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [Intel-wired-lan] [PATCH] e1000e: Cleanup handling of VLAN_HLEN as a part of max frame size
From: Intel-wired-lan [mailto:intel-wired-lan-boun...@lists.osuosl.org] On Behalf Of Alexander Duyck Sent: Wednesday, April 08, 2015 2:03 PM To: intel-wired-...@lists.osuosl.org; Kirsher, Jeffrey T Cc: netdev@vger.kernel.org; m...@cchtml.com; ht...@twofifty.com Subject: [Intel-wired-lan] [PATCH] e1000e: Cleanup handling of VLAN_HLEN as a part of max frame size When the VLAN_HLEN was added to the calculation for the maximum frame size there seems to have been a number of issues added to the driver. The first issue is that in some cases the maximum frame size for a device never really reached the actual maximum frame size as the VLAN header length was not included the calculation for that value. As a result some parts only supported a maximum frame size of either 1496 in the case of parts that didn't support jumbo frames, and 8996 in the case of the parts that do. The second issue is the fact that there were several checks that weren't updated so as a result setting an MTU of 1500 was treated as enabling jumbo frames as the calculated value was 1522 instead of 1518. I have addressed those by replacing ETH_FRAME_LEN with VLAN_ETH_FRAME_LEN where appropriate. The final issue was the fact that lowering the MTU below 1500 would cause the driver to allocate 2K buffers for the rings. This is an old issue that was fixed several years ago in igb/ixgbe and I am addressing now by just replacing == with a = so that we always just round up to 1522 for anything that isn't a jumbo frame. Fixes: c751a3d58cf2d (e1000e: Correctly include VLAN_HLEN when changing interface MTU) Signed-off-by: Alexander Duyck alexander.h.du...@redhat.com --- I have only build tested this though I am 99% sure the fixes here are correct. This patch should fix issues on 82573 and ich8 w/ setting an MTU of 1500, and for the PCH series w/ setting an MTU of 9000. 
I assume I can get away with bumping the max_hw_frame_size for the PCH parts from 9018 to 9022 based on the fact that the Windows INF for the parts lists supporting either 1514, 4088, and 9014 all of which exclude the 8 bytes for CRC and VLAN header. drivers/net/ethernet/intel/e1000e/82571.c |2 +- drivers/net/ethernet/intel/e1000e/ich8lan.c | 10 +- drivers/net/ethernet/intel/e1000e/netdev.c | 18 -- 3 files changed, 14 insertions(+), 16 deletions(-) Tested-by: Aaron Brown aaron.f.br...@intel.com -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH v2 01/11] RDMA/CMA: Mark IPv4 addresses correctly when the listener is IPv6
-Original Message- From: Or Gerlitz [mailto:gerlitz...@gmail.com] Sent: Monday, April 20, 2015 9:38 PM On Mon, Apr 20, 2015 at 7:41 PM, Jason Gunthorpe jguntho...@obsidianresearch.com wrote: On Mon, Apr 20, 2015 at 12:03:32PM +0300, Haggai Eran wrote: From: Yotam Kenneth yota...@mellanox.com When accepting a new connection with the listener being IPv6, the family of the new connection is set as IPv6. This causes cma_zero_addr function to return true on an non-zero address. As a result, the wrong code path is taken. This causes the connection request to be rejected, as the RDMA-CM code looks for the wrong type of device. This description doesn't really make sense as to what the problem is. @@ -866,12 +866,12 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_i listen4 = (struct sockaddr_in *) listen_id- route.addr.src_addr; ip4 = (struct sockaddr_in *) id-route.addr.src_addr; - ip4-sin_family = listen4-sin_family; + ip4-sin_family = AF_INET; If listen_id-route.addr.src_addr.ss_family != AF_INET then it is invalid to cast to sockaddr_in. So listen4-sin_family MUST be AF_INET or this function MUST NOT be called. Forcing to AF_INET cannot be correct here. Jason, could you take a look @ this thread http://marc.info/?t=14158939504r=1w=2 where the authors addressed some comments from Sean and he eventually Acked the patch? What does this patch have to do with this series? I believe it's either a pre-patch to address some assumption or something they stepped on while testing We stepped upon this issue while testing the containers support we are Submitting here. When creating a new network namespace, the kernel set net-ipv6.sysctl.bindv6only to 0. As a result, we got the IPv6 listening ID accepting IPv4 connection. This is fixed by the above patch. Thanks, --Shachar
IT-Service Desk
IT-Service Desk behöver du uppgradera till den senaste e-post Outlook Web Apps 2015, vänligen klicka på IT-Service Deskhttp://sweden2.wix.com/sweden-upgrade att uppgradera till den senaste e-post Outlook Web Apps 2015 I samband med Microsoft Exchange © 2015 Microsoft Corporation. Alla rättigheter reserverade -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH] net: stmmac: use msleep instead of udelay for gpio reset
On 4/19/2015 6:11 AM, Michael Trimarchi wrote: Hi On Apr 19, 2015 1:37 AM, Fabio Estevam feste...@gmail.com mailto:feste...@gmail.com wrote: On Sat, Apr 18, 2015 at 12:02 PM, Michael Trimarchi mich...@amarulasolutions.com mailto:mich...@amarulasolutions.com wrote: reset_gpio = data->reset_gpio; - active_low = data->active_low; + active_low = !!data->active_low; This is an unrelated change. I have already tried to minimize the change. Anyway I will repost it if necessary yes send V2 w/ related changes for ms reset time only thanks Peppe Michael -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 01/16] printk: guard the amount written per line by devkmsg_read()
On Thu 2015-04-16 19:03:38, Tejun Heo wrote: devkmsg_read() uses 8k buffer and assumes that the formatted output message won't overrun which seems safe given LOG_LINE_MAX, the current use of dict and the escaping method being used; however, we're planning to use devkmsg formatting wider and accounting for the buffer size properly isn't that complicated. This patch defines CONSOLE_EXT_LOG_MAX as 8192 and updates devkmsg_read() so that it limits output accordingly. Signed-off-by: Tejun Heo t...@kernel.org Reviewed-by: Petr Mladek pmla...@suse.cz It is just a refactoring and does not modify the current behavior. Cc: Kay Sievers k...@vrfy.org Cc: Petr Mladek pmla...@suse.cz --- include/linux/printk.h | 2 ++ kernel/printk/printk.c | 35 +++ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 9b30871..58b1fec 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -30,6 +30,8 @@ static inline const char *printk_skip_level(const char *buffer) return buffer; } +#define CONSOLE_EXT_LOG_MAX 8192 If you do a respin for some reason, I would suggest to remove CONSOLE_ because it is used also for devkmsg. Best Regards, Petr + /* printk's without a loglevel use this..
*/ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 879edfc..b6e24af 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -512,7 +512,7 @@ struct devkmsg_user { u32 idx; enum log_flags prev; struct mutex lock; - char buf[8192]; + char buf[CONSOLE_EXT_LOG_MAX]; }; static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) @@ -565,11 +565,18 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) return ret; } +static void append_char(char **pp, char *e, char c) +{ + if (*pp e) + *(*pp)++ = c; +} + static ssize_t devkmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct devkmsg_user *user = file-private_data; struct printk_log *msg; + char *p, *e; u64 ts_usec; size_t i; char cont = '-'; @@ -579,6 +586,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, if (!user) return -EBADF; + p = user-buf; + e = user-buf + sizeof(user-buf); + ret = mutex_lock_interruptible(user-lock); if (ret) return ret; @@ -625,9 +635,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, ((user-prev LOG_CONT) !(msg-flags LOG_PREFIX))) cont = '+'; - len = sprintf(user-buf, %u,%llu,%llu,%c;, - (msg-facility 3) | msg-level, - user-seq, ts_usec, cont); + p += scnprintf(p, e - p, %u,%llu,%llu,%c;, +(msg-facility 3) | msg-level, +user-seq, ts_usec, cont); user-prev = msg-flags; /* escape non-printable characters */ @@ -635,11 +645,11 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, unsigned char c = log_text(msg)[i]; if (c ' ' || c = 127 || c == '\\') - len += sprintf(user-buf + len, \\x%02x, c); + p += scnprintf(p, e - p, \\x%02x, c); else - user-buf[len++] = c; + append_char(p, e, c); } - user-buf[len++] = '\n'; + append_char(p, e, '\n'); if (msg-dict_len) { bool line = true; @@ -648,30 +658,31 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, unsigned char c = 
log_dict(msg)[i]; if (line) { - user-buf[len++] = ' '; + append_char(p, e, ' '); line = false; } if (c == '\0') { - user-buf[len++] = '\n'; + append_char(p, e, '\n'); line = true; continue; } if (c ' ' || c = 127 || c == '\\') { - len += sprintf(user-buf + len, \\x%02x, c); + p += scnprintf(p, e - p, \\x%02x, c); continue; } - user-buf[len++] = c; + append_char(p, e, c); } - user-buf[len++] = '\n'; + append_char(p, e, '\n'); } user-idx = log_next(user-idx); user-seq++; raw_spin_unlock_irq(logbuf_lock); + len = p -
Re: [PATCH 01/16] printk: guard the amount written per line by devkmsg_read()
On Mon 2015-04-20 14:11:36, Petr Mladek wrote: On Thu 2015-04-16 19:03:38, Tejun Heo wrote: devkmsg_read() uses 8k buffer and assumes that the formatted output message won't overrun which seems safe given LOG_LINE_MAX, the current use of dict and the escaping method being used; however, we're planning to use devkmsg formatting wider and accounting for the buffer size properly isn't that complicated. This patch defines CONSOLE_EXT_LOG_MAX as 8192 and updates devkmsg_read() so that it limits output accordingly. Signed-off-by: Tejun Heo t...@kernel.org Reviewed-by: Petr Mladek pmla...@suse.cz It is just a refactoring and does not modify the current behavior. Ah, to make it clear. It did not modify the behavior except for adding the check for potential buffer overflow. Best Regards, Petr -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] neighbour.c: Avoid GC directly after state change
On 04/20/2015 04:33 AM, YOSHIFUJI Hideaki wrote: Hi, Ulf Samuelsson wrote: From RFC2461: | REACHABLE Roughly speaking, the neighbor is known to have been | reachable recently (within tens of seconds ago). : | STALE The neighbor is no longer known to be reachable but | until traffic is sent to the neighbor, no attempt | should be made to verify its reachability. | DELAY The neighbor is no longer known to be reachable, and | traffic has recently been sent to the neighbor. | Rather than probe the neighbor immediately, however, | delay sending probes for a short while in order to | give upper layer protocols a chance to provide | reachability confirmation. It is all depending on the meaning of the word recently. You imply, that if timeouts have been triggered, then it is no longer recent, but that is not the only interpretation, it is up to the implementer to decide what is recently. That quoted text is just a brief description. The document has detailed state machine. It is not *mandatory* to follow the state machine strictly, Page 85: This appendix contains a summary of the rules specified in Sections 7.2 and 7.3. This document does not mandate that implementations adhere to this model as long as their external behavior is consistent with that described in this document. The kernel does not follow the state machine today. The kernel already have a test which compares neigh-used + timeout with current time, and move the entry to DELAY. This is not documented in the state machine so there is already a precedent to compare neigh-compared + timeout with current time and move the entry into DELAY state. Obviously, some people would not want you to send probes before going STALE, so it needs to be configurable. Therefore, if a timeout occurs due to no traffic, they must be probed before they are garbage collected. It is what we do in PROBE state. Yes, but you have to start by moving it into DELAY state first, to init the probe counter. 
If you move the entry from REACHABLE to DELAY, then the probe counter may be any value. If this is not acceptable, how do you propose to solve the problem that you cannot make remote units inaccessible for more than a fraction of a second? How many neighbors do you want to maintain? I guess you have to increase the number of gc_thresh1. The current use cases have up to 2048 entries. This is expected to grow in the future. The 3.4 kernel used in the system today is limited to 1024, but that has been raised to about 10k. The gc_thresh1 test is not implemented in 3.4 but can be backported, but still not convinced it is a good idea. To complicate things, one requirement is that for some interfaces you always want to keep things alive, if connected, but for other interfaces you want things to be removed to conserve memory. Actually you would want to do this selection on a subnet level. Internal discussions resulted in a proposal to change the patch, so that you have a keepalive flag which is tested after it has been decided to exit the REACHABLE state. if the keepalive flag is set, you always go to DELAY state from REACHABLE. Best Regards, Ulf Samuelsson -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH iproute2 -next] examples: bpf: fix ld offs to have same prog loaded on ingress/egress
Fix up the eBPF example program to match our kernel fix in a166151cbe33 (bpf: fix bpf helpers to use skb-mac_header relative offsets). Tested on ingress and egress paths. Signed-off-by: Daniel Borkmann dan...@iogearbox.net Cc: Alexei Starovoitov a...@plumgrid.com --- ( Stephen, this applies on top of tc: built-in eBPF exec proxy: https://patchwork.ozlabs.org/patch/461837/ ) examples/bpf/bpf_prog.c | 28 +--- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/examples/bpf/bpf_prog.c b/examples/bpf/bpf_prog.c index 4dc00c3..009febd 100644 --- a/examples/bpf/bpf_prog.c +++ b/examples/bpf/bpf_prog.c @@ -58,6 +58,12 @@ *random type none pass val 0 *index 38 ref 1 bind 1 * + * The same program can also be installed on ingress side (as opposed to above + * egress configuration), e.g.: + * + * # tc qdisc add dev em1 handle : ingress + * # tc filter add dev em1 parent : bpf obj ... + * * Notes on BPF agent: * * In the above example, the bpf_agent creates the unix domain socket @@ -157,6 +163,7 @@ #include linux/ip.h #include linux/ipv6.h #include linux/if_tunnel.h +#include linux/filter.h #include linux/bpf.h /* Common, shared definitions with ebpf_agent.c. 
*/ @@ -222,7 +229,7 @@ struct flow_keys { __u32 ports; __u16 port16[2]; }; - __u16 th_off; + __s32 th_off; __u8 ip_proto; }; @@ -242,14 +249,14 @@ static inline int flow_ports_offset(__u8 ip_proto) } } -static inline bool flow_is_frag(struct __sk_buff *skb, __u32 nh_off) +static inline bool flow_is_frag(struct __sk_buff *skb, int nh_off) { return !!(load_half(skb, nh_off + offsetof(struct iphdr, frag_off)) (IP_MF | IP_OFFSET)); } -static inline __u32 flow_parse_ipv4(struct __sk_buff *skb, __u32 nh_off, - __u8 *ip_proto, struct flow_keys *flow) +static inline int flow_parse_ipv4(struct __sk_buff *skb, int nh_off, + __u8 *ip_proto, struct flow_keys *flow) { __u8 ip_ver_len; @@ -272,18 +279,18 @@ static inline __u32 flow_parse_ipv4(struct __sk_buff *skb, __u32 nh_off, return nh_off; } -static inline __u32 flow_addr_hash_ipv6(struct __sk_buff *skb, __u32 off) +static inline __u32 flow_addr_hash_ipv6(struct __sk_buff *skb, int off) { __u32 w0 = load_word(skb, off); __u32 w1 = load_word(skb, off + sizeof(w0)); __u32 w2 = load_word(skb, off + sizeof(w0) * 2); __u32 w3 = load_word(skb, off + sizeof(w0) * 3); - return (__u32)(w0 ^ w1 ^ w2 ^ w3); + return w0 ^ w1 ^ w2 ^ w3; } -static inline __u32 flow_parse_ipv6(struct __sk_buff *skb, __u32 nh_off, - __u8 *ip_proto, struct flow_keys *flow) +static inline int flow_parse_ipv6(struct __sk_buff *skb, int nh_off, + __u8 *ip_proto, struct flow_keys *flow) { *ip_proto = load_byte(skb, nh_off + offsetof(struct ipv6hdr, nexthdr)); @@ -296,10 +303,9 @@ static inline __u32 flow_parse_ipv6(struct __sk_buff *skb, __u32 nh_off, static inline bool flow_dissector(struct __sk_buff *skb, struct flow_keys *flow) { + int poff, nh_off = BPF_LL_OFF + ETH_HLEN; __be16 proto = skb-protocol; - __u32 nh_off = ETH_HLEN; __u8 ip_proto; - int poff; /* TODO: check for skb-vlan_tci, skb-vlan_proto first */ if (proto == htons(ETH_P_8021AD)) { @@ -369,7 +375,7 @@ static inline bool flow_dissector(struct __sk_buff *skb, nh_off += flow_ports_offset(ip_proto); 
flow-ports = load_word(skb, nh_off); - flow-th_off = (__u16)nh_off; + flow-th_off = nh_off; flow-ip_proto = ip_proto; return true; -- 1.9.3 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V1 net-next] IB/ipoib: Fix ndo_get_iflink
On 17/04/2015 22:21, David Miller wrote: From: Erez Shitrit ere...@mellanox.com Date: Thu, 16 Apr 2015 16:34:34 +0300 Currently, iflink of the parent interface was always accessed, even when interface didn't have a parent and hence we crashed there. Handle the interface types properly: for a child interface, return the ifindex of the parent, for parent interface, return its ifindex. For child devices, make sure to set the parent pointer prior to invoking register_netdevice(), this allows the new ndo to be called by the stack immediately after the child device is registered. Fixes: 5aa7add8f14b ('infiniband/ipoib: implement ndo_get_iflink') Reported-by: Honggang Li ho...@redhat.com Signed-off-by: Erez Shitrit ere...@mellanox.com Signed-off-by: Honggang Li ho...@redhat.com Applied, thanks. Doug, Roland, You might want to include this patch in your for-next / for-4.1 trees, or merge net-next again. Currently they contain the issue it fixes, and it can prevent some systems with IPoIB from booting. Regards, Haggai -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCHSET] printk, netconsole: implement reliable netconsole
From: Of Rob Landley Sent: 19 April 2015 08:25 On Thu, Apr 16, 2015 at 6:03 PM, Tejun Heo t...@kernel.org wrote: In a lot of configurations, netconsole is a useful way to collect system logs; however, all netconsole does is simply emitting UDP packets for the raw messages and there's no way for the receiver to find out whether the packets were lost and/or reordered in flight. Except a modern nonsaturated LAN shouldn't do that. If you have two machines plugged into a hub, and that's _all_ that's plugged in, packets should never get dropped. This was the original use case of netconsole was that the sender and the receiver were plugged into the same router. However, even on a quite active LAN the days of ethernet doing CDMA requiring retransmits are long gone, even 100baseT routers have been cacheing and retransmitting data internally so each connection can go at the full 11 megabytes/second with the backplane running fast enough to keep them all active at the same time. (That's why it's so hard to find a _hub_ anymore, it's all routers ... Most machines are plugged into switches (not routers), many of them will send 'pause' frames which the host mac may act on. In which case packet loss is not expected (unless you have broadcast storms when all bets are off). Additionally, within a local network you shouldn't really get any packet loss since no segments should be 100% loaded. So for testing it is not unreasonable to expect no lost packets in netconsole traffic. David
Re: [PATCH 02/16] printk: factor out message formatting from devkmsg_read()
On Thu 2015-04-16 19:03:39, Tejun Heo wrote: The extended message formatting used for /dev/kmsg will be used implement extended consoles. Factor out msg_print_ext_header() and msg_print_ext_body() from devkmsg_read(). This is pure restructuring. Signed-off-by: Tejun Heo t...@kernel.org Reviewed-by: Petr Mladek pmla...@suse.cz I like the split of the long function. Best Regards, Petr Cc: Kay Sievers k...@vrfy.org Cc: Petr Mladek pmla...@suse.cz --- kernel/printk/printk.c | 157 ++--- 1 file changed, 85 insertions(+), 72 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b6e24af..5ea6709 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -505,6 +505,86 @@ int check_syslog_permissions(int type, bool from_file) return security_syslog(type); } +static void append_char(char **pp, char *e, char c) +{ + if (*pp e) + *(*pp)++ = c; +} + +static ssize_t msg_print_ext_header(char *buf, size_t size, + struct printk_log *msg, u64 seq, + enum log_flags prev_flags) +{ + u64 ts_usec = msg-ts_nsec; + char cont = '-'; + + do_div(ts_usec, 1000); + + /* + * If we couldn't merge continuation line fragments during the print, + * export the stored flags to allow an optional external merge of the + * records. Merging the records isn't always neccessarily correct, like + * when we hit a race during printing. In most cases though, it produces + * better readable output. 'c' in the record flags mark the first + * fragment of a line, '+' the following. 
+ */ + if (msg-flags LOG_CONT !(prev_flags LOG_CONT)) + cont = 'c'; + else if ((msg-flags LOG_CONT) || + ((prev_flags LOG_CONT) !(msg-flags LOG_PREFIX))) + cont = '+'; + + return scnprintf(buf, size, %u,%llu,%llu,%c;, +(msg-facility 3) | msg-level, seq, ts_usec, cont); +} + +static ssize_t msg_print_ext_body(char *buf, size_t size, + char *dict, size_t dict_len, + char *text, size_t text_len) +{ + char *p = buf, *e = buf + size; + size_t i; + + /* escape non-printable characters */ + for (i = 0; i text_len; i++) { + unsigned char c = text[i]; + + if (c ' ' || c = 127 || c == '\\') + p += scnprintf(p, e - p, \\x%02x, c); + else + append_char(p, e, c); + } + append_char(p, e, '\n'); + + if (dict_len) { + bool line = true; + + for (i = 0; i dict_len; i++) { + unsigned char c = dict[i]; + + if (line) { + append_char(p, e, ' '); + line = false; + } + + if (c == '\0') { + append_char(p, e, '\n'); + line = true; + continue; + } + + if (c ' ' || c = 127 || c == '\\') { + p += scnprintf(p, e - p, \\x%02x, c); + continue; + } + + append_char(p, e, c); + } + append_char(p, e, '\n'); + } + + return p - buf; +} /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { @@ -565,30 +645,17 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) return ret; } -static void append_char(char **pp, char *e, char c) -{ - if (*pp e) - *(*pp)++ = c; -} - static ssize_t devkmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct devkmsg_user *user = file-private_data; struct printk_log *msg; - char *p, *e; - u64 ts_usec; - size_t i; - char cont = '-'; size_t len; ssize_t ret; if (!user) return -EBADF; - p = user-buf; - e = user-buf + sizeof(user-buf); - ret = mutex_lock_interruptible(user-lock); if (ret) return ret; @@ -618,71 +685,17 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, } msg = log_from_idx(user-idx); - ts_usec = msg-ts_nsec; - do_div(ts_usec, 1000); - - /* - * If we couldn't merge 
continuation line fragments during the print, - * export the stored flags to allow an optional external merge of the - * records. Merging the records isn't always neccessarily correct, like - * when we hit a race during printing. In most cases though, it produces - * better readable output. 'c' in
Re: [PATCH 03/16] printk: move LOG_NOCONS skipping into call_console_drivers()
On Thu 2015-04-16 19:03:40, Tejun Heo wrote: When a line is printed by multiple printk invocations, each chunk is directly sent out to console drivers so that they don't get lost. When the line is completed and stored in the log buffer, the line is suppressed from going out to consoles as that'd lead to duplicate outputs. This is tracked with LOG_NOCONS flag. The suppression is currently implemented in console_unlock() which skips invoking call_console_drivers() for LOG_NOCONS messages. This patch moves the filtering into call_console_drivers() in preparation of the planned extended console drivers which will deal with the duplicate messages themselves. While this makes call_console_drivers() iterate over LOG_NOCONS messages, this is extremely unlikely to matter especially given that continuation lines aren't that common and also simplifies console_unlock() a bit. Signed-off-by: Tejun Heo t...@kernel.org Cc: Kay Sievers k...@vrfy.org Cc: Petr Mladek pmla...@suse.cz --- kernel/printk/printk.c | 46 -- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5ea6709..0175c46 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1417,7 +1417,8 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) * log_buf[start] to log_buf[end - 1]. * The console_lock must be held. */ -static void call_console_drivers(int level, const char *text, size_t len) +static void call_console_drivers(int level, bool nocons, + const char *text, size_t len) { struct console *con; @@ -1438,6 +1439,13 @@ static void call_console_drivers(int level, const char *text, size_t len) if (!cpu_online(smp_processor_id()) !(con-flags CON_ANYTIME)) continue; + /* + * Skip record we have buffered and already printed + * directly to the console when we received it. 
+ */ + if (nocons) + continue; + con-write(con, text, len); } } @@ -1919,7 +1927,8 @@ static struct cont { } cont; static struct printk_log *log_from_idx(u32 idx) { return NULL; } static u32 log_next(u32 idx) { return 0; } -static void call_console_drivers(int level, const char *text, size_t len) {} +static void call_console_drivers(int level, bool nocons, + const char *text, size_t len) {} static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev, bool syslog, char *buf, size_t size) { return 0; } static size_t cont_print_text(char *text, size_t size) { return 0; } @@ -2190,7 +2199,7 @@ static void console_cont_flush(char *text, size_t size) len = cont_print_text(text, size); raw_spin_unlock(logbuf_lock); stop_critical_timings(); - call_console_drivers(cont.level, text, len); + call_console_drivers(cont.level, false, text, len); start_critical_timings(); local_irq_restore(flags); return; @@ -2234,6 +2243,7 @@ again: struct printk_log *msg; size_t len; int level; + bool nocons; raw_spin_lock_irqsave(logbuf_lock, flags); if (seen_seq != log_next_seq) { @@ -2252,38 +2262,30 @@ again: } else { len = 0; } -skip: + if (console_seq == log_next_seq) break; msg = log_from_idx(console_idx); - if (msg-flags LOG_NOCONS) { - /* - * Skip record we have buffered and already printed - * directly to the console when we received it. - */ - console_idx = log_next(console_idx); - console_seq++; - /* - * We will get here again when we register a new - * CON_PRINTBUFFER console. Clear the flag so we - * will properly dump everything later. - */ - msg-flags = ~LOG_NOCONS; - console_prev = msg-flags; - goto skip; - } - level = msg-level; + nocons = msg-flags LOG_NOCONS; len += msg_print_text(msg, console_prev, false, text + len, sizeof(text) - len); console_idx = log_next(console_idx); console_seq++; console_prev = msg-flags; + + /* + * The log will be processed again when we register a new + * CON_PRINTBUFFER console. 
Clear the flag so we will + * properly dump everything
Re: [PATCH 11/11] k_clock:Remove the 32bit methods with timespec type
On Mon, Apr 20, 2015 at 01:57:39PM +0800, Baolin Wang wrote: @@ -911,18 +907,14 @@ retry: return -EINVAL; kc = clockid_to_kclock(timr-it_clock); - if (WARN_ON_ONCE(!kc || (!kc-timer_set !kc-timer_set64))) { + if (WARN_ON_ONCE(!kc || !kc-timer_set64)) { error = -EINVAL; } else { - if (kc-timer_set64) { - new_spec64 = itimerspec_to_itimerspec64(new_spec); - error = kc-timer_set64(timr, flags, new_spec64, - old_spec64); - if (old_setting) - old_spec = itimerspec64_to_itimerspec(old_spec64); - } else { - error = kc-timer_set(timr, flags, new_spec, rtn); - } + new_spec64 = itimerspec_to_itimerspec64(new_spec); + error = kc-timer_set64(timr, flags, new_spec64, + old_spec64); This statement can fit on one line. + if (old_setting) + old_spec = itimerspec64_to_itimerspec(old_spec64); } unlock_timer(timr, flag); @@ -1057,14 +1045,13 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, if (!kc) return -EINVAL; - if (kc-clock_get64) { - error = kc-clock_get64(which_clock, kernel_tp64); - kernel_tp = timespec64_to_timespec(kernel_tp64); - } else { - error = kc-clock_get(which_clock, kernel_tp); - } + error = kc-clock_get64(which_clock, kernel_tp64); + if (!error) + return error; Wrong test, should be: if (error) ... + + kernel_tp = timespec64_to_timespec(kernel_tp64); - if (!error copy_to_user(tp, kernel_tp, sizeof (kernel_tp))) The (!error ...) was correct here! + if (copy_to_user(tp, kernel_tp, sizeof (kernel_tp))) error = -EFAULT; return error; You can simplify this like so: return copy_to_user(tp, kernel_tp, sizeof(kernel_tp)) ? -EFAULT : 0; @@ -1104,14 +1091,13 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, if (!kc) return -EINVAL; - if (kc-clock_getres64) { - error = kc-clock_getres64(which_clock, rtn_tp64); - rtn_tp = timespec64_to_timespec(rtn_tp64); - } else { - error = kc-clock_getres(which_clock, rtn_tp); - } + error = kc-clock_getres64(which_clock, rtn_tp64); + if (!error) + return error; Also wrong. 
+ + rtn_tp = timespec64_to_timespec(rtn_tp64); - if (!error tp copy_to_user(tp, rtn_tp, sizeof (rtn_tp))) + if (tp copy_to_user(tp, rtn_tp, sizeof (rtn_tp))) error = -EFAULT; return error; -- 1.7.9.5 Thanks, Richard -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 02/11] IB/addr: Pass network namespace as a parameter
From: Guy Shapiro gu...@mellanox.com Add network namespace support to the ib_addr module. For that, all the address resolution and matching should be done using the appropriate namespace instead of init_net. This is achieved by: 1. Adding an explicit network namespace argument to exported function that require a namespace. 2. Saving the namespace in the rdma_addr_client structure. 3. Using it when calling networking functions. In order to preserve the behavior of calling modules, init_net is passed as the parameter in calls from other modules. This is modified as namespace support is added on more levels. Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com Signed-off-by: Guy Shapiro gu...@mellanox.com --- drivers/infiniband/core/addr.c | 31 -- drivers/infiniband/core/cma.c| 4 ++- drivers/infiniband/core/verbs.c | 14 +++--- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 3 ++- include/rdma/ib_addr.h | 44 5 files changed, 72 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index f80da50d84a5..95beaef6b66d 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, int ret = -EADDRNOTAVAIL; if (dev_addr-bound_dev_if) { - dev = dev_get_by_index(init_net, dev_addr-bound_dev_if); + dev = dev_get_by_index(dev_addr-net, dev_addr-bound_dev_if); if (!dev) return -ENODEV; ret = rdma_copy_addr(dev_addr, dev, NULL); @@ -137,9 +137,10 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, } switch (addr-sa_family) { - case AF_INET: - dev = ip_dev_find(init_net, - ((struct sockaddr_in *) addr)-sin_addr.s_addr); + case AF_INET: { + struct sockaddr_in *addr_in = (struct sockaddr_in *)addr; + + dev = ip_dev_find(dev_addr-net, addr_in-sin_addr.s_addr); if (!dev) return ret; @@ -149,12 +150,12 
@@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, *vlan_id = rdma_vlan_dev_vlan_id(dev); dev_put(dev); break; - + } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: rcu_read_lock(); - for_each_netdev_rcu(init_net, dev) { - if (ipv6_chk_addr(init_net, + for_each_netdev_rcu(dev_addr-net, dev) { + if (ipv6_chk_addr(dev_addr-net, ((struct sockaddr_in6 *) addr)-sin6_addr, dev, 1)) { ret = rdma_copy_addr(dev_addr, dev, NULL); @@ -236,7 +237,7 @@ static int addr4_resolve(struct sockaddr_in *src_in, fl4.daddr = dst_ip; fl4.saddr = src_ip; fl4.flowi4_oif = addr-bound_dev_if; - rt = ip_route_output_key(init_net, fl4); + rt = ip_route_output_key(addr-net, fl4); if (IS_ERR(rt)) { ret = PTR_ERR(rt); goto out; @@ -278,12 +279,13 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, fl6.saddr = src_in-sin6_addr; fl6.flowi6_oif = addr-bound_dev_if; - dst = ip6_route_output(init_net, NULL, fl6); + dst = ip6_route_output(addr-net, NULL, fl6); if ((ret = dst-error)) goto put; if (ipv6_addr_any(fl6.saddr)) { - ret = ipv6_dev_get_saddr(init_net, ip6_dst_idev(dst)-dev, + ret = ipv6_dev_get_saddr(addr-net, +ip6_dst_idev(dst)-dev, fl6.daddr, 0, fl6.saddr); if (ret) goto put; @@ -458,7 +460,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr, } int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, - u16 *vlan_id) + u16 *vlan_id, struct net *net) { int ret = 0; struct rdma_dev_addr dev_addr; @@ -481,6 +483,7 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, return ret; memset(dev_addr, 0, sizeof(dev_addr)); + dev_addr.net = net; ctx.addr = dev_addr; init_completion(ctx.comp); @@ -492,7 +495,7 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, wait_for_completion(ctx.comp); memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); - dev = dev_get_by_index(init_net,
[PATCH v2 03/11] IB/core: Pass network namespace as a parameter to relevant functions
From: Guy Shapiro gu...@mellanox.com Add network namespace parameters for the address related ib_core functions. The parameter is passed to lower level function, instead of init_net, so things are done in the correct namespace. For now pass init_net on every caller. Callers that will pass init_net permanently are marked with an appropriate comment. Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com Signed-off-by: Guy Shapiro gu...@mellanox.com --- drivers/infiniband/core/agent.c | 4 +++- drivers/infiniband/core/cm.c | 9 +++-- drivers/infiniband/core/mad_rmpp.c| 10 -- drivers/infiniband/core/user_mad.c| 4 +++- drivers/infiniband/core/verbs.c | 10 ++ drivers/infiniband/ulp/srpt/ib_srpt.c | 3 ++- include/rdma/ib_verbs.h | 15 +-- 7 files changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index f6d29614cb01..539378d64041 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -99,7 +99,9 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, } agent = port_priv-agent[qpn]; - ah = ib_create_ah_from_wc(agent-qp-pd, wc, grh, port_num); + /* Physical devices (and their MAD replies) always reside in the host +* network namespace */ + ah = ib_create_ah_from_wc(agent-qp-pd, wc, grh, port_num, init_net); if (IS_ERR(ah)) { dev_err(device-dev, ib_create_ah_from_wc error %ld\n, PTR_ERR(ah)); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index e28a494e2a3a..5a45cb76c43e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -290,8 +290,13 @@ static int cm_alloc_response_msg(struct cm_port *port, struct ib_mad_send_buf *m; struct ib_ah *ah; + /* For IB, the network namespace doesn't affect the created address +* handle, so we use init_net. 
In the future, RoCE support will +* require finding a specific network namespace to send the response +* from. */ ah = ib_create_ah_from_wc(port-mad_agent-qp-pd, mad_recv_wc-wc, - mad_recv_wc-recv_buf.grh, port-port_num); + mad_recv_wc-recv_buf.grh, port-port_num, + init_net); if (IS_ERR(ah)) return PTR_ERR(ah); @@ -346,7 +351,7 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, av-port = port; av-pkey_index = wc-pkey_index; ib_init_ah_from_wc(port-cm_dev-ib_device, port-port_num, wc, - grh, av-ah_attr); + grh, av-ah_attr, init_net); } static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index f37878c9c06e..6c1576202965 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -157,8 +157,11 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, struct ib_ah *ah; int hdr_len; + /* Physical devices (and their MAD replies) always reside in the host +* network namespace */ ah = ib_create_ah_from_wc(agent-qp-pd, recv_wc-wc, - recv_wc-recv_buf.grh, agent-port_num); + recv_wc-recv_buf.grh, agent-port_num, + init_net); if (IS_ERR(ah)) return (void *) ah; @@ -287,10 +290,13 @@ create_rmpp_recv(struct ib_mad_agent_private *agent, if (!rmpp_recv) return NULL; + /* Physical devices (and their MAD replies) always reside in the host +* network namespace */ rmpp_recv-ah = ib_create_ah_from_wc(agent-agent.qp-pd, mad_recv_wc-wc, mad_recv_wc-recv_buf.grh, -agent-agent.port_num); +agent-agent.port_num, +init_net); if (IS_ERR(rmpp_recv-ah)) goto error; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 928cdd20e2d1..f34c6077759d 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -239,7 +239,9 @@ static void recv_handler(struct ib_mad_agent *agent, ib_init_ah_from_wc(agent-device, agent-port_num, mad_recv_wc-wc, 
mad_recv_wc-recv_buf.grh, - ah_attr); + ah_attr, init_net); +
[PATCH v2 00/11] Add network namespace support in the RDMA-CM
On 4/15/2015 3:39 PM, Doug Ledford wrote: For instance, the namespace patches aren't included, and that's at least partially because they didn't apply cleanly any more. Here's an updated series on top of your tree. I've also included the fix for IPv4 connections to IPv6 listeners. Regards, Haggai Changes from v1: - Include patch 1 in this series. - Rebase for v4.1. Changes from v0: - Fix code review comments by Yann - Rebase on top of linux-3.19 RDMA-CM uses IP based addressing and routing to setup RDMA connections between hosts. Currently, all of the IP interfaces and addresses used by the RDMA-CM must reside in the init_net namespace. This restricts the usage of containers with RDMA to only work with host network namespace (aka the kernel init_net NS instance). This patchset allows using network namespaces with the RDMA-CM. Each RDMA-CM and CM id is keeping a reference to a network namespace. This reference is based on the process network namespace at the time of the creation of the object or inherited from the listener. This network namespace is used to perform all IP and network related operations. Specifically, the local device lookup, as well as the remote GID address resolution are done in the context of the RDMA-CM object's namespace. This allows outgoing connections to reach the right target, even if the same IP address exists in multiple network namespaces. This can happen if each network namespace resides on a different pkey. Additionally, the network namespace is used to split the listener service ID table. From the user point of view, each network namespace has a unique, completely independent table of service IDs. This allows running multiple instances of a single service on the same machine, using containers. To implement this, the CM layer now parses the IP address from the CM connect requests, and searches for the matching networking device. The namespace of the device found is used when looking up the service ID in the listener table. 
The functionality introduced by this series would come into play when the transport is InfiniBand and IPoIB interfaces are assigned to each namespace. Multiple IPoIB interfaces can be created and assigned to different RDMA-CM capable containers, for example using pipework [1]. Full support for RoCE will be introduced at a later stage. The patches apply against Roland's/Doug's tree for v4.1. The patchset is structured as follows: Patch 1 is a resend of the patch to fix IPv4 connections to an IPv4/IPv6 listener. Patches 2 and 3 are relatively trivial API extensions, requiring the callers of certain ib_addr and ib_core functions to provide a network namespace, as needed. Patches 4 and 5 add the ability to look up a network namespace according to the IP address, device and pkey. They find the matching IPoIB interfaces, and safely take a reference on the network namespace before returning to the caller. Patch 6 moves the logic that extracts the IP address from a connect request into the CM layer. This is needed for the upcoming listener lookup by namespace. Patch 7 adds support for network namespaces in the CM layer. All callers are still passing init_net as the namespace, to maintain backward compatibility. For incoming requests, the namespace of the relevant IPoIB device is used. Patches 8 and 9 add proper namespace support to the RDMA-CM module. Patches 10 and 11 add namespace support to the relevant user-facing modules in the IB stack. 
[1] https://github.com/jpetazzo/pipework/pull/108 Guy Shapiro (7): IB/addr: Pass network namespace as a parameter IB/core: Pass network namespace as a parameter to relevant functions IB/ipoib: Return IPoIB devices as possible matches to get_net_device_by_port_pkey_ip IB/cm, cma: Move RDMA IP CM private-data parsing code from ib_cma to ib_cm IB/cm: Add network namespace support IB/cma: Add support for network namespaces IB/ucma: Take the network namespace from the process Shachar Raindel (1): IB/ucm: Add partial support for network namespaces Yotam Kenneth (3): RDMA/CMA: Mark IPv4 addresses correctly when the listener is IPv6 IB/core: Find the network namespace matching connection parameters IB/cma: Separate port allocation to network namespaces drivers/infiniband/core/addr.c | 31 +- drivers/infiniband/core/agent.c| 4 +- drivers/infiniband/core/cm.c | 287 -- drivers/infiniband/core/cma.c | 332 + drivers/infiniband/core/device.c | 57 drivers/infiniband/core/mad_rmpp.c | 10 +- drivers/infiniband/core/ucm.c | 4 +- drivers/infiniband/core/ucma.c | 4 +- drivers/infiniband/core/user_mad.c | 4 +- drivers/infiniband/core/verbs.c| 22 +- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 3 +-
[PATCH v2 01/11] RDMA/CMA: Mark IPv4 addresses correctly when the listener is IPv6
From: Yotam Kenneth yota...@mellanox.com When accepting a new connection with the listener being IPv6, the family of the new connection is set as IPv6. This causes cma_zero_addr function to return true on an non-zero address. As a result, the wrong code path is taken. This causes the connection request to be rejected, as the RDMA-CM code looks for the wrong type of device. Since copying the ip address is done in different function depending on the family (cma_save_ip4_info/cma_save_ip6_info) this is fixed by hard coding the family of the IP address according to the function. Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Or Gerlitz ogerl...@mellanox.com --- drivers/infiniband/core/cma.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d570030d899c..6e5e11ca7702 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -866,12 +866,12 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_i listen4 = (struct sockaddr_in *) listen_id-route.addr.src_addr; ip4 = (struct sockaddr_in *) id-route.addr.src_addr; - ip4-sin_family = listen4-sin_family; + ip4-sin_family = AF_INET; ip4-sin_addr.s_addr = hdr-dst_addr.ip4.addr; ip4-sin_port = listen4-sin_port; ip4 = (struct sockaddr_in *) id-route.addr.dst_addr; - ip4-sin_family = listen4-sin_family; + ip4-sin_family = AF_INET; ip4-sin_addr.s_addr = hdr-src_addr.ip4.addr; ip4-sin_port = hdr-port; } @@ -883,12 +883,12 @@ static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_i listen6 = (struct sockaddr_in6 *) listen_id-route.addr.src_addr; ip6 = (struct sockaddr_in6 *) id-route.addr.src_addr; - ip6-sin6_family = listen6-sin6_family; + ip6-sin6_family = AF_INET6; ip6-sin6_addr = hdr-dst_addr.ip6; ip6-sin6_port = listen6-sin6_port; ip6 = (struct sockaddr_in6 *) id-route.addr.dst_addr; - ip6-sin6_family = listen6-sin6_family; + ip6-sin6_family = AF_INET6; 
ip6-sin6_addr = hdr-src_addr.ip6; ip6-sin6_port = hdr-port; } -- 1.7.11.2 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 10/11] IB/ucma: Take the network namespace from the process
From: Guy Shapiro gu...@mellanox.com Add support for network namespaces from user space. This is done by passing the network namespace of the process instead of init_net. Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com Signed-off-by: Guy Shapiro gu...@mellanox.com --- drivers/infiniband/core/ucma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 2f7fad84f933..0ccdf2b05153 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -42,6 +42,7 @@ #include linux/slab.h #include linux/sysctl.h #include linux/module.h +#include linux/nsproxy.h #include rdma/rdma_user_cm.h #include rdma/ib_marshall.h @@ -392,7 +393,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, ctx-uid = cmd.uid; ctx-cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type, - init_net); + current-nsproxy-net_ns); if (IS_ERR(ctx-cm_id)) { ret = PTR_ERR(ctx-cm_id); goto err1; -- 1.7.11.2 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [RFC,1/8] soc/fman: Add FMan MURAM support
Regards, Igal Liberman. -Original Message- From: Kumar Gala [mailto:ga...@kernel.crashing.org] Sent: Thursday, March 12, 2015 5:57 PM To: Liberman Igal-B31950 Cc: linuxppc-...@lists.ozlabs.org; netdev@vger.kernel.org; linux- ker...@vger.kernel.org; Wood Scott-B07421 Subject: Re: [RFC,1/8] soc/fman: Add FMan MURAM support On Mar 11, 2015, at 12:07 AM, Igal.Liberman igal.liber...@freescale.com wrote: From: Igal Liberman igal.liber...@freescale.com Add Frame Manager Multi-User RAM support. Signed-off-by: Igal Liberman igal.liber...@freescale.com --- drivers/soc/fsl/fman/Kconfig|1 + drivers/soc/fsl/fman/Makefile |5 +- drivers/soc/fsl/fman/fm_muram.c | 174 +++ drivers/soc/fsl/fman/inc/fm_muram_ext.h | 98 + 4 files changed, 276 insertions(+), 2 deletions(-) create mode 100644 drivers/soc/fsl/fman/fm_muram.c create mode 100644 drivers/soc/fsl/fman/inc/fm_muram_ext.h use lib/genalloc instead of rheap Hi Kumar, I looked into lib/genalloc allocator. As far as I see, the genalloc allocator doesn't allow to control the memory alignment when you allocate a chunk of memory. Two important notes regarding MURAM memory: - The allocated memory chunks should have specific alignment (might be different in each chunk). - The allocations must be efficient, we don't want to waste MURAM due to alignment issues. - k -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 01/11] linux/time64.h:Introduce the 'struct itimerspec64' for 64bit
Hello. On 4/20/2015 8:57 AM, Baolin Wang wrote: This patch introduces the 'struct itimerspec64' for 64bit to replace itimerspec, and also introduces the conversion methods: itimerspec64_to_itimerspec() and itimerspec_to_itimerspec64(), that makes itimerspec to ready for 2038 year. To not needed here. Signed-off-by: Baolin Wang baolin.w...@linaro.org [...] WBR, Sergei -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Xen-devel] tcp: refine TSO autosizing causes performance regression on Xen
On Thu, Apr 16, 2015 at 1:42 PM, Eric Dumazet eric.duma...@gmail.com wrote: On Thu, 2015-04-16 at 11:01 +0100, George Dunlap wrote: He suggested that after he'd been prodded by 4 more e-mails in which two of us guessed what he was trying to get at. That's what I was complaining about. My big complaint is that I suggested testing with the sysctl doubled, which gave good results. Then you provided a patch using an 8x factor. How does that sound? Next time I ask for a raise, I should try an 8x factor as well; who knows, it might be accepted. I see. I chose the value that Stefano had determined had completely eliminated the overhead. Doubling the value reduces the overhead to 8%, which should be fine for a short-term fix while we get a proper mid/long-term fix. -George -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] ip_forward: Drop frames with attached skb-sk
Initial discussion was: [FYI] xfrm: Don't lookup sk_policy for timewait sockets Forwarded frames should not have a socket attached. Especially tw sockets will lead to panics later-on in the stack. This was observed with TPROXY assigning a tw socket and broken policy routing (misconfigured). As a result frame enters forwarding path instead of input. We cannot solve this in TPROXY as it cannot know that policy routing is broken. v2: Remove useless comment Signed-off-by: Sebastian Poehn sebastian.po...@gmail.com --- diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 939992c..3674484 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -82,6 +82,9 @@ int ip_forward(struct sk_buff *skb) if (skb-pkt_type != PACKET_HOST) goto drop; + if (unlikely(skb-sk)) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; -- -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH linux-next v5] mlx5: wrong page mask if CONFIG_ARCH_DMA_ADDR_T_64BIT enabled for 32Bit architectures
On Wed, Apr 15, 2015 at 04:36:15PM +0800, Honggang Li wrote: If CONFIG_ARCH_DMA_ADDR_T_64BIT is enabled for x86 systems and physical memory is more than 4GB, dma_map_page may return a valid memory address which is greater than 0xffffffff. As a result, the mlx5 device page allocator RB tree will be initialized with valid addresses greater than 0xffffffff. However, (addr & PAGE_MASK) sets the high four bytes to zero. So, it's impossible for the function, free_4k, to release the pages whose addresses are greater than 4GB, and the memory leaks. And the mlx5_ib module can't release the pages when the user tries to remove the module; as a result, the system hangs. [root@rdma05 root]# dmesg | grep addr | head addr = 3fe384000 addr & PAGE_MASK = fe384000 [root@rdma05 root]# rmmod mlx5_ib hang on -- console log - mlx5_ib :04:00.0: irq 138 for MSI/MSI-X alloc irq_desc for 139 on node -1 alloc kstat_irqs on node -1 mlx5_ib :04:00.0: irq 139 for MSI/MSI-X :04:00.0:free_4k:221:(pid 1519): page not found :04:00.0:free_4k:221:(pid 1519): page not found :04:00.0:free_4k:221:(pid 1519): page not found :04:00.0:free_4k:221:(pid 1519): page not found -- console log - Fixes: bf0bf77f6519 ('mlx5: Support communicating arbitrary host page size to firmware') Signed-off-by: Honggang Li ho...@redhat.com --- Acked-by: Eli Cohen e...@mellanox.com -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V1 net-next] IB/ipoib: Fix ndo_get_iflink
On Mon, Apr 20, 2015 at 11:16 AM, Haggai Eran hagg...@mellanox.com wrote: On 17/04/2015 22:21, David Miller wrote: From: Erez Shitrit ere...@mellanox.com Date: Thu, 16 Apr 2015 16:34:34 +0300 Currently, iflink of the parent interface was always accessed, even when interface didn't have a parent and hence we crashed there. Handle the interface types properly: for a child interface, return the ifindex of the parent, for parent interface, return its ifindex. For child devices, make sure to set the parent pointer prior to invoking register_netdevice(), this allows the new ndo to be called by the stack immediately after the child device is registered. Fixes: 5aa7add8f14b ('infiniband/ipoib: implement ndo_get_iflink') Reported-by: Honggang Li ho...@redhat.com Signed-off-by: Erez Shitrit ere...@mellanox.com Signed-off-by: Honggang Li ho...@redhat.com Applied, thanks. Doug, Roland, You might want to include this patch in your for-next / for-4.1 trees, or merge net-next again. Currently they contain the issue it fixes, and it can prevent some systems with IPoIB from booting. Haggai, It's upstream by now, pull Linus tree. Or. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC] [PATCH] FUJITSU Extended Socket network device driver
This patch adds support for FUJITSU Extended Socket network device. Extended Socket network device is a shared memory based high-speed network interface between Extended Partitions of PRIMEQUEST 2000 series. # I know this code needs more refactoring, but I wanted to post this code as soon as possible because this is the first time I am posting driver code written from scratch. # Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com --- drivers/platform/x86/Kconfig |8 + drivers/platform/x86/Makefile|2 + drivers/platform/x86/fjes/Makefile | 31 + drivers/platform/x86/fjes/fjes.h | 87 ++ drivers/platform/x86/fjes/fjes_ethtool.c | 135 +++ drivers/platform/x86/fjes/fjes_hw.c | 1148 ++ drivers/platform/x86/fjes/fjes_hw.h | 353 +++ drivers/platform/x86/fjes/fjes_main.c| 1525 ++ drivers/platform/x86/fjes/fjes_regs.h| 139 +++ 9 files changed, 3428 insertions(+) create mode 100644 drivers/platform/x86/fjes/Makefile create mode 100755 drivers/platform/x86/fjes/fjes.h create mode 100755 drivers/platform/x86/fjes/fjes_ethtool.c create mode 100755 drivers/platform/x86/fjes/fjes_hw.c create mode 100755 drivers/platform/x86/fjes/fjes_hw.h create mode 100755 drivers/platform/x86/fjes/fjes_main.c create mode 100755 drivers/platform/x86/fjes/fjes_regs.h diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 9752761..268c7495 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -884,4 +884,12 @@ config PVPANIC a paravirtualized device provided by QEMU; it lets a virtual machine (guest) communicate panic events to the host. +config FUJITSU_ES + tristate FUJITSU Extended Socket Network Device driver + depends on ACPI + ---help--- + This driver provides support for Extended Socket network device on + Extended Partitioning of FUJITSU PRIMEQUEST 2000 series. 
+ + endif # X86_PLATFORM_DEVICES diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index f82232b..319eb20 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -58,3 +58,5 @@ obj-$(CONFIG_INTEL_SMARTCONNECT) += intel-smartconnect.o obj-$(CONFIG_PVPANIC) += pvpanic.o obj-$(CONFIG_ALIENWARE_WMI)+= alienware-wmi.o + +obj-$(CONFIG_FUJITSU_ES) += fjes/ diff --git a/drivers/platform/x86/fjes/Makefile b/drivers/platform/x86/fjes/Makefile new file mode 100644 index 000..45dc9d3 --- /dev/null +++ b/drivers/platform/x86/fjes/Makefile @@ -0,0 +1,31 @@ + +# +# FUJITSU Extended Socket Network Device driver +# Copyright (c) 2015 FUJITSU LIMITED +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, see http://www.gnu.org/licenses/. +# +# The full GNU General Public License is included in this distribution in +# the file called COPYING. 
+# + + + +# +# Makefile for the FUJITSU Extended Socket network device driver +# + +obj-$(CONFIG_FUJITSU_ES) += fjes.o + +fjes-objs := fjes_main.o fjes_ethtool.o fjes_hw.o + diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h new file mode 100755 index 000..890f16f --- /dev/null +++ b/drivers/platform/x86/fjes/fjes.h @@ -0,0 +1,87 @@ +/* + * FUJITSU Extended Socket Network Device driver + * Copyright (c) 2015 FUJITSU LIMITED + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see http://www.gnu.org/licenses/. + * + * The full GNU General Public License is included in this distribution in + * the file called COPYING. + * + */ + + +#ifndef FJES_H_ +#define FJES_H_ + +#include linux/acpi.h +#include linux/workqueue.h + +#include fjes_hw.h + +struct fjes_adapter; + +#define FJES_ACPI_SYMBOL Extended Socket + +#define FJES_MAX_QUEUES
Re: [PATCH] Bluetooth: Pre-initialize variables in read_local_oob_ext_data_complete()
Hi Marcel, On Fri, Apr 17, 2015 at 10:38 PM, Marcel Holtmann mar...@holtmann.org wrote: net/bluetooth/mgmt.c: In function ‘read_local_oob_ext_data_complete’: net/bluetooth/mgmt.c:6474: warning: ‘r256’ may be used uninitialized in this function net/bluetooth/mgmt.c:6474: warning: ‘h256’ may be used uninitialized in this function net/bluetooth/mgmt.c:6474: warning: ‘r192’ may be used uninitialized in this function net/bluetooth/mgmt.c:6474: warning: ‘h192’ may be used uninitialized in this function While these are false positives, the code can be shortened by pre-initializing the hash table pointers and eir_len. This has the side effect of killing the compiler warnings. can you be a bit specific on which compiler version is this. I fixed one occurrence that seemed valid. However in this case the compiler seems to be just plain stupid. On a gcc 4.9, I am not seeing these for example. gcc 4.1.2. As there were too many false positives, these warnings were disabled in later versions (throwing away the children with the bad water). If you don't like my patch, just drop it. I only look at newly introduced warnings of this kind anyway. I really do not know what is the best solution here. This is a false positive. And I have been looking at this particular code for a warning that was valid, but we missed initially. But these warnings that you are fixing are clearly false positive. I only sent patches to fix false positives if I think the patches improve the code. As this is a subjective matter, it's up to you as the maintainer to decide. If this only happens with an old compiler version, I would tend to leave the code as is. Then again, what is the general preferred approach here? As this is a false positive, it's clearly up to the maintainer to decide if the patch improves the code or not. Thanks! 
Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say programmer or something like that. -- Linus Torvalds -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 05/11] IB/ipoib: Return IPoIB devices as possible matches to get_net_device_by_port_pkey_ip
From: Guy Shapiro gu...@mellanox.com Implement callback that returns network device to ib_core according to connection parameters. Check the ipoib device and iterate over all child devices to look for a match. For each ipoib device we iterate through all upper devices when searching for a matching IP, in order to support bonding. Signed-off-by: Guy Shapiro gu...@mellanox.com Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 122 +- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 7cad4dd87469..89a59a0e17e6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -48,6 +48,9 @@ #include linux/jhash.h #include net/arp.h +#include net/addrconf.h +#include linux/inetdevice.h +#include rdma/ib_cache.h #define DRV_VERSION 1.0.0 @@ -91,11 +94,15 @@ struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device); static void ipoib_neigh_reclaim(struct rcu_head *rp); +static struct net_device *ipoib_get_net_device_by_port_pkey_ip( + struct ib_device *dev, u8 port, u16 pkey, + struct sockaddr *addr); static struct ib_client ipoib_client = { .name = ipoib, .add= ipoib_add_one, - .remove = ipoib_remove_one + .remove = ipoib_remove_one, + .get_net_device_by_port_pkey_ip = ipoib_get_net_device_by_port_pkey_ip, }; int ipoib_open(struct net_device *dev) @@ -222,6 +229,119 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static bool ipoib_is_dev_match_addr(struct sockaddr *addr, + struct net_device *dev) +{ + struct net *net = dev_net(dev); + + if (addr-sa_family == AF_INET) { + struct in_device *in_dev = in_dev_get(dev); + struct sockaddr_in *addr_in = (struct sockaddr_in 
*)addr; + __be32 ret_addr; + + if (!in_dev) + return false; + + ret_addr = inet_confirm_addr(net, in_dev, 0, +addr_in-sin_addr.s_addr, +RT_SCOPE_HOST); + in_dev_put(in_dev); + if (ret_addr) + return true; + } +#if IS_ENABLED(CONFIG_IPV6) + else if (addr-sa_family == AF_INET6) { + struct sockaddr_in6 *addr_in6 = (struct sockaddr_in6 *)addr; + + if (ipv6_chk_addr(net, addr_in6-sin6_addr, dev, 1)) + return true; + } +#endif + return false; +} + +/** + * Find a net_device matching the given address, which is an upper device of + * the given net_device. + * @addr: IP address to look for. + * @dev: base IPoIB net_device + * + * If found, returns the net_device with a reference held. Otherwise return + * NULL. + */ +static struct net_device *ipoib_get_net_dev_match_addr(struct sockaddr *addr, + struct net_device *dev) +{ + struct net_device *upper, + *result = NULL; + struct list_head *iter; + + if (ipoib_is_dev_match_addr(addr, dev)) { + dev_hold(dev); + return dev; + } + + rcu_read_lock(); + netdev_for_each_all_upper_dev_rcu(dev, upper, iter) { + if (ipoib_is_dev_match_addr(addr, upper)) { + dev_hold(upper); + result = upper; + break; + } + } + rcu_read_unlock(); + return result; +} + +static struct net_device *ipoib_get_net_device_by_port_pkey_ip( + struct ib_device *dev, u8 port, u16 pkey, struct sockaddr *addr) +{ + struct ipoib_dev_priv *priv; + struct list_head *dev_list; + u16 pkey_index; + + ib_find_cached_pkey(dev, port, pkey, pkey_index); + if (pkey_index == (u16)-1) + return NULL; + + if (rdma_node_get_transport(dev-node_type) != RDMA_TRANSPORT_IB) + return NULL; + + dev_list = ib_get_client_data(dev, ipoib_client); + if (!dev_list) + return NULL; + + list_for_each_entry(priv, dev_list, list) { + struct net_device *net_dev = NULL; + struct ipoib_dev_priv *child_priv; + + if (priv-port != port) + continue; + + if (priv-pkey_index == pkey_index) { + net_dev = ipoib_get_net_dev_match_addr(addr, priv-dev); + if (net_dev) + return net_dev; + } + +
[PATCH v2 08/11] IB/cma: Separate port allocation to network namespaces
From: Yotam Kenneth yota...@mellanox.com Keep a radix-tree for the network namespaces we support for each port-space. Dynamically allocate idr for network namespace upon first bind request for a port in the (ps, net) tuple. Destroy the idr when the (ps, net) tuple does not contain any bounded ports. This patch is internal infrastructure work for the following patch. In this patch, init_net is statically used as the network namespace for the new port-space API. The radix-tree is protected under the same locking that protects the rest of the port space data. This locking is practically a big, static mutex lock for the entire module. Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com Signed-off-by: Guy Shapiro gu...@mellanox.com --- drivers/infiniband/core/cma.c | 122 ++ 1 file changed, 99 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 1ce84a03c883..022b0d0a51cc 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -39,11 +39,13 @@ #include linux/mutex.h #include linux/random.h #include linux/idr.h +#include linux/radix-tree.h #include linux/inetdevice.h #include linux/slab.h #include linux/module.h #include net/route.h +#include net/netns/hash.h #include net/tcp.h #include net/ipv6.h @@ -80,10 +82,83 @@ static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; -static DEFINE_IDR(tcp_ps); -static DEFINE_IDR(udp_ps); -static DEFINE_IDR(ipoib_ps); -static DEFINE_IDR(ib_ps); +static RADIX_TREE(tcp_ps, GFP_KERNEL); +static RADIX_TREE(udp_ps, GFP_KERNEL); +static RADIX_TREE(ipoib_ps, GFP_KERNEL); +static RADIX_TREE(ib_ps, GFP_KERNEL); + +static LIST_HEAD(idrs_list); + +struct idr_ll { + unsigned net_val; + struct net *net; + struct radix_tree_root *ps; + struct idr idr; +}; + +static void zap_ps_idr(struct idr_ll 
*idr_ll) +{ + radix_tree_delete(idr_ll-ps, idr_ll-net_val); + idr_destroy(idr_ll-idr); + kfree(idr_ll); +} + +static int cma_ps_alloc(struct radix_tree_root *ps, struct net *net, void *ptr, + int snum) +{ + struct idr_ll *idr_ll; + int err; + int res; + + idr_ll = radix_tree_lookup(ps, net_hash_mix(net)); + if (!idr_ll) { + idr_ll = kmalloc(sizeof(*idr_ll), GFP_KERNEL); + if (!idr_ll) + return -ENOMEM; + idr_init(idr_ll-idr); + idr_ll-net_val = net_hash_mix(net); + idr_ll-net = net; + idr_ll-ps = ps; + err = radix_tree_insert(ps, idr_ll-net_val, idr_ll); + if (err) { + idr_destroy(idr_ll-idr); + kfree(idr_ll); + return err; + } + } + res = idr_alloc(idr_ll-idr, ptr, snum, snum + 1, GFP_KERNEL); + if (unlikely((res 0) idr_is_empty(idr_ll-idr))) { + zap_ps_idr(idr_ll); + return res; + } + return res; +} + +static void *cma_ps_find(struct radix_tree_root *ps, struct net *net, int snum) +{ + struct idr_ll *idr_ll; + + idr_ll = radix_tree_lookup(ps, net_hash_mix(net)); + if (!idr_ll) + return NULL; + return idr_find(idr_ll-idr, snum); +} + +static void cma_ps_remove(struct radix_tree_root *ps, struct net *net, int snum) +{ + struct idr_ll *idr_ll; + + idr_ll = radix_tree_lookup(ps, net_hash_mix(net)); + if (unlikely(!idr_ll)) { + WARN(1, cma_ps_removed can't find expected net ns 0x%lx\n, +(unsigned long)net); + return; + } + idr_remove(idr_ll-idr, snum); + if (idr_is_empty(idr_ll-idr)) { + zap_ps_idr(idr_ll); + } +} struct cma_device { struct list_headlist; @@ -94,9 +169,9 @@ struct cma_device { }; struct rdma_bind_list { - struct idr *ps; - struct hlist_head owners; - unsigned short port; + struct radix_tree_root *ps; + struct hlist_head owners; + unsigned short port; }; enum { @@ -885,7 +960,7 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_lock(lock); hlist_del(id_priv-node); if (hlist_empty(bind_list-owners)) { - idr_remove(bind_list-ps, bind_list-port); + cma_ps_remove(bind_list-ps, init_net, bind_list-port); kfree(bind_list); } 
mutex_unlock(lock); @@ -2198,8 +2273,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list, hlist_add_head(id_priv-node, bind_list-owners); } -static int cma_alloc_port(struct idr *ps, struct rdma_id_private
[PATCH v2 06/11] IB/cm, cma: Move RDMA IP CM private-data parsing code from ib_cma to ib_cm
From: Guy Shapiro gu...@mellanox.com When receiving a connection request, ib_cm needs to associate the request with a network namespace. To do this, it needs to know the request's destination IP. For this the RDMA IP CM packet formatting functionality needs to be exposed to ib_cm. This patch merely moves the RDMA IP CM data formatting and parsing functions to be part of ib_cm. The following patch will utilize the new knowledge to look-up the appropriate namespace. Each namespace maintains an independent table of RDMA CM service IDs, allowing isolation and separation between the network namespaces. When creating a new incoming connection ID, the code in cm_save_ip_info can no longer rely on the listener's private data to find the port number, so it reads it from the requested service ID. This required saving the service ID in cm_format_paths_from_req. Signed-off-by: Guy Shapiro gu...@mellanox.com Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com --- drivers/infiniband/core/cm.c | 156 +++ drivers/infiniband/core/cma.c | 166 +- include/rdma/ib_cm.h | 56 ++ 3 files changed, 230 insertions(+), 148 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 5a45cb76c43e..efc5cffb675a 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -51,6 +51,7 @@ #include rdma/ib_cache.h #include rdma/ib_cm.h +#include rdma/ib.h #include cm_msgs.h MODULE_AUTHOR(Sean Hefty); @@ -701,6 +702,159 @@ static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, ib_send_cm_sidr_rep(cm_id_priv-id, param); } +int cm_format_hdr(void *hdr, int family, + struct sockaddr *src_addr, + struct sockaddr *dst_addr) +{ + struct cm_hdr *cm_hdr; + + cm_hdr = hdr; + cm_hdr-cm_version = RDMA_IP_CM_VERSION; + if (family == AF_INET) { + struct sockaddr_in *src4, *dst4; + + src4 = (struct sockaddr_in *)src_addr; + dst4 = (struct sockaddr_in 
*)dst_addr; + + cm_set_ip_ver(cm_hdr, 4); + cm_hdr-src_addr.ip4.addr = src4-sin_addr.s_addr; + cm_hdr-dst_addr.ip4.addr = dst4-sin_addr.s_addr; + cm_hdr-port = src4-sin_port; + } else if (family == AF_INET6) { + struct sockaddr_in6 *src6, *dst6; + + src6 = (struct sockaddr_in6 *)src_addr; + dst6 = (struct sockaddr_in6 *)dst_addr; + + cm_set_ip_ver(cm_hdr, 6); + cm_hdr-src_addr.ip6 = src6-sin6_addr; + cm_hdr-dst_addr.ip6 = dst6-sin6_addr; + cm_hdr-port = src6-sin6_port; + } + return 0; +} +EXPORT_SYMBOL(cm_format_hdr); + +static void cm_save_ib_info(struct sockaddr *src_addr, + struct sockaddr *dst_addr, + struct ib_sa_path_rec *path) +{ + struct sockaddr_ib *ib; + + if (src_addr) { + ib = (struct sockaddr_ib *)src_addr; + ib-sib_family = AF_IB; + ib-sib_pkey = path-pkey; + ib-sib_flowinfo = path-flow_label; + memcpy(ib-sib_addr, path-sgid, 16); + ib-sib_sid = path-service_id; + ib-sib_sid_mask = cpu_to_be64(0xULL); + ib-sib_scope_id = 0; + } + if (dst_addr) { + ib = (struct sockaddr_ib *)dst_addr; + ib-sib_family = AF_IB; + ib-sib_pkey = path-pkey; + ib-sib_flowinfo = path-flow_label; + memcpy(ib-sib_addr, path-dgid, 16); + } +} + +static void cm_save_ip6_info(struct sockaddr *src_addr, +struct sockaddr *dst_addr, +struct cm_hdr *hdr, +__be16 local_port) +{ + struct sockaddr_in6 *ip6; + + if (src_addr) { + ip6 = (struct sockaddr_in6 *)src_addr; + ip6-sin6_family = AF_INET6; + ip6-sin6_addr = hdr-dst_addr.ip6; + ip6-sin6_port = local_port; + } + + if (dst_addr) { + ip6 = (struct sockaddr_in6 *)dst_addr; + ip6-sin6_family = AF_INET6; + ip6-sin6_addr = hdr-src_addr.ip6; + ip6-sin6_port = hdr-port; + } +} + +static void cm_save_ip4_info(struct sockaddr *src_addr, +struct sockaddr *dst_addr, +struct cm_hdr *hdr, +__be16 local_port) +{ + struct sockaddr_in *ip4; + + if (src_addr) { + ip4 = (struct sockaddr_in *)src_addr; + ip4-sin_family = AF_INET; + ip4-sin_addr.s_addr = hdr-dst_addr.ip4.addr; + ip4-sin_port = local_port;
[PATCH v2 07/11] IB/cm: Add network namespace support
From: Guy Shapiro gu...@mellanox.com Add namespace support to the IB-CM layer. - Each CM-ID now has a network namespace it is associated with, assigned at creation. This namespace is used as needed during subsequent action on the CM-ID or related objects. - All of the relevant calls to ib_addr and ib_core were updated to use the namespace from the CM-ID. External APIs were extended as needed to allow specifying the namespace where relevant. - The listening service ID table is now also indexed by the CM-ID namespace. - For incoming connection requests, we use the connection parameters to select namespace. The namespace is matched when looking for listening service ID. To preserve current behavior pass init_net to ib_cm wherever network namespace function parameters were added. The ib_cm_create_id interface now takes a reference to the relevant network namespace. CM-IDs created by accepting a connection for a listening CM-ID will also take a reference to the namespace. When the ID is destroyed, the namespace reference is released. 
Signed-off-by: Guy Shapiro gu...@mellanox.com Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com --- drivers/infiniband/core/cm.c| 124 drivers/infiniband/core/cma.c | 8 ++- drivers/infiniband/core/ucm.c | 3 +- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 21 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- include/rdma/ib_cm.h| 7 +- 7 files changed, 130 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index efc5cffb675a..75c6ac9a4aee 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -241,6 +241,8 @@ struct cm_id_private { u8 service_timeout; u8 target_ack_delay; + struct net *net; /* A network namespace that the ID belongs to */ + struct list_head work_list; atomic_t work_count; }; @@ -347,12 +349,13 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv, } static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, - struct ib_grh *grh, struct cm_av *av) + struct ib_grh *grh, struct cm_av *av, + struct net *net) { av-port = port; av-pkey_index = wc-pkey_index; ib_init_ah_from_wc(port-cm_dev-ib_device, port-port_num, wc, - grh, av-ah_attr, init_net); + grh, av-ah_attr, net); } static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) @@ -521,10 +524,15 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) if ((cur_cm_id_priv-id.service_mask service_id) == (service_mask cur_cm_id_priv-id.service_id) (cm_id_priv-id.device == cur_cm_id_priv-id.device) - !data_cmp) + !data_cmp + net_eq(cm_id_priv-net, cur_cm_id_priv-net)) return cur_cm_id_priv; - if (cm_id_priv-id.device cur_cm_id_priv-id.device) + if (cm_id_priv-net cur_cm_id_priv-net) + link = (*link)-rb_left; + else if (cm_id_priv-net cur_cm_id_priv-net) + link = (*link)-rb_right; + else if (cm_id_priv-id.device 
cur_cm_id_priv-id.device) link = (*link)-rb_left; else if (cm_id_priv-id.device cur_cm_id_priv-id.device) link = (*link)-rb_right; @@ -544,7 +552,8 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) static struct cm_id_private * cm_find_listen(struct ib_device *device, __be64 service_id, -u8 *private_data) +u8 *private_data, +struct net *net) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; @@ -556,10 +565,14 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device, cm_id_priv-compare_data); if ((cm_id_priv-id.service_mask service_id) == cm_id_priv-id.service_id - (cm_id_priv-id.device == device) !data_cmp) + (cm_id_priv-id.device == device) !data_cmp + net_eq(cm_id_priv-net, net)) return cm_id_priv; - - if (device cm_id_priv-id.device) + if (net cm_id_priv-net) +
[PATCH v2 09/11] IB/cma: Add support for network namespaces
From: Guy Shapiro gu...@mellanox.com Add support for network namespaces in the ib_cma module. This is accomplished by: 1. Adding network namespace parameter for rdma_create_id. This parameter is used to populate the network namespace field in rdma_id_private. rdma_create_id keeps a reference on the network namespace. 2. Using the network namespace from the rdma_id instead of init_net inside of ib_cma. 3. Decrementing the reference count for the appropriate network namespace when calling rdma_destroy_id. In order to preserve the current behavior init_net is passed when calling from other modules. Signed-off-by: Guy Shapiro gu...@mellanox.com Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com --- drivers/infiniband/core/cma.c | 52 +- drivers/infiniband/core/ucma.c | 3 +- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- drivers/infiniband/ulp/isert/ib_isert.c| 2 +- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h| 4 +- include/rdma/rdma_cm.h | 6 ++- net/9p/trans_rdma.c| 2 +- net/rds/ib.c | 2 +- net/rds/ib_cm.c| 2 +- net/rds/iw.c | 2 +- net/rds/iw_cm.c| 2 +- net/rds/rdma_transport.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c| 3 +- 14 files changed, 52 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 022b0d0a51cc..9ea42fe2853b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -540,7 +540,8 @@ static int cma_disable_callback(struct rdma_id_private *id_priv, struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, - enum ib_qp_type qp_type) + enum ib_qp_type qp_type, + struct net *net) { struct rdma_id_private *id_priv; @@ -562,7 +563,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, INIT_LIST_HEAD(id_priv-listen_list); INIT_LIST_HEAD(id_priv-mc_list); 
get_random_bytes(id_priv-seq_num, sizeof id_priv-seq_num); - id_priv-id.route.addr.dev_addr.net = init_net; + id_priv-id.route.addr.dev_addr.net = get_net(net); return id_priv-id; } @@ -689,7 +690,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, rdma_port_get_link_layer(id_priv-id.device, id_priv-id.port_num) == IB_LINK_LAYER_ETHERNET) { ret = rdma_addr_find_smac_by_sgid(sgid, qp_attr.smac, NULL, - init_net); + id_priv-id.route.addr.dev_addr.net); if (ret) goto out; @@ -953,6 +954,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv, static void cma_release_port(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list = id_priv-bind_list; + struct net *net = id_priv-id.route.addr.dev_addr.net; if (!bind_list) return; @@ -960,7 +962,7 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_lock(lock); hlist_del(id_priv-node); if (hlist_empty(bind_list-owners)) { - cma_ps_remove(bind_list-ps, init_net, bind_list-port); + cma_ps_remove(bind_list-ps, net, bind_list-port); kfree(bind_list); } mutex_unlock(lock); @@ -1029,6 +1031,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) cma_deref_id(id_priv-id.context); kfree(id_priv-id.route.path_rec); + put_net(id_priv-id.route.addr.dev_addr.net); kfree(id_priv); } EXPORT_SYMBOL(rdma_destroy_id); @@ -1156,7 +1159,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, int ret; id = rdma_create_id(listen_id-event_handler, listen_id-context, - listen_id-ps, ib_event-param.req_rcvd.qp_type); + listen_id-ps, ib_event-param.req_rcvd.qp_type, + listen_id-route.addr.dev_addr.net); if (IS_ERR(id)) return NULL; @@ -1201,10 +1205,11 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, { struct rdma_id_private *id_priv; struct rdma_cm_id *id; + struct net *net = listen_id-route.addr.dev_addr.net; int ret; id = rdma_create_id(listen_id-event_handler,
[PATCH v2 11/11] IB/ucm: Add partial support for network namespaces
From: Shachar Raindel rain...@mellanox.com It is impossible to completely support network namespaces for UCM, as we cannot identify the target IPoIB device. However, we add support which will work if the user is following the IB-Spec Annex 11 (RDMA IP CM Services) with the service ID and private data formatting. Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com Signed-off-by: Guy Shapiro gu...@mellanox.com --- drivers/infiniband/core/ucm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 9604ab068984..424421091dae 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -45,6 +45,7 @@ #include <linux/idr.h> #include <linux/mutex.h> #include <linux/slab.h> +#include <linux/nsproxy.h> #include <asm/uaccess.h> @@ -490,7 +491,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, ctx->uid = cmd.uid; ctx->cm_id = ib_create_cm_id(file->device->ib_dev, ib_ucm_event_handler, ctx, -&init_net); +current->nsproxy->net_ns); if (IS_ERR(ctx->cm_id)) { result = PTR_ERR(ctx->cm_id); goto err1; -- 1.7.11.2 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 04/11] IB/core: Find the network namespace matching connection parameters
From: Yotam Kenneth yota...@mellanox.com In the case of IPoIB, and maybe in other cases, the network device is managed by an upper-layer protocol (ULP). In order to expose this network device to other users of the IB device, let ULPs implement a callback that returns network device according to connection parameters. The IB device and port, together with the P_Key and the IP address should be enough to uniquely identify the ULP net device. This function is passed to ib_core as part of the ib_client registration. Using this functionality, add a way to get the network namespace corresponding to a work completion. This is needed so that responses to CM requests can be sent from the same network namespace as the request. Signed-off-by: Haggai Eran hagg...@mellanox.com Signed-off-by: Yotam Kenneth yota...@mellanox.com Signed-off-by: Shachar Raindel rain...@mellanox.com Signed-off-by: Guy Shapiro gu...@mellanox.com --- drivers/infiniband/core/device.c | 57 include/rdma/ib_verbs.h | 29 2 files changed, 86 insertions(+) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 18c1ece765f2..2f06be5b0b59 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -38,6 +38,7 @@ #include linux/slab.h #include linux/init.h #include linux/mutex.h +#include linux/netdevice.h #include rdma/rdma_netlink.h #include core_priv.h @@ -733,6 +734,62 @@ int ib_find_pkey(struct ib_device *device, } EXPORT_SYMBOL(ib_find_pkey); +static struct net_device *ib_get_net_dev_by_port_pkey_ip(struct ib_device *dev, +u8 port, +u16 pkey, +struct sockaddr *addr) +{ + struct net_device *ret = NULL; + struct ib_client *client; + + mutex_lock(device_mutex); + list_for_each_entry(client, client_list, list) + if (client-get_net_device_by_port_pkey_ip) { + ret = client-get_net_device_by_port_pkey_ip(dev, port, +pkey, +addr); + if (ret) + break; + } + + mutex_unlock(device_mutex); + return ret; +} + +struct net *ib_get_net_ns_by_port_pkey_ip(struct 
ib_device *dev, + u8 port, + u16 pkey, + struct sockaddr *addr) +{ + struct net_device *ndev = NULL; + struct net *ns; + + switch (rdma_port_get_link_layer(dev, port)) { + case IB_LINK_LAYER_INFINIBAND: + if (!addr) + goto not_found; + ndev = ib_get_net_dev_by_port_pkey_ip(dev, port, pkey, addr); + break; + default: + goto not_found; + } + + if (!ndev) + goto not_found; + + rcu_read_lock(); + ns = maybe_get_net(dev_net(ndev)); + dev_put(ndev); + rcu_read_unlock(); + if (!ns) + goto not_found; + return ns; + +not_found: + return get_net(init_net); +} +EXPORT_SYMBOL(ib_get_net_ns_by_port_pkey_ip); + static int __init ib_core_init(void) { int ret; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f4a85decc60f..74b239410562 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1683,6 +1683,21 @@ struct ib_client { void (*add) (struct ib_device *); void (*remove)(struct ib_device *); + /* Returns the net_dev belonging to this ib_client and matching the +* given parameters. +* @dev:An RDMA device that the net_dev use for communication. +* @port: A physical port number on the RDMA device. +* @pkey: P_Key that the net_dev uses if applicable. +* @addr: An IP address the net_dev is configured with. +* +* An ib_client that implements a net_dev on top of RDMA devices +* (such as IP over IB) should implement this callback, allowing the +* rdma_cm module to find the right net_dev for a given request. */ + struct net_device *(*get_net_device_by_port_pkey_ip)( + struct ib_device *dev, + u8 port, + u16 pkey, + struct sockaddr *addr); struct list_head list; }; @@ -2679,4 +2694,18 @@ static inline int ib_check_mr_access(int flags) int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); +/** + * ib_get_net_ns_by_port_pkey_ip() - Return the appropriate net namespace + * for a received CM request + * @dev:
Re: [PATCH v2 00/11] Add network namespace support in the RDMA-CM
Hey Haggai, Did you check for changes needed in drivers/infiniband/core/iwcm.c? I notice that it uses init_net here: static int __init iw_cm_init(void) { iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); if (!iwcm_wq) return -ENOMEM; iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", iwcm_ctl_table); if (!iwcm_ctl_table_hdr) { pr_err("iw_cm: couldn't register sysctl paths\n"); destroy_workqueue(iwcm_wq); return -ENOMEM; } return 0; } -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHSET] printk, netconsole: implement reliable netconsole
Hello, Rob. On Sun, Apr 19, 2015 at 02:25:09AM -0500, Rob Landley wrote: If you have two machines plugged into a hub, and that's _all_ that's plugged in, packets should never get dropped. This was the original use case of netconsole was that the sender and the receiver were plugged into the same router. Development aid on local network hasn't been the only use case for a very long time now. I haven't seen too many large scale setups and two of them were using netconsole as a way to collect kernel messages cluster-wide and having issues with lost messages. One was running it over a separate lower speed network from the main one which they used for most managerial tasks including deployment and packet losses weren't that unusual. The other is running on the same network but the log collector isn't per-rack so the packets end up getting routed through congested parts of the network again experiencing message losses. So are you trying to program around a problem you've actually _seen_, or are you attempting to reinvent TCP/IP yet again based on top of UDP (Drink!) because of a purely theoretical issue? At larger scale, the problem is very real. Let's forget about the reliability part. The main thing is being able to identify message sequences so that the receiver can put the message streams back together. That said, once that's there, whether the reliability part is done with TCP doesn't make that much of a difference as it'd still need to put back the two message streams together, but again this doesn't matter. Let's just ignore this part. printk already keeps log metadata which contains enough information to make netconsole reliable. This patchset does the followings. Adds a giant amount of complexity without quite explaining why. The only significant complexity is on the receiver side and it doesn't even have to be in the kernel. CON_EXTENDED and emitting extended messages are pretty straight-forward changes. Thanks. 
-- tejun -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Problem with patch make nlmsg_end() and genlmsg_end() void
On Wed, 2015-04-08 at 15:12 +0100, David Woodhouse wrote: On Wed, 2015-04-08 at 15:08 +0200, Johannes Berg wrote: Additionally, the failure mode of this was the process running out of memory due to receiving the same results over and over again - does that happen for you? It seems it was stuck in recvmsg(), but that may just be a side effect of happening to interrupt at that point? No, strace shows it's just sitting in recvmsg(). As I said, I'm not *sure* it's caused by the same commit; bisecting is distinctly non-trivial. It just seemed likely. FWIW I went back to the Fedora 3.19 kernel for a week and it didn't show up again. After rebooting to 4.0 earlier today, it's happened already. I'll see if I can find a more reliable way of reproducing it, which will make it slightly saner to try bisecting. -- dwmw2 smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] etherdevice: Add ether_addr_copy_unaligned
From: Mateusz Kulikowski mateusz.kulikow...@gmail.com Date: Sun, 19 Apr 2015 23:39:37 +0200 Some drivers require copying unaligned ethernet addresses. Using memcpy() causes checkpatch warnings and may cause regressions (someone will fix alignment of packed structure) Signed-off-by: Mateusz Kulikowski mateusz.kulikow...@gmail.com I'd rather see something like this submitted in a patch series alongside some actual uses. So I'm tossing this for now. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next 0/2] net: Fix hw csum failure message flood for ppp tunnel
Your postings seem to have trouble reaching the list, and therefore patchwork as well. Can you try one more time? Otherwise I'm the only person seeing these patches, which is kinda pointless, especially since you would like this patch series to get some testing. Thanks. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] ip_forward: Drop frames with attached skb-sk
From: Sebastian Poehn sebastian.po...@gmail.com Date: Mon, 20 Apr 2015 09:19:20 +0200 Initial discussion was: [FYI] xfrm: Don't lookup sk_policy for timewait sockets Forwarded frames should not have a socket attached. Especially tw sockets will lead to panics later-on in the stack. This was observed with TPROXY assigning a tw socket and broken policy routing (misconfigured). As a result frame enters forwarding path instead of input. We cannot solve this in TPROXY as it cannot know that policy routing is broken. v2: Remove useless comment Signed-off-by: Sebastian Poehn sebastian.po...@gmail.com Applied and queued up for -stable, thanks Sebastian. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] pppoe: Lacks DST MAC address check
From: Joakim Tjernlund joakim.tjernl...@transmode.se Date: Sat, 18 Apr 2015 11:53:14 +0200 A pppoe session is identified by its session ID and MAC address. Currently pppoe does not check if the received pkg has the correct MAC address. This is a problem when the eth I/F is in promisc mode as then any DST MAC address is accepted. Please read Documentation/SubmittingPatches in the source tree, you need to provide a proper signoff. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/7] turn Makefile more distribution friendly
On Mon, 13 Apr 2015 16:00:56 +0200 Pavel Šimerda pav...@pavlix.net wrote: From: Pavel Šimerda psime...@redhat.com Changes: * Accept directory settings from environment. * Remove redundant ROOTDIR variable. * Set KERNEL_INCLUDE default to '/usr/include'. * Use CFLAGS from environment. Note: In the long term it might be better to improve the configure script to generate those parts of the Makefile in a manner similar to autoconf. It might be even practical to autotoolize the package. Signed-off-by: Pavel Šimerda psime...@redhat.com I will take this part. But I don't want to start iproute2 down the autoconf/autotool sink hole. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH iproute2] ipxfrm: wrong nl msg sent on deleteall cmd
On Wed, 15 Apr 2015 14:00:53 +0200 Nicolas Dichtel nicolas.dich...@6wind.com wrote: XFRM netlink family is independent from the route netlink family. It's wrong to call rtnl_wilddump_request(), because it will add a 'struct ifinfomsg' into the header and the kernel will complain (at least for xfrm state): netlink: 24 bytes leftover after parsing attributes in process `ip'. Reported-by: Gregory Hoggarth gregory.hogga...@alliedtelesis.co.nz Signed-off-by: Nicolas Dichtel nicolas.dich...@6wind.com Applied thanks -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 02/11] IB/addr: Pass network namespace as a parameter
On Mon, Apr 20, 2015 at 12:03:33PM +0300, Haggai Eran wrote: +/** rdma_addr_find_smac_by_sgid() - Find the src MAC and VLAN ID for a src GID + * @sgid:Source GID to find the MAC and VLAN for. + * @smac:A buffer to contain the resulting MAC address. + * @vlan_id: Will contain the resulting VLAN ID. + * @net: Network namespace to use for the address resolution. + * + * It is the caller's responsibility to keep the network namespace alive until + * the function returns. + */ +int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id, + struct net *net); kdocs are typically placed with the body of the function, not at the prototype. Jason -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 01/11] RDMA/CMA: Mark IPv4 addresses correctly when the listener is IPv6
On Mon, Apr 20, 2015 at 12:03:32PM +0300, Haggai Eran wrote: From: Yotam Kenneth yota...@mellanox.com When accepting a new connection with the listener being IPv6, the family of the new connection is set as IPv6. This causes cma_zero_addr function to return true on a non-zero address. As a result, the wrong code path is taken. This causes the connection request to be rejected, as the RDMA-CM code looks for the wrong type of device. This description doesn't really make sense as to what the problem is. @@ -866,12 +866,12 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_i listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr; ip4 = (struct sockaddr_in *) &id->route.addr.src_addr; - ip4->sin_family = listen4->sin_family; + ip4->sin_family = AF_INET; If listen_id->route.addr.src_addr.ss_family != AF_INET then it is invalid to cast to sockaddr_in. So listen4->sin_family MUST be AF_INET or this function MUST NOT be called. Forcing to AF_INET cannot be correct here. What does this patch have to do with this series? Jason -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH iproute2-next 2/2] netns: allow to dump and monitor nsid
On Thu, 9 Apr 2015 08:30:14 + Nicolas Dichtel nicolas.dich...@6wind.com wrote: Two commands are added: - ip netns list-id - ip monitor nsid A cache is also added to remember the association between the iproute2 netns name (from /var/run/netns/) and the nsid. To avoid interfering with the rth socket, a new rtnl socket (rtnsh) is used to get nsid (we may send rtnl request during listing on rth). Example: $ ip netns list-id nsid 0 (iproute2 netns name: foo) $ ip monitor nsid Deleted nsid 0 (iproute2 netns name: foo) nsid 16 (iproute2 netns name: bar) Signed-off-by: Nicolas Dichtel nicolas.dich...@6wind.com Applied with a couple of little style cleanups. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH iproute2] tc util: Fix possible buffer overflow when print class id
On Mon, 20 Apr 2015 08:33:32 +0300 Vadim Kochan vadi...@gmail.com wrote: From: Vadim Kochan vadi...@gmail.com Use correct handle buffer length. Signed-off-by: Vadim Kochan vadi...@gmail.com Looks fine, applied. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 net-next] ip: Add color output option
On Sat, 18 Apr 2015 13:39:45 +0300 Mathias Nyman m.ny...@iki.fi wrote: It is hard to quickly find what you are looking for in the output of the ip command. Color helps. This patch adds a '-c' flag to highlight these with individual colors: - interface name - ip address - mac address - up/down state Signed-off-by: Mathias Nyman m.ny...@iki.fi I like the idea of this, it would be generally good across the board. But the patch does not apply cleanly to the current version of iproute2. And there are minor style issues. iproute2 in general tries to follow kernel style. WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? #36: new file mode 100644 ERROR: open brace '{' following enum go on the same line #45: FILE: include/color.h:5: +enum color_attr +{ ERROR: open brace '{' following enum go on the same line #195: FILE: lib/color.c:7: +enum color +{ ERROR: that open brace { should be on the previous line #207: FILE: lib/color.c:19: +static const char * const color_codes[] = +{ ERROR: that open brace { should be on the previous line #220: FILE: lib/color.c:32: +static enum color attr_colors[] = +{ ERROR: do not initialise statics to 0 or NULL #229: FILE: lib/color.c:41: +static int color_is_enabled = 0; WARNING: Missing a blank line after declarations #240: FILE: lib/color.c:52: + va_list args; + va_start(args, fmt); -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
YOUR MONEY IS APPROVED.
YOUR MONEY IS APPROVED. FROM ALBERTA / CANADIAN OIL GAS VIEW THE ATTACHMENT. YOUR MONEY IS APPROVED..pdf Description: Adobe PDF document YOUR MONEY IS APPROVED..pdf Description: Adobe PDF document
Re: [PATCH v2 07/11] IB/cm: Add network namespace support
On Mon, Apr 20, 2015 at 12:03:38PM +0300, Haggai Eran wrote: From: Guy Shapiro gu...@mellanox.com Add namespace support to the IB-CM layer. - Each CM-ID now has a network namespace it is associated with, assigned at creation. This namespace is used as needed during subsequent action on the CM-ID or related objects. There is really something weird about this layering. At the CM layer there should be no concept of an IP address, it only deals with GIDs. So how can a CM object have a network namespace associated with it? { av-port = port; av-pkey_index = wc-pkey_index; ib_init_ah_from_wc(port-cm_dev-ib_device, port-port_num, wc, -grh, av-ah_attr, init_net); +grh, av-ah_attr, net); There is something deeply wrong with adding network namespace arguments to verbs. For rocee the gid index clearly specifies the network namespace to use, so much of this should go away and have rocee get the namespace from the gid index. Ie in ib_init_ah_from_wc we have the ib_wc which contains the sgid index. I'm really not excited at how many places are gaining a net when those layers shouldn't even need to care about IP layer details. Just acting as a pass through for rocee doesn't make sense. Jason -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 04/16] printk: implement support for extended console drivers
On Thu 2015-04-16 19:03:41, Tejun Heo wrote: printk log_buf keeps various metadata for each message including its sequence number and timestamp. The metadata is currently available only through /dev/kmsg and stripped out before passed onto console drivers. We want this metadata to be available to console drivers too. Immediately, it's to implement reliable netconsole but may be useful for other console devices too. This patch implements support for extended console drivers. Consoles can indicate that they process extended messages by setting the new CON_EXTENDED flag and they'll fed messages formatted the same way as /dev/kmsg output as follows. level,sequnum,timestamp,contflag;message text One special case is fragments. Message fragments are output immediately to consoles to avoid losing them in case of crashes. For normal consoles, this is handled by later suppressing the assembled result and /dev/kmsg only shows fully assembled message; however, extended consoles would need both the fragments, to avoid losing message in case of a crash, and the assembled result, to tell how the fragments are assembled and which sequence number got assigned to it. To help matching up the fragments with the resulting message, fragments are emitted in the following format. level,-,timestamp,-,fragid=fragid;message fragment And later when the assembly is complete, the following is transmitted, level,sequnum,timestamp,contflag,fragid=fragid;message text * Extended message formatting for console drivers is enabled iff there ^^^ s/iff/if/ are registered extended consoles. * Comment describing extended message formats updated to help distinguishing variable with verbatim terms. 
Signed-off-by: Tejun Heo t...@kernel.org Cc: Kay Sievers k...@vrfy.org Cc: Petr Mladek pmla...@suse.cz --- include/linux/console.h | 1 + kernel/printk/printk.c | 141 +--- 2 files changed, 123 insertions(+), 19 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index 7571a16..04bbd09 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -115,6 +115,7 @@ static inline int con_debug_leave(void) #define CON_BOOT (8) #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ #define CON_BRL (32) /* Used for a braille device */ +#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ struct console { charname[16]; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 0175c46..349a37b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -84,6 +84,8 @@ static struct lockdep_map console_lock_dep_map = { }; #endif +static int nr_ext_console_drivers; + /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. @@ -195,14 +197,28 @@ static int console_may_schedule; * need to be changed in the future, when the requirements change. * * /dev/kmsg exports the structured data in the following line format: - * level,sequnum,timestamp;message text\n + * level,sequnum,timestamp,contflag;message text\n * * The optional key/value pairs are attached as continuation lines starting * with a space character and terminated by a newline. All possible * non-prinatable characters are escaped in the \xff notation. * * Users of the export format should ignore possible additional values - * separated by ',', and find the message after the ';' character. + * separated by ',', and find the message after the ';' character. All + * optional header fields should have the form key=value. + * + * For consoles with CON_EXTENDED set, a message formatted like the + * following may also be printed. 
This is a continuation fragment which are + * being assembled and will be re-transmitted with a normal header once + * assembly finishes. The fragments are sent out immediately to avoid + * losing them over a crash. + * level,-,timestamp,-,fragid=fragid;message fragment\n + * + * On completion of assembly, the following is transmitted. + * level,sequnum,timestamp,contflag,fragid=fragid;message text\n + * + * Extended consoles should identify and handle duplicates by matching the + * fragids of the fragments and assembled messages. */ enum log_flags { @@ -210,6 +226,7 @@ enum log_flags { LOG_NEWLINE = 2,/* text ended with a newline */ LOG_PREFIX = 4,/* text started with a prefix */ LOG_CONT= 8,/* text is a fragment of a continuation line */ + LOG_DICT_META = 16, /* dict contains console meta information */ }; struct printk_log { @@ -292,6 +309,12 @@ static char *log_dict(const struct printk_log *msg) return (char *)msg +
Re: [PATCH V1 net-next] IB/ipoib: Fix ndo_get_iflink
On Mon, 2015-04-20 at 12:21 +0300, Or Gerlitz wrote: On Mon, Apr 20, 2015 at 11:16 AM, Haggai Eran hagg...@mellanox.com wrote: On 17/04/2015 22:21, David Miller wrote: From: Erez Shitrit ere...@mellanox.com Date: Thu, 16 Apr 2015 16:34:34 +0300 Currently, iflink of the parent interface was always accessed, even when interface didn't have a parent and hence we crashed there. Handle the interface types properly: for a child interface, return the ifindex of the parent, for parent interface, return its ifindex. For child devices, make sure to set the parent pointer prior to invoking register_netdevice(), this allows the new ndo to be called by the stack immediately after the child device is registered. Fixes: 5aa7add8f14b ('infiniband/ipoib: implement ndo_get_iflink') Reported-by: Honggang Li ho...@redhat.com Signed-off-by: Erez Shitrit ere...@mellanox.com Signed-off-by: Honggang Li ho...@redhat.com Applied, thanks. Doug, Roland, You might want to include this patch in your for-next / for-4.1 trees, or merge net-next again. Currently they contain the issue it fixes, and it can prevent some systems with IPoIB from booting. Haggai, It's upstream by now, pull Linus tree. Or. Right, it already went via net-next. I skipped it because of that. -- Doug Ledford dledf...@redhat.com GPG KeyID: 0E572FDD signature.asc Description: This is a digitally signed message part
Re: [PATCH V1 net-next] IB/ipoib: Fix ndo_get_iflink
From: Haggai Eran hagg...@mellanox.com Date: Mon, 20 Apr 2015 11:16:34 +0300 On 17/04/2015 22:21, David Miller wrote: From: Erez Shitrit ere...@mellanox.com Date: Thu, 16 Apr 2015 16:34:34 +0300 Currently, iflink of the parent interface was always accessed, even when interface didn't have a parent and hence we crashed there. Handle the interface types properly: for a child interface, return the ifindex of the parent, for parent interface, return its ifindex. For child devices, make sure to set the parent pointer prior to invoking register_netdevice(), this allows the new ndo to be called by the stack immediately after the child device is registered. Fixes: 5aa7add8f14b ('infiniband/ipoib: implement ndo_get_iflink') Reported-by: Honggang Li ho...@redhat.com Signed-off-by: Erez Shitrit ere...@mellanox.com Signed-off-by: Honggang Li ho...@redhat.com Applied, thanks. Doug, Roland, You might want to include this patch in your for-next / for-4.1 trees, or merge net-next again. Currently they contain the issue it fixes, and it can prevent some systems with IPoIB from booting. I put this into 'net', not 'net-next'. 'net-next' is dormant after I do my first push to Linus of the merge window. After that everything goes via 'net' until the merge window closes and I open 'net-next' up again. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 06/10] ipv6: Avoid deleting RTF_CACHE route from ip6_route_del()
From: Martin KaFai Lau ka...@fb.com Date: Fri, 10 Apr 2015 18:54:09 -0700 Before patch 'Allow pmtu update on /128 via gateway route', RTF_CACHE route was not created for DST_HOST. It also requires changes on both delete code path and rt6_select() code patch. This patch fixes the delete code path to avoid deleting the RTF_CACHE route by 'ip -6 r del...' Signed-off-by: Martin KaFai Lau ka...@fb.com Reviewed-by: Hannes Frederic Sowa han...@stressinduktion.org If a cached route was created in response to say a PMTU event, and it's a clone/copy/cow of the route we are being asked to delete, it absolutely should be removed. In fact this is a critically important aspect of removing routes from the table. So this change does not seem correct. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 04/10] ipv6: Only create RTF_CACHE routes after encountering pmtu exception
From: Martin KaFai Lau ka...@fb.com Date: Fri, 10 Apr 2015 18:54:07 -0700 @@ -1171,8 +1170,15 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.flowlabel = ip6_flowinfo(iph); dst = ip6_route_output(net, NULL, &fl6); - if (!dst->error) + if (!dst->error) { + unsigned char *outer_network_header = skb_network_header(skb); + int offset; + + skb_reset_network_header(skb); + offset = outer_network_header - skb_network_header(skb); ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); + skb_set_network_header(skb, offset); + } I seriously object to adjusting then restoring the location of the SKB network header in this kind of code path. Instead, adjust the interfaces to the code doing the packet header inspection so that it can accommodate an offset or something like that instead. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv3] pppoe: Lacks DST MAC address check
From: Joakim Tjernlund joakim.tjernl...@transmode.se A pppoe session is identified by its session ID and MAC address. Currently pppoe does not check if the received pkg has the correct MAC address. This is a problem when the eth I/F is in promisc mode as then any DST MAC address is accepted. Signed-off-by: Joakim Tjernlund joakim.tjernl...@transmode.se --- drivers/net/ppp/pppoe.c | 3 +++ 1 file changed, 3 insertions(+) v2 - The MAC address check should encompass all pppoe pkgs, not only the relay type. v3 - Add signoff diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index ff059e1..aa1dd92 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -380,6 +380,9 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) * can't change. */ + if (skb->pkt_type == PACKET_OTHERHOST) + goto abort_kfree; + if (sk->sk_state & PPPOX_BOUND) { ppp_input(&po->chan, skb); } else if (sk->sk_state & PPPOX_RELAY) { -- 2.0.5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Bug 61651 - [regression] Wake-on-LAN broken in alx (AR8161)
Apologies if this is the wrong place or the wrong format, but for a while now multiple people have wanted this functionality back in the ALX driver, and they also claim that the reason for removing it no longer applies to newer kernels. Also, I believe that it could at least be made an option in the .config file. https://bugzilla.kernel.org/show_bug.cgi?id=61651 Live long and prosper, Christ-Jan Wijtmans -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 04/10] ipv6: Only create RTF_CACHE routes after encountering pmtu exception
From: Martin KaFai Lau ka...@fb.com Date: Fri, 10 Apr 2015 18:54:07 -0700 + if (!(rt6->rt6i_flags & RTF_CACHE) && + (!(rt6->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)) || + !(rt6->dst.flags & DST_HOST))) { These big convoluted tests are tiring to read over and over again. At the very least, (rt6->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)) deserves to be a descriptively named inline function. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 00/10] ipv6: Only create RTF_CACHE route after encountering pmtu exception
From: Martin KaFai Lau ka...@fb.com Date: Fri, 10 Apr 2015 18:54:03 -0700 This series is to avoid creating a RTF_CACHE route whenever we are consulting the fib6 tree with a new destination. Instead, only create RTF_CACHE route when we see a pmtu exception. Please separate out the pure bug fixes from this series and submit them for inclusion into 'net', thanks. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 01/11] RDMA/CMA: Mark IPv4 addresses correctly when the listener is IPv6
On Mon, Apr 20, 2015 at 7:41 PM, Jason Gunthorpe jguntho...@obsidianresearch.com wrote: On Mon, Apr 20, 2015 at 12:03:32PM +0300, Haggai Eran wrote: From: Yotam Kenneth yota...@mellanox.com When accepting a new connection with the listener being IPv6, the family of the new connection is set as IPv6. This causes cma_zero_addr function to return true on an non-zero address. As a result, the wrong code path is taken. This causes the connection request to be rejected, as the RDMA-CM code looks for the wrong type of device. This description doesn't really make sense as to what the problem is. @@ -866,12 +866,12 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_i listen4 = (struct sockaddr_in *) listen_id-route.addr.src_addr; ip4 = (struct sockaddr_in *) id-route.addr.src_addr; - ip4-sin_family = listen4-sin_family; + ip4-sin_family = AF_INET; If listen_id-route.addr.src_addr.ss_family != AF_INET then it is invalid to cast to sockaddr_in. So listen4-sin_family MUST be AF_INET or this function MUST NOT be called. Forcing to AF_INET cannot be correct here. Jason, could you take a look @ this thread http://marc.info/?t=14158939504r=1w=2 where the authors addressed some comments from Sean and he eventually Acked the patch? What does this patch have to do with this series? I believe it's either a pre-patch to address some assumption or something they stepped on while testing Or. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] pppoe: Lacks DST MAC address check
On Mon, 2015-04-20 at 14:11 -0400, David Miller wrote: From: Joakim Tjernlund joakim.tjernl...@transmode.se Date: Sat, 18 Apr 2015 11:53:14 +0200 A pppoe session is identified by its session ID and MAC address. Currently pppoe does not check if the received pkg has the correct MAC address. This is a problem when the eth I/F is in promisc mode as then any DST MAC address is accepted. Please read Documentation/SubmittingPatches in the source tree, you need to provide a proper signoff. Arg! I know this but this time it slipped my mind :( So sorry, I will send a v3 soon. Jocke-- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 03/11] time/hrtimer:Introduce hrtimer_get_res64() with timespec64 type for getting the timer resolution
On Mon, 20 Apr 2015, Baolin Wang wrote: This patch introduces hrtimer_get_res64() function to get the timer resolution with timespec64 type, and moves the hrtimer_get_res() function into FYI, That function is about to go away, but it's not a big deal to sort that out once I applied the hrtimer rework to the tip tree. Thanks, tglx -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02/11] timekeeping:Introduce the current_kernel_time64() function with timespec64 type
This patch adds current_kernel_time64() function with timespec64 type, and makes current_kernel_time() 'static inline' and moves it to timekeeping.h file. It is convenient for user to get the current kernel time with timespec64 type, and delete the current_kernel_time() function easily in timekeeping.h file. That is ready for 2038 when get the current time. Signed-off-by: Baolin Wang baolin.w...@linaro.org --- include/linux/timekeeping.h | 10 +- kernel/time/timekeeping.c |6 +++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3eaae47..c6d5ae9 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -18,10 +18,18 @@ extern int do_sys_settimeofday(const struct timespec *tv, * Kernel time accessors */ unsigned long get_seconds(void); -struct timespec current_kernel_time(void); +struct timespec64 current_kernel_time64(void); /* does not take xtime_lock */ struct timespec __current_kernel_time(void); +static inline struct timespec current_kernel_time(void) +{ + struct timespec64 now; + + now = current_kernel_time64(); + return timespec64_to_timespec(now); +} + /* * timespec based interfaces */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 91db941..8ccc02c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1721,7 +1721,7 @@ struct timespec __current_kernel_time(void) return timespec64_to_timespec(tk_xtime(tk)); } -struct timespec current_kernel_time(void) +struct timespec64 current_kernel_time64(void) { struct timekeeper *tk = tk_core.timekeeper; struct timespec64 now; @@ -1733,9 +1733,9 @@ struct timespec current_kernel_time(void) now = tk_xtime(tk); } while (read_seqcount_retry(tk_core.seq, seq)); - return timespec64_to_timespec(now); + return now; } -EXPORT_SYMBOL(current_kernel_time); +EXPORT_SYMBOL(current_kernel_time64); struct timespec64 get_monotonic_coarse64(void) { -- 1.7.9.5 -- To unsubscribe from this list: 
send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 07/11] time/alarmtimer:Convert to the new methods for k_clock structure
This patch switches to the new timespec64/itimerspec64 methods of the k_clock structure, and converts the timespec/itimerspec types to timespec64/itimerspec64 types in the alarmtimer.c file. Signed-off-by: Baolin Wang baolin.w...@linaro.org --- kernel/time/alarmtimer.c | 43 ++- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 1b001ed..68186e1 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -489,35 +489,36 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, /** * alarm_clock_getres - posix getres interface * @which_clock: clockid - * @tp: timespec to fill + * @tp: timespec64 to fill * * Returns the granularity of underlying alarm base clock */ -static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) +static int alarm_clock_getres(const clockid_t which_clock, + struct timespec64 *tp) { clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid; if (!alarmtimer_get_rtcdev()) return -EINVAL; - return hrtimer_get_res(baseid, tp); + return hrtimer_get_res64(baseid, tp); } /** * alarm_clock_get - posix clock_get interface * @which_clock: clockid - * @tp: timespec to fill. + * @tp: timespec64 to fill. * * Provides the underlying alarm base time.
*/ -static int alarm_clock_get(clockid_t which_clock, struct timespec *tp) +static int alarm_clock_get(clockid_t which_clock, struct timespec64 *tp) { struct alarm_base *base = alarm_bases[clock2alarm(which_clock)]; if (!alarmtimer_get_rtcdev()) return -EINVAL; - *tp = ktime_to_timespec(base-gettime()); + *tp = ktime_to_timespec64(base-gettime()); return 0; } @@ -547,24 +548,24 @@ static int alarm_timer_create(struct k_itimer *new_timer) /** * alarm_timer_get - posix timer_get interface * @new_timer: k_itimer pointer - * @cur_setting: itimerspec data to fill + * @cur_setting: itimerspec64 data to fill * * Copies out the current itimerspec data */ static void alarm_timer_get(struct k_itimer *timr, - struct itimerspec *cur_setting) + struct itimerspec64 *cur_setting) { ktime_t relative_expiry_time = alarm_expires_remaining((timr-it.alarm.alarmtimer)); if (ktime_to_ns(relative_expiry_time) 0) { - cur_setting-it_value = ktime_to_timespec(relative_expiry_time); + cur_setting-it_value = ktime_to_timespec64(relative_expiry_time); } else { cur_setting-it_value.tv_sec = 0; cur_setting-it_value.tv_nsec = 0; } - cur_setting-it_interval = ktime_to_timespec(timr-it.alarm.interval); + cur_setting-it_interval = ktime_to_timespec64(timr-it.alarm.interval); } /** @@ -588,14 +589,14 @@ static int alarm_timer_del(struct k_itimer *timr) * alarm_timer_set - posix timer_set interface * @timr: k_itimer pointer to be deleted * @flags: timer flags - * @new_setting: itimerspec to be used - * @old_setting: itimerspec being replaced + * @new_setting: itimerspec64 to be used + * @old_setting: itimerspec64 being replaced * * Sets the timer to new_setting, and starts the timer. 
*/ static int alarm_timer_set(struct k_itimer *timr, int flags, - struct itimerspec *new_setting, - struct itimerspec *old_setting) + struct itimerspec64 *new_setting, + struct itimerspec64 *old_setting) { ktime_t exp; @@ -613,8 +614,8 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, return TIMER_RETRY; /* start the timer */ - timr-it.alarm.interval = timespec_to_ktime(new_setting-it_interval); - exp = timespec_to_ktime(new_setting-it_value); + timr-it.alarm.interval = timespec64_to_ktime(new_setting-it_interval); + exp = timespec64_to_ktime(new_setting-it_value); /* Convert (if necessary) to absolute time */ if (flags != TIMER_ABSTIME) { ktime_t now; @@ -670,7 +671,7 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp) /** - * update_rmtp - Update remaining timespec value + * update_rmtp - Update remaining timespec64 value * @exp: expiration time * @type: timer type * @rmtp: user pointer to remaining timepsec value @@ -824,12 +825,12 @@ static int __init alarmtimer_init(void) int error = 0; int i; struct k_clock alarm_clock = { - .clock_getres = alarm_clock_getres, - .clock_get = alarm_clock_get, + .clock_getres64 = alarm_clock_getres, + .clock_get64= alarm_clock_get, .timer_create =
[PATCH 06/11] char/mmtimer:Convert to the 64bit methods for k_clock callback function
This patch converts to the 64bit methods for k_clock callback function, that converts the timespec type to timespec64 type and converts the itimerspec type to itimerspec64 type. Signed-off-by: Baolin Wang baolin.w...@linaro.org --- drivers/char/mmtimer.c | 36 +--- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index 3d6c067..213d0bb 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -478,18 +478,18 @@ static int sgi_clock_period; static struct timespec sgi_clock_offset; static int sgi_clock_period; -static int sgi_clock_get(clockid_t clockid, struct timespec *tp) +static int sgi_clock_get(clockid_t clockid, struct timespec64 *tp) { u64 nsec; nsec = rtc_time() * sgi_clock_period + sgi_clock_offset.tv_nsec; - *tp = ns_to_timespec(nsec); + *tp = ns_to_timespec64(nsec); tp-tv_sec += sgi_clock_offset.tv_sec; return 0; }; -static int sgi_clock_set(const clockid_t clockid, const struct timespec *tp) +static int sgi_clock_set(const clockid_t clockid, const struct timespec64 *tp) { u64 nsec; @@ -657,7 +657,7 @@ static int sgi_timer_del(struct k_itimer *timr) } /* Assumption: it_lock is already held with irq's disabled */ -static void sgi_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) +static void sgi_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) { if (timr-it.mmtimer.clock == TIMER_OFF) { @@ -668,14 +668,14 @@ static void sgi_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) return; } - cur_setting-it_interval = ns_to_timespec(timr-it.mmtimer.incr * sgi_clock_period); - cur_setting-it_value = ns_to_timespec((timr-it.mmtimer.expires - rtc_time()) * sgi_clock_period); + cur_setting-it_interval = ns_to_timespec64(timr-it.mmtimer.incr * sgi_clock_period); + cur_setting-it_value = ns_to_timespec64((timr-it.mmtimer.expires - rtc_time()) * sgi_clock_period); } static int sgi_timer_set(struct k_itimer *timr, int flags, - struct itimerspec * 
new_setting, - struct itimerspec * old_setting) + struct itimerspec64 *new_setting, + struct itimerspec64 *old_setting) { unsigned long when, period, irqflags; int err = 0; @@ -687,8 +687,8 @@ static int sgi_timer_set(struct k_itimer *timr, int flags, sgi_timer_get(timr, old_setting); sgi_timer_del(timr); - when = timespec_to_ns(new_setting-it_value); - period = timespec_to_ns(new_setting-it_interval); + when = timespec64_to_ns(new_setting-it_value); + period = timespec64_to_ns(new_setting-it_interval); if (when == 0) /* Clear timer */ @@ -699,11 +699,9 @@ static int sgi_timer_set(struct k_itimer *timr, int flags, return -ENOMEM; if (flags TIMER_ABSTIME) { - struct timespec n; unsigned long now; - getnstimeofday(n); - now = timespec_to_ns(n); + now = ktime_get_real_ns(); if (when now) when -= now; else @@ -765,7 +763,7 @@ static int sgi_timer_set(struct k_itimer *timr, int flags, return err; } -static int sgi_clock_getres(const clockid_t which_clock, struct timespec *tp) +static int sgi_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { tp-tv_sec = 0; tp-tv_nsec = sgi_clock_period; @@ -773,13 +771,13 @@ static int sgi_clock_getres(const clockid_t which_clock, struct timespec *tp) } static struct k_clock sgi_clock = { - .clock_set = sgi_clock_set, - .clock_get = sgi_clock_get, - .clock_getres = sgi_clock_getres, + .clock_set64= sgi_clock_set, + .clock_get64= sgi_clock_get, + .clock_getres64 = sgi_clock_getres, .timer_create = sgi_timer_create, - .timer_set = sgi_timer_set, + .timer_set64= sgi_timer_set, .timer_del = sgi_timer_del, - .timer_get = sgi_timer_get + .timer_get64= sgi_timer_get }; /** -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 05/11] time/posix-timers:Convert to the 64bit methods for k_clock callback functions
This patch converts the timespec type to timespec64 type, and converts the itimerspec type to itimerspec64 type for the k_clock callback functions. This patch also converts the timespec type to timespec64 type for the timekeeping_clocktai() function which is used only in the posix-timers.c file. Signed-off-by: Baolin Wang baolin.w...@linaro.org --- include/linux/timekeeping.h |4 +- kernel/time/posix-timers.c | 102 +++ kernel/time/timekeeping.h |2 +- 3 files changed, 57 insertions(+), 51 deletions(-) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index c6d5ae9..bd3df93 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -242,9 +242,9 @@ static inline void get_monotonic_boottime64(struct timespec64 *ts) *ts = ktime_to_timespec64(ktime_get_boottime()); } -static inline void timekeeping_clocktai(struct timespec *ts) +static inline void timekeeping_clocktai(struct timespec64 *ts) { - *ts = ktime_to_timespec(ktime_get_clocktai()); + *ts = ktime_to_timespec64(ktime_get_clocktai()); } /* diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 9070387..47d1abf 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -132,9 +132,9 @@ static struct k_clock posix_clocks[MAX_CLOCKS]; static int common_nsleep(const clockid_t, int flags, struct timespec *t, struct timespec __user *rmtp); static int common_timer_create(struct k_itimer *new_timer); -static void common_timer_get(struct k_itimer *, struct itimerspec *); +static void common_timer_get(struct k_itimer *, struct itimerspec64 *); static int common_timer_set(struct k_itimer *, int, - struct itimerspec *, struct itimerspec *); + struct itimerspec64 *, struct itimerspec64 *); static int common_timer_del(struct k_itimer *timer); static enum hrtimer_restart posix_timer_fn(struct hrtimer *data); @@ -203,17 +203,20 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) } /* Get clock_realtime */ -static int
posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp) +static int posix_clock_realtime_get(clockid_t which_clock, + struct timespec64 *tp) { - ktime_get_real_ts(tp); + ktime_get_real_ts64(tp); return 0; } /* Set clock_realtime */ static int posix_clock_realtime_set(const clockid_t which_clock, - const struct timespec *tp) + const struct timespec64 *tp) { - return do_sys_settimeofday(tp, NULL); + struct timespec ts = timespec64_to_timespec(*tp); + + return do_sys_settimeofday(ts, NULL); } static int posix_clock_realtime_adj(const clockid_t which_clock, @@ -225,48 +228,51 @@ static int posix_clock_realtime_adj(const clockid_t which_clock, /* * Get monotonic time for posix timers */ -static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp) +static int posix_ktime_get_ts(clockid_t which_clock, struct timespec64 *tp) { - ktime_get_ts(tp); + ktime_get_ts64(tp); return 0; } /* * Get monotonic-raw time for posix timers */ -static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) +static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) { - getrawmonotonic(tp); + getrawmonotonic64(tp); return 0; } -static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp) +static int posix_get_realtime_coarse(clockid_t which_clock, +struct timespec64 *tp) { - *tp = current_kernel_time(); + *tp = current_kernel_time64(); return 0; } static int posix_get_monotonic_coarse(clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { - *tp = get_monotonic_coarse(); + *tp = get_monotonic_coarse64(); return 0; } -static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp) +static int posix_get_coarse_res(const clockid_t which_clock, + struct timespec64 *tp) { - *tp = ktime_to_timespec(KTIME_LOW_RES); + *tp = ktime_to_timespec64(KTIME_LOW_RES); return 0; } -static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp) +static int 
posix_get_boottime(const clockid_t which_clock, + struct timespec64 *tp) { - get_monotonic_boottime(tp); + get_monotonic_boottime64(tp); return 0; } -static int posix_get_tai(clockid_t which_clock, struct timespec *tp) +static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) { timekeeping_clocktai(tp); return 0; @@ -278,57
[PATCH 08/11] time/posix-clock:Convert to the 64bit methods for k_clock and posix_clock_operations structure
This patch converts the posix clock operations over to the new methods with timespec64/itimerspec64 type to make them ready for 2038, and it is based on the ptp patch series. It also switches to the 64bit methods of the k_clock structure, converting the timespec/itimerspec type to timespec64/itimerspec64 type. Signed-off-by: Baolin Wang baolin.w...@linaro.org --- drivers/ptp/ptp_clock.c | 26 -- include/linux/posix-clock.h | 10 +- kernel/time/posix-clock.c | 20 ++-- 3 files changed, 23 insertions(+), 33 deletions(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index bee8270..8c086e7 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -97,32 +97,24 @@ static s32 scaled_ppm_to_ppb(long ppm) /* posix clock implementation */ -static int ptp_clock_getres(struct posix_clock *pc, struct timespec *tp) +static int ptp_clock_getres(struct posix_clock *pc, struct timespec64 *tp) { tp-tv_sec = 0; tp-tv_nsec = 1; return 0; } -static int ptp_clock_settime(struct posix_clock *pc, const struct timespec *tp) +static int ptp_clock_settime(struct posix_clock *pc, + const struct timespec64 *tp) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); - struct timespec64 ts = timespec_to_timespec64(*tp); - - return ptp-info-settime64(ptp-info, ts); + return ptp-info-settime64(ptp-info, tp); } -static int ptp_clock_gettime(struct posix_clock *pc, struct timespec *tp) +static int ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); - struct timespec64 ts; - int err; - - err = ptp-info-gettime64(ptp-info, ts); - if (!err) - *tp = timespec64_to_timespec(ts); - - return err; + return ptp-info-gettime64(ptp-info, tp); } static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) @@ -134,8 +126,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) ops = ptp-info; if (tx-modes ADJ_SETOFFSET) { - struct timespec ts; - ktime_t kt; +
struct timespec64 ts; s64 delta; ts.tv_sec = tx-time.tv_sec; @@ -147,8 +138,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) if ((unsigned long) ts.tv_nsec = NSEC_PER_SEC) return -EINVAL; - kt = timespec_to_ktime(ts); - delta = ktime_to_ns(kt); + delta = timespec64_to_ns(ts); err = ops-adjtime(ops, delta); } else if (tx-modes ADJ_FREQUENCY) { s32 ppb = scaled_ppm_to_ppb(tx-freq); diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 34c4498..fd7e22c 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -59,23 +59,23 @@ struct posix_clock_operations { int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx); - int (*clock_gettime)(struct posix_clock *pc, struct timespec *ts); + int (*clock_gettime)(struct posix_clock *pc, struct timespec64 *ts); - int (*clock_getres) (struct posix_clock *pc, struct timespec *ts); + int (*clock_getres)(struct posix_clock *pc, struct timespec64 *ts); int (*clock_settime)(struct posix_clock *pc, - const struct timespec *ts); + const struct timespec64 *ts); int (*timer_create) (struct posix_clock *pc, struct k_itimer *kit); int (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit); void (*timer_gettime)(struct posix_clock *pc, - struct k_itimer *kit, struct itimerspec *tsp); + struct k_itimer *kit, struct itimerspec64 *tsp); int (*timer_settime)(struct posix_clock *pc, struct k_itimer *kit, int flags, - struct itimerspec *tsp, struct itimerspec *old); + struct itimerspec64 *tsp, struct itimerspec64 *old); /* * Optional character device methods: */ diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index ce033c7..e21e4c1 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -297,7 +297,7 @@ out: return err; } -static int pc_clock_gettime(clockid_t id, struct timespec *ts) +static int pc_clock_gettime(clockid_t id, struct timespec64 *ts) { struct posix_clock_desc cd; int err; @@ -316,7 +316,7 @@ static int 
pc_clock_gettime(clockid_t id, struct timespec *ts) return err; } -static int pc_clock_getres(clockid_t id, struct timespec *ts) +static int pc_clock_getres(clockid_t id, struct timespec64 *ts) {
[PATCH 01/11] linux/time64.h:Introduce the 'struct itimerspec64' for 64bit
This patch introduces the 'struct itimerspec64' for 64bit to replace itimerspec, and also introduces the conversion methods itimerspec64_to_itimerspec() and itimerspec_to_itimerspec64(), which make itimerspec ready for the year 2038. Signed-off-by: Baolin Wang baolin.w...@linaro.org --- include/linux/time64.h | 13 + 1 file changed, 13 insertions(+) diff --git a/include/linux/time64.h b/include/linux/time64.h index a383147..3647bdd 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -18,6 +18,11 @@ struct timespec64 { }; #endif +struct itimerspec64 { + struct timespec64 it_interval; /* timer period */ + struct timespec64 it_value; /* timer expiration */ +}; + /* Parameters used to convert the timespec values: */ #define MSEC_PER_SEC 1000L #define USEC_PER_MSEC 1000L @@ -187,4 +192,12 @@ static __always_inline void timespec64_add_ns(struct timespec64 *a, u64 ns) #endif +#define itimerspec64_to_itimerspec(its64) \ + ({ (struct itimerspec){ .it_interval = timespec64_to_timespec((its64).it_interval), \ + .it_value = timespec64_to_timespec((its64).it_value) }; }) + +#define itimerspec_to_itimerspec64(its) \ + ({ (struct itimerspec64){ .it_interval = timespec_to_timespec64((its).it_interval), \ + .it_value = timespec_to_timespec64((its).it_value) }; }) + #endif /* _LINUX_TIME64_H */ -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 03/11] time/hrtimer:Introduce hrtimer_get_res64() with timespec64 type for getting the timer resolution
This patch introduces hrtimer_get_res64() function to get the timer resolution with timespec64 type, and moves the hrtimer_get_res() function into include/linux/hrtimer.h as a 'static inline' helper that just calls hrtimer_get_res64. It is ready for 2038 year when getting the timer resolution by hrtimer_get_res64() function with timespec64 type, and it is convenient to delete the old hrtimer_get_res() function in hrtimer.h file. Signed-off-by: Baolin Wang baolin.w...@linaro.org --- include/linux/hrtimer.h | 12 +++- kernel/time/hrtimer.c | 10 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 05f6df1..ee8ed44 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -383,7 +383,17 @@ static inline int hrtimer_restart(struct hrtimer *timer) /* Query timers: */ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); -extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); +extern int hrtimer_get_res64(const clockid_t which_clock, +struct timespec64 *tp); + +static inline int hrtimer_get_res(const clockid_t which_clock, + struct timespec *tp) +{ + struct timespec64 *ts64; + + *ts64 = timespec_to_timespec64(*tp); + return hrtimer_get_res64(which_clock, ts64); +} extern ktime_t hrtimer_get_next_event(void); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index bee0c1f..508d936 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1175,24 +1175,24 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, EXPORT_SYMBOL_GPL(hrtimer_init); /** - * hrtimer_get_res - get the timer resolution for a clock + * hrtimer_get_res64 - get the timer resolution for a clock * @which_clock: which clock to query - * @tp: pointer to timespec variable to store the resolution + * @tp: pointer to timespec64 variable to store the resolution * * Store the resolution of the clock selected by @which_clock in the * variable pointed to by @tp. 
*/ -int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) +int hrtimer_get_res64(const clockid_t which_clock, struct timespec64 *tp) { struct hrtimer_cpu_base *cpu_base; int base = hrtimer_clockid_to_base(which_clock); cpu_base = raw_cpu_ptr(hrtimer_bases); - *tp = ktime_to_timespec(cpu_base-clock_base[base].resolution); + *tp = ktime_to_timespec64(cpu_base-clock_base[base].resolution); return 0; } -EXPORT_SYMBOL_GPL(hrtimer_get_res); +EXPORT_SYMBOL_GPL(hrtimer_get_res64); static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) { -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 04/11] posix timers:Introduce the 64bit methods with timespec64 type for k_clock structure
This patch introduces the new methods with timespec64 type for k_clock structure, converts the timespec type to timespec64 type in k_clock structure and converts the itimerspec type to itimerspec64 type to get ready for the 2038 issue. It also introduces the 64bit methods with timespec64 type for the framework functions. Next step will migrate all the k_clock users to use the new methods with timespec64 type and itimerspec64 type, and it contains the files of posix-timers.c, mmtimer.c, alarmtimer.c, posix-clock.c and posix-cpu-timers.c. Signed-off-by: Baolin Wang baolin.w...@linaro.org --- include/linux/posix-timers.h |9 ++ kernel/time/posix-timers.c | 65 -- 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 907f3fd..35786c5 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -98,9 +98,13 @@ struct k_itimer { struct k_clock { int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); + int (*clock_getres64) (const clockid_t which_clock, struct timespec64 *tp); int (*clock_set) (const clockid_t which_clock, const struct timespec *tp); + int (*clock_set64) (const clockid_t which_clock, + const struct timespec64 *tp); int (*clock_get) (const clockid_t which_clock, struct timespec * tp); + int (*clock_get64) (const clockid_t which_clock, struct timespec64 *tp); int (*clock_adj) (const clockid_t which_clock, struct timex *tx); int (*timer_create) (struct k_itimer *timer); int (*nsleep) (const clockid_t which_clock, int flags, @@ -109,10 +113,15 @@ struct k_clock { int (*timer_set) (struct k_itimer * timr, int flags, struct itimerspec * new_setting, struct itimerspec * old_setting); + int (*timer_set64) (struct k_itimer *timr, int flags, + struct itimerspec64 *new_setting, + struct itimerspec64 *old_setting); int (*timer_del) (struct k_itimer * timr); #define TIMER_RETRY 1 void (*timer_get) (struct k_itimer * timr, struct itimerspec * cur_setting); + void
(*timer_get64) (struct k_itimer *timr, +struct itimerspec64 *cur_setting); }; extern struct k_clock clock_posix_cpu; diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 31ea01f..9070387 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -522,13 +522,13 @@ void posix_timers_register_clock(const clockid_t clock_id, return; } - if (!new_clock-clock_get) { - printk(KERN_WARNING POSIX clock id %d lacks clock_get()\n, + if (!new_clock-clock_get !new_clock-clock_get64) { + printk(KERN_WARNING POSIX clock id %d lacks clock_get() and clock_get64()\n, clock_id); return; } - if (!new_clock-clock_getres) { - printk(KERN_WARNING POSIX clock id %d lacks clock_getres()\n, + if (!new_clock-clock_getres !new_clock-clock_getres64) { + printk(KERN_WARNING POSIX clock id %d lacks clock_getres() and clock_getres64()\n, clock_id); return; } @@ -579,7 +579,7 @@ static struct k_clock *clockid_to_kclock(const clockid_t id) return (id CLOCKFD_MASK) == CLOCKFD ? clock_posix_dynamic : clock_posix_cpu; - if (id = MAX_CLOCKS || !posix_clocks[id].clock_getres) + if (id = MAX_CLOCKS || (!posix_clocks[id].clock_getres !posix_clocks[id].clock_getres64)) return NULL; return posix_clocks[id]; } @@ -771,6 +771,7 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct itimerspec __user *, setting) { struct itimerspec cur_setting; + struct itimerspec64 cur_setting64; struct k_itimer *timr; struct k_clock *kc; unsigned long flags; @@ -781,10 +782,16 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, return -EINVAL; kc = clockid_to_kclock(timr-it_clock); - if (WARN_ON_ONCE(!kc || !kc-timer_get)) + if (WARN_ON_ONCE(!kc || (!kc-timer_get !kc-timer_get64))) { ret = -EINVAL; - else - kc-timer_get(timr, cur_setting); + } else { + if (kc-timer_get64) { + kc-timer_get64(timr, cur_setting64); + cur_setting = itimerspec64_to_itimerspec(cur_setting64); + } else { + kc-timer_get(timr, cur_setting); + } + } unlock_timer(timr, flags); @@ -877,6 +884,7 @@ 
SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, { struct
[PATCH 09/11] cputime:Introduce the cputime_to_timespec64/timespec64_to_cputime function
This patch introduces some functions for converting cputime to timespec64 and back, that replace the timespec type with timespec64 type, as well as for arch/s390 and arch/powerpc architecture. And these new methods will replace the old cputime_to_timespec/timespec_to_cputime function to get ready for the 2038 issue. The cputime_to_timespec/timespec_to_cputime functions are moved to include/linux/cputime.h file so that they can be removed conveniently later. Signed-off-by: Baolin Wang baolin.w...@linaro.org --- arch/powerpc/include/asm/cputime.h|6 +++--- arch/s390/include/asm/cputime.h |8 include/asm-generic/cputime_jiffies.h | 10 +- include/linux/cputime.h | 15 +++ include/linux/jiffies.h |3 +++ kernel/time/time.c| 21 + 6 files changed, 51 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index e245255..5dda5c0 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -154,9 +154,9 @@ static inline cputime_t secs_to_cputime(const unsigned long sec) } /* - * Convert cputime - timespec + * Convert cputime - timespec64 */ -static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p) +static inline void cputime_to_timespec64(const cputime_t ct, struct timespec64 *p) { u64 x = (__force u64) ct; unsigned int frac; @@ -168,7 +168,7 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p) p-tv_nsec = x; } -static inline cputime_t timespec_to_cputime(const struct timespec *p) +static inline cputime_t timespec64_to_cputime(const struct timespec64 *p) { u64 ct; diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index b91e960..1266697 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -89,16 +89,16 @@ static inline cputime_t secs_to_cputime(const unsigned int s) } /* - * Convert cputime to timespec and back. + * Convert cputime to timespec64 and back.
*/ -static inline cputime_t timespec_to_cputime(const struct timespec *value) +static inline cputime_t timespec64_to_cputime(const struct timespec64 *value) { unsigned long long ret = value-tv_sec * CPUTIME_PER_SEC; return (__force cputime_t)(ret + __div(value-tv_nsec * CPUTIME_PER_USEC, NSEC_PER_USEC)); } -static inline void cputime_to_timespec(const cputime_t cputime, - struct timespec *value) +static inline void cputime_to_timespec64(const cputime_t cputime, + struct timespec64 *value) { unsigned long long __cputime = (__force unsigned long long) cputime; #ifndef CONFIG_64BIT diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h index fe386fc..ec77c0b 100644 --- a/include/asm-generic/cputime_jiffies.h +++ b/include/asm-generic/cputime_jiffies.h @@ -44,12 +44,12 @@ typedef u64 __nocast cputime64_t; #define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ) /* - * Convert cputime to timespec and back. + * Convert cputime to timespec64 and abck. */ -#define timespec_to_cputime(__val) \ - jiffies_to_cputime(timespec_to_jiffies(__val)) -#define cputime_to_timespec(__ct,__val)\ - jiffies_to_timespec(cputime_to_jiffies(__ct),__val) +#define timespec64_to_cputime(__val) \ + jiffies_to_cputime(timespec64_to_jiffies(__val)) +#define cputime_to_timespec64(__ct,__val) \ + jiffies_to_timespec64(cputime_to_jiffies(__ct),__val) /* * Convert cputime to timeval and back. 
diff --git a/include/linux/cputime.h b/include/linux/cputime.h index f2eb2ee..f01896f 100644 --- a/include/linux/cputime.h +++ b/include/linux/cputime.h @@ -13,4 +13,19 @@ usecs_to_cputime((__nsecs) / NSEC_PER_USEC) #endif +static inline cputime_t timespec_to_cputime(const struct timespec *ts) +{ + struct timespec64 ts64 = timespec_to_timespec64(*ts); + return timespec64_to_cputime(ts64); +} + +static inline void cputime_to_timespec(const cputime_t cputime, + struct timespec *value) +{ + struct timespec64 *ts64; + + *ts64 = timespec_to_timespec64(*value); + cputime_to_timespec64(cputime, ts64); +} + #endif /* __LINUX_CPUTIME_H */ diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index c367cbd..dbaa4ee 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -293,6 +293,9 @@ extern unsigned long usecs_to_jiffies(const unsigned int u); extern unsigned long timespec_to_jiffies(const struct timespec *value); extern void jiffies_to_timespec(const unsigned long jiffies, struct timespec *value); +extern unsigned long timespec64_to_jiffies(const struct timespec64 *value); +extern void jiffies_to_timespec64(const
[PATCH 11/11] k_clock:Remove the 32bit methods with timespec type
All of the k_clock users have been converted to the new methods. This patch removes the older methods with timespec/itimerspec type. As a result, the k_clock structure is ready for the year 2038. Signed-off-by: Baolin Wang baolin.w...@linaro.org --- include/linux/posix-timers.h |9 -- kernel/time/posix-timers.c | 72 +- 2 files changed, 29 insertions(+), 52 deletions(-) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 35786c5..7c3dae2 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -97,29 +97,20 @@ struct k_itimer { }; struct k_clock { - int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); int (*clock_getres64) (const clockid_t which_clock, struct timespec64 *tp); - int (*clock_set) (const clockid_t which_clock, - const struct timespec *tp); int (*clock_set64) (const clockid_t which_clock, const struct timespec64 *tp); - int (*clock_get) (const clockid_t which_clock, struct timespec * tp); int (*clock_get64) (const clockid_t which_clock, struct timespec64 *tp); int (*clock_adj) (const clockid_t which_clock, struct timex *tx); int (*timer_create) (struct k_itimer *timer); int (*nsleep) (const clockid_t which_clock, int flags, struct timespec *, struct timespec __user *); long (*nsleep_restart) (struct restart_block *restart_block); - int (*timer_set) (struct k_itimer * timr, int flags, - struct itimerspec * new_setting, - struct itimerspec * old_setting); int (*timer_set64) (struct k_itimer *timr, int flags, struct itimerspec64 *new_setting, struct itimerspec64 *old_setting); int (*timer_del) (struct k_itimer * timr); #define TIMER_RETRY 1 - void (*timer_get) (struct k_itimer * timr, - struct itimerspec * cur_setting); void (*timer_get64) (struct k_itimer *timr, struct itimerspec64 *cur_setting); }; diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 47d1abf..3196ec0 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -528,13 +528,13 @@ void 
posix_timers_register_clock(const clockid_t clock_id, return; } - if (!new_clock-clock_get !new_clock-clock_get64) { - printk(KERN_WARNING POSIX clock id %d lacks clock_get() and clock_get64()\n, + if (!new_clock-clock_get64) { + printk(KERN_WARNING POSIX clock id %d lacks clock_get64()\n, clock_id); return; } - if (!new_clock-clock_getres !new_clock-clock_getres64) { - printk(KERN_WARNING POSIX clock id %d lacks clock_getres() and clock_getres64()\n, + if (!!new_clock-clock_getres64) { + printk(KERN_WARNING POSIX clock id %d lacks clock_getres64()\n, clock_id); return; } @@ -585,7 +585,7 @@ static struct k_clock *clockid_to_kclock(const clockid_t id) return (id CLOCKFD_MASK) == CLOCKFD ? clock_posix_dynamic : clock_posix_cpu; - if (id = MAX_CLOCKS || (!posix_clocks[id].clock_getres !posix_clocks[id].clock_getres64)) + if (id = MAX_CLOCKS || !posix_clocks[id].clock_getres64) return NULL; return posix_clocks[id]; } @@ -788,15 +788,11 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, return -EINVAL; kc = clockid_to_kclock(timr-it_clock); - if (WARN_ON_ONCE(!kc || (!kc-timer_get !kc-timer_get64))) { + if (WARN_ON_ONCE(!kc || !kc-timer_get64)) { ret = -EINVAL; } else { - if (kc-timer_get64) { - kc-timer_get64(timr, cur_setting64); - cur_setting = itimerspec64_to_itimerspec(cur_setting64); - } else { - kc-timer_get(timr, cur_setting); - } + kc-timer_get64(timr, cur_setting64); + cur_setting = itimerspec64_to_itimerspec(cur_setting64); } unlock_timer(timr, flags); @@ -911,18 +907,14 @@ retry: return -EINVAL; kc = clockid_to_kclock(timr-it_clock); - if (WARN_ON_ONCE(!kc || (!kc-timer_set !kc-timer_set64))) { + if (WARN_ON_ONCE(!kc || !kc-timer_set64)) { error = -EINVAL; } else { - if (kc-timer_set64) { - new_spec64 = itimerspec_to_itimerspec64(new_spec); - error = kc-timer_set64(timr, flags, new_spec64, - old_spec64); - if (old_setting) -
[PATCH 10/11] time/posix-cpu-timers:Convert to the 64bit methods for k_clock structure
This patch changes to the new methods of k_clock structure with timespec64 type, converts the timespec/itimerspec type to timespec64/itimerspec64 type for the callback function in posix-cpu-timers.c file. Signed-off-by: Baolin Wang baolin.w...@linaro.org --- kernel/time/posix-cpu-timers.c | 83 +--- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 0075da7..51cfead 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -52,7 +52,7 @@ static int check_clock(const clockid_t which_clock) } static inline unsigned long long -timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) +timespec64_to_sample(const clockid_t which_clock, const struct timespec64 *tp) { unsigned long long ret; @@ -60,19 +60,19 @@ timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { ret = (unsigned long long)tp-tv_sec * NSEC_PER_SEC + tp-tv_nsec; } else { - ret = cputime_to_expires(timespec_to_cputime(tp)); + ret = cputime_to_expires(timespec64_to_cputime(tp)); } return ret; } -static void sample_to_timespec(const clockid_t which_clock, +static void sample_to_timespec64(const clockid_t which_clock, unsigned long long expires, - struct timespec *tp) + struct timespec64 *tp) { if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) - *tp = ns_to_timespec(expires); + *tp = ns_to_timespec64(expires); else - cputime_to_timespec((__force cputime_t)expires, tp); + cputime_to_timespec64((__force cputime_t)expires, tp); } /* @@ -141,7 +141,7 @@ static inline unsigned long long virt_ticks(struct task_struct *p) } static int -posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) +posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { int error = check_clock(which_clock); if (!error) { @@ -160,7 +160,7 @@ posix_cpu_clock_getres(const clockid_t which_clock, struct timespec 
*tp) } static int -posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) +posix_cpu_clock_set(const clockid_t which_clock, const struct timespec64 *tp) { /* * You can never reset a CPU clock, but we check for other errors @@ -263,7 +263,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock, static int posix_cpu_clock_get_task(struct task_struct *tsk, const clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { int err = -EINVAL; unsigned long long rtn; @@ -277,13 +277,14 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk, } if (!err) - sample_to_timespec(which_clock, rtn, tp); + sample_to_timespec64(which_clock, rtn, tp); return err; } -static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) +static int posix_cpu_clock_get(const clockid_t which_clock, + struct timespec64 *tp) { const pid_t pid = CPUCLOCK_PID(which_clock); int err = -EINVAL; @@ -598,7 +599,7 @@ static inline void posix_cpu_timer_kick_nohz(void) { } * and try again. (This happens when the timer is in the middle of firing.) 
*/ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, - struct itimerspec *new, struct itimerspec *old) + struct itimerspec64 *new, struct itimerspec64 *old) { unsigned long flags; struct sighand_struct *sighand; @@ -608,7 +609,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, WARN_ON_ONCE(p == NULL); - new_expires = timespec_to_sample(timer-it_clock, new-it_value); + new_expires = timespec64_to_sample(timer-it_clock, new-it_value); /* * Protect against sighand release/switch in exit/exec and p-cpu_timers @@ -669,7 +670,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, bump_cpu_timer(timer, val); if (val timer-it.cpu.expires) { old_expires = timer-it.cpu.expires - val; - sample_to_timespec(timer-it_clock, + sample_to_timespec64(timer-it_clock, old_expires, old-it_value); } else { @@ -709,7 +710,7 @@ static int posix_cpu_timer_set(struct
[PATCH 00/11] Convert the posix_clock_operations and k_clock structure to ready for 2038
This patch series changes the 32-bit time type (timespec/itimerspec) to the 64-bit one (timespec64/itimerspec64), since 32-bit time types will break in the year 2038. This patch series introduces new methods with timespec64/itimerspec64 type, and removes the old ones with timespec/itimerspec type for posix_clock_operations and k_clock structure. Also introduces some new functions with timespec64/itimerspec64 type, like current_kernel_time64(), hrtimer_get_res64(), cputime_to_timespec64() and timespec64_to_cputime(). Baolin Wang (11): linux/time64.h:Introduce the 'struct itimerspec64' for 64bit timekeeping:Introduce the current_kernel_time64() function with timespec64 type time/hrtimer:Introduce hrtimer_get_res64() with timespec64 type for getting the timer resolution posix timers:Introduce the 64bit methods with timespec64 type for k_clock structure time/posix-timers:Convert to the 64bit methods for k_clock callback functions char/mmtimer:Convert to the 64bit methods for k_clock callback function time/alarmtimer:Convert to the new methods for k_clock structure time/posix-clock:Convert to the 64bit methods for k_clock and posix_clock_operations structure cputime:Introduce the cputime_to_timespec64/timespec64_to_cputime function time/posix-cpu-timers:Convert to the 64bit methods for k_clock structure k_clock:Remove the 32bit methods with timespec type arch/powerpc/include/asm/cputime.h|6 +- arch/s390/include/asm/cputime.h |8 +- drivers/char/mmtimer.c| 36 drivers/ptp/ptp_clock.c | 26 ++ include/asm-generic/cputime_jiffies.h | 10 +-- include/linux/cputime.h | 15 include/linux/hrtimer.h | 12 ++- include/linux/jiffies.h |3 + include/linux/posix-clock.h | 10 +-- include/linux/posix-timers.h | 18 ++-- include/linux/time64.h| 13 +++ include/linux/timekeeping.h | 14 ++- kernel/time/alarmtimer.c | 43 - kernel/time/hrtimer.c | 10 +-- kernel/time/posix-clock.c | 20 ++--- kernel/time/posix-cpu-timers.c| 83 + kernel/time/posix-timers.c| 157 +++-- kernel/time/time.c| 21 + 
kernel/time/timekeeping.c |6 +- kernel/time/timekeeping.h |2 +- 20 files changed, 302 insertions(+), 211 deletions(-) -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv3] pppoe: Lacks DST MAC address check
From: Joakim Tjernlund joakim.tjernl...@transmode.se Date: Mon, 20 Apr 2015 21:07:48 +0200 From: Joakim Tjernlund joakim.tjernl...@transmode.se A pppoe session is identified by its session ID and MAC address. Currently pppoe does not check if the received pkg has the correct MAC address. This is a problem when the eth I/F is in promisc mode as then any DST MAC address is accepted. Signed-off-by: Joakim Tjernlund joakim.tjernl...@transmode.se --- drivers/net/ppp/pppoe.c | 3 +++ 1 file changed, 3 insertions(+) v2 - The MAC address check should encompass all pppoe pkgs, not only the relay type. v3 - Add signoff Applied, thanks. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html