[PATCH,net-next] r8169: Change order to init regs

2015-08-22 Thread Corcodel Marian
Disable writing to registers at probe stage because
 register CPlusCmd must be initialized first. Here is the order: 1. CPlusCmd, 2. ChipCmd, 3.
 the remaining registers. I split the __rtl8169_set_features function to solve this issue.

Signed-off-by: Corcodel Marian corcodel.mar...@gmail.com

diff --git a/drivers/net/ethernet/realtek/r8169.c 
b/drivers/net/ethernet/realtek/r8169.c
index ea461fe..b0f7ed2 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -2057,6 +2057,48 @@ static void __rtl8169_set_features(struct net_device 
*dev,
RTL_R16(CPlusCmd);
 }
 
+static void rtl8169_set_feat_part2(struct net_device *dev,
+  netdev_features_t features)
+{
+   struct rtl8169_private *tp = netdev_priv(dev);
+   void __iomem *ioaddr = tp-mmio_addr;
+   
+
+
+   if (features  NETIF_F_RXCSUM)
+   tp-cp_cmd |= RxChkSum;
+   else
+   tp-cp_cmd = ~RxChkSum;
+
+   if (features  NETIF_F_HW_VLAN_CTAG_RX)
+   tp-cp_cmd |= RxVlan;
+   else
+   tp-cp_cmd = ~RxVlan;
+
+   tp-cp_cmd |= RTL_R16(CPlusCmd)  ~(RxVlan | RxChkSum);
+
+   RTL_W16(CPlusCmd, tp-cp_cmd);
+   RTL_R16(CPlusCmd);
+}
+
+static void rtl8169_set_feat_part1(struct net_device *dev,
+  netdev_features_t features)
+{
+   struct rtl8169_private *tp = netdev_priv(dev);
+   void __iomem *ioaddr = tp-mmio_addr;
+   u32 rx_config;
+
+   rx_config = RTL_R32(RxConfig);
+   if (features  NETIF_F_RXALL)
+   rx_config |= (AcceptErr | AcceptRunt);
+   else
+   rx_config = ~(AcceptErr | AcceptRunt);
+
+   RTL_W32(RxConfig, rx_config);
+
+
+}
+
 static int rtl8169_set_features(struct net_device *dev,
netdev_features_t features)
 {
@@ -7604,6 +7646,7 @@ static int rtl_open(struct net_device *dev)
if (!tp-RxDescArray)
goto err_free_tx_0;
 
+   rtl8169_set_feat_part2(dev, dev-features);
retval = rtl8169_init_ring(dev);
if (retval  0)
goto err_free_rx_1;
@@ -7628,10 +7671,11 @@ static int rtl_open(struct net_device *dev)
 
rtl8169_init_phy(dev, tp);
 
-   __rtl8169_set_features(dev, dev-features);
+   //__rtl8169_set_features(dev, dev-features);
 
rtl_pll_power_up(tp);
-
+   rtl8169_set_feat_part1(dev, dev-features);
+   rtl_init_rxcfg(tp);
rtl_hw_start(dev);
 
netif_start_queue(dev);
@@ -8178,13 +8222,13 @@ static int rtl_init_one(struct pci_dev *pdev, const 
struct pci_device_id *ent)
/* Identify chip attached to board */
rtl8169_get_mac_version(tp, dev, cfg-default_ver);
 
-   rtl_init_rxcfg(tp);
+   //rtl_init_rxcfg(tp);
 
rtl_irq_disable(tp);
 
rtl_hw_initialize(tp);
 
-   rtl_hw_reset(tp);
+   //rtl_hw_reset(tp);
 
rtl_ack_events(tp, 0x);
if (!pci_is_pcie(pdev))
@@ -8200,9 +8244,9 @@ static int rtl_init_one(struct pci_dev *pdev, const 
struct pci_device_id *ent)
chipset = tp-mac_version;
tp-txd_version = rtl_chip_infos[chipset].txd_version;
 
-   RTL_W8(Cfg9346, Cfg9346_Unlock);
+   /*RTL_W8(Cfg9346, Cfg9346_Unlock);
RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
-   RTL_W8(Config5, RTL_R8(Config5)  (BWF | MWF | UWF | LanWake | 
PMEStatus));
+   RTL_W8(Config5, RTL_R8(Config5)  (BWF | MWF | UWF | LanWake | 
PMEStatus));*/
switch (tp-mac_version) {
case RTL_GIGA_MAC_VER_34:
case RTL_GIGA_MAC_VER_35:
@@ -8234,7 +8278,7 @@ static int rtl_init_one(struct pci_dev *pdev, const 
struct pci_device_id *ent)
if ((RTL_R8(Config5)  (UWF | BWF | MWF)) != 0)
tp-features |= RTL_FEATURE_WOL;
tp-features |= rtl_try_msi(tp, cfg);
-   RTL_W8(Cfg9346, Cfg9346_Lock);
+   //RTL_W8(Cfg9346, Cfg9346_Lock);
 
if (rtl_tbi_enabled(tp)) {
tp-set_speed = rtl8169_set_speed_tbi;
-- 
2.1.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH, net-next]r8169:Disable interrupts.

2015-08-22 Thread Francois Romieu
Corcodel Marian corcodel.mar...@gmail.com :
 Disable interrupts when closing the interface.
 
 
 Signed-off-by: Corcodel Marian corcodel.mar...@gmail.com
 
 diff --git a/drivers/net/ethernet/realtek/r8169.c 
 b/drivers/net/ethernet/realtek/r8169.c
 index 6cd7226..ea461fe 100644
 --- a/drivers/net/ethernet/realtek/r8169.c
 +++ b/drivers/net/ethernet/realtek/r8169.c
 @@ -7548,6 +7548,7 @@ static int rtl8169_close(struct net_device *dev)
  
   /* Update counters before going down */
   rtl8169_update_counters(dev);
 + rtl8169_irq_mask_and_ack(tp);

It's already called through
- rtl8169_close
  - rtl8169_down

-- 
Ueimor
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 net-next] r8169: Add values missing in @get_stats64 from HW counters

2015-08-22 Thread Francois Romieu
Corinna Vinschen vinsc...@redhat.com :
[...]
 That won't happen with the current patch because only
 rtl8169_reset_counters would print a log message, it's only called from
 open, and open occurs rather seldom.  Atop of that the code only tries
 to reset counters on HW supporting it, and only if resetting on the HW
 fails, there will be a log message at all.  There's no reasonable chance
 that failing to reset the counters will lead to log flooding.

Thanks for reformulating it. We are in violent agreement here.

[...]
 I'm not trying to avoid work, I'm trying to understand.
 
 As far as I see it failing to reset the counters has no impact on the
 viability of the code.  It's still working with offsets and if the
 offset is 0 or non-0, the user space won't see the difference in the
 values returned by @get_stats64.  Successful resetting the counters is
 just a bonus.

Sorry, my English was really bad:

the code should propagate failure when rtl8169_reset_counters and
rtl8169_update_counters *simultaneously* fail.

-- 
Ueimor
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: ipg and dl2k mess

2015-08-22 Thread Francois Romieu
Ondrej Zary li...@rainbow-software.org :
[...]
 The patch below is enough to make my IP1000A card work with dl2k driver - no
 more lost packets and hangs. Haven't tested gigabit speed yet - the PHY will
 probably need some tweaking but that should be easy.

Neither dl2k nor ipg uses napi. They are a bit dusty.

 So maybe we should add IP1000A support to dl2k and remove the broken ipg
 driver.

Do you mean to merge both ?

 Does anyone have HW to test?

I have a PCI 32 IC Plus IP1000A at hand (courtesy of IC Plus Corp).

-- 
Ueimor
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Hi

2015-08-22 Thread barr lewis
let us talk
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: Visa Debit Card Pay-Out

2015-08-22 Thread Tim Baumgarten

Your name and email was randomly picked by Visa Debit Card Europe Inc. in our 
Barclay premier league 2015/2016 season prize pay-out promo. Please contact the 
Coordinator @barclaypremiershipinc2...@hotmail.com.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/4] Added ART correlated clocksource and ART CPU feature

2015-08-22 Thread Thomas Gleixner
On Fri, 21 Aug 2015, Christopher S. Hall wrote:

 Add detect_art() call to early TSC initialization which reads ART-TSC
   numerator/denominator and sets CPU feature if present
 
 Add convert_art_to_tsc() function performing conversion ART to TSC
 
 Add art_timestamp referencing art_to_tsc() and clocksource_tsc enabling
   driver conversion of ART to TSC

That changelog needs a rewrite. See patch 1/4

 @@ -352,6 +352,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
  #define cpu_has_de   boot_cpu_has(X86_FEATURE_DE)
  #define cpu_has_pse  boot_cpu_has(X86_FEATURE_PSE)
  #define cpu_has_tsc  boot_cpu_has(X86_FEATURE_TSC)
 +#define cpu_has_art  boot_cpu_has(X86_FEATURE_ART)

Please do not add more cpu_has macros. There is nothing wrong to write
boot_cpu_has(X86_FEATURE_ART) in the code.

 +#define ART_CPUID_LEAF (0x15)
 +#define ART_MIN_DENOMINATOR (2)

#define ART_CPUID_LEAF 0x15
#define ART_MIN_DENOMINATOR 2

Why is the minimum denominator 2? That wants a comment.

 +static u32 art_to_tsc_numerator;
 +static u32 art_to_tsc_denominator;

Both want to be read_mostly

 +/*
 + * If ART is present detect the numberator:denominator to convert to TSC
 + */
 +void detect_art(void)
 +{
 + unsigned int unused[2];
 +
 + if (boot_cpu_data.cpuid_level = ART_CPUID_LEAF) {
 + cpuid(ART_CPUID_LEAF, art_to_tsc_denominator,
 +   art_to_tsc_numerator, unused, unused+1);
 +
 + if (art_to_tsc_denominator = ART_MIN_DENOMINATOR) {
 + set_cpu_cap(boot_cpu_data, X86_FEATURE_ART);
 + }

No parentheses around one liners please.

 + }
 +}
 +
  static int __init cpufreq_tsc(void)
  {
   if (!cpu_has_tsc)
   return 0;
 +
 + detect_art();
 +
   if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
   return 0;
   cpufreq_register_notifier(time_cpufreq_notifier_block,
 @@ -1059,6 +1085,32 @@ int unsynchronized_tsc(void)
   return 0;
  }
  
 +/*
 + * Convert ART to TSC given numerator/denominator found in detect_art()
 + */
 +static u64 convert_art_to_tsc(struct correlated_cs *cs, u64 cycles)
 +{
 + u64 tmp, res;
 +
 + switch (art_to_tsc_denominator) {
 + default:
 + res = (cycles / art_to_tsc_denominator) * art_to_tsc_numerator;
 + tmp = (cycles % art_to_tsc_denominator) * art_to_tsc_numerator;
 + res += tmp / art_to_tsc_denominator;
 + break;
 + case 2:
 +res = (cycles  1) * art_to_tsc_numerator;
 +tmp = (cycles  0x1) * art_to_tsc_numerator;
 +res += tmp  1;
 +break;

Is it really worth doing this optimization? And if we do it we
shouldn't special case it for 2. You can check at ART detection time
whether the denominator is a power of two and have a flag which
selects a div/mod base or a shift based conversion.

Thanks,

tglx
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/14] RDS: return EMSGSIZE for oversize requests before processing/queueing

2015-08-22 Thread Santosh Shilimkar
From: Mukesh Kacker mukesh.kac...@oracle.com

rds_send_queue_rm() allows for the current datagram being queued
to exceed SO_SNDBUF thresholds by checking bytes queued without
counting in length of current datagram. (Since sk_sndbuf is set
to twice requested SO_SNDBUF value as a kernel heuristic this
is usually fine!)

If this current datagram squeezing past the threshold is itself
many times the size of the sk_sndbuf threshold itself then even
twice the SO_SNDBUF does not save us and it gets queued but
cannot be transmitted. Threads block and deadlock and device
becomes unusable. The check for this datagram not exceeding
SNDBUF thresholds (EMSGSIZE) is not done on this datagram as
that check is only done if queueing attempt fails.
(Datagrams that follow this datagram fail queueing attempts, go
through the check and eventually trip EMSGSIZE error but zero
length datagrams silently fail!)

This fix moves the check for datagrams exceeding SNDBUF limits
before any processing or queueing is attempted and returns EMSGSIZE
early in the rds_sndmsg() code. This change also ensures that all
datagrams get checked for exceeding SNDBUF/sk_sndbuf size limits
and the large datagrams that exceed those limits do not get to
rds_send_queue_rm() code for processing.

Signed-off-by: Mukesh Kacker mukesh.kac...@oracle.com
Signed-off-by: Santosh Shilimkar ssant...@kernel.org
Signed-off-by: Santosh Shilimkar santosh.shilim...@oracle.com
---
 net/rds/send.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/rds/send.c b/net/rds/send.c
index b0fe412..c2667bf 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1015,6 +1015,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, 
size_t payload_len)
goto out;
}
 
+   if (payload_len  rds_sk_sndbuf(rs)) {
+   ret = -EMSGSIZE;
+   goto out;
+   }
+
/* size of rm including all sgs */
ret = rds_rm_size(msg, payload_len);
if (ret  0)
@@ -1086,11 +1091,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, 
size_t payload_len)
while (!rds_send_queue_rm(rs, conn, rm, rs-rs_bound_port,
  dport, queued)) {
rds_stats_inc(s_send_queue_full);
-   /* XXX make sure this is reasonable */
-   if (payload_len  rds_sk_sndbuf(rs)) {
-   ret = -EMSGSIZE;
-   goto out;
-   }
+
if (nonblock) {
ret = -EAGAIN;
goto out;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/14] RDS: check for valid cm_id before initiating connection

2015-08-22 Thread Santosh Shilimkar
Connection could have been dropped while the route is being resolved
so check for valid cm_id before initiating the connection.

Reviewed-by: Ajaykumar Hotchandani ajaykumar.hotchand...@oracle.com
Signed-off-by: Santosh Shilimkar ssant...@kernel.org
Signed-off-by: Santosh Shilimkar santosh.shilim...@oracle.com
---
 net/rds/rdma_transport.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 2082408..b9b40af 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -34,6 +34,7 @@
 #include rdma/rdma_cm.h
 
 #include rdma_transport.h
+#include ib.h
 
 static struct rdma_cm_id *rds_rdma_listen_id;
 
@@ -82,8 +83,18 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
break;
 
case RDMA_CM_EVENT_ROUTE_RESOLVED:
-   /* XXX worry about racing with listen acceptance */
-   ret = trans-cm_initiate_connect(cm_id);
+   /* Connection could have been dropped so make sure the
+* cm_id is valid before proceeding
+*/
+   if (conn) {
+   struct rds_ib_connection *ibic;
+
+   ibic = conn-c_transport_data;
+   if (ibic  ibic-i_cm_id == cm_id)
+   ret = trans-cm_initiate_connect(cm_id);
+   else
+   rds_conn_drop(conn);
+   }
break;
 
case RDMA_CM_EVENT_ESTABLISHED:
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/14] RDS: Assorted bug fixes

2015-08-22 Thread Santosh Shilimkar
We would like to improve RDS upstream support and in that context, I
started playing with it.  But I ran into a number of issues, including one
as basic as RDS IB RDMA not working. As part of the debugging, I ended up
creating the $subject series which has a bunch of assorted fixes. At
least with this series I can run RDS IB RDMA and other tests
successfully.

Some of these fixes have been done by Chris Meson, Andy Grover and
Zach Brown while at Oracle. There are still more kinks with FMR and
error handling and I plan to address them in a follow up series.

Series generated against Linus's master (v4.2-rc7) but also applies
cleanly against net-next. It's tested on Oracle hardware with IB
fabric for both bcopy as well as RDMA mode. I don't have access
to iWARP hardware so any testing help on iWARP hardware is appreciated.

Mukesh Kacker (1):
  RDS: return EMSGSIZE for oversize requests before processing/queueing

Santosh Shilimkar (13):
  RDS: restore return value in rds_cmsg_rdma_args()
  RDS: always free recv frag as we free its ring entry
  RDS: destroy the ib state earlier during shutdown
  RDS: don't update ip address tables if the address hasn't changed
  RDS: make sure we post recv buffers
  RDS: check for congestion updates during rds_send_xmit
  RDS: add a sock_destruct callback debug aid
  RDS: Mark message mapped before transmit
  RDS: Make sure we do a signaled send for large-send
  RDS: Fix assertion level from fatal to warning
  RDS: Don't destroy the rdma id until after we're done using it
  RDS: make sure rds_send_drop_to properly takes the m_rs_lock
  RDS: check for valid cm_id before initiating connection

 net/rds/af_rds.c |  9 ++
 net/rds/connection.c |  2 ++
 net/rds/ib.h |  2 +-
 net/rds/ib_cm.c  | 17 +++-
 net/rds/ib_rdma.c| 11 ++--
 net/rds/ib_recv.c| 71 ++--
 net/rds/ib_send.c|  5 
 net/rds/rdma.c   |  4 ++-
 net/rds/rdma_transport.c | 15 --
 net/rds/rds.h|  1 +
 net/rds/send.c   | 54 ++--
 11 files changed, 153 insertions(+), 38 deletions(-)


Regards,
Santosh
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/14] RDS: Make sure we do a signaled send for large-send

2015-08-22 Thread Santosh Shilimkar
WRs (Work Requests) always generate a WC (Work Completion) with
signaled send. Default RDS IB code is set up for un-signaled
completion. Since the RDS connection is persistent, we can end up
sending the data even after large-send when the remote end is
not active (for any reason).

By doing a signaled send at least once per large-send,
we can at least detect the problem in the work completion
handler, thereby avoiding sending more data to an
inactive remote.

Reviewed-by: Ajaykumar Hotchandani ajaykumar.hotchand...@oracle.com
Signed-off-by: Santosh Shilimkar ssant...@kernel.org
Signed-off-by: Santosh Shilimkar santosh.shilim...@oracle.com
---
 net/rds/ib_send.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 5d0a704..c576ebe 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -709,6 +709,11 @@ int rds_ib_xmit(struct rds_connection *conn, struct 
rds_message *rm,
if (scat == rm-data.op_sg[rm-data.op_count]) {
prev-s_op = ic-i_data_op;
prev-s_wr.send_flags |= IB_SEND_SOLICITED;
+   if (!(prev-s_wr.send_flags  IB_SEND_SIGNALED)) {
+   ic-i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
+   prev-s_wr.send_flags |= IB_SEND_SIGNALED;
+   nr_sig++;
+   }
ic-i_data_op = NULL;
}
 
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 1/4] Add correlated clocksource deriving system time from an auxiliary clocksource

2015-08-22 Thread Thomas Gleixner
On Fri, 21 Aug 2015, Christopher S. Hall wrote:

 Add struct correlated_cs with pointer to original clocksource and
   function pointer to convert correlated clocksource to the original
 
 Add get_correlated_timestamp() function which given specific correlated_cs
   and correlated_ts convert correlated counter value to system time

This is not a proper changelog.

1) The subject line lacks a subsystem prefix

   timekeeping:

   Is the proper choice here

2) The subject line should be short and precise

   timekeeping: Add mechanism to gather correlated timestamps

   Might be an informative one.

3) The changelog itself should describe the reason why we want this
   change, the purpose of the change etc.

   Add foo
   Add bar

   Is pointless because we can see that from the patch itself.

   What the patch cannot explain is the WHY. That's what the
   changelog is for.

4) You dropped the authorship

   The proper way to do this is to add a 'FROM: author' at the top of
   the changelog body.

As I wrote the patch, so I give you a changelog along with it:

---
Subject: timekeeping: Add mechanism to gather correlated timestamps

From: Thomas Gleixner t...@linutronix.de

Modern Intel hardware provides the so called Always Running Timer
(ART). The TSC which is usually used for timekeeping is derived from
ART and runs with a fixed frequency ratio to it. ART is routed to
devices and allows to take atomic timestamp samples from the device
clock and the ART. One use case is PTP timestamps on network cards. We
want to utilize this feature as it allows us to better correlate the
PTP timestamp to the system time.

In order to gather precise timestamps we need to make sure that the
conversion from ART to TSC and the following conversion from TSC to
clock realtime happens synchronized with the ongoing timekeeping
updates. Otherwise we might convert an ART timestamp from point A in
time with the conversion factors of point B in time. These conversion
factors can differ due to NTP/PTP frequency adjustments and therefore
the resulting clock realtime timestamp would be slightly off, which is
contrary to the whole purpose of synchronized hardware timestamps.

Provide data structures which describe the correlation between two
clocksources and a function to gather correlated and convert
timestamps from a device. The function is as any other timekeeping
function protected against current timekeeper updates via the
timekeeper sequence lock. It calls the device function to gather the
hardware timestamps and converts them to clock real time and clock
monotonic raw.

Signed-off-by: Thomas Gleixner t...@linutronix.de



Can you see the difference?

 Signed-off-by: Christopher S. Hall christopher.s.h...@intel.com
 ---
  include/linux/clocksource.h | 33 +++
  include/linux/timekeeping.h |  4 +++
  kernel/time/timekeeping.c   | 65 
 +
  3 files changed, 102 insertions(+)
 
 diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
 index 278dd27..4bedadb 100644
 --- a/include/linux/clocksource.h
 +++ b/include/linux/clocksource.h
 @@ -258,4 +258,37 @@ void acpi_generic_timer_init(void);
  static inline void acpi_generic_timer_init(void) { }
  #endif
  
 +/*
 + * struct correlated_cs - Descriptor for a clocksource correlated to another
 + *   clocksource

Don't believe checkpatch here. KernelDoc requires that this is one
line, 80 char limit or not.

  /**
 + * get_correlated_timestamp - Get a correlated timestamp
 + *

Lacks the parameter documentation:

* @crt: Pointer to a correlated timestamp structure which provides
*   the device specific timestamp function and is used to store
*   the raw and the correlated timestamps.
* @crs: Pointer to a correlated clocksource structure which describes 
*   the correlated clocksource and provides a conversion function
*   to the timekeeping clocksource

 + return 0;
 +}
 +EXPORT_SYMBOL(get_correlated_timestamp);

EXPORT_SYMBOL_GPL please.

Thanks,

tglx
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 net-next] netfilter: ipset: Fixing unnamed union init

2015-08-22 Thread Pablo Neira Ayuso
Cc'ing Andrew, since he's got a similar patch in mmotm [1].

On Sat, Aug 22, 2015 at 08:11:18PM +0200, Jozsef Kadlecsik wrote:
 On Sat, 22 Aug 2015, Elad Raz wrote:
 
  As a follow-up to Vinson Lee's proposed post [1], this patch fixes compilation
  issues found with gcc 4.4.7. The initialization of the .cidr field of unnamed
  unions causes a compilation error in gcc 4.4.x.
 
 There's already a (couple of weeks old) patch in the -mm tree to fix the 
 gcc compatibility issue, see the last comment in the thread you refer to:
 
  Visible links
  [1] https://lkml.org/lkml/2015/7/5/74
 
 So I'm unsure whether a new patch should be submitted for this.

I'll take this patch so this follows the usual path through David's
net tree. IIRC, duplicates get automatically dropped from the mmotm
tree.

[1] 
http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/commit/?id=18b386426ff0a90e405c962ec8f51ea70e7df637

I'll wait for some little time just in case someone raises any
concern.

Thanks!
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 4/4] Enabling hardware supported PTP system/device crosstimestamping

2015-08-22 Thread Thomas Gleixner
On Fri, 21 Aug 2015, Christopher S. Hall wrote:
 From: Christopher Hall christopher.s.h...@intel.com
 
 Add getsynctime() PTP device callback to cross timestamp system device
   clock using ART translation depends on platform being = SPT
   and having ART
 
 getsynctime() reads ART (TSC-derived)/device cross timestamp and
   converts to realtime/device time reporting cross timestamp to
   PTP driver

See patch 1/4
 
 index 25a0ad5..228f3f3 100644
 --- a/drivers/net/ethernet/intel/e1000e/ptp.c
 +++ b/drivers/net/ethernet/intel/e1000e/ptp.c
 @@ -25,6 +25,8 @@
   */
  
  #include e1000.h
 +#include asm/tsc.h
 +#include linux/timekeeping.h

The usual way to order includes is:

#include linux/timekeeping.h

#include asm/tsc.h

#include e1000.h

 +/**
 + * e1000e_phc_getsynctime - Reads the current time from the hardware clock 
 and
 + * correlated system time
 + * @ptp: ptp clock structure
 + * @devts: timespec structure to hold the current device time value
 + * @systs: timespec structure to hold the current system time value
 + *
 + * Read device and system (ART) clock simultaneously and return the correct
 + * clock values in ns after converting into a struct timespec.
 + **/
 +static int e1000e_phc_getsynctime(struct ptp_clock_info *ptp,
 +   struct timespec64 *devts,
 +   struct timespec64 *systs)
 +{
 + struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter,
 +  ptp_clock_info);
 + unsigned long flags;
 + u32 remainder;
 + struct correlated_ts art_correlated_ts;
 + u64 device_time;
 + int ret;
 +
 + art_correlated_ts.get_ts = e1000e_phc_get_ts;
 + art_correlated_ts.private = adapter;
 + ret = get_correlated_timestamp(art_correlated_ts,
 +art_timestamper);

Pointless line break

 + if (ret != 0)
 + goto bail;

What's the purpose of this goto?

   if (ret)
return ret;

is completely sufficient.

 +
 + systs-tv_sec =
 + div_u64_rem(art_correlated_ts.system_real.tv64,
 + NSEC_PER_SEC, remainder);
 + systs-tv_nsec = remainder;

ktime_to_timespec64() perhaps?

And please move that conversion to the ptp ioctl

 + spin_lock_irqsave(adapter-systim_lock, flags);
 + device_time = timecounter_cyc2time(adapter-tc,
 +art_correlated_ts.device_ts);



 + /* CPU must have ART and GBe must be from Sunrise Point or greater */
 + if (hw-mac.type  e1000_pch_spt || !cpu_has_art)
 + adapter-ptp_clock_info.getsynctime64 = NULL;

We do it the other way round. We leave the default NULL and update it
if we detect the feature.

Thanks,

tglx
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/14] RDS: restore return value in rds_cmsg_rdma_args()

2015-08-22 Thread Santosh Shilimkar
In rds_cmsg_rdma_args() 'ret' is used by rds_pin_pages() which returns
number of pinned pages on success. And the same value is returned to the
caller of rds_cmsg_rdma_args() on success which is not intended.

Commit f4a3fc03c1d7 (RDS: Clean up error handling in rds_cmsg_rdma_args)
removed the 'ret = 0' line which broke RDS RDMA mode.

Fix it by restoring the return value on rds_pin_pages() success
keeping the clean-up in place.

Signed-off-by: Santosh Shilimkar ssant...@kernel.org
Signed-off-by: Santosh Shilimkar santosh.shilim...@oracle.com
---
 net/rds/rdma.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 40084d8..6401b50 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -658,6 +658,8 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct 
rds_message *rm,
ret = rds_pin_pages(iov-addr, nr, pages, !op-op_write);
if (ret  0)
goto out;
+   else
+   ret = 0;
 
rdsdebug(RDS: nr_bytes %u nr %u iov-bytes %llu iov-addr 
%llx\n,
 nr_bytes, nr, iov-bytes, iov-addr);
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/11] ah6: fix error return code

2015-08-22 Thread Julia Lawall
Return a negative error code on failure.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// smpl
@@
identifier ret; expression e1,e2;
@@
(
if (\(ret  0\|ret != 0\))
 { ... return ret; }
|
ret = 0
)
... when != ret = e1
when != ret
*if(...)
{
  ... when != ret = e2
  when forall
 return ret;
}
// /smpl

Signed-off-by: Julia Lawall julia.law...@lip6.fr

---
 net/ipv6/ah6.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index ed7d4e3..0630a4d 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -577,8 +577,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff 
*skb)
 
work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
ahp-icv_trunc_len + seqhi_len);
-   if (!work_iph)
+   if (!work_iph) {
+   err = -ENOMEM;
goto out;
+   }
 
auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
seqhi = (__be32 *)(auth_data + ahp-icv_trunc_len);

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/11] mlxsw: fix error return code

2015-08-22 Thread Julia Lawall
Return a negative error code on failure.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// smpl
@@
identifier ret; expression e1,e2;
@@
(
if (\(ret  0\|ret != 0\))
 { ... return ret; }
|
ret = 0
)
... when != ret = e1
when != ret
*if(...)
{
  ... when != ret = e2
  when forall
 return ret;
}
// /smpl

Signed-off-by: Julia Lawall julia.law...@lip6.fr

---
 drivers/net/ethernet/mellanox/mlxsw/pci.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c 
b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index a34f474..045f98f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1726,6 +1726,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
mlxsw_pci_dbg_root);
if (!mlxsw_pci-dbg_dir) {
dev_err(pdev-dev, Failed to create debugfs dir\n);
+   err = -ENOMEM;
goto err_dbg_create_dir;
}
 

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/14] RDS: check for congestion updates during rds_send_xmit

2015-08-22 Thread Santosh Shilimkar
Ensure we don't keep sending the data if the link is congested.

Reviewed-by: Ajaykumar Hotchandani ajaykumar.hotchand...@oracle.com
Signed-off-by: Santosh Shilimkar ssant...@kernel.org
Signed-off-by: Santosh Shilimkar santosh.shilim...@oracle.com
---
 net/rds/send.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/rds/send.c b/net/rds/send.c
index e9430f5..dbdf907 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -411,7 +411,8 @@ over_batch:
 */
if (ret == 0) {
smp_mb();
-   if (!list_empty(conn-c_send_queue) 
+   if ((test_bit(0, conn-c_map_queued) ||
+!list_empty(conn-c_send_queue)) 
send_gen == conn-c_send_gen) {
rds_stats_inc(s_send_lock_queue_raced);
goto restart;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/14] RDS: don't update ip address tables if the address hasn't changed

2015-08-22 Thread Santosh Shilimkar
If the IP address tables haven't changed, there is no need to remove
them only to add them back again.

Let's fix it.
Reviewed-by: Ajaykumar Hotchandani ajaykumar.hotchand...@oracle.com
Signed-off-by: Santosh Shilimkar ssant...@kernel.org
Signed-off-by: Santosh Shilimkar santosh.shilim...@oracle.com
---
 net/rds/ib_rdma.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 657ba9f..e49c956 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -151,12 +151,17 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, 
__be32 ipaddr)
struct rds_ib_device *rds_ibdev_old;
 
rds_ibdev_old = rds_ib_get_device(ipaddr);
-   if (rds_ibdev_old) {
+   if (!rds_ibdev_old)
+   return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
+
+   if (rds_ibdev_old != rds_ibdev) {
rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
rds_ib_dev_put(rds_ibdev_old);
+   return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
}
+   rds_ib_dev_put(rds_ibdev_old);
 
-   return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
+   return 0;
 }
 
 void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection 
*conn)
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/14] RDS: always free recv frag as we free its ring entry

2015-08-22 Thread Santosh Shilimkar
We were still seeing rare occurrences of the WARN_ON(recv-r_frag) which
indicates that the recv refill path was finding allocated frags in ring
entries that were marked free. These were usually followed by OOM crashes.
They only seem to be occurring in the presence of completion errors and
connection resets.

This patch ensures that we free the frag as we mark the ring entry free.
This should stop the refill path from finding allocated frags in ring
entries that were marked free.

Reviewed-by: Ajaykumar Hotchandani ajaykumar.hotchand...@oracle.com
Signed-off-by: Santosh Shilimkar ssant...@kernel.org
Signed-off-by: Santosh Shilimkar santosh.shilim...@oracle.com
---
 net/rds/ib_recv.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index cac5b45..2a6a75c 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -982,10 +982,17 @@ static inline void rds_poll_cq(struct rds_ib_connection 
*ic,
}
 
/*
-* It's very important that we only free this ring entry if 
we've truly
-* freed the resources allocated to the entry.  The refilling 
path can
-* leak if we don't.
+* rds_ib_process_recv() doesn't always consume the frag, and
+* we might not have called it at all if the wc didn't indicate
+* success. We already unmapped the frag's pages, though, and
+* the following rds_ib_ring_free() call tells the refill path
+* that it will not find an allocated frag here. Make sure we
+* keep that promise by freeing a frag that's still on the ring.
 */
+   if (recv-r_frag) {
+   rds_ib_frag_free(ic, recv-r_frag);
+   recv-r_frag = NULL;
+   }
rds_ib_ring_free(ic-i_recv_ring, 1);
}
 }
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/14] RDS: Mark message mapped before transmit

2015-08-22 Thread Santosh Shilimkar
rds_send_xmit() marks the rds message map flag after
xmit_[rdma/atomic]() which is clearly wrong.  We need
to maintain the ownership between transport and rds.

Also take care of error path.

Reviewed-by: Ajaykumar Hotchandani ajaykumar.hotchand...@oracle.com
Signed-off-by: Santosh Shilimkar ssant...@kernel.org
Signed-off-by: Santosh Shilimkar santosh.shilim...@oracle.com
---
 net/rds/send.c | 24 
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/net/rds/send.c b/net/rds/send.c
index dbdf907..96ae38d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -282,26 +282,34 @@ restart:
/* The transport either sends the whole rdma or none of it */
if (rm-rdma.op_active  !conn-c_xmit_rdma_sent) {
rm-m_final_op = rm-rdma;
+   /* The transport owns the mapped memory for now.
+* You can't unmap it while it's on the send queue
+*/
+   set_bit(RDS_MSG_MAPPED, rm-m_flags);
ret = conn-c_trans-xmit_rdma(conn, rm-rdma);
-   if (ret)
+   if (ret) {
+   clear_bit(RDS_MSG_MAPPED, rm-m_flags);
+   wake_up_interruptible(rm-m_flush_wait);
break;
+   }
conn-c_xmit_rdma_sent = 1;
 
-   /* The transport owns the mapped memory for now.
-* You can't unmap it while it's on the send queue */
-   set_bit(RDS_MSG_MAPPED, rm-m_flags);
}
 
if (rm-atomic.op_active  !conn-c_xmit_atomic_sent) {
rm-m_final_op = rm-atomic;
+   /* The transport owns the mapped memory for now.
+* You can't unmap it while it's on the send queue
+*/
+   set_bit(RDS_MSG_MAPPED, rm-m_flags);
ret = conn-c_trans-xmit_atomic(conn, rm-atomic);
-   if (ret)
+   if (ret) {
+   clear_bit(RDS_MSG_MAPPED, rm-m_flags);
+   wake_up_interruptible(rm-m_flush_wait);
break;
+   }
conn-c_xmit_atomic_sent = 1;
 
-   /* The transport owns the mapped memory for now.
-* You can't unmap it while it's on the send queue */
-   set_bit(RDS_MSG_MAPPED, rm-m_flags);
}
 
/*
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/14] RDS: add a sock_destruct callback debug aid

2015-08-22 Thread Santosh Shilimkar
This helps to detect the accidental processes/apps trying to destroy
the RDS socket which they are sharing with other processes/apps.

Reviewed-by: Ajaykumar Hotchandani ajaykumar.hotchand...@oracle.com
Signed-off-by: Santosh Shilimkar ssant...@kernel.org
Signed-off-by: Santosh Shilimkar santosh.shilim...@oracle.com
---
 net/rds/af_rds.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 896834c..a2f28a6 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -438,6 +438,14 @@ static const struct proto_ops rds_proto_ops = {
.sendpage = sock_no_sendpage,
 };
 
+static void rds_sock_destruct(struct sock *sk)
+{
+   struct rds_sock *rs = rds_sk_to_rs(sk);
+
+   WARN_ON((&rs->rs_item != rs->rs_item.next ||
+&rs->rs_item != rs->rs_item.prev));
+}
+
 static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 {
struct rds_sock *rs;
@@ -445,6 +453,7 @@ static int __rds_create(struct socket *sock, struct sock 
*sk, int protocol)
sock_init_data(sock, sk);
sock-ops   = rds_proto_ops;
sk-sk_protocol = protocol;
+   sk-sk_destruct = rds_sock_destruct;
 
rs = rds_sk_to_rs(sk);
spin_lock_init(rs-rs_lock);
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/14] RDS: Fix assertion level from fatal to warning

2015-08-22 Thread Santosh Shilimkar
Fix the assertion level since it's not fatal and can be hit
in normal execution paths. There is no need to take the
system down.

We keep the WARN_ON() to detect the condition if we get
here with bad pages.

Reviewed-by: Ajaykumar Hotchandani ajaykumar.hotchand...@oracle.com
Signed-off-by: Santosh Shilimkar ssant...@kernel.org
Signed-off-by: Santosh Shilimkar santosh.shilim...@oracle.com
---
 net/rds/ib_rdma.c | 2 +-
 net/rds/rdma.c| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index e49c956..7b7aac8 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -490,7 +490,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
 
/* FIXME we need a way to tell a r/w MR
 * from a r/o MR */
-   BUG_ON(irqs_disabled());
+   WARN_ON(!page->mapping && irqs_disabled());
set_page_dirty(page);
put_page(page);
}
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 6401b50..c1df9b1 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -451,7 +451,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro)
 * is the case for a RDMA_READ which copies from remote
 * to local memory */
if (!ro-op_write) {
-   BUG_ON(irqs_disabled());
+   WARN_ON(!page->mapping && irqs_disabled());
set_page_dirty(page);
}
put_page(page);
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/14] RDS: Don't destroy the rdma id until after we're done using it

2015-08-22 Thread Santosh Shilimkar
From: Santosh Shilimkar ssant...@kernel.org

During connection resets, we are destroying the rdma id too soon. We can't
destroy it when it is still in use. So let's move rdma_destroy_id() after
we clear the rings.

Reviewed-by: Ajaykumar Hotchandani ajaykumar.hotchand...@oracle.com
Signed-off-by: Santosh Shilimkar ssant...@kernel.org
Signed-off-by: Santosh Shilimkar santosh.shilim...@oracle.com
---
 net/rds/ib_cm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index cb78da1..0443af7 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -646,7 +646,6 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
ib_destroy_cq(ic-i_send_cq);
if (ic-i_recv_cq)
ib_destroy_cq(ic-i_recv_cq);
-   rdma_destroy_id(ic-i_cm_id);
 
/* then free the resources that ib callbacks use */
if (ic-i_send_hdrs)
@@ -672,6 +671,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
if (ic-i_recvs)
rds_ib_recv_clear_ring(ic);
 
+   rdma_destroy_id(ic-i_cm_id);
+
/*
 * Move connection back to the nodev list.
 */
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 3/4] Add support for driver cross-timestamp to PTP_SYS_OFFSET ioctl

2015-08-22 Thread Richard Cochran
On Sat, Aug 22, 2015 at 10:33:48PM +0200, Thomas Gleixner wrote:
  @@ -196,19 +197,31 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int 
  cmd, unsigned long arg)
  break;
  }
  pct = sysoff-ts[0];
  -   for (i = 0; i  sysoff-n_samples; i++) {
  -   getnstimeofday64(ts);
  +   if (ptp-info-getsynctime64  sysoff-n_samples == 1 
 
 The number of samples should be irrelevant for this sampling method.

Chris had sent me a preview of this before he posted, so I can explain
that test for one sample.

User space requests N (1 to 25) samples of the two clocks.  The kernel
is supposed to deliver that many samples.  This has always been the
documented behavior.  From ptp_clock.h:

  struct ptp_sys_offset {
unsigned int n_samples; /* Desired number of measurements. */
unsigned int rsv[3];/* Reserved for future use. */
/*
 * Array of interleaved system/phc time stamps. The kernel
 * will provide 2*n_samples + 1 time stamps, with the last
 * one as a system time stamp.
 */
struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1];
  };

So the kernel cannot simply change n_samples to 1.

I would prefer to have a new system call that compares any two posix
clock_t, but that is of course more work.

Allowing n_samples=1 as a special case is a kind of overloading of the
ioctl to support the new capability.  At least it preserves the
behavior of the interface from the user's perspective.

Thanks,
Richard
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/11] fix error return code

2015-08-22 Thread Julia Lawall
The complete semantic patch that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

// smpl
@ok exists@
identifier f,ret,i;
expression e;
constant c;
@@

// identify a function that returns a negative return value at least once.
f(...) {
... when any
(
return -c@i;
|
ret = -c@i;
... when != ret = e
return ret;
|
if (ret  0) { ... return ret; }
)
... when any
}

@r exists@
identifier ret,ok.f,fn;
expression e1,e2,e3,e4,e5,e6,x;
statement S,S1;
position p1,p2,p3;
@@

// identify a case where the return variable is set to a non-negative value
// and then returned in error-handling code
f(...) {
... when any
(
if@p1 (\(ret  0\|ret != 0\))
 { ... return ret; }
|
ret@p1 = 0
)
... when != \(ret = e1\|ret++\|ret--\|ret+=e1\|ret-=e1\)
when != ret
when any
(
 if (+... ret = e5 ...+) S1
|
 if (+... ret ...+) S1
|
if@p2(+...x = fn(...)...+)
 {
  ... when != ret = e6
  when forall
 return@p3 ret;
}
|
break;
|
x = fn(...)
... when != \(ret = e4\|ret++\|ret--\|ret+=e4\|ret-=e4\)
when != ret
(
 if (+... ret = e3 ...+) S
|
 if (+... ret ...+) S
|
if@p2(+...\(x != 0\|x  0\|x == NULL\|IS_ERR(x)\)...+)
 {
  ... when != ret = e2
  when forall
 return@p3 ret;
}
)
)
... when any
}

@printer depends on r@
position p;
identifier ok.f,pr;
constant char [] c;
@@

f(...) { ...pr@p(...,c,...)... }

@bad0 exists@
identifier r.ret,ok.f,g != {ERR_PTR,IS_ERR};
position p != printer.p;
@@

f(...) { ... when any
g@p(...,ret,...)
... when any
 }

@bad depends on !bad0 exists@
position r.p1,r.p2;
statement S1,S2;
identifier r.ret;
expression e1;
@@

// ignore the above if there is some path where the variable is set to
// something else
(
if@p1 (\(ret  0\|ret != 0\)) S1
|
ret@p1 = 0
)
... when any
 \(ret = e1\|ret++\|ret--\|ret+=e1\|ret-=e1\|ret\)
... when any
if@p2(...) S2

@bad1 depends on !bad0  !bad exists@
position r.p2;
statement S2;
identifier r.ret;
expression e1;
constant c;
@@

ret = -c
... when != \(ret = e1\|ret++\|ret--\|ret+=e1\|ret-=e1\)
when != ret
when any
if@p2(...) S2

@bad2 depends on !bad0  !bad  !bad1 exists@
position r.p1,r.p2;
identifier r.ret;
expression e1;
statement S2;
constant c;
@@

// likewise ignore it if there has been an intervening return
ret@p1 = 0
... when != if (...) { ... ret = e1 ... return ret; }
when != if (...) { ... return -c; }
when any
if@p2(...) S2

@script:python depends on !bad0 && !bad && !bad1 && !bad2@
p1 << r.p1;
p2 << r.p2;
p3 << r.p3;
@@

cocci.print_main("",p1)
cocci.print_secs("",p2)
cocci.print_secs("",p3)
// </smpl>

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/11] net: davinci_emac: fix error return code

2015-08-22 Thread Julia Lawall
Propagate error code on failure.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@@
identifier ret; expression e1,e2;
@@
(
if (\(ret < 0\|ret != 0\))
 { ... return ret; }
|
ret = 0
)
... when != ret = e1
when != &ret
*if(...)
{
  ... when != ret = e2
  when forall
 return ret;
}
// </smpl>

Signed-off-by: Julia Lawall julia.law...@lip6.fr

---
 drivers/net/ethernet/ti/davinci_emac.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/davinci_emac.c 
b/drivers/net/ethernet/ti/davinci_emac.c
index aeebc0a..a21c77b 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -2004,8 +2004,10 @@ static int davinci_emac_probe(struct platform_device 
*pdev)
if (res_ctrl) {
priv-ctrl_base =
devm_ioremap_resource(pdev-dev, res_ctrl);
-   if (IS_ERR(priv-ctrl_base))
+   if (IS_ERR(priv-ctrl_base)) {
+   rc = PTR_ERR(priv-ctrl_base);
goto no_pdata;
+   }
} else {
priv-ctrl_base = priv-remap_addr + pdata-ctrl_mod_reg_offset;
}

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] veth: replace iflink by a dedicated symlink in sysfs

2015-08-22 Thread Vincent Bernat
 ❦ 20 août 2015 14:07 -0700, David Miller da...@davemloft.net :

 I also don't know what is the best way to handle this. veth advertises
 its peer via IFLA_LINK since 4.1, so it's too late to change it for
 this
 release.

 Apparently we need to pick our poison. Either way, we break something.
 Sure. I would prefer to have the same mechanism in all version, but I
 can live with the other solution.
 
 David, any thoughts about this?

 You can't break the 4.1 semantics, it's in a released kernel and people
 _ARE_ using it.

I had a look at what other kind of daemons may exploit the pre-4.1
semantics (of not having an infinite loop when following iflink) and
failed to find any other users than lldpd. Other LLDP daemons (lldpad,
ladvd, openlldpd) have other ways to find the lower interface. I would
also have thought that NetSNMP would use it to implement ifStackTable
but it doesn't in fact implement this table.
-- 
It were not best that we should all think alike; it is difference of opinion
that makes horse-races.
-- Mark Twain, Pudd'nhead Wilson's Calendar
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 3/4] Add support for driver cross-timestamp to PTP_SYS_OFFSET ioctl

2015-08-22 Thread Thomas Gleixner
On Fri, 21 Aug 2015, Christopher S. Hall wrote:
 From: Christopher Hall christopher.s.h...@intel.com
 
 This patch allows system and device time (cross-timestamp) to be
 performed by the driver. Currently, the cross-timestamping is performed
 in the PTP_SYS_OFFSET ioctl.  The PTP clock driver reads gettimeofday()
 and the gettime64() callback provided by the driver. The cross-timestamp
 is best effort where the latency between the capture of system time
 (getnstimeofday()) and the device time (driver callback) may be
 significant.
 
 This patch adds an additional callback getsynctime64(). Which will be
 called when the driver is able to perform a more accurate, implementation
 specific cross-timestamping.  For example, future network devices that
 implement PCIE PTM will be able to precisely correlate the device clock
 with the system clock with virtually zero latency between captures.
 This added callback can be used by the driver to expose this functionality.
 
 The callback, getsynctime64(), will only be called when defined and
 n_samples == 1 because the driver returns only 1 cross-timestamp where
 multiple samples cannot be chained together.
 
 This patch also adds to the capabilities ioctl (PTP_CLOCK_GETCAPS),
 allowing applications to query whether or not drivers implement the
 getsynctime callback, providing more precise cross timestamping.

That looks close to a proper changelog. A few nitpicks though.

Please avoid 'This patch does ...' phrases. We already know that this
is a patch.


 Commit Details:

Please get rid of this. It's useless noise.
 
 Added additional callback to ptp_clock_info:
 
 * getsynctime64()

 @@ -196,19 +197,31 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int 
 cmd, unsigned long arg)
   break;
   }
   pct = sysoff-ts[0];
 - for (i = 0; i  sysoff-n_samples; i++) {
 - getnstimeofday64(ts);
 + if (ptp-info-getsynctime64  sysoff-n_samples == 1 

The number of samples should be irrelevant for this sampling method.

 + ptp-info-getsynctime64(ptp-info, ts, systs) == 0) {

Why is this function taking struct timespec64 pointers? Just so every
driver which implements the callback needs to convert from u64 to
struct timespec64? That's simply wrong. Use u64 for both and do the
conversion in the ioctl.

Thanks,

tglx
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next] vrf: rename the framework to mrf

2015-08-22 Thread David Miller
From: Nicolas Dichtel nicolas.dich...@6wind.com
Date: Sat, 22 Aug 2015 18:10:20 +0200

 This patch renames the recently added vrf driver. 'VRF' term is very
 generic and there is no clear definition of it.
 For example, someone may expect more isolation and uses network namespaces
 to implement VRF,

This is a ridiculous argument.

Does someone using VRF on a Cisco box expect Linux namespaces to be used?

Sorry, this is not going to get applied.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 net-next] netfilter: ipset: Fixing unnamed union init

2015-08-22 Thread Jozsef Kadlecsik
On Sat, 22 Aug 2015, Elad Raz wrote:

 In continue to proposed Vinson Lee's post [1], this patch fixes compilation
 issues founded at gcc 4.4.7. The initialization of .cidr field of unnamed
 unions causes compilation error in gcc 4.4.x.

There's already a (couple of weeks old) patch in the -mm tree to fix the 
gcc compatibility issue, see the last comment in the thread you refer to:

 Visible links
 [1] https://lkml.org/lkml/2015/7/5/74

So I'm unsure whether a new patch should be submitted for this.

Best regards,
Jozsef
 
 Signed-off-by: Elad Raz el...@mellanox.com
 ---
  net/netfilter/ipset/ip_set_hash_netnet.c | 20 ++--
  net/netfilter/ipset/ip_set_hash_netportnet.c | 20 ++--
  2 files changed, 36 insertions(+), 4 deletions(-)
 
 diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c 
 b/net/netfilter/ipset/ip_set_hash_netnet.c
 index 3c862c0..a93dfeb 100644
 --- a/net/netfilter/ipset/ip_set_hash_netnet.c
 +++ b/net/netfilter/ipset/ip_set_hash_netnet.c
 @@ -131,6 +131,13 @@ hash_netnet4_data_next(struct hash_netnet4_elem *next,
  #define HOST_MASK32
  #include ip_set_hash_gen.h
  
 +static void
 +hash_netnet4_init(struct hash_netnet4_elem *e)
 +{
 + e-cidr[0] = HOST_MASK;
 + e-cidr[1] = HOST_MASK;
 +}
 +
  static int
  hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 const struct xt_action_param *par,
 @@ -160,7 +167,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
  {
   const struct hash_netnet *h = set-data;
   ipset_adtfn adtfn = set-variant-adt[adt];
 - struct hash_netnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
 + struct hash_netnet4_elem e = { };
   struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
   u32 ip = 0, ip_to = 0, last;
   u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
 @@ -169,6 +176,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
   if (tb[IPSET_ATTR_LINENO])
   *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
  
 + hash_netnet4_init(e);
   if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
   return -IPSET_ERR_PROTOCOL;
 @@ -357,6 +365,13 @@ hash_netnet6_data_next(struct hash_netnet4_elem *next,
  #define IP_SET_EMIT_CREATE
  #include ip_set_hash_gen.h
  
 +static void
 +hash_netnet6_init(struct hash_netnet6_elem *e)
 +{
 + e-cidr[0] = HOST_MASK;
 + e-cidr[1] = HOST_MASK;
 +}
 +
  static int
  hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 const struct xt_action_param *par,
 @@ -385,13 +400,14 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr 
 *tb[],
 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
  {
   ipset_adtfn adtfn = set-variant-adt[adt];
 - struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
 + struct hash_netnet6_elem e = { };
   struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
   int ret;
  
   if (tb[IPSET_ATTR_LINENO])
   *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
  
 + hash_netnet6_init(e);
   if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
   return -IPSET_ERR_PROTOCOL;
 diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c 
 b/net/netfilter/ipset/ip_set_hash_netportnet.c
 index 0c68734..9a14c23 100644
 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c
 +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
 @@ -142,6 +142,13 @@ hash_netportnet4_data_next(struct hash_netportnet4_elem 
 *next,
  #define HOST_MASK32
  #include ip_set_hash_gen.h
  
 +static void
 +hash_netportnet4_init(struct hash_netportnet4_elem *e)
 +{
 + e-cidr[0] = HOST_MASK;
 + e-cidr[1] = HOST_MASK;
 +}
 +
  static int
  hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 const struct xt_action_param *par,
 @@ -175,7 +182,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr 
 *tb[],
  {
   const struct hash_netportnet *h = set-data;
   ipset_adtfn adtfn = set-variant-adt[adt];
 - struct hash_netportnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
 + struct hash_netportnet4_elem e = { };
   struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
   u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
   u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
 @@ -185,6 +192,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr 
 *tb[],
   if (tb[IPSET_ATTR_LINENO])
   *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
  
 + hash_netportnet4_init(e);
   if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
 @@ -412,6 +420,13 @@ hash_netportnet6_data_next(struct 

Re: [PATCH v2 net-next] netfilter: ipset: Fixing unnamed union init

2015-08-22 Thread Akemi Yagi
I was wondering about that, too. The referenced patch,

net-netfilter-ipset-work-around-gcc-444-initializer-bug.patch

was confirmed to fix the issue. What I cannot figure out is the
procedure by which this patch gets added to Linus' tree. It was in the
-mm tree back in mid June but still is not in the current 4.2-rc7.
Without this fix, Linux 4.2 will not build under RHEL-6 (for example).

Akemi

On Sat, Aug 22, 2015 at 11:11 AM, Jozsef Kadlecsik
kad...@blackhole.kfki.hu wrote:
 On Sat, 22 Aug 2015, Elad Raz wrote:

 In continue to proposed Vinson Lee's post [1], this patch fixes compilation
 issues founded at gcc 4.4.7. The initialization of .cidr field of unnamed
 unions causes compilation error in gcc 4.4.x.

 There's already a (couple of weeks old) patch in the -mm tree to fix the
 gcc compatibility issue, see the last comment in the thread you refer to:

 Visible links
 [1] https://lkml.org/lkml/2015/7/5/74

 So I'm unsure whether a new patch should be submitted for this.

 Best regards,
 Jozsef

 Signed-off-by: Elad Raz el...@mellanox.com
 ---
  net/netfilter/ipset/ip_set_hash_netnet.c | 20 ++--
  net/netfilter/ipset/ip_set_hash_netportnet.c | 20 ++--
  2 files changed, 36 insertions(+), 4 deletions(-)

 diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c 
 b/net/netfilter/ipset/ip_set_hash_netnet.c
 index 3c862c0..a93dfeb 100644
 --- a/net/netfilter/ipset/ip_set_hash_netnet.c
 +++ b/net/netfilter/ipset/ip_set_hash_netnet.c
 @@ -131,6 +131,13 @@ hash_netnet4_data_next(struct hash_netnet4_elem *next,
  #define HOST_MASK32
  #include ip_set_hash_gen.h

 +static void
 +hash_netnet4_init(struct hash_netnet4_elem *e)
 +{
 + e-cidr[0] = HOST_MASK;
 + e-cidr[1] = HOST_MASK;
 +}
 +
  static int
  hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 const struct xt_action_param *par,
 @@ -160,7 +167,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr 
 *tb[],
  {
   const struct hash_netnet *h = set-data;
   ipset_adtfn adtfn = set-variant-adt[adt];
 - struct hash_netnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
 + struct hash_netnet4_elem e = { };
   struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
   u32 ip = 0, ip_to = 0, last;
   u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
 @@ -169,6 +176,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr 
 *tb[],
   if (tb[IPSET_ATTR_LINENO])
   *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);

 + hash_netnet4_init(e);
   if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
   return -IPSET_ERR_PROTOCOL;
 @@ -357,6 +365,13 @@ hash_netnet6_data_next(struct hash_netnet4_elem *next,
  #define IP_SET_EMIT_CREATE
  #include ip_set_hash_gen.h

 +static void
 +hash_netnet6_init(struct hash_netnet6_elem *e)
 +{
 + e-cidr[0] = HOST_MASK;
 + e-cidr[1] = HOST_MASK;
 +}
 +
  static int
  hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 const struct xt_action_param *par,
 @@ -385,13 +400,14 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr 
 *tb[],
 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
  {
   ipset_adtfn adtfn = set-variant-adt[adt];
 - struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
 + struct hash_netnet6_elem e = { };
   struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
   int ret;

   if (tb[IPSET_ATTR_LINENO])
   *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);

 + hash_netnet6_init(e);
   if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
   return -IPSET_ERR_PROTOCOL;
 diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c 
 b/net/netfilter/ipset/ip_set_hash_netportnet.c
 index 0c68734..9a14c23 100644
 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c
 +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
 @@ -142,6 +142,13 @@ hash_netportnet4_data_next(struct hash_netportnet4_elem 
 *next,
  #define HOST_MASK32
  #include ip_set_hash_gen.h

 +static void
 +hash_netportnet4_init(struct hash_netportnet4_elem *e)
 +{
 + e-cidr[0] = HOST_MASK;
 + e-cidr[1] = HOST_MASK;
 +}
 +
  static int
  hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 const struct xt_action_param *par,
 @@ -175,7 +182,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr 
 *tb[],
  {
   const struct hash_netportnet *h = set-data;
   ipset_adtfn adtfn = set-variant-adt[adt];
 - struct hash_netportnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, 
 };
 + struct hash_netportnet4_elem e = { };
   struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
   u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
   u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
 @@ -185,6 

[PATCH net-next] vrf: rename the framework to mrf

2015-08-22 Thread Nicolas Dichtel
This patch renames the recently added vrf driver. 'VRF' term is very
generic and there is no clear definition of it.
For example, someone may expect more isolation and uses network namespaces
to implement VRF,

MRF was suggested by Cumulus people, so let's use this term.

This patch also highlights the fact that this driver is not *the* way to
implement VRF in linux, but *a* tool among others.

CC: Shrijeet Mukherjee s...@cumulusnetworks.com
CC: David Ahern d...@cumulusnetworks.com
Signed-off-by: Nicolas Dichtel nicolas.dich...@6wind.com
---
 drivers/net/Kconfig  |   8 +-
 drivers/net/Makefile |   2 +-
 drivers/net/mrf.c| 668 +++
 drivers/net/vrf.c| 668 ---
 include/linux/netdevice.h|  20 +-
 include/net/flow.h   |   2 +-
 include/net/mrf.h| 177 
 include/net/route.h  |   4 +-
 include/net/vrf.h| 178 
 include/uapi/linux/if_link.h |  10 +-
 net/ipv4/af_inet.c   |   4 +-
 net/ipv4/fib_frontend.c  |  12 +-
 net/ipv4/fib_trie.c  |   2 +-
 net/ipv4/icmp.c  |   7 +-
 net/ipv4/ip_fragment.c   |   6 +-
 net/ipv4/ip_output.c |   2 +-
 net/ipv4/route.c |  10 +-
 net/ipv4/udp.c   |   8 +-
 18 files changed, 894 insertions(+), 894 deletions(-)
 create mode 100644 drivers/net/mrf.c
 delete mode 100644 drivers/net/vrf.c
 create mode 100644 include/net/mrf.h
 delete mode 100644 include/net/vrf.h

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index f50373645ab4..db4e5f5e8535 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -296,12 +296,12 @@ config NLMON
  diagnostics, etc. This is mostly intended for developers or support
  to debug netlink issues. If unsure, say N.
 
-config NET_VRF
-   tristate Virtual Routing and Forwarding (Lite)
+config NET_MRF
+   tristate Multiple Routing Framework
depends on IP_MULTIPLE_TABLES  IPV6_MULTIPLE_TABLES
---help---
- This option enables the support for mapping interfaces into VRF's. The
- support enables VRF devices.
+ This option enables the support for mapping interfaces into MRF's. The
+ support enables MRF devices.
 
 endif # NET_CORE
 
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index ca16dd689b36..60da845fa976 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -25,7 +25,7 @@ obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
 obj-$(CONFIG_VXLAN) += vxlan.o
 obj-$(CONFIG_GENEVE) += geneve.o
 obj-$(CONFIG_NLMON) += nlmon.o
-obj-$(CONFIG_NET_VRF) += vrf.o
+obj-$(CONFIG_NET_MRF) += mrf.o
 
 #
 # Networking Drivers
diff --git a/drivers/net/mrf.c b/drivers/net/mrf.c
new file mode 100644
index ..c959703a5a9c
--- /dev/null
+++ b/drivers/net/mrf.c
@@ -0,0 +1,668 @@
+/*
+ * mrf.c: device driver to encapsulate a MRF space
+ *
+ * Copyright (c) 2015 Cumulus Networks. All rights reserved.
+ * Copyright (c) 2015 Shrijeet Mukherjee s...@cumulusnetworks.com
+ * Copyright (c) 2015 David Ahern d...@cumulusnetworks.com
+ *
+ * Based on dummy, team and ipvlan drivers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include linux/module.h
+#include linux/kernel.h
+#include linux/netdevice.h
+#include linux/etherdevice.h
+#include linux/ip.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/netfilter.h
+#include linux/rtnetlink.h
+#include net/rtnetlink.h
+#include linux/u64_stats_sync.h
+#include linux/hashtable.h
+
+#include linux/inetdevice.h
+#include net/ip.h
+#include net/ip_fib.h
+#include net/ip6_route.h
+#include net/rtnetlink.h
+#include net/route.h
+#include net/addrconf.h
+#include net/mrf.h
+
+#define DRV_NAME   mrf
+#define DRV_VERSION1.0
+
+#define mrf_is_slave(dev)   ((dev)-flags  IFF_SLAVE)
+
+#define mrf_master_get_rcu(dev) \
+   ((struct net_device *)rcu_dereference(dev-rx_handler_data))
+
+struct pcpu_dstats {
+   u64 tx_pkts;
+   u64 tx_bytes;
+   u64 tx_drps;
+   u64 rx_pkts;
+   u64 rx_bytes;
+   struct u64_stats_sync   syncp;
+};
+
+static struct dst_entry *mrf_ip_check(struct dst_entry *dst, u32 cookie)
+{
+   return dst;
+}
+
+static int mrf_ip_local_out(struct sk_buff *skb)
+{
+   return ip_local_out(skb);
+}
+
+static unsigned int mrf_v4_mtu(const struct dst_entry *dst)
+{
+   /* TO-DO: return max ethernet size? */
+   return dst-dev-mtu;
+}
+
+static void mrf_dst_destroy(struct dst_entry *dst)
+{
+   /* our dst lives forever - or until the device is closed */
+}
+
+static unsigned int mrf_default_advmss(const