[PATCH mlx5-next 1/2] mlx5: update timecounter at least twice per counter overflow

2018-12-03 Thread Miroslav Lichvar
The timecounter needs to be updated at least once per half of the
cyclecounter interval to prevent timecounter_cyc2time() from interpreting
a new timestamp as an old value and causing a backward jump.

This would be an issue if the timecounter multiplier was so small that
the update interval would not be limited by the 64-bit overflow in
multiplication.

Shorten the calculated interval to make sure the timecounter is updated
in time even when the system clock is slowed down by up to 10%, the
multiplier is increased by up to 10%, and the scheduled overflow check
is late by 15%.

Cc: Richard Cochran 
Cc: Ariel Levkovich 
Cc: Saeed Mahameed 
Signed-off-by: Miroslav Lichvar 
---
 drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c 
b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 0d90b1b4a3d3..2d6168ee99e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -511,14 +511,14 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
 ktime_to_ns(ktime_get_real()));
 
/* Calculate period in seconds to call the overflow watchdog - to make
-* sure counter is checked at least once every wrap around.
+* sure counter is checked at least twice every wrap around.
 * The period is calculated as the minimum between max HW cycles count
 * (The clock source mask) and max amount of cycles that can be
 * multiplied by clock multiplier where the result doesn't exceed
 * 64bits.
 */
overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult);
-   overflow_cycles = min(overflow_cycles, clock->cycles.mask >> 1);
+   overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3));
 
ns = cyclecounter_cyc2ns(>cycles, overflow_cycles,
 frac, );
-- 
2.17.2



[PATCH mlx5-next 2/2] mlx5: extend PTP gettime function to read system clock

2018-12-03 Thread Miroslav Lichvar
Read the system time right before and immediately after reading the low
register of the internal timer. This adds support for the
PTP_SYS_OFFSET_EXTENDED ioctl.

Cc: Richard Cochran 
Cc: Saeed Mahameed 
Signed-off-by: Miroslav Lichvar 
---
 .../ethernet/mellanox/mlx5/core/lib/clock.c   | 20 +++
 .../net/ethernet/mellanox/mlx5/core/main.c| 11 --
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |  4 +++-
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c 
b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 2d6168ee99e8..f9618047c16a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -71,7 +71,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
  clock);
 
-   return mlx5_read_internal_timer(mdev) & cc->mask;
+   return mlx5_read_internal_timer(mdev, NULL) & cc->mask;
 }
 
 static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
@@ -155,15 +155,19 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
return 0;
 }
 
-static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+struct ptp_system_timestamp *sts)
 {
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
ptp_info);
-   u64 ns;
+   struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
unsigned long flags;
+   u64 cycles, ns;
 
write_seqlock_irqsave(>lock, flags);
-   ns = timecounter_read(>tc);
+   cycles = mlx5_read_internal_timer(mdev, sts);
+   ns = timecounter_cyc2time(>tc, cycles);
write_sequnlock_irqrestore(>lock, flags);
 
*ts = ns_to_timespec64(ns);
@@ -306,7 +310,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
ts.tv_sec = rq->perout.start.sec;
ts.tv_nsec = rq->perout.start.nsec;
ns = timespec64_to_ns();
-   cycles_now = mlx5_read_internal_timer(mdev);
+   cycles_now = mlx5_read_internal_timer(mdev, NULL);
write_seqlock_irqsave(>lock, flags);
nsec_now = timecounter_cyc2time(>tc, cycles_now);
nsec_delta = ns - nsec_now;
@@ -383,7 +387,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = {
.pps= 0,
.adjfreq= mlx5_ptp_adjfreq,
.adjtime= mlx5_ptp_adjtime,
-   .gettime64  = mlx5_ptp_gettime,
+   .gettimex64 = mlx5_ptp_gettimex,
.settime64  = mlx5_ptp_settime,
.enable = NULL,
.verify = NULL,
@@ -466,8 +470,8 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
ptp_clock_event(clock->ptp, _event);
break;
case PTP_PF_PEROUT:
-   mlx5_ptp_gettime(>ptp_info, );
-   cycles_now = mlx5_read_internal_timer(mdev);
+   mlx5_ptp_gettimex(>ptp_info, , NULL);
+   cycles_now = mlx5_read_internal_timer(mdev, NULL);
ts.tv_sec += 1;
ts.tv_nsec = 0;
ns = timespec64_to_ns();
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 28132c7dc05f..6f1cf291df7e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -624,15 +624,22 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 
func_id)
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
+struct ptp_system_timestamp *sts)
 {
u32 timer_h, timer_h1, timer_l;
 
timer_h = ioread32be(>iseg->internal_timer_h);
+   ptp_read_system_prets(sts);
timer_l = ioread32be(>iseg->internal_timer_l);
+   ptp_read_system_postts(sts);
timer_h1 = ioread32be(>iseg->internal_timer_h);
-   if (timer_h != timer_h1) /* wrap around */
+   if (timer_h != timer_h1) {
+   /* wrap around */
+   ptp_read_system_prets(sts);
timer_l = ioread32be(>iseg->internal_timer_l);
+   ptp_read_system_postts(sts);
+   }
 
return (u64)timer_l | (u64)timer_h1 << 32;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h 
b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 0594d0961cb3..3d66afc9b7a5 100644
--- a/d

[PATCH mlx5-next 0/2] Improvements for PTP clock in mlx5 driver

2018-12-03 Thread Miroslav Lichvar
This is a pair of patches for the mlx5 driver related to the PTP clock.

The first patch fixes a potential issue causing backward jumps due to a
late update of the PHC timecounter.

The second patch adds support for the PTP_SYS_OFFSET_EXTENDED ioctl.

Miroslav Lichvar (2):
  mlx5: update timecounter at least twice per counter overflow
  mlx5: extend PTP gettime function to read system clock

 .../ethernet/mellanox/mlx5/core/lib/clock.c   | 24 +++
 .../net/ethernet/mellanox/mlx5/core/main.c| 11 +++--
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |  4 +++-
 3 files changed, 26 insertions(+), 13 deletions(-)

-- 
2.17.2



[PATCH net-next 7/8] ixgbe: extend PTP gettime function to read system clock

2018-11-09 Thread Miroslav Lichvar
This adds support for the PTP_SYS_OFFSET_EXTENDED ioctl.

Cc: Richard Cochran 
Cc: Jacob Keller 
Cc: Jeff Kirsher 
Signed-off-by: Miroslav Lichvar 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 54 
 1 file changed, 44 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index b3e0d8bb5cbd..d81a50dc9535 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -443,22 +443,52 @@ static int ixgbe_ptp_adjtime(struct ptp_clock_info *ptp, 
s64 delta)
 }
 
 /**
- * ixgbe_ptp_gettime
+ * ixgbe_ptp_gettimex
  * @ptp: the ptp clock structure
- * @ts: timespec structure to hold the current time value
+ * @ts: timespec to hold the PHC timestamp
+ * @sts: structure to hold the system time before and after reading the PHC
  *
  * read the timecounter and return the correct value on ns,
  * after converting it into a struct timespec.
  */
-static int ixgbe_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int ixgbe_ptp_gettimex(struct ptp_clock_info *ptp,
+ struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
 {
struct ixgbe_adapter *adapter =
container_of(ptp, struct ixgbe_adapter, ptp_caps);
+   struct ixgbe_hw *hw = >hw;
unsigned long flags;
-   u64 ns;
+   u64 ns, stamp;
 
spin_lock_irqsave(>tmreg_lock, flags);
-   ns = timecounter_read(>hw_tc);
+
+   switch (adapter->hw.mac.type) {
+   case ixgbe_mac_X550:
+   case ixgbe_mac_X550EM_x:
+   case ixgbe_mac_x550em_a:
+   /* Upper 32 bits represent billions of cycles, lower 32 bits
+* represent cycles. However, we use timespec64_to_ns for the
+* correct math even though the units haven't been corrected
+* yet.
+*/
+   ptp_read_system_prets(sts);
+   IXGBE_READ_REG(hw, IXGBE_SYSTIMR);
+   ptp_read_system_postts(sts);
+   ts->tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+   ts->tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH);
+   stamp = timespec64_to_ns(ts);
+   break;
+   default:
+   ptp_read_system_prets(sts);
+   stamp = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+   ptp_read_system_postts(sts);
+   stamp |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32;
+   break;
+   }
+
+   ns = timecounter_cyc2time(>hw_tc, stamp);
+
spin_unlock_irqrestore(>tmreg_lock, flags);
 
*ts = ns_to_timespec64(ns);
@@ -567,10 +597,14 @@ void ixgbe_ptp_overflow_check(struct ixgbe_adapter 
*adapter)
 {
bool timeout = time_is_before_jiffies(adapter->last_overflow_check +
 IXGBE_OVERFLOW_PERIOD);
-   struct timespec64 ts;
+   unsigned long flags;
 
if (timeout) {
-   ixgbe_ptp_gettime(>ptp_caps, );
+   /* Update the timecounter */
+   spin_lock_irqsave(>tmreg_lock, flags);
+   timecounter_read(>hw_tc);
+   spin_unlock_irqrestore(>tmreg_lock, flags);
+
adapter->last_overflow_check = jiffies;
}
 }
@@ -1216,7 +1250,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter 
*adapter)
adapter->ptp_caps.pps = 1;
adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599;
adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
-   adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
+   adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex;
adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_x540;
@@ -1233,7 +1267,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter 
*adapter)
adapter->ptp_caps.pps = 0;
adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599;
adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
-   adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
+   adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex;
adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
break;
@@ -1249,7 +1283,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter 
*adapter)
adapter->ptp_caps.pps = 0;
adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550;
adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
-   adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
+   adapter->ptp_caps.gettimex64 = 

[PATCH net-next 8/8] tg3: extend PTP gettime function to read system clock

2018-11-09 Thread Miroslav Lichvar
This adds support for the PTP_SYS_OFFSET_EXTENDED ioctl.

Cc: Richard Cochran 
Cc: Michael Chan 
Signed-off-by: Miroslav Lichvar 
---
 drivers/net/ethernet/broadcom/tg3.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 89295306f161..ce44d208e137 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6135,10 +6135,16 @@ static int tg3_setup_phy(struct tg3 *tp, bool 
force_reset)
 }
 
 /* tp->lock must be held */
-static u64 tg3_refclk_read(struct tg3 *tp)
+static u64 tg3_refclk_read(struct tg3 *tp, struct ptp_system_timestamp *sts)
 {
-   u64 stamp = tr32(TG3_EAV_REF_CLCK_LSB);
-   return stamp | (u64)tr32(TG3_EAV_REF_CLCK_MSB) << 32;
+   u64 stamp;
+
+   ptp_read_system_prets(sts);
+   stamp = tr32(TG3_EAV_REF_CLCK_LSB);
+   ptp_read_system_postts(sts);
+   stamp |= (u64)tr32(TG3_EAV_REF_CLCK_MSB) << 32;
+
+   return stamp;
 }
 
 /* tp->lock must be held */
@@ -6229,13 +6235,14 @@ static int tg3_ptp_adjtime(struct ptp_clock_info *ptp, 
s64 delta)
return 0;
 }
 
-static int tg3_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int tg3_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+   struct ptp_system_timestamp *sts)
 {
u64 ns;
struct tg3 *tp = container_of(ptp, struct tg3, ptp_info);
 
tg3_full_lock(tp, 0);
-   ns = tg3_refclk_read(tp);
+   ns = tg3_refclk_read(tp, sts);
ns += tp->ptp_adjust;
tg3_full_unlock(tp);
 
@@ -6330,7 +6337,7 @@ static const struct ptp_clock_info tg3_ptp_caps = {
.pps= 0,
.adjfreq= tg3_ptp_adjfreq,
.adjtime= tg3_ptp_adjtime,
-   .gettime64  = tg3_ptp_gettime,
+   .gettimex64 = tg3_ptp_gettimex,
.settime64  = tg3_ptp_settime,
.enable = tg3_ptp_enable,
 };
-- 
2.17.2



[PATCH net-next 5/8] e1000e: extend PTP gettime function to read system clock

2018-11-09 Thread Miroslav Lichvar
This adds support for the PTP_SYS_OFFSET_EXTENDED ioctl.

Cc: Richard Cochran 
Cc: Jacob Keller 
Cc: Jeff Kirsher 
Signed-off-by: Miroslav Lichvar 
---
 drivers/net/ethernet/intel/e1000e/e1000.h  |  3 ++
 drivers/net/ethernet/intel/e1000e/netdev.c | 42 --
 drivers/net/ethernet/intel/e1000e/ptp.c| 16 +
 3 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h 
b/drivers/net/ethernet/intel/e1000e/e1000.h
index c760dc72c520..be13227f1697 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -505,6 +505,9 @@ extern const struct e1000_info e1000_es2_info;
 void e1000e_ptp_init(struct e1000_adapter *adapter);
 void e1000e_ptp_remove(struct e1000_adapter *adapter);
 
+u64 e1000e_read_systim(struct e1000_adapter *adapter,
+  struct ptp_system_timestamp *sts);
+
 static inline s32 e1000_phy_hw_reset(struct e1000_hw *hw)
 {
return hw->phy.ops.reset(hw);
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 16a73bd9f4cb..59bd587d809d 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4319,13 +4319,16 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter)
 /**
  * e1000e_sanitize_systim - sanitize raw cycle counter reads
  * @hw: pointer to the HW structure
- * @systim: time value read, sanitized and returned
+ * @systim: PHC time value read, sanitized and returned
+ * @sts: structure to hold system time before and after reading SYSTIML,
+ * may be NULL
  *
  * Errata for 82574/82583 possible bad bits read from SYSTIMH/L:
  * check to see that the time is incrementing at a reasonable
  * rate and is a multiple of incvalue.
  **/
-static u64 e1000e_sanitize_systim(struct e1000_hw *hw, u64 systim)
+static u64 e1000e_sanitize_systim(struct e1000_hw *hw, u64 systim,
+ struct ptp_system_timestamp *sts)
 {
u64 time_delta, rem, temp;
u64 systim_next;
@@ -4335,7 +4338,9 @@ static u64 e1000e_sanitize_systim(struct e1000_hw *hw, 
u64 systim)
incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
/* latch SYSTIMH on read of SYSTIML */
+   ptp_read_system_prets(sts);
systim_next = (u64)er32(SYSTIML);
+   ptp_read_system_postts(sts);
systim_next |= (u64)er32(SYSTIMH) << 32;
 
time_delta = systim_next - systim;
@@ -4353,15 +4358,16 @@ static u64 e1000e_sanitize_systim(struct e1000_hw *hw, 
u64 systim)
 }
 
 /**
- * e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
- * @cc: cyclecounter structure
+ * e1000e_read_systim - read SYSTIM register
+ * @adapter: board private structure
+ * @sts: structure which will contain system time before and after reading
+ * SYSTIML, may be NULL
  **/
-static u64 e1000e_cyclecounter_read(const struct cyclecounter *cc)
+u64 e1000e_read_systim(struct e1000_adapter *adapter,
+  struct ptp_system_timestamp *sts)
 {
-   struct e1000_adapter *adapter = container_of(cc, struct e1000_adapter,
-cc);
struct e1000_hw *hw = >hw;
-   u32 systimel, systimeh;
+   u32 systimel, systimel_2, systimeh;
u64 systim;
/* SYSTIMH latching upon SYSTIML read does not work well.
 * This means that if SYSTIML overflows after we read it but before
@@ -4369,11 +4375,15 @@ static u64 e1000e_cyclecounter_read(const struct 
cyclecounter *cc)
 * will experience a huge non linear increment in the systime value
 * to fix that we test for overflow and if true, we re-read systime.
 */
+   ptp_read_system_prets(sts);
systimel = er32(SYSTIML);
+   ptp_read_system_postts(sts);
systimeh = er32(SYSTIMH);
/* Is systimel is so large that overflow is possible? */
if (systimel >= (u32)0x - E1000_TIMINCA_INCVALUE_MASK) {
-   u32 systimel_2 = er32(SYSTIML);
+   ptp_read_system_prets(sts);
+   systimel_2 = er32(SYSTIML);
+   ptp_read_system_postts(sts);
if (systimel > systimel_2) {
/* There was an overflow, read again SYSTIMH, and use
 * systimel_2
@@ -4386,11 +4396,23 @@ static u64 e1000e_cyclecounter_read(const struct 
cyclecounter *cc)
systim |= (u64)systimeh << 32;
 
if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW)
-   systim = e1000e_sanitize_systim(hw, systim);
+   systim = e1000e_sanitize_systim(hw, systim, sts);
 
return systim;
 }
 
+/**
+ * e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
+ * @cc: cyclecounter structure
+ **/
+sta

[PATCH net-next 6/8] igb: extend PTP gettime function to read system clock

2018-11-09 Thread Miroslav Lichvar
This adds support for the PTP_SYS_OFFSET_EXTENDED ioctl.

Cc: Richard Cochran 
Cc: Jacob Keller 
Cc: Jeff Kirsher 
Signed-off-by: Miroslav Lichvar 
---
 drivers/net/ethernet/intel/igb/igb_ptp.c | 65 
 1 file changed, 55 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c 
b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 29ced6b74d36..8c1833a157d3 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -275,17 +275,53 @@ static int igb_ptp_adjtime_i210(struct ptp_clock_info 
*ptp, s64 delta)
return 0;
 }
 
-static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp,
-struct timespec64 *ts)
+static int igb_ptp_gettimex_82576(struct ptp_clock_info *ptp,
+ struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
 {
struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
   ptp_caps);
+   struct e1000_hw *hw = >hw;
unsigned long flags;
+   u32 lo, hi;
u64 ns;
 
spin_lock_irqsave(>tmreg_lock, flags);
 
-   ns = timecounter_read(>tc);
+   ptp_read_system_prets(sts);
+   lo = rd32(E1000_SYSTIML);
+   ptp_read_system_postts(sts);
+   hi = rd32(E1000_SYSTIMH);
+
+   ns = timecounter_cyc2time(>tc, ((u64)hi << 32) | lo);
+
+   spin_unlock_irqrestore(>tmreg_lock, flags);
+
+   *ts = ns_to_timespec64(ns);
+
+   return 0;
+}
+
+static int igb_ptp_gettimex_82580(struct ptp_clock_info *ptp,
+ struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+   struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+  ptp_caps);
+   struct e1000_hw *hw = >hw;
+   unsigned long flags;
+   u32 lo, hi;
+   u64 ns;
+
+   spin_lock_irqsave(>tmreg_lock, flags);
+
+   ptp_read_system_prets(sts);
+   rd32(E1000_SYSTIMR);
+   ptp_read_system_postts(sts);
+   lo = rd32(E1000_SYSTIML);
+   hi = rd32(E1000_SYSTIMH);
+
+   ns = timecounter_cyc2time(>tc, ((u64)hi << 32) | lo);
 
spin_unlock_irqrestore(>tmreg_lock, flags);
 
@@ -294,16 +330,22 @@ static int igb_ptp_gettime_82576(struct ptp_clock_info 
*ptp,
return 0;
 }
 
-static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp,
-   struct timespec64 *ts)
+static int igb_ptp_gettimex_i210(struct ptp_clock_info *ptp,
+struct timespec64 *ts,
+struct ptp_system_timestamp *sts)
 {
struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
   ptp_caps);
+   struct e1000_hw *hw = >hw;
unsigned long flags;
 
spin_lock_irqsave(>tmreg_lock, flags);
 
-   igb_ptp_read_i210(igb, ts);
+   ptp_read_system_prets(sts);
+   rd32(E1000_SYSTIMR);
+   ptp_read_system_postts(sts);
+   ts->tv_nsec = rd32(E1000_SYSTIML);
+   ts->tv_sec = rd32(E1000_SYSTIMH);
 
spin_unlock_irqrestore(>tmreg_lock, flags);
 
@@ -656,9 +698,12 @@ static void igb_ptp_overflow_check(struct work_struct 
*work)
struct igb_adapter *igb =
container_of(work, struct igb_adapter, ptp_overflow_work.work);
struct timespec64 ts;
+   u64 ns;
 
-   igb->ptp_caps.gettime64(>ptp_caps, );
+   /* Update the timecounter */
+   ns = timecounter_read(>tc);
 
+   ts = ns_to_timespec64(ns);
pr_debug("igb overflow check at %lld.%09lu\n",
 (long long) ts.tv_sec, ts.tv_nsec);
 
@@ -1124,7 +1169,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->ptp_caps.pps = 0;
adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576;
adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
-   adapter->ptp_caps.gettime64 = igb_ptp_gettime_82576;
+   adapter->ptp_caps.gettimex64 = igb_ptp_gettimex_82576;
adapter->ptp_caps.settime64 = igb_ptp_settime_82576;
adapter->ptp_caps.enable = igb_ptp_feature_enable;
adapter->cc.read = igb_ptp_read_82576;
@@ -1143,7 +1188,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->ptp_caps.pps = 0;
adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
-   adapter->ptp_caps.gettime64 = igb_ptp_gettime_82576;
+   adapter->ptp_caps.gettimex64 = igb_ptp_gettimex_82580;
adapter->ptp_caps.settime64 = igb_ptp_settime_82576;
adapter->ptp_caps.enable = igb_ptp_feature_ena

[PATCH net-next 4/8] ptp: deprecate gettime64() in favor of gettimex64()

2018-11-09 Thread Miroslav Lichvar
When a driver provides gettimex64(), use it in the PTP_SYS_OFFSET ioctl
and POSIX clock's gettime() instead of gettime64(). Drivers should
provide only one of the functions.

Cc: Richard Cochran 
Cc: Jacob Keller 
Signed-off-by: Miroslav Lichvar 
---
 drivers/ptp/ptp_chardev.c| 5 -
 drivers/ptp/ptp_clock.c  | 5 -
 include/linux/ptp_clock_kernel.h | 2 ++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index aad0d36cf5c0..797fab33bb98 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -260,7 +260,10 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, 
unsigned long arg)
pct->sec = ts.tv_sec;
pct->nsec = ts.tv_nsec;
pct++;
-   err = ptp->info->gettime64(ptp->info, );
+   if (ops->gettimex64)
+   err = ops->gettimex64(ops, , NULL);
+   else
+   err = ops->gettime64(ops, );
if (err)
goto out;
pct->sec = ts.tv_sec;
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 5419a89d300e..40fda23e4b05 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -117,7 +117,10 @@ static int ptp_clock_gettime(struct posix_clock *pc, 
struct timespec64 *tp)
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
int err;
 
-   err = ptp->info->gettime64(ptp->info, tp);
+   if (ptp->info->gettimex64)
+   err = ptp->info->gettimex64(ptp->info, tp, NULL);
+   else
+   err = ptp->info->gettime64(ptp->info, tp);
return err;
 }
 
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index a1ec0448e341..7121bbe76979 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -82,6 +82,8 @@ struct ptp_system_timestamp {
  *parameter delta: Desired change in nanoseconds.
  *
  * @gettime64:  Reads the current time from the hardware clock.
+ *  This method is deprecated.  New drivers should implement
+ *  the @gettimex64 method instead.
  *  parameter ts: Holds the result.
  *
  * @gettimex64:  Reads the current time from the hardware clock and optionally
-- 
2.17.2



[PATCH net-next 3/8] ptp: add PTP_SYS_OFFSET_EXTENDED ioctl

2018-11-09 Thread Miroslav Lichvar
The PTP_SYS_OFFSET ioctl, which can be used to measure the offset
between a PHC and the system clock, includes the total time that the
driver needs to read the PHC timestamp.

This typically involves reading multiple PCI registers (sometimes in
multiple iterations), and the register that contains the lowest bits of
the timestamp is not read in the middle between the two readings of the
system clock. This asymmetry causes the measured offset to have a
significant error.

Introduce a new ioctl, driver function, and helper functions, which
allow the reading of the lowest register to be isolated from the other
readings in order to reduce the asymmetry. The ioctl returns three
timestamps for each measurement:
- system time right before reading the lowest bits of the PHC timestamp
- PHC time
- system time immediately after reading the lowest bits of the PHC
  timestamp

Cc: Richard Cochran 
Cc: Jacob Keller 
Cc: Marcelo Tosatti 
Signed-off-by: Miroslav Lichvar 
---
 drivers/ptp/ptp_chardev.c| 33 
 include/linux/ptp_clock_kernel.h | 31 ++
 include/uapi/linux/ptp_clock.h   | 12 
 3 files changed, 76 insertions(+)

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 3c681bed5703..aad0d36cf5c0 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -122,10 +122,12 @@ int ptp_open(struct posix_clock *pc, fmode_t fmode)
 long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 {
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+   struct ptp_sys_offset_extended *extoff = NULL;
struct ptp_sys_offset_precise precise_offset;
struct system_device_crosststamp xtstamp;
struct ptp_clock_info *ops = ptp->info;
struct ptp_sys_offset *sysoff = NULL;
+   struct ptp_system_timestamp sts;
struct ptp_clock_request req;
struct ptp_clock_caps caps;
struct ptp_clock_time *pct;
@@ -211,6 +213,36 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, 
unsigned long arg)
err = -EFAULT;
break;
 
+   case PTP_SYS_OFFSET_EXTENDED:
+   if (!ptp->info->gettimex64) {
+   err = -EOPNOTSUPP;
+   break;
+   }
+   extoff = memdup_user((void __user *)arg, sizeof(*extoff));
+   if (IS_ERR(extoff)) {
+   err = PTR_ERR(extoff);
+   extoff = NULL;
+   break;
+   }
+   if (extoff->n_samples > PTP_MAX_SAMPLES) {
+   err = -EINVAL;
+   break;
+   }
+   for (i = 0; i < extoff->n_samples; i++) {
+   err = ptp->info->gettimex64(ptp->info, , );
+   if (err)
+   goto out;
+   extoff->ts[i][0].sec = sts.pre_ts.tv_sec;
+   extoff->ts[i][0].nsec = sts.pre_ts.tv_nsec;
+   extoff->ts[i][1].sec = ts.tv_sec;
+   extoff->ts[i][1].nsec = ts.tv_nsec;
+   extoff->ts[i][2].sec = sts.post_ts.tv_sec;
+   extoff->ts[i][2].nsec = sts.post_ts.tv_nsec;
+   }
+   if (copy_to_user((void __user *)arg, extoff, sizeof(*extoff)))
+   err = -EFAULT;
+   break;
+
case PTP_SYS_OFFSET:
sysoff = memdup_user((void __user *)arg, sizeof(*sysoff));
if (IS_ERR(sysoff)) {
@@ -284,6 +316,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, 
unsigned long arg)
}
 
 out:
+   kfree(extoff);
kfree(sysoff);
return err;
 }
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 51349d124ee5..a1ec0448e341 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -39,6 +39,15 @@ struct ptp_clock_request {
 };
 
 struct system_device_crosststamp;
+
+/**
+ * struct ptp_system_timestamp - system time corresponding to a PHC timestamp
+ */
+struct ptp_system_timestamp {
+   struct timespec64 pre_ts;
+   struct timespec64 post_ts;
+};
+
 /**
  * struct ptp_clock_info - decribes a PTP hardware clock
  *
@@ -75,6 +84,14 @@ struct system_device_crosststamp;
  * @gettime64:  Reads the current time from the hardware clock.
  *  parameter ts: Holds the result.
  *
+ * @gettimex64:  Reads the current time from the hardware clock and optionally
+ *   also the system clock.
+ *   parameter ts: Holds the PHC timestamp.
+ *   parameter sts: If not NULL, it holds a pair of timestamps from
+ *   the system clock. The first reading is made right before
+ *   reading the lowest bits of the PHC timestamp and the seco

[PATCH net-next 0/8] More accurate PHC<->system clock synchronization

2018-11-09 Thread Miroslav Lichvar
RFC->v1:
- added new patches
- separated PHC timestamp from ptp_system_timestamp
- fixed memory leak in PTP_SYS_OFFSET_EXTENDED
- changed PTP_SYS_OFFSET_EXTENDED to work with array of arrays
- fixed PTP_SYS_OFFSET_EXTENDED to break correctly from loop
- fixed timecounter updates in drivers
- split gettimex in igb driver
- fixed ptp_read_* functions to be available without
  CONFIG_PTP_1588_CLOCK

This series enables a more accurate synchronization between PTP hardware
clocks and the system clock.

The first two patches are minor cleanup/bug fixes.

The third patch adds an extended version of the PTP_SYS_OFFSET ioctl,
which returns three timestamps for each measurement. The idea is to
shorten the interval between the system timestamps to contain just the
reading of the lowest register of the PHC in order to reduce the error
in the measured offset and get a smaller upper bound on the maximum
error.

The fourth patch deprecates the original gettime function.

The remaining patches update the gettime function in order to support
the new ioctl in the e1000e, igb, ixgbe, and tg3 drivers.

Tests with a few different NICs in different machines show that:
- with an I219 (e1000e) the measured delay was reduced from 2500 to 1300
  ns and the error in the measured offset, when compared to the cross
  timestamping supported by the driver, was reduced by a factor of 5
- with an I210 (igb) the delay was reduced from 5100 to 1700 ns
- with an I350 (igb) the delay was reduced from 2300 to 750 ns
- with an X550 (ixgbe) the delay was reduced from 1950 to 650 ns
- with a BCM5720 (tg3) the delay was reduced from 2400 to 1200 ns


Miroslav Lichvar (8):
  ptp: reorder declarations in ptp_ioctl()
  ptp: check gettime64 return code in PTP_SYS_OFFSET ioctl
  ptp: add PTP_SYS_OFFSET_EXTENDED ioctl
  ptp: deprecate gettime64() in favor of gettimex64()
  e1000e: extend PTP gettime function to read system clock
  igb: extend PTP gettime function to read system clock
  ixgbe: extend PTP gettime function to read system clock
  tg3: extend PTP gettime function to read system clock

 drivers/net/ethernet/broadcom/tg3.c  | 19 --
 drivers/net/ethernet/intel/e1000e/e1000.h|  3 +
 drivers/net/ethernet/intel/e1000e/netdev.c   | 42 ++---
 drivers/net/ethernet/intel/e1000e/ptp.c  | 16 +++--
 drivers/net/ethernet/intel/igb/igb_ptp.c | 65 +---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 54 +---
 drivers/ptp/ptp_chardev.c| 55 ++---
 drivers/ptp/ptp_clock.c  |  5 +-
 include/linux/ptp_clock_kernel.h | 33 ++
 include/uapi/linux/ptp_clock.h   | 12 
 10 files changed, 253 insertions(+), 51 deletions(-)

-- 
2.17.2



[PATCH net-next 2/8] ptp: check gettime64 return code in PTP_SYS_OFFSET ioctl

2018-11-09 Thread Miroslav Lichvar
If a gettime64 call fails, return the error and avoid copying data back
to user space.

Cc: Richard Cochran 
Cc: Jacob Keller 
Signed-off-by: Miroslav Lichvar 
---
 drivers/ptp/ptp_chardev.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index b54b8158ff8a..3c681bed5703 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -228,7 +228,9 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, 
unsigned long arg)
pct->sec = ts.tv_sec;
pct->nsec = ts.tv_nsec;
pct++;
-   ptp->info->gettime64(ptp->info, );
+   err = ptp->info->gettime64(ptp->info, );
+   if (err)
+   goto out;
pct->sec = ts.tv_sec;
pct->nsec = ts.tv_nsec;
pct++;
@@ -281,6 +283,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, 
unsigned long arg)
break;
}
 
+out:
kfree(sysoff);
return err;
 }
-- 
2.17.2



[PATCH net-next 1/8] ptp: reorder declarations in ptp_ioctl()

2018-11-09 Thread Miroslav Lichvar
Reorder the variable declarations in reverse Christmas tree order.

Cc: Richard Cochran 
Suggested-by: Richard Cochran 
Signed-off-by: Miroslav Lichvar 
---
 drivers/ptp/ptp_chardev.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 2012551d93e0..b54b8158ff8a 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -121,18 +121,18 @@ int ptp_open(struct posix_clock *pc, fmode_t fmode)
 
 long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 {
-   struct ptp_clock_caps caps;
-   struct ptp_clock_request req;
-   struct ptp_sys_offset *sysoff = NULL;
-   struct ptp_sys_offset_precise precise_offset;
-   struct ptp_pin_desc pd;
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+   struct ptp_sys_offset_precise precise_offset;
+   struct system_device_crosststamp xtstamp;
struct ptp_clock_info *ops = ptp->info;
+   struct ptp_sys_offset *sysoff = NULL;
+   struct ptp_clock_request req;
+   struct ptp_clock_caps caps;
struct ptp_clock_time *pct;
+   unsigned int i, pin_index;
+   struct ptp_pin_desc pd;
struct timespec64 ts;
-   struct system_device_crosststamp xtstamp;
int enable, err = 0;
-   unsigned int i, pin_index;
 
switch (cmd) {
 
-- 
2.17.2



Re: [net 1/8] igb: shorten maximum PHC timecounter update interval

2018-11-01 Thread Miroslav Lichvar
On Wed, Oct 31, 2018 at 12:42:47PM -0700, Jeff Kirsher wrote:
> From: Miroslav Lichvar 
> 
> The timecounter needs to be updated at least once per ~550 seconds in
> order to avoid a 40-bit SYSTIM timestamp to be misinterpreted as an old
> timestamp.
> 
> Since commit 500462a9d ("timers: Switch to a non-cascading wheel"),
> scheduling of delayed work seems to be less accurate and a requested
> delay of 540 seconds may actually be longer than 550 seconds. Shorten
> the delay to 480 seconds to be sure the timecounter is updated in time.

It looks like this is the v1 of the patch. There was a v2 I sent on
Oct 26, which made the interval even shorter. I can send a separate
patch for that change.

-- 
Miroslav Lichvar


Re: [RFC PATCH 4/4] ixgbe: add support for extended PHC gettime

2018-10-31 Thread Miroslav Lichvar
On Wed, Oct 31, 2018 at 07:40:03AM -0700, Richard Cochran wrote:
> On Mon, Oct 29, 2018 at 02:31:09PM +0100, Miroslav Lichvar wrote:
> > I think there could be a flag in ptp_system_timestamp, or a parameter
> > of gettimex64(), which would enable/disable reading of the system
> > clock.
> 
> I'm not a fan of functions that change their behavior based on flags
> in their input parameters.

How about separating the PHC timestamp from the ptp_system_timestamp
structure and use NULL to indicate we don't want to read the system
clock? A gettimex64(ptp, ts, NULL) call would be equal to
gettime64(ptp, ts).

struct ptp_system_timestamp {
struct timespec64 pre_ts;
struct timespec64 post_ts;
};

int (*gettimex64)(struct ptp_clock_info *ptp, struct timespec64 *ts,
  struct ptp_system_timestamp *sts);

-- 
Miroslav Lichvar


Re: [RFC PATCH 3/4] igb: add support for extended PHC gettime

2018-10-31 Thread Miroslav Lichvar
On Tue, Oct 30, 2018 at 07:29:16PM -0700, Richard Cochran wrote:
> On Fri, Oct 26, 2018 at 06:27:41PM +0200, Miroslav Lichvar wrote:
> > +static int igb_ptp_gettimex(struct ptp_clock_info *ptp,
> > +   struct ptp_system_timestamp *sts)
> > +{
> > +   struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
> > +  ptp_caps);
> > +   struct e1000_hw *hw = &igb->hw;
> > +   unsigned long flags;
> > +   u32 lo, hi;
> > +   u64 ns;
> > +
> > +   spin_lock_irqsave(&igb->tmreg_lock, flags);
> > +
> > +   /* 82576 doesn't have SYSTIMR */
> > +   if (igb->hw.mac.type == e1000_82576) {
> 
> Instead of if/then/else, can't you follow the pattern of providing
> different function flavors ...

I can. I was just trying to minimize the amount of triplicated code.
In the next version I'll add a patch to deprecate the old gettime
functions, as Jacob suggested, and replace them with the extended
versions, so the amount of code will not change that much.

Thanks,

-- 
Miroslav Lichvar


Re: [RFC PATCH 4/4] ixgbe: add support for extended PHC gettime

2018-10-29 Thread Miroslav Lichvar
On Fri, Oct 26, 2018 at 04:54:57PM +0000, Keller, Jacob E wrote:
> > -Original Message-
> > From: Miroslav Lichvar [mailto:mlich...@redhat.com]
> > Sent: Friday, October 26, 2018 9:28 AM
> > To: netdev@vger.kernel.org
> > Cc: intel-wired-...@lists.osuosl.org; Richard Cochran 
> > ;
> > Keller, Jacob E ; Miroslav Lichvar 
> > 
> > Subject: [RFC PATCH 4/4] ixgbe: add support for extended PHC gettime
> > 
> > Cc: Richard Cochran 
> > Cc: Jacob Keller 
> > Signed-off-by: Miroslav Lichvar 

> What about replacing gettime64 with:
> 
> static int ixgbe_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 
> *ts)
> {
> struct ptp_system_timestamp sts
> 
> ixgbe_ptp_gettimex(ptp, &sts);
> *ts = sts.phc_ts
> }

That will work, but it will be slower. With HPET as a clocksource
there would be a few microseconds of extra (symmetric) delay and the
applications would have to assume a larger maximum error.

I think there could be a flag in ptp_system_timestamp, or a parameter
of gettimex64(), which would enable/disable reading of the system
clock.

> Actually, could that even just be provided by the PTP core if gettime64 isn't 
> implemented? This way new drivers only have to implement the new interface, 
> and userspace will just get the old behavior if they use the old call?

Good idea.

Thanks,

-- 
Miroslav Lichvar


Re: [Intel-wired-lan] [RFC PATCH 1/4] ptp: add PTP_SYS_OFFSET_EXTENDED ioctl

2018-10-29 Thread Miroslav Lichvar
On Fri, Oct 26, 2018 at 03:16:47PM -0700, Vinicius Costa Gomes wrote:
> > +   case PTP_SYS_OFFSET_EXTENDED:
> > +   if (!ptp->info->gettimex64) {
> > +   err = -EOPNOTSUPP;
> > +   break;
> > +   }
> > +   sysoff_extended = memdup_user((void __user *)arg,
> > + sizeof(*sysoff_extended));
> 
> Looks like you forgot to free 'sysoff_extended', no? 

Oh, I did. Thanks for catching that. I'll fix it in the next version.

-- 
Miroslav Lichvar


[PATCH v2 net] igb: shorten maximum PHC timecounter update interval

2018-10-26 Thread Miroslav Lichvar
The timecounter needs to be updated at least once per ~550 seconds in
order to avoid a 40-bit SYSTIM timestamp to be misinterpreted as an old
timestamp.

Since commit 500462a9de65 ("timers: Switch to a non-cascading wheel"),
scheduling of delayed work seems to be less accurate and a requested
delay of 540 seconds may actually be longer than 550 seconds. Also, the
PHC may be adjusted to run up to 6% faster than real time and the system
clock up to 10% slower. Shorten the delay to 360 seconds to be sure the
timecounter is updated in time.

This fixes an issue with HW timestamps on 82580/I350/I354 being off by
~1100 seconds for a few seconds every ~9 minutes.

Cc: Jacob Keller 
Cc: Richard Cochran 
Cc: Thomas Gleixner 
Signed-off-by: Miroslav Lichvar 
---
 drivers/net/ethernet/intel/igb/igb_ptp.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c 
b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 9f4d700e09df..2b95dc9c7a6a 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -51,9 +51,17 @@
  *
  * The 40 bit 82580 SYSTIM overflows every
  *   2^40 * 10^-9 /  60  = 18.3 minutes.
+ *
+ * SYSTIM is converted to real time using a timecounter. As
+ * timecounter_cyc2time() allows old timestamps, the timecounter needs
+ * to be updated at least once per half of the SYSTIM interval.
+ * Scheduling of delayed work is not very accurate, and also the NIC
+ * clock can be adjusted to run up to 6% faster and the system clock
+ * up to 10% slower, so we aim for 6 minutes to be sure the actual
+ * interval in the NIC time is shorter than 9.16 minutes.
  */
 
-#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9)
+#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 6)
 #define IGB_PTP_TX_TIMEOUT (HZ * 15)
 #define INCPERIOD_82576 BIT(E1000_TIMINCA_16NS_SHIFT)
 #define INCVALUE_82576_MASK GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0)
-- 
2.17.2



[RFC PATCH 4/4] ixgbe: add support for extended PHC gettime

2018-10-26 Thread Miroslav Lichvar
Cc: Richard Cochran 
Cc: Jacob Keller 
Signed-off-by: Miroslav Lichvar 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 57 
 1 file changed, 57 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index b3e0d8bb5cbd..d31e8d3effc7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -466,6 +466,60 @@ static int ixgbe_ptp_gettime(struct ptp_clock_info *ptp, 
struct timespec64 *ts)
return 0;
 }
 
+/**
+ * ixgbe_ptp_gettimex
+ * @ptp: the ptp clock structure
+ * @sts: structure to hold the system time before reading the PHC,
+ * the PHC timestamp, and system time after reading the PHC
+ *
+ * read the timecounter and return the correct value on ns,
+ * after converting it into a struct timespec.
+ */
+static int ixgbe_ptp_gettimex(struct ptp_clock_info *ptp,
+ struct ptp_system_timestamp *sts)
+{
+   struct ixgbe_adapter *adapter =
+   container_of(ptp, struct ixgbe_adapter, ptp_caps);
+   struct ixgbe_hw *hw = &adapter->hw;
+   unsigned long flags;
+   struct timespec64 ts;
+   u64 ns, stamp;
+
+   spin_lock_irqsave(&adapter->tmreg_lock, flags);
+
+   switch (adapter->hw.mac.type) {
+   case ixgbe_mac_X550:
+   case ixgbe_mac_X550EM_x:
+   case ixgbe_mac_x550em_a:
+   /* Upper 32 bits represent billions of cycles, lower 32 bits
+* represent cycles. However, we use timespec64_to_ns for the
+* correct math even though the units haven't been corrected
+* yet.
+*/
+   ptp_read_system_prets(sts);
+   IXGBE_READ_REG(hw, IXGBE_SYSTIMR);
+   ptp_read_system_postts(sts);
+   ts.tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+   ts.tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH);
+   stamp = timespec64_to_ns(&ts);
+   break;
+   default:
+   ptp_read_system_prets(sts);
+   stamp = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+   ptp_read_system_postts(sts);
+   stamp |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32;
+   break;
+   }
+
+   ns = timecounter_cyc2time(&adapter->hw_tc, stamp);
+
+   spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+   sts->phc_ts = ns_to_timespec64(ns);
+
+   return 0;
+}
+
 /**
  * ixgbe_ptp_settime
  * @ptp: the ptp clock structure
@@ -1217,6 +1271,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter 
*adapter)
adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599;
adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
+   adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex;
adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_x540;
@@ -1234,6 +1289,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter 
*adapter)
adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599;
adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
+   adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex;
adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
break;
@@ -1250,6 +1306,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter 
*adapter)
adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550;
adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
+   adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex;
adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
adapter->ptp_setup_sdp = NULL;
-- 
2.17.2



[RFC PATCH 2/4] e1000e: add support for extended PHC gettime

2018-10-26 Thread Miroslav Lichvar
Cc: Richard Cochran 
Cc: Jacob Keller 
Signed-off-by: Miroslav Lichvar 
---
 drivers/net/ethernet/intel/e1000e/e1000.h  |  3 ++
 drivers/net/ethernet/intel/e1000e/netdev.c | 48 +-
 drivers/net/ethernet/intel/e1000e/ptp.c| 21 ++
 3 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h 
b/drivers/net/ethernet/intel/e1000e/e1000.h
index c760dc72c520..be13227f1697 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -505,6 +505,9 @@ extern const struct e1000_info e1000_es2_info;
 void e1000e_ptp_init(struct e1000_adapter *adapter);
 void e1000e_ptp_remove(struct e1000_adapter *adapter);
 
+u64 e1000e_read_systim(struct e1000_adapter *adapter,
+  struct ptp_system_timestamp *sts);
+
 static inline s32 e1000_phy_hw_reset(struct e1000_hw *hw)
 {
return hw->phy.ops.reset(hw);
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 3ba0c90e7055..3bad1a1f36c3 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4319,13 +4319,16 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter)
 /**
  * e1000e_sanitize_systim - sanitize raw cycle counter reads
  * @hw: pointer to the HW structure
- * @systim: time value read, sanitized and returned
+ * @systim: PHC time value read, sanitized and returned
+ * @sts: structure which will contain system time before and after reading
+ * SYSTIML, may be NULL
  *
  * Errata for 82574/82583 possible bad bits read from SYSTIMH/L:
  * check to see that the time is incrementing at a reasonable
  * rate and is a multiple of incvalue.
  **/
-static u64 e1000e_sanitize_systim(struct e1000_hw *hw, u64 systim)
+static u64 e1000e_sanitize_systim(struct e1000_hw *hw, u64 systim,
+ struct ptp_system_timestamp *sts)
 {
u64 time_delta, rem, temp;
u64 systim_next;
@@ -4335,7 +4338,11 @@ static u64 e1000e_sanitize_systim(struct e1000_hw *hw, 
u64 systim)
incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
/* latch SYSTIMH on read of SYSTIML */
+   if (sts)
+   ptp_read_system_prets(sts);
systim_next = (u64)er32(SYSTIML);
+   if (sts)
+   ptp_read_system_postts(sts);
systim_next |= (u64)er32(SYSTIMH) << 32;
 
time_delta = systim_next - systim;
@@ -4353,15 +4360,16 @@ static u64 e1000e_sanitize_systim(struct e1000_hw *hw, 
u64 systim)
 }
 
 /**
- * e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
- * @cc: cyclecounter structure
+ * e1000e_read_systim - read SYSTIM register
+ * @adapter: board private structure
+ * @sts: structure which will contain system time before and after reading
+ * SYSTIML, may be NULL
  **/
-static u64 e1000e_cyclecounter_read(const struct cyclecounter *cc)
+u64 e1000e_read_systim(struct e1000_adapter *adapter,
+  struct ptp_system_timestamp *sts)
 {
-   struct e1000_adapter *adapter = container_of(cc, struct e1000_adapter,
-cc);
struct e1000_hw *hw = &adapter->hw;
-   u32 systimel, systimeh;
+   u32 systimel, systimel_2, systimeh;
u64 systim;
/* SYSTIMH latching upon SYSTIML read does not work well.
 * This means that if SYSTIML overflows after we read it but before
@@ -4369,11 +4377,19 @@ static u64 e1000e_cyclecounter_read(const struct 
cyclecounter *cc)
 * will experience a huge non linear increment in the systime value
 * to fix that we test for overflow and if true, we re-read systime.
 */
+   if (sts)
+   ptp_read_system_prets(sts);
systimel = er32(SYSTIML);
+   if (sts)
+   ptp_read_system_postts(sts);
systimeh = er32(SYSTIMH);
/* Is systimel is so large that overflow is possible? */
if (systimel >= (u32)0xffffffff - E1000_TIMINCA_INCVALUE_MASK) {
-   u32 systimel_2 = er32(SYSTIML);
+   if (sts)
+   ptp_read_system_prets(sts);
+   systimel_2 = er32(SYSTIML);
+   if (sts)
+   ptp_read_system_postts(sts);
if (systimel > systimel_2) {
/* There was an overflow, read again SYSTIMH, and use
 * systimel_2
@@ -4386,11 +4402,23 @@ static u64 e1000e_cyclecounter_read(const struct 
cyclecounter *cc)
systim |= (u64)systimeh << 32;
 
if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW)
-   systim = e1000e_sanitize_systim(hw, systim);
+   systim = e1000e_sanitize_systim(hw, systim, sts);
 
return systim;
 }
 
+/**
+

[RFC PATCH 1/4] ptp: add PTP_SYS_OFFSET_EXTENDED ioctl

2018-10-26 Thread Miroslav Lichvar
The PTP_SYS_OFFSET ioctl, which can be used to measure the offset
between a PHC and the system clock, includes the total time that the
gettime64 function of a driver needs to read the PHC timestamp.

This typically involves reading of multiple PCI registers (sometimes in
multiple iterations) and the register that contains the lowest bits of
the timestamp is not read in the middle between the two readings of the
system clock. This asymmetry causes the measured offset to have a
significant error.

Introduce a new ioctl, driver function, and helper functions, which
allow the reading of the lowest register to be isolated from the other
readings in order to reduce the asymmetry. The ioctl and driver function
return three timestamps for each measurement:
- system time right before reading the lowest bits of the PHC timestamp
- PHC time
- system time immediately after reading the lowest bits of the PHC
  timestamp

Cc: Richard Cochran 
Cc: Jacob Keller 
Signed-off-by: Miroslav Lichvar 
---
 drivers/ptp/ptp_chardev.c| 39 
 include/linux/ptp_clock_kernel.h | 26 +
 include/uapi/linux/ptp_clock.h   | 12 ++
 3 files changed, 77 insertions(+)

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 2012551d93e0..1a04c437fd4f 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -124,11 +124,13 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, 
unsigned long arg)
struct ptp_clock_caps caps;
struct ptp_clock_request req;
struct ptp_sys_offset *sysoff = NULL;
+   struct ptp_sys_offset_extended *sysoff_extended = NULL;
struct ptp_sys_offset_precise precise_offset;
struct ptp_pin_desc pd;
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
struct ptp_clock_info *ops = ptp->info;
struct ptp_clock_time *pct;
+   struct ptp_system_timestamp sts;
struct timespec64 ts;
struct system_device_crosststamp xtstamp;
int enable, err = 0;
@@ -211,6 +213,43 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, 
unsigned long arg)
err = -EFAULT;
break;
 
+   case PTP_SYS_OFFSET_EXTENDED:
+   if (!ptp->info->gettimex64) {
+   err = -EOPNOTSUPP;
+   break;
+   }
+   sysoff_extended = memdup_user((void __user *)arg,
+ sizeof(*sysoff_extended));
+   if (IS_ERR(sysoff_extended)) {
+   err = PTR_ERR(sysoff_extended);
+   sysoff = NULL;
+   break;
+   }
+   if (sysoff_extended->n_samples > PTP_MAX_SAMPLES) {
+   err = -EINVAL;
+   break;
+   }
+
+   pct = &sysoff_extended->ts[0];
+   for (i = 0; i < sysoff_extended->n_samples; i++) {
+   err = ptp->info->gettimex64(ptp->info, &sts);
+   if (err)
+   break;
+   pct->sec = sts.sys_ts1.tv_sec;
+   pct->nsec = sts.sys_ts1.tv_nsec;
+   pct++;
+   pct->sec = sts.phc_ts.tv_sec;
+   pct->nsec = sts.phc_ts.tv_nsec;
+   pct++;
+   pct->sec = sts.sys_ts2.tv_sec;
+   pct->nsec = sts.sys_ts2.tv_nsec;
+   pct++;
+   }
+   if (copy_to_user((void __user *)arg, sysoff_extended,
+sizeof(*sysoff_extended)))
+   err = -EFAULT;
+   break;
+
case PTP_SYS_OFFSET:
sysoff = memdup_user((void __user *)arg, sizeof(*sysoff));
if (IS_ERR(sysoff)) {
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 51349d124ee5..79321d929925 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -39,6 +39,13 @@ struct ptp_clock_request {
 };
 
 struct system_device_crosststamp;
+
+struct ptp_system_timestamp {
+   struct timespec64 sys_ts1;
+   struct timespec64 phc_ts;
+   struct timespec64 sys_ts2;
+};
+
 /**
  * struct ptp_clock_info - decribes a PTP hardware clock
  *
@@ -75,6 +82,13 @@ struct system_device_crosststamp;
  * @gettime64:  Reads the current time from the hardware clock.
  *  parameter ts: Holds the result.
  *
+ * @gettimex64:  Reads the current time from the system clock, hardware clock,
+ *   and system clock again.
+ *   parameter sts:  The structure contains system time right
+ *   before reading the lowest bits of the PHC timestamp, the PHC
+ *   timestamp itself, and system time immediately after reading
+ *

[RFC PATCH 3/4] igb: add support for extended PHC gettime

2018-10-26 Thread Miroslav Lichvar
Cc: Richard Cochran 
Cc: Jacob Keller 
Signed-off-by: Miroslav Lichvar 
---
 drivers/net/ethernet/intel/igb/igb_ptp.c | 43 
 1 file changed, 43 insertions(+)

diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c 
b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 29ced6b74d36..6294d18b5a60 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -310,6 +310,46 @@ static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp,
return 0;
 }
 
+static int igb_ptp_gettimex(struct ptp_clock_info *ptp,
+   struct ptp_system_timestamp *sts)
+{
+   struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+  ptp_caps);
+   struct e1000_hw *hw = &igb->hw;
+   unsigned long flags;
+   u32 lo, hi;
+   u64 ns;
+
+   spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+   /* 82576 doesn't have SYSTIMR */
+   if (igb->hw.mac.type == e1000_82576) {
+   ptp_read_system_prets(sts);
+   lo = rd32(E1000_SYSTIML);
+   ptp_read_system_postts(sts);
+   hi = rd32(E1000_SYSTIMH);
+   } else {
+   ptp_read_system_prets(sts);
+   rd32(E1000_SYSTIMR);
+   ptp_read_system_postts(sts);
+   lo = rd32(E1000_SYSTIML);
+   hi = rd32(E1000_SYSTIMH);
+   }
+
+   /* SYSTIM on I210/I211 counts time in seconds and nanoseconds */
+   if (igb->hw.mac.type == e1000_i210 || igb->hw.mac.type == e1000_i211) {
+   sts->phc_ts.tv_sec = hi;
+   sts->phc_ts.tv_nsec = lo;
+   } else {
+   ns = timecounter_cyc2time(&igb->tc, ((u64)hi << 32) | lo);
+   sts->phc_ts = ns_to_timespec64(ns);
+   }
+
+   spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+   return 0;
+}
+
 static int igb_ptp_settime_82576(struct ptp_clock_info *ptp,
 const struct timespec64 *ts)
 {
@@ -1125,6 +1165,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576;
adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
adapter->ptp_caps.gettime64 = igb_ptp_gettime_82576;
+   adapter->ptp_caps.gettimex64 = igb_ptp_gettimex;
adapter->ptp_caps.settime64 = igb_ptp_settime_82576;
adapter->ptp_caps.enable = igb_ptp_feature_enable;
adapter->cc.read = igb_ptp_read_82576;
@@ -1144,6 +1185,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
adapter->ptp_caps.gettime64 = igb_ptp_gettime_82576;
+   adapter->ptp_caps.gettimex64 = igb_ptp_gettimex;
adapter->ptp_caps.settime64 = igb_ptp_settime_82576;
adapter->ptp_caps.enable = igb_ptp_feature_enable;
adapter->cc.read = igb_ptp_read_82580;
@@ -1172,6 +1214,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
adapter->ptp_caps.gettime64 = igb_ptp_gettime_i210;
+   adapter->ptp_caps.gettimex64 = igb_ptp_gettimex;
adapter->ptp_caps.settime64 = igb_ptp_settime_i210;
adapter->ptp_caps.enable = igb_ptp_feature_enable_i210;
adapter->ptp_caps.verify = igb_ptp_verify_pin;
-- 
2.17.2



[RFC PATCH 0/4] More accurate PHC<->system clock synchronization

2018-10-26 Thread Miroslav Lichvar
This series adds support for a more accurate synchronization between a
PTP hardware clock and the system clock.

The first patch adds an extended version of the PTP_SYS_OFFSET ioctl,
which returns three timestamps for each measurement. The idea is to
shorten the interval between the system timestamps to contain just the
reading of the lowest register of the PHC in order to reduce the error
in the measured offset and give a better bound on the maximum error.

The other patches add support for the new ioctl to the e1000e, igb,
and ixgbe driver. Tests with few different NICs in different machines
(and PCIe slots) show that:
- with an I219 (e1000e) the measured delay improved from 2500 to 1300 ns
  and the error in the measured offset, when compared to cross
  timestamping, was reduced by a factor of 5
- with an I210 (igb) the delay improved from 5100 to 1700 ns
- with an I350 (igb) the delay improved from 2300 to 750 ns
- with an X550 (ixgbe) the delay improved from 1950 to 650 ns

There is some duplication of code in the igb and ixgbe drivers, which I
don't like very much, but I thought it's better than extending and
wrapping the existing functions like in the e1000e driver. Also, mixing
SYSTIM and "system time" in the code will probably be confusing.

I wasn't able to find a better name for the ioctl, the structures, and
the driver function. If anyone has suggestions, please let me know.

Miroslav Lichvar (4):
  ptp: add PTP_SYS_OFFSET_EXTENDED ioctl
  e1000e: add support for extended PHC gettime
  igb: add support for extended PHC gettime
  ixgbe: add support for extended PHC gettime

 drivers/net/ethernet/intel/e1000e/e1000.h|  3 ++
 drivers/net/ethernet/intel/e1000e/netdev.c   | 48 +
 drivers/net/ethernet/intel/e1000e/ptp.c  | 21 
 drivers/net/ethernet/intel/igb/igb_ptp.c | 43 +++
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 57 
 drivers/ptp/ptp_chardev.c| 39 ++
 include/linux/ptp_clock_kernel.h | 26 +
 include/uapi/linux/ptp_clock.h   | 12 +
 8 files changed, 239 insertions(+), 10 deletions(-)

-- 
2.17.2



Re: [PATCH] igb: shorten maximum PHC timecounter update interval

2018-10-26 Thread Miroslav Lichvar
On Fri, Oct 12, 2018 at 07:05:30AM -0700, Richard Cochran wrote:
> On Fri, Oct 12, 2018 at 01:13:39PM +0200, Miroslav Lichvar wrote:
> > Since commit 500462a9d ("timers: Switch to a non-cascading wheel"),
> > scheduling of delayed work seems to be less accurate and a requested
> > delay of 540 seconds may actually be longer than 550 seconds. Shorten
> > the delay to 480 seconds to be sure the timecounter is updated in time.
> 
> Good catch.  This timer wheel change will affect other, similar
> drivers.  Guess I'll go through and adjust their timeouts, too.

I just realized that we need to fit there also any frequency
adjustments of the PHC and system clock. The PHC can be set to run up
to 6% faster and the system clock can be slowed down by up to 10%.

Those 480 seconds in the igb driver is not short enough for that.
Should I fix and resend this patch, or send a new one?

Other drivers may have a similar problem.

-- 
Miroslav Lichvar


Re: Improving accuracy of PHC readings

2018-10-23 Thread Miroslav Lichvar
On Fri, Oct 19, 2018 at 04:52:13PM +0000, Keller, Jacob E wrote:
> > This should significantly improve the accuracy of the synchronization,
> > reduce the uncertainty in the readings to less than a half or third,
> > and also reduce the jitter as there are fewer register reads sensitive
> > to the PCIe delay.
> > 
> > What do you think?
> > 
> 
> Nice! I think this is good. I'd love to see some data to back it up, but it 
> makes sense to me.

I tried a quick hack with an X550 and I219. The delay dropped from
about 2940 ns to 1040 ns on the first port of the X550, from 1920 ns
to 660 ns on the second port of the X550, and from 2500 ns to 1300 ns
on the I219.

The I219 supports the SYS_OFFSET_PRECISE ioctl (cross timestamping),
which we can use for comparison. The difference between the offsets
calculated using the two ioctls was about 500-600 ns before and now it
is about -50--150 ns.

I was not able to find any information on how accurate cross
timestamping on this HW is actually supposed to be, so I'm wondering
which of the two is closer to the truth.

Here is an output from phc2sys with the I219:

Before:
phc offset   -59 s2 freq +40 delay   2527
phc offset19 s2 freq+101 delay   2526
phc offset   -23 s2 freq +64 delay   2522
phc offset46 s2 freq+126 delay   2535
phc offset   -32 s2 freq +62 delay   2530
phc offset   -10 s2 freq +75 delay   2526
phc offset   102 s2 freq+184 delay   2523

After:
phc offset17 s2 freq+105 delay   1298
phc offset47 s2 freq+140 delay   1299
phc offset   -42 s2 freq +65 delay   1293
phc offset-6 s2 freq +88 delay   1299
phc offset34 s2 freq+127 delay   1300
phc offset   -14 s2 freq +89 delay   1301
phc offset   -86 s2 freq +13 delay   1296
phc offset   -21 s2 freq +52 delay   1298

-- 
Miroslav Lichvar


Re: Improving accuracy of PHC readings

2018-10-23 Thread Miroslav Lichvar
On Mon, Oct 22, 2018 at 03:48:02PM -0700, Richard Cochran wrote:
> On Fri, Oct 19, 2018 at 11:51:37AM +0200, Miroslav Lichvar wrote:
> > The extra timestamp doesn't fit the API of the PTP_SYS_OFFSET ioctl,
> > so it would need to shift the timestamp it returns by the missing
> > intervals (assuming the frequency offset between the PHC and system
> > clock is small), or a new ioctl could be introduced that would return
> > all timestamps in an array looking like this:
> > 
> > [sys, phc, sys, sys, phc, sys, ...]
> 
> How about a new ioctl with number of trials as input and single offset
> as output?

The difference between the system timestamps is important as it gives
an upper bound on the error in the offset, so I think the output
should be at least a pair of offset and delay.

The question is from which triplet the offset and delay should be
calculated. The one with the minimum delay is a good choice, but it's
not the only option. For instance, an average or median from all
triplets that have delay smaller than the minimum + 30 nanoseconds may
give a more stable offset.

This is not that different from an NTP client filtering measurements
made over network. I'm not sure if we should try to solve it in the
kernel or drivers. My preference would be to give the user space all
the data and process it there.

If the increased size of the array is an issue, we can reduce the
maximum number of readings.

Does that make sense?

-- 
Miroslav Lichvar


Improving accuracy of PHC readings

2018-10-19 Thread Miroslav Lichvar
I think there might be a way we could significantly improve the
accuracy of synchronization between the system clock and a PTP
hardware clock, at least with some network drivers.

Currently, the PTP_SYS_OFFSET ioctl reads the system clock, reads the
PHC using the gettime64 function of the driver, and reads the system
clock again. The ioctl can repeat this to provide multiple readings to
the user space.

phc2sys (or another program synchronizing the system clock to the PHC)
assumes the PHC timestamps were captured in the middle between the two
closest system clock timestamps.

The trouble is that gettime64 typically reads multiple (2-3) registers
and the timestamp is latched on the first one, so the assumption about
middle point is wrong. There is an asymmetry, even if the delays on
the PCIe bus are perfectly symmetric.

A solution to this would be a new driver function that wraps the
latching register read with readings of the system clock and return
three timestamps instead of one. For example:

ktime_get_real_ts64(&sys_ts1);
IXGBE_READ_REG(hw, IXGBE_SYSTIMR);
ktime_get_real_ts64(&sys_ts2);
phc_ts.tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
phc_ts.tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH);
 
The extra timestamp doesn't fit the API of the PTP_SYS_OFFSET ioctl,
so it would need to shift the timestamp it returns by the missing
intervals (assuming the frequency offset between the PHC and system
clock is small), or a new ioctl could be introduced that would return
all timestamps in an array looking like this:

[sys, phc, sys, sys, phc, sys, ...]

This should significantly improve the accuracy of the synchronization,
reduce the uncertainty in the readings to less than a half or third,
and also reduce the jitter as there are fewer register reads sensitive
to the PCIe delay.

What do you think?

-- 
Miroslav Lichvar


[PATCH] igb: shorten maximum PHC timecounter update interval

2018-10-12 Thread Miroslav Lichvar
The timecounter needs to be updated at least once per ~550 seconds in
order to avoid a 40-bit SYSTIM timestamp to be misinterpreted as an old
timestamp.

Since commit 500462a9d ("timers: Switch to a non-cascading wheel"),
scheduling of delayed work seems to be less accurate and a requested
delay of 540 seconds may actually be longer than 550 seconds. Shorten
the delay to 480 seconds to be sure the timecounter is updated in time.

This fixes an issue with HW timestamps on 82580/I350/I354 being off by
~1100 seconds for a few seconds every ~9 minutes.

Cc: Jacob Keller 
Cc: Richard Cochran 
Cc: Thomas Gleixner 
Signed-off-by: Miroslav Lichvar 
---
 drivers/net/ethernet/intel/igb/igb_ptp.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c 
b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 9f4d700e09df..29ced6b74d36 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -51,9 +51,15 @@
  *
  * The 40 bit 82580 SYSTIM overflows every
  *   2^40 * 10^-9 /  60  = 18.3 minutes.
+ *
+ * SYSTIM is converted to real time using a timecounter. As
+ * timecounter_cyc2time() allows old timestamps, the timecounter
+ * needs to be updated at least once per half of the SYSTIM interval.
+ * Scheduling of delayed work is not very accurate, so we aim for 8
+ * minutes to be sure the actual interval is shorter than 9.16 minutes.
  */
 
-#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9)
+#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 8)
 #define IGB_PTP_TX_TIMEOUT (HZ * 15)
 #define INCPERIOD_82576 BIT(E1000_TIMINCA_16NS_SHIFT)
 #define INCVALUE_82576_MASK GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0)
-- 
2.17.1



Re: [RFC v3 net-next 13/18] net/sched: Introduce the TBS Qdisc

2018-04-12 Thread Miroslav Lichvar
On Thu, Apr 12, 2018 at 08:03:49AM -0700, Richard Cochran wrote:
> On Wed, Apr 11, 2018 at 04:38:44PM -0700, Jesus Sanchez-Palencia wrote:
> > Just breaking this down a bit, yes, TAI is the network time base, and the 
> > NICs
> > PTP clock use that because PTP is (commonly) based on TAI. After the PHCs 
> > have
> > been synchronized over the network (e.g. with ptp4l), my understanding is 
> > that
> > if applications want to use the clockid_t CLOCK_TAI as a network clock 
> > reference
> > it's required that something (i.e. phc2sys) is synchronizing the PHCs and 
> > the
> > system clock, and also that something calls adjtime to apply the TAI vs UTC
> > offset to CLOCK_TAI.
> 
> Yes.  I haven't seen any distro that sets the TAI-UTC offset after
> boot, nor are there any user space tools for this.  The kernel is
> ready, though.

FWIW, the default NTP configuration in Fedora sets the kernel TAI-UTC
offset.

> > I was thinking about the full offload use-cases, thus when no scheduling is
> > happening inside the qdiscs. Applications could just read the time from the 
> > PHC
> > clocks directly without having to rely on any of the above. On this case,
> > userspace would use DYNAMIC_CLOCK just to flag that this is the case, but I 
> > must
> > admit it's not clear to me how common of a use-case that is, or even if it 
> > makes
> > sense.
> 
> 1588 allows only two timescales, TAI and ARB-itrary.  Although it
> doesn't make too much sense to use ARB, still people will do strange
> things.  Probably some people use UTC.  I am not advocating supporting
> alternate timescales, just pointing out the possibility.

There is also the possibility that the NIC clock is not synchronized
to anything. For synchronization of the system clock it's easier to
leave it free running and only track its phase/frequency offset to
allow conversion between the PHC and system time.

-- 
Miroslav Lichvar


Socket error queue with timestamping and SOF_TIMESTAMPING_OPT_CMSG

2018-04-03 Thread Miroslav Lichvar
I came across an interesting issue with error messages in sockets with
enabled timestamping using the SOF_TIMESTAMPING_OPT_CMSG option. When
the socket is connected and there is an error (e.g. due to destination
unreachable ICMP), select() indicates there is an exception on the
socket, but recvmsg() reading from the error queue returns with EAGAIN
and the application gets stuck in an infinite loop.

Some observations:
- it happens on both AF_INET and AF_INET6 SOCK_DGRAM sockets
- enabling the IP_RECVERR option avoids getting EAGAIN
- using recvmmsg() instead of recvmsg() avoids getting EAGAIN
  (that is why I didn't notice it earlier)
- disabling TX timestamping doesn't prevent the socket from having an
  exception
- reading from the non-error queue stops the loop

Is this a bug?

It looks to me like SOF_TIMESTAMPING_OPT_CMSG implicitly, but only
partially, enables IP_RECVERR. Are applications required to use
IP_RECVERR in this case? My expectation was that without IP_RECVERR
the error queue would only have messages with transmit timestamps, and
nothing would change with reporting of real errors. Also, from the
documentation I had an impression that SOF_TIMESTAMPING_OPT_CMSG is a
no-op on AF_INET6 sockets.

-- 
Miroslav Lichvar


Re: [RFC v3 net-next 08/18] net: SO_TXTIME: Add clockid and drop_if_late params

2018-03-08 Thread Miroslav Lichvar
On Wed, Mar 07, 2018 at 02:45:45PM -0800, Eric Dumazet wrote:
> On Wed, 2018-03-07 at 13:52 -0800, Jesus Sanchez-Palencia wrote:
> > > Do we really need 32 bits for a clockid_t ?
> > 
> > There is a 2 bytes hole just after tc_index, so a u16 clockid would
> > fit
> > perfectly without increasing the skbuffs size / cachelines any
> > further.

> Not convincing really :/
> 
> Next big feature needing one bit in sk_buff will add it, and add a
> 63bit hole.

Would it be possible to put the clockid in skb_shared_info? If that's
technically difficult or does not make sense, I'm ok with the clockid
being a socket option.

If a packet is sent immediately after changing the clockid via
setsockopt(), will it still be guaranteed that the packet is
restricted by the new id?

> Why do we _really_ need dynamic clocks being supported in core
> networking stack, other than 'that is needed to send 2 packets per
> second with precise departure time and arbitrary user defined clocks,
> so lets do that, and do not care of the other 10,000,000 packets we
> receive/send per second'

Well, I'd not expect it to be a common use case, but a public NTP
server could be sending millions of packets per second in traffic
peaks (typically at *:00:00) over multiple interfaces.

-- 
Miroslav Lichvar


Re: [Intel-wired-lan] [RFC v2 net-next 01/10] net: Add a new socket option for a future transmit time.

2018-02-13 Thread Miroslav Lichvar
On Mon, Feb 12, 2018 at 02:39:06PM -0800, Jesus Sanchez-Palencia wrote:
> On 01/18/2018 12:42 AM, Miroslav Lichvar wrote:
> > Please keep in mind that the PHCs and the system clock don't have to
> > be synchronized to each other. If I understand the rest of the series
> > correctly, there is an assumption that the PHCs are keeping time in
> > TAI and CLOCK_TAI can be used as a fallback.
> 
> Just to double-check, imagine that I've configured the qdisc for
> SW best-effort and with clockid CLOCK_REALTIME. When it receives a
> packet with the clockid of a /dev/ptpX, the qdisc should just drop that
> packet, right?

Yes, I think it should drop it. The kernel does not know the offset
between the two clocks (they don't even have to be synchronized), so
it cannot convert a PHC-based TX time to the system time.

-- 
Miroslav Lichvar


Re: [Intel-wired-lan] [RFC v2 net-next 01/10] net: Add a new socket option for a future transmit time.

2018-02-01 Thread Miroslav Lichvar
On Wed, Jan 31, 2018 at 04:49:36PM -0800, Jesus Sanchez-Palencia wrote:
> On 01/18/2018 09:13 AM, Richard Cochran wrote:
> > Right, the clockid_t should be passed in through the CMSG along with
> > the time.
> 
> While implementing this today it crossed my mind that why don't we have the
> clockid_t set per socket (e.g. as an argument to SO_TXTIME) instead of per 
> packet?

I suspect that might have an impact on the performance. Even if the
application doesn't use sendmmsg(), it would possibly have to call
setsockopt() before each sendmsg() to change the clockid_t, right?

If clockid_t could be set per packet, a special value could be used
to allow sending on interfaces that don't support it.

> The only use-case that we could think of that would be 'blocked' was using
> sendmmsg() to send a packet to different interfaces with a single syscall, but
> I'm not sure how common that is.

The SO_TXTIME option will make sendmmsg() useful in applications where
it wasn't before. For instance, an NTP server will be able to batch
multiple responses as their transmit timestamps can be set accurately
in advance and it's no longer necessary to send the responses as soon
as they are assembled.

I think it would be nice if the sendmmsg() calls didn't have to be split
by clockid_t.

-- 
Miroslav Lichvar


Re: [Intel-wired-lan] [RFC v2 net-next 01/10] net: Add a new socket option for a future transmit time.

2018-01-25 Thread Miroslav Lichvar
On Fri, Jan 19, 2018 at 06:09:15PM -0800, Richard Cochran wrote:
> On Fri, Jan 19, 2018 at 04:15:46PM -0500, Willem de Bruijn wrote:
> > > +   if (cmsg->cmsg_len != CMSG_LEN(sizeof(ktime_t)))
> > > +   return -EINVAL;
> > 
> > I don't see any existing reference to ktime_t in include/uapi. Just use a 
> > s64?
> 
> Agreed.  I didn't see the point of switching to ktime, either.

Do I understand it correctly that no other interface is using
nanoseconds since 1970? We probably don't have to worry about year
2262 yet, but wouldn't it be better to make it consistent with the
timestamping API using timespec? Or is it just better to avoid the
64/32-bit mess of time_t?

-- 
Miroslav Lichvar


Re: [Intel-wired-lan] [RFC v2 net-next 01/10] net: Add a new socket option for a future transmit time.

2018-01-18 Thread Miroslav Lichvar
On Wed, Jan 17, 2018 at 03:06:12PM -0800, Jesus Sanchez-Palencia wrote:
> From: Richard Cochran <rcoch...@linutronix.de>
> 
> This patch introduces SO_TXTIME.  User space enables this option in
> order to pass a desired future transmit time in a CMSG when calling
> sendmsg(2).
> 
> A new field is added to struct sockcm_cookie, and the tstamp from
> skbuffs will be used later on.

In the discussion about the v1 patchset, there was a question if the
cmsg should include a clockid_t. Without that, how can an application
prevent the packet from being sent using an incorrect clock, e.g.
the system clock when it expects it to be a PHC, or a different PHC
when the socket is not bound to a specific interface?

At least in some applications it would be preferred to not send a
packet at all instead of sending it at a wrong time.

Please keep in mind that the PHCs and the system clock don't have to
be synchronized to each other. If I understand the rest of the series
correctly, there is an assumption that the PHCs are keeping time in
TAI and CLOCK_TAI can be used as a fallback.

-- 
Miroslav Lichvar


Re: [PATCH RFC V1 net-next 0/6] Time based packet transmission

2017-09-19 Thread Miroslav Lichvar
On Mon, Sep 18, 2017 at 09:41:15AM +0200, Richard Cochran wrote:
> This series is an early RFC that introduces a new socket option
> allowing time based transmission of packets.  This option will be
> useful in implementing various real time protocols over Ethernet,
> including but not limited to P802.1Qbv, which is currently finding
> its way into 802.1Q.

If I understand it correctly, this also allows us to make a PTP/NTP
"one-step" clock with HW that doesn't support it directly.

> * Open questions about SO_TXTIME semantics
> 
>   - What should the kernel do if the dialed Tx time is in the past?
> Should the packet be sent ASAP, or should we throw an error?

Dropping the packet with an error would make more sense to me.

>   - What should the timescale be for the dialed Tx time?  Should the
> kernel select UTC when using the SW Qdisc and the HW time
> otherwise?  Or should the socket option include a clockid_t?

I think for applications that don't (want to) bind their socket to a
specific interface it would be useful if the cmsg specified clockid_t
or maybe if_index. If the packet would be sent using a different
PHC/interface, it should be dropped.

>   | | plain preempt_rt | so_txtime | txtime @ 250 us |
>   |-+--+---+-|
>   | min:|+1.940800e+04 | +4.72e+02 |   +4.72e+02 |
>   | max:|+7.556000e+04 | +5.68e+02 |   +5.76e+02 |
>   | pk-pk:  |+5.615200e+04 | +9.60e+01 |   +1.04e+02 |
>   | mean:   |+3.292776e+04 | +5.072274e+02 |   +5.073602e+02 |
>   | stddev: |+6.514709e+03 | +1.310849e+01 |   +1.507144e+01 |
>   | count:  |   60 |60 | 240 |
> 
>   Using so_txtime, the peak to peak jitter is about 100 nanoseconds,

Nice!

-- 
Miroslav Lichvar


[PATCH ethtool 2/2] ethtool: add support for HWTSTAMP_FILTER_NTP_ALL

2017-05-23 Thread Miroslav Lichvar
Add HWTSTAMP_FILTER_NTP_ALL to the list of hardware receive
filters which can be printed by ethtool -T.

CC: Richard Cochran <richardcoch...@gmail.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 ethtool.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ethtool.c b/ethtool.c
index ad18704..85b320c 100644
--- a/ethtool.c
+++ b/ethtool.c
@@ -1589,7 +1589,7 @@ static char *tx_type_labels[N_TX_TYPES] = {
"one-step-sync (HWTSTAMP_TX_ONESTEP_SYNC)",
 };
 
-#define N_RX_FILTERS (HWTSTAMP_FILTER_PTP_V2_DELAY_REQ + 1)
+#define N_RX_FILTERS (HWTSTAMP_FILTER_NTP_ALL + 1)
 
 static char *rx_filter_labels[N_RX_FILTERS] = {
"none  (HWTSTAMP_FILTER_NONE)",
@@ -1607,6 +1607,7 @@ static char *rx_filter_labels[N_RX_FILTERS] = {
"ptpv2-event   (HWTSTAMP_FILTER_PTP_V2_EVENT)",
"ptpv2-sync(HWTSTAMP_FILTER_PTP_V2_SYNC)",
"ptpv2-delay-req   (HWTSTAMP_FILTER_PTP_V2_DELAY_REQ)",
+   "ntp-all   (HWTSTAMP_FILTER_NTP_ALL)",
 };
 
 static int dump_tsinfo(const struct ethtool_ts_info *info)
-- 
2.9.3



[PATCH ethtool 0/2] Add new receive timestamping filter

2017-05-23 Thread Miroslav Lichvar
These two patches update ethtool to be able to print the new NTP
timestamping filter in the ethtool -T command.

Miroslav Lichvar (2):
  net_tstamp.h: sync with net-next
  ethtool: add support for HWTSTAMP_FILTER_NTP_ALL

 ethtool.c |  3 ++-
 net_tstamp-copy.h | 52 +---
 2 files changed, 43 insertions(+), 12 deletions(-)

-- 
2.9.3



[PATCH ethtool 1/2] net_tstamp.h: sync with net-next

2017-05-23 Thread Miroslav Lichvar
This covers kernel changes up to:

commit b50a5c70ffa4fd6b6da324ab54c84adf48fb17d9
Author: Miroslav Lichvar <mlich...@redhat.com>
Date:   Fri May 19 17:52:40 2017 +0200

net: allow simultaneous SW and HW transmit timestamping

Add SOF_TIMESTAMPING_OPT_TX_SWHW option to allow an outgoing packet to
be looped to the socket's error queue with a software timestamp even
when a hardware transmit timestamp is expected to be provided by the
driver.

Applications using this option will receive two separate messages from
the error queue, one with a software timestamp and the other with a
hardware timestamp. As the hardware timestamp is saved to the shared skb
info, which may happen before the first message with software timestamp
is received by the application, the hardware timestamp is copied to the
SCM_TIMESTAMPING control message only when the skb has no software
timestamp or it is an incoming packet.

While changing sw_tx_timestamp(), inline it in skb_tx_timestamp() as
there are no other users.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
    Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
Acked-by: Willem de Bruijn <will...@google.com>
Signed-off-by: David S. Miller <da...@davemloft.net>

CC: Richard Cochran <richardcoch...@gmail.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 net_tstamp-copy.h | 52 +---
 1 file changed, 41 insertions(+), 11 deletions(-)

diff --git a/net_tstamp-copy.h b/net_tstamp-copy.h
index ae5df12..3d421d9 100644
--- a/net_tstamp-copy.h
+++ b/net_tstamp-copy.h
@@ -9,6 +9,7 @@
 #ifndef _NET_TIMESTAMPING_H
 #define _NET_TIMESTAMPING_H
 
+#include <linux/types.h>
 #include <linux/socket.h>   /* for SO_TIMESTAMPING */
 
 /* SO_TIMESTAMPING gets an integer bit field comprised of these values */
@@ -20,23 +21,42 @@ enum {
SOF_TIMESTAMPING_SOFTWARE = (1<<4),
SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5),
SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6),
-   SOF_TIMESTAMPING_MASK =
-   (SOF_TIMESTAMPING_RAW_HARDWARE - 1) |
-   SOF_TIMESTAMPING_RAW_HARDWARE
+   SOF_TIMESTAMPING_OPT_ID = (1<<7),
+   SOF_TIMESTAMPING_TX_SCHED = (1<<8),
+   SOF_TIMESTAMPING_TX_ACK = (1<<9),
+   SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
+   SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
+   SOF_TIMESTAMPING_OPT_STATS = (1<<12),
+   SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
+   SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
+
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW,
+   SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
+SOF_TIMESTAMPING_LAST
 };
 
+/*
+ * SO_TIMESTAMPING flags are either for recording a packet timestamp or for
+ * reporting the timestamp to user space.
+ * Recording flags can be set both via socket options and control messages.
+ */
+#define SOF_TIMESTAMPING_TX_RECORD_MASK (SOF_TIMESTAMPING_TX_HARDWARE | \
+SOF_TIMESTAMPING_TX_SOFTWARE | \
+SOF_TIMESTAMPING_TX_SCHED | \
+SOF_TIMESTAMPING_TX_ACK)
+
 /**
- * struct hwtstamp_config - %SIOCSHWTSTAMP parameter
+ * struct hwtstamp_config - %SIOCGHWTSTAMP and %SIOCSHWTSTAMP parameter
  *
- * @flags: no flags defined right now, must be zero
+ * @flags: no flags defined right now, must be zero for %SIOCSHWTSTAMP
  * @tx_type:   one of HWTSTAMP_TX_*
- * @rx_type:   one of one of HWTSTAMP_FILTER_*
+ * @rx_filter: one of HWTSTAMP_FILTER_*
  *
- * %SIOCSHWTSTAMP expects a &struct ifreq with a ifr_data pointer to
- * this structure. dev_ifsioc() in the kernel takes care of the
- * translation between 32 bit userspace and 64 bit kernel. The
- * structure is intentionally chosen so that it has the same layout on
- * 32 and 64 bit systems, don't break this!
+ * %SIOCGHWTSTAMP and %SIOCSHWTSTAMP expect a &struct ifreq with a
+ * ifr_data pointer to this structure.  For %SIOCSHWTSTAMP, if the
+ * driver or hardware does not support the requested @rx_filter value,
+ * the driver may use a more general filter mode.  In this case
+ * @rx_filter will indicate the actual mode on return.
  */
 struct hwtstamp_config {
int flags;
@@ -108,6 +128,16 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_PTP_V2_SYNC,
/* PTP v2/802.AS1, any layer, Delay_req packet */
HWTSTAMP_FILTER_PTP_V2_DELAY_REQ,
+
+   /* NTP, UDP, all versions and packet modes */
+   HWTSTAMP_FILTER_NTP_ALL,
+};
+
+/* SCM_TIMESTAMPING_PKTINFO control message */
+struct scm_ts_pktinfo {
+   __u32 if_index;
+   __u32 pkt_length;
+   __u32 reserved[2];
 };
 
 #endif /* _NET_TIMESTAMPING_H */
-- 
2.9.3



[PATCH v6 net-next 6/7] net: allow simultaneous SW and HW transmit timestamping

2017-05-19 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_TX_SWHW option to allow an outgoing packet to
be looped to the socket's error queue with a software timestamp even
when a hardware transmit timestamp is expected to be provided by the
driver.

Applications using this option will receive two separate messages from
the error queue, one with a software timestamp and the other with a
hardware timestamp. As the hardware timestamp is saved to the shared skb
info, which may happen before the first message with software timestamp
is received by the application, the hardware timestamp is copied to the
SCM_TIMESTAMPING control message only when the skb has no software
timestamp or it is an incoming packet.

While changing sw_tx_timestamp(), inline it in skb_tx_timestamp() as
there are no other users.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt |  8 
 include/linux/skbuff.h| 10 ++
 include/uapi/linux/net_tstamp.h   |  3 ++-
 net/core/skbuff.c |  4 
 net/socket.c  | 20 ++--
 5 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 50eb0e5..196ba17 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -203,6 +203,14 @@ SOF_TIMESTAMPING_OPT_PKTINFO:
   enabled and the driver is using NAPI. The struct contains also two
   other fields, but they are reserved and undefined.
 
+SOF_TIMESTAMPING_OPT_TX_SWHW:
+
+  Request both hardware and software timestamps for outgoing packets
+  when SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE
+  are enabled at the same time. If both timestamps are generated,
+  two separate messages will be looped to the socket's error queue,
+  each containing just one timestamp.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1f8028c..3b2e284 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3254,13 +3254,6 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 void skb_tstamp_tx(struct sk_buff *orig_skb,
   struct skb_shared_hwtstamps *hwtstamps);
 
-static inline void sw_tx_timestamp(struct sk_buff *skb)
-{
-   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
-   !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
-   skb_tstamp_tx(skb, NULL);
-}
-
 /**
  * skb_tx_timestamp() - Driver hook for transmit timestamping
  *
@@ -3276,7 +3269,8 @@ static inline void sw_tx_timestamp(struct sk_buff *skb)
 static inline void skb_tx_timestamp(struct sk_buff *skb)
 {
skb_clone_tx_timestamp(skb);
-   sw_tx_timestamp(skb);
+   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
+   skb_tstamp_tx(skb, NULL);
 }
 
 /**
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index dee74d3..3d421d9 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -28,8 +28,9 @@ enum {
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
+   SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 346d3e8..68c02df 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3875,6 +3875,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
 
+   if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+   skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+   return;
+
tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_may_tx_timestamp(sk, tsonly))
return;
diff --git a/net/socket.c b/net/socket.c
index 67db7d8..cb355a7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -662,6 +662,19 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
return skb->pkt_type == PACKET_OUTGOING;
 }
 
+/* On transmit, software and hardware timestamps are returned independently.
+ * As the two skb clones share the hardware timestamp, which may be updated
+ * before the software timestamp is received, a hardware TX timestamp may be
+ * returned only if there is no software TX timestamp. 

[PATCH v6 net-next 7/7] net: ethernet: update drivers to make both SW and HW TX timestamps

2017-05-19 Thread Miroslav Lichvar
Some drivers were calling the skb_tx_timestamp() function only when
a hardware timestamp was not requested. Now that applications can use
the SOF_TIMESTAMPING_OPT_TX_SWHW option to request both software and
hardware timestamps, the drivers need to be modified to unconditionally
call skb_tx_timestamp().

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c  | 3 +--
 drivers/net/ethernet/intel/e1000e/netdev.c| 4 ++--
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c   | 3 +--
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++
 4 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c 
b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 89b21d7..5a2ad9c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1391,8 +1391,7 @@ static void xgbe_prep_tx_tstamp(struct xgbe_prv_data 
*pdata,
spin_unlock_irqrestore(&pdata->tstamp_lock, flags);
}
 
-   if (!XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, PTP))
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 }
 
 static void xgbe_prep_vlan(struct sk_buff *skb, struct xgbe_packet_data 
*packet)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0ff9295..6ed3bc4 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5868,10 +5868,10 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
adapter->tx_hwtstamp_skb = skb_get(skb);
adapter->tx_hwtstamp_start = jiffies;
schedule_work(&adapter->tx_hwtstamp_work);
-   } else {
-   skb_tx_timestamp(skb);
}
 
+   skb_tx_timestamp(skb);
+
netdev_sent_queue(netdev, skb->len);
e1000_tx_queue(tx_ring, tx_flags, count);
/* Make sure there is space in the ring for the next send. */
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c 
b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 1e59435..89831ad 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -1418,8 +1418,7 @@ static netdev_tx_t sxgbe_xmit(struct sk_buff *skb, struct 
net_device *dev)
priv->hw->desc->tx_enable_tstamp(first_desc);
}
 
-   if (!tqueue->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
priv->hw->dma->enable_dma_transmission(priv->ioaddr, txq_index);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index cce862b..27c12e7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2880,8 +2880,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, 
struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
 
-   if (!priv->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
 priv->hwts_tx_en)) {
@@ -3084,8 +3083,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, 
struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
 
-   if (!priv->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
/* Ready to fill the first descriptor and set the OWN bit w/o any
 * problems because all the descriptors are actually ready to be
-- 
2.9.3



[PATCH v6 net-next 5/7] net: fix documentation of struct scm_timestamping

2017-05-19 Thread Miroslav Lichvar
The scm_timestamping struct may return multiple non-zero fields, e.g.
when both software and hardware RX timestamping is enabled, or when the
SO_TIMESTAMP(NS) option is combined with SCM_TIMESTAMPING and a false
software timestamp is generated in the recvmsg() call in order to always
return a SCM_TIMESTAMP(NS) message.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index ce11e3a..50eb0e5 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -322,7 +322,7 @@ struct scm_timestamping {
 };
 
 The structure can return up to three timestamps. This is a legacy
-feature. Only one field is non-zero at any time. Most timestamps
+feature. At least one field is non-zero at any time. Most timestamps
 are passed in ts[0]. Hardware timestamps are passed in ts[2].
 
 ts[1] used to hold hardware timestamps converted to system time.
@@ -331,6 +331,12 @@ a HW PTP clock source, to allow time conversion in 
userspace and
 optionally synchronize system time with a userspace PTP stack such
 as linuxptp. For the PTP clock API, see Documentation/ptp/ptp.txt.
 
+Note that if the SO_TIMESTAMP or SO_TIMESTAMPNS option is enabled
+together with SO_TIMESTAMPING using SOF_TIMESTAMPING_SOFTWARE, a false
+software timestamp will be generated in the recvmsg() call and passed
+in ts[0] when a real software timestamp is missing. This happens also
+on hardware transmit timestamps.
+
 2.1.1 Transmit timestamps with MSG_ERRQUEUE
 
 For transmit timestamps the outgoing packet is looped back to the
-- 
2.9.3



[PATCH v6 net-next 4/7] net: add new control message for incoming HW-timestamped packets

2017-05-19 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_PKTINFO option to request a new control message
for incoming packets with hardware timestamps. It contains the index of
the real interface which received the packet and the length of the
packet at layer 2.

The index is useful with bonding, bridges and other interfaces, where
IP_PKTINFO doesn't allow applications to determine which PHC made the
timestamp. With the L2 length (and link speed) it is possible to
transpose preamble timestamps to trailer timestamps, which are used in
the NTP protocol.

While this information could be provided by two new socket options
independently from timestamping, it doesn't look like they would be very
useful. With this option any performance impact is limited to hardware
timestamping.

Use dev_get_by_napi_id() to get the device and its index. On kernels
with disabled CONFIG_NET_RX_BUSY_POLL or drivers not using NAPI, a zero
index will be returned in the control message.

CC: Richard Cochran <richardcoch...@gmail.com>
Acked-by: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt | 10 ++
 include/uapi/asm-generic/socket.h |  2 ++
 include/uapi/linux/net_tstamp.h   | 11 ++-
 net/socket.c  | 27 ++-
 4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 96f5069..ce11e3a 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -193,6 +193,16 @@ SOF_TIMESTAMPING_OPT_STATS:
   the transmit timestamps, such as how long a certain block of
   data was limited by peer's receiver window.
 
+SOF_TIMESTAMPING_OPT_PKTINFO:
+
+  Enable the SCM_TIMESTAMPING_PKTINFO control message for incoming
+  packets with hardware timestamps. The message contains struct
+  scm_ts_pktinfo, which supplies the index of the real interface which
+  received the packet and its length at layer 2. A valid (non-zero)
+  interface index will be returned only if CONFIG_NET_RX_BUSY_POLL is
+  enabled and the driver is using NAPI. The struct contains also two
+  other fields, but they are reserved and undefined.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
diff --git a/include/uapi/asm-generic/socket.h 
b/include/uapi/asm-generic/socket.h
index 2b48856..a5f6e81 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -100,4 +100,6 @@
 
 #define SO_COOKIE  57
 
+#define SCM_TIMESTAMPING_PKTINFO   58
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 0749fb1..dee74d3 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -9,6 +9,7 @@
 #ifndef _NET_TIMESTAMPING_H
 #define _NET_TIMESTAMPING_H
 
+#include <linux/types.h>
 #include <linux/socket.h>   /* for SO_TIMESTAMPING */
 
 /* SO_TIMESTAMPING gets an integer bit field comprised of these values */
@@ -26,8 +27,9 @@ enum {
SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
+   SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
@@ -130,4 +132,11 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_NTP_ALL,
 };
 
+/* SCM_TIMESTAMPING_PKTINFO control message */
+struct scm_ts_pktinfo {
+   __u32 if_index;
+   __u32 pkt_length;
+   __u32 reserved[2];
+};
+
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/socket.c b/net/socket.c
index c2564eb..67db7d8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -662,6 +662,27 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
return skb->pkt_type == PACKET_OUTGOING;
 }
 
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+{
+   struct scm_ts_pktinfo ts_pktinfo;
+   struct net_device *orig_dev;
+
+   if (!skb_mac_header_was_set(skb))
+   return;
+
+   memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
+
+   rcu_read_lock();
+   orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+   if (orig_dev)
+   ts_pktinfo.if_index = orig_dev->ifindex;
+   rcu_read_unlock();
+
+   ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
+   put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
+sizeof(ts_pktinfo), &ts_pktinfo);
+}
+
 /*
  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
  */
@@ -699,8 +720,12 @@ void __sock_

[PATCH v6 net-next 2/7] net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL

2017-05-19 Thread Miroslav Lichvar
Include HWTSTAMP_FILTER_NTP_ALL in net_hwtstamp_validate() as a valid
filter and update drivers which can timestamp all packets, or which
explicitly list unsupported filters instead of using a default case, to
handle the filter.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   | 1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c| 1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   | 1 +
 drivers/net/ethernet/intel/e1000e/netdev.c | 1 +
 drivers/net/ethernet/intel/i40e/i40e_ptp.c | 1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   | 1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c | 1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c| 1 +
 drivers/net/ethernet/sfc/ef10.c| 1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 1 +
 drivers/net/ethernet/ti/cpsw.c | 1 +
 drivers/net/ethernet/tile/tilegx.c | 1 +
 net/core/dev_ioctl.c   | 3 +--
 18 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c 
b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index c772420..89b21d7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1268,6 +1268,7 @@ static int xgbe_set_hwtstamp_settings(struct 
xgbe_prv_data *pdata,
case HWTSTAMP_FILTER_NONE:
break;
 
+   case HWTSTAMP_FILTER_NTP_ALL:
case HWTSTAMP_FILTER_ALL:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENALL, 1);
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 7414ffd..14c236e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -15351,6 +15351,7 @@ int bnx2x_configure_ptp_filters(struct bnx2x *bp)
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_SOME:
+   case HWTSTAMP_FILTER_NTP_ALL:
bp->rx_filter = HWTSTAMP_FILTER_NONE;
break;
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 649f2aa..ba01242 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -3024,6 +3024,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index d51c8d8..31d737c 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -2085,6 +2085,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c 
b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index a213868..2887bca 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -755,6 +755,7 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device 
*netdev,
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
p->has_rx_tstamp = have_hw_timestamps;
config.rx_filter = HWTSTAMP_FILTER_ALL;
if (p->has_rx_tstamp) {
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index b367972..0ff9295 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3680,6 +3680,7 @@ static int e1000e_config_hwtstamp(struct e1000_adapter 
*adapter,
 * Delay Request messages but not both so fall-through to
 * time stamp all packets.
 

[PATCH v6 net-next 3/7] net: add function to retrieve original skb device using NAPI ID

2017-05-19 Thread Miroslav Lichvar
Since commit b68581778cd0 ("net: Make skb->skb_iif always track
skb->dev") skbs don't have the original index of the interface which
received the packet. This information is now needed for a new control
message related to hardware timestamping.

Instead of adding a new field to skb, we can find the device by the NAPI
ID if it is available, i.e. CONFIG_NET_RX_BUSY_POLL is enabled and the
driver is using NAPI. Add dev_get_by_napi_id() and also skb_napi_id() to
hide the CONFIG_NET_RX_BUSY_POLL ifdef.

CC: Richard Cochran <richardcoch...@gmail.com>
Suggested-by: Willem de Bruijn <will...@google.com>
Acked-by: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 include/linux/netdevice.h |  1 +
 include/linux/skbuff.h|  9 +
 net/core/dev.c| 26 ++
 3 files changed, 36 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3f39d27..b6c36d5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2456,6 +2456,7 @@ static inline int dev_recursion_level(void)
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
+struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
 int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7c0cb2c..1f8028c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -855,6 +855,15 @@ static inline bool skb_pkt_type_ok(u32 ptype)
return ptype <= PACKET_OTHERHOST;
 }
 
+static inline unsigned int skb_napi_id(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+   return skb->napi_id;
+#else
+   return 0;
+#endif
+}
+
 void kfree_skb(struct sk_buff *skb);
 void kfree_skb_list(struct sk_buff *segs);
 void skb_tx_error(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index acd594c..6d3c452 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -162,6 +162,7 @@ static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
 struct net_device *dev,
 struct netdev_notifier_info *info);
+static struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -866,6 +867,31 @@ struct net_device *dev_get_by_index(struct net *net, int 
ifindex)
 EXPORT_SYMBOL(dev_get_by_index);
 
 /**
+ * dev_get_by_napi_id - find a device by napi_id
+ * @napi_id: ID of the NAPI struct
+ *
+ * Search for an interface by NAPI ID. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not had
+ * its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_napi_id(unsigned int napi_id)
+{
+   struct napi_struct *napi;
+
+   WARN_ON_ONCE(!rcu_read_lock_held());
+
+   if (napi_id < MIN_NAPI_ID)
+   return NULL;
+
+   napi = napi_by_id(napi_id);
+
+   return napi ? napi->dev : NULL;
+}
+EXPORT_SYMBOL(dev_get_by_napi_id);
+
+/**
  * netdev_get_name - get a netdevice name, knowing its ifindex.
  * @net: network namespace
  * @name: a pointer to the buffer where the name will be stored.
-- 
2.9.3



[PATCH v6 net-next 0/7] Extend socket timestamping API

2017-05-19 Thread Miroslav Lichvar
Changes v5->v6:
- fixed skb_is_swtx_tstamp() when OPT_TX_SWHW is disabled and improved
  its description
- improved OPT_PKTINFO documentation
- improved scm_timestamping documentation

Changes v4->v5:
- fixed initialization of reserved fields in struct scm_ts_pktinfo

Changes v3->v4:
- added reserved fields to struct scm_ts_pktinfo
- replaced patch fixing false SW timestamps with a documentation fix
- updated OPT_TX_SWHW patch to handle false SW timestamps

Changes v2->v3:
- modified struct scm_ts_pktinfo to use fixed-width integer types
- added WARN_ON_ONCE for missing RCU lock in dev_get_by_napi_id()
- modified dev_get_by_napi_id() to not return dev in unexpected branch
- modified recv to return SCM_TIMESTAMPING_PKTINFO even if the interface
  index is unknown

Changes v1->v2:
- added separate patch for new NAPI functions 
- split code from __sock_recv_timestamp() for better readability
- fixed RCU locking
- fixed compiler warning (missing case in switch in first patch)
- inline sw_tx_timestamp() in its only user

Changes RFC->v1:
- reworked SOF_TIMESTAMPING_OPT_PKTINFO patch to not add new fields to
  skb shared info (net device is now looked up by napi_id), not require
  any changes in drivers, and restrict the cmsg to incoming packets
- renamed SOF_TIMESTAMPING_OPT_MULTIMSG to SOF_TIMESTAMPING_OPT_TX_SWHW
  and fixed its description
- moved struct scm_ts_pktinfo from errqueue.h to net_tstamp.h as it
  can't be received from the error queue anymore
- improved commit descriptions and removed incorrect comment

This patchset adds new options to the timestamping API that will be
useful for NTP implementations and possibly other applications.

The first patch specifies a timestamp filter for NTP packets. The second
patch updates drivers that can timestamp all packets, or need to list
the filter as unsupported. There is no attempt to add the support to the
phyter driver.

The third patch adds two helper functions working with NAPI ID, which is
needed by the next patch. The fourth patch adds a new option to get a
new control message with the L2 length and interface index for incoming
packets with hardware timestamps.

The fifth patch fixes documentation on number of non-zero fields in
scm_timestamping and warns about false software timestamps when
SO_TIMESTAMP(NS) is combined with SCM_TIMESTAMPING.

The sixth patch adds a new option to request both software and hardware
timestamps for outgoing packets. The seventh patch updates drivers that
assumed software timestamping cannot be used together with hardware
timestamping.

The patches have been tested on x86_64 machines with igb and e1000e
drivers.

Miroslav Lichvar (7):
  net: define receive timestamp filter for NTP
  net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL
  net: add function to retrieve original skb device using NAPI ID
  net: add new control message for incoming HW-timestamped packets
  net: fix documentation of struct scm_timestamping
  net: allow simultaneous SW and HW transmit timestamping
  net: ethernet: update drivers to make both SW and HW TX timestamps

 Documentation/networking/timestamping.txt  | 26 +++-
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   |  4 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   |  1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c|  1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c |  1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   |  1 +
 drivers/net/ethernet/intel/e1000e/netdev.c |  5 ++-
 drivers/net/ethernet/intel/i40e/i40e_ptp.c |  1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   |  1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c |  1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c |  1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c|  1 +
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c|  3 +-
 drivers/net/ethernet/sfc/ef10.c|  1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  7 ++--
 drivers/net/ethernet/ti/cpsw.c |  1 +
 drivers/net/ethernet/tile/tilegx.c |  1 +
 include/linux/netdevice.h  |  1 +
 include/linux/skbuff.h | 19 +
 include/uapi/asm-generic/socket.h  |  2 +
 include/uapi/linux/net_tstamp.h| 15 ++-
 net/core/dev.c | 26 
 net/core/dev_ioctl.c   |  1 +
 net/core/skbuff.c  |  4 ++
 net/socket.c   | 47 --
 27 files changed, 151 insertions(+), 23 deletions(-)

-- 
2.9.3



[PATCH v6 net-next 1/7] net: define receive timestamp filter for NTP

2017-05-19 Thread Miroslav Lichvar
Add HWTSTAMP_FILTER_NTP_ALL to the hwtstamp_rx_filters enum for
timestamping of NTP packets. There is currently only one driver
(phyter) that could support it directly.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 include/uapi/linux/net_tstamp.h | 3 +++
 net/core/dev_ioctl.c| 2 ++
 2 files changed, 5 insertions(+)

diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 464dcca..0749fb1 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -125,6 +125,9 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_PTP_V2_SYNC,
/* PTP v2/802.AS1, any layer, Delay_req packet */
HWTSTAMP_FILTER_PTP_V2_DELAY_REQ,
+
+   /* NTP, UDP, all versions and packet modes */
+   HWTSTAMP_FILTER_NTP_ALL,
 };
 
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d2..8f036a7 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -227,6 +227,8 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
rx_filter_valid = 1;
break;
+   case HWTSTAMP_FILTER_NTP_ALL:
+   break;
}
 
if (!tx_type_valid || !rx_filter_valid)
-- 
2.9.3



Re: [PATCH v5 net-next 5/7] net: fix documentation of struct scm_timestamping

2017-05-19 Thread Miroslav Lichvar
On Thu, May 18, 2017 at 03:38:30PM -0400, Willem de Bruijn wrote:
> On Thu, May 18, 2017 at 10:07 AM, Miroslav Lichvar <mlich...@redhat.com> 
> wrote:
> > +Note that if the SO_TIMESTAMP or SO_TIMESTAMPNS option is enabled
> > +together with SO_TIMESTAMPING using SOF_TIMESTAMPING_SOFTWARE, a false
> > +software timestamp will be generated in the recvmsg() call and passed
> > +in ts[0] when a real software timestamp is missing.
> 
> With receive software timestamping this is expected behavior? I would make
> explicit that this happens even on tx timestamps.

How about adding ", e.g. when receive timestamping is enabled
between receiving the message and the recvmsg() call, or it is a
message with a hardware transmit timestamp." ?

> > For this reason it
> > +is not recommended to combine SO_TIMESTAMP(NS) with SO_TIMESTAMPING.
> 
> And I'd remove this. The extra timestamp is harmless, and we may be missing
> other reasons why someone would want to enable both on the same socket.

Ok. I'm just concerned people will inadvertently use the timestamp as
a real timestamp and then wonder why SW TX timestamping is so bad. I
have fallen into this trap.

-- 
Miroslav Lichvar


Re: [PATCH v5 net-next 4/7] net: add new control message for incoming HW-timestamped packets

2017-05-19 Thread Miroslav Lichvar
On Thu, May 18, 2017 at 04:20:53PM -0400, Willem de Bruijn wrote:
> On Thu, May 18, 2017 at 10:07 AM, Miroslav Lichvar <mlich...@redhat.com> 
> wrote:
> > +SOF_TIMESTAMPING_OPT_PKTINFO:
> > +
> > +  Enable the SCM_TIMESTAMPING_PKTINFO control message for incoming
> > +  packets with hardware timestamps. The message contains struct
> > +  scm_ts_pktinfo, which supplies the index of the real interface which
> > +  received the packet and its length at layer 2. A valid (non-zero)
> > +  interface index will be returned only if CONFIG_NET_RX_BUSY_POLL is
> > +  enabled and the driver is using NAPI.
> 
> It is probably good to explicitly call out that the remaining two fields
> are reserved and undefined. To stress that applications cannot be
> overly pedantic and start failing if these become non-zero.

Ok. I'm adding "The struct contains also two other fields, but they
are reserved and undefined".

-- 
Miroslav Lichvar


Re: [PATCH v4 net-next 6/7] net: allow simultaneous SW and HW transmit timestamping

2017-05-19 Thread Miroslav Lichvar
On Thu, May 18, 2017 at 04:16:26PM -0400, Willem de Bruijn wrote:
> On Thu, May 18, 2017 at 9:06 AM, Miroslav Lichvar <mlich...@redhat.com> wrote:
> > +/* On transmit, software and hardware timestamps are returned 
> > independently.
> > + * As the two skb clones share the hardware timestamp, which may be updated
> > + * before the software timestamp is received, a hardware TX timestamp may 
> > be
> > + * returned only if there is no software TX timestamp. A false software
> > + * timestamp made for SOCK_RCVTSTAMP when a real timestamp is missing must
> > + * be ignored.
> 
> Please expand on why this case can be ignored. It is quite subtle. How about
> something like
> 
> *
> * A false software timestamp is one made inside the __sock_recv_timestamp
> * call itself. These are generated whenever SO_TIMESTAMP(NS) is enabled
> * on the socket, even when the timestamp reported is for another option, such
> * as hardware tx timestamp.
> *
> * Ignore these when deciding whether a timestamp source is hw or sw.
> */

That seems a bit too verbose to me. :) Would the following work?

/* On transmit, software and hardware timestamps are returned independently.
 * As the two skb clones share the hardware timestamp, which may be updated
 * before the software timestamp is received, a hardware TX timestamp may be
 * returned only if there is no software TX timestamp. Ignore false software
 * timestamps, which may be made in the __sock_recv_timestamp() call when the
 * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
 * hardware timestamp.
 */

> > +static bool skb_is_swtx_tstamp(const struct sk_buff *skb,
> > +  const struct sock *sk, int false_tstamp)
> > +{
> > +   if (false_tstamp && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW)
> 
> Also, why is it ignored only for the new mode?

Good point. That should not be there. The function can be now reduced
to a single line again. I originally tried a different approach,
disabling false timestamps in the new mode, but then I thought it's
better to not complicate it unnecessarily and keep it consistent.

-- 
Miroslav Lichvar


[PATCH v5 net-next 6/7] net: allow simultaneous SW and HW transmit timestamping

2017-05-18 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_TX_SWHW option to allow an outgoing packet to
be looped to the socket's error queue with a software timestamp even
when a hardware transmit timestamp is expected to be provided by the
driver.

Applications using this option will receive two separate messages from
the error queue, one with a software timestamp and the other with a
hardware timestamp. As the hardware timestamp is saved to the shared skb
info, which may happen before the first message with software timestamp
is received by the application, the hardware timestamp is copied to the
SCM_TIMESTAMPING control message only when the skb has no software
timestamp or it is an incoming packet.

While changing sw_tx_timestamp(), inline it in skb_tx_timestamp() as
there are no other users.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt |  8 
 include/linux/skbuff.h| 10 ++
 include/uapi/linux/net_tstamp.h   |  3 ++-
 net/core/skbuff.c |  4 
 net/socket.c  | 23 +--
 5 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 74b7c61..65be2d9 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -202,6 +202,14 @@ SOF_TIMESTAMPING_OPT_PKTINFO:
   interface index will be returned only if CONFIG_NET_RX_BUSY_POLL is
   enabled and the driver is using NAPI.
 
+SOF_TIMESTAMPING_OPT_TX_SWHW:
+
+  Request both hardware and software timestamps for outgoing packets
+  when SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE
+  are enabled at the same time. If both timestamps are generated,
+  two separate messages will be looped to the socket's error queue,
+  each containing just one timestamp.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1f8028c..3b2e284 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3254,13 +3254,6 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 void skb_tstamp_tx(struct sk_buff *orig_skb,
   struct skb_shared_hwtstamps *hwtstamps);
 
-static inline void sw_tx_timestamp(struct sk_buff *skb)
-{
-   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
-   !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
-   skb_tstamp_tx(skb, NULL);
-}
-
 /**
  * skb_tx_timestamp() - Driver hook for transmit timestamping
  *
@@ -3276,7 +3269,8 @@ static inline void sw_tx_timestamp(struct sk_buff *skb)
 static inline void skb_tx_timestamp(struct sk_buff *skb)
 {
skb_clone_tx_timestamp(skb);
-   sw_tx_timestamp(skb);
+   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
+   skb_tstamp_tx(skb, NULL);
 }
 
 /**
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index dee74d3..3d421d9 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -28,8 +28,9 @@ enum {
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
+   SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 346d3e8..68c02df 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3875,6 +3875,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
 
+   if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+   skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+   return;
+
tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_may_tx_timestamp(sk, tsonly))
return;
diff --git a/net/socket.c b/net/socket.c
index 67db7d8..0b5758a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -662,6 +662,22 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
return skb->pkt_type == PACKET_OUTGOING;
 }
 
+/* On transmit, software and hardware timestamps are returned independently.
+ * As the two skb clones share the hardware timestamp, which may be updated
+ * before the software timestamp is received, a hardware TX timestamp may be
+ * returned only if there is no software TX timestamp. A false so

[PATCH v5 net-next 0/7] Extend socket timestamping API

2017-05-18 Thread Miroslav Lichvar
Changes v4->v5:
- fix initialization of reserved fields in struct scm_ts_pktinfo

Changes v3->v4:
- added reserved fields to struct scm_ts_pktinfo
- replaced patch fixing false SW timestamps with a documentation fix
- updated OPT_TX_SWHW patch to handle false SW timestamps

Changes v2->v3:
- modified struct scm_ts_pktinfo to use fixed-width integer types
- added WARN_ON_ONCE for missing RCU lock in dev_get_by_napi_id()
- modified dev_get_by_napi_id() to not return dev in unexpected branch
- modified recv to return SCM_TIMESTAMPING_PKTINFO even if the interface
  index is unknown

Changes v1->v2:
- added separate patch for new NAPI functions 
- split code from __sock_recv_timestamp() for better readability
- fixed RCU locking
- fixed compiler warning (missing case in switch in first patch)
- inline sw_tx_timestamp() in its only user

Changes RFC->v1:
- reworked SOF_TIMESTAMPING_OPT_PKTINFO patch to not add new fields to
  skb shared info (net device is now looked up by napi_id), not require
  any changes in drivers, and restrict the cmsg to incoming packets
- renamed SOF_TIMESTAMPING_OPT_MULTIMSG to SOF_TIMESTAMPING_OPT_TX_SWHW
  and fixed its description
- moved struct scm_ts_pktinfo from errqueue.h to net_tstamp.h as it
  can't be received from the error queue anymore
- improved commit descriptions and removed incorrect comment

This patchset adds new options to the timestamping API that will be
useful for NTP implementations and possibly other applications.

The first patch specifies a timestamp filter for NTP packets. The second
patch updates drivers that can timestamp all packets, or need to list
the filter as unsupported. There is no attempt to add the support to the
phyter driver.

The third patch adds two helper functions working with NAPI ID, which is
needed by the next patch. The fourth patch adds a new option to get a
new control message with the L2 length and interface index for incoming
packets with hardware timestamps.

The fifth patch fixes documentation on number of non-zero fields in
scm_timestamping and warns about false software timestamps when
SO_TIMESTAMP(NS) is combined with SCM_TIMESTAMPING.

The sixth patch adds a new option to request both software and hardware
timestamps for outgoing packets. The seventh patch updates drivers that
assumed software timestamping cannot be used together with hardware
timestamping.

The patches have been tested on x86_64 machines with igb and e1000e
drivers.

Miroslav Lichvar (7):
  net: define receive timestamp filter for NTP
  net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL
  net: add function to retrieve original skb device using NAPI ID
  net: add new control message for incoming HW-timestamped packets
  net: fix documentation of struct scm_timestamping
  net: allow simultaneous SW and HW transmit timestamping
  net: ethernet: update drivers to make both SW and HW TX timestamps

 Documentation/networking/timestamping.txt  | 25 ++-
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   |  4 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   |  1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c|  1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c |  1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   |  1 +
 drivers/net/ethernet/intel/e1000e/netdev.c |  5 ++-
 drivers/net/ethernet/intel/i40e/i40e_ptp.c |  1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   |  1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c |  1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c |  1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c|  1 +
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c|  3 +-
 drivers/net/ethernet/sfc/ef10.c|  1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  7 ++-
 drivers/net/ethernet/ti/cpsw.c |  1 +
 drivers/net/ethernet/tile/tilegx.c |  1 +
 include/linux/netdevice.h  |  1 +
 include/linux/skbuff.h | 19 
 include/uapi/asm-generic/socket.h  |  2 +
 include/uapi/linux/net_tstamp.h| 15 ++-
 net/core/dev.c | 26 +++
 net/core/dev_ioctl.c   |  1 +
 net/core/skbuff.c  |  4 ++
 net/socket.c   | 50 --
 27 files changed, 153 insertions(+), 23 deletions(-)

-- 
2.9.3



[PATCH v5 net-next 3/7] net: add function to retrieve original skb device using NAPI ID

2017-05-18 Thread Miroslav Lichvar
Since commit b68581778cd0 ("net: Make skb->skb_iif always track
skb->dev") skbs don't have the original index of the interface which
received the packet. This information is now needed for a new control
message related to hardware timestamping.

Instead of adding a new field to skb, we can find the device by the NAPI
ID if it is available, i.e. CONFIG_NET_RX_BUSY_POLL is enabled and the
driver is using NAPI. Add dev_get_by_napi_id() and also skb_napi_id() to
hide the CONFIG_NET_RX_BUSY_POLL ifdef.

CC: Richard Cochran <richardcoch...@gmail.com>
Suggested-by: Willem de Bruijn <will...@google.com>
Acked-by: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 include/linux/netdevice.h |  1 +
 include/linux/skbuff.h|  9 +
 net/core/dev.c| 26 ++
 3 files changed, 36 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3f39d27..b6c36d5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2456,6 +2456,7 @@ static inline int dev_recursion_level(void)
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
+struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
 int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7c0cb2c..1f8028c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -855,6 +855,15 @@ static inline bool skb_pkt_type_ok(u32 ptype)
return ptype <= PACKET_OTHERHOST;
 }
 
+static inline unsigned int skb_napi_id(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+   return skb->napi_id;
+#else
+   return 0;
+#endif
+}
+
 void kfree_skb(struct sk_buff *skb);
 void kfree_skb_list(struct sk_buff *segs);
 void skb_tx_error(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index acd594c..6d3c452 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -162,6 +162,7 @@ static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
 struct net_device *dev,
 struct netdev_notifier_info *info);
+static struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -866,6 +867,31 @@ struct net_device *dev_get_by_index(struct net *net, int 
ifindex)
 EXPORT_SYMBOL(dev_get_by_index);
 
 /**
+ * dev_get_by_napi_id - find a device by napi_id
+ * @napi_id: ID of the NAPI struct
+ *
+ * Search for an interface by NAPI ID. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not had
+ * its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_napi_id(unsigned int napi_id)
+{
+   struct napi_struct *napi;
+
+   WARN_ON_ONCE(!rcu_read_lock_held());
+
+   if (napi_id < MIN_NAPI_ID)
+   return NULL;
+
+   napi = napi_by_id(napi_id);
+
+   return napi ? napi->dev : NULL;
+}
+EXPORT_SYMBOL(dev_get_by_napi_id);
+
+/**
  * netdev_get_name - get a netdevice name, knowing its ifindex.
  * @net: network namespace
  * @name: a pointer to the buffer where the name will be stored.
-- 
2.9.3



[PATCH v5 net-next 4/7] net: add new control message for incoming HW-timestamped packets

2017-05-18 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_PKTINFO option to request a new control message
for incoming packets with hardware timestamps. It contains the index of
the real interface which received the packet and the length of the
packet at layer 2.

The index is useful with bonding, bridges and other interfaces, where
IP_PKTINFO doesn't allow applications to determine which PHC made the
timestamp. With the L2 length (and link speed) it is possible to
transpose preamble timestamps to trailer timestamps, which are used in
the NTP protocol.

While this information could be provided by two new socket options
independently from timestamping, it doesn't look like they would be very
useful. With this option any performance impact is limited to hardware
timestamping.

Use dev_get_by_napi_id() to get the device and its index. On kernels
with disabled CONFIG_NET_RX_BUSY_POLL or drivers not using NAPI, a zero
index will be returned in the control message.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt |  9 +
 include/uapi/asm-generic/socket.h |  2 ++
 include/uapi/linux/net_tstamp.h   | 11 ++-
 net/socket.c  | 27 ++-
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 96f5069..600c6bf 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -193,6 +193,15 @@ SOF_TIMESTAMPING_OPT_STATS:
   the transmit timestamps, such as how long a certain block of
   data was limited by peer's receiver window.
 
+SOF_TIMESTAMPING_OPT_PKTINFO:
+
+  Enable the SCM_TIMESTAMPING_PKTINFO control message for incoming
+  packets with hardware timestamps. The message contains struct
+  scm_ts_pktinfo, which supplies the index of the real interface which
+  received the packet and its length at layer 2. A valid (non-zero)
+  interface index will be returned only if CONFIG_NET_RX_BUSY_POLL is
+  enabled and the driver is using NAPI.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
diff --git a/include/uapi/asm-generic/socket.h 
b/include/uapi/asm-generic/socket.h
index 2b48856..a5f6e81 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -100,4 +100,6 @@
 
 #define SO_COOKIE  57
 
+#define SCM_TIMESTAMPING_PKTINFO   58
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 0749fb1..dee74d3 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -9,6 +9,7 @@
 #ifndef _NET_TIMESTAMPING_H
 #define _NET_TIMESTAMPING_H
 
+#include <linux/types.h>
 #include <linux/socket.h>   /* for SO_TIMESTAMPING */
 
 /* SO_TIMESTAMPING gets an integer bit field comprised of these values */
@@ -26,8 +27,9 @@ enum {
SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
+   SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
@@ -130,4 +132,11 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_NTP_ALL,
 };
 
+/* SCM_TIMESTAMPING_PKTINFO control message */
+struct scm_ts_pktinfo {
+   __u32 if_index;
+   __u32 pkt_length;
+   __u32 reserved[2];
+};
+
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/socket.c b/net/socket.c
index c2564eb..67db7d8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -662,6 +662,27 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
return skb->pkt_type == PACKET_OUTGOING;
 }
 
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+{
+   struct scm_ts_pktinfo ts_pktinfo;
+   struct net_device *orig_dev;
+
+   if (!skb_mac_header_was_set(skb))
+   return;
+
+   memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
+
+   rcu_read_lock();
+   orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+   if (orig_dev)
+   ts_pktinfo.if_index = orig_dev->ifindex;
+   rcu_read_unlock();
+
+   ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
+   put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
+sizeof(ts_pktinfo), &ts_pktinfo);
+}
+
 /*
  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
  */
@@ -699,8 +720,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock 
*sk,
empty = 0;
   

[PATCH v5 net-next 5/7] net: fix documentation of struct scm_timestamping

2017-05-18 Thread Miroslav Lichvar
The scm_timestamping struct may return multiple non-zero fields, e.g.
when both software and hardware RX timestamping is enabled, or when the
SO_TIMESTAMP(NS) option is combined with SCM_TIMESTAMPING and a false
software timestamp is generated in the recvmsg() call in order to always
return a SCM_TIMESTAMP(NS) message.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 600c6bf..74b7c61 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -321,7 +321,7 @@ struct scm_timestamping {
 };
 
 The structure can return up to three timestamps. This is a legacy
-feature. Only one field is non-zero at any time. Most timestamps
+feature. At least one field is non-zero at any time. Most timestamps
 are passed in ts[0]. Hardware timestamps are passed in ts[2].
 
 ts[1] used to hold hardware timestamps converted to system time.
@@ -330,6 +330,12 @@ a HW PTP clock source, to allow time conversion in 
userspace and
 optionally synchronize system time with a userspace PTP stack such
 as linuxptp. For the PTP clock API, see Documentation/ptp/ptp.txt.
 
+Note that if the SO_TIMESTAMP or SO_TIMESTAMPNS option is enabled
+together with SO_TIMESTAMPING using SOF_TIMESTAMPING_SOFTWARE, a false
+software timestamp will be generated in the recvmsg() call and passed
+in ts[0] when a real software timestamp is missing. For this reason it
+is not recommended to combine SO_TIMESTAMP(NS) with SO_TIMESTAMPING.
+
 2.1.1 Transmit timestamps with MSG_ERRQUEUE
 
 For transmit timestamps the outgoing packet is looped back to the
-- 
2.9.3



[PATCH v5 net-next 2/7] net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL

2017-05-18 Thread Miroslav Lichvar
Include HWTSTAMP_FILTER_NTP_ALL in net_hwtstamp_validate() as a valid
filter and update drivers which can timestamp all packets, or which
explicitly list unsupported filters instead of using a default case, to
handle the filter.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   | 1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c| 1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   | 1 +
 drivers/net/ethernet/intel/e1000e/netdev.c | 1 +
 drivers/net/ethernet/intel/i40e/i40e_ptp.c | 1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   | 1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c | 1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c| 1 +
 drivers/net/ethernet/sfc/ef10.c| 1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 1 +
 drivers/net/ethernet/ti/cpsw.c | 1 +
 drivers/net/ethernet/tile/tilegx.c | 1 +
 net/core/dev_ioctl.c   | 3 +--
 18 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c 
b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index c772420..89b21d7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1268,6 +1268,7 @@ static int xgbe_set_hwtstamp_settings(struct 
xgbe_prv_data *pdata,
case HWTSTAMP_FILTER_NONE:
break;
 
+   case HWTSTAMP_FILTER_NTP_ALL:
case HWTSTAMP_FILTER_ALL:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENALL, 1);
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 7414ffd..14c236e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -15351,6 +15351,7 @@ int bnx2x_configure_ptp_filters(struct bnx2x *bp)
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_SOME:
+   case HWTSTAMP_FILTER_NTP_ALL:
bp->rx_filter = HWTSTAMP_FILTER_NONE;
break;
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 649f2aa..ba01242 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -3024,6 +3024,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index d51c8d8..31d737c 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -2085,6 +2085,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c 
b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index a213868..2887bca 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -755,6 +755,7 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device 
*netdev,
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
p->has_rx_tstamp = have_hw_timestamps;
config.rx_filter = HWTSTAMP_FILTER_ALL;
if (p->has_rx_tstamp) {
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index b367972..0ff9295 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3680,6 +3680,7 @@ static int e1000e_config_hwtstamp(struct e1000_adapter 
*adapter,
 * Delay Request messages but not both so fall-through to
 * time stamp all packets.
 

[PATCH v5 net-next 7/7] net: ethernet: update drivers to make both SW and HW TX timestamps

2017-05-18 Thread Miroslav Lichvar
Some drivers were calling the skb_tx_timestamp() function only when
a hardware timestamp was not requested. Now that applications can use
the SOF_TIMESTAMPING_OPT_TX_SWHW option to request both software and
hardware timestamps, the drivers need to be modified to unconditionally
call skb_tx_timestamp().

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c  | 3 +--
 drivers/net/ethernet/intel/e1000e/netdev.c| 4 ++--
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c   | 3 +--
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++
 4 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c 
b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 89b21d7..5a2ad9c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1391,8 +1391,7 @@ static void xgbe_prep_tx_tstamp(struct xgbe_prv_data 
*pdata,
spin_unlock_irqrestore(&pdata->tstamp_lock, flags);
}
 
-   if (!XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, PTP))
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 }
 
 static void xgbe_prep_vlan(struct sk_buff *skb, struct xgbe_packet_data 
*packet)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0ff9295..6ed3bc4 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5868,10 +5868,10 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
adapter->tx_hwtstamp_skb = skb_get(skb);
adapter->tx_hwtstamp_start = jiffies;
schedule_work(&adapter->tx_hwtstamp_work);
-   } else {
-   skb_tx_timestamp(skb);
}
 
+   skb_tx_timestamp(skb);
+
netdev_sent_queue(netdev, skb->len);
e1000_tx_queue(tx_ring, tx_flags, count);
/* Make sure there is space in the ring for the next send. */
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c 
b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 1e59435..89831ad 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -1418,8 +1418,7 @@ static netdev_tx_t sxgbe_xmit(struct sk_buff *skb, struct 
net_device *dev)
priv->hw->desc->tx_enable_tstamp(first_desc);
}
 
-   if (!tqueue->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
priv->hw->dma->enable_dma_transmission(priv->ioaddr, txq_index);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index cce862b..27c12e7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2880,8 +2880,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, 
struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
 
-   if (!priv->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
 priv->hwts_tx_en)) {
@@ -3084,8 +3083,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, 
struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
 
-   if (!priv->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
/* Ready to fill the first descriptor and set the OWN bit w/o any
 * problems because all the descriptors are actually ready to be
-- 
2.9.3



[PATCH v5 net-next 1/7] net: define receive timestamp filter for NTP

2017-05-18 Thread Miroslav Lichvar
Add HWTSTAMP_FILTER_NTP_ALL to the hwtstamp_rx_filters enum for
timestamping of NTP packets. There is currently only one driver
(phyter) that could support it directly.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 include/uapi/linux/net_tstamp.h | 3 +++
 net/core/dev_ioctl.c| 2 ++
 2 files changed, 5 insertions(+)

diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 464dcca..0749fb1 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -125,6 +125,9 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_PTP_V2_SYNC,
/* PTP v2/802.AS1, any layer, Delay_req packet */
HWTSTAMP_FILTER_PTP_V2_DELAY_REQ,
+
+   /* NTP, UDP, all versions and packet modes */
+   HWTSTAMP_FILTER_NTP_ALL,
 };
 
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d2..8f036a7 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -227,6 +227,8 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
rx_filter_valid = 1;
break;
+   case HWTSTAMP_FILTER_NTP_ALL:
+   break;
}
 
if (!tx_type_valid || !rx_filter_valid)
-- 
2.9.3



Re: [PATCH v4 net-next 0/7] Extend socket timestamping API

2017-05-18 Thread Miroslav Lichvar
On Thu, May 18, 2017 at 02:58:23PM +0200, Miroslav Lichvar wrote:
> Changes v3->v4:
> - added reserved fields to struct scm_ts_pktinfo

Of course, I forgot to initialize the new reserved fields. I'll send a
new series.

-- 
Miroslav Lichvar


[PATCH v4 net-next 4/7] net: add new control message for incoming HW-timestamped packets

2017-05-18 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_PKTINFO option to request a new control message
for incoming packets with hardware timestamps. It contains the index of
the real interface which received the packet and the length of the
packet at layer 2.

The index is useful with bonding, bridges and other interfaces, where
IP_PKTINFO doesn't allow applications to determine which PHC made the
timestamp. With the L2 length (and link speed) it is possible to
transpose preamble timestamps to trailer timestamps, which are used in
the NTP protocol.

While this information could be provided by two new socket options
independently from timestamping, it doesn't look like they would be very
useful. With this option any performance impact is limited to hardware
timestamping.

Use dev_get_by_napi_id() to get the device and its index. On kernels
with disabled CONFIG_NET_RX_BUSY_POLL or drivers not using NAPI, a zero
index will be returned in the control message.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt |  9 +
 include/uapi/asm-generic/socket.h |  2 ++
 include/uapi/linux/net_tstamp.h   | 11 ++-
 net/socket.c  | 27 ++-
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 96f5069..600c6bf 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -193,6 +193,15 @@ SOF_TIMESTAMPING_OPT_STATS:
   the transmit timestamps, such as how long a certain block of
   data was limited by peer's receiver window.
 
+SOF_TIMESTAMPING_OPT_PKTINFO:
+
+  Enable the SCM_TIMESTAMPING_PKTINFO control message for incoming
+  packets with hardware timestamps. The message contains struct
+  scm_ts_pktinfo, which supplies the index of the real interface which
+  received the packet and its length at layer 2. A valid (non-zero)
+  interface index will be returned only if CONFIG_NET_RX_BUSY_POLL is
+  enabled and the driver is using NAPI.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
diff --git a/include/uapi/asm-generic/socket.h 
b/include/uapi/asm-generic/socket.h
index 2b48856..a5f6e81 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -100,4 +100,6 @@
 
 #define SO_COOKIE  57
 
+#define SCM_TIMESTAMPING_PKTINFO   58
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 0749fb1..dee74d3 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -9,6 +9,7 @@
 #ifndef _NET_TIMESTAMPING_H
 #define _NET_TIMESTAMPING_H
 
+#include <linux/types.h>
 #include <linux/socket.h>   /* for SO_TIMESTAMPING */
 
 /* SO_TIMESTAMPING gets an integer bit field comprised of these values */
@@ -26,8 +27,9 @@ enum {
SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
+   SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
@@ -130,4 +132,11 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_NTP_ALL,
 };
 
+/* SCM_TIMESTAMPING_PKTINFO control message */
+struct scm_ts_pktinfo {
+   __u32 if_index;
+   __u32 pkt_length;
+   __u32 reserved[2];
+};
+
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/socket.c b/net/socket.c
index c2564eb..ee1f4ec 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -662,6 +662,27 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
return skb->pkt_type == PACKET_OUTGOING;
 }
 
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+{
+   struct scm_ts_pktinfo ts_pktinfo;
+   struct net_device *orig_dev;
+   int ifindex = 0;
+
+   if (!skb_mac_header_was_set(skb))
+   return;
+
+   rcu_read_lock();
+   orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+   if (orig_dev)
+   ifindex = orig_dev->ifindex;
+   rcu_read_unlock();
+
+   ts_pktinfo.if_index = ifindex;
+   ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
+   put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
+sizeof(ts_pktinfo), &ts_pktinfo);
+}
+
 /*
  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
  */
@@ -699,8 +720,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock 
*sk,
empty = 0;
   

[PATCH v4 net-next 0/7] Extend socket timestamping API

2017-05-18 Thread Miroslav Lichvar
Changes v3->v4:
- added reserved fields to struct scm_ts_pktinfo
- replaced patch fixing false SW timestamps with a documentation fix
- updated OPT_TX_SWHW patch to handle false SW timestamps

Changes v2->v3:
- modified struct scm_ts_pktinfo to use fixed-width integer types
- added WARN_ON_ONCE for missing RCU lock in dev_get_by_napi_id()
- modified dev_get_by_napi_id() to not return dev in unexpected branch
- modified recv to return SCM_TIMESTAMPING_PKTINFO even if the interface
  index is unknown

Changes v1->v2:
- added separate patch for new NAPI functions 
- split code from __sock_recv_timestamp() for better readability
- fixed RCU locking
- fixed compiler warning (missing case in switch in first patch)
- inline sw_tx_timestamp() in its only user

Changes RFC->v1:
- reworked SOF_TIMESTAMPING_OPT_PKTINFO patch to not add new fields to
  skb shared info (net device is now looked up by napi_id), not require
  any changes in drivers, and restrict the cmsg to incoming packets
- renamed SOF_TIMESTAMPING_OPT_MULTIMSG to SOF_TIMESTAMPING_OPT_TX_SWHW
  and fixed its description
- moved struct scm_ts_pktinfo from errqueue.h to net_tstamp.h as it
  can't be received from the error queue anymore
- improved commit descriptions and removed incorrect comment

This patchset adds new options to the timestamping API that will be
useful for NTP implementations and possibly other applications.

The first patch specifies a timestamp filter for NTP packets. The second
patch updates drivers that can timestamp all packets, or need to list
the filter as unsupported. There is no attempt to add the support to the
phyter driver.

The third patch adds two helper functions working with NAPI ID, which is
needed by the next patch. The fourth patch adds a new option to get a
new control message with the L2 length and interface index for incoming
packets with hardware timestamps.

The fifth patch fixes documentation on number of non-zero fields in
scm_timestamping and warns about false software timestamps when
SO_TIMESTAMP(NS) is combined with SCM_TIMESTAMPING.

The sixth patch adds a new option to request both software and hardware
timestamps for outgoing packets. The seventh patch updates drivers that
assumed software timestamping cannot be used together with hardware
timestamping.

The patches have been tested on x86_64 machines with igb and e1000e
drivers.

Miroslav Lichvar (7):
  net: define receive timestamp filter for NTP
  net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL
  net: add function to retrieve original skb device using NAPI ID
  net: add new control message for incoming HW-timestamped packets
  net: fix documentation of struct scm_timestamping
  net: allow simultaneous SW and HW transmit timestamping
  net: ethernet: update drivers to make both SW and HW TX timestamps

 Documentation/networking/timestamping.txt  | 25 ++-
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   |  4 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   |  1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c|  1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c |  1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   |  1 +
 drivers/net/ethernet/intel/e1000e/netdev.c |  5 ++-
 drivers/net/ethernet/intel/i40e/i40e_ptp.c |  1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   |  1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c |  1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c |  1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c|  1 +
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c|  3 +-
 drivers/net/ethernet/sfc/ef10.c|  1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  7 ++-
 drivers/net/ethernet/ti/cpsw.c |  1 +
 drivers/net/ethernet/tile/tilegx.c |  1 +
 include/linux/netdevice.h  |  1 +
 include/linux/skbuff.h | 19 
 include/uapi/asm-generic/socket.h  |  2 +
 include/uapi/linux/net_tstamp.h| 15 ++-
 net/core/dev.c | 26 +++
 net/core/dev_ioctl.c   |  1 +
 net/core/skbuff.c  |  4 ++
 net/socket.c   | 50 --
 27 files changed, 153 insertions(+), 23 deletions(-)

-- 
2.9.3



[PATCH v4 net-next 6/7] net: allow simultaneous SW and HW transmit timestamping

2017-05-18 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_TX_SWHW option to allow an outgoing packet to
be looped to the socket's error queue with a software timestamp even
when a hardware transmit timestamp is expected to be provided by the
driver.

Applications using this option will receive two separate messages from
the error queue, one with a software timestamp and the other with a
hardware timestamp. As the hardware timestamp is saved to the shared skb
info, which may happen before the first message with software timestamp
is received by the application, the hardware timestamp is copied to the
SCM_TIMESTAMPING control message only when the skb has no software
timestamp or it is an incoming packet.

While changing sw_tx_timestamp(), inline it in skb_tx_timestamp() as
there are no other users.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt |  8 
 include/linux/skbuff.h| 10 ++
 include/uapi/linux/net_tstamp.h   |  3 ++-
 net/core/skbuff.c |  4 
 net/socket.c  | 23 +--
 5 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 74b7c61..65be2d9 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -202,6 +202,14 @@ SOF_TIMESTAMPING_OPT_PKTINFO:
   interface index will be returned only if CONFIG_NET_RX_BUSY_POLL is
   enabled and the driver is using NAPI.
 
+SOF_TIMESTAMPING_OPT_TX_SWHW:
+
+  Request both hardware and software timestamps for outgoing packets
+  when SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE
+  are enabled at the same time. If both timestamps are generated,
+  two separate messages will be looped to the socket's error queue,
+  each containing just one timestamp.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1f8028c..3b2e284 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3254,13 +3254,6 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 void skb_tstamp_tx(struct sk_buff *orig_skb,
   struct skb_shared_hwtstamps *hwtstamps);
 
-static inline void sw_tx_timestamp(struct sk_buff *skb)
-{
-   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
-   !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
-   skb_tstamp_tx(skb, NULL);
-}
-
 /**
  * skb_tx_timestamp() - Driver hook for transmit timestamping
  *
@@ -3276,7 +3269,8 @@ static inline void sw_tx_timestamp(struct sk_buff *skb)
 static inline void skb_tx_timestamp(struct sk_buff *skb)
 {
skb_clone_tx_timestamp(skb);
-   sw_tx_timestamp(skb);
+   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
+   skb_tstamp_tx(skb, NULL);
 }
 
 /**
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index dee74d3..3d421d9 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -28,8 +28,9 @@ enum {
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
+   SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 346d3e8..68c02df 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3875,6 +3875,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
 
+   if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+   skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+   return;
+
tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_may_tx_timestamp(sk, tsonly))
return;
diff --git a/net/socket.c b/net/socket.c
index ee1f4ec..64705ae 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -662,6 +662,22 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
return skb->pkt_type == PACKET_OUTGOING;
 }
 
+/* On transmit, software and hardware timestamps are returned independently.
+ * As the two skb clones share the hardware timestamp, which may be updated
+ * before the software timestamp is received, a hardware TX timestamp may be
+ * returned only if there is no software TX timestamp. A false so

[PATCH v4 net-next 1/7] net: define receive timestamp filter for NTP

2017-05-18 Thread Miroslav Lichvar
Add HWTSTAMP_FILTER_NTP_ALL to the hwtstamp_rx_filters enum for
timestamping of NTP packets. There is currently only one driver
(phyter) that could support it directly.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 include/uapi/linux/net_tstamp.h | 3 +++
 net/core/dev_ioctl.c| 2 ++
 2 files changed, 5 insertions(+)

diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 464dcca..0749fb1 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -125,6 +125,9 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_PTP_V2_SYNC,
/* PTP v2/802.AS1, any layer, Delay_req packet */
HWTSTAMP_FILTER_PTP_V2_DELAY_REQ,
+
+   /* NTP, UDP, all versions and packet modes */
+   HWTSTAMP_FILTER_NTP_ALL,
 };
 
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d2..8f036a7 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -227,6 +227,8 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
rx_filter_valid = 1;
break;
+   case HWTSTAMP_FILTER_NTP_ALL:
+   break;
}
 
if (!tx_type_valid || !rx_filter_valid)
-- 
2.9.3



[PATCH v4 net-next 2/7] net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL

2017-05-18 Thread Miroslav Lichvar
Include HWTSTAMP_FILTER_NTP_ALL in net_hwtstamp_validate() as a valid
filter and update drivers which can timestamp all packets, or which
explicitly list unsupported filters instead of using a default case, to
handle the filter.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   | 1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c| 1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   | 1 +
 drivers/net/ethernet/intel/e1000e/netdev.c | 1 +
 drivers/net/ethernet/intel/i40e/i40e_ptp.c | 1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   | 1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c | 1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c| 1 +
 drivers/net/ethernet/sfc/ef10.c| 1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 1 +
 drivers/net/ethernet/ti/cpsw.c | 1 +
 drivers/net/ethernet/tile/tilegx.c | 1 +
 net/core/dev_ioctl.c   | 3 +--
 18 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c 
b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index c772420..89b21d7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1268,6 +1268,7 @@ static int xgbe_set_hwtstamp_settings(struct 
xgbe_prv_data *pdata,
case HWTSTAMP_FILTER_NONE:
break;
 
+   case HWTSTAMP_FILTER_NTP_ALL:
case HWTSTAMP_FILTER_ALL:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENALL, 1);
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 7414ffd..14c236e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -15351,6 +15351,7 @@ int bnx2x_configure_ptp_filters(struct bnx2x *bp)
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_SOME:
+   case HWTSTAMP_FILTER_NTP_ALL:
bp->rx_filter = HWTSTAMP_FILTER_NONE;
break;
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 649f2aa..ba01242 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -3024,6 +3024,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index d51c8d8..31d737c 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -2085,6 +2085,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c 
b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index a213868..2887bca 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -755,6 +755,7 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device 
*netdev,
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
p->has_rx_tstamp = have_hw_timestamps;
config.rx_filter = HWTSTAMP_FILTER_ALL;
if (p->has_rx_tstamp) {
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index b367972..0ff9295 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3680,6 +3680,7 @@ static int e1000e_config_hwtstamp(struct e1000_adapter 
*adapter,
 * Delay Request messages but not both so fall-through to
 * time stamp all packets.
 

[PATCH v4 net-next 7/7] net: ethernet: update drivers to make both SW and HW TX timestamps

2017-05-18 Thread Miroslav Lichvar
Some drivers were calling the skb_tx_timestamp() function only when
a hardware timestamp was not requested. Now that applications can use
the SOF_TIMESTAMPING_OPT_TX_SWHW option to request both software and
hardware timestamps, the drivers need to be modified to unconditionally
call skb_tx_timestamp().

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c  | 3 +--
 drivers/net/ethernet/intel/e1000e/netdev.c| 4 ++--
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c   | 3 +--
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++
 4 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c 
b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 89b21d7..5a2ad9c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1391,8 +1391,7 @@ static void xgbe_prep_tx_tstamp(struct xgbe_prv_data 
*pdata,
spin_unlock_irqrestore(&pdata->tstamp_lock, flags);
}
 
-   if (!XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, PTP))
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 }
 
 static void xgbe_prep_vlan(struct sk_buff *skb, struct xgbe_packet_data 
*packet)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0ff9295..6ed3bc4 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5868,10 +5868,10 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
adapter->tx_hwtstamp_skb = skb_get(skb);
adapter->tx_hwtstamp_start = jiffies;
schedule_work(&adapter->tx_hwtstamp_work);
-   } else {
-   skb_tx_timestamp(skb);
}
 
+   skb_tx_timestamp(skb);
+
netdev_sent_queue(netdev, skb->len);
e1000_tx_queue(tx_ring, tx_flags, count);
/* Make sure there is space in the ring for the next send. */
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c 
b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 1e59435..89831ad 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -1418,8 +1418,7 @@ static netdev_tx_t sxgbe_xmit(struct sk_buff *skb, struct 
net_device *dev)
priv->hw->desc->tx_enable_tstamp(first_desc);
}
 
-   if (!tqueue->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
priv->hw->dma->enable_dma_transmission(priv->ioaddr, txq_index);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index cce862b..27c12e7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2880,8 +2880,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, 
struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
 
-   if (!priv->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
 priv->hwts_tx_en)) {
@@ -3084,8 +3083,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, 
struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
 
-   if (!priv->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
/* Ready to fill the first descriptor and set the OWN bit w/o any
 * problems because all the descriptors are actually ready to be
-- 
2.9.3



[PATCH v4 net-next 5/7] net: fix documentation of struct scm_timestamping

2017-05-18 Thread Miroslav Lichvar
The scm_timestamping struct may return multiple non-zero fields, e.g.
when both software and hardware RX timestamping is enabled, or when the
SO_TIMESTAMP(NS) option is combined with SCM_TIMESTAMPING and a false
software timestamp is generated in the recvmsg() call in order to always
return a SCM_TIMESTAMP(NS) message.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 600c6bf..74b7c61 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -321,7 +321,7 @@ struct scm_timestamping {
 };
 
 The structure can return up to three timestamps. This is a legacy
-feature. Only one field is non-zero at any time. Most timestamps
+feature. At least one field is non-zero at any time. Most timestamps
 are passed in ts[0]. Hardware timestamps are passed in ts[2].
 
 ts[1] used to hold hardware timestamps converted to system time.
@@ -330,6 +330,12 @@ a HW PTP clock source, to allow time conversion in 
userspace and
 optionally synchronize system time with a userspace PTP stack such
 as linuxptp. For the PTP clock API, see Documentation/ptp/ptp.txt.
 
+Note that if the SO_TIMESTAMP or SO_TIMESTAMPNS option is enabled
+together with SO_TIMESTAMPING using SOF_TIMESTAMPING_SOFTWARE, a false
+software timestamp will be generated in the recvmsg() call and passed
+in ts[0] when a real software timestamp is missing. For this reason it
+is not recommended to combine SO_TIMESTAMP(NS) with SO_TIMESTAMPING.
+
 2.1.1 Transmit timestamps with MSG_ERRQUEUE
 
 For transmit timestamps the outgoing packet is looped back to the
-- 
2.9.3



[PATCH v4 net-next 3/7] net: add function to retrieve original skb device using NAPI ID

2017-05-18 Thread Miroslav Lichvar
Since commit b68581778cd0 ("net: Make skb->skb_iif always track
skb->dev") skbs don't have the original index of the interface which
received the packet. This information is now needed for a new control
message related to hardware timestamping.

Instead of adding a new field to skb, we can find the device by the NAPI
ID if it is available, i.e. CONFIG_NET_RX_BUSY_POLL is enabled and the
driver is using NAPI. Add dev_get_by_napi_id() and also skb_napi_id() to
hide the CONFIG_NET_RX_BUSY_POLL ifdef.

CC: Richard Cochran <richardcoch...@gmail.com>
Suggested-by: Willem de Bruijn <will...@google.com>
Acked-by: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 include/linux/netdevice.h |  1 +
 include/linux/skbuff.h|  9 +
 net/core/dev.c| 26 ++
 3 files changed, 36 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3f39d27..b6c36d5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2456,6 +2456,7 @@ static inline int dev_recursion_level(void)
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
+struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
 int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7c0cb2c..1f8028c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -855,6 +855,15 @@ static inline bool skb_pkt_type_ok(u32 ptype)
return ptype <= PACKET_OTHERHOST;
 }
 
+static inline unsigned int skb_napi_id(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+   return skb->napi_id;
+#else
+   return 0;
+#endif
+}
+
 void kfree_skb(struct sk_buff *skb);
 void kfree_skb_list(struct sk_buff *segs);
 void skb_tx_error(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index acd594c..6d3c452 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -162,6 +162,7 @@ static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
 struct net_device *dev,
 struct netdev_notifier_info *info);
+static struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -866,6 +867,31 @@ struct net_device *dev_get_by_index(struct net *net, int 
ifindex)
 EXPORT_SYMBOL(dev_get_by_index);
 
 /**
+ * dev_get_by_napi_id - find a device by napi_id
+ * @napi_id: ID of the NAPI struct
+ *
+ * Search for an interface by NAPI ID. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not had
+ * its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_napi_id(unsigned int napi_id)
+{
+   struct napi_struct *napi;
+
+   WARN_ON_ONCE(!rcu_read_lock_held());
+
+   if (napi_id < MIN_NAPI_ID)
+   return NULL;
+
+   napi = napi_by_id(napi_id);
+
+   return napi ? napi->dev : NULL;
+}
+EXPORT_SYMBOL(dev_get_by_napi_id);
+
+/**
  * netdev_get_name - get a netdevice name, knowing its ifindex.
  * @net: network namespace
  * @name: a pointer to the buffer where the name will be stored.
-- 
2.9.3



Re: [PATCH v3 net-next 5/7] net: don't make false software transmit timestamps

2017-05-17 Thread Miroslav Lichvar
On Tue, May 16, 2017 at 06:34:38PM -0400, Willem de Bruijn wrote:
> On Tue, May 16, 2017 at 8:44 AM, Miroslav Lichvar <mlich...@redhat.com> wrote:
> > If software timestamping is enabled by the SO_TIMESTAMP(NS) option
> > when a message without timestamp is already waiting in the queue, the
> > __sock_recv_timestamp() function will read the current time to make a
> > timestamp in order to always have something for the application.
> >
> > However, this applies also to outgoing packets looped back to the error
> > queue when hardware timestamping is enabled by the SO_TIMESTAMPING
> > option.
> 
> This is already the case for sockets that have both software receive
> timestamps and hardware tx timestamps enabled, independent from
> the new option SOF_TIMESTAMPING_OPT_TX_SWHW, right? If so,
> then this behavior must remain.

Even if we consider that it's not actually returning a valid TX
timestamp and it didn't behave as documented ("Only one field is
non-zero at any time")?

On the RX side this timestamp does make some sense as it could be
viewed as a very late timestamp, correctly ordered after the HW
timestamp, but on the TX side the order is reversed and returning a
timestamp later than the actual transmission might break a protocol.

If you don't see it as a bug fix, I think this weird interaction
between the SO_TIMESTAMP(NS) and SO_TIMESTAMPING options needs to
be documented.

-- 
Miroslav Lichvar


[PATCH v3 net-next 4/7] net: add new control message for incoming HW-timestamped packets

2017-05-16 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_PKTINFO option to request a new control message
for incoming packets with hardware timestamps. It contains the index of
the real interface which received the packet and the length of the
packet at layer 2.

The index is useful with bonding, bridges and other interfaces, where
IP_PKTINFO doesn't allow applications to determine which PHC made the
timestamp. With the L2 length (and link speed) it is possible to
transpose preamble timestamps to trailer timestamps, which are used in
the NTP protocol.

While this information could be provided by two new socket options
independently from timestamping, it doesn't look like they would be very
useful. With this option any performance impact is limited to hardware
timestamping.

Use dev_get_by_napi_id() to get the device and its index. On kernels
with disabled CONFIG_NET_RX_BUSY_POLL or drivers not using NAPI, a zero
index will be returned in the control message.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt |  9 +
 include/uapi/asm-generic/socket.h |  2 ++
 include/uapi/linux/net_tstamp.h   | 10 +-
 net/socket.c  | 27 ++-
 4 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 96f5069..600c6bf 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -193,6 +193,15 @@ SOF_TIMESTAMPING_OPT_STATS:
   the transmit timestamps, such as how long a certain block of
   data was limited by peer's receiver window.
 
+SOF_TIMESTAMPING_OPT_PKTINFO:
+
+  Enable the SCM_TIMESTAMPING_PKTINFO control message for incoming
+  packets with hardware timestamps. The message contains struct
+  scm_ts_pktinfo, which supplies the index of the real interface which
+  received the packet and its length at layer 2. A valid (non-zero)
+  interface index will be returned only if CONFIG_NET_RX_BUSY_POLL is
+  enabled and the driver is using NAPI.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
diff --git a/include/uapi/asm-generic/socket.h 
b/include/uapi/asm-generic/socket.h
index 2b48856..a5f6e81 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -100,4 +100,6 @@
 
 #define SO_COOKIE  57
 
+#define SCM_TIMESTAMPING_PKTINFO   58
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 0749fb1..f2fb455 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -9,6 +9,7 @@
 #ifndef _NET_TIMESTAMPING_H
 #define _NET_TIMESTAMPING_H
 
+#include <linux/types.h>
 #include <linux/socket.h>   /* for SO_TIMESTAMPING */
 
 /* SO_TIMESTAMPING gets an integer bit field comprised of these values */
@@ -26,8 +27,9 @@ enum {
SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
+   SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
@@ -130,4 +132,10 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_NTP_ALL,
 };
 
+/* SCM_TIMESTAMPING_PKTINFO control message */
+struct scm_ts_pktinfo {
+   __u32 if_index;
+   __u32 pkt_length;
+};
+
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/socket.c b/net/socket.c
index c2564eb..ee1f4ec 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -662,6 +662,27 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
return skb->pkt_type == PACKET_OUTGOING;
 }
 
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+{
+   struct scm_ts_pktinfo ts_pktinfo;
+   struct net_device *orig_dev;
+   int ifindex = 0;
+
+   if (!skb_mac_header_was_set(skb))
+   return;
+
+   rcu_read_lock();
+   orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+   if (orig_dev)
+   ifindex = orig_dev->ifindex;
+   rcu_read_unlock();
+
+   ts_pktinfo.if_index = ifindex;
+   ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
+   put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
+sizeof(ts_pktinfo), &ts_pktinfo);
+}
+
 /*
  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
  */
@@ -699,8 +720,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock 
*sk,
empty = 0;
if (shhwtstamps &&

[PATCH v3 net-next 3/7] net: add function to retrieve original skb device using NAPI ID

2017-05-16 Thread Miroslav Lichvar
Since commit b68581778cd0 ("net: Make skb->skb_iif always track
skb->dev") skbs don't have the original index of the interface which
received the packet. This information is now needed for a new control
message related to hardware timestamping.

Instead of adding a new field to skb, we can find the device by the NAPI
ID if it is available, i.e. CONFIG_NET_RX_BUSY_POLL is enabled and the
driver is using NAPI. Add dev_get_by_napi_id() and also skb_napi_id() to
hide the CONFIG_NET_RX_BUSY_POLL ifdef.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Suggested-by: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 include/linux/netdevice.h |  1 +
 include/linux/skbuff.h|  9 +
 net/core/dev.c| 26 ++
 3 files changed, 36 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3f39d27..b6c36d5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2456,6 +2456,7 @@ static inline int dev_recursion_level(void)
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
+struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
 int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a098d95..42dd430 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -915,6 +915,15 @@ static inline bool skb_pkt_type_ok(u32 ptype)
return ptype <= PACKET_OTHERHOST;
 }
 
+static inline unsigned int skb_napi_id(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+   return skb->napi_id;
+#else
+   return 0;
+#endif
+}
+
 void kfree_skb(struct sk_buff *skb);
 void kfree_skb_list(struct sk_buff *segs);
 void skb_tx_error(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index fca407b..dcbc84e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -161,6 +161,7 @@ static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
 struct net_device *dev,
 struct netdev_notifier_info *info);
+static struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -865,6 +866,31 @@ struct net_device *dev_get_by_index(struct net *net, int 
ifindex)
 EXPORT_SYMBOL(dev_get_by_index);
 
 /**
+ * dev_get_by_napi_id - find a device by napi_id
+ * @napi_id: ID of the NAPI struct
+ *
+ * Search for an interface by NAPI ID. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not had
+ * its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_napi_id(unsigned int napi_id)
+{
+   struct napi_struct *napi;
+
+   WARN_ON_ONCE(!rcu_read_lock_held());
+
+   if (napi_id < MIN_NAPI_ID)
+   return NULL;
+
+   napi = napi_by_id(napi_id);
+
+   return napi ? napi->dev : NULL;
+}
+EXPORT_SYMBOL(dev_get_by_napi_id);
+
+/**
  * netdev_get_name - get a netdevice name, knowing its ifindex.
  * @net: network namespace
  * @name: a pointer to the buffer where the name will be stored.
-- 
2.9.3



[PATCH v3 net-next 6/7] net: allow simultaneous SW and HW transmit timestamping

2017-05-16 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_TX_SWHW option to allow an outgoing packet to
be looped to the socket's error queue with a software timestamp even
when a hardware transmit timestamp is expected to be provided by the
driver.

Applications using this option will receive two separate messages from
the error queue, one with a software timestamp and the other with a
hardware timestamp. As the hardware timestamp is saved to the shared skb
info, which may happen before the first message with software timestamp
is received by the application, the hardware timestamp is copied to the
SCM_TIMESTAMPING control message only when the skb has no software
timestamp or it is an incoming packet.

While changing sw_tx_timestamp(), inline it in skb_tx_timestamp() as
there are no other users.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt | 14 --
 include/linux/skbuff.h| 10 ++
 include/uapi/linux/net_tstamp.h   |  3 ++-
 net/core/skbuff.c |  4 
 net/socket.c  | 11 +++
 5 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 600c6bf..55b0007 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -202,6 +202,14 @@ SOF_TIMESTAMPING_OPT_PKTINFO:
   interface index will be returned only if CONFIG_NET_RX_BUSY_POLL is
   enabled and the driver is using NAPI.
 
+SOF_TIMESTAMPING_OPT_TX_SWHW:
+
+  Request both hardware and software timestamps for outgoing packets
+  when SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE
+  are enabled at the same time. If both timestamps are generated,
+  two separate messages will be looped to the socket's error queue,
+  each containing just one timestamp.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
@@ -321,8 +329,10 @@ struct scm_timestamping {
 };
 
 The structure can return up to three timestamps. This is a legacy
-feature. Only one field is non-zero at any time. Most timestamps
-are passed in ts[0]. Hardware timestamps are passed in ts[2].
+feature. Most timestamps are passed in ts[0]. Hardware timestamps
+are passed in ts[2]. Incoming packets may have timestamps in both
+ts[0] and ts[2], but for outgoing packets only one field is non-zero
+at any time.
 
 ts[1] used to hold hardware timestamps converted to system time.
 Instead, expose the hardware clock device on the NIC directly as
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 42dd430..3824549 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3307,13 +3307,6 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 void skb_tstamp_tx(struct sk_buff *orig_skb,
   struct skb_shared_hwtstamps *hwtstamps);
 
-static inline void sw_tx_timestamp(struct sk_buff *skb)
-{
-   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
-   !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
-   skb_tstamp_tx(skb, NULL);
-}
-
 /**
  * skb_tx_timestamp() - Driver hook for transmit timestamping
  *
@@ -3329,7 +3322,8 @@ static inline void sw_tx_timestamp(struct sk_buff *skb)
 static inline void skb_tx_timestamp(struct sk_buff *skb)
 {
skb_clone_tx_timestamp(skb);
-   sw_tx_timestamp(skb);
+   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
+   skb_tstamp_tx(skb, NULL);
 }
 
 /**
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index f2fb455..3cab0ab 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -28,8 +28,9 @@ enum {
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
+   SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 346d3e8..68c02df 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3875,6 +3875,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
 
+   if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+   skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+   return;
+
tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_

[PATCH v3 net-next 7/7] net: ethernet: update drivers to make both SW and HW TX timestamps

2017-05-16 Thread Miroslav Lichvar
Some drivers were calling the skb_tx_timestamp() function only when
a hardware timestamp was not requested. Now that applications can use
the SOF_TIMESTAMPING_OPT_TX_SWHW option to request both software and
hardware timestamps, the drivers need to be modified to unconditionally
call skb_tx_timestamp().

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c  | 3 +--
 drivers/net/ethernet/intel/e1000e/netdev.c| 4 ++--
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c   | 3 +--
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++
 4 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c 
b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 89b21d7..5a2ad9c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1391,8 +1391,7 @@ static void xgbe_prep_tx_tstamp(struct xgbe_prv_data 
*pdata,
spin_unlock_irqrestore(&pdata->tstamp_lock, flags);
}
 
-   if (!XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, PTP))
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 }
 
 static void xgbe_prep_vlan(struct sk_buff *skb, struct xgbe_packet_data 
*packet)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0ff9295..6ed3bc4 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5868,10 +5868,10 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
adapter->tx_hwtstamp_skb = skb_get(skb);
adapter->tx_hwtstamp_start = jiffies;
schedule_work(&adapter->tx_hwtstamp_work);
-   } else {
-   skb_tx_timestamp(skb);
}
 
+   skb_tx_timestamp(skb);
+
netdev_sent_queue(netdev, skb->len);
e1000_tx_queue(tx_ring, tx_flags, count);
/* Make sure there is space in the ring for the next send. */
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c 
b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 1e59435..89831ad 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -1418,8 +1418,7 @@ static netdev_tx_t sxgbe_xmit(struct sk_buff *skb, struct 
net_device *dev)
priv->hw->desc->tx_enable_tstamp(first_desc);
}
 
-   if (!tqueue->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
priv->hw->dma->enable_dma_transmission(priv->ioaddr, txq_index);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index cce862b..27c12e7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2880,8 +2880,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, 
struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
 
-   if (!priv->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
 priv->hwts_tx_en)) {
@@ -3084,8 +3083,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, 
struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
 
-   if (!priv->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
/* Ready to fill the first descriptor and set the OWN bit w/o any
 * problems because all the descriptors are actually ready to be
-- 
2.9.3



[PATCH v3 net-next 5/7] net: don't make false software transmit timestamps

2017-05-16 Thread Miroslav Lichvar
If software timestamping is enabled by the SO_TIMESTAMP(NS) option
when a message without timestamp is already waiting in the queue, the
__sock_recv_timestamp() function will read the current time to make a
timestamp in order to always have something for the application.

However, this applies also to outgoing packets looped back to the error
queue when hardware timestamping is enabled by the SO_TIMESTAMPING
option. A software transmit timestamp made after the actual transmission
is added to messages with hardware timestamps.

Modify the function to save the current time as a software timestamp
only if it's for a received packet (i.e. it's not in the error queue).

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 net/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/socket.c b/net/socket.c
index ee1f4ec..879df37 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -689,7 +689,8 @@ static void put_ts_pktinfo(struct msghdr *msg, struct 
sk_buff *skb)
 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
 {
-   int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
+   int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP) &&
+  !skb_is_err_queue(skb);
struct scm_timestamping tss;
int empty = 1;
struct skb_shared_hwtstamps *shhwtstamps =
-- 
2.9.3



[PATCH v3 net-next 2/7] net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL

2017-05-16 Thread Miroslav Lichvar
Include HWTSTAMP_FILTER_NTP_ALL in net_hwtstamp_validate() as a valid
filter and update drivers which can timestamp all packets, or which
explicitly list unsupported filters instead of using a default case, to
handle the filter.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   | 1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c| 1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   | 1 +
 drivers/net/ethernet/intel/e1000e/netdev.c | 1 +
 drivers/net/ethernet/intel/i40e/i40e_ptp.c | 1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   | 1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c | 1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c| 1 +
 drivers/net/ethernet/sfc/ef10.c| 1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 1 +
 drivers/net/ethernet/ti/cpsw.c | 1 +
 drivers/net/ethernet/tile/tilegx.c | 1 +
 net/core/dev_ioctl.c   | 3 +--
 18 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c 
b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index c772420..89b21d7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1268,6 +1268,7 @@ static int xgbe_set_hwtstamp_settings(struct 
xgbe_prv_data *pdata,
case HWTSTAMP_FILTER_NONE:
break;
 
+   case HWTSTAMP_FILTER_NTP_ALL:
case HWTSTAMP_FILTER_ALL:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENALL, 1);
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index a851f95..2f30b1a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -15351,6 +15351,7 @@ int bnx2x_configure_ptp_filters(struct bnx2x *bp)
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_SOME:
+   case HWTSTAMP_FILTER_NTP_ALL:
bp->rx_filter = HWTSTAMP_FILTER_NONE;
break;
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 927617c..7a0ef5b 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -3020,6 +3020,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 34c7782..15e21b5 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -2100,6 +2100,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c 
b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index a213868..2887bca 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -755,6 +755,7 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device 
*netdev,
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
p->has_rx_tstamp = have_hw_timestamps;
config.rx_filter = HWTSTAMP_FILTER_ALL;
if (p->has_rx_tstamp) {
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index b367972..0ff9295 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3680,6 +3680,7 @@ static int e1000e_config_hwtstamp(struct e1000_adapter 
*adapter,
 * Delay Request messages but not both so fall-through to
 * time stamp all packets.
 

[PATCH v3 net-next 1/7] net: define receive timestamp filter for NTP

2017-05-16 Thread Miroslav Lichvar
Add HWTSTAMP_FILTER_NTP_ALL to the hwtstamp_rx_filters enum for
timestamping of NTP packets. There is currently only one driver
(phyter) that could support it directly.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 include/uapi/linux/net_tstamp.h | 3 +++
 net/core/dev_ioctl.c| 2 ++
 2 files changed, 5 insertions(+)

diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 464dcca..0749fb1 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -125,6 +125,9 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_PTP_V2_SYNC,
/* PTP v2/802.AS1, any layer, Delay_req packet */
HWTSTAMP_FILTER_PTP_V2_DELAY_REQ,
+
+   /* NTP, UDP, all versions and packet modes */
+   HWTSTAMP_FILTER_NTP_ALL,
 };
 
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d2..8f036a7 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -227,6 +227,8 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
rx_filter_valid = 1;
break;
+   case HWTSTAMP_FILTER_NTP_ALL:
+   break;
}
 
if (!tx_type_valid || !rx_filter_valid)
-- 
2.9.3



[PATCH v3 net-next 0/7] Extend socket timestamping API

2017-05-16 Thread Miroslav Lichvar
Changes v2->v3:
- modified struct scm_ts_pktinfo to use fixed-width integer types
- added WARN_ON_ONCE for missing RCU lock in dev_get_by_napi_id()
- modified dev_get_by_napi_id() to not return dev in unexpected branch
- modified recv to return SCM_TIMESTAMPING_PKTINFO even if the interface
  index is unknown

Changes v1->v2:
- added separate patch for new NAPI functions 
- split code from __sock_recv_timestamp() for better readability
- fixed RCU locking
- fixed compiler warning (missing case in switch in first patch)
- inline sw_tx_timestamp() in its only user

Changes RFC->v1:
- reworked SOF_TIMESTAMPING_OPT_PKTINFO patch to not add new fields to
  skb shared info (net device is now looked up by napi_id), not require
  any changes in drivers, and restrict the cmsg to incoming packets
- renamed SOF_TIMESTAMPING_OPT_MULTIMSG to SOF_TIMESTAMPING_OPT_TX_SWHW
  and fixed its description
- moved struct scm_ts_pktinfo from errqueue.h to net_tstamp.h as it
  can't be received from the error queue anymore
- improved commit descriptions and removed incorrect comment

This patchset adds new options to the timestamping API that will be
useful for NTP implementations and possibly other applications.

The first patch specifies a timestamp filter for NTP packets. The second
patch updates drivers that can timestamp all packets, or need to list
the filter as unsupported. There is no attempt to add the support to the
phyter driver.

The third patch adds two helper functions working with NAPI ID, which is
needed by the next patch. The fourth patch adds a new option to get a
new control message with the L2 length and interface index for incoming
packets with hardware timestamps.

The fifth patch fixes the code to not make a false software TX timestamp
when hardware timestamping is enabled. The sixth patch depends on this
fix.

The sixth patch adds a new option to request both software and hardware
timestamps for outgoing packets. The seventh patch updates drivers that
assumed software timestamping cannot be used together with hardware
timestamping.

The patches have been tested on x86_64 machines with igb and e1000e
drivers.

Miroslav Lichvar (7):
  net: define receive timestamp filter for NTP
  net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL
  net: add function to retrieve original skb device using NAPI ID
  net: add new control message for incoming HW-timestamped packets
  net: don't make false software transmit timestamps
  net: allow simultaneous SW and HW transmit timestamping
  net: ethernet: update drivers to make both SW and HW TX timestamps

 Documentation/networking/timestamping.txt  | 23 ++--
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   |  4 +--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   |  1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c|  1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c |  1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   |  1 +
 drivers/net/ethernet/intel/e1000e/netdev.c |  5 +--
 drivers/net/ethernet/intel/i40e/i40e_ptp.c |  1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   |  1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c |  1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c |  1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c|  1 +
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c|  3 +-
 drivers/net/ethernet/sfc/ef10.c|  1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  7 ++--
 drivers/net/ethernet/ti/cpsw.c |  1 +
 drivers/net/ethernet/tile/tilegx.c |  1 +
 include/linux/netdevice.h  |  1 +
 include/linux/skbuff.h | 19 +-
 include/uapi/asm-generic/socket.h  |  2 ++
 include/uapi/linux/net_tstamp.h| 14 +++-
 net/core/dev.c | 26 ++
 net/core/dev_ioctl.c   |  1 +
 net/core/skbuff.c  |  4 +++
 net/socket.c   | 41 --
 27 files changed, 141 insertions(+), 23 deletions(-)

-- 
2.9.3



Re: [PATCH v2 net-next 3/7] net: add function to retrieve original skb device using NAPI ID

2017-05-16 Thread Miroslav Lichvar
On Tue, May 02, 2017 at 12:16:13PM -0400, Willem de Bruijn wrote:
> On Tue, May 2, 2017 at 6:10 AM, Miroslav Lichvar <mlich...@redhat.com> wrote:
> >  /**
> > + * dev_get_by_napi_id - find a device by napi_id
> > + * @napi_id: ID of the NAPI struct
> > + *
> > + * Search for an interface by NAPI ID. Returns %NULL if the device
> > + * is not found or a pointer to the device. The device has not had
> > + * its reference counter increased so the caller must be careful
> > + * about locking. The caller must hold RCU lock.
> 
> Instead of a comment, can check with
> 
>   WARN_ON_ONCE(!rcu_read_lock_held());

The other dev_get_* functions have the same comment, so I think it's
better to keep it for consistency. I'll add the warning and send
a new series with the other changes you have suggested.

Thanks,

-- 
Miroslav Lichvar


[PATCH net] net: netcp: fix check of requested timestamping filter

2017-05-15 Thread Miroslav Lichvar
The driver doesn't support timestamping of all received packets and
should return error when trying to enable the HWTSTAMP_FILTER_ALL
filter.

Cc: WingMan Kwok <w-kw...@ti.com>
Cc: Richard Cochran <richardcoch...@gmail.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/ti/netcp_ethss.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/netcp_ethss.c 
b/drivers/net/ethernet/ti/netcp_ethss.c
index 897176f..dd92950 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -2651,7 +2651,6 @@ static int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_NONE:
cpts_rx_enable(cpts, 0);
break;
-   case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-- 
2.9.3



Re: [PATCH v2 net-next 0/7] Extend socket timestamping API

2017-05-02 Thread Miroslav Lichvar
Hm, I see that net-next was closed. I missed the announcement. Sorry
for the spam.

On Tue, May 02, 2017 at 02:46:02PM +0200, Miroslav Lichvar wrote:
> Changes v1->v2:
> - added separate patch for new NAPI functions 
> - split code from __sock_recv_timestamp() for better readability
> - fixed RCU locking
> - fixed compiler warning (missing case in switch in first patch)
> - inline sw_tx_timestamp() in its only user

-- 
Miroslav Lichvar


[PATCH v2 net-next 1/7] net: define receive timestamp filter for NTP

2017-05-02 Thread Miroslav Lichvar
Add HWTSTAMP_FILTER_NTP_ALL to the hwtstamp_rx_filters enum for
timestamping of NTP packets. There is currently only one driver
(phyter) that could support it directly.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 include/uapi/linux/net_tstamp.h | 3 +++
 net/core/dev_ioctl.c| 2 ++
 2 files changed, 5 insertions(+)

diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 464dcca..0749fb1 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -125,6 +125,9 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_PTP_V2_SYNC,
/* PTP v2/802.AS1, any layer, Delay_req packet */
HWTSTAMP_FILTER_PTP_V2_DELAY_REQ,
+
+   /* NTP, UDP, all versions and packet modes */
+   HWTSTAMP_FILTER_NTP_ALL,
 };
 
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d2..8f036a7 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -227,6 +227,8 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
rx_filter_valid = 1;
break;
+   case HWTSTAMP_FILTER_NTP_ALL:
+   break;
}
 
if (!tx_type_valid || !rx_filter_valid)
-- 
2.9.3



[PATCH v2 net-next 6/7] net: allow simultaneous SW and HW transmit timestamping

2017-05-02 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_TX_SWHW option to allow an outgoing packet to
be looped to the socket's error queue with a software timestamp even
when a hardware transmit timestamp is expected to be provided by the
driver.

Applications using this option will receive two separate messages from
the error queue, one with a software timestamp and the other with a
hardware timestamp. As the hardware timestamp is saved to the shared skb
info, which may happen before the first message with software timestamp
is received by the application, the hardware timestamp is copied to the
SCM_TIMESTAMPING control message only when the skb has no software
timestamp or it is an incoming packet.

While changing sw_tx_timestamp(), inline it in skb_tx_timestamp() as
there are no other users.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt | 14 --
 include/linux/skbuff.h| 10 ++
 include/uapi/linux/net_tstamp.h   |  3 ++-
 net/core/skbuff.c |  4 
 net/socket.c  | 11 +++
 5 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 6c07e7c..ab29a6e 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -201,6 +201,14 @@ SOF_TIMESTAMPING_OPT_PKTINFO:
   which received the packet and its length at layer 2. This option
   works only if CONFIG_NET_RX_BUSY_POLL is enabled.
 
+SOF_TIMESTAMPING_OPT_TX_SWHW:
+
+  Request both hardware and software timestamps for outgoing packets
+  when SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE
+  are enabled at the same time. If both timestamps are generated,
+  two separate messages will be looped to the socket's error queue,
+  each containing just one timestamp.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
@@ -320,8 +328,10 @@ struct scm_timestamping {
 };
 
 The structure can return up to three timestamps. This is a legacy
-feature. Only one field is non-zero at any time. Most timestamps
-are passed in ts[0]. Hardware timestamps are passed in ts[2].
+feature. Most timestamps are passed in ts[0]. Hardware timestamps
+are passed in ts[2]. Incoming packets may have timestamps in both
+ts[0] and ts[2], but for outgoing packets only one field is non-zero
+at any time.
 
 ts[1] used to hold hardware timestamps converted to system time.
 Instead, expose the hardware clock device on the NIC directly as
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bfe6ec3..3be2241 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3307,13 +3307,6 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 void skb_tstamp_tx(struct sk_buff *orig_skb,
   struct skb_shared_hwtstamps *hwtstamps);
 
-static inline void sw_tx_timestamp(struct sk_buff *skb)
-{
-   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
-   !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
-   skb_tstamp_tx(skb, NULL);
-}
-
 /**
  * skb_tx_timestamp() - Driver hook for transmit timestamping
  *
@@ -3329,7 +3322,8 @@ static inline void sw_tx_timestamp(struct sk_buff *skb)
 static inline void skb_tx_timestamp(struct sk_buff *skb)
 {
skb_clone_tx_timestamp(skb);
-   sw_tx_timestamp(skb);
+   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
+   skb_tstamp_tx(skb, NULL);
 }
 
 /**
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 8fcae35..d251972 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -27,8 +27,9 @@ enum {
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
+   SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 58604c1..db5aa19 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3874,6 +3874,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
 
+   if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+   skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+   return;
+
tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
i

[PATCH v2 net-next 3/7] net: add function to retrieve original skb device using NAPI ID

2017-05-02 Thread Miroslav Lichvar
Since commit b68581778cd0 ("net: Make skb->skb_iif always track
skb->dev") skbs don't have the original index of the interface which
received the packet. This information is now needed for a new control
message related to hardware timestamping.

Instead of adding a new field to skb, we can find the device by the NAPI
ID if it is available, i.e. CONFIG_NET_RX_BUSY_POLL is enabled and the
driver is using NAPI. Add dev_get_by_napi_id() and also skb_napi_id() to
hide the CONFIG_NET_RX_BUSY_POLL ifdef.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Suggested-by: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 include/linux/netdevice.h |  1 +
 include/linux/skbuff.h|  9 +
 net/core/dev.c| 26 ++
 3 files changed, 36 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9c23bd2..4ca2a16 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2456,6 +2456,7 @@ static inline int dev_recursion_level(void)
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
+struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
 int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 81ef53f..bfe6ec3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -915,6 +915,15 @@ static inline bool skb_pkt_type_ok(u32 ptype)
return ptype <= PACKET_OTHERHOST;
 }
 
+static inline unsigned int skb_napi_id(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+   return skb->napi_id;
+#else
+   return 0;
+#endif
+}
+
 void kfree_skb(struct sk_buff *skb);
 void kfree_skb_list(struct sk_buff *segs);
 void skb_tx_error(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 35a06ce..fe079b2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -160,6 +160,7 @@ static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
 struct net_device *dev,
 struct netdev_notifier_info *info);
+static struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -864,6 +865,31 @@ struct net_device *dev_get_by_index(struct net *net, int 
ifindex)
 EXPORT_SYMBOL(dev_get_by_index);
 
 /**
+ * dev_get_by_napi_id - find a device by napi_id
+ * @napi_id: ID of the NAPI struct
+ *
+ * Search for an interface by NAPI ID. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not had
+ * its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_napi_id(unsigned int napi_id)
+{
+   struct napi_struct *napi;
+
+   if (napi_id < MIN_NAPI_ID)
+   return NULL;
+
+   napi = napi_by_id(napi_id);
+   if (napi)
+   return napi->dev;
+
+   return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_napi_id);
+
+/**
  * netdev_get_name - get a netdevice name, knowing its ifindex.
  * @net: network namespace
  * @name: a pointer to the buffer where the name will be stored.
-- 
2.9.3



[PATCH v2 net-next 2/7] net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL

2017-05-02 Thread Miroslav Lichvar
Include HWTSTAMP_FILTER_NTP_ALL in net_hwtstamp_validate() as a valid
filter and update drivers which can timestamp all packets, or which
explicitly list unsupported filters instead of using a default case, to
handle the filter.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   | 1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c| 1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   | 1 +
 drivers/net/ethernet/intel/e1000e/netdev.c | 1 +
 drivers/net/ethernet/intel/i40e/i40e_ptp.c | 1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   | 1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c | 1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c| 1 +
 drivers/net/ethernet/sfc/ef10.c| 1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 1 +
 drivers/net/ethernet/ti/cpsw.c | 1 +
 drivers/net/ethernet/tile/tilegx.c | 1 +
 net/core/dev_ioctl.c   | 3 +--
 18 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c 
b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index c772420..89b21d7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1268,6 +1268,7 @@ static int xgbe_set_hwtstamp_settings(struct 
xgbe_prv_data *pdata,
case HWTSTAMP_FILTER_NONE:
break;
 
+   case HWTSTAMP_FILTER_NTP_ALL:
case HWTSTAMP_FILTER_ALL:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENALL, 1);
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index a851f95..2f30b1a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -15351,6 +15351,7 @@ int bnx2x_configure_ptp_filters(struct bnx2x *bp)
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_SOME:
+   case HWTSTAMP_FILTER_NTP_ALL:
bp->rx_filter = HWTSTAMP_FILTER_NONE;
break;
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 927617c..7a0ef5b 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -3020,6 +3020,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 34c7782..15e21b5 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -2100,6 +2100,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c 
b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index a213868..2887bca 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -755,6 +755,7 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device 
*netdev,
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
p->has_rx_tstamp = have_hw_timestamps;
config.rx_filter = HWTSTAMP_FILTER_ALL;
if (p->has_rx_tstamp) {
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index b367972..0ff9295 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3680,6 +3680,7 @@ static int e1000e_config_hwtstamp(struct e1000_adapter 
*adapter,
 * Delay Request messages but not both so fall-through to
 * time stamp all packets.
 

[PATCH v2 net-next 5/7] net: don't make false software transmit timestamps

2017-05-02 Thread Miroslav Lichvar
If software timestamping is enabled by the SO_TIMESTAMP(NS) option
when a message without timestamp is already waiting in the queue, the
__sock_recv_timestamp() function will read the current time to make a
timestamp in order to always have something for the application.

However, this applies also to outgoing packets looped back to the error
queue when hardware timestamping is enabled by the SO_TIMESTAMPING
option. A software transmit timestamp made after the actual transmission
is added to messages with hardware timestamps.

Modify the function to save the current time as a software timestamp
only if it's for a received packet (i.e. it's not in the error queue).

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 net/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/socket.c b/net/socket.c
index da4d4ab..fe7e5bc 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -692,7 +692,8 @@ static void put_ts_pktinfo(struct msghdr *msg, struct 
sk_buff *skb)
 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
 {
-   int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
+   int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP) &&
+  !skb_is_err_queue(skb);
struct scm_timestamping tss;
int empty = 1;
struct skb_shared_hwtstamps *shhwtstamps =
-- 
2.9.3



[PATCH v2 net-next 4/7] net: add new control message for incoming HW-timestamped packets

2017-05-02 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_PKTINFO option to request a new control message
for incoming packets with hardware timestamps. It contains the index of
the real interface which received the packet and the length of the
packet at layer 2.

The index is useful with bonding, bridges and other interfaces, where
IP_PKTINFO doesn't allow applications to determine which PHC made the
timestamp. With the L2 length (and link speed) it is possible to
transpose preamble timestamps to trailer timestamps, which are used in
the NTP protocol.

While this information could be provided by two new socket options
independently from timestamping, it doesn't look like it would be very
useful. With this option any performance impact is limited to hardware
timestamping.

Use dev_get_by_napi_id() to look up the device and its index. This
limits the option to kernels with enabled CONFIG_NET_RX_BUSY_POLL and
drivers using napi, but it should cover all current MAC drivers that
support hardware timestamping.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt |  8 
 include/uapi/asm-generic/socket.h |  2 ++
 include/uapi/linux/net_tstamp.h   |  9 -
 net/socket.c  | 30 +-
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 96f5069..6c07e7c 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -193,6 +193,14 @@ SOF_TIMESTAMPING_OPT_STATS:
   the transmit timestamps, such as how long a certain block of
   data was limited by peer's receiver window.
 
+SOF_TIMESTAMPING_OPT_PKTINFO:
+
+  Enable the SCM_TIMESTAMPING_PKTINFO control message for incoming
+  packets with hardware timestamps. The message contains struct
+  scm_ts_pktinfo, which supplies the index of the real interface
+  which received the packet and its length at layer 2. This option
+  works only if CONFIG_NET_RX_BUSY_POLL is enabled.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
diff --git a/include/uapi/asm-generic/socket.h 
b/include/uapi/asm-generic/socket.h
index 2b48856..a5f6e81 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -100,4 +100,6 @@
 
 #define SO_COOKIE  57
 
+#define SCM_TIMESTAMPING_PKTINFO   58
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 0749fb1..8fcae35 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -26,8 +26,9 @@ enum {
SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
+   SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
@@ -130,4 +131,10 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_NTP_ALL,
 };
 
+/* SCM_TIMESTAMPING_PKTINFO control message */
+struct scm_ts_pktinfo {
+   int if_index;
+   int pkt_length;
+};
+
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/socket.c b/net/socket.c
index c2564eb..da4d4ab 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -662,6 +662,30 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
return skb->pkt_type == PACKET_OUTGOING;
 }
 
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+{
+   struct scm_ts_pktinfo ts_pktinfo;
+   struct net_device *orig_dev;
+   int ifindex = 0;
+
+   if (!skb_mac_header_was_set(skb))
+   return;
+
+   rcu_read_lock();
+   orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+   if (orig_dev)
+   ifindex = orig_dev->ifindex;
+   rcu_read_unlock();
+
+   if (ifindex == 0)
+   return;
+
+   ts_pktinfo.if_index = ifindex;
+   ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
+   put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
+sizeof(ts_pktinfo), &ts_pktinfo);
+}
+
 /*
  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
  */
@@ -699,8 +723,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock 
*sk,
empty = 0;
if (shhwtstamps &&
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
-   ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
+   ktime_to_

[PATCH v2 net-next 7/7] net: ethernet: update drivers to make both SW and HW TX timestamps

2017-05-02 Thread Miroslav Lichvar
Some drivers were calling the skb_tx_timestamp() function only when
a hardware timestamp was not requested. Now that applications can use
the SOF_TIMESTAMPING_OPT_TX_SWHW option to request both software and
hardware timestamps, the drivers need to be modified to unconditionally
call skb_tx_timestamp().

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c  | 3 +--
 drivers/net/ethernet/intel/e1000e/netdev.c| 4 ++--
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c   | 3 +--
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++
 4 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c 
b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 89b21d7..5a2ad9c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1391,8 +1391,7 @@ static void xgbe_prep_tx_tstamp(struct xgbe_prv_data 
*pdata,
spin_unlock_irqrestore(&pdata->tstamp_lock, flags);
}
 
-   if (!XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, PTP))
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 }
 
 static void xgbe_prep_vlan(struct sk_buff *skb, struct xgbe_packet_data 
*packet)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0ff9295..6ed3bc4 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5868,10 +5868,10 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
adapter->tx_hwtstamp_skb = skb_get(skb);
adapter->tx_hwtstamp_start = jiffies;
schedule_work(&adapter->tx_hwtstamp_work);
-   } else {
-   skb_tx_timestamp(skb);
}
 
+   skb_tx_timestamp(skb);
+
netdev_sent_queue(netdev, skb->len);
e1000_tx_queue(tx_ring, tx_flags, count);
/* Make sure there is space in the ring for the next send. */
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c 
b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index d54490d..50c182c 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -1418,8 +1418,7 @@ static netdev_tx_t sxgbe_xmit(struct sk_buff *skb, struct 
net_device *dev)
priv->hw->desc->tx_enable_tstamp(first_desc);
}
 
-   if (!tqueue->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
priv->hw->dma->enable_dma_transmission(priv->ioaddr, txq_index);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 3115700..7f857ee 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2880,8 +2880,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, 
struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
 
-   if (!priv->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
 priv->hwts_tx_en)) {
@@ -3084,8 +3083,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, 
struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
 
-   if (!priv->hwts_tx_en)
-   skb_tx_timestamp(skb);
+   skb_tx_timestamp(skb);
 
/* Ready to fill the first descriptor and set the OWN bit w/o any
 * problems because all the descriptors are actually ready to be
-- 
2.9.3



[PATCH v2 net-next 0/7] Extend socket timestamping API

2017-05-02 Thread Miroslav Lichvar
Changes v1->v2:
- added separate patch for new NAPI functions 
- split code from __sock_recv_timestamp() for better readability
- fixed RCU locking
- fixed compiler warning (missing case in switch in first patch)
- inline sw_tx_timestamp() in its only user

Changes RFC->v1:
- reworked SOF_TIMESTAMPING_OPT_PKTINFO patch to not add new fields to
  skb shared info (net device is now looked up by napi_id), not require
  any changes in drivers, and restrict the cmsg to incoming packets
- renamed SOF_TIMESTAMPING_OPT_MULTIMSG to SOF_TIMESTAMPING_OPT_TX_SWHW
  and fixed its description
- moved struct scm_ts_pktinfo from errqueue.h to net_tstamp.h as it
  can't be received from the error queue anymore
- improved commit descriptions and removed incorrect comment

This patchset adds new options to the timestamping API that will be
useful for NTP implementations and possibly other applications.

The first patch specifies a timestamp filter for NTP packets. The second
patch updates drivers that can timestamp all packets, or need to list
the filter as unsupported. There is no attempt to add the support to the
phyter driver.

The third patch adds two helper functions working with NAPI ID, which is
needed by the next patch. The fourth patch adds a new option to get a
new control message with the L2 length and interface index for incoming
packets with hardware timestamps.

The fifth patch fixes the code to not make a false software TX timestamp
when hardware timestamping is enabled. The sixth patch depends on this
fix.

The sixth patch adds a new option to request both software and hardware
timestamps for outgoing packets. The seventh patch updates drivers that
assumed software timestamping cannot be used together with hardware
timestamping.

The patches have been tested on x86_64 machines with igb and e1000e
drivers.

Miroslav Lichvar (7):
  net: define receive timestamp filter for NTP
  net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL
  net: add function to retrieve original skb device using NAPI ID
  net: add new control message for incoming HW-timestamped packets
  net: don't make false software transmit timestamps
  net: allow simultaneous SW and HW transmit timestamping
  net: ethernet: update drivers to make both SW and HW TX timestamps

 Documentation/networking/timestamping.txt  | 22 ++-
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   |  4 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   |  1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c|  1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c |  1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   |  1 +
 drivers/net/ethernet/intel/e1000e/netdev.c |  5 ++-
 drivers/net/ethernet/intel/i40e/i40e_ptp.c |  1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   |  1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c |  1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c |  1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c|  1 +
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c|  3 +-
 drivers/net/ethernet/sfc/ef10.c|  1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  7 ++--
 drivers/net/ethernet/ti/cpsw.c |  1 +
 drivers/net/ethernet/tile/tilegx.c |  1 +
 include/linux/netdevice.h  |  1 +
 include/linux/skbuff.h | 19 ++
 include/uapi/asm-generic/socket.h  |  2 +
 include/uapi/linux/net_tstamp.h| 13 ++-
 net/core/dev.c | 26 +
 net/core/dev_ioctl.c   |  1 +
 net/core/skbuff.c  |  4 ++
 net/socket.c   | 44 +-
 27 files changed, 142 insertions(+), 23 deletions(-)

-- 
2.9.3



Re: [PATCH v1 net-next 5/6] net: allow simultaneous SW and HW transmit timestamping

2017-05-02 Thread Miroslav Lichvar
On Fri, Apr 28, 2017 at 04:07:29PM -0400, Willem de Bruijn wrote:
> On Fri, Apr 28, 2017 at 12:23 PM, Miroslav Lichvar <mlich...@redhat.com> 
> wrote:
> > On Fri, Apr 28, 2017 at 11:50:28AM -0400, Willem de Bruijn wrote:
> >> A more elegant solution would be to not set SKBTX_IN_PROGRESS
> >> at all if SOF_TIMESTAMPING_OPT_TX_SWHW is set on the socket.
> >> But the patch to do so is not elegant, having to update callsites in many
> >> device drivers.
> >
> > Also, it would change the meaning of the flag as it seems some drivers
> > actually use the SKBTX_IN_PROGRESS flag to check if they expect a
> > timestamp.
> >
> > How about allocating the last bit of tx_flags for SKBTX_SWHW_TSTAMP?
> 
> That is such a scarce resource that I really would prefer to avoid using
> that if we can.

Ok. I think it won't really matter. We should keep in mind that the
reason for adding the OPT_TX_SWHW option was to not break old
applications which enabled both SW and HW TX timestamping, even though
they could get only one timestamp. I think most applications in future
will either enable only SW or HW TX timestamping, or enable both
together with the OPT_TX_SWHW option in order to get a SW timestamp
when HW timestamp was requested but missing.

> >> Otherwise you may indeed have to call skb_tstamp_tx for every packet
> >> that has SKBTX_SW_TSTAMP set, as you do. We can at least move
> >> the skb->sk != NULL check into skb_tx_timestamp in skbuff.h.

There are other callers of skb_tx_timestamp() and it's not obvious to
me they are all safe (i.e. cannot pass skb with sk==NULL), so I think
this should rather be a separate patch if necessary.

I'll resend the series with the other changes you have suggested.

-- 
Miroslav Lichvar


Re: [PATCH v1 net-next 5/6] net: allow simultaneous SW and HW transmit timestamping

2017-04-28 Thread Miroslav Lichvar
On Fri, Apr 28, 2017 at 11:50:28AM -0400, Willem de Bruijn wrote:
> On Fri, Apr 28, 2017 at 4:54 AM, Miroslav Lichvar <mlich...@redhat.com> wrote:
> >>   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
> >> -!(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
> >> +  (!(skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)) ||
> >> +  (skb->sk && skb->sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW)
> >
> > I'm not sure if this can work. sk_buff.h would need to include sock.h
> > in order to get the definition of struct sock. Any suggestions?
> 
> A more elegant solution would be to not set SKBTX_IN_PROGRESS
> at all if SOF_TIMESTAMPING_OPT_TX_SWHW is set on the socket.
> But the patch to do so is not elegant, having to update callsites in many
> device drivers.

Also, it would change the meaning of the flag as it seems some drivers
actually use the SKBTX_IN_PROGRESS flag to check if they expect a
timestamp.

How about allocating the last bit of tx_flags for SKBTX_SWHW_TSTAMP?

> Otherwise you may indeed have to call skb_tstamp_tx for every packet
> that has SKBTX_SW_TSTAMP set, as you do. We can at least move
> the skb->sk != NULL check into skb_tx_timestamp in skbuff.h.
> 
> By the way, if changing this code, I think that it's time to get rid of
> sw_tx_timestamp. It is only called from skb_tx_timestamp. Let's
> just move the condition in there.

Ok. I assume that should be a separate patch.

-- 
Miroslav Lichvar


Re: [PATCH v1 net-next 5/6] net: allow simultaneous SW and HW transmit timestamping

2017-04-28 Thread Miroslav Lichvar
On Wed, Apr 26, 2017 at 08:00:02PM -0400, Willem de Bruijn wrote:
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 81ef53f..42bff22 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -3300,8 +3300,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
> >
> >  static inline void sw_tx_timestamp(struct sk_buff *skb)
> >  {
> > -   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
> > -   !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
> > +   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
> > skb_tstamp_tx(skb, NULL);
> >  }

> > +++ b/net/core/skbuff.c
> > @@ -3874,6 +3874,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
> > if (!sk)
> > return;
> >
> > +   if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) 
> > &&
> > +   skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
> > +   return;
> > +
> 
> This check should only happen for software transmit timestamps, so simpler to
> revise the check in sw_tx_timestamp above to
> 
>   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
> -!(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
> +  (!(skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)) ||
> +  (skb->sk && skb->sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW)

I'm not sure if this can work. sk_buff.h would need to include sock.h
in order to get the definition of struct sock. Any suggestions?

-- 
Miroslav Lichvar


Re: [PATCH v1 net-next 5/6] net: allow simultaneous SW and HW transmit timestamping

2017-04-27 Thread Miroslav Lichvar
On Thu, Apr 27, 2017 at 12:21:00PM -0400, Willem de Bruijn wrote:
> >> > @@ -720,6 +720,7 @@ void __sock_recv_timestamp(struct msghdr *msg, 
> >> > struct sock *sk,
> >> > empty = 0;
> >> > if (shhwtstamps &&
> >> > (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
> >> > +   (empty || !skb_is_err_queue(skb)) &&
> >> > ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
> >>
> >> I find skb->tstamp == 0 easier to understand than the condition on empty.
> >>
> >> Indeed, this is so non-obvious that I would suggest another helper function
> >> skb_is_hwtx_tstamp with a concise comment about the race condition
> >> between tx software and hardware timestamps (as in the last sentence of
> >> the commit message).
> >
> > Should it include also the skb_is_err_queue() check? If it returned
> > true for both TX and RX HW timestamps, maybe it could be called
> > skb_has_hw_tstamp?
> 
> For the purpose of documenting why this complex condition exists,
> I would call the skb_is_err_queue in that helper function and make
> it tx + hw specific.

Hm, like this?

if (shhwtstamps &&
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+   (skb_is_hwtx_tstamp(skb) || !skb_is_err_queue(skb)) &&
    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {

where skb_is_hwtx_tstamp() has
return skb->tstamp == 0 && skb_is_err_queue(skb);

I was just not sure about the unnecessary skb_is_err_queue() call.

-- 
Miroslav Lichvar


Re: [PATCH v1 net-next 5/6] net: allow simultaneous SW and HW transmit timestamping

2017-04-27 Thread Miroslav Lichvar
On Wed, Apr 26, 2017 at 08:00:02PM -0400, Willem de Bruijn wrote:
> > +   if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) 
> > &&
> > +   skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
> > +   return;
> > +
> 
> This check should only happen for software transmit timestamps, so simpler to
> revise the check in sw_tx_timestamp above to
> 
>   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
> -!(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
> +  (!(skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)) ||
> +  (skb->sk && skb->sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW)

Good point. This will avoid unnecessary calls of skb_tstamp_tx() in
the common case when SOF_TIMESTAMPING_OPT_TX_SWHW will not be enabled.

> > @@ -720,6 +720,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct 
> > sock *sk,
> > empty = 0;
> > if (shhwtstamps &&
> > (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
> > +   (empty || !skb_is_err_queue(skb)) &&
> > ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
> 
> I find skb->tstamp == 0 easier to understand than the condition on empty.
> 
> Indeed, this is so non-obvious that I would suggest another helper function
> skb_is_hwtx_tstamp with a concise comment about the race condition
> between tx software and hardware timestamps (as in the last sentence of
> the commit message).

Should it include also the skb_is_err_queue() check? If it returned
true for both TX and RX HW timestamps, maybe it could be called
skb_has_hw_tstamp?

-- 
Miroslav Lichvar


Re: [PATCH v1 net-next 3/6] net: add new control message for incoming HW-timestamped packets

2017-04-27 Thread Miroslav Lichvar
Thanks for the comments.

On Wed, Apr 26, 2017 at 07:34:49PM -0400, Willem de Bruijn wrote:
> > +struct net_device *dev_get_by_napi_id(unsigned int napi_id)
> > +{
> > +   struct net_device *dev = NULL;
> > +   struct napi_struct *napi;
> > +
> > +   rcu_read_lock();
> > +
> > +   napi = napi_by_id(napi_id);
> > +   if (napi)
> > +   dev = napi->dev;
> > +
> > +   rcu_read_unlock();
> > +
> > +   return dev;
> > +}
> > +EXPORT_SYMBOL(dev_get_by_napi_id);
> 
> Returning dev without holding a reference is not safe. You'll probably
> have to call this with rcu_read_lock held instead.

How about changing the function to simply return the index instead of
the device (e.g. dev_get_ifindex_by_napi_id())? Would that be too
specific?

> >  /*
> >   * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
> >   */
> > @@ -699,8 +719,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct 
> > sock *sk,
> > empty = 0;
> > if (shhwtstamps &&
> > (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
> 
> This information is also informative with software timestamps.

But is it useful and worth the cost? If I have two interfaces and only
one has HW timestamping, or just one interface which can timestamp
incoming packets at a limited rate, I would prefer to not waste CPU
cycles preparing and processing useless data.

> And getting the real iif is definitely useful outside timestamps.

Do you have an example? We have asked that in the original thread,
but no one suggested anything. For AF_PACKET there is PACKET_ORIGDEV.
When I was searching the Internet on how to get the index with INET
sockets, it looked like I was the only one who had this problem :).

> An
> alternative approach is to add versioning to IP_PKTINFO with a new
> setsockopt IP_PKTINFO_VERSION plus a new struct in_pktinfo_v2
> that extends in_pktinfo. Just a thought.

The struct would contain both the original and last interface index,
and the length as well? And similarly with in6_pktinfo?

If there is an agreement that the information would be useful also for
other things than timestamping, I can try that. If not, I think it
would be better to keep it tied to HW timestamping.

-- 
Miroslav Lichvar


Re: [PATCH v1 net-next 0/6] Extend socket timestamping API

2017-04-27 Thread Miroslav Lichvar
On Wed, Apr 26, 2017 at 06:54:35PM +0200, Richard Cochran wrote:
> On Wed, Apr 26, 2017 at 04:50:29PM +0200, Miroslav Lichvar wrote:
> > This patchset adds new options to the timestamping API that will be
> > useful for NTP implementations and possibly other applications.
> 
> Are there any userland ntp patches floating around to exercise the new
> HW time stamping option?

I'm not sure if anyone is working on support for HW timestamping in
ntp. With chrony, you can test it with experimental code in the
hwts branch of https://github.com/mlichvar/chrony.

$ ./configure --enable-debug && make
# ./chronyd -d -d 'hwtimestamp *' 'server pool.ntp.org iburst' |& grep TIMESTAMP
 TX SCM_TIMESTAMPING: swts=1493285228.512531924 hwts=0.0
 TX SCM_TIMESTAMPING: swts=0.0 hwts=1493285226.073103885
 SCM_TIMESTAMPING_PKTINFO: if=2 len=90
 RX SCM_TIMESTAMPING: swts=1493285228.530657351 hwts=1493285226.091054104
 TX SCM_TIMESTAMPING: swts=1493285230.553457791 hwts=0.0
 TX SCM_TIMESTAMPING: swts=0.0 hwts=1493285228.113705104
 SCM_TIMESTAMPING_PKTINFO: if=2 len=90
 RX SCM_TIMESTAMPING: swts=1493285230.582817079 hwts=1493285228.142890229

-- 
Miroslav Lichvar


[PATCH v1 net-next 5/6] net: allow simultaneous SW and HW transmit timestamping

2017-04-26 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_TX_SWHW option to allow an outgoing packet to
be looped to the socket's error queue with a software timestamp even
when a hardware transmit timestamp is expected to be provided by the
driver.

Applications using this option will receive two separate messages from
the error queue, one with a software timestamp and the other with a
hardware timestamp. As the hardware timestamp is saved to the shared skb
info, which may happen before the first message with software timestamp
is received by the application, the hardware timestamp is copied to the
SCM_TIMESTAMPING control message only when the skb has no software
timestamp or it is an incoming packet.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt | 14 --
 include/linux/skbuff.h|  3 +--
 include/uapi/linux/net_tstamp.h   |  3 ++-
 net/core/skbuff.c |  4 
 net/socket.c  |  1 +
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 6c07e7c..ab29a6e 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -201,6 +201,14 @@ SOF_TIMESTAMPING_OPT_PKTINFO:
   which received the packet and its length at layer 2. This option
   works only if CONFIG_NET_RX_BUSY_POLL is enabled.
 
+SOF_TIMESTAMPING_OPT_TX_SWHW:
+
+  Request both hardware and software timestamps for outgoing packets
+  when SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE
+  are enabled at the same time. If both timestamps are generated,
+  two separate messages will be looped to the socket's error queue,
+  each containing just one timestamp.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
@@ -320,8 +328,10 @@ struct scm_timestamping {
 };
 
 The structure can return up to three timestamps. This is a legacy
-feature. Only one field is non-zero at any time. Most timestamps
-are passed in ts[0]. Hardware timestamps are passed in ts[2].
+feature. Most timestamps are passed in ts[0]. Hardware timestamps
+are passed in ts[2]. Incoming packets may have timestamps in both
+ts[0] and ts[2], but for outgoing packets only one field is non-zero
+at any time.
 
 ts[1] used to hold hardware timestamps converted to system time.
 Instead, expose the hardware clock device on the NIC directly as
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 81ef53f..42bff22 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3300,8 +3300,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 
 static inline void sw_tx_timestamp(struct sk_buff *skb)
 {
-   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
-   !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
+   if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP)
skb_tstamp_tx(skb, NULL);
 }
 
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 8fcae35..d251972 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -27,8 +27,9 @@ enum {
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
+   SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 58604c1..db5aa19 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3874,6 +3874,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
 
+   if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+   skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+   return;
+
tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_may_tx_timestamp(sk, tsonly))
return;
diff --git a/net/socket.c b/net/socket.c
index 68b9304..14b7688 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -720,6 +720,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock 
*sk,
empty = 0;
if (shhwtstamps &&
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+   (empty || !skb_is_err_queue(skb)) &&
ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
empty = 0;
if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
-- 
2.9.3



[PATCH v1 net-next 0/6] Extend socket timestamping API

2017-04-26 Thread Miroslav Lichvar
Changes RFC->v1:
- reworked SOF_TIMESTAMPING_OPT_PKTINFO patch to not add new fields to
  skb shared info (net device is now looked up by napi_id), not require
  any changes in drivers, and restrict the cmsg to incoming packets
- renamed SOF_TIMESTAMPING_OPT_MULTIMSG to SOF_TIMESTAMPING_OPT_TX_SWHW
  and fixed its description
- moved struct scm_ts_pktinfo from errqueue.h to net_tstamp.h as it
  can't be received from the error queue anymore
- improved commit descriptions and removed incorrect comment

This patchset adds new options to the timestamping API that will be
useful for NTP implementations and possibly other applications.

The first patch specifies a timestamp filter for NTP packets. The second
patch updates drivers that can timestamp all packets, or need to list
the filter as unsupported. There is no attempt to add the support to the
phyter driver.

The third patch adds a new option to get a new control message with the
L2 length and interface index for incoming packets with hardware
timestamps.

The fourth patch fixes the code to not make a false software TX
timestamp when hardware timestamping is enabled. The fifth patch depends
on this fix.

The fifth patch adds a new option to request both software and hardware
timestamps for outgoing packets. The sixth patch updates drivers that
assumed software timestamping cannot be used together with hardware
timestamping.

The patches have been tested on x86_64 machines with igb and e1000e
drivers.

Miroslav Lichvar (6):
  net: define receive timestamp filter for NTP
  net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL
  net: add new control message for incoming HW-timestamped packets
  net: don't make false software transmit timestamps
  net: allow simultaneous SW and HW transmit timestamping
  net: ethernet: update drivers to make both SW and HW TX timestamps

 Documentation/networking/timestamping.txt  | 22 ++--
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   |  4 +--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   |  1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c|  1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c |  1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   |  1 +
 drivers/net/ethernet/intel/e1000e/netdev.c |  5 ++--
 drivers/net/ethernet/intel/i40e/i40e_ptp.c |  1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   |  1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c |  1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c |  1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c|  1 +
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c|  3 +--
 drivers/net/ethernet/sfc/ef10.c|  1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  7 +++--
 drivers/net/ethernet/ti/cpsw.c |  1 +
 drivers/net/ethernet/tile/tilegx.c |  1 +
 include/linux/netdevice.h  |  1 +
 include/linux/skbuff.h |  3 +--
 include/uapi/asm-generic/socket.h  |  2 ++
 include/uapi/linux/net_tstamp.h| 13 +-
 net/core/dev.c | 18 +
 net/core/dev_ioctl.c   |  1 +
 net/core/skbuff.c  |  4 +++
 net/socket.c   | 30 --
 27 files changed, 110 insertions(+), 17 deletions(-)

-- 
2.9.3


[PATCH v1 net-next 1/6] net: define receive timestamp filter for NTP

2017-04-26 Thread Miroslav Lichvar
Add HWTSTAMP_FILTER_NTP_ALL to the hwtstamp_rx_filters enum for
timestamping of NTP packets. There is currently only one driver
(phyter) that could support it directly.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 include/uapi/linux/net_tstamp.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 464dcca..0749fb1 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -125,6 +125,9 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_PTP_V2_SYNC,
/* PTP v2/802.AS1, any layer, Delay_req packet */
HWTSTAMP_FILTER_PTP_V2_DELAY_REQ,
+
+   /* NTP, UDP, all versions and packet modes */
+   HWTSTAMP_FILTER_NTP_ALL,
 };
 
 #endif /* _NET_TIMESTAMPING_H */
-- 
2.9.3



[PATCH v1 net-next 3/6] net: add new control message for incoming HW-timestamped packets

2017-04-26 Thread Miroslav Lichvar
Add SOF_TIMESTAMPING_OPT_PKTINFO option to request a new control message
for incoming packets with hardware timestamps. It contains the index of
the real interface which received the packet and the length of the
packet at layer 2.

The index is useful with bonding, bridges and other interfaces, where
IP_PKTINFO doesn't allow applications to determine which PHC made the
timestamp. With the L2 length (and link speed) it is possible to
transpose preamble timestamps to trailer timestamps, which are used in
the NTP protocol.

While this information could be provided by two new socket options
independently from timestamping, it doesn't look like it would be very
useful. With this option any performance impact is limited to hardware
timestamping.

As skb currently has no field for the original interface index, use
napi_id to look up the device and its index. This limits the option to
kernels with enabled CONFIG_NET_RX_BUSY_POLL and drivers using napi,
which should cover all current MAC drivers that support hardware
timestamping.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 Documentation/networking/timestamping.txt |  8 
 include/linux/netdevice.h |  1 +
 include/uapi/asm-generic/socket.h |  2 ++
 include/uapi/linux/net_tstamp.h   |  9 -
 net/core/dev.c| 18 ++
 net/socket.c  | 26 +-
 6 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/timestamping.txt 
b/Documentation/networking/timestamping.txt
index 96f5069..6c07e7c 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -193,6 +193,14 @@ SOF_TIMESTAMPING_OPT_STATS:
   the transmit timestamps, such as how long a certain block of
   data was limited by peer's receiver window.
 
+SOF_TIMESTAMPING_OPT_PKTINFO:
+
+  Enable the SCM_TIMESTAMPING_PKTINFO control message for incoming
+  packets with hardware timestamps. The message contains struct
+  scm_ts_pktinfo, which supplies the index of the real interface
+  which received the packet and its length at layer 2. This option
+  works only if CONFIG_NET_RX_BUSY_POLL is enabled.
+
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
 regardless of the setting of sysctl net.core.tstamp_allow_data.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8c5c8cd..c7ce189 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2451,6 +2451,7 @@ static inline int dev_recursion_level(void)
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
+struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
 int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
diff --git a/include/uapi/asm-generic/socket.h 
b/include/uapi/asm-generic/socket.h
index 2b48856..a5f6e81 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -100,4 +100,6 @@
 
 #define SO_COOKIE  57
 
+#define SCM_TIMESTAMPING_PKTINFO   58
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 0749fb1..8fcae35 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -26,8 +26,9 @@ enum {
SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
SOF_TIMESTAMPING_OPT_STATS = (1<<12),
+   SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
 
-   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS,
+   SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 SOF_TIMESTAMPING_LAST
 };
@@ -130,4 +131,10 @@ enum hwtstamp_rx_filters {
HWTSTAMP_FILTER_NTP_ALL,
 };
 
+/* SCM_TIMESTAMPING_PKTINFO control message */
+struct scm_ts_pktinfo {
+   int if_index;
+   int pkt_length;
+};
+
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 1b3317c..0a78f7f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -160,6 +160,7 @@ static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
 struct net_device *dev,
 struct netdev_notifier_info *info);
+static struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /*
 * The @dev_base_head list is protected by @dev_base_lock

[PATCH v1 net-next 2/6] net: ethernet: update drivers to handle HWTSTAMP_FILTER_NTP_ALL

2017-04-26 Thread Miroslav Lichvar
Include HWTSTAMP_FILTER_NTP_ALL in net_hwtstamp_validate() as a valid
filter and update drivers which can timestamp all packets, or which
explicitly list unsupported filters instead of using a default case, to
handle the filter.

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c   | 1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 1 +
 drivers/net/ethernet/cavium/liquidio/lio_main.c| 1 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 1 +
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   | 1 +
 drivers/net/ethernet/intel/e1000e/netdev.c | 1 +
 drivers/net/ethernet/intel/i40e/i40e_ptp.c | 1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c   | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c   | 1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 1 +
 drivers/net/ethernet/neterion/vxge/vxge-main.c | 1 +
 drivers/net/ethernet/qlogic/qede/qede_ptp.c| 1 +
 drivers/net/ethernet/sfc/ef10.c| 1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 1 +
 drivers/net/ethernet/ti/cpsw.c | 1 +
 drivers/net/ethernet/tile/tilegx.c | 1 +
 net/core/dev_ioctl.c   | 1 +
 18 files changed, 18 insertions(+)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index c772420..89b21d7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1268,6 +1268,7 @@ static int xgbe_set_hwtstamp_settings(struct xgbe_prv_data *pdata,
case HWTSTAMP_FILTER_NONE:
break;
 
+   case HWTSTAMP_FILTER_NTP_ALL:
case HWTSTAMP_FILTER_ALL:
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENALL, 1);
XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 1);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index a851f95..2f30b1a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -15351,6 +15351,7 @@ int bnx2x_configure_ptp_filters(struct bnx2x *bp)
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_SOME:
+   case HWTSTAMP_FILTER_NTP_ALL:
bp->rx_filter = HWTSTAMP_FILTER_NONE;
break;
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 927617c..7a0ef5b 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -3020,6 +3020,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 34c7782..15e21b5 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -2100,6 +2100,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
conf.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index a213868..2887bca 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -755,6 +755,7 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device *netdev,
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+   case HWTSTAMP_FILTER_NTP_ALL:
p->has_rx_tstamp = have_hw_timestamps;
config.rx_filter = HWTSTAMP_FILTER_ALL;
if (p->has_rx_tstamp) {
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 667fc45..940a79e 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3668,6 +3668,7 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter,
 * Delay Request messages but not both so fall-through to
 * time stamp all packets.
 */
+   case HWTSTAMP_FILTER_NTP_ALL:

[PATCH v1 net-next 4/6] net: don't make false software transmit timestamps

2017-04-26 Thread Miroslav Lichvar
If software timestamping is enabled by the SO_TIMESTAMP(NS) option
when a message without a timestamp is already waiting in the queue, the
__sock_recv_timestamp() function will read the current time to make a
timestamp in order to always have something for the application.

However, this also applies to outgoing packets looped back to the error
queue when hardware timestamping is enabled by the SO_TIMESTAMPING
option. As a result, a false software transmit timestamp, made after the
actual transmission, is added to messages with hardware timestamps.

Modify the function to save the current time as a software timestamp
only if it's for a received packet (i.e. it's not in the error queue).

CC: Richard Cochran <richardcoch...@gmail.com>
CC: Willem de Bruijn <will...@google.com>
Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
---
 net/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/socket.c b/net/socket.c
index 5ea5f29..68b9304 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -688,7 +688,8 @@ static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
 {
-   int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
+   int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP) &&
+  !skb_is_err_queue(skb);
struct scm_timestamping tss;
int empty = 1;
struct skb_shared_hwtstamps *shhwtstamps =
-- 
2.9.3



  1   2   >