Hi,

some updates

> Here it would help to see the values of oldts and newts when the >
assertion fails.
The counter values when the assert happens are: 

newts.tv_sec = 56c923a3
  oldts.tv_sec= 56c810c2
  delta=000112e1

The delta at error time is either 
000112e1 or 000112e0


The check code looks now:

#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char* argv[])
{
    static const int CLOCKFD = 3;
    int fd = open("/dev/ptp0", O_RDONLY);
    if(fd == -1)
    {
        printf("open failed\n");
        exit(1);
    }
    clockid_t clkid = ((~(clockid_t)(fd) << 3) | CLOCKFD);

    struct timespec oldts;
    struct timespec newts;
    int r = clock_gettime(clkid, &oldts);
    if(r == -1)
    {
        printf("clock_gettime failed\n");
        exit(1);
    }


    for (;;)
    {
        int r = clock_gettime(clkid, &newts);
        if(r == -1)
        {
            printf("clock_gettime failed\n");
            exit(1);
        }
        time_t delta = newts.tv_sec - oldts.tv_sec;
        if(delta < 0)
        {
            printf("newts.tv_sec = %08x\n oldts.tv_sec= %08x\n delta=%08x\n", 
newts.tv_sec , oldts.tv_sec, delta);
            exit(1);
        }
        if(delta > 1)
        {
            printf("newts.tv_sec = %08x\n oldts.tv_sec= %08x\n delta=%08x\n", 
newts.tv_sec , oldts.tv_sec, delta);
            exit(1);
        }
        oldts = newts;
    }
}

wrt:

> drivers/net/ethernet/intel/e1000e/netdev.c:e1000e_cyclecounter_read():


The workaround implemented there does not seem to work correctly.
Using a card where the workaround jumps in, the problem appears too.


There are various versions of the workaround in linux and we backported the 
linux 4.4 workaround
to 3.19, but still the same error happens.

Finally a collegue used the patch below and the problem vanished(above test 
code did run a long time),
 however it is unclear if the timestamps produced are still 'correct' and 
useful for ptp. 
The fix still looks surprising..

kind regards
  Frank

patch against 3.19 kernel.

--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4150,11 +4150,29 @@ static cycle_t e1000e_cyclecounter_read(
        struct e1000_adapter *adapter = container_of(cc, struct e1000_adapter,
                                                     cc);
        struct e1000_hw *hw = &adapter->hw;
+       u32 systimel_1, systimel_2, systimeh;
        cycle_t systim, systim_next;
-
-       /* latch SYSTIMH on read of SYSTIML */
-       systim = (cycle_t)er32(SYSTIML);
-       systim |= (cycle_t)er32(SYSTIMH) << 32;
+       /* SYSTIMH latching upon SYSTIML read does not work well.
+        * This means that if SYSTIML overflows after we read it but before
+        * we read SYSTIMH, the value of SYSTIMH has been incremented and we
+        * will experience a huge non linear increment in the systime value
+        * to fix that we test for overflow and if true, we re-read systime.
+        */
+       systimel_1 = er32(SYSTIML);
+       systimeh = er32(SYSTIMH);
+       systimel_2 = er32(SYSTIML);
+       /* Check for overflow. If there was no overflow, use the values */
+       if (systimel_1 < systimel_2) {
+               systim = (cycle_t)systimel_1;
+               systim |= (cycle_t)systimeh << 32;
+       } else {
+               /* There was an overflow, read again SYSTIMH, and use
+                * systimel_2
+                */
+               systimeh = er32(SYSTIMH);
+               systim = (cycle_t)systimel_2;
+               systim |= (cycle_t)systimeh << 32;
+       }
 
        if ((hw->mac.type == e1000_82574) || (hw->mac.type == e1000_82583)) {
                u64 incvalue, time_delta, rem, temp;
@@ -4167,8 +4185,21 @@ static cycle_t e1000e_cyclecounter_read(
                incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
                for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
                        /* latch SYSTIMH on read of SYSTIML */
-                       systim_next = (cycle_t)er32(SYSTIML);
-                       systim_next |= (cycle_t)er32(SYSTIMH) << 32;
+                       systimel_1 = er32(SYSTIML);
+                       systimeh = er32(SYSTIMH);
+                       systimel_2 = er32(SYSTIML);
+                       /* Check for overflow. If there was no overflow, use 
the values */
+                       if (systimel_1 < systimel_2) {
+                               systim_next = (cycle_t)systimel_1;
+                               systim_next |= (cycle_t)systimeh << 32;
+                       } else {
+                       /* There was an overflow, read again SYSTIMH, and use
+                        * systimel_2
+                        */
+                               systimeh = er32(SYSTIMH);
+                               systim_next = (cycle_t)systimel_2;
+                               systim_next |= (cycle_t)systimeh << 32;
+                       }
 
                        time_delta = systim_next - systim;
                        temp = time_delta;



------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140
_______________________________________________
Linuxptp-users mailing list
Linuxptp-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linuxptp-users

Reply via email to