This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new d941dd5 [clock] ntp_gettime(): STA_NANO bites again
d941dd5 is described below
commit d941dd5c4f1caba6c105ce1137bd18cc789d5bc2
Author: Alexey Serbin <[email protected]>
AuthorDate: Wed Feb 5 18:20:37 2020 -0800
[clock] ntp_gettime(): STA_NANO bites again
Even with the most recent glibc 2.31, ntptimeval::time::tv_usec can
either be in nanoseconds or microseconds and it's not possible to tell
that apart if using only the information returned by ntp_gettime() call:
sysdeps/unix/sysv/linux/ntp_gettime.c:
------------------------------
int
ntp_gettime (struct ntptimeval *ntv)
{
struct timex tntx;
int result;
tntx.modes = 0;
result = __adjtimex (&tntx);
ntv->time = tntx.time;
ntv->maxerror = tntx.maxerror;
ntv->esterror = tntx.esterror;
return result;
}
------------------------------
In the code above, __adjtimex() populates the the timex structure taking
into account the STA_NANO flag, which is subject to current setting by
ADJ_MICRO. However, ntp_gettime() does not convert the value of the
ntptimeval::time::tv_usec field using the presence of STA_NANO flag in
tntx.status.
So, on Linux it's necessary to use ntp_adjtime() to get NTP time and
maxerror because ntp_gettime() is essentially unusable. See [1] as
well.
In contrast, the ntp_gettime() call on BSD-based systems (e.g. macOS)
does not bring such surprises.
This is a follow-up to e72208436a625391739217394c67d783e992367a.
[1]
https://stackoverflow.com/questions/16063408/does-ntp-gettime-actually-return-nanosecond-precision
Change-Id: Id171b6fea2274d32a35c6173bab9996b36c0c4f6
Reviewed-on: http://gerrit.cloudera.org:8080/15171
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Adar Dembo <[email protected]>
---
src/kudu/clock/system_ntp.cc | 54 ++++++++++++++++++++++++++++----------------
1 file changed, 35 insertions(+), 19 deletions(-)
diff --git a/src/kudu/clock/system_ntp.cc b/src/kudu/clock/system_ntp.cc
index b4977a1..e000c85 100644
--- a/src/kudu/clock/system_ntp.cc
+++ b/src/kudu/clock/system_ntp.cc
@@ -71,6 +71,21 @@ Status NtpStateToStatus(int rc) {
}
}
+// Check if the specified code corresponds to an error returned by
ntp_adjtime()
+// and convert it into Status.
+Status CheckForNtpAdjtimeError(int rc) {
+ // From http://man7.org/linux/man-pages/man2/adjtimex.2.html,
+ // the failure implies invalid pointer for its output parameter of the
'timex'
+ // type. It cannot be the case in the code below. However, there were reports
+ // that with old version of Docker it might return EPERM even if 'tx.mode'
+ // is set to 0; see KUDU-2000 for details.
+ if (PREDICT_FALSE(rc == -1)) {
+ int err = errno;
+ return Status::RuntimeError("ntp_adjtime() failed", ErrnoToString(err));
+ }
+ return Status::OK();
+}
+
void TryRun(vector<string> cmd, vector<string>* log) {
string exe;
Status s = FindExecutable(cmd[0], {"/sbin", "/usr/sbin/"}, &exe);
@@ -104,23 +119,15 @@ SystemNtp::SystemNtp()
}
Status SystemNtp::Init() {
- timex tx;
- tx.modes = 0; // set mode to 0 for read-only query
- int rc = ntp_adjtime(&tx);
- // From http://man7.org/linux/man-pages/man2/adjtimex.2.html,
- // the failure implies invalid pointer for 'tx', which cannot be the case.
- // However, there were reports that with old version of Docker it might
return
- // EPERM even if 'tx.mode' is set to 0; see KUDU-2000 for details.
- if (PREDICT_FALSE(rc == -1)) {
- int err = errno;
- return Status::RuntimeError("ntp_adjtime() failed", ErrnoToString(err));
- }
+ timex t;
+ t.modes = 0; // set mode to 0 for read-only query
+ RETURN_NOT_OK(CheckForNtpAdjtimeError(ntp_adjtime(&t)));
// The unit of the reported tolerance is ppm with 16-bit fractional part:
// 65536 is 1 ppm (see
http://man7.org/linux/man-pages/man3/ntp_adjtime.3.html
// for details).
- skew_ppm_ = tx.tolerance / 65536;
- VLOG(1) << "ntp_adjtime(): tolerance is " << tx.tolerance;
+ skew_ppm_ = t.tolerance / 65536;
+ VLOG(1) << "ntp_adjtime(): tolerance is " << t.tolerance;
return Status::OK();
}
@@ -131,16 +138,25 @@ Status SystemNtp::WalltimeWithError(uint64_t* now_usec,
uint64_t* error_usec) {
}
// Read the clock and convert its state into status. This will return an
error
// if the clock is not synchronized.
- ntptimeval tv;
- RETURN_NOT_OK(NtpStateToStatus(ntp_gettime(&tv)));
- uint64_t now = static_cast<uint64_t>(tv.time.tv_sec) * 1000000;
#ifdef __APPLE__
- now += tv.time.tv_nsec / 1000;
+ ntptimeval t;
+ RETURN_NOT_OK(NtpStateToStatus(ntp_gettime(&t)));
+ uint64_t now = static_cast<uint64_t>(t.time.tv_sec) * 1000000 +
+ t.time.tv_nsec / 1000;
#else
- now += tv.time.tv_usec;
+ timex t;
+ t.modes = 0; // set mode to 0 for read-only query
+ const int rc = ntp_adjtime(&t);
+ RETURN_NOT_OK(CheckForNtpAdjtimeError(rc));
+ RETURN_NOT_OK(NtpStateToStatus(rc));
+ if (t.status & STA_NANO) {
+ t.time.tv_usec /= 1000;
+ }
+ uint64_t now = static_cast<uint64_t>(t.time.tv_sec) * 1000000 +
+ t.time.tv_usec;
#endif
+ *error_usec = t.maxerror;
*now_usec = now;
- *error_usec = tv.maxerror;
return Status::OK();
}