--- Begin Message ---
Package: pv
Version: 1.6.0-1+b1
Severity: wishlist
Tags: upstream patch
Hello,
if the bandwidth of the data pv processes contains bursts - not just
small fluctuations - the ETA (actually: time left) prediction suffers
pretty badly since AFAICT the simple but fragile linear computation is
used, based on total data transferred so far, and total time spent for
this.
This hits me in two scenarios:
* A tar pipe "tar -cf - . | pv --size=... | ..." where tar reads a
few huge files at raw speed (say: 100 Mbyte/sec) while many spread
small files slow down the process significantly (say: to 100 kbyte/sec).
* The receiving side right of pv does buffering, so the first bunch of
data is processed pretty fast, followed by the much slower rate for
sustained operation.
In both cases, pv's ETA prediction will happily show a short time
that's left, but then start counting *up*wards, reach a peak, and
eventually count downwards as expected, although not quite at one
second per second. As a result, the predicted total time is *way*
below the time this will actually take.
This is a generic problem with predicting the future, however the
linear approach is the problematic part here because burst have a huge
impact on the computation.
How to repeat:
(
dd if=/dev/zero bs=1M count=1 status=none | pv --quiet --rate-limit=1M
dd if=/dev/zero bs=1M count=9 status=none | pv --quiet --rate-limit=10k
) | pv --size=10M >/dev/null
The commands in the subshell produce 10 Mibyte data, a burst for the
first one, then pretty slow for the rest.
Expected:
After jumping to the "10%" instantly and declaring a remaining time of
somewhat nine seconds, pv should pretty soon (within say 30 seconds)
adopt the prediction to the actual input rate. As a result, the time
spent (second column) plus the time left (last column) should soon sum
up to 922 seconds, or "15:22".
Actually seen:
As described above. A few examples, progress bar stripped, note the
total time estimation keeps growing until the end:
| 1,09MiB 0:00:10 [10,1KiB/s] 10% ETA 0:01:21
(total time estimation: 91sec = 01:31)
| 1,28MiB 0:00:30 [10,1KiB/s] 12% ETA 0:03:23
(total time estimation: 233sec = 03:53)
| 1,58MiB 0:01:00 [10,1KiB/s] 15% ETA 0:05:20
(total time estimation: 380sec = 06:20)
| 2,16MiB 0:02:00 [10,1KiB/s] 21% ETA 0:07:14
(total time estimation: 554sec = 09:14)
| 2,75MiB 0:03:00 [10,1KiB/s] 27% ETA 0:07:54
(total time estimation: 654sec = 10:54)
| 3,33MiB 0:04:00 [10,1KiB/s] 33% ETA 0:07:59
(total time estimation: 719sec = 11:59)
| 3,92MiB 0:05:00 [10,1KiB/s] 39% ETA 0:07:45
(total time estimation: 765sec = 12:45)
| 6,85MiB 0:10:00 [10,1KiB/s] 68% ETA 0:04:35
(total time estimation: 875sec = 14:35)
| 8,02MiB 0:12:00 [10,1KiB/s] 80% ETA 0:02:57
(total time estimation: 897sec = 14:57)
| 8,61MiB 0:13:00 [10,1KiB/s] 86% ETA 0:02:06
(total time estimation: 906sec = 15:06)
| 9,19MiB 0:14:00 [10,1KiB/s] 91% ETA 0:01:13
(total time estimation: 913sec = 15:13)
| 9,78MiB 0:15:00 [10,1KiB/s] 97% ETA 0:00:20
(total time estimation: 920sec = 15:20)
Suggestion:
Use exponential smoothing, either as an option (preferably the
default), or even as a replacement. Some tests here suggest a
smoothing factor of 0.5 would serve the purpose quite well. Making
this another option was nice to have, though.
( some time passes )
Okay, I was in the right mood to demonstrate this. First was a proof of
concept in Perl, followed by a quick hack for pv. Ended up with a patch
that includes parameter parsing and documentation. Attached.
Cheers,
Christoph
--- a/src/pv/display.c
+++ b/src/pv/display.c
@@ -74,18 +74,24 @@
* number of seconds until completion.
*/
static long pv__calc_eta(const long long so_far, const long long total,
- const long elapsed)
+ const long elapsed, const long double rate,
+ const double alpha)
{
long long amount_left;
+ static double smoothed_rate = 0.0;
if (so_far < 1)
return 0;
- amount_left = total - so_far;
- amount_left *= (long long) elapsed;
- amount_left /= so_far;
+ if (alpha == 0.0) {
+ amount_left = total - so_far;
+ amount_left *= (long long) elapsed;
+ amount_left /= so_far;
- return (long) amount_left;
+ return (long) amount_left;
+ }
+ smoothed_rate = alpha*rate + (1.0-alpha)*smoothed_rate;
+ return (long) ((total - so_far) / smoothed_rate);
}
/*
@@ -653,7 +659,7 @@
eta =
pv__calc_eta(total_bytes - state->initial_offset,
state->size - state->initial_offset,
- elapsed_sec);
+ elapsed_sec, rate, state->alpha);
/*
* Bounds check, so we don't overrun the suffix buffer. This
@@ -705,7 +711,7 @@
eta =
pv__calc_eta(total_bytes - state->initial_offset,
state->size - state->initial_offset,
- elapsed_sec);
+ elapsed_sec, rate, state->alpha);
/*
* Bounds check, so we don't overrun the suffix buffer. This
--- a/src/include/options.h
+++ b/src/include/options.h
@@ -42,6 +42,7 @@
double delay_start; /* delay before first display */
unsigned int watch_pid; /* process to watch fds of */
int watch_fd; /* fd to watch */
+ double alpha; /* smoothing factor */
unsigned int width; /* screen width */
unsigned int height; /* screen height */
char *name; /* process name, if any */
--- a/src/include/pv.h
+++ b/src/include/pv.h
@@ -93,6 +93,7 @@
extern void pv_state_format_string_set(pvstate_t, const char *);
extern void pv_state_watch_pid_set(pvstate_t, unsigned int);
extern void pv_state_watch_fd_set(pvstate_t, int);
+extern void pv_state_alpha_set(pvstate_t, double);
extern void pv_state_inputfiles(pvstate_t, int, const char **);
--- a/src/main/options.c
+++ b/src/main/options.c
@@ -80,12 +80,13 @@
{"remote", 1, 0, 'R'},
{"pidfile", 1, 0, 'P'},
{"watchfd", 1, 0, 'd'},
+ {"alpha", 1, 0, 'O'},
{0, 0, 0, 0}
};
int option_index = 0;
#endif
char *short_options =
- "hVpteIrabTA:fnqcWD:s:l0i:w:H:N:F:L:B:CESR:P:d:";
+ "hVpteIrabTA:fnqcWD:s:l0i:w:H:N:F:L:B:CESR:P:d:O:";
int c, numopts;
unsigned int check_pid;
int check_fd;
@@ -124,6 +125,7 @@
opts->delay_start = 0;
opts->watch_pid = 0;
opts->watch_fd = -1;
+ opts->alpha = 0.5;
do {
#ifdef HAVE_GETOPT_LONG
@@ -158,6 +160,7 @@
break;
case 'i':
case 'D':
+ case 'O':
if (pv_getnum_check(optarg, PV_NUMTYPE_DOUBLE) !=
0) {
fprintf(stderr, "%s: -%c: %s\n",
@@ -311,6 +314,16 @@
sscanf(optarg, "%u:%d", &(opts->watch_pid),
&(opts->watch_fd));
break;
+ case 'O':
+ opts->alpha = pv_getnum_d(optarg);
+ if (opts->alpha < 0.0 || opts->alpha > 1.0) {
+ fprintf(stderr, "%s: -%c: %s\n",
+ opts->program_name, c,
+ _("value not in the range 0.0 to 1.0"));
+ opts_free(opts);
+ return 0;
+ }
+ break;
default:
#ifdef HAVE_GETOPT_LONG
fprintf(stderr,
--- a/src/pv/state.c
+++ b/src/pv/state.c
@@ -205,6 +205,11 @@
state->watch_fd = val;
};
+void pv_state_alpha_set(pvstate_t state, double val)
+{
+ state->alpha = val;
+};
+
/*
* Set the array of input files.
--- a/src/include/pv-internal.h
+++ b/src/include/pv-internal.h
@@ -71,6 +71,7 @@
double delay_start; /* delay before first display */
unsigned int watch_pid; /* process to watch fds of */
int watch_fd; /* fd to watch */
+ double alpha; /* smoothing factor */
unsigned int width; /* screen width */
unsigned int height; /* screen height */
const char *name; /* display name */
--- a/src/main/main.c
+++ b/src/main/main.c
@@ -202,6 +202,7 @@
pv_state_format_string_set(state, opts->format);
pv_state_watch_pid_set(state, opts->watch_pid);
pv_state_watch_fd_set(state, opts->watch_fd);
+ pv_state_alpha_set(state, opts->alpha);
pv_state_set_format(state, opts->progress, opts->timer, opts->eta,
opts->fineta, opts->rate, opts->average_rate,
--- a/doc/quickref.1.in
+++ b/doc/quickref.1.in
@@ -419,6 +419,22 @@
.B @PACKAGE@
- followed by a newline.
.TP
+.B \-O FACTOR, \-\-alpha FACTOR
+Controls the exponential smoothing used by
+.B @PACKAGE@
+for ETA prediction. Must be a number between 0.0 and 1.0. A
+.BR FACTOR
+close to 1.0 causes changes in the incoming bandwith to affect the
+calculation pretty soon, resulting in a fast-changing ETA. On the
+contrary, a
+.BR FACTOR
+close to 0.0 will result in rather slow adjustment. A
+.BR FACTOR
+of 0.0 disables exponential smoothing and uses a prediction based on
+the entire transfer time. This was the default of
+.B @PACKAGE@
+until version 1.6.
+.TP
.B \-h, \-\-help
Print a usage message on standard output and exit successfully.
.TP
signature.asc
Description: Digital signature
--- End Message ---