Oh dear,
This looks like a rate controller problem in mpeg2enc.
There were some changes made in this area to fix a problem with the old method
of predicting how large a frame was likely to be (which tended to be rather
inaccurate and produce 'oscillating' quantisation). It looks like your
stream probably worked well with the old method (for some reason) but not
with the newer one.
Can you try it again after recompiling with the following attached
mpeg2enc/ratectl.cc (matches CVS mpeg2enc)? This should eliminate the problem
(it starts throttling extra-hard when the decoder's buffer is becoming
significantly empty...). I can't easily replicate problems like yours
because the video 'spikes' that cause them depend entirely on what's being
encoded.
As an aside, setting
--video-bitrate 1800
without also increasing the specified minimum decoder buffer size is really
'torturing' mpeg2enc. If you can play a non-standard VCD like this you can
almost certainly increase the decoder buffer size assumed to be required, e.g. to
--video-buffer 120 or even
--video-buffer 230
and still play back fine. You'll get better looking results too!
Of course you need to give mplex the same
--video-buffer
specification.
cheers,
Andrew
/* ratectl.c, bitrate control routines (linear quantization only currently) */
/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
/*
* Disclaimer of Warranty
*
* These software programs are available to the user without any license fee or
* royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
* any and all warranties, whether express, implied, or statuary, including any
* implied warranties or merchantability or of fitness for a particular
* purpose. In no event shall the copyright-holder be liable for any
* incidental, punitive, or consequential damages of any kind whatsoever
* arising from the use of these programs.
*
* This disclaimer of warranty extends to the user of these programs and user's
* customers, employees, agents, transferees, successors, and assigns.
*
* The MPEG Software Simulation Group does not represent or warrant that the
* programs furnished hereunder are free of infringement of any third-party
* patents.
*
* Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
* are subject to royalty fees to patent holders. Many of these patents are
* general enough such that they are unavoidable regardless of implementation
* design.
*
*/
/* Modifications and enhancements (C) 2000,2001,2002,2003 Andrew Stevens */
/* These modifications are free software; you can redistribute it
* and/or modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*
*/
#include "config.h"
#include <math.h>
#include <limits.h>
#include "mjpeg_types.h"
#include "mjpeg_logging.h"
#include "mpeg2syntaxcodes.h"
#include "tables.h"
#include "simd.h"
#include "fastintfns.h"
#include "mpeg2encoder.hh"
#include "picture.hh"
#include "ratectl.hh"
#include "quantize.hh"
/* private prototypes */
/* Map a fractional quantisation value onto the legal quantiser range
 * without rounding it to an integer code.
 *
 * q_scale_type != 0 (non-linear): quant is clipped to [1,112] and the
 *     result is a linear interpolation between the non-linear table
 *     entries for floor(quant) and floor(quant)+1.
 * q_scale_type == 0 (linear): quant is simply clamped to [2.0, 62.0].
 */
static double scale_quantf( int q_scale_type, double quant )
{
    if ( !q_scale_type )
    {
        /* Linear scale: clamp only */
        if ( quant < 2.0 )
            return 2;
        if ( quant > 62.0 )
            return 62.0;
        return quant;
    }

    /* Interpolation weights: frac_hi belongs to the upper neighbour */
    const double frac_hi = quant-floor(quant);
    const double frac_lo = 1.0 - frac_hi;
    int lower = (int) floor(quant);
    int upper = lower+1;

    /* Clip both neighbours to the legal index range.  When clipped they
       coincide, so the result is just that table entry. */
    if ( lower < 1 )
        lower = upper = 1;
    if ( lower > 111 )
        lower = upper = 112;

    return (double)
        frac_lo * (double)non_linear_mquant_table[map_non_linear_mquant[lower]]
        +
        frac_hi * (double)non_linear_mquant_table[map_non_linear_mquant[upper]];
}
/* Round a fractional quantisation value to a legal integer quantiser.
 *
 * q_scale_type != 0: round to nearest, clip to [1,112] and translate
 *     through the non-linear quantiser tables.
 * q_scale_type == 0: round to nearest, clip to [2,62] and force the
 *     result down to an even value.
 */
int RateCtl::ScaleQuant( int q_scale_type , double quant )
{
    int rounded = (int) floor(quant+0.5);

    if ( q_scale_type )
    {
        if ( rounded < 1 )
            rounded = 1;
        if ( rounded > 112 )
            rounded = 112;
        return non_linear_mquant_table[map_non_linear_mquant[rounded]];
    }

    if ( rounded < 2 )
        rounded = 2;
    if ( rounded > 62 )
        rounded = 62;
    /* Linear quantisers must be *even* */
    return (rounded/2)*2;
}
/* Inverse of the quantiser-code mapping.
 *
 * q_scale_type == 0: the raw code is returned unchanged (as a double).
 * q_scale_type != 0: search map_non_linear_mquant downwards from index
 *     112 for an entry equal to raw_code and return that index.  If no
 *     entry in 112..2 matches, 1 is returned.
 */
double RateCtl::InvScaleQuant( int q_scale_type, int raw_code )
{
    if( !q_scale_type )
        return ((double)raw_code);

    int idx;
    for( idx = 112; idx > 1; --idx )
    {
        if( map_non_linear_mquant[idx] == raw_code )
            break;
    }
    return ((double)idx);
}
/* Base rate controller: just keeps hold of the encoder parameters it is
   given. */
RateCtl::RateCtl( EncoderParams &_encparams )
    : encparams( _encparams )
{}
/*****************************
 *
 * On-the-fly rate controller.  The constructor only zeroes the
 * bit-accounting counters and the running activity / variance /
 * quantisation sums; the tunable control parameters are set up
 * later in InitSeq().
 *
 ****************************/
OnTheFlyRateCtl::OnTheFlyRateCtl(EncoderParams &encparams )
    : RateCtl(encparams)
{
    /* Bit accounting */
    bits_used = 0;
    bits_transported = 0;
    buffer_variation = 0;
    prev_bitcount = 0;
    bitcnt_EOP = 0;
    frame_overshoot_margin = 0;

    /* Running sums of per-picture averages.
       TODO: Suitable starting values are really MPEG-1/2 and material
       type dependent.  The encoder should probably look ahead over the
       first 100 frames or so to tune these dynamically before doing
       real encoding... alternatively a config file should be written!
    */
    sum_avg_act = 0.0;
    sum_avg_var = 0.0;
    sum_avg_quant = 0.0;
}
/*********************
 *
 * Initialise rate control parameters for a sequence.
 *
 * params: reinit - Rate control is being re-initialised during the middle
 *                  of a run.  Only the bit counters and the per-picture /
 *                  per-second bit budgets are recomputed; the adaptive
 *                  parameters are left untouched.
 *
 * Exits via mjpeg_error_exit1() if (not encoding stills and) the video
 * buffer leaves no margin beyond the three-picture safety reserve.
 ********************/
void OnTheFlyRateCtl::InitSeq(bool reinit)
{
    double init_quant;

    bits_transported = bits_used = 0;
    field_rate = 2*encparams.decode_frame_rate;
    fields_per_pict = encparams.fieldpic ? 1 : 2;

    /* For stills the size we have to hit *is* the per-picture budget */
    if( encparams.still_size > 0 )
    {
        per_pict_bits = encparams.still_size * 8;
        R = encparams.still_size * 8;
    }
    else
    {
        per_pict_bits =
            static_cast<int32_t>(encparams.fieldpic
                                 ? encparams.bit_rate / field_rate
                                 : encparams.bit_rate / encparams.decode_frame_rate
                );
        R = static_cast<int32_t>(encparams.bit_rate);
    }

    /* Everything else already set or adaptive */
    if( reinit )
        return;

    first_gop = true;

    /* Moving-average window weights for the I/P/B complexity estimates,
       chosen according to the GOP structure (encparams.M). */
    K_AVG_WINDOW_I = 2.0;
    switch( encparams.M )
    {
    case 1 : // P
        K_AVG_WINDOW_P = 8.0;
        K_AVG_WINDOW_B = 1.0; // dummy
        break;
    case 2 : // BP
        K_AVG_WINDOW_P = 4.0;
        K_AVG_WINDOW_B = 4.0;
        break;
    default: // BBP
        K_AVG_WINDOW_P = 3.0;
        K_AVG_WINDOW_B = 7.0;
        break;
    }

    /* Calculate reasonable margins for variation in the decoder
       buffer.  Three picture intervals worth of bits are held back as
       a safety reserve (buffer_safe); what remains of the buffer is
       the margin available for feedback.

       The gain values represent the fraction of the under/over shoot
       to be recovered during one second.  Gain is decreased if the
       buffer margin is large.
    */
    if( encparams.still_size > 0 )
    {
        undershoot_carry = 0;
        overshoot_gain = 1.0;
    }
    else
    {
        int buffer_safe = 3 * per_pict_bits ;
        /* Check the margin itself *before* dividing by it.  Testing the
           already-truncated margin/6 would let margins in -5..0 slip
           through and yield a divide-by-zero or negative gain below. */
        if( encparams.video_buffer_size <= buffer_safe )
            mjpeg_error_exit1("Rate control can't cope with a video buffer smaller 4 frame intervals");
        undershoot_carry = (encparams.video_buffer_size - buffer_safe)/6;
        overshoot_gain = encparams.bit_rate / (encparams.video_buffer_size-buffer_safe);
    }
    bits_per_mb = (double)encparams.bit_rate / (encparams.mb_per_pict);

    /*
      Reaction parameter - i.e. quantisation feedback gain relative
      to bit over/undershoot.
      For normal frames it is fairly modest as we can compensate
      over multiple frames and can average out variations in image
      complexity.
      For stills a different setting is used so corrections take place
      more rapidly *within* a single frame.
    */
    if( encparams.still_size > 0 )
        r = (int)floor(2.0*encparams.bit_rate/encparams.decode_frame_rate);
    else
        r = (int)floor(4.0*encparams.bit_rate/encparams.decode_frame_rate);

    /* Set the virtual buffers for per-frame rate control feedback to
       values corresponding to the quantisation floor (if specified)
       or a "reasonable" quantisation (6.0) if not.
    */
    init_quant = (encparams.quant_floor > 0.0 ? encparams.quant_floor : 6.0);
    d0p = d0b = d0i = static_cast<int>(init_quant * r / 62.0);

    next_ip_delay = 0.0;
    decoding_time = 0.0;

    /* NOTE(review): the guard below is spelled OUTPUTr_STAT, not
       OUTPUT_STAT as elsewhere in this file, and the code inside refers
       to d0pb - presumably stale; confirm before enabling. */
#ifdef OUTPUTr_STAT
    fprintf(statfile,"\nrate control: sequence initialization\n");
    fprintf(statfile,
            " initial global complexity measures (I,P,B): Xi=%.0f, Xp=%.0f, Xb=%.0f\n",
            Xi, Xp, Xb);
    fprintf(statfile," reaction parameter: r=%d\n", r);
    fprintf(statfile,
            " initial virtual buffer fullness (I,P,B): d0i=%d, d0pb=%d",
            d0i, d0pb);
#endif
}
/* Set up rate control for a new group of pictures.
 *
 * np, nb - number of P and B pictures passed in by the caller for this
 *          GOP.  They are converted to field counts (Np, Nb, Ni).
 *
 * For the first GOP (and for every still) the estimators are reset and
 * every picture type starts with the flat per-picture budget.  For later
 * GOPs the base budgets are shared out in proportion to the current
 * complexity estimates Xi/Xp/Xb.
 */
void OnTheFlyRateCtl::InitGOP( int np, int nb)
{
    /* Picture counts expressed in fields */
    Ni = encparams.fieldpic ? 1 : 2;
    Np = encparams.fieldpic ? 2*np+1 : 2*np;
    Nb = 2*nb;
    fields_in_gop = Ni + Nb + Np;

    /*
      At the start of a GOP, before any frames have gone, the actual
      buffer state represents a long term average.  Any undershoot
      due to the I-frame of the previous GOP should by now have been
      caught up.
    */
    gop_buffer_correction = 0;

    /* Each still is encoded independently so rate control is reset for
       each one.  They're all I-frames, so each still is a GOP too. */
    const bool restart = first_gop || encparams.still_size > 0;
    if( restart )
    {
        mjpeg_debug( "FIRST GOP INIT");
        first_gop = false;
        fast_tune = true;
        first_I = first_B = first_P = true;
        I_pict_base_bits = per_pict_bits;
        P_pict_base_bits = per_pict_bits;
        B_pict_base_bits = per_pict_bits;
    }
    else
    {
        mjpeg_debug( "REST GOP INIT" );

        /* Feedback gain applied to the current buffer variation when
           sizing this GOP's bit pool */
        double recovery_fraction = field_rate/(overshoot_gain * fields_in_gop);
        double recovery_gain;
        if( recovery_fraction > 1.0 )
            recovery_gain = 1.0;
        else
            recovery_gain = overshoot_gain * recovery_fraction;

        int available_bits =
            static_cast<int>( (encparams.bit_rate+buffer_variation*recovery_gain)
                              * fields_in_gop/field_rate);

        /* Share the pool out by complexity */
        double Xsum = Ni*Xi+Np*Xp+Nb*Xb;
        I_pict_base_bits = static_cast<int32_t>(fields_per_pict*available_bits*Xi/Xsum);
        P_pict_base_bits = static_cast<int32_t>(fields_per_pict*available_bits*Xp/Xsum);
        B_pict_base_bits = static_cast<int32_t>(fields_per_pict*available_bits*Xb/Xsum);
        fast_tune = false;
    }

#ifdef OUTPUT_STAT
    fprintf(statfile,"\nrate control: new group of pictures (GOP)\n");
    fprintf(statfile," target number of bits for GOP: R=%.0f\n",R);
    fprintf(statfile," number of P pictures in GOP: Np=%d\n",Np);
    fprintf(statfile," number of B pictures in GOP: Nb=%d\n",Nb);
#endif
}
/* Step 1: compute target bits for current picture being coded.
 *
 * picture      - the picture about to be coded; its activity measures are
 *                read and its avg_act / sum_avg_act fields are written.
 * bitcount_SOP - output bit count at start of picture; remembered in S.
 *
 * On return T holds the picture's bit target, d the virtual buffer
 * fullness to start from and pict_base_bits the picture type's base
 * allocation.
 */
void OnTheFlyRateCtl::InitPict(Picture &picture, int64_t bitcount_SOP)
{
    int available_bits;
    double Xsum,varsum;

    /* A.Stevens Nov 2000: quantisation calculations are based on the
       activity of the *current* frame (rather than the previous one),
       keeping track of how much of the frame's activity has been
       covered as macroblocks are coded (see actcovered).
    */
    picture.ActivityMeasures( actsum, varsum );
    avg_act = actsum/(double)(encparams.mb_per_pict);
    avg_var = varsum/(double)(encparams.mb_per_pict);
    sum_avg_act += avg_act;
    sum_avg_var += avg_var;
    actcovered = 0.0;
    sum_vbuf_Q = 0.0;

    /* Work out the bit pool for one GOP.  The one-second bit pool
       (bit_rate, corrected by feedback from the decoder buffer state)
       is scaled to a one-GOP bit pool; the per-picture target is then
       carved out of it in the switch below, weighted by the global
       complexity averages Xi/Xp/Xb.
    */
    if( encparams.still_size > 0 )
        available_bits = per_pict_bits;
    else
    {
        const double danger_fraction = 0.5;
        int danger_level =
            static_cast<int>(danger_fraction*encparams.video_buffer_size);
        // Gain is scaled up in proportion to the deficit once
        // -buffer_variation reaches danger_level, i.e. as the deficit
        // approaches the decoder's buffer size
        double gain =
            -buffer_variation < danger_level
            ? ( overshoot_gain )
            : ( overshoot_gain * (2.0*danger_fraction)
                * - buffer_variation / danger_level );
        if( gain > overshoot_gain )
            mjpeg_warn( "Decoder buffer running low: boosting overshoot gain!" );
        int feedback_correction =
            static_cast<int>( fast_tune
                              ? buffer_variation * gain
                              : (buffer_variation+gop_buffer_correction)
                                * gain
                );
        available_bits =
            static_cast<int>( (encparams.bit_rate+feedback_correction)
                              * fields_in_gop/field_rate
                );
    }

    min_q = min_d = INT_MAX;
    max_q = max_d = INT_MIN;
    Xsum = Ni*Xi+Np*Xp+Nb*Xb;

    /* Until a picture of a given type has been coded there is no
       complexity estimate for it, so fixed weightings are used for the
       first picture of each type. */
    switch (picture.pict_type)
    {
    case I_TYPE:
        /* There is little reason to rely on the *last* I-frame as
           they're not closely related.  The slow correction of K
           should be enough to fine-tune.
        */
        d = d0i;
        if( first_I )
        {
            T = (int32_t)(fields_per_pict*available_bits/(Ni+(Np/1.7)+Nb/(2.0*1.7)));
        }
        else
        {
            T = (int32_t)(fields_per_pict*available_bits*Xi/Xsum);
        }
        pict_base_bits = I_pict_base_bits;
        break;
    case P_TYPE:
        d = d0p;
        if( first_P )
        {
            T = (int32_t)(fields_per_pict*available_bits/(Np+Nb/2.0));
        }
        else
        {
            T = (int32_t)(fields_per_pict*available_bits*Xp/Xsum);
        }
        pict_base_bits = P_pict_base_bits;
        break;
    case B_TYPE:
        d = d0b;
        if( first_B )
        {
            T = (int32_t)(fields_per_pict*available_bits/(Nb+Np*2.0));
        }
        else
        {
            T = (int32_t)(fields_per_pict*available_bits*Xb/Xsum);
        }
        pict_base_bits = B_pict_base_bits;
        break;
    }

    /*
      If we're fed a sequence of identical or near-identical images
      we can actually get allocations for frames that exceed the
      video buffer size!  This of course won't work so we arbitrarily
      limit any individual frame to 3/4 of the buffer.
    */
    T = intmin( T, encparams.video_buffer_size*3/4 );
    mjpeg_debug( "Frame %c T=%05d A=%06d Xi=%.2f Xp=%.2f Xb=%.2f",
                 pict_type_char[picture.pict_type],
                 (int)T/8, (int)available_bits/8,
                 Xi, Xp,Xb );

    /*
      To account for the wildly different sizes of frames we compute
      a correction to the current instantaneous buffer state that
      accounts for the fact that, all other things being equal, the
      buffer goes down a lot after the I-frame decode but fills up
      again through the B and P frames.

      For this we use the picture's base bit allocation
      "pict_base_bits", which will pretty accurately add up to a
      GOP-length's worth of bits, not the more dynamic predictive
      target T (which *won't* add up very well).
    */
    gop_buffer_correction += (pict_base_bits-per_pict_bits);

    /* Undershot bits have been "returned" via R */
    if( d < 0 )
        d = 0;

    /* We don't let the target volume get absurdly low as it makes some
       of the prediction maths ill-conditioned.  At these levels
       quantisation is always minimum anyway.
    */
    T = intmax( T, 4000 );

    if( encparams.still_size > 0 && encparams.vbv_buffer_still_size )
    {
        /* If the stills size must match then target low to ensure no
           overshoot.
        */
        mjpeg_info( "Setting VCD HR still overshoot margin to %d bytes", T/(16*8) );
        frame_overshoot_margin = T/16;
        T -= frame_overshoot_margin;
    }

    picture.avg_act = avg_act;
    picture.sum_avg_act = sum_avg_act;
    S = bitcount_SOP;

#ifdef OUTPUT_STAT
    fprintf(statfile,"\nrate control: start of picture\n");
    /* T is an integer; divide in floating point to match the %.0f format */
    fprintf(statfile," target number of bits: T=%.0f\n",T/8.0);
#endif
}
/*
 * Update rate-control statistics after a picture has been coded.
 *
 * picture       - the picture just coded; its per-macroblock mquant
 *                 values are read, and pad / AQ / SQ are written.
 * _bitcount_EOP - output bit count at end of picture (before padding).
 *
 * RETURN: The amount of padding (in bytes) necessary for the picture to
 * meet syntax or rate constraints...
 */
int OnTheFlyRateCtl::UpdatePict(Picture &picture, int64_t _bitcount_EOP)
{
    double X;
    double AQ;
    int32_t AP; /* Actual (including padding) picture bit counts */
    int i;
    int Qsum;
    int frame_overshoot;
    int64_t bitcount_EOP = _bitcount_EOP;

    AP = bitcount_EOP - S;
    frame_overshoot = (int)AP-(int)T;

    /* For the virtual buffers for quantisation feedback it is the
       actual under/overshoot *including* padding.  Otherwise the
       buffers go zero.
       BUGBUGBUG shouldn't this go after the padding calculation?
    */
    d += frame_overshoot;

    /* Warn if it looks like we've busted the safety margins in stills
       size specification.  Adjust padding to account for safety
       margin if we're padding to suit stills whose size has to be
       specified in advance in vbv_buffer_size.
    */
    picture.pad = 0;
    int padding_bits = 0;
    if( encparams.still_size > 0 && encparams.vbv_buffer_still_size)
    {
        if( frame_overshoot > frame_overshoot_margin )
        {
            mjpeg_warn( "Rate overshoot: VCD hi-res still %d bytes too large! ",
                        ((int)AP)/8-encparams.still_size);
        }

        //
        // Aim for an actual size squarely in the middle of the 2048
        // byte granularity of the still_size coding.  This gives a
        // safety margin for headers etc.
        //
        frame_overshoot = frame_overshoot - frame_overshoot_margin;
        if( frame_overshoot < -2048*8 )
            frame_overshoot += 1024*8;

        // Make sure we pad nicely to byte alignment
        if( frame_overshoot < 0 )
        {
            padding_bits = (((bitcount_EOP-frame_overshoot)>>3)<<3)-bitcount_EOP;
            picture.pad = 1;
        }
#ifdef JUNK_DONE_ELSEWHERE_NOW
        if( padding_bytes > 0 )
        {
            mjpeg_debug( "Padding still to size: %d extra bytes", padding_bytes );
            picture.pad = 1;
            for( i = 0; i < padding_bytes/2; ++i )
            {
                writer.PutBits(0, 16);
            }
        }
#endif
    }

    /* Adjust the various bit counting parameters for the padding bytes that
     * will be added */
    AP += padding_bits ;
    frame_overshoot += padding_bits;
    bitcount_EOP += padding_bits;

    /*
      Compute the estimate of the current decoder buffer state.  We
      use this to feedback-correct the available bit-pool with a
      fraction of the current buffer state estimate.  If we're ahead
      of the game we allow a small increase in the pool.  If we are
      dropping towards a dangerously low buffer we decrease the pool
      (rather more vigorously).

      Note that since we cannot hold more than a buffer-full, if we have
      a positive buffer_variation in CBR we assume it was padded away
      and in VBR we assume we only sent until the buffer was full.
    */
    bits_used += (bitcount_EOP-prev_bitcount);
    prev_bitcount = bitcount_EOP;
    bits_transported += per_pict_bits;
    buffer_variation = (int32_t)(bits_transported - bits_used);
    if( buffer_variation > 0 )
    {
        if( encparams.quant_floor > 0 )
        {
            bits_transported = bits_used;
            buffer_variation = 0;
        }
        else if( buffer_variation > undershoot_carry )
        {
            bits_used = bits_transported + undershoot_carry;
            buffer_variation = undershoot_carry;
        }
    }

    Qsum = 0;
    for( i = 0; i < encparams.mb_per_pict; ++i )
    {
        Qsum += picture.mbinfo[i].mquant;
    }

    /* AQ is the average quantisation over the picture's macroblocks. */
    AQ = (double)Qsum/(double)encparams.mb_per_pict;
    sum_avg_quant += AQ;

    /* X (Chi - Complexity!) is an estimate of "bit-demand" for the
       frame.  I.e. how many bits it would need to be encoded without
       quantisation.  It is used in adaptively allocating bits to busy
       frames.  It is calculated as bits actually used times average
       quantisation.
    */
    X = AP * AQ;

    /* To handle longer sequences with little picture content
       where I, B and P frames are of unusually similar size we
       insist I frames are assumed to be at least one and a half times
       as complex as typical P frames.

       Xp only holds a meaningful value once a P picture has been
       coded (first_P cleared below); before that - the very first
       I-frame, and every picture when encoding stills - there is
       nothing to compare against, so the floor is skipped.
    */
    if( picture.pict_type == I_TYPE && !first_P )
        X = fmax(X, 1.5*Xp);

    picture.AQ = AQ;
    picture.SQ = sum_avg_quant;

    /* Xi/Xp/Xb are used as a guesstimate of *typical* frame complexity
       based on the past.  Thus we don't want anomalous outliers due
       to scene changes swinging things too much, so we use moving
       averages.  The weightings are intended so all 3 averages have
       similar real-time decay periods based on an assumption of
       20-30Hz frame rates.
    */
    switch (picture.pict_type)
    {
    case I_TYPE:
        d0i = d;
        sum_I_size += AP/8.0;
        ++I_count;
        if( first_I )
        {
            Xi = X;
            first_I = 0;
        }
        else
        {
            Xi = (X + K_AVG_WINDOW_I*Xi)/(K_AVG_WINDOW_I+1.0);
        }
        break;
    case P_TYPE:
        sum_P_size += AP/8.0;
        ++P_count;
        d0p = d;
        if( first_P )
        {
            Xp = X;
            first_P = 0;
        }
        else
        {
            if( fast_tune )
                Xp = (X+Xp*2.0)/3.0;
            else
                Xp = (X + Xp*K_AVG_WINDOW_P)/(K_AVG_WINDOW_P+1.0);
        }
        break;
    case B_TYPE:
        sum_B_size += AP/8.0;
        ++B_count;
        d0b = d;
        if( first_B )
        {
            Xb = X;
            first_B = 0;
        }
        else
        {
            if( fast_tune )
            {
                Xb = (X + Xb * 3.0) / 4.0;
            }
            else
                Xb = (X + Xb*K_AVG_WINDOW_B)/(K_AVG_WINDOW_B+1.0);
        }
        break;
    }

    mjpeg_debug( "Frame %c A=%6.0f %.2f: I = %6.0f P = %5.0f B = %5.0f",
                 pict_type_char[picture.pict_type],
                 AP/8.0,
                 X,
                 sum_I_size/I_count,
                 sum_P_size/P_count,
                 sum_B_size/B_count );

    VbvEndOfPict(picture, bitcount_EOP);

#ifdef OUTPUT_STAT
    fprintf(statfile,"\nrate control: end of picture\n");
    fprintf(statfile," actual number of bits: S=%lld\n",S);
    fprintf(statfile," average quantization parameter AQ=%.1f\n",
            (double)AQ);
    fprintf(statfile," remaining number of bits in GOP: R=%.0f\n",R);
    fprintf(statfile,
            " global complexity measures (I,P,B): Xi=%.0f, Xp=%.0f, Xb=%.0f\n",
            Xi, Xp, Xb);
    fprintf(statfile,
            " virtual buffer fullness (I,PB): d0i=%d, d0b=%d\n",
            d0i, d0b);
    fprintf(statfile," remaining number of P pictures in GOP: Np=%d\n",Np);
    fprintf(statfile," remaining number of B pictures in GOP: Nb=%d\n",Nb);
    fprintf(statfile," average activity: avg_act=%.1f \n", avg_act );
#endif
    return padding_bits/8;
}
/* Compute the initial quantisation step size at the beginning of a
   picture from the virtual buffer fullness d, never going below the
   configured quantisation floor.

   encparams.quant_floor != 0 is the VBR case where we set a bitrate as
   a (high) maximum and then put a floor on quantisation to achieve a
   reasonable overall size.
*/
int OnTheFlyRateCtl::InitialMacroBlockQuant(Picture &picture)
{
    const int buffer_quant = ScaleQuant( picture.q_scale_type, d*62.0/r );
    const int floor_quant = static_cast<int>(encparams.quant_floor);
    return intmax(buffer_quant, floor_quant);
}
/*************
 *
 * MacroBlockQuant - select a quantisation for the current macroblock
 * based on the fullness of the virtual buffer.
 *
 * mb       - macroblock about to be quantised
 * bitcount - output bit count so far
 *
 * Returns the quantiser to use.  Also accumulates sum_vbuf_Q and
 * actcovered as side effects.
 *
 ************/
int OnTheFlyRateCtl::MacroBlockQuant( const MacroBlock &mb, int64_t bitcount )
{
    int lum_variance = mb.BaseLumVariance();
    double act = mb.Activity();
    const Picture &picture = mb.ParentPicture();

    /* A.Stevens 2000: we measure how much *information* (total
       activity) has been covered and aim to release bits in proportion.
       The virtual buffer catches the difference between the bits
       allocated for the activity covered so far and the bits actually
       used; its fullness controls quantisation.
    */
    double dj = ((double)d) +
        ((double)(bitcount-S) - actcovered * ((double)T) / actsum);

    /* Scale against the dynamic range of mquant and the bits/picture
       count, then apply the quantisation floor (the VBR case).  Note
       this is the *baseline* quantisation, not yet adjusted for local
       activity.
    */
    double Qj = dj*62.0/r;
    if( !(Qj > encparams.quant_floor) )
        Qj = encparams.quant_floor;

    /* Heuristic: we decrease quantisation for macroblocks with
       markedly low luminance variance.  This helps make gentle
       gradients (e.g. smooth backgrounds) look better at (hopefully)
       small additional cost in coding bits.
    */
    double act_boost;
#ifdef OLD_QUANTISATION_STEARING
    double N_act = ( act < avg_act || picture.pict_type == B_TYPE ) ?
        1.0 :
        (encparams.act_boost*act + avg_act)/(act + encparams.act_boost*avg_act);
    act_boost = 1.0/N_act;
#else
    act_boost = 1.0;
    if( lum_variance < encparams.boost_var_ceil )
    {
        if( lum_variance < encparams.boost_var_ceil/2)
        {
            /* Lower half of the range: full boost */
            act_boost = encparams.act_boost;
        }
        else
        {
            /* Upper half: boost tapers linearly down to 1.0 at the ceiling */
            double max_boost_var = encparams.boost_var_ceil/2;
            double above_max_boost =
                (static_cast<double>(lum_variance)-max_boost_var)
                / max_boost_var;
            act_boost = 1.0 + (encparams.act_boost-1.0) * (1.0-above_max_boost);
        }
    }
#endif

    const double boosted_Q = Qj/act_boost;
    sum_vbuf_Q += scale_quantf(picture.q_scale_type,boosted_Q);
    int mquant = ScaleQuant(picture.q_scale_type,boosted_Q) ;

    /* Update activity covered */
    actcovered += act;
    return mquant;
}
/* VBV calculations */

/* VbvEndOfPict
 *
 * Records the end-of-picture bit count (less BITCOUNT_OFFSET) in
 * bitcnt_EOP for the VBV delay calculation.
 *
 * - has to be called directly after writing picture_data()
 * - needed for accurate VBV buffer overflow calculation
 * - assumes there is no byte stuffing prior to the next start code
 *
 * The picture argument is not used.
 */
void OnTheFlyRateCtl::VbvEndOfPict(Picture &picture, int64_t bitcount)
{
    bitcnt_EOP = bitcount - BITCOUNT_OFFSET;
}
/* calc_vbv_delay
*
* Works out picture_delay (the number of 1/90000 s ticks until the
* picture is to be decoded), maintains next_ip_delay for the reordered
* I/P pictures, and finally sets picture.vbv_delay.
*
* Has to be called directly after writing the picture start code, the
* reference point for vbv_delay.
*
* A.Stevens 2000:
* Actually we call it just before the start code is written, but anyone
* who thinks 32 bits +/- in all these other approximations matters is fooling
* themselves.
*
* Unless CRIES_WOLF is defined the detailed VBV bookkeeping is compiled
* out and vbv_delay is only assigned by the short #else section at the
* end of the function.
*/
void OnTheFlyRateCtl::CalcVbvDelay(Picture &picture)
{
/* number of 1/90000 s ticks until next picture is to be decoded */
if (picture.pict_type == B_TYPE)
{
/* B pictures are not reordered: the delay is simply this picture's
duration */
if (encparams.prog_seq)
{
if (!picture.repeatfirst)
picture_delay = 90000.0/encparams.frame_rate; /* 1 frame */
else
{
if (!picture.topfirst)
picture_delay = 90000.0*2.0/encparams.frame_rate; /* 2 frames */
else
picture_delay = 90000.0*3.0/encparams.frame_rate; /* 3 frames */
}
}
else
{
/* interlaced */
if (encparams.fieldpic)
picture_delay = 90000.0/(2.0*encparams.frame_rate); /* 1 field */
else
{
if (!picture.repeatfirst)
picture_delay = 90000.0*2.0/(2.0*encparams.frame_rate); /* 2 flds */
else
picture_delay = 90000.0*3.0/(2.0*encparams.frame_rate); /* 3 flds */
}
}
}
else
{
/* I or P picture */
if (encparams.fieldpic)
{
if(picture.topfirst && (picture.pict_struct==TOP_FIELD))
{
/* first field */
picture_delay = 90000.0/(2.0*encparams.frame_rate);
}
else
{
/* second field */
/* take frame reordering delay into account */
picture_delay = next_ip_delay - 90000.0/(2.0*encparams.frame_rate);
}
}
else
{
/* frame picture */
/* take frame reordering delay into account*/
picture_delay = next_ip_delay;
}
/* Remember this picture's own duration in next_ip_delay; the
branches above read it as the delay for a following I/P picture */
if (!encparams.fieldpic ||
picture.topfirst!=(picture.pict_struct==TOP_FIELD))
{
/* frame picture or second field */
if (encparams.prog_seq)
{
if (!picture.repeatfirst)
next_ip_delay = 90000.0/encparams.frame_rate;
else
{
if (!picture.topfirst)
next_ip_delay = 90000.0*2.0/encparams.frame_rate;
else
next_ip_delay = 90000.0*3.0/encparams.frame_rate;
}
}
else
{
if (encparams.fieldpic)
next_ip_delay = 90000.0/(2.0*encparams.frame_rate);
else
{
if (!picture.repeatfirst)
next_ip_delay = 90000.0*2.0/(2.0*encparams.frame_rate);
else
next_ip_delay = 90000.0*3.0/(2.0*encparams.frame_rate);
}
}
}
}
/* NOTE(review): within this function decoding_time is only advanced
inside the CRIES_WOLF section below, so with that section compiled
out this branch presumably runs on every call - confirm whether
decoding_time is updated anywhere else. */
if (decoding_time==0.0)
{
/* first call of calc_vbv_delay */
/* we start with a 7/8 filled VBV buffer (12.5% back-off) */
picture_delay = ((encparams.vbv_buffer_size*7)/8)*90000.0/encparams.bit_rate;
if (encparams.fieldpic)
next_ip_delay = (int)(90000.0/encparams.frame_rate+0.5);
}
/* VBV checks */
/*
TODO: This is currently disabled because it is hopelessly wrong
most of the time. It generates 20 warnings for frames with small
predecessors (small bitcnt_EOP) that in reality would be padded
away by the multiplexer for every realistic warning for an
oversize packet.
*/
#ifdef CRIES_WOLF
/* check for underflow (previous picture).
*/
if (!encparams.low_delay && (decoding_time < (double)bitcnt_EOP*90000.0/encparams.bit_rate))
{
/* picture not completely in buffer at intended decoding time */
mjpeg_warn("vbv_delay underflow frame %d (target=%.1f, actual=%.1f)",
frame_num-1, decoding_time, bitcnt_EOP*90000.0/encparams.bit_rate);
}
/* when to decode current frame */
decoding_time += picture_delay;
/* check for overflow (current picture). Unless verbose warn
only if overflow must be at least in part due to an oversize
frame (rather than undersize predecessor).
*/
picture.vbv_delay = (int)(decoding_time - ((double)bitcnt_EOP)*90000.0/bit_rate);
if ( decoding_time * ((double)bit_rate / 90000.0) - ((double)bitcnt_EOP)
> vbv_buffer_size )
{
double oversize = encparams.vbv_buffer_size -
(decoding_time / 90000.0 * bit_rate - (double)(bitcnt_EOP+frame_undershoot));
if(!quiet || oversize > 0.0 )
mjpeg_warn("vbv_delay overflow frame %d - %f.0 bytes!",
frame_num,
oversize / 8.0
);
}
#ifdef OUTPUT_STAT
fprintf(statfile,
"\nvbv_delay=%d (coder.BitCount=%lld, decoding_time=%.2f, bitcnt_EOP=%lld)\n",
picture.vbv_delay,coder.BitCount(),decoding_time,bitcnt_EOP);
#endif
/* clamp vbv_delay to the range 0..65535 */
if (picture.vbv_delay<0)
{
mjpeg_warn("vbv_delay underflow: %d",picture.vbv_delay);
picture.vbv_delay = 0;
}
if (picture.vbv_delay>65535)
{
mjpeg_warn("vbv_delay frame %d exceeds permissible range: %d",
frame_num, picture.vbv_delay);
picture.vbv_delay = 65535;
}
#else
/* Simplified path: vbv_delay is set to 0xffff unless this is MPEG-1
with no quantisation floor and no still size.
NOTE(review): the else-if below can never be taken, because
still_size > 0 is already part of the first condition; and in the
remaining case vbv_delay is not assigned here at all. Intent
unclear - confirm. */
if( !encparams.mpeg1 || encparams.quant_floor != 0 || encparams.still_size > 0)
picture.vbv_delay = 0xffff;
else if( encparams.still_size > 0 )
picture.vbv_delay = static_cast<int>(90000.0/encparams.frame_rate/4);
#endif
}
/*
* Local variables:
* c-file-style: "stroustrup"
* tab-width: 4
* indent-tabs-mode: nil
* End:
*/