This implements a simple reactive VBR rate control mode for single-layer H.264.
The primary aim here is to avoid the problematic behaviour that the CBR rate
controller displays on scene changes, where the QP can get pushed up by a large
amount in a short period and compromise the quality of following frames to a
very visible degree.

The main idea, then, is to try to keep the HRD buffering above the target level
most of the time, so that when a large frame is generated (on a scene change or
when the stream complexity increases) we have plenty of slack to be able to
encode the more difficult region without compromising quality immediately on
the following frames.  The rate controller is optimistic about the complexity
of future frames, so even after generating one or more large frames on a
significant change it will try to keep the QP at its current level until the
HRD buffer bounds force a change to maintain the intended rate.

Compared to the CBR rate controller, it keeps the quality level much more
stable - QP does not always spike up as large frames are generated when the
complexity of the stream increases transiently, but equally it does not reduce
as quickly when the complexity of the stream decreases.

Signed-off-by: Mark Thompson <s...@jkqxz.net>
---
On 09/01/17 05:23, Xiang, Haihao wrote:
>> +    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], 
>> (int)encoder_context->brc.min_qp, 51);
>> +    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], 
>> (int)encoder_context->brc.min_qp, 51);
>> +    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], 
>> (int)encoder_context->brc.min_qp, 51);
> 
> The lower bound is 1 when encoder_context->brc.min_qp is equal to 0.
> 
>> +
>> +    if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
>> +        sts = BRC_UNDERFLOW_WITH_MAX_QP;
>> +    if (sts == BRC_OVERFLOW && qp[slice_type] == 
>> encoder_context->brc.min_qp)
> 
> Same as above

Apologies, I missed updating it to match 
33a32935ac9e2622adc5c59045d565b4e5904749.

Fixed in the same way as that patch in this version.

Thanks,

- Mark


 src/gen6_mfc.c        |  10 ++--
 src/gen6_mfc_common.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++--
 src/gen75_mfc.c       |  10 ++--
 src/gen8_mfc.c        |  10 ++--
 src/i965_drv_video.c  |   5 +-
 5 files changed, 141 insertions(+), 20 deletions(-)

diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 8077c14..1765530 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -798,7 +798,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = 
mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -816,7 +816,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice, 
slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice, 
slice_batch);
 
     if ( slice_index == 0) 
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, 
encoder_context, slice_batch);
@@ -1188,7 +1188,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = 
mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1209,7 +1209,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                              pSliceParameter,
                              encode_state,
                              encoder_context,
-                             (rate_control_mode == VA_RC_CBR),
+                             (rate_control_mode != VA_RC_CQP),
                              qp_slice,
                              slice_batch);
 
@@ -1368,7 +1368,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  
//filling the pipeline
         gen6_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == 
VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen6_mfc_stop(ctx, encode_state, encoder_context, 
&current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, 
current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index fbedc94..0d21a11 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -127,6 +127,9 @@ static void intel_mfc_brc_init(struct encode_state 
*encode_state,
                 ((double)encoder_context->brc.framerate[i - 1].num / 
(double)encoder_context->brc.framerate[i - 1].den);
         }
 
+        if (mfc_context->brc.mode == VA_RC_VBR && 
encoder_context->brc.target_percentage[i])
+            bitrate = bitrate * encoder_context->brc.target_percentage[i] / 
100;
+
         if (i == encoder_context->layer.num_layers - 1)
             factor = 1.0;
         else {
@@ -219,9 +222,9 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
     return BRC_NO_HRD_VIOLATION;
 }
 
-int intel_mfc_brc_postpack(struct encode_state *encode_state,
-                           struct intel_encoder_context *encoder_context,
-                           int frame_bits)
+static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context 
*encoder_context,
+                                      int frame_bits)
 {
     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
@@ -368,6 +371,121 @@ int intel_mfc_brc_postpack(struct encode_state 
*encode_state,
     return sts;
 }
 
+static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context 
*encoder_context,
+                                      int frame_bits)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    gen6_brc_status sts;
+    VAEncSliceParameterBufferH264 *pSliceParameter = 
(VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int slice_type = 
intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+    int *qp = mfc_context->brc.qp_prime_y[0];
+    int min_qp = MAX(1, encoder_context->brc.min_qp);
+    int qp_delta, large_frame_adjustment;
+
+    // This implements a simple reactive VBR rate control mode for 
single-layer H.264.  The primary
+    // aim here is to avoid the problematic behaviour that the CBR rate 
controller displays on
+    // scene changes, where the QP can get pushed up by a large amount in a 
short period and
+    // compromise the quality of following frames to a very visible degree.
+    // The main idea, then, is to try to keep the HRD buffering above the 
target level most of the
+    // time, so that when a large frame is generated (on a scene change or 
when the stream
+    // complexity increases) we have plenty of slack to be able to encode the 
more difficult region
+    // without compromising quality immediately on the following frames.   It 
is optimistic about
+    // the complexity of future frames, so even after generating one or more 
large frames on a
+    // significant change it will try to keep the QP at its current level 
until the HRD buffer
+    // bounds force a change to maintain the intended rate.
+
+    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
+
+    // This adjustment is applied to increase the QP by more than we normally 
would if a very
+    // large frame is encountered and we are in danger of running out of slack.
+    large_frame_adjustment = rint(2.0 * log(frame_bits / 
mfc_context->brc.target_frame_size[0][slice_type]));
+
+    if (sts == BRC_UNDERFLOW) {
+        // The frame is far too big and we don't have the bits available to 
send it, so it will
+        // have to be re-encoded at a higher QP.
+        qp_delta = +2;
+        if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
+            qp_delta += large_frame_adjustment;
+    } else if (sts == BRC_OVERFLOW) {
+        // The frame is very small and we are now overflowing the HRD buffer.  
Currently this case
+        // does not occur because we ignore overflow in VBR mode.
+        assert(0 && "Overflow in VBR mode");
+    } else if (frame_bits <= 
mfc_context->brc.target_frame_size[0][slice_type]) {
+        // The frame is smaller than the average size expected for this frame 
type.
+        if (mfc_context->hrd.current_buffer_fullness[0] >
+            (mfc_context->hrd.target_buffer_fullness[0] + 
mfc_context->hrd.buffer_size[0]) / 2.0) {
+            // We currently have lots of bits available, so decrease the QP 
slightly for the next
+            // frame.
+            qp_delta = -1;
+        } else {
+            // The HRD buffer fullness is increasing, so do nothing.  (We may 
be under the target
+            // level here, but are moving in the right direction.)
+            qp_delta = 0;
+        }
+    } else {
+        // The frame is larger than the average size expected for this frame 
type.
+        if (mfc_context->hrd.current_buffer_fullness[0] > 
mfc_context->hrd.target_buffer_fullness[0]) {
+            // We are currently over the target level, so do nothing.
+            qp_delta = 0;
+        } else if (mfc_context->hrd.current_buffer_fullness[0] > 
mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
+            // We are under the target level, but not critically.  Increase 
the QP by one step if
+            // continuing like this would underflow soon (currently within one 
second).
+            if (mfc_context->hrd.current_buffer_fullness[0] /
+                (double)(frame_bits - 
mfc_context->brc.target_frame_size[0][slice_type] + 1) <
+                ((double)encoder_context->brc.framerate[0].num / 
(double)encoder_context->brc.framerate[0].den))
+                qp_delta = +1;
+            else
+                qp_delta = 0;
+        } else {
+            // We are a long way under the target level.  Always increase the 
QP, possibly by a
+            // larger amount dependent on how big the frame we just made 
actually was.
+            qp_delta = +1 + large_frame_adjustment;
+        }
+    }
+
+    switch (slice_type) {
+    case SLICE_TYPE_I:
+        qp[SLICE_TYPE_I] += qp_delta;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_P:
+        qp[SLICE_TYPE_P] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_B:
+        qp[SLICE_TYPE_B] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
+        break;
+    }
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], min_qp, 51);
+
+    if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
+        sts = BRC_UNDERFLOW_WITH_MAX_QP;
+    if (sts == BRC_OVERFLOW && qp[slice_type] == min_qp)
+        sts = BRC_OVERFLOW_WITH_MIN_QP;
+
+    return sts;
+}
+
+int intel_mfc_brc_postpack(struct encode_state *encode_state,
+                           struct intel_encoder_context *encoder_context,
+                           int frame_bits)
+{
+    switch (encoder_context->rate_control_mode) {
+    case VA_RC_CBR:
+        return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, 
frame_bits);
+    case VA_RC_VBR:
+        return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, 
frame_bits);
+    }
+    assert(0 && "Invalid RC mode");
+}
+
 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
                                        struct intel_encoder_context 
*encoder_context)
 {
@@ -427,7 +545,7 @@ void intel_mfc_brc_prepare(struct encode_state 
*encode_state,
         encoder_context->codec != CODEC_H264_MVC)
         return;
 
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         /*Programing bit rate control */
         if (encoder_context->brc.need_reset) {
             intel_mfc_bit_rate_control_context_init(encode_state, 
encoder_context);
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
index 0fbbe76..7b76b99 100644
--- a/src/gen75_mfc.c
+++ b/src/gen75_mfc.c
@@ -1174,7 +1174,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP 
ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = 
mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1192,7 +1192,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP 
ctx,
                               pPicParameter,
                               pSliceParameter,
                               encode_state, encoder_context,
-                              (rate_control_mode == VA_RC_CBR), qp_slice, 
slice_batch);
+                              (rate_control_mode != VA_RC_CQP), qp_slice, 
slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, 
encoder_context, slice_batch);
@@ -1521,7 +1521,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = 
mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1540,7 +1540,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1702,7 +1702,7 @@ gen75_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); 
//filling the pipeline
         gen75_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == 
VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen75_mfc_stop(ctx, encode_state, encoder_context, 
&current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, 
current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 90119d7..8e68c7c 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1177,7 +1177,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP 
ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = 
mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1195,7 +1195,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP 
ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice, 
slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice, 
slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, 
encoder_context, slice_batch);
@@ -1534,7 +1534,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = 
mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1553,7 +1553,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1729,7 +1729,7 @@ gen8_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  
//filling the pipeline
         gen8_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == 
VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen8_mfc_stop(ctx, encode_state, encoder_context, 
&current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, 
current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 76cb915..cc37190 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -936,7 +936,10 @@ i965_GetConfigAttributes(VADriverContextP ctx,
                     profile != VAProfileMPEG2Simple)
                     attrib_list[i].value |= VA_RC_CBR;
 
-                if (profile == VAProfileVP9Profile0)
+                if (profile == VAProfileVP9Profile0 ||
+                    profile == VAProfileH264ConstrainedBaseline ||
+                    profile == VAProfileH264Main ||
+                    profile == VAProfileH264High)
                     attrib_list[i].value |= VA_RC_VBR;
 
                 break;
-- 
2.11.0
_______________________________________________
Libva mailing list
Libva@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libva

Reply via email to