This patch is for discussion only, not ready to commit yet and maybe newer will be.

NVENC in current CUDA 8 SDK is setting wrong aspect ratio when encoding resolution 720x576 and 720x480 and using AR 4:3 or 16:9 it will encode to h264 header that 15:11 and 20:11 is used. This is very very very ugly way how to fix it. I hope someone in NVIDIA will fix this soon, because all encoded streams are not displayed correctly for example in videojs player.

Support for HEVC is currently broken.

--
Miroslav Slugeň
















>From 2300c38e381ee9bf4af96e6c0f6d05193738bcab Mon Sep 17 00:00:00 2001
From: Miroslav Slugen <thunde...@email.cz>
Date: Sun, 12 Feb 2017 22:19:02 +0100
Subject: [PATCH 1/1] nvenc: fix wrong aspect ratio for 720x576 and 720x480
 resolutions

---
 libavcodec/nvenc.c    |   4 +
 libavcodec/nvenc_ar.c | 414 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 418 insertions(+)
 create mode 100644 libavcodec/nvenc_ar.c

diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 7005465..87284b7 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -31,6 +31,8 @@
 #include "libavutil/pixdesc.h"
 #include "internal.h"
 
+#include "nvenc_ar.c"
+
 #define NVENC_CAP 0x30
 #define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR ||               \
                     rc == NV_ENC_PARAMS_RC_2_PASS_QUALITY ||    \
@@ -1613,6 +1615,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
     if (res < 0)
         goto error2;
 
+    nvenc_sar_fix(avctx, pkt);
+
     av_free(slice_offsets);
 
     return 0;
diff --git a/libavcodec/nvenc_ar.c b/libavcodec/nvenc_ar.c
new file mode 100644
index 0000000..a45c7fb
--- /dev/null
+++ b/libavcodec/nvenc_ar.c
@@ -0,0 +1,414 @@
+
+typedef struct nal_unit {
+    AVPacket *pkt;
+
+    int reset;
+
+    uint8_t *b;
+    int bs;
+
+    int nal_ref_idc;
+    int nal_unit_type;
+
+    int profile_idc;
+    int level_idc;
+    int seq_parameter_set_id;
+
+    int chroma_format_idc;
+    int residual_colour_transform_flag;
+    int bit_depth_luma_minus8;
+    int bit_depth_chroma_minus8;
+    int qpprime_y_zero_transform_bypass_flag;
+    int seq_scaling_matrix_present_flag;
+    int seq_scaling_list_present_flag[8];
+    int* scaling_list_4x4[6];
+    int UseDefaultScalingMatrix4x4Flag[6];
+    int* scaling_list_8x8[2];
+    int UseDefaultScalingMatrix8x8Flag[2];
+
+
+    int log2_max_frame_num_minus4;
+    int pic_order_cnt_type;
+    int log2_max_pic_order_cnt_lsb_minus4;
+    int delta_pic_order_always_zero_flag;
+    int offset_for_non_ref_pic;
+    int offset_for_top_to_bottom_field;
+    int num_ref_frames_in_pic_order_cnt_cycle;
+    int offset_for_ref_frame[256];
+    int num_ref_frames;
+    int gaps_in_frame_num_value_allowed_flag;
+    int pic_width_in_mbs_minus1;
+    int pic_height_in_map_units_minus1;
+    int frame_mbs_only_flag;
+    int mb_adaptive_frame_field_flag;
+    int direct_8x8_inference_flag;
+    int frame_cropping_flag;
+    int frame_crop_left_offset;
+    int frame_crop_right_offset;
+    int frame_crop_top_offset;
+    int frame_crop_bottom_offset;
+    int vui_parameters_present_flag;
+
+    struct {
+        int aspect_ratio_info_present_flag;
+        int aspect_ratio_idc;
+        int sar_width;
+        int sar_height;
+        int overscan_info_present_flag;
+        int overscan_appropriate_flag;
+        int video_signal_type_present_flag;
+        int video_format;
+        int video_full_range_flag;
+        int colour_description_present_flag;
+        int colour_primaries;
+        int transfer_characteristics;
+        int matrix_coefficients;
+        int chroma_loc_info_present_flag;
+        int chroma_sample_loc_type_top_field;
+        int chroma_sample_loc_type_bottom_field;
+        int timing_info_present_flag;
+        int num_units_in_tick;
+        int time_scale;
+        int fixed_frame_rate_flag;
+        int nal_hrd_parameters_present_flag;
+        int vcl_hrd_parameters_present_flag;
+        int low_delay_hrd_flag;
+        int pic_struct_present_flag;
+        int bitstream_restriction_flag;
+        int motion_vectors_over_pic_boundaries_flag;
+        int max_bytes_per_pic_denom;
+        int max_bits_per_mb_denom;
+        int log2_max_mv_length_horizontal;
+        int log2_max_mv_length_vertical;
+        int num_reorder_frames;
+        int max_dec_frame_buffering;
+    } vui;
+
+} nal_unit;
+
+
+static int nal_unit_find (uint8_t* b, int bs, int* start, int* end) {
+    int i;
+
+    *start = 0;
+    *end = 0;
+
+    for (i = 0; i < bs; i++) {
+        if ((i + 3 <= bs) && (b[i] == 0 && b[i + 1] == 0 && b[i + 2] == 0x01)) {
+            i+= 3;
+            goto found;
+        }
+
+        if ((i + 4 <= bs) && (b[i] == 0 && b[i + 1] == 0 && b[i + 2] == 0 && b[i + 3] == 0x01)) {
+            i+= 4;
+            goto found;
+        }
+    }
+
+    goto not_found;
+
+found:
+    *start = i;
+
+    for (; i < bs; i++) {
+        if (((i + 3 <= bs) && (b[i] == 0 && b[i + 1] == 0 && b[i + 2] == 0x01)) ||
+            ((i + 4 <= bs) && (b[i] == 0 && b[i + 1] == 0 && b[i + 2] == 0 && b[i + 3] == 0x01))) break;
+    }
+
+    *end = i;
+    return (*end - *start);
+
+not_found:
+    return 0;
+}
+
+static int nal_unit_get (nal_unit *p, int bs) {
+    int i, pos, r = 0;
+
+    for (i = 0; (i < bs && p->bs > 0); i++) {
+        pos = (p->bs - 1) % 8;
+
+        r = (r << 1) | ((p->b[0] >> pos) & 0x1);
+
+        if (!pos) p->b++;
+        p->bs--;
+    }
+
+    return r;
+}
+
+static uint32_t nal_unit_get_ue (nal_unit *p){
+    int32_t r = 0;
+    int i = 0;
+
+    for (i = 0; ((nal_unit_get(p, 1) == 0) && (i < 32) && (p->bs > 0)); i++);
+
+    r = nal_unit_get(p, i);
+    r+= (1 << i) - 1;
+    return r;
+}
+
+static int32_t nal_unit_get_se (nal_unit *p) {
+    int32_t r = nal_unit_get_ue(p);
+    if (r & 0x01) {
+        r = (r+1)/2;
+    } else {
+        r = -(r/2);
+    }
+    return r;
+}
+
+static int nal_unit_set (nal_unit *p, int bs, int v) {
+    int i, pos;
+
+    for (i = 0; (i < bs && p->bs > 0); i++) {
+        pos = (p->bs - 1) % 8;
+
+        p->b[0] = (p->b[0] & (0xFF - (1 << pos))) | (((v >> ((bs - 1) - i)) & 0x1) << pos);
+
+        if (!pos) p->b++;
+        p->bs--;
+    }
+
+    return 0;
+}
+
+static void nal_unit_read_scaling_list(nal_unit *p, int* scalingList, int sizeOfScalingList, int useDefaultScalingMatrixFlag ) {
+    int j, lastScale = 8, nextScale = 8, delta_scale;
+
+    if (!scalingList) return;
+
+    for (j = 0; j < sizeOfScalingList; j++) {
+        if (nextScale != 0) {
+            delta_scale = nal_unit_get_se(p);
+            nextScale = (lastScale + delta_scale + 256) % 256;
+            useDefaultScalingMatrixFlag = (j == 0 && nextScale == 0);
+        }
+        scalingList[j] = (nextScale == 0) ? lastScale : nextScale;
+        lastScale = scalingList[j];
+    }
+}
+
+static int nal_unit_ar_set (AVCodecContext *avctx, nal_unit *p, uint8_t *buf) {
+    int i, pos, ret = 0;
+
+    pos = p->b - p->pkt->data;
+
+    /* alokujeme další paměť */
+    if ((ret = av_grow_packet(p->pkt, 4)) < 0) return ret;
+
+    /* posuneme všechna data za pozicí */
+    memmove(p->pkt->data + pos + 4, p->pkt->data + pos, p->pkt->size - pos);
+    p->pkt->size+= 4;
+
+    /* posuneme se na novou pozici aspect_ratio_idc */
+    p->b = p->pkt->data + (pos - 1);
+    p->bs+= 8 + 32;
+
+    /* musíme nastavit novou pozici */
+    for (i = 0; i < 5; i++) {
+        nal_unit_set(p, 8, buf[i]);
+    }
+
+    p->reset = 1;
+
+    return ret;
+}
+
+static int nal_unit_read_vui (AVCodecContext *avctx, nal_unit *p) {
+    char buf[5];
+
+    p->vui.aspect_ratio_info_present_flag = nal_unit_get(p, 1);
+    if (p->vui.aspect_ratio_info_present_flag) {
+        p->vui.aspect_ratio_idc = nal_unit_get(p, 8);
+
+        switch (p->vui.aspect_ratio_idc) {
+        /* 4:3 */
+        case 2:
+            buf[0] = 0xFF;
+            buf[1] = 0;
+            buf[2] = 0x10;
+            buf[3] = 0;
+            buf[4] = 0x0F;
+            nal_unit_ar_set(avctx, p, buf);
+            break;
+        /* 16:9 */
+        case 4:
+            buf[0] = 0xFF;
+            buf[1] = 0;
+            buf[2] = 0x40;
+            buf[3] = 0;
+            buf[4] = 0x2D;
+            nal_unit_ar_set(avctx, p, buf);
+            break;
+        case 0xFF:
+            p->vui.sar_width = nal_unit_get(p, 16);
+            p->vui.sar_height = nal_unit_get(p, 16);
+            break;
+        }
+
+        //av_log(avctx, AV_LOG_ERROR, "NAL AR idc: %d, sarw: %d, sarh: %d\n", p->vui.aspect_ratio_idc, p->vui.sar_width, p->vui.sar_height);
+    }
+    /* ... */
+
+    return 0;
+}
+
+static int nal_unit_read_sps (AVCodecContext *avctx, nal_unit *p) {
+    int i;
+
+    /* 8bit - profile_idc */
+    p->profile_idc = nal_unit_get(p, 8);
+    /* 6bit - constraint_setX_flags */
+    nal_unit_get(p, 1);
+    nal_unit_get(p, 1);
+    nal_unit_get(p, 1);
+    nal_unit_get(p, 1);
+    nal_unit_get(p, 1);
+    nal_unit_get(p, 1);
+    /* 2bit - reserved_zero */
+    nal_unit_get(p, 2);
+    /* 8bit - level_idc */
+    p->level_idc = nal_unit_get(p, 8);
+    p->seq_parameter_set_id = nal_unit_get_ue(p);
+
+    //av_log(avctx, AV_LOG_ERROR, "NAL profile: %d, level: %d, set_id: %d\n", p->profile_idc, p->level_idc, p->seq_parameter_set_id);
+
+    if (p->profile_idc == 100 || p->profile_idc == 110 ||
+        p->profile_idc == 122 || p->profile_idc == 144) {
+        p->chroma_format_idc = nal_unit_get_ue(p);
+        if (p->chroma_format_idc == 3) {
+            p->residual_colour_transform_flag = nal_unit_get(p, 1);
+        }
+        p->bit_depth_luma_minus8 = nal_unit_get_ue(p);
+        p->bit_depth_chroma_minus8 = nal_unit_get_ue(p);
+        p->qpprime_y_zero_transform_bypass_flag = nal_unit_get(p, 1);
+        p->seq_scaling_matrix_present_flag = nal_unit_get(p, 1);
+        if (p->seq_scaling_matrix_present_flag) {
+            for (i = 0; i < 8; i++) {
+                p->seq_scaling_list_present_flag[i] = nal_unit_get(p, 1);
+                if (p->seq_scaling_list_present_flag[i]) {
+                    if (i < 6) {
+                        nal_unit_read_scaling_list(p, p->scaling_list_4x4[i], 16, p->UseDefaultScalingMatrix4x4Flag[i]);
+                    } else {
+                        nal_unit_read_scaling_list(p, p->scaling_list_8x8[i - 6], 64, p->UseDefaultScalingMatrix8x8Flag[i - 6]);
+                    }
+                }
+            }
+        }
+    }
+    p->log2_max_frame_num_minus4 = nal_unit_get_ue(p);
+    p->pic_order_cnt_type = nal_unit_get_ue(p);
+    if (p->pic_order_cnt_type == 0) {
+        p->log2_max_pic_order_cnt_lsb_minus4 = nal_unit_get_ue(p);
+    } else if (p->pic_order_cnt_type == 1) {
+        p->delta_pic_order_always_zero_flag = nal_unit_get(p, 1);
+        p->offset_for_non_ref_pic = nal_unit_get_se(p);
+        p->offset_for_top_to_bottom_field = nal_unit_get_se(p);
+        p->num_ref_frames_in_pic_order_cnt_cycle = nal_unit_get_ue(p);
+        for(i = 0; i < p->num_ref_frames_in_pic_order_cnt_cycle; i++) {
+            p->offset_for_ref_frame[i] = nal_unit_get_se(p);
+        }
+    }
+    p->num_ref_frames = nal_unit_get_ue(p);
+    p->gaps_in_frame_num_value_allowed_flag = nal_unit_get(p, 1);
+
+    /* kontrola na rozlišení nutná */
+    p->pic_width_in_mbs_minus1 = nal_unit_get_ue(p);
+    p->pic_height_in_map_units_minus1 = nal_unit_get_ue(p);
+    if ((p->pic_width_in_mbs_minus1 != 44) ||
+        ((p->pic_height_in_map_units_minus1 != 35) && (p->pic_height_in_map_units_minus1 != 29))) goto done;
+
+    p->frame_mbs_only_flag = nal_unit_get(p, 1);
+    if (!p->frame_mbs_only_flag) {
+        p->mb_adaptive_frame_field_flag = nal_unit_get(p, 1);
+    }
+    p->direct_8x8_inference_flag = nal_unit_get(p, 1);
+    p->frame_cropping_flag = nal_unit_get(p, 1);
+    if(p->frame_cropping_flag) {
+        p->frame_crop_left_offset = nal_unit_get_ue(p);
+        p->frame_crop_right_offset = nal_unit_get_ue(p);
+        p->frame_crop_top_offset = nal_unit_get_ue(p);
+        p->frame_crop_bottom_offset = nal_unit_get_ue(p);
+    }
+    p->vui_parameters_present_flag = nal_unit_get(p, 1);
+    if (p->vui_parameters_present_flag) {
+        nal_unit_read_vui(avctx, p);
+    }
+    //read_rbsp_trailing_bits
+
+done:
+    return 0;
+}
+
+static int nal_unit_read (AVCodecContext *avctx, nal_unit *p) {
+
+    /* 1bit - forbidden_zero_bit */
+    if (nal_unit_get(p, 1)) goto done;
+
+    /* 2bit - nal_ref_idc */
+    p->nal_ref_idc = nal_unit_get(p, 2);
+
+    /* 5bit - nal_unit_type */
+    p->nal_unit_type = nal_unit_get(p, 5);
+
+    switch (p->nal_unit_type) {
+    case 7:
+        //av_log(avctx, AV_LOG_ERROR, "NAL SPS\n");
+        nal_unit_read_sps(avctx, p);
+        break;
+    default:
+        goto done;
+    }
+
+done:
+    return 0;
+}
+
+static int nvenc_sar_fix (AVCodecContext *avctx, AVPacket *pkt) {
+    int i, ret, size, len, start, stop;
+    uint8_t *data = NULL;
+    nal_unit nal;
+
+reset:
+    i = 0;
+    data = pkt->data;
+    size = pkt->size;
+
+next:
+    if (size <= 0) goto done;
+
+    if ((ret = nal_unit_find(data, size, &start, &stop)) < 0) return ret;
+
+    len = ret;
+    if (len + start <= 0) goto done;
+
+    //av_log(avctx,AV_LOG_ERROR,"found %d NAL uint: %d - %d, length: %d\n", i, start, stop, len);
+
+    data+= start;
+    size-= start;
+
+    /* prázdná NAL */
+    if (len <= 0) {
+        i++;
+        goto next;
+    }
+
+    memset(&nal, 0, sizeof(nal_unit));
+
+    nal.pkt = pkt;
+    nal.b = data;
+    nal.bs = len << 3;
+
+    if ((ret = nal_unit_read(avctx, &nal)) < 0) return ret;
+
+    if (nal.reset) goto reset;
+
+    data+= len;
+    size-= len;
+    i++;
+    goto next;
+
+done:
+    return 0;
+}
-- 
2.1.4

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to