# HG changeset patch # User IndumathiR<induma...@multicorewareinc.com> # Date 1518528290 -19800 # Tue Feb 13 18:54:50 2018 +0530 # Node ID 27e3b161cd8b59ad1cae67a96e11e3e0506d5017 # Parent 04a337abd70de269cef7d9655365f3a3ebde02aa Add VMAF support to report per frame and aggregate VMAF score
diff -r 04a337abd70d -r 27e3b161cd8b doc/reST/api.rst --- a/doc/reST/api.rst Thu Apr 12 15:10:59 2018 +0530 +++ b/doc/reST/api.rst Tue Feb 13 18:54:50 2018 +0530 @@ -398,7 +398,30 @@ * release library static allocations, reset configured CTU size */ void x265_cleanup(void); +VMAF (Video Multi-Method Assessment Fusion) +=========================================== +If you set the ENABLE_LIBVMAF cmake option to ON, then x265 will report per frame +and aggregate VMAF scores for the given input and dump the scores in a csv file. +The user also needs to specify :option:`--recon` on the command line to get the VMAF scores. + + /* x265_calculate_vmafscore: + * returns VMAF score for the input video. + * This api must be called only after encoding is done. */ + double x265_calculate_vmafscore(x265_param*, x265_vmaf_data*); + + /* x265_calculate_vmaf_framelevelscore: + * returns VMAF score for each frame in a given input video. */ + double x265_calculate_vmaf_framelevelscore(x265_vmaf_framedata*); + +.. Note:: + + When setting ENABLE_LIBVMAF cmake option to ON, it is recommended to + also set ENABLE_SHARED to OFF to prevent build problems. + We only need the static library from these builds. + + Binaries built on Windows will not have VMAF support. 
+ Multi-library Interface ======================= diff -r 04a337abd70d -r 27e3b161cd8b source/CMakeLists.txt --- a/source/CMakeLists.txt Thu Apr 12 15:10:59 2018 +0530 +++ b/source/CMakeLists.txt Tue Feb 13 18:54:50 2018 +0530 @@ -29,7 +29,7 @@ option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) # X265_BUILD must be incremented each time the public API is changed -set(X265_BUILD 157) +set(X265_BUILD 158) configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" "${PROJECT_BINARY_DIR}/x265.def") configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" @@ -109,6 +109,11 @@ if(NO_ATOMICS) add_definitions(-DNO_ATOMICS=1) endif(NO_ATOMICS) + find_library(VMAF vmaf) + option(ENABLE_LIBVMAF "Enable VMAF" OFF) + if(ENABLE_LIBVMAF) + add_definitions(-DENABLE_LIBVMAF) + endif() endif(UNIX) if(X64 AND NOT WIN32) @@ -536,6 +541,9 @@ if(EXTRA_LIB) target_link_libraries(x265-static ${EXTRA_LIB}) endif() +if(ENABLE_LIBVMAF) + target_link_libraries(x265-static ${VMAF}) +endif() install(TARGETS x265-static LIBRARY DESTINATION ${LIB_INSTALL_DIR} ARCHIVE DESTINATION ${LIB_INSTALL_DIR}) diff -r 04a337abd70d -r 27e3b161cd8b source/common/picyuv.h --- a/source/common/picyuv.h Thu Apr 12 15:10:59 2018 +0530 +++ b/source/common/picyuv.h Tue Feb 13 18:54:50 2018 +0530 @@ -72,6 +72,7 @@ pixel m_maxChromaVLevel; pixel m_minChromaVLevel; double m_avgChromaVLevel; + double m_vmafScore; x265_param *m_param; PicYuv(); diff -r 04a337abd70d -r 27e3b161cd8b source/encoder/api.cpp --- a/source/encoder/api.cpp Thu Apr 12 15:10:59 2018 +0530 +++ b/source/encoder/api.cpp Tue Feb 13 18:54:50 2018 +0530 @@ -31,6 +31,10 @@ #include "nal.h" #include "bitcost.h" +#if ENABLE_LIBVMAF +#include "libvmaf.h" +#endif + /* multilib namespace reflectors */ #if LINKED_8BIT namespace x265_8bit { @@ -302,13 +306,34 @@ encoder->fetchStats(outputStats, statsSizeBytes); } } +#if ENABLE_LIBVMAF +void x265_vmaf_encoder_log(x265_encoder* enc, int 
argc, char **argv, x265_param *param, x265_vmaf_data *vmafdata) +{ + if (enc) + { + Encoder *encoder = static_cast<Encoder*>(enc); + x265_stats stats; + stats.aggregateVmafScore = x265_calculate_vmafscore(param, vmafdata); + if(vmafdata->reference_file) + fclose(vmafdata->reference_file); + if(vmafdata->distorted_file) + fclose(vmafdata->distorted_file); + if(vmafdata) + x265_free(vmafdata); + encoder->fetchStats(&stats, sizeof(stats)); + int padx = encoder->m_sps.conformanceWindow.rightOffset; + int pady = encoder->m_sps.conformanceWindow.bottomOffset; + x265_csvlog_encode(encoder->m_param, &stats, padx, pady, argc, argv); + } +} +#endif void x265_encoder_log(x265_encoder* enc, int argc, char **argv) { if (enc) { Encoder *encoder = static_cast<Encoder*>(enc); - x265_stats stats; + x265_stats stats; encoder->fetchStats(&stats, sizeof(stats)); int padx = encoder->m_sps.conformanceWindow.rightOffset; int pady = encoder->m_sps.conformanceWindow.bottomOffset; @@ -457,7 +482,13 @@ &x265_csvlog_frame, &x265_csvlog_encode, &x265_dither_image, - &x265_set_analysis_data + &x265_set_analysis_data, +#if ENABLE_LIBVMAF + &x265_calculate_vmafscore, + &x265_calculate_vmaf_framelevelscore, + &x265_vmaf_encoder_log +#endif + }; typedef const x265_api* (*api_get_func)(int bitDepth); @@ -751,6 +782,9 @@ /* detailed performance statistics */ fprintf(csvfp, ", DecideWait (ms), Row0Wait (ms), Wall time (ms), Ref Wait Wall (ms), Total CTU time (ms)," "Stall Time (ms), Total frame time (ms), Avg WPP, Row Blocks"); +#if ENABLE_LIBVMAF + fprintf(csvfp, ", VMAF Frame Score"); +#endif } fprintf(csvfp, "\n"); } @@ -759,6 +793,9 @@ fputs(summaryCSVHeader, csvfp); if (param->csvLogLevel >= 2 || param->maxCLL || param->maxFALL) fputs("MaxCLL, MaxFALL,", csvfp); +#if ENABLE_LIBVMAF + fputs(" Aggregate VMAF Score,", csvfp); +#endif fputs(" Version\n", csvfp); } } @@ -868,6 +905,9 @@ frameStats->totalFrameTime); fprintf(param->csvfpt, " %.3lf, %d", frameStats->avgWPP, frameStats->countRowBlocks); 
+#if ENABLE_LIBVMAF + fprintf(param->csvfpt, ", %lf", frameStats->vmafFrameScore); +#endif } fprintf(param->csvfpt, "\n"); fflush(stderr); @@ -886,7 +926,11 @@ fputs(summaryCSVHeader, p->csvfpt); if (p->csvLogLevel >= 2 || p->maxCLL || p->maxFALL) fputs("MaxCLL, MaxFALL,", p->csvfpt); +#if ENABLE_LIBVMAF + fputs(" Aggregate VMAF score,", p->csvfpt); +#endif fputs(" Version\n",p->csvfpt); + } // CLI arguments or other if (argc) @@ -919,7 +963,6 @@ char buffer[200]; strftime(buffer, 128, "%c", timeinfo); fprintf(p->csvfpt, ", %s, ", buffer); - // elapsed time, fps, bitrate fprintf(p->csvfpt, "%.2f, %.2f, %.2f,", stats->elapsedEncodeTime, stats->encodedPictureCount / stats->elapsedEncodeTime, stats->bitrate); @@ -981,7 +1024,11 @@ fprintf(p->csvfpt, " -, -, -, -, -, -, -,"); if (p->csvLogLevel >= 2 || p->maxCLL || p->maxFALL) fprintf(p->csvfpt, " %-6u, %-6u,", stats->maxCLL, stats->maxFALL); +#if ENABLE_LIBVMAF + fprintf(p->csvfpt, " %lf,", stats->aggregateVmafScore); +#endif fprintf(p->csvfpt, " %s\n", api->version_str); + } } @@ -1072,4 +1119,318 @@ } } +#if ENABLE_LIBVMAF +/* Read y values of single frame for 8-bit input */ +int read_image_byte(FILE *file, float *buf, int width, int height, int stride) +{ + char *byte_ptr = (char *)buf; + unsigned char *tmp_buf = 0; + int i, j; + int ret = 1; + + if (width <= 0 || height <= 0) + { + goto fail_or_end; + } + + if (!(tmp_buf = (unsigned char*)malloc(width))) + { + goto fail_or_end; + } + + for (i = 0; i < height; ++i) + { + float *row_ptr = (float *)byte_ptr; + + if (fread(tmp_buf, 1, width, file) != (size_t)width) + { + goto fail_or_end; + } + + for (j = 0; j < width; ++j) + { + row_ptr[j] = tmp_buf[j]; + } + + byte_ptr += stride; + } + + ret = 0; + +fail_or_end: + free(tmp_buf); + return ret; +} +/* Read y values of single frame for 10-bit input */ +int read_image_word(FILE *file, float *buf, int width, int height, int stride) +{ + char *byte_ptr = (char *)buf; + unsigned short *tmp_buf = 0; + int i, j; + int ret = 
1; + + if (width <= 0 || height <= 0) + { + goto fail_or_end; + } + + if (!(tmp_buf = (unsigned short*)malloc(width * 2))) // '*2' to accommodate words + { + goto fail_or_end; + } + + for (i = 0; i < height; ++i) + { + float *row_ptr = (float *)byte_ptr; + + if (fread(tmp_buf, 2, width, file) != (size_t)width) // '2' for word + { + goto fail_or_end; + } + + for (j = 0; j < width; ++j) + { + row_ptr[j] = tmp_buf[j] / 4.0; // '/4' to convert from 10 to 8-bit + } + + byte_ptr += stride; + } + + ret = 0; + +fail_or_end: + free(tmp_buf); + return ret; +} + +int read_frame(float *reference_data, float *distorted_data, float *temp_data, int stride_byte, void *s) +{ + x265_vmaf_data *user_data = (x265_vmaf_data *)s; + int ret; + + // read reference y + if (user_data->internalBitDepth == 8) + { + ret = read_image_byte(user_data->reference_file, reference_data, user_data->width, user_data->height, stride_byte); + } + else if (user_data->internalBitDepth == 10) + { + ret = read_image_word(user_data->reference_file, reference_data, user_data->width, user_data->height, stride_byte); + } + else + { + x265_log(NULL, X265_LOG_ERROR, "Invalid bitdepth\n"); + return 1; + } + if (ret) + { + if (feof(user_data->reference_file)) + { + ret = 2; // OK if end of file + } + return ret; + } + + // read distorted y + if (user_data->internalBitDepth == 8) + { + ret = read_image_byte(user_data->distorted_file, distorted_data, user_data->width, user_data->height, stride_byte); + } + else if (user_data->internalBitDepth == 10) + { + ret = read_image_word(user_data->distorted_file, distorted_data, user_data->width, user_data->height, stride_byte); + } + else + { + x265_log(NULL, X265_LOG_ERROR, "Invalid bitdepth\n"); + return 1; + } + if (ret) + { + if (feof(user_data->distorted_file)) + { + ret = 2; // OK if end of file + } + return ret; + } + + // reference skip u and v + if (user_data->internalBitDepth == 8) + { + if (fread(temp_data, 1, user_data->offset, user_data->reference_file) != 
(size_t)user_data->offset) + { + x265_log(NULL, X265_LOG_ERROR, "reference fread to skip u and v failed.\n"); + goto fail_or_end; + } + } + else if (user_data->internalBitDepth == 10) + { + if (fread(temp_data, 2, user_data->offset, user_data->reference_file) != (size_t)user_data->offset) + { + x265_log(NULL, X265_LOG_ERROR, "reference fread to skip u and v failed.\n"); + goto fail_or_end; + } + } + else + { + x265_log(NULL, X265_LOG_ERROR, "Invalid format\n"); + goto fail_or_end; + } + + // distorted skip u and v + if (user_data->internalBitDepth == 8) + { + if (fread(temp_data, 1, user_data->offset, user_data->distorted_file) != (size_t)user_data->offset) + { + x265_log(NULL, X265_LOG_ERROR, "distorted fread to skip u and v failed.\n"); + goto fail_or_end; + } + } + else if (user_data->internalBitDepth == 10) + { + if (fread(temp_data, 2, user_data->offset, user_data->distorted_file) != (size_t)user_data->offset) + { + x265_log(NULL, X265_LOG_ERROR, "distorted fread to skip u and v failed.\n"); + goto fail_or_end; + } + } + else + { + x265_log(NULL, X265_LOG_ERROR, "Invalid format\n"); + goto fail_or_end; + } + + +fail_or_end: + return ret; +} + +double x265_calculate_vmafscore(x265_param *param, x265_vmaf_data *data) +{ + double score; + + data->width = param->sourceWidth; + data->height = param->sourceHeight; + data->internalBitDepth = param->internalBitDepth; + + if (param->internalCsp == X265_CSP_I420) + { + if ((param->sourceWidth * param->sourceHeight) % 2 != 0) + x265_log(NULL, X265_LOG_ERROR, "Invalid file size\n"); + data->offset = param->sourceWidth * param->sourceHeight / 2; + } + else if (param->internalCsp == X265_CSP_I422) + data->offset = param->sourceWidth * param->sourceHeight; + else if (param->internalCsp == X265_CSP_I444) + data->offset = param->sourceWidth * param->sourceHeight * 2; + else + x265_log(NULL, X265_LOG_ERROR, "Invalid format\n"); + + compute_vmaf(&score, vcd->format, data->width, data->height, read_frame, data, vcd->model_path, 
vcd->log_path, vcd->log_fmt, vcd->disable_clip, vcd->disable_avx, vcd->enable_transform, vcd->phone_model, vcd->psnr, vcd->ssim, vcd->ms_ssim, vcd->pool); + + return score; +} + +int read_frame_10bit(float *reference_data, float *distorted_data, float *temp_data, int stride, void *s) +{ + x265_vmaf_framedata *user_data = (x265_vmaf_framedata *)s; + + PicYuv *reference_frame = (PicYuv *)user_data->reference_frame; + PicYuv *distorted_frame = (PicYuv *)user_data->distorted_frame; + + if(!user_data->frame_set) { + + int reference_stride = reference_frame->m_stride; + int distorted_stride = distorted_frame->m_stride; + + const uint16_t *reference_ptr = (const uint16_t *)reference_frame->m_picOrg[0]; + const uint16_t *distorted_ptr = (const uint16_t *)distorted_frame->m_picOrg[0]; + + temp_data = reference_data; + + int height = user_data->height; + int width = user_data->width; + + int i,j; + for (i = 0; i < height; i++) { + for ( j = 0; j < width; j++) { + temp_data[j] = ((float)reference_ptr[j] / 4.0); + } + reference_ptr += reference_stride; + temp_data += stride / sizeof(*temp_data); + } + + temp_data = distorted_data; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + temp_data[j] = ((float)distorted_ptr[j] / 4.0); + } + distorted_ptr += distorted_stride; + temp_data += stride / sizeof(*temp_data); + } + + user_data->frame_set = 1; + return 0; + } + return 2; +} + +int read_frame_8bit(float *reference_data, float *distorted_data, float *temp_data, int stride, void *s) +{ + x265_vmaf_framedata *user_data = (x265_vmaf_framedata *)s; + + PicYuv *reference_frame = (PicYuv *)user_data->reference_frame; + PicYuv *distorted_frame = (PicYuv *)user_data->distorted_frame; + + if(!user_data->frame_set) { + + int reference_stride = reference_frame->m_stride; + int distorted_stride = distorted_frame->m_stride; + + const uint8_t *reference_ptr = (const uint8_t *)reference_frame->m_picOrg[0]; + const uint8_t *distorted_ptr = (const uint8_t 
*)distorted_frame->m_picOrg[0]; + + temp_data = reference_data; + + int height = user_data->height; + int width = user_data->width; + + int i,j; + for (i = 0; i < height; i++) { + for ( j = 0; j < width; j++) { + temp_data[j] = (float)reference_ptr[j]; + } + reference_ptr += reference_stride; + temp_data += stride / sizeof(*temp_data); + } + + temp_data = distorted_data; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + temp_data[j] = (float)distorted_ptr[j]; + } + distorted_ptr += distorted_stride; + temp_data += stride / sizeof(*temp_data); + } + + user_data->frame_set = 1; + return 0; + } + return 2; +} + +double x265_calculate_vmaf_framelevelscore(x265_vmaf_framedata *vmafframedata) +{ + double score; + int (*read_frame)(float *reference_data, float *distorted_data, float *temp_data, + int stride, void *s); + if (vmafframedata->internalBitDepth == 8) + read_frame = read_frame_8bit; + else + read_frame = read_frame_10bit; + compute_vmaf(&score, vcd->format, vmafframedata->width, vmafframedata->height, read_frame, vmafframedata, vcd->model_path, vcd->log_path, vcd->log_fmt, vcd->disable_clip, vcd->disable_avx, vcd->enable_transform, vcd->phone_model, vcd->psnr, vcd->ssim, vcd->ms_ssim, vcd->pool); + + return score; +} +#endif } /* end namespace or extern "C" */ diff -r 04a337abd70d -r 27e3b161cd8b source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Thu Apr 12 15:10:59 2018 +0530 +++ b/source/encoder/encoder.cpp Tue Feb 13 18:54:50 2018 +0530 @@ -2127,6 +2127,9 @@ #define ELAPSED_MSEC(start, end) (((double)(end) - (start)) / 1000) if (m_param->csvLogLevel >= 2) { +#if ENABLE_LIBVMAF + frameStats->vmafFrameScore = curFrame->m_fencPic->m_vmafScore; +#endif frameStats->decideWaitTime = ELAPSED_MSEC(0, curEncoder->m_slicetypeWaitTime); frameStats->row0WaitTime = ELAPSED_MSEC(curEncoder->m_startCompressTime, curEncoder->m_row0WaitTime); frameStats->wallTime = ELAPSED_MSEC(curEncoder->m_row0WaitTime, curEncoder->m_endCompressTime); diff -r 
04a337abd70d -r 27e3b161cd8b source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Thu Apr 12 15:10:59 2018 +0530 +++ b/source/encoder/frameencoder.cpp Tue Feb 13 18:54:50 2018 +0530 @@ -864,6 +864,9 @@ m_frameFilter.processRow(i - m_filterRowDelay); } } +#if ENABLE_LIBVMAF + vmafFrameLevelScore(); +#endif if (m_param->maxSlices > 1) { @@ -932,7 +935,7 @@ updateChecksum(reconPic->m_picOrg[1], m_checksum[1], height, width, stride, 0, cuHeight); updateChecksum(reconPic->m_picOrg[2], m_checksum[2], height, width, stride, 0, cuHeight); } - } + } } // end of (m_param->maxSlices > 1) if (m_param->rc.bStatWrite) @@ -1189,7 +1192,7 @@ m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId], *m_param); #endif - m_endFrameTime = x265_mdate(); + m_endFrameTime = x265_mdate(); } void FrameEncoder::encodeSlice(uint32_t sliceAddr) @@ -2058,11 +2061,36 @@ m_nr->nrOffsetDenoise[cat][0] = 0; } } +#if ENABLE_LIBVMAF +void FrameEncoder::vmafFrameLevelScore() +{ + PicYuv *fenc = m_frame->m_fencPic; + PicYuv *recon = m_frame->m_reconPic; + + x265_vmaf_framedata *vmafframedata = (x265_vmaf_framedata*)x265_malloc(sizeof(x265_vmaf_framedata)); + if (!vmafframedata) + { + x265_log(NULL, X265_LOG_ERROR, "vmaf frame data alloc failed\n"); + } + + vmafframedata->height = fenc->m_picHeight; + vmafframedata->width = fenc->m_picWidth; + vmafframedata->frame_set = 0; + vmafframedata->internalBitDepth = m_param->internalBitDepth; + vmafframedata->reference_frame = fenc; + vmafframedata->distorted_frame = recon; + + fenc->m_vmafScore = x265_calculate_vmaf_framelevelscore(vmafframedata); + + if (vmafframedata) + x265_free(vmafframedata); +} +#endif Frame *FrameEncoder::getEncodedPicture(NALList& output) { if (m_frame) - { + { /* block here until worker thread completes */ m_done.wait(); diff -r 04a337abd70d -r 27e3b161cd8b source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Thu Apr 12 15:10:59 2018 +0530 +++ b/source/encoder/frameencoder.h Tue Feb 13 18:54:50 2018 +0530 
@@ -240,6 +240,9 @@ void enqueueRowFilter(int row) { WaveFront::enqueueRow(row * 2 + 1); } void enableRowEncoder(int row) { WaveFront::enableRow(row * 2 + 0); } void enableRowFilter(int row) { WaveFront::enableRow(row * 2 + 1); } +#if ENABLE_LIBVMAF + void vmafFrameLevelScore(); +#endif }; } diff -r 04a337abd70d -r 27e3b161cd8b source/x265.cpp --- a/source/x265.cpp Thu Apr 12 15:10:59 2018 +0530 +++ b/source/x265.cpp Tue Feb 13 18:54:50 2018 +0530 @@ -75,6 +75,7 @@ const char* reconPlayCmd; const x265_api* api; x265_param* param; + x265_vmaf_data* vmafData; bool bProgress; bool bForceY4m; bool bDither; @@ -96,6 +97,7 @@ reconPlayCmd = NULL; api = NULL; param = NULL; + vmafData = NULL; framesToBeEncoded = seek = 0; totalbytes = 0; bProgress = true; @@ -216,6 +218,14 @@ x265_log(NULL, X265_LOG_ERROR, "param alloc failed\n"); return true; } +#if ENABLE_LIBVMAF + vmafData = (x265_vmaf_data*)x265_malloc(sizeof(x265_vmaf_data)); + if(!vmafData) + { + x265_log(NULL, X265_LOG_ERROR, "vmaf data alloc failed\n"); + return true; + } +#endif if (api->param_default_preset(param, preset, tune) < 0) { @@ -363,6 +373,7 @@ info.frameCount = 0; getParamAspectRatio(param, info.sarWidth, info.sarHeight); + this->input = InputFile::open(info, this->bForceY4m); if (!this->input || this->input->isFail()) { @@ -439,7 +450,30 @@ param->sourceWidth, param->sourceHeight, param->fpsNum, param->fpsDenom, x265_source_csp_names[param->internalCsp]); } +#if ENABLE_LIBVMAF + if (!reconfn) + { + x265_log(param, X265_LOG_ERROR, "recon file must be specified to get VMAF score, try --help for help\n"); + return true; + } + const char *str = strrchr(info.filename, '.'); + if (!strcmp(str, ".y4m")) + { + x265_log(param, X265_LOG_ERROR, "VMAF supports YUV file format only.\n"); + return true; + } + if(param->internalCsp == X265_CSP_I420 || param->internalCsp == X265_CSP_I422 || param->internalCsp == X265_CSP_I444) + { + vmafData->reference_file = x265_fopen(inputfn, "rb"); + vmafData->distorted_file = 
x265_fopen(reconfn, "rb"); + } + else + { + x265_log(param, X265_LOG_ERROR, "VMAF will support only yuv420p, yu422p, yu444p, yuv420p10le, yuv422p10le, yuv444p10le formats.\n"); + return true; + } +#endif this->output = OutputFile::open(outputfn, info); if (this->output->isFail()) { @@ -555,7 +589,9 @@ x265_param* param = cliopt.param; const x265_api* api = cliopt.api; - +#if ENABLE_LIBVMAF + x265_vmaf_data* vmafdata = cliopt.vmafData; +#endif /* This allows muxers to modify bitstream format */ cliopt.output->setParam(param); @@ -712,7 +748,7 @@ if (!numEncoded) break; } - + /* clear progress report */ if (cliopt.bProgress) fprintf(stderr, "%*s\r", 80, " "); @@ -723,7 +759,11 @@ api->encoder_get_stats(encoder, &stats, sizeof(stats)); if (param->csvfn && !b_ctrl_c) +#if ENABLE_LIBVMAF + api->vmaf_encoder_log(encoder, argc, argv, param, vmafdata); +#else api->encoder_log(encoder, argc, argv); +#endif api->encoder_close(encoder); int64_t second_largest_pts = 0; diff -r 04a337abd70d -r 27e3b161cd8b source/x265.h --- a/source/x265.h Thu Apr 12 15:10:59 2018 +0530 +++ b/source/x265.h Tue Feb 13 18:54:50 2018 +0530 @@ -209,6 +209,7 @@ x265_cu_stats cuStats; x265_pu_stats puStats; double totalFrameTime; + double vmafFrameScore; } x265_frame_stats; typedef struct x265_ctu_info_t @@ -536,6 +537,7 @@ double elapsedEncodeTime; /* wall time since encoder was opened */ double elapsedVideoTime; /* encoded picture count / frame rate */ double bitrate; /* accBits / elapsed video time */ + double aggregateVmafScore; /* aggregate VMAF score for input video*/ uint64_t accBits; /* total bits output thus far */ uint32_t encodedPictureCount; /* number of output pictures thus far */ uint32_t totalWPFrames; /* number of uni-directional weighted frames used */ @@ -572,6 +574,47 @@ float bitrateFactor; } x265_zone; +/* data to calculate aggregate VMAF score */ +typedef struct x265_vmaf_data +{ + int width; + int height; + size_t offset; + int internalBitDepth; + FILE *reference_file; /* FILE 
pointer for input file */ + FILE *distorted_file; /* FILE pointer for recon file generated*/ +}x265_vmaf_data; + +/* data to calculate frame level VMAF score */ +typedef struct x265_vmaf_framedata +{ + int width; + int height; + int frame_set; + int internalBitDepth; + void *reference_frame; /* points to fenc of particular frame */ + void *distorted_frame; /* points to recon of particular frame */ +}x265_vmaf_framedata; + +/* common data needed to calculate both frame level and video level VMAF scores */ +typedef struct x265_vmaf_commondata +{ + char *format; + char *model_path; + char *log_path; + char *log_fmt; + int disable_clip; + int disable_avx; + int enable_transform; + int phone_model; + int psnr; + int ssim; + int ms_ssim; + char *pool; +}x265_vmaf_commondata; + +static const x265_vmaf_commondata vcd[] = {NULL, (char *)"/usr/local/share/model/vmaf_v0.6.1.pkl", NULL, NULL, 0, 0, 0, 0, 0, 0, 0, NULL}; + /* x265 input parameters * * For version safety you may use x265_param_alloc/free() to manage the @@ -1811,6 +1854,22 @@ /* In-place downshift from a bit-depth greater than 8 to a bit-depth of 8, using * the residual bits to dither each row. */ void x265_dither_image(x265_picture *, int picWidth, int picHeight, int16_t *errorBuf, int bitDepth); +#if ENABLE_LIBVMAF +/* x265_calculate_vmafScore: + * returns VMAF score for the input video. + * This api must be called only after encoding was done. */ +double x265_calculate_vmafscore(x265_param*, x265_vmaf_data*); + +/* x265_calculate_vmaf_framelevelscore: + * returns VMAF score for each frame in a given input video. */ +double x265_calculate_vmaf_framelevelscore(x265_vmaf_framedata*); +/* x265_vmaf_encoder_log: + * write a line to the configured CSV file. If a CSV filename was not + * configured, or file open failed, this function will perform no write. 
+ * This api will be called only when ENABLE_LIBVMAF cmake option is set */ +void x265_vmaf_encoder_log(x265_encoder *encoder, int argc, char **argv, x265_param*, x265_vmaf_data*); + +#endif #define X265_MAJOR_VERSION 1 @@ -1864,6 +1923,11 @@ void (*csvlog_encode)(const x265_param*, const x265_stats *, int, int, int, char**); void (*dither_image)(x265_picture*, int, int, int16_t*, int); int (*set_analysis_data)(x265_encoder *encoder, x265_analysis_data *analysis_data, int poc, uint32_t cuBytes); +#if ENABLE_LIBVMAF + double (*calculate_vmafscore)(x265_param *, x265_vmaf_data *); + double (*calculate_vmaf_framelevelscore)(x265_vmaf_framedata *); + void (*vmaf_encoder_log)(x265_encoder*, int, char**, x265_param *, x265_vmaf_data *); +#endif /* add new pointers to the end, or increment X265_MAJOR_VERSION */ } x265_api;
# HG changeset patch # User IndumathiR<induma...@multicorewareinc.com> # Date 1518528290 -19800 # Tue Feb 13 18:54:50 2018 +0530 # Node ID 27e3b161cd8b59ad1cae67a96e11e3e0506d5017 # Parent 04a337abd70de269cef7d9655365f3a3ebde02aa Add VMAF support to report per frame and aggregate VMAF score diff -r 04a337abd70d -r 27e3b161cd8b doc/reST/api.rst --- a/doc/reST/api.rst Thu Apr 12 15:10:59 2018 +0530 +++ b/doc/reST/api.rst Tue Feb 13 18:54:50 2018 +0530 @@ -398,7 +398,30 @@ * release library static allocations, reset configured CTU size */ void x265_cleanup(void); +VMAF (Video Multi-Method Assessment Fusion) +=========================================== +If you set the ENABLE_LIBVMAF cmake option to ON, then x265 will report per frame +and aggregate VMAF scores for the given input and dump the scores in a csv file. +The user also needs to specify :option:`--recon` on the command line to get the VMAF scores. + + /* x265_calculate_vmafscore: + * returns VMAF score for the input video. + * This api must be called only after encoding is done. */ + double x265_calculate_vmafscore(x265_param*, x265_vmaf_data*); + + /* x265_calculate_vmaf_framelevelscore: + * returns VMAF score for each frame in a given input video. */ + double x265_calculate_vmaf_framelevelscore(x265_vmaf_framedata*); + +.. Note:: + + When setting ENABLE_LIBVMAF cmake option to ON, it is recommended to + also set ENABLE_SHARED to OFF to prevent build problems. + We only need the static library from these builds. + + Binaries built on Windows will not have VMAF support. 
+ Multi-library Interface ======================= diff -r 04a337abd70d -r 27e3b161cd8b source/CMakeLists.txt --- a/source/CMakeLists.txt Thu Apr 12 15:10:59 2018 +0530 +++ b/source/CMakeLists.txt Tue Feb 13 18:54:50 2018 +0530 @@ -29,7 +29,7 @@ option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) # X265_BUILD must be incremented each time the public API is changed -set(X265_BUILD 157) +set(X265_BUILD 158) configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" "${PROJECT_BINARY_DIR}/x265.def") configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" @@ -109,6 +109,11 @@ if(NO_ATOMICS) add_definitions(-DNO_ATOMICS=1) endif(NO_ATOMICS) + find_library(VMAF vmaf) + option(ENABLE_LIBVMAF "Enable VMAF" OFF) + if(ENABLE_LIBVMAF) + add_definitions(-DENABLE_LIBVMAF) + endif() endif(UNIX) if(X64 AND NOT WIN32) @@ -536,6 +541,9 @@ if(EXTRA_LIB) target_link_libraries(x265-static ${EXTRA_LIB}) endif() +if(ENABLE_LIBVMAF) + target_link_libraries(x265-static ${VMAF}) +endif() install(TARGETS x265-static LIBRARY DESTINATION ${LIB_INSTALL_DIR} ARCHIVE DESTINATION ${LIB_INSTALL_DIR}) diff -r 04a337abd70d -r 27e3b161cd8b source/common/picyuv.h --- a/source/common/picyuv.h Thu Apr 12 15:10:59 2018 +0530 +++ b/source/common/picyuv.h Tue Feb 13 18:54:50 2018 +0530 @@ -72,6 +72,7 @@ pixel m_maxChromaVLevel; pixel m_minChromaVLevel; double m_avgChromaVLevel; + double m_vmafScore; x265_param *m_param; PicYuv(); diff -r 04a337abd70d -r 27e3b161cd8b source/encoder/api.cpp --- a/source/encoder/api.cpp Thu Apr 12 15:10:59 2018 +0530 +++ b/source/encoder/api.cpp Tue Feb 13 18:54:50 2018 +0530 @@ -31,6 +31,10 @@ #include "nal.h" #include "bitcost.h" +#if ENABLE_LIBVMAF +#include "libvmaf.h" +#endif + /* multilib namespace reflectors */ #if LINKED_8BIT namespace x265_8bit { @@ -302,13 +306,34 @@ encoder->fetchStats(outputStats, statsSizeBytes); } } +#if ENABLE_LIBVMAF +void x265_vmaf_encoder_log(x265_encoder* enc, int 
argc, char **argv, x265_param *param, x265_vmaf_data *vmafdata) +{ + if (enc) + { + Encoder *encoder = static_cast<Encoder*>(enc); + x265_stats stats; + stats.aggregateVmafScore = x265_calculate_vmafscore(param, vmafdata); + if(vmafdata->reference_file) + fclose(vmafdata->reference_file); + if(vmafdata->distorted_file) + fclose(vmafdata->distorted_file); + if(vmafdata) + x265_free(vmafdata); + encoder->fetchStats(&stats, sizeof(stats)); + int padx = encoder->m_sps.conformanceWindow.rightOffset; + int pady = encoder->m_sps.conformanceWindow.bottomOffset; + x265_csvlog_encode(encoder->m_param, &stats, padx, pady, argc, argv); + } +} +#endif void x265_encoder_log(x265_encoder* enc, int argc, char **argv) { if (enc) { Encoder *encoder = static_cast<Encoder*>(enc); - x265_stats stats; + x265_stats stats; encoder->fetchStats(&stats, sizeof(stats)); int padx = encoder->m_sps.conformanceWindow.rightOffset; int pady = encoder->m_sps.conformanceWindow.bottomOffset; @@ -457,7 +482,13 @@ &x265_csvlog_frame, &x265_csvlog_encode, &x265_dither_image, - &x265_set_analysis_data + &x265_set_analysis_data, +#if ENABLE_LIBVMAF + &x265_calculate_vmafscore, + &x265_calculate_vmaf_framelevelscore, + &x265_vmaf_encoder_log +#endif + }; typedef const x265_api* (*api_get_func)(int bitDepth); @@ -751,6 +782,9 @@ /* detailed performance statistics */ fprintf(csvfp, ", DecideWait (ms), Row0Wait (ms), Wall time (ms), Ref Wait Wall (ms), Total CTU time (ms)," "Stall Time (ms), Total frame time (ms), Avg WPP, Row Blocks"); +#if ENABLE_LIBVMAF + fprintf(csvfp, ", VMAF Frame Score"); +#endif } fprintf(csvfp, "\n"); } @@ -759,6 +793,9 @@ fputs(summaryCSVHeader, csvfp); if (param->csvLogLevel >= 2 || param->maxCLL || param->maxFALL) fputs("MaxCLL, MaxFALL,", csvfp); +#if ENABLE_LIBVMAF + fputs(" Aggregate VMAF Score,", csvfp); +#endif fputs(" Version\n", csvfp); } } @@ -868,6 +905,9 @@ frameStats->totalFrameTime); fprintf(param->csvfpt, " %.3lf, %d", frameStats->avgWPP, frameStats->countRowBlocks); 
+#if ENABLE_LIBVMAF + fprintf(param->csvfpt, ", %lf", frameStats->vmafFrameScore); +#endif } fprintf(param->csvfpt, "\n"); fflush(stderr); @@ -886,7 +926,11 @@ fputs(summaryCSVHeader, p->csvfpt); if (p->csvLogLevel >= 2 || p->maxCLL || p->maxFALL) fputs("MaxCLL, MaxFALL,", p->csvfpt); +#if ENABLE_LIBVMAF + fputs(" Aggregate VMAF score,", p->csvfpt); +#endif fputs(" Version\n",p->csvfpt); + } // CLI arguments or other if (argc) @@ -919,7 +963,6 @@ char buffer[200]; strftime(buffer, 128, "%c", timeinfo); fprintf(p->csvfpt, ", %s, ", buffer); - // elapsed time, fps, bitrate fprintf(p->csvfpt, "%.2f, %.2f, %.2f,", stats->elapsedEncodeTime, stats->encodedPictureCount / stats->elapsedEncodeTime, stats->bitrate); @@ -981,7 +1024,11 @@ fprintf(p->csvfpt, " -, -, -, -, -, -, -,"); if (p->csvLogLevel >= 2 || p->maxCLL || p->maxFALL) fprintf(p->csvfpt, " %-6u, %-6u,", stats->maxCLL, stats->maxFALL); +#if ENABLE_LIBVMAF + fprintf(p->csvfpt, " %lf,", stats->aggregateVmafScore); +#endif fprintf(p->csvfpt, " %s\n", api->version_str); + } } @@ -1072,4 +1119,318 @@ } } +#if ENABLE_LIBVMAF +/* Read y values of single frame for 8-bit input */ +int read_image_byte(FILE *file, float *buf, int width, int height, int stride) +{ + char *byte_ptr = (char *)buf; + unsigned char *tmp_buf = 0; + int i, j; + int ret = 1; + + if (width <= 0 || height <= 0) + { + goto fail_or_end; + } + + if (!(tmp_buf = (unsigned char*)malloc(width))) + { + goto fail_or_end; + } + + for (i = 0; i < height; ++i) + { + float *row_ptr = (float *)byte_ptr; + + if (fread(tmp_buf, 1, width, file) != (size_t)width) + { + goto fail_or_end; + } + + for (j = 0; j < width; ++j) + { + row_ptr[j] = tmp_buf[j]; + } + + byte_ptr += stride; + } + + ret = 0; + +fail_or_end: + free(tmp_buf); + return ret; +} +/* Read y values of single frame for 10-bit input */ +int read_image_word(FILE *file, float *buf, int width, int height, int stride) +{ + char *byte_ptr = (char *)buf; + unsigned short *tmp_buf = 0; + int i, j; + int ret = 
1; + + if (width <= 0 || height <= 0) + { + goto fail_or_end; + } + + if (!(tmp_buf = (unsigned short*)malloc(width * 2))) // '*2' to accommodate words + { + goto fail_or_end; + } + + for (i = 0; i < height; ++i) + { + float *row_ptr = (float *)byte_ptr; + + if (fread(tmp_buf, 2, width, file) != (size_t)width) // '2' for word + { + goto fail_or_end; + } + + for (j = 0; j < width; ++j) + { + row_ptr[j] = tmp_buf[j] / 4.0; // '/4' to convert from 10 to 8-bit + } + + byte_ptr += stride; + } + + ret = 0; + +fail_or_end: + free(tmp_buf); + return ret; +} + +int read_frame(float *reference_data, float *distorted_data, float *temp_data, int stride_byte, void *s) +{ + x265_vmaf_data *user_data = (x265_vmaf_data *)s; + int ret; + + // read reference y + if (user_data->internalBitDepth == 8) + { + ret = read_image_byte(user_data->reference_file, reference_data, user_data->width, user_data->height, stride_byte); + } + else if (user_data->internalBitDepth == 10) + { + ret = read_image_word(user_data->reference_file, reference_data, user_data->width, user_data->height, stride_byte); + } + else + { + x265_log(NULL, X265_LOG_ERROR, "Invalid bitdepth\n"); + return 1; + } + if (ret) + { + if (feof(user_data->reference_file)) + { + ret = 2; // OK if end of file + } + return ret; + } + + // read distorted y + if (user_data->internalBitDepth == 8) + { + ret = read_image_byte(user_data->distorted_file, distorted_data, user_data->width, user_data->height, stride_byte); + } + else if (user_data->internalBitDepth == 10) + { + ret = read_image_word(user_data->distorted_file, distorted_data, user_data->width, user_data->height, stride_byte); + } + else + { + x265_log(NULL, X265_LOG_ERROR, "Invalid bitdepth\n"); + return 1; + } + if (ret) + { + if (feof(user_data->distorted_file)) + { + ret = 2; // OK if end of file + } + return ret; + } + + // reference skip u and v + if (user_data->internalBitDepth == 8) + { + if (fread(temp_data, 1, user_data->offset, user_data->reference_file) != 
(size_t)user_data->offset) + { + x265_log(NULL, X265_LOG_ERROR, "reference fread to skip u and v failed.\n"); + goto fail_or_end; + } + } + else if (user_data->internalBitDepth == 10) + { + if (fread(temp_data, 2, user_data->offset, user_data->reference_file) != (size_t)user_data->offset) /* element size 2: 16-bit samples */ + { + x265_log(NULL, X265_LOG_ERROR, "reference fread to skip u and v failed.\n"); + goto fail_or_end; + } + } + else + { + x265_log(NULL, X265_LOG_ERROR, "Invalid format\n"); + goto fail_or_end; + } + + // distorted skip u and v + if (user_data->internalBitDepth == 8) + { + if (fread(temp_data, 1, user_data->offset, user_data->distorted_file) != (size_t)user_data->offset) + { + x265_log(NULL, X265_LOG_ERROR, "distorted fread to skip u and v failed.\n"); + goto fail_or_end; + } + } + else if (user_data->internalBitDepth == 10) + { + if (fread(temp_data, 2, user_data->offset, user_data->distorted_file) != (size_t)user_data->offset) /* element size 2: 16-bit samples */ + { + x265_log(NULL, X265_LOG_ERROR, "distorted fread to skip u and v failed.\n"); + goto fail_or_end; + } + } + else + { + x265_log(NULL, X265_LOG_ERROR, "Invalid format\n"); + goto fail_or_end; + } + + return 0; /* both luma planes delivered and both chroma blocks skipped */ +fail_or_end: + return 1; /* a chroma skip failed: report the error instead of the 0 left in ret by the luma reads */ +} +/* Compute the aggregate VMAF score of the sequence. Copies the picture size and bit depth from param into data, sets data->offset to the number of chroma samples per frame for the colour space, then runs compute_vmaf() with read_frame() as the frame source. data->reference_file and data->distorted_file are read through read_frame() and are not opened here. Returns the score, or 0 if compute_vmaf() does not write one. */ +double x265_calculate_vmafscore(x265_param *param, x265_vmaf_data *data) +{ + double score = 0; /* defined result even if compute_vmaf() fails before writing a score */ + + data->width = param->sourceWidth; + data->height = param->sourceHeight; + data->internalBitDepth = param->internalBitDepth; + + if (param->internalCsp == X265_CSP_I420) + { + if ((param->sourceWidth * param->sourceHeight) % 2 != 0) + x265_log(NULL, X265_LOG_ERROR, "Invalid file size\n"); /* NOTE(review): only logged; scoring still proceeds */ + data->offset = param->sourceWidth * param->sourceHeight / 2; /* 4:2:0: two quarter-size chroma planes */ + } + else if (param->internalCsp == X265_CSP_I422) + data->offset = param->sourceWidth * param->sourceHeight; /* 4:2:2: two half-size chroma planes */ + else if (param->internalCsp == X265_CSP_I444) + data->offset = param->sourceWidth * param->sourceHeight * 2; /* 4:4:4: two full-size chroma planes */ + else + x265_log(NULL, X265_LOG_ERROR, "Invalid format\n"); /* NOTE(review): only logged; data->offset keeps whatever the caller left in it */ + + compute_vmaf(&score, vcd->format, data->width, data->height, read_frame, data, vcd->model_path, 
vcd->log_path, vcd->log_fmt, vcd->disable_clip, vcd->disable_avx, vcd->enable_transform, vcd->phone_model, vcd->psnr, vcd->ssim, vcd->ms_ssim, vcd->pool); + + return score; +} +/* Frame source for the per-frame score when pictures hold 16-bit samples; s is an x265_vmaf_framedata. On the first call it copies the luma plane of the source and reconstructed PicYuv into reference_data and distorted_data, scaled down to the 8-bit range, and returns 0. Every later call returns 2, the code read_frame() uses for end of input. stride is a byte pitch. */ +int read_frame_10bit(float *reference_data, float *distorted_data, float *temp_data, int stride, void *s) +{ + x265_vmaf_framedata *user_data = (x265_vmaf_framedata *)s; + + PicYuv *reference_frame = (PicYuv *)user_data->reference_frame; + PicYuv *distorted_frame = (PicYuv *)user_data->distorted_frame; + + if(!user_data->frame_set) { + + int reference_stride = reference_frame->m_stride; + int distorted_stride = distorted_frame->m_stride; + + const uint16_t *reference_ptr = (const uint16_t *)reference_frame->m_picOrg[0]; + const uint16_t *distorted_ptr = (const uint16_t *)distorted_frame->m_picOrg[0]; + + temp_data = reference_data; /* the scratch-buffer parameter is reused as a row cursor */ + + int height = user_data->height; + int width = user_data->width; + const float scale = (float)(1 << (user_data->internalBitDepth - 8)); /* 4 for 10-bit, 16 for 12-bit: this reader serves every depth above 8 */ + int i,j; + for (i = 0; i < height; i++) { + for ( j = 0; j < width; j++) { + temp_data[j] = ((float)reference_ptr[j] / scale); + } + reference_ptr += reference_stride; + temp_data += stride / sizeof(*temp_data); /* byte pitch converted to a float count */ + } + + temp_data = distorted_data; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + temp_data[j] = ((float)distorted_ptr[j] / scale); + } + distorted_ptr += distorted_stride; + temp_data += stride / sizeof(*temp_data); + } + + user_data->frame_set = 1; /* the single frame has now been delivered */ + return 0; + } + return 2; +} +/* Frame source for the per-frame score when pictures hold 8-bit samples; same contract as read_frame_10bit() but samples are copied without scaling. */ +int read_frame_8bit(float *reference_data, float *distorted_data, float *temp_data, int stride, void *s) +{ + x265_vmaf_framedata *user_data = (x265_vmaf_framedata *)s; + + PicYuv *reference_frame = (PicYuv *)user_data->reference_frame; + PicYuv *distorted_frame = (PicYuv *)user_data->distorted_frame; + + if(!user_data->frame_set) { + + int reference_stride = reference_frame->m_stride; + int distorted_stride = distorted_frame->m_stride; + + const uint8_t *reference_ptr = (const uint8_t *)reference_frame->m_picOrg[0]; + const uint8_t *distorted_ptr = (const uint8_t 
*)distorted_frame->m_picOrg[0]; + + temp_data = reference_data; /* the scratch-buffer parameter is reused as a row cursor */ + + int height = user_data->height; + int width = user_data->width; + + int i,j; + for (i = 0; i < height; i++) { + for ( j = 0; j < width; j++) { + temp_data[j] = (float)reference_ptr[j]; + } + reference_ptr += reference_stride; + temp_data += stride / sizeof(*temp_data); /* byte pitch converted to a float count */ + } + + temp_data = distorted_data; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + temp_data[j] = (float)distorted_ptr[j]; + } + distorted_ptr += distorted_stride; + temp_data += stride / sizeof(*temp_data); + } + + user_data->frame_set = 1; /* the single frame has now been delivered */ + return 0; + } + return 2; +} +/* Compute the VMAF score of the one frame described by vmafframedata. Picks the frame reader that matches the sample width and runs compute_vmaf() on it. Returns the score, or 0 if compute_vmaf() does not write one. */ +double x265_calculate_vmaf_framelevelscore(x265_vmaf_framedata *vmafframedata) +{ + double score = 0; /* defined result even if compute_vmaf() fails before writing a score */ + int (*read_frame)(float *reference_data, float *distorted_data, float *temp_data, + int stride, void *s); + if (vmafframedata->internalBitDepth == 8) + read_frame = read_frame_8bit; + else + read_frame = read_frame_10bit; /* every depth other than 8 uses the 16-bit-sample reader */ + compute_vmaf(&score, vcd->format, vmafframedata->width, vmafframedata->height, read_frame, vmafframedata, vcd->model_path, vcd->log_path, vcd->log_fmt, vcd->disable_clip, vcd->disable_avx, vcd->enable_transform, vcd->phone_model, vcd->psnr, vcd->ssim, vcd->ms_ssim, vcd->pool); + + return score; +} +#endif } /* end namespace or extern "C" */ diff -r 04a337abd70d -r 27e3b161cd8b source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Thu Apr 12 15:10:59 2018 +0530 +++ b/source/encoder/encoder.cpp Tue Feb 13 18:54:50 2018 +0530 @@ -2127,6 +2127,9 @@ #define ELAPSED_MSEC(start, end) (((double)(end) - (start)) / 1000) if (m_param->csvLogLevel >= 2) { +#if ENABLE_LIBVMAF + frameStats->vmafFrameScore = curFrame->m_fencPic->m_vmafScore; /* copy the score stored on the source picture into the frame statistics */ +#endif frameStats->decideWaitTime = ELAPSED_MSEC(0, curEncoder->m_slicetypeWaitTime); frameStats->row0WaitTime = ELAPSED_MSEC(curEncoder->m_startCompressTime, curEncoder->m_row0WaitTime); frameStats->wallTime = ELAPSED_MSEC(curEncoder->m_row0WaitTime, curEncoder->m_endCompressTime); diff -r 
04a337abd70d -r 27e3b161cd8b source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Thu Apr 12 15:10:59 2018 +0530 +++ b/source/encoder/frameencoder.cpp Tue Feb 13 18:54:50 2018 +0530 @@ -864,6 +864,9 @@ m_frameFilter.processRow(i - m_filterRowDelay); } } +#if ENABLE_LIBVMAF + vmafFrameLevelScore(); /* score the frame once the filter rows above have been processed */ +#endif if (m_param->maxSlices > 1) { @@ -932,7 +935,7 @@ updateChecksum(reconPic->m_picOrg[1], m_checksum[1], height, width, stride, 0, cuHeight); updateChecksum(reconPic->m_picOrg[2], m_checksum[2], height, width, stride, 0, cuHeight); } - } + } } // end of (m_param->maxSlices > 1) if (m_param->rc.bStatWrite) @@ -1189,7 +1192,7 @@ m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId], *m_param); #endif - m_endFrameTime = x265_mdate(); + m_endFrameTime = x265_mdate(); } void FrameEncoder::encodeSlice(uint32_t sliceAddr) @@ -2058,11 +2061,36 @@ m_nr->nrOffsetDenoise[cat][0] = 0; } } +#if ENABLE_LIBVMAF +void FrameEncoder::vmafFrameLevelScore() +{ /* Score the reconstructed picture of m_frame against its source picture and store the result in the source PicYuv's m_vmafScore. */ + PicYuv *fenc = m_frame->m_fencPic; + PicYuv *recon = m_frame->m_reconPic; + + x265_vmaf_framedata *vmafframedata = (x265_vmaf_framedata*)x265_malloc(sizeof(x265_vmaf_framedata)); + if (!vmafframedata) + { + x265_log(NULL, X265_LOG_ERROR, "vmaf frame data alloc failed\n"); + fenc->m_vmafScore = 0; /* leave a defined score for the CSV writer */ + return; /* nothing to fill in or score without the descriptor */ + } + + vmafframedata->height = fenc->m_picHeight; + vmafframedata->width = fenc->m_picWidth; + vmafframedata->frame_set = 0; /* frame not yet handed to the reader callback */ + vmafframedata->internalBitDepth = m_param->internalBitDepth; + vmafframedata->reference_frame = fenc; + vmafframedata->distorted_frame = recon; + + fenc->m_vmafScore = x265_calculate_vmaf_framelevelscore(vmafframedata); + x265_free(vmafframedata); /* non-null here: the failure path returned above */ +} +#endif Frame *FrameEncoder::getEncodedPicture(NALList& output) { if (m_frame) - { + { /* block here until worker thread completes */ m_done.wait(); diff -r 04a337abd70d -r 27e3b161cd8b source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Thu Apr 12 15:10:59 2018 +0530 +++ b/source/encoder/frameencoder.h Tue Feb 13 18:54:50 2018 +0530 
@@ -240,6 +240,9 @@ void enqueueRowFilter(int row) { WaveFront::enqueueRow(row * 2 + 1); } void enableRowEncoder(int row) { WaveFront::enableRow(row * 2 + 0); } void enableRowFilter(int row) { WaveFront::enableRow(row * 2 + 1); } +#if ENABLE_LIBVMAF + void vmafFrameLevelScore(); /* computes and stores the per-frame VMAF score */ +#endif }; } diff -r 04a337abd70d -r 27e3b161cd8b source/x265.cpp --- a/source/x265.cpp Thu Apr 12 15:10:59 2018 +0530 +++ b/source/x265.cpp Tue Feb 13 18:54:50 2018 +0530 @@ -75,6 +75,7 @@ const char* reconPlayCmd; const x265_api* api; x265_param* param; + x265_vmaf_data* vmafData; /* file handles and geometry for the aggregate VMAF score; allocated only in ENABLE_LIBVMAF builds */ bool bProgress; bool bForceY4m; bool bDither; @@ -96,6 +97,7 @@ reconPlayCmd = NULL; api = NULL; param = NULL; + vmafData = NULL; framesToBeEncoded = seek = 0; totalbytes = 0; bProgress = true; @@ -216,6 +218,14 @@ x265_log(NULL, X265_LOG_ERROR, "param alloc failed\n"); return true; } +#if ENABLE_LIBVMAF + vmafData = (x265_vmaf_data*)x265_malloc(sizeof(x265_vmaf_data)); + if(!vmafData) + { + x265_log(NULL, X265_LOG_ERROR, "vmaf data alloc failed\n"); + return true; + } +#endif if (api->param_default_preset(param, preset, tune) < 0) { @@ -363,6 +373,7 @@ info.frameCount = 0; getParamAspectRatio(param, info.sarWidth, info.sarHeight); + this->input = InputFile::open(info, this->bForceY4m); if (!this->input || this->input->isFail()) { @@ -439,7 +450,30 @@ param->sourceWidth, param->sourceHeight, param->fpsNum, param->fpsDenom, x265_source_csp_names[param->internalCsp]); } +#if ENABLE_LIBVMAF + if (!reconfn) /* the aggregate score compares the input file with the recon file */ + { + x265_log(param, X265_LOG_ERROR, "recon file must be specified to get VMAF score, try --help for help\n"); + return true; + } + const char *str = strrchr(info.filename, '.'); /* file extension, if any */ + if (str && !strcmp(str, ".y4m")) /* strrchr() returns NULL when the name has no '.', which must not reach strcmp() */ + { + x265_log(param, X265_LOG_ERROR, "VMAF supports YUV file format only.\n"); + return true; + } + if(param->internalCsp == X265_CSP_I420 || param->internalCsp == X265_CSP_I422 || param->internalCsp == X265_CSP_I444) + { + vmafData->reference_file = x265_fopen(inputfn, "rb"); + vmafData->distorted_file = 
x265_fopen(reconfn, "rb"); /* NOTE(review): neither x265_fopen() result is checked for NULL here */ + } + else + { + x265_log(param, X265_LOG_ERROR, "VMAF will support only yuv420p, yuv422p, yuv444p, yuv420p10le, yuv422p10le, yuv444p10le formats.\n"); + return true; + } +#endif this->output = OutputFile::open(outputfn, info); if (this->output->isFail()) { @@ -555,7 +589,9 @@ x265_param* param = cliopt.param; const x265_api* api = cliopt.api; - +#if ENABLE_LIBVMAF + x265_vmaf_data* vmafdata = cliopt.vmafData; /* handed to vmaf_encoder_log() after encoding */ +#endif /* This allows muxers to modify bitstream format */ cliopt.output->setParam(param); @@ -712,7 +748,7 @@ if (!numEncoded) break; } - + /* clear progress report */ if (cliopt.bProgress) fprintf(stderr, "%*s\r", 80, " "); @@ -723,7 +759,11 @@ api->encoder_get_stats(encoder, &stats, sizeof(stats)); if (param->csvfn && !b_ctrl_c) +#if ENABLE_LIBVMAF + api->vmaf_encoder_log(encoder, argc, argv, param, vmafdata); /* VMAF builds use the VMAF-aware CSV logger instead of encoder_log() */ +#else api->encoder_log(encoder, argc, argv); +#endif api->encoder_close(encoder); int64_t second_largest_pts = 0; diff -r 04a337abd70d -r 27e3b161cd8b source/x265.h --- a/source/x265.h Thu Apr 12 15:10:59 2018 +0530 +++ b/source/x265.h Tue Feb 13 18:54:50 2018 +0530 @@ -209,6 +209,7 @@ x265_cu_stats cuStats; x265_pu_stats puStats; double totalFrameTime; + double vmafFrameScore; /* VMAF score of this frame; filled in only by ENABLE_LIBVMAF builds */ } x265_frame_stats; typedef struct x265_ctu_info_t @@ -536,6 +537,7 @@ double elapsedEncodeTime; /* wall time since encoder was opened */ double elapsedVideoTime; /* encoded picture count / frame rate */ double bitrate; /* accBits / elapsed video time */ + double aggregateVmafScore; /* aggregate VMAF score for input video*/ uint64_t accBits; /* total bits output thus far */ uint32_t encodedPictureCount; /* number of output pictures thus far */ uint32_t totalWPFrames; /* number of uni-directional weighted frames used */ @@ -572,6 +574,47 @@ float bitrateFactor; } x265_zone; +/* data to calculate aggregate VMAF score */ +typedef struct x265_vmaf_data +{ + int width; /* picture width, copied from x265_param::sourceWidth */ + int height; /* picture height, copied from x265_param::sourceHeight */ + size_t offset; /* chroma samples per frame, skipped after each luma plane */ + int internalBitDepth; /* 8 or 10; other depths are rejected by the file reader */ + FILE *reference_file; /* FILE 
pointer for input file */ + FILE *distorted_file; /* FILE pointer for recon file generated*/ +}x265_vmaf_data; + +/* data to calculate frame level VMAF score */ +typedef struct x265_vmaf_framedata +{ + int width; + int height; + int frame_set; + int internalBitDepth; + void *reference_frame; /* points to fenc of particular frame */ + void *distorted_frame; /* points to recon of particular frame */ +}x265_vmaf_framedata; + +/* common data needed to calculate both frame level and video level VMAF scores */ +typedef struct x265_vmaf_commondata +{ + char *format; + char *model_path; + char *log_path; + char *log_fmt; + int disable_clip; + int disable_avx; + int enable_transform; + int phone_model; + int psnr; + int ssim; + int ms_ssim; + char *pool; +}x265_vmaf_commondata; + +static const x265_vmaf_commondata vcd[] = {NULL, (char *)"/usr/local/share/model/vmaf_v0.6.1.pkl", NULL, NULL, 0, 0, 0, 0, 0, 0, 0, NULL}; + /* x265 input parameters * * For version safety you may use x265_param_alloc/free() to manage the @@ -1811,6 +1854,22 @@ /* In-place downshift from a bit-depth greater than 8 to a bit-depth of 8, using * the residual bits to dither each row. */ void x265_dither_image(x265_picture *, int picWidth, int picHeight, int16_t *errorBuf, int bitDepth); +#if ENABLE_LIBVMAF +/* x265_calculate_vmafScore: + * returns VMAF score for the input video. + * This api must be called only after encoding was done. */ +double x265_calculate_vmafscore(x265_param*, x265_vmaf_data*); + +/* x265_calculate_vmaf_framelevelscore: + * returns VMAF score for each frame in a given input video. */ +double x265_calculate_vmaf_framelevelscore(x265_vmaf_framedata*); +/* x265_vmaf_encoder_log: + * write a line to the configured CSV file. If a CSV filename was not + * configured, or file open failed, this function will perform no write. 
+ * This API is available only when the ENABLE_LIBVMAF cmake option is set. */ +void x265_vmaf_encoder_log(x265_encoder *encoder, int argc, char **argv, x265_param*, x265_vmaf_data*); + +#endif #define X265_MAJOR_VERSION 1 @@ -1864,6 +1923,11 @@ void (*csvlog_encode)(const x265_param*, const x265_stats *, int, int, int, char**); void (*dither_image)(x265_picture*, int, int, int16_t*, int); int (*set_analysis_data)(x265_encoder *encoder, x265_analysis_data *analysis_data, int poc, uint32_t cuBytes); +#if ENABLE_LIBVMAF /* NOTE(review): these members exist only in ENABLE_LIBVMAF builds, so the size of x265_api differs between builds - confirm library and application are compiled alike */ + double (*calculate_vmafscore)(x265_param *, x265_vmaf_data *); /* same signature as x265_calculate_vmafscore() */ + double (*calculate_vmaf_framelevelscore)(x265_vmaf_framedata *); /* same signature as x265_calculate_vmaf_framelevelscore() */ + void (*vmaf_encoder_log)(x265_encoder*, int, char**, x265_param *, x265_vmaf_data *); /* same signature as x265_vmaf_encoder_log() */ +#endif /* add new pointers to the end, or increment X265_MAJOR_VERSION */ } x265_api;
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel