vlc | branch: master | Steve Lhomme <[email protected]> | Tue Feb 9 09:32:06 2021 +0100| [ad460c422795928a484c87e82025853da01d9614] | committer: Steve Lhomme
d3d_shader: separate the code to generate shader blobs dynamically The ID3D10Blob format is only known by a tiny part of the code now. > http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=ad460c422795928a484c87e82025853da01d9614 --- modules/video_output/Makefile.am | 1 + modules/video_output/win32/d3d11_shaders.c | 30 +- modules/video_output/win32/d3d11_shaders.h | 1 + modules/video_output/win32/d3d_dynamic_shader.c | 723 ++++++++++++++++++++++++ modules/video_output/win32/d3d_dynamic_shader.h | 48 ++ modules/video_output/win32/d3d_shaders.c | 688 +--------------------- modules/video_output/win32/d3d_shaders.h | 34 +- modules/video_output/win32/direct3d11.c | 4 +- 8 files changed, 805 insertions(+), 724 deletions(-) diff --git a/modules/video_output/Makefile.am b/modules/video_output/Makefile.am index 5900db1c19..375da4b28d 100644 --- a/modules/video_output/Makefile.am +++ b/modules/video_output/Makefile.am @@ -135,6 +135,7 @@ libdirect3d11_plugin_la_SOURCES = video_output/win32/direct3d11.c \ video_output/win32/d3d11_quad.c video_output/win32/d3d11_quad.h \ video_output/win32/d3d11_shaders.c video_output/win32/d3d11_shaders.h \ video_output/win32/d3d_shaders.c video_output/win32/d3d_shaders.h \ + video_output/win32/d3d_dynamic_shader.c video_output/win32/d3d_dynamic_shader.h \ video_output/win32/d3d11_swapchain.c video_output/win32/d3d11_swapchain.h \ video_output/win32/dxgi_swapchain.c video_output/win32/dxgi_swapchain.h \ video_output/win32/common.c video_output/win32/common.h diff --git a/modules/video_output/win32/d3d11_shaders.c b/modules/video_output/win32/d3d11_shaders.c index 18aa1bca2f..39a407ee8a 100644 --- a/modules/video_output/win32/d3d11_shaders.c +++ b/modules/video_output/win32/d3d11_shaders.c @@ -37,6 +37,8 @@ #include <d3d11.h> #include "d3d11_shaders.h" +#include "d3d_dynamic_shader.h" + HRESULT (D3D11_CompilePixelShader)(vlc_object_t *o, const d3d_shader_compiler_t *compiler, d3d11_device_t *d3d_dev, bool texture_array, @@ -45,7 +47,7 @@ HRESULT (D3D11_CompilePixelShader)(vlc_object_t *o, const d3d_shader_compiler_t video_color_primaries_t primaries, bool src_full_range, d3d11_quad_t *quad) { - ID3DBlob *pPSBlob[DXGI_MAX_RENDER_TARGET]; + d3d_shader_blob pPSBlob[DXGI_MAX_RENDER_TARGET] = { 0 }; D3D11_SAMPLER_DESC sampDesc; memset(&sampDesc, 0, sizeof(sampDesc)); @@ -79,18 +81,18 @@ HRESULT (D3D11_CompilePixelShader)(vlc_object_t *o, const d3d_shader_compiler_t if (SUCCEEDED(hr)) { hr = ID3D11Device_CreatePixelShader(d3d_dev->d3ddevice, - (void *)ID3D10Blob_GetBufferPointer(pPSBlob[0]), - ID3D10Blob_GetBufferSize(pPSBlob[0]), NULL, &quad->d3dpixelShader[0]); + pPSBlob[0].buffer, pPSBlob[0].buf_size, + NULL, &quad->d3dpixelShader[0]); - ID3D10Blob_Release(pPSBlob[0]); + D3D_ShaderBlobRelease(&pPSBlob[0]); - if (pPSBlob[1]) + if (pPSBlob[1].buffer) { hr = ID3D11Device_CreatePixelShader(d3d_dev->d3ddevice, - (void *)ID3D10Blob_GetBufferPointer(pPSBlob[1]), - ID3D10Blob_GetBufferSize(pPSBlob[1]), NULL, &quad->d3dpixelShader[1]); + pPSBlob[1].buffer, pPSBlob[1].buf_size, + NULL, &quad->d3dpixelShader[1]); - ID3D10Blob_Release(pPSBlob[1]); + D3D_ShaderBlobRelease(&pPSBlob[1]); } } return hr; @@ -152,14 +154,14 @@ static HRESULT CompileVertexShader(vlc_object_t *obj, const d3d_shader_compiler_ d3d11_device_t *d3d_dev, bool flat, d3d11_vertex_shader_t *output) { - ID3DBlob *pVSBlob; + d3d_shader_blob pVSBlob = { 0 }; HRESULT hr; hr = D3D_CompileVertexShader(obj, compiler, d3d_dev->feature_level, flat, &pVSBlob); if (FAILED(hr)) return hr; - hr = ID3D11Device_CreateVertexShader(d3d_dev->d3ddevice, (void *)ID3D10Blob_GetBufferPointer(pVSBlob), - ID3D10Blob_GetBufferSize(pVSBlob), NULL, &output->shader); + hr = ID3D11Device_CreateVertexShader(d3d_dev->d3ddevice, pVSBlob.buffer, + pVSBlob.buf_size, NULL, &output->shader); if(FAILED(hr)) { msg_Err(obj, "Failed to create the flat vertex shader. (hr=0x%lX)", hr); @@ -172,8 +174,8 @@ static HRESULT CompileVertexShader(vlc_object_t *obj, const d3d_shader_compiler_ { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0}, }; - hr = ID3D11Device_CreateInputLayout(d3d_dev->d3ddevice, layout, 2, (void *)ID3D10Blob_GetBufferPointer(pVSBlob), - ID3D10Blob_GetBufferSize(pVSBlob), &output->layout); + hr = ID3D11Device_CreateInputLayout(d3d_dev->d3ddevice, layout, 2, pVSBlob.buffer, + pVSBlob.buf_size, &output->layout); if(FAILED(hr)) { msg_Err(obj, "Failed to create the vertex input layout. (hr=0x%lX)", hr); @@ -182,7 +184,7 @@ static HRESULT CompileVertexShader(vlc_object_t *obj, const d3d_shader_compiler_ return S_OK; error: - ID3D10Blob_Release(pVSBlob); + D3D_ShaderBlobRelease(&pVSBlob); return hr; } diff --git a/modules/video_output/win32/d3d11_shaders.h b/modules/video_output/win32/d3d11_shaders.h index e0a716a2a6..c0e4b87e2e 100644 --- a/modules/video_output/win32/d3d11_shaders.h +++ b/modules/video_output/win32/d3d11_shaders.h @@ -24,6 +24,7 @@ #define VLC_D3D11_SHADERS_H #include "d3d_shaders.h" +#include "d3d_dynamic_shader.h" #include "../../video_chroma/d3d11_fmt.h" diff --git a/modules/video_output/win32/d3d_dynamic_shader.c b/modules/video_output/win32/d3d_dynamic_shader.c new file mode 100644 index 0000000000..00ac461433 --- /dev/null +++ b/modules/video_output/win32/d3d_dynamic_shader.c @@ -0,0 +1,723 @@ +/***************************************************************************** + * d3d_dynamic_shader.c: Direct3D Shader APIs + ***************************************************************************** + * Copyright (C) 2017-2021 VLC authors and VideoLAN + * + * Authors: Steve Lhomme <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. + *****************************************************************************/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <windows.h> +#include <assert.h> + +#include <vlc_common.h> + +#define COBJMACROS +#include <d3dcompiler.h> + +#include "d3d_shaders.h" +#include "d3d_dynamic_shader.h" + +static const char globPixelShaderDefault[] = "\ + cbuffer PS_CONSTANT_BUFFER : register(b0)\n\ + {\n\ + float4x4 WhitePoint;\n\ + float4x4 Colorspace;\n\ + float4x4 Primaries;\n\ + float Opacity;\n\ + float LuminanceScale;\n\ + float BoundaryX;\n\ + float BoundaryY;\n\ + };\n\ + Texture2D%s shaderTexture[4];\n\ + SamplerState normalSampler : register(s0);\n\ + SamplerState borderSampler : register(s1);\n\ + \n\ + struct PS_INPUT\n\ + {\n\ + float4 Position : SV_POSITION;\n\ + float3 Texture : TEXCOORD;\n\ + };\n\ + \n\ + /* see http://filmicworlds.com/blog/filmic-tonemapping-operators/ */\n\ + inline float4 hable(float4 x) {\n\ + const float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30;\n\ + return ((x * (A*x + (C*B))+(D*E))/(x * (A*x + B) + (D*F))) - E/F;\n\ + }\n\ + \n\ + /* https://en.wikipedia.org/wiki/Hybrid_Log-Gamma#Technical_details */\n\ + inline float inverse_HLG(float x){\n\ + const float B67_a = 0.17883277;\n\ + const float B67_b = 0.28466892;\n\ + const float B67_c = 0.55991073;\n\ + const float B67_inv_r2 = 4.0; /* 1/0.5² */\n\ + if (x <= 0.5)\n\ + x = x * x * B67_inv_r2;\n\ + else\n\ + x = exp((x - B67_c) / B67_a) + B67_b;\n\ + return x;\n\ + }\n\ + \n\ + inline float4 sourceToLinear(float4 rgb) {\n\ +const float ST2084_m1 = 2610.0 / (4096.0 * 4);\n\ +const float ST2084_m2 = (2523.0 / 4096.0) * 128.0;\n\ +const float ST2084_c1 = 3424.0 / 4096.0;\n\ +const float ST2084_c2 = (2413.0 / 4096.0) * 32.0;\n\ +const float ST2084_c3 = (2392.0 / 4096.0) * 32.0;\n\ +%s;\n\ + }\n\ + \n\ + inline float4 linearToDisplay(float4 rgb) {\n\ +%s;\n\ + }\n\ + \n\ + inline float4 transformPrimaries(float4 rgb) {\n\ +%s;\n\ + }\n\ + \n\ + inline float4 toneMapping(float4 rgb) {\n\ +%s;\n\ + }\n\ + \n\ + inline float4 adjustRange(float4 rgb) {\n\ +%s;\n\ + }\n\ + \n\ + inline float4 reorderPlanes(float4 rgb) {\n\ +%s;\n\ + }\n\ + \n\ + inline float4 sampleTexture(SamplerState samplerState, float3 coords) {\n\ + float4 sample;\n\ +%s /* sampling routine in sample */\n\ + return sample;\n\ + }\n\ + \n\ + float4 main( PS_INPUT In ) : SV_TARGET\n\ + {\n\ + float4 sample;\n\ + \n\ + if (In.Texture.x > BoundaryX || In.Texture.y > BoundaryY) \n\ + sample = sampleTexture( borderSampler, In.Texture );\n\ + else\n\ + sample = sampleTexture( normalSampler, In.Texture );\n\ + float4 rgba = max(mul(mul(sample, WhitePoint), Colorspace),0);\n\ + float opacity = rgba.a * Opacity;\n\ + float4 rgb = rgba; rgb.a = 0;\n\ + rgb = sourceToLinear(rgb);\n\ + rgb = transformPrimaries(rgb);\n\ + rgb = toneMapping(rgb);\n\ + rgb = linearToDisplay(rgb);\n\ + rgb = adjustRange(rgb);\n\ + rgb = reorderPlanes(rgb);\n\ + return float4(rgb.rgb, saturate(opacity));\n\ + }\n\ +"; + +static const char globVertexShaderFlat[] = "\ +struct d3d_vertex_t\n\ +{\n\ + float3 Position : POSITION;\n\ + float2 uv : TEXCOORD;\n\ +};\n\ +\n\ +struct PS_INPUT\n\ +{\n\ + float4 Position : SV_POSITION;\n\ + float3 Texture : TEXCOORD;\n\ +};\n\ +\n\ +PS_INPUT main( d3d_vertex_t In )\n\ +{\n\ + PS_INPUT Output;\n\ + Output.Position = float4(In.Position, 1);\n\ + Output.Texture = float3(In.uv, 0);\n\ + return Output;\n\ +}\n\ +"; + +static const char globVertexShaderProjection[] = "\n\ +cbuffer VS_PROJECTION_CONST : register(b0)\n\ +{\n\ + float4x4 View;\n\ + float4x4 Zoom;\n\ + float4x4 Projection;\n\ +};\n\ +struct d3d_vertex_t\n\ +{\n\ + float3 Position : POSITION;\n\ + float2 uv : TEXCOORD;\n\ +};\n\ +\n\ +struct PS_INPUT\n\ +{\n\ + float4 Position : SV_POSITION;\n\ + float3 Texture : TEXCOORD;\n\ +};\n\ +\n\ +PS_INPUT main( d3d_vertex_t In )\n\ +{\n\ + PS_INPUT Output;\n\ + float4 pos = float4(In.Position, 1);\n\ + pos = mul(View, pos);\n\ + pos = mul(Zoom, pos);\n\ + pos = mul(Projection, pos);\n\ + Output.Position = pos;\n\ + Output.Texture = float3(In.uv, 0);\n\ + return Output;\n\ +}\n\ +"; + +static void ReleaseID3D10Blob(d3d_shader_blob *blob) +{ + ID3D10Blob_Release( (ID3D10Blob*)blob->opaque ); +} + +static void ID3D10BlobtoBlob(ID3D10Blob *d3dblob, d3d_shader_blob *blob) +{ + blob->opaque = d3dblob; + blob->pf_release = ReleaseID3D10Blob; + blob->buf_size = ID3D10Blob_GetBufferSize(d3dblob); + blob->buffer = ID3D10Blob_GetBufferPointer(d3dblob); +} + + +static HRESULT CompileShader(vlc_object_t *obj, const d3d_shader_compiler_t *compiler, + D3D_FEATURE_LEVEL feature_level, + const char *psz_shader, bool pixelShader, + d3d_shader_blob *blob) +{ + ID3D10Blob* pShaderBlob = NULL, *pErrBlob; + const char *target; + if (pixelShader) + { + if (likely(feature_level >= D3D_FEATURE_LEVEL_10_0)) + target = "ps_4_0"; + else if (feature_level >= D3D_FEATURE_LEVEL_9_3) + target = "ps_4_0_level_9_3"; + else + target = "ps_4_0_level_9_1"; + } + else + { + if (likely(feature_level >= D3D_FEATURE_LEVEL_10_0)) + target = "vs_4_0"; + else if (feature_level >= D3D_FEATURE_LEVEL_9_3) + target = "vs_4_0_level_9_3"; + else + target = "vs_4_0_level_9_1"; + } + + UINT compileFlags = 0; +#if VLC_WINSTORE_APP + VLC_UNUSED(compiler); +#else +# define D3DCompile(args...) compiler->OurD3DCompile(args) +# if !defined(NDEBUG) + if (IsDebuggerPresent()) + compileFlags += D3DCOMPILE_DEBUG; +# endif +#endif + HRESULT hr = D3DCompile(psz_shader, strlen(psz_shader), + NULL, NULL, NULL, "main", target, + compileFlags, 0, &pShaderBlob, &pErrBlob); + + if (FAILED(hr)) { + char *err = pErrBlob ? ID3D10Blob_GetBufferPointer(pErrBlob) : NULL; + msg_Err(obj, "invalid %s Shader (hr=0x%lX): %s", pixelShader?"Pixel":"Vertex", hr, err ); + if (pErrBlob) + ID3D10Blob_Release(pErrBlob); + return E_FAIL; + } + if (!pShaderBlob) + return E_INVALIDARG; + ID3D10BlobtoBlob(pShaderBlob, blob); + return S_OK; +} + +static HRESULT CompilePixelShaderBlob(vlc_object_t *o, const d3d_shader_compiler_t *compiler, + D3D_FEATURE_LEVEL feature_level, + bool texture_array, + const char *psz_sampler, + const char *psz_src_to_linear, + const char *psz_primaries_transform, + const char *psz_linear_to_display, + const char *psz_tone_mapping, + const char *psz_adjust_range, const char *psz_move_planes, + d3d_shader_blob *pPSBlob) +{ + char *shader; + int allocated = asprintf(&shader, globPixelShaderDefault, texture_array ? "Array" : "", + psz_src_to_linear, psz_linear_to_display, + psz_primaries_transform, psz_tone_mapping, + psz_adjust_range, psz_move_planes, psz_sampler); + if (allocated <= 0) + { + msg_Err(o, "no room for the Pixel Shader"); + return E_OUTOFMEMORY; + } + if (var_InheritInteger(o, "verbose") >= 4) + msg_Dbg(o, "shader %s", shader); +#ifndef NDEBUG + else { + msg_Dbg(o,"psz_src_to_linear %s", psz_src_to_linear); + msg_Dbg(o,"psz_primaries_transform %s", psz_primaries_transform); + msg_Dbg(o,"psz_tone_mapping %s", psz_tone_mapping); + msg_Dbg(o,"psz_linear_to_display %s", psz_linear_to_display); + msg_Dbg(o,"psz_adjust_range %s", psz_adjust_range); + msg_Dbg(o,"psz_sampler %s", psz_sampler); + msg_Dbg(o,"psz_move_planes %s", psz_move_planes); + } +#endif + + HRESULT hr = CompileShader(o, compiler, feature_level, shader, true, pPSBlob); + free(shader); + return hr; +} + +HRESULT (D3D_CompilePixelShader)(vlc_object_t *o, const d3d_shader_compiler_t *compiler, + D3D_FEATURE_LEVEL feature_level, + bool texture_array, + const display_info_t *display, + video_transfer_func_t transfer, + video_color_primaries_t primaries, bool src_full_range, + const d3d_format_t *dxgi_fmt, + d3d_shader_blob pPSBlob[DXGI_MAX_RENDER_TARGET]) +{ + static const char *DEFAULT_NOOP = "return rgb"; + const char *psz_sampler[DXGI_MAX_RENDER_TARGET] = {NULL, NULL}; + const char *psz_src_to_linear = DEFAULT_NOOP; + const char *psz_linear_to_display = DEFAULT_NOOP; + const char *psz_primaries_transform = DEFAULT_NOOP; + const char *psz_tone_mapping = "return rgb * LuminanceScale"; + const char *psz_adjust_range = DEFAULT_NOOP; + const char *psz_move_planes[2] = {DEFAULT_NOOP, DEFAULT_NOOP}; + char *psz_range = NULL; + + if ( display->pixelFormat->formatTexture == DXGI_FORMAT_NV12 || + display->pixelFormat->formatTexture == DXGI_FORMAT_P010 ) + { + /* we need 2 shaders, one for the Y target, one for the UV target */ + switch (dxgi_fmt->formatTexture) + { + case DXGI_FORMAT_NV12: + case DXGI_FORMAT_P010: + psz_sampler[0] = + "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" + "sample.y = 0.0;\n" + "sample.z = 0.0;\n" + "sample.a = 1;"; + psz_sampler[1] = + // TODO should be shaderTexture[0] ? + "sample.xy = shaderTexture[1].Sample(samplerState, coords).xy;\n" + "sample.z = 0.0;\n" + "sample.a = 1;"; + break; + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_B5G6R5_UNORM: + /* Y */ + psz_sampler[0] = + "sample = shaderTexture[0].Sample(samplerState, coords);\n"; + psz_move_planes[0] = "return rgb"; + /* UV */ + psz_sampler[1] = + "sample = shaderTexture[0].Sample(samplerState, coords);\n"; + psz_move_planes[1] = + "rgb.x = rgb.y;\n" + "rgb.y = rgb.z;\n" + "rgb.z = 0;\n" + "return rgb"; + break; + case DXGI_FORMAT_UNKNOWN: + switch (dxgi_fmt->fourcc) + { + case VLC_CODEC_YUVA: + /* Y */ + psz_sampler[0] = + "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" + "sample.y = 0.0;\n" + "sample.z = 0.0;\n" + "sample.a = shaderTexture[3].Sample(samplerState, coords).x;"; + /* UV */ + psz_sampler[1] = + "sample.x = shaderTexture[1].Sample(samplerState, coords).x;\n" + "sample.y = shaderTexture[2].Sample(samplerState, coords).x;\n" + "sample.z = 0.0;\n" + "sample.a = shaderTexture[3].Sample(samplerState, coords).x;"; + break; + default: + vlc_assert_unreachable(); + } + break; + default: + vlc_assert_unreachable(); + } + } + else + { + switch (dxgi_fmt->formatTexture) + { + case DXGI_FORMAT_NV12: + case DXGI_FORMAT_P010: + psz_sampler[0] = + "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" + "sample.yz = shaderTexture[1].Sample(samplerState, coords).xy;\n" + "sample.a = 1;"; + break; + case DXGI_FORMAT_YUY2: + psz_sampler[0] = + "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" + "sample.y = shaderTexture[0].Sample(samplerState, coords).y;\n" + "sample.z = shaderTexture[0].Sample(samplerState, coords).a;\n" + "sample.a = 1;"; + break; + case DXGI_FORMAT_Y210: + psz_sampler[0] = + "sample.x = shaderTexture[0].Sample(samplerState, coords).r;\n" + "sample.y = shaderTexture[0].Sample(samplerState, coords).g;\n" + "sample.z = shaderTexture[0].Sample(samplerState, coords).a;\n" + "sample.a = 1;"; + break; + case DXGI_FORMAT_Y410: + psz_sampler[0] = + "sample.x = shaderTexture[0].Sample(samplerState, coords).g;\n" + "sample.y = shaderTexture[0].Sample(samplerState, coords).r;\n" + "sample.z = shaderTexture[0].Sample(samplerState, coords).b;\n" + "sample.a = 1;"; + break; + case DXGI_FORMAT_AYUV: + psz_sampler[0] = + "sample.x = shaderTexture[0].Sample(samplerState, coords).z;\n" + "sample.y = shaderTexture[0].Sample(samplerState, coords).y;\n" + "sample.z = shaderTexture[0].Sample(samplerState, coords).x;\n" + "sample.a = 1;"; + break; + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_B5G6R5_UNORM: + psz_sampler[0] = + "sample = shaderTexture[0].Sample(samplerState, coords);"; + break; + case DXGI_FORMAT_UNKNOWN: + switch (dxgi_fmt->fourcc) + { + case VLC_CODEC_I420_10L: + psz_sampler[0] = + "float3 coords_2 = float3(coords.x/2, coords.y, coords.z);\n" + "sample.x = shaderTexture[0].Sample(samplerState, coords_2).x * 64;\n" + "sample.y = shaderTexture[1].Sample(samplerState, coords_2).x * 64;\n" + "sample.z = shaderTexture[2].Sample(samplerState, coords_2).x * 64;\n" + "sample.a = 1;"; + break; + case VLC_CODEC_I444_16L: + psz_sampler[0] = + "float3 coords_2 = float3(coords.x/2, coords.y, coords.z);\n" + "sample.x = shaderTexture[0].Sample(samplerState, coords_2).x;\n" + "sample.y = shaderTexture[1].Sample(samplerState, coords_2).x;\n" + "sample.z = shaderTexture[2].Sample(samplerState, coords_2).x;\n" + "sample.a = 1;"; + break; + case VLC_CODEC_I420: + psz_sampler[0] = + "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" + "sample.y = shaderTexture[1].Sample(samplerState, coords).x;\n" + "sample.z = shaderTexture[2].Sample(samplerState, coords).x;\n" + "sample.a = 1;"; + break; + case VLC_CODEC_YUVA: + psz_sampler[0] = + "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" + "sample.y = shaderTexture[1].Sample(samplerState, coords).x;\n" + "sample.z = shaderTexture[2].Sample(samplerState, coords).x;\n" + "sample.a = shaderTexture[3].Sample(samplerState, coords).x;"; + break; + default: + vlc_assert_unreachable(); + } + break; + default: + vlc_assert_unreachable(); + } + } + + video_transfer_func_t src_transfer; + + if (transfer != display->transfer) + { + /* we need to go in linear mode */ + switch (transfer) + { + case TRANSFER_FUNC_SMPTE_ST2084: + /* ST2084 to Linear */ + psz_src_to_linear = + "rgb = pow(max(rgb, 0), 1.0/ST2084_m2);\n" + "rgb = max(rgb - ST2084_c1, 0.0) / (ST2084_c2 - ST2084_c3 * rgb);\n" + "rgb = pow(rgb, 1.0/ST2084_m1);\n" + "return rgb * 10000"; + src_transfer = TRANSFER_FUNC_LINEAR; + break; + case TRANSFER_FUNC_HLG: + psz_src_to_linear = "const float alpha_gain = 2000; /* depends on the display output */\n" + "/* TODO: in one call */\n" + "rgb.r = inverse_HLG(rgb.r);\n" + "rgb.g = inverse_HLG(rgb.g);\n" + "rgb.b = inverse_HLG(rgb.b);\n" + "float3 ootf_2020 = float3(0.2627, 0.6780, 0.0593);\n" + "float ootf_ys = alpha_gain * dot(ootf_2020, rgb);\n" + "return rgb * pow(ootf_ys, 0.200)"; + src_transfer = TRANSFER_FUNC_LINEAR; + break; + case TRANSFER_FUNC_BT709: + psz_src_to_linear = "return pow(rgb, 1.0 / 0.45)"; + src_transfer = TRANSFER_FUNC_LINEAR; + break; + case TRANSFER_FUNC_BT470_M: + case TRANSFER_FUNC_SRGB: + psz_src_to_linear = "return pow(rgb, 2.2)"; + src_transfer = TRANSFER_FUNC_LINEAR; + break; + case TRANSFER_FUNC_BT470_BG: + psz_src_to_linear = "return pow(rgb, 2.8)"; + src_transfer = TRANSFER_FUNC_LINEAR; + break; + default: + msg_Dbg(o, "unhandled source transfer %d", transfer); + src_transfer = transfer; + break; + } + + switch (display->transfer) + { + case TRANSFER_FUNC_SRGB: + if (src_transfer == TRANSFER_FUNC_LINEAR) + { + /* Linear to sRGB */ + psz_linear_to_display = "return pow(rgb, 1.0 / 2.2)"; + + if (transfer == TRANSFER_FUNC_SMPTE_ST2084 || transfer == TRANSFER_FUNC_HLG) + { + /* HDR tone mapping */ + psz_tone_mapping = + "static const float4 HABLE_DIV = hable(11.2);\n" + "rgb = hable(rgb * LuminanceScale) / HABLE_DIV;\n" + "return rgb"; + } + } + else + msg_Warn(o, "don't know how to transfer from %d to sRGB", src_transfer); + break; + + case TRANSFER_FUNC_SMPTE_ST2084: + if (src_transfer == TRANSFER_FUNC_LINEAR) + { + /* Linear to ST2084 */ + psz_linear_to_display = + "rgb = pow(rgb / 10000, ST2084_m1);\n" + "rgb = (ST2084_c1 + ST2084_c2 * rgb) / (1 + ST2084_c3 * rgb);\n" + "rgb = pow(rgb, ST2084_m2);\n" + "return rgb"; + } + else + msg_Warn(o, "don't know how to transfer from %d to SMPTE ST 2084", src_transfer); + break; + default: + msg_Warn(o, "don't know how to transfer from %d to %d", src_transfer, display->transfer); + break; + } + } + + if (display->primaries != primaries) + { + switch (primaries) + { + case COLOR_PRIMARIES_BT601_525: + case COLOR_PRIMARIES_BT601_625: + case COLOR_PRIMARIES_BT709: + case COLOR_PRIMARIES_BT2020: + case COLOR_PRIMARIES_DCI_P3: + case COLOR_PRIMARIES_FCC1953: + psz_primaries_transform = "return max(mul(rgb, Primaries), 0)"; + break; + default: + /* see STANDARD_PRIMARIES */ + msg_Warn(o, "unhandled color primaries %d", primaries); + } + } + + int range_adjust = 0; + if (display->b_full_range) { + if (!src_full_range) + range_adjust = 1; /* raise the source to full range */ + } else { + if (src_full_range) + range_adjust = -1; /* lower the source to studio range */ + } + if (!DxgiIsRGBFormat(dxgi_fmt) && !src_full_range && DxgiIsRGBFormat(display->pixelFormat)) + range_adjust--; /* the YUV->RGB conversion already output full range */ + + if (range_adjust != 0) + { + FLOAT itu_black_level; + FLOAT itu_range_factor; + FLOAT itu_white_level; + switch (dxgi_fmt->bitsPerChannel) + { + case 8: + /* Rec. ITU-R BT.709-6 §4.6 */ + itu_black_level = 16.f / 255.f; + itu_white_level = 235.f / 255.f; + itu_range_factor = (float)(235 - 16) / 255.f; + break; + case 10: + /* Rec. ITU-R BT.709-6 §4.6 */ + itu_black_level = 64.f / 1023.f; + itu_white_level = 940.f / 1023.f; + itu_range_factor = (float)(940 - 64) / 1023.f; + break; + case 12: + /* Rec. ITU-R BT.2020-2 Table 5 */ + itu_black_level = 256.f / 4095.f; + itu_white_level = 3760.f / 4095.f; + itu_range_factor = (float)(3760 - 256) / 4095.f; + break; + default: + /* unknown bitdepth, use approximation for infinite bit depth */ + itu_black_level = 16.f / 256.f; + itu_white_level = 235.f / 256.f; + itu_range_factor = (float)(235 - 16) / 256.f; + break; + } + + FLOAT black_level = 0; + FLOAT range_factor = 1.0f; + if (range_adjust > 0) + { + /* expand the range from studio to full range */ + while (range_adjust--) + { + black_level -= itu_black_level; + range_factor /= itu_range_factor; + } + asprintf(&psz_range, "return clamp((rgb + %f) * %f, 0, 1)", + black_level, range_factor); + } + else + { + /* shrink the range to studio range */ + while (range_adjust++) + { + black_level += itu_black_level; + range_factor *= itu_range_factor; + } + asprintf(&psz_range, "return clamp(rgb + %f * %f,%f,%f)", + black_level, range_factor, itu_black_level, itu_white_level); + } + psz_adjust_range = psz_range; + } + + HRESULT hr; + hr = CompilePixelShaderBlob(o, compiler, feature_level, texture_array, + psz_sampler[0], + psz_src_to_linear, + psz_primaries_transform, + psz_linear_to_display, + psz_tone_mapping, + psz_adjust_range, psz_move_planes[0], &pPSBlob[0]); + if (SUCCEEDED(hr) && psz_sampler[1]) + { + hr = CompilePixelShaderBlob(o, compiler, feature_level, texture_array, + psz_sampler[1], + psz_src_to_linear, + psz_primaries_transform, + psz_linear_to_display, + psz_tone_mapping, + psz_adjust_range, psz_move_planes[1], &pPSBlob[1]); + if (FAILED(hr)) + D3D_ShaderBlobRelease(&pPSBlob[0]); + } + free(psz_range); + + return hr; +} + +HRESULT D3D_CompileVertexShader(vlc_object_t *obj, const d3d_shader_compiler_t *compiler, + D3D_FEATURE_LEVEL feature_level, bool flat, + d3d_shader_blob *blob) +{ + return CompileShader(obj, compiler, feature_level, + flat ? globVertexShaderFlat : globVertexShaderProjection, + false, blob); +} + + +#if !VLC_WINSTORE_APP +static HINSTANCE Direct3DLoadShaderLibrary(void) +{ + HINSTANCE instance = NULL; + /* d3dcompiler_47 is the latest on windows 8.1 */ + for (int i = 47; i > 41; --i) { + WCHAR filename[19]; + _snwprintf(filename, 19, TEXT("D3DCOMPILER_%d.dll"), i); + instance = LoadLibrary(filename); + if (instance) break; + } + return instance; +} +#endif // !VLC_WINSTORE_APP + +int D3D_InitShaderCompiler(vlc_object_t *obj, d3d_shader_compiler_t *compiler) +{ +#if !VLC_WINSTORE_APP + compiler->compiler_dll = Direct3DLoadShaderLibrary(); + if (!compiler->compiler_dll) { + msg_Err(obj, "cannot load d3dcompiler.dll, aborting"); + return VLC_EGENERIC; + } + + compiler->OurD3DCompile = (void *)GetProcAddress(compiler->compiler_dll, "D3DCompile"); + if (!compiler->OurD3DCompile) { + msg_Err(obj, "Cannot locate reference to D3DCompile in d3dcompiler DLL"); + FreeLibrary(compiler->compiler_dll); + return VLC_EGENERIC; + } +#endif // !VLC_WINSTORE_APP + + return VLC_SUCCESS; +} + +void D3D_ReleaseShaderCompiler(d3d_shader_compiler_t *compiler) +{ +#if !VLC_WINSTORE_APP + if (compiler->compiler_dll) + { + FreeLibrary(compiler->compiler_dll); + compiler->compiler_dll = NULL; + } + compiler->OurD3DCompile = NULL; +#endif // !VLC_WINSTORE_APP +} + diff --git a/modules/video_output/win32/d3d_dynamic_shader.h b/modules/video_output/win32/d3d_dynamic_shader.h new file mode 100644 index 0000000000..4431de7555 --- /dev/null +++ b/modules/video_output/win32/d3d_dynamic_shader.h @@ -0,0 +1,48 @@ +/***************************************************************************** + * d3d_dynamic_shader.h: Direct3D Shader Blob generation + ***************************************************************************** + * Copyright (C) 2017-2021 VLC authors and VideoLAN + * + * Authors: Steve Lhomme <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. + *****************************************************************************/ + +#ifndef VLC_D3D_DYNAMIC_SHADER_H +#define VLC_D3D_DYNAMIC_SHADER_H + +typedef struct +{ + HINSTANCE compiler_dll; /* handle of the opened d3dcompiler dll */ + pD3DCompile OurD3DCompile; +} d3d_shader_compiler_t; + +int D3D_InitShaderCompiler(vlc_object_t *, d3d_shader_compiler_t *); +void D3D_ReleaseShaderCompiler(d3d_shader_compiler_t *); + +HRESULT D3D_CompilePixelShader(vlc_object_t *, const d3d_shader_compiler_t *, + D3D_FEATURE_LEVEL, + bool texture_array, + const display_info_t *, + video_transfer_func_t, + video_color_primaries_t, bool src_full_range, + const d3d_format_t *dxgi_fmt, + d3d_shader_blob pPSBlob[DXGI_MAX_RENDER_TARGET]); + +HRESULT D3D_CompileVertexShader(vlc_object_t *, const d3d_shader_compiler_t *, + D3D_FEATURE_LEVEL, bool flat, + d3d_shader_blob *); + +#endif /* VLC_D3D_DYNAMIC_SHADER_H */ diff --git a/modules/video_output/win32/d3d_shaders.c b/modules/video_output/win32/d3d_shaders.c index 0830528d9b..b1dc10435f 100644 --- a/modules/video_output/win32/d3d_shaders.c +++ b/modules/video_output/win32/d3d_shaders.c @@ -32,6 +32,7 @@ #define COBJMACROS #include "d3d_shaders.h" +#include "d3d_dynamic_shader.h" #define SPHERE_RADIUS 1.f @@ -39,692 +40,6 @@ #define nbLatBands SPHERE_SLICES #define nbLonBands SPHERE_SLICES -#define ST2084_PQ_CONSTANTS "const float ST2084_m1 = 2610.0 / (4096.0 * 4);\n\ -const float ST2084_m2 = (2523.0 / 4096.0) * 128.0;\n\ -const float ST2084_c1 = 3424.0 / 4096.0;\n\ -const float ST2084_c2 = (2413.0 / 4096.0) * 32.0;\n\ -const float ST2084_c3 = (2392.0 / 4096.0) * 32.0;\n" - -#define STRINGIZE2(s) #s -#define STRINGIZE(s) STRINGIZE2(s) - -static const char* globPixelShaderDefault = "\ - cbuffer PS_CONSTANT_BUFFER : register(b0)\n\ - {\n\ - float4x4 WhitePoint;\n\ - float4x4 Colorspace;\n\ - float4x4 Primaries;\n\ - float Opacity;\n\ - float LuminanceScale;\n\ - float BoundaryX;\n\ - float BoundaryY;\n\ - };\n\ - Texture2D%s shaderTexture[4];\n\ - SamplerState normalSampler : register(s0);\n\ - SamplerState borderSampler : register(s1);\n\ - \n\ - struct PS_INPUT\n\ - {\n\ - float4 Position : SV_POSITION;\n\ - float3 Texture : TEXCOORD;\n\ - };\n\ - \n\ - /* see http://filmicworlds.com/blog/filmic-tonemapping-operators/ */\n\ - inline float4 hable(float4 x) {\n\ - const float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30;\n\ - return ((x * (A*x + (C*B))+(D*E))/(x * (A*x + B) + (D*F))) - E/F;\n\ - }\n\ - \n\ - /* https://en.wikipedia.org/wiki/Hybrid_Log-Gamma#Technical_details */\n\ - inline float inverse_HLG(float x){\n\ - const float B67_a = 0.17883277;\n\ - const float B67_b = 0.28466892;\n\ - const float B67_c = 0.55991073;\n\ - const float B67_inv_r2 = 4.0; /* 1/0.5 */\n\ - if (x <= 0.5)\n\ - x = x * x * B67_inv_r2;\n\ - else\n\ - x = exp((x - B67_c) / B67_a) + B67_b;\n\ - return x;\n\ - }\n\ - \n\ - inline float4 sourceToLinear(float4 rgb) {\n\ -%s;\n\ - }\n\ - \n\ - inline float4 linearToDisplay(float4 rgb) {\n\ -%s;\n\ - }\n\ - \n\ - inline float4 transformPrimaries(float4 rgb) {\n\ -%s;\n\ - }\n\ - \n\ - inline float4 toneMapping(float4 rgb) {\n\ -%s;\n\ - }\n\ - \n\ - inline float4 adjustRange(float4 rgb) {\n\ -%s;\n\ - }\n\ - \n\ - inline float4 reorderPlanes(float4 rgb) {\n\ -%s;\n\ - }\n\ - \n\ - inline float4 sampleTexture(SamplerState samplerState, float3 coords) {\n\ - float4 sample;\n\ -%s /* sampling routine in sample */\n\ - return sample;\n\ - }\n\ - \n\ - float4 main( PS_INPUT In ) : SV_TARGET\n\ - {\n\ - float4 sample;\n\ - \n\ - if (In.Texture.x > BoundaryX || In.Texture.y > BoundaryY) \n\ - sample = sampleTexture( borderSampler, In.Texture );\n\ - else\n\ - sample = sampleTexture( normalSampler, In.Texture );\n\ - float4 rgba = max(mul(mul(sample, WhitePoint), Colorspace),0);\n\ - float opacity = rgba.a * Opacity;\n\ - float4 rgb = rgba; rgb.a = 0;\n\ - rgb = sourceToLinear(rgb);\n\ - rgb = transformPrimaries(rgb);\n\ - rgb = toneMapping(rgb);\n\ - rgb = linearToDisplay(rgb);\n\ - rgb = adjustRange(rgb);\n\ - rgb = reorderPlanes(rgb);\n\ - return float4(rgb.rgb, saturate(opacity));\n\ - }\n\ -"; - -static const char* globVertexShaderFlat = "\ -struct d3d_vertex_t\n\ -{\n\ - float3 Position : POSITION;\n\ - float2 uv : TEXCOORD;\n\ -};\n\ -\n\ -struct PS_INPUT\n\ -{\n\ - float4 Position : SV_POSITION;\n\ - float3 Texture : TEXCOORD;\n\ -};\n\ -\n\ -PS_INPUT main( d3d_vertex_t In )\n\ -{\n\ - PS_INPUT Output;\n\ - Output.Position = float4(In.Position, 1);\n\ - Output.Texture = float3(In.uv, 0);\n\ - return Output;\n\ -}\n\ -"; - -static const char* globVertexShaderProjection = "\n\ -cbuffer VS_PROJECTION_CONST : register(b0)\n\ -{\n\ - float4x4 View;\n\ - float4x4 Zoom;\n\ - float4x4 Projection;\n\ -};\n\ -struct d3d_vertex_t\n\ -{\n\ - float3 Position : POSITION;\n\ - float2 uv : TEXCOORD;\n\ -};\n\ -\n\ -struct PS_INPUT\n\ -{\n\ - float4 Position : SV_POSITION;\n\ - float3 Texture : TEXCOORD;\n\ -};\n\ -\n\ -PS_INPUT main( d3d_vertex_t In )\n\ -{\n\ - PS_INPUT Output;\n\ - float4 pos = float4(In.Position, 1);\n\ - pos = mul(View, pos);\n\ - pos = mul(Zoom, pos);\n\ - pos = mul(Projection, pos);\n\ - Output.Position = pos;\n\ - Output.Texture = float3(In.uv, 0);\n\ - return Output;\n\ -}\n\ -"; - -#if !VLC_WINSTORE_APP -static HINSTANCE Direct3DLoadShaderLibrary(void) -{ - HINSTANCE instance = NULL; - /* d3dcompiler_47 is the latest on windows 8.1 */ - for (int i = 47; i > 41; --i) { - WCHAR filename[19]; - _snwprintf(filename, 19, TEXT("D3DCOMPILER_%d.dll"), i); - instance = LoadLibrary(filename); - if (instance) break; - } - return instance; -} -#endif // !VLC_WINSTORE_APP - -int (D3D_InitShaders)(vlc_object_t *obj, d3d_shader_compiler_t *compiler) -{ -#if !VLC_WINSTORE_APP - compiler->compiler_dll = Direct3DLoadShaderLibrary(); - if (!compiler->compiler_dll) { - msg_Err(obj, "cannot load d3dcompiler.dll, aborting"); - return VLC_EGENERIC; - } - - compiler->OurD3DCompile = (void *)GetProcAddress(compiler->compiler_dll, "D3DCompile"); - if (!compiler->OurD3DCompile) { - msg_Err(obj, "Cannot locate reference to D3DCompile in d3dcompiler DLL"); - FreeLibrary(compiler->compiler_dll); - return VLC_EGENERIC; - } -#endif // !VLC_WINSTORE_APP - - return VLC_SUCCESS; -} - -void D3D_ReleaseShaders(d3d_shader_compiler_t *compiler) -{ -#if !VLC_WINSTORE_APP - if (compiler->compiler_dll) - { - FreeLibrary(compiler->compiler_dll); - compiler->compiler_dll = NULL; - } - compiler->OurD3DCompile = NULL; -#endif // !VLC_WINSTORE_APP -} - -static ID3DBlob* CompileShader(vlc_object_t *obj, const d3d_shader_compiler_t *compiler, - D3D_FEATURE_LEVEL feature_level, - const char *psz_shader, bool pixel) -{ - ID3DBlob* pShaderBlob = NULL, *pErrBlob; - const char *target; - if (pixel) - { - if (likely(feature_level >= D3D_FEATURE_LEVEL_10_0)) - target = "ps_4_0"; - else if (feature_level >= D3D_FEATURE_LEVEL_9_3) - target = "ps_4_0_level_9_3"; - else - target = "ps_4_0_level_9_1"; - } - else - { - if (likely(feature_level >= D3D_FEATURE_LEVEL_10_0)) - target = "vs_4_0"; - else if (feature_level >= D3D_FEATURE_LEVEL_9_3) - target = "vs_4_0_level_9_3"; - else - target = "vs_4_0_level_9_1"; - } - - UINT compileFlags = 0; -#if VLC_WINSTORE_APP - VLC_UNUSED(compiler); -#else -# define D3DCompile(args...) compiler->OurD3DCompile(args) -# if !defined(NDEBUG) - if (IsDebuggerPresent()) - compileFlags += D3DCOMPILE_DEBUG; -# endif -#endif - HRESULT hr = D3DCompile(psz_shader, strlen(psz_shader), - NULL, NULL, NULL, "main", target, - compileFlags, 0, &pShaderBlob, &pErrBlob); - - if (FAILED(hr)) { - char *err = pErrBlob ? ID3D10Blob_GetBufferPointer(pErrBlob) : NULL; - msg_Err(obj, "invalid %s Shader (hr=0x%lX): %s", pixel?"Pixel":"Vertex", hr, err ); - if (pErrBlob) - ID3D10Blob_Release(pErrBlob); - return NULL; - } - return pShaderBlob; -} - -static HRESULT CompilePixelShaderBlob(vlc_object_t *o, const d3d_shader_compiler_t *compiler, - D3D_FEATURE_LEVEL feature_level, - bool texture_array, - const char *psz_sampler, - const char *psz_src_to_linear, - const char *psz_primaries_transform, - const char *psz_linear_to_display, - const char *psz_tone_mapping, - const char *psz_adjust_range, const char *psz_move_planes, - ID3DBlob **pPSBlob) -{ - char *shader; - int allocated = asprintf(&shader, globPixelShaderDefault, texture_array ? "Array" : "", - psz_src_to_linear, psz_linear_to_display, - psz_primaries_transform, psz_tone_mapping, - psz_adjust_range, psz_move_planes, psz_sampler); - if (allocated <= 0) - { - msg_Err(o, "no room for the Pixel Shader"); - return E_OUTOFMEMORY; - } - if (var_InheritInteger(o, "verbose") >= 4) - msg_Dbg(o, "shader %s", shader); -#ifndef NDEBUG - else { - msg_Dbg(o,"psz_src_to_linear %s", psz_src_to_linear); - msg_Dbg(o,"psz_primaries_transform %s", psz_primaries_transform); - msg_Dbg(o,"psz_tone_mapping %s", psz_tone_mapping); - msg_Dbg(o,"psz_linear_to_display %s", psz_linear_to_display); - msg_Dbg(o,"psz_adjust_range %s", psz_adjust_range); - msg_Dbg(o,"psz_sampler %s", psz_sampler); - msg_Dbg(o,"psz_move_planes %s", psz_move_planes); - } -#endif - - *pPSBlob = CompileShader(o, compiler, feature_level, shader, true); - free(shader); - if (!*pPSBlob) - return E_INVALIDARG; - return S_OK; -} - -HRESULT (D3D_CompilePixelShader)(vlc_object_t *o, const d3d_shader_compiler_t *compiler, - D3D_FEATURE_LEVEL feature_level, - bool texture_array, - const display_info_t *display, - video_transfer_func_t transfer, - video_color_primaries_t primaries, bool src_full_range, - const d3d_format_t *dxgi_fmt, - ID3DBlob *pPSBlob[DXGI_MAX_RENDER_TARGET]) -{ - static const char *DEFAULT_NOOP = "return rgb"; - const char *psz_sampler[DXGI_MAX_RENDER_TARGET] = {NULL, NULL}; - const char *psz_src_to_linear = DEFAULT_NOOP; - const char *psz_linear_to_display = DEFAULT_NOOP; - const char *psz_primaries_transform = DEFAULT_NOOP; - const char *psz_tone_mapping = "return rgb * LuminanceScale"; - const char *psz_adjust_range = DEFAULT_NOOP; - const char *psz_move_planes[2] = {DEFAULT_NOOP, DEFAULT_NOOP}; - char *psz_range = NULL; - - if ( display->pixelFormat->formatTexture == DXGI_FORMAT_NV12 || - display->pixelFormat->formatTexture == DXGI_FORMAT_P010 ) - { - /* we need 2 shaders, one for the Y target, one for the UV target */ - switch (dxgi_fmt->formatTexture) - { - case DXGI_FORMAT_NV12: - case DXGI_FORMAT_P010: - psz_sampler[0] = - "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" - "sample.y = 0.0;\n" - "sample.z = 0.0;\n" - "sample.a = 1;"; - psz_sampler[1] = - // TODO should be shaderTexture[0] ? - "sample.xy = shaderTexture[1].Sample(samplerState, coords).xy;\n" - "sample.z = 0.0;\n" - "sample.a = 1;"; - break; - case DXGI_FORMAT_R8G8B8A8_UNORM: - case DXGI_FORMAT_B8G8R8A8_UNORM: - case DXGI_FORMAT_B8G8R8X8_UNORM: - case DXGI_FORMAT_R10G10B10A2_UNORM: - case DXGI_FORMAT_R16G16B16A16_UNORM: - case DXGI_FORMAT_B5G6R5_UNORM: - /* Y */ - psz_sampler[0] = - "sample = shaderTexture[0].Sample(samplerState, coords);\n"; - psz_move_planes[0] = "return rgb"; - /* UV */ - psz_sampler[1] = - "sample = shaderTexture[0].Sample(samplerState, coords);\n"; - psz_move_planes[1] = - "rgb.x = rgb.y;\n" - "rgb.y = rgb.z;\n" - "rgb.z = 0;\n" - "return rgb"; - break; - case DXGI_FORMAT_UNKNOWN: - switch (dxgi_fmt->fourcc) - { - case VLC_CODEC_YUVA: - /* Y */ - psz_sampler[0] = - "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" - "sample.y = 0.0;\n" - "sample.z = 0.0;\n" - "sample.a = shaderTexture[3].Sample(samplerState, coords).x;"; - /* UV */ - psz_sampler[1] = - "sample.x = shaderTexture[1].Sample(samplerState, coords).x;\n" - "sample.y = shaderTexture[2].Sample(samplerState, coords).x;\n" - "sample.z = 0.0;\n" - "sample.a = shaderTexture[3].Sample(samplerState, coords).x;"; - break; - default: - vlc_assert_unreachable(); - } - break; - default: - vlc_assert_unreachable(); - } - } - else - { - switch (dxgi_fmt->formatTexture) - { - case DXGI_FORMAT_NV12: - case DXGI_FORMAT_P010: - psz_sampler[0] = - "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" - "sample.yz = shaderTexture[1].Sample(samplerState, coords).xy;\n" - "sample.a = 1;"; - break; - case DXGI_FORMAT_YUY2: - psz_sampler[0] = - "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" - "sample.y = shaderTexture[0].Sample(samplerState, coords).y;\n" - "sample.z = shaderTexture[0].Sample(samplerState, coords).a;\n" - "sample.a = 1;"; - break; - case DXGI_FORMAT_Y210: - psz_sampler[0] = - "sample.x = shaderTexture[0].Sample(samplerState, coords).r;\n" - "sample.y = shaderTexture[0].Sample(samplerState, coords).g;\n" - "sample.z = shaderTexture[0].Sample(samplerState, coords).a;\n" - "sample.a = 1;"; - break; - case DXGI_FORMAT_Y410: - psz_sampler[0] = - "sample.x = shaderTexture[0].Sample(samplerState, coords).g;\n" - "sample.y = shaderTexture[0].Sample(samplerState, coords).r;\n" - "sample.z = shaderTexture[0].Sample(samplerState, coords).b;\n" - "sample.a = 1;"; - break; - case DXGI_FORMAT_AYUV: - psz_sampler[0] = - "sample.x = shaderTexture[0].Sample(samplerState, coords).z;\n" - "sample.y = shaderTexture[0].Sample(samplerState, coords).y;\n" - "sample.z = shaderTexture[0].Sample(samplerState, coords).x;\n" - "sample.a = 1;"; - break; - case DXGI_FORMAT_R8G8B8A8_UNORM: - case DXGI_FORMAT_B8G8R8A8_UNORM: - case DXGI_FORMAT_B8G8R8X8_UNORM: - case DXGI_FORMAT_R10G10B10A2_UNORM: - case DXGI_FORMAT_R16G16B16A16_UNORM: - case DXGI_FORMAT_B5G6R5_UNORM: - psz_sampler[0] = - "sample = shaderTexture[0].Sample(samplerState, coords);"; - break; - case DXGI_FORMAT_UNKNOWN: - switch (dxgi_fmt->fourcc) - { - case VLC_CODEC_I420_10L: - psz_sampler[0] = - "float3 coords_2 = float3(coords.x/2, coords.y, coords.z);\n" - "sample.x = shaderTexture[0].Sample(samplerState, coords_2).x * 64;\n" - "sample.y = shaderTexture[1].Sample(samplerState, coords_2).x * 64;\n" - "sample.z = shaderTexture[2].Sample(samplerState, coords_2).x * 64;\n" - "sample.a = 1;"; - break; - case VLC_CODEC_I444_16L: - psz_sampler[0] = - "float3 coords_2 = float3(coords.x/2, coords.y, coords.z);\n" - "sample.x = shaderTexture[0].Sample(samplerState, coords_2).x;\n" - "sample.y = shaderTexture[1].Sample(samplerState, coords_2).x;\n" - "sample.z = shaderTexture[2].Sample(samplerState, coords_2).x;\n" - "sample.a = 1;"; - break; - case VLC_CODEC_I420: - psz_sampler[0] = - "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" - "sample.y = shaderTexture[1].Sample(samplerState, coords).x;\n" - "sample.z = shaderTexture[2].Sample(samplerState, coords).x;\n" - "sample.a = 1;"; - break; - case VLC_CODEC_YUVA: - psz_sampler[0] = - "sample.x = shaderTexture[0].Sample(samplerState, coords).x;\n" - "sample.y = shaderTexture[1].Sample(samplerState, coords).x;\n" - "sample.z = shaderTexture[2].Sample(samplerState, coords).x;\n" - "sample.a = shaderTexture[3].Sample(samplerState, coords).x;"; - break; - default: - vlc_assert_unreachable(); - } - break; - default: - vlc_assert_unreachable(); - } - } - - video_transfer_func_t src_transfer; - - if (transfer != display->transfer) - { - /* we need to go in linear mode */ - switch (transfer) - { - case TRANSFER_FUNC_SMPTE_ST2084: - /* ST2084 to Linear */ - psz_src_to_linear = - ST2084_PQ_CONSTANTS - "rgb = pow(max(rgb, 0), 1.0/ST2084_m2);\n" - "rgb = max(rgb - ST2084_c1, 0.0) / (ST2084_c2 - ST2084_c3 * rgb);\n" - "rgb = pow(rgb, 1.0/ST2084_m1);\n" - "return rgb * 10000"; - src_transfer = TRANSFER_FUNC_LINEAR; - break; - case TRANSFER_FUNC_HLG: - psz_src_to_linear = "const float alpha_gain = 2000; /* depends on the display output */\n" - "/* TODO: in one call */\n" - "rgb.r = inverse_HLG(rgb.r);\n" - "rgb.g = inverse_HLG(rgb.g);\n" - "rgb.b = inverse_HLG(rgb.b);\n" - "float3 ootf_2020 = float3(0.2627, 0.6780, 0.0593);\n" - "float ootf_ys = alpha_gain * dot(ootf_2020, rgb);\n" - "return rgb * pow(ootf_ys, 0.200)"; - src_transfer = TRANSFER_FUNC_LINEAR; - break; - case TRANSFER_FUNC_BT709: - psz_src_to_linear = "return pow(rgb, 1.0 / 0.45)"; - src_transfer = TRANSFER_FUNC_LINEAR; - break; - case TRANSFER_FUNC_BT470_M: - case TRANSFER_FUNC_SRGB: - psz_src_to_linear = "return pow(rgb, 2.2)"; - src_transfer = TRANSFER_FUNC_LINEAR; - break; - case TRANSFER_FUNC_BT470_BG: - psz_src_to_linear = "return pow(rgb, 2.8)"; - src_transfer = TRANSFER_FUNC_LINEAR; - break; - default: - msg_Dbg(o, "unhandled source transfer %d", transfer); - src_transfer = transfer; - break; - } - - switch (display->transfer) - { - case TRANSFER_FUNC_SRGB: - if (src_transfer == TRANSFER_FUNC_LINEAR) - { - /* Linear to sRGB */ - psz_linear_to_display = "return pow(rgb, 1.0 / 2.2)"; - - if (transfer == TRANSFER_FUNC_SMPTE_ST2084 || transfer == TRANSFER_FUNC_HLG) - { - /* HDR tone mapping */ - psz_tone_mapping = - "static const float4 HABLE_DIV = hable(11.2);\n" - "rgb = hable(rgb * LuminanceScale) / HABLE_DIV;\n" - "return rgb"; - } - } - else - msg_Warn(o, "don't know how to transfer from %d to sRGB", src_transfer); - break; - - case TRANSFER_FUNC_SMPTE_ST2084: - if (src_transfer == TRANSFER_FUNC_LINEAR) - { - /* Linear to ST2084 */ - psz_linear_to_display = - ST2084_PQ_CONSTANTS - "rgb = pow(rgb / 10000, ST2084_m1);\n" - "rgb = (ST2084_c1 + ST2084_c2 * rgb) / (1 + ST2084_c3 * rgb);\n" - "rgb = pow(rgb, ST2084_m2);\n" - "return rgb"; - } - else - msg_Warn(o, "don't know how to transfer from %d to SMPTE ST 2084", src_transfer); - break; - default: - msg_Warn(o, "don't know how to transfer from %d to %d", src_transfer, display->transfer); - break; - } - } - - if (display->primaries != primaries) - { - switch (primaries) - { - case COLOR_PRIMARIES_BT601_525: - case COLOR_PRIMARIES_BT601_625: - case COLOR_PRIMARIES_BT709: - case COLOR_PRIMARIES_BT2020: - case COLOR_PRIMARIES_DCI_P3: - case COLOR_PRIMARIES_FCC1953: - psz_primaries_transform = "return max(mul(rgb, Primaries), 0)"; - break; - default: - /* see STANDARD_PRIMARIES */ - msg_Warn(o, "unhandled color primaries %d", primaries); - } - } - - int range_adjust = 0; - if (display->b_full_range) { - if (!src_full_range) - range_adjust = 1; /* raise the source to full range */ - } else { - if (src_full_range) - range_adjust = -1; /* lower the source to studio range */ - } - if (!DxgiIsRGBFormat(dxgi_fmt) && !src_full_range && DxgiIsRGBFormat(display->pixelFormat)) - range_adjust--; /* the YUV->RGB conversion already output full range */ - - if (range_adjust != 0) - { - psz_range = malloc(256); - if (likely(psz_range)) - { - FLOAT itu_black_level; - FLOAT itu_range_factor; - FLOAT itu_white_level; - switch (dxgi_fmt->bitsPerChannel) - { - case 8: - /* Rec. ITU-R BT.709-6 ?4.6 */ - itu_black_level = 16.f / 255.f; - itu_white_level = 235.f / 255.f; - itu_range_factor = (float)(235 - 16) / 255.f; - break; - case 10: - /* Rec. ITU-R BT.709-6 ?4.6 */ - itu_black_level = 64.f / 1023.f; - itu_white_level = 940.f / 1023.f; - itu_range_factor = (float)(940 - 64) / 1023.f; - break; - case 12: - /* Rec. ITU-R BT.2020-2 Table 5 */ - itu_black_level = 256.f / 4095.f; - itu_white_level = 3760.f / 4095.f; - itu_range_factor = (float)(3760 - 256) / 4095.f; - break; - default: - /* unknown bitdepth, use approximation for infinite bit depth */ - itu_black_level = 16.f / 256.f; - itu_white_level = 235.f / 256.f; - itu_range_factor = (float)(235 - 16) / 256.f; - break; - } - - FLOAT black_level = 0; - FLOAT range_factor = 1.0f; - if (range_adjust > 0) - { - /* expand the range from studio to full range */ - while (range_adjust--) - { - black_level -= itu_black_level; - range_factor /= itu_range_factor; - } - sprintf(psz_range, "return clamp((rgb + %f) * %f, 0, 1)", - black_level, range_factor); - } - else - { - /* shrink the range to studio range */ - while (range_adjust++) - { - black_level += itu_black_level; - range_factor *= itu_range_factor; - } - sprintf(psz_range, "return clamp(rgb + %f * %f,%f,%f)", - black_level, range_factor, itu_black_level, itu_white_level); - } - psz_adjust_range = psz_range; - } - } - - HRESULT hr; - hr = CompilePixelShaderBlob(o, compiler, feature_level, texture_array, - psz_sampler[0], - psz_src_to_linear, - psz_primaries_transform, - psz_linear_to_display, - psz_tone_mapping, - psz_adjust_range, psz_move_planes[0], &pPSBlob[0]); - if (SUCCEEDED(hr) && psz_sampler[1]) - { - hr = CompilePixelShaderBlob(o, compiler, feature_level, texture_array, - psz_sampler[1], - psz_src_to_linear, - psz_primaries_transform, - psz_linear_to_display, - psz_tone_mapping, - psz_adjust_range, psz_move_planes[1], &pPSBlob[1]); - if (FAILED(hr)) - { - ID3D10Blob_Release(pPSBlob[0]); - pPSBlob[0] = NULL; - } - } - else - pPSBlob[1] = NULL; - free(psz_range); - - return hr; -} - -HRESULT D3D_CompileVertexShader(vlc_object_t *obj, const d3d_shader_compiler_t *compiler, - D3D_FEATURE_LEVEL feature_level, bool flat, - ID3DBlob **pVSBlob) -{ - *pVSBlob = CompileShader(obj, compiler, feature_level, - flat ? globVertexShaderFlat : globVertexShaderProjection, false); - if (!*pVSBlob) - return E_FAIL; - return S_OK; -} - float (D3D_GetFormatLuminance)(vlc_object_t *o, const video_format_t *fmt) { switch (fmt->transfer) @@ -745,7 +60,6 @@ float (D3D_GetFormatLuminance)(vlc_object_t *o, const video_format_t *fmt) } } - struct xy_primary { double x, y; }; diff --git a/modules/video_output/win32/d3d_shaders.h b/modules/video_output/win32/d3d_shaders.h index c660476346..b164fc63e5 100644 --- a/modules/video_output/win32/d3d_shaders.h +++ b/modules/video_output/win32/d3d_shaders.h @@ -73,15 +73,6 @@ typedef struct { } texture; } d3d_vertex_t; -typedef struct -{ - HINSTANCE compiler_dll; /* handle of the opened d3dcompiler dll */ - pD3DCompile OurD3DCompile; -} d3d_shader_compiler_t; - -int D3D_InitShaders(vlc_object_t *, d3d_shader_compiler_t *); -void D3D_ReleaseShaders(d3d_shader_compiler_t *); - /* A Quad is texture that can be displayed in a rectangle */ typedef struct @@ -100,18 +91,19 @@ typedef struct } d3d_quad_t; -HRESULT D3D_CompilePixelShader(vlc_object_t *, const d3d_shader_compiler_t *, - D3D_FEATURE_LEVEL, - bool texture_array, - const display_info_t *, - video_transfer_func_t, - video_color_primaries_t, bool src_full_range, - const d3d_format_t *dxgi_fmt, - ID3DBlob *pPSBlob[DXGI_MAX_RENDER_TARGET]); - -HRESULT D3D_CompileVertexShader(vlc_object_t *, const d3d_shader_compiler_t *, - D3D_FEATURE_LEVEL, bool flat, - ID3DBlob **); +typedef struct d3d_shader_blob +{ + void *opaque; + void (*pf_release)(struct d3d_shader_blob *); + SIZE_T buf_size; + void *buffer; +} d3d_shader_blob; + +static inline void D3D_ShaderBlobRelease(d3d_shader_blob *blob) +{ + blob->pf_release(blob); + *blob = (d3d_shader_blob) { 0 }; +} float D3D_GetFormatLuminance(vlc_object_t *, const video_format_t *); #define D3D_GetFormatLuminance(a,b) D3D_GetFormatLuminance(VLC_OBJECT(a),b) diff --git a/modules/video_output/win32/direct3d11.c b/modules/video_output/win32/direct3d11.c index 8b2bb849f4..2da3146735 100644 --- a/modules/video_output/win32/direct3d11.c +++ b/modules/video_output/win32/direct3d11.c @@ -358,7 +358,7 @@ static int Open(vout_display_t *vd, const vout_display_cfg_t *cfg, if (!sys) return VLC_ENOMEM; - int ret = D3D_InitShaders(VLC_OBJECT(vd), &sys->shaders); + int ret = D3D_InitShaderCompiler(VLC_OBJECT(vd), &sys->shaders); if (ret != VLC_SUCCESS) goto error; @@ -450,7 +450,7 @@ error: static void Close(vout_display_t *vd) { - D3D_ReleaseShaders(&vd->sys->shaders); + D3D_ReleaseShaderCompiler(&vd->sys->shaders); Direct3D11Close(vd); #if !VLC_WINSTORE_APP UnhookWindowsSensors(vd->sys->p_sensors); _______________________________________________ vlc-commits mailing list [email protected] https://mailman.videolan.org/listinfo/vlc-commits
