This patch LGTM, thanks. > -----Original Message----- > From: Gong, Zhigang > Sent: Wednesday, December 17, 2014 09:42 > To: [email protected] > Cc: Yang, Rong R; Gong, Zhigang > Subject: [PATCH] Refactor all image builtin functions. > > Refactor almost all the image builtin related functions to simplify the code > and get rid of most of the awful macros. > > Signed-off-by: Zhigang Gong <[email protected]> > --- > backend/src/libocl/src/ocl_image.cl | 811 ++++++++++++++++++--------- > -- > backend/src/llvm/llvm_gen_backend.cpp | 174 ++++--- > backend/src/llvm/llvm_gen_ocl_function.hxx | 36 +- > backend/src/llvm/llvm_scalarize.cpp | 13 +- > 4 files changed, 618 insertions(+), 416 deletions(-) > > diff --git a/backend/src/libocl/src/ocl_image.cl > b/backend/src/libocl/src/ocl_image.cl > index fd421bf..95b98ff 100644 > --- a/backend/src/libocl/src/ocl_image.cl > +++ b/backend/src/libocl/src/ocl_image.cl > @@ -20,29 +20,90 @@ > #include "ocl_integer.h" > #include "ocl_common.h" > > +///////////////////////////////////////////////////////////////////////////// > // > +// Beignet builtin functions. 
> +///////////////////////////////////////////////////////////////////////////// > // > + > // 1D read > -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, float u, uint sampler_offset); > -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, int u, uint sampler_offset); > -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, float u, uint sampler_offset); > -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, int u, uint sampler_offset); > -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, float u, uint sampler_offset); > -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, int u, uint sampler_offset); > +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, > + float u, uint sampler_offset); > +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, > + int u, uint sampler_offset); > +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, > + float u, uint sampler_offset); > +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, > + int u, uint sampler_offset); > +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, > + float u, uint sampler_offset); > +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, > + int u, uint sampler_offset); > > // 2D & 1D Array read > -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, float u, float v, uint sampler_offset); > -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, int u, int v, uint sampler_offset); > -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, float u, float v, uint sampler_offset); > -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, int u, int v, uint sampler_offset); > -OVERLOADABLE 
float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, float u, float v, uint sampler_offset); > -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, int u, int v, uint sampler_offset); > +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, > + float2 coord, uint sampler_offset); > +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, > + int2 coord, uint sampler_offset); > +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, > + float2 coord, uint sampler_offset); > +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, > + int2 coord, uint sampler_offset); > +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, > + float2 coord, uint sampler_offset); > +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, > + int2 coord, uint sampler_offset); > > // 3D & 2D Array read > -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, float u, float v, float w, uint sampler_offset); > -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, int u, int v, int w, uint sampler_offset); > -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, float u, float v, float w, uint sampler_offset); > -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, int u, int v, int w, uint sampler_offset); > -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, float u, float v, float w, uint sampler_offset); > -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, int u, int v, int w, uint sampler_offset); > +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, > + float4 coord, uint sampler_offset); > +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t > sampler, > + int4 coord, uint sampler_offset); > 
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, > + float4 coord, uint sampler_offset); > +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, > + int4 coord, uint sampler_offset); > +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, > + float4 coord, uint sampler_offset); > +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, > + int4 coord, uint sampler_offset); > + > +// Don't know why we need to support 3 component coordinates, but it's in > the old > +// version, let's keep to support it. > +INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, > sampler_t sampler, > + float3 coord, uint > sampler_offset) > +{ > + return __gen_ocl_read_imagei(surface_id, sampler, > + (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); > +} > +INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, > sampler_t sampler, > + int3 coord, uint > sampler_offset) > +{ > + return __gen_ocl_read_imagei(surface_id, sampler, > + (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); > +} > +INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, > sampler_t sampler, > + float3 coord, uint > sampler_offset) > +{ > + return __gen_ocl_read_imageui(surface_id, sampler, > + (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); > +} > +INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, > sampler_t sampler, > + int3 coord, uint > sampler_offset) > +{ > + return __gen_ocl_read_imageui(surface_id, sampler, > + (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); > +} > +INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, > sampler_t sampler, > + float3 coord, uint sampler_offset) > +{ > + return __gen_ocl_read_imagef(surface_id, sampler, > + (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); > +} > +INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, > sampler_t sampler, > + int3 
coord, uint > sampler_offset) > +{ > + return __gen_ocl_read_imagef(surface_id, sampler, > + (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); > +} > > // 1D write > OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4 > color); > @@ -50,14 +111,27 @@ OVERLOADABLE void __gen_ocl_write_imageui(uint > surface_id, int u, uint4 color); > OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4 > color); > > // 2D & 1D Array write > -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, > int4 color); > -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, > uint4 color); > -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, > float4 color); > +OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int2 coord, > int4 color); > +OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int2 coord, > uint4 color); > +OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int2 coord, > float4 color); > > // 3D & 2D Array write > -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, > int w, int4 color); > -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, > int w, uint4 color); > -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, > int w, float4 color); > +OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int4 coord, > int4 color); > +OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int4 coord, > uint4 color); > +OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int4 coord, > float4 color); > + > +INLINE_OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int3 > coord, int4 color) > +{ > + __gen_ocl_write_imagei(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), > color); > +} > +INLINE_OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, > int3 coord, uint4 color) > +{ > + __gen_ocl_write_imageui(surface_id, (int4)(coord.s0, coord.s1, coord.s2, > 0), color); > +} > 
+INLINE_OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int3 > coord, float4 color) > +{ > + __gen_ocl_write_imagef(surface_id, (int4)(coord.s0, coord.s1, coord.s2, > 0), color); > +} > > int __gen_ocl_get_image_width(uint surface_id); > int __gen_ocl_get_image_height(uint surface_id); > @@ -65,225 +139,436 @@ int > __gen_ocl_get_image_channel_data_type(uint surface_id); > int __gen_ocl_get_image_channel_order(uint surface_id); > int __gen_ocl_get_image_depth(uint surface_id); > > -// 2D 3D Image Common Macro > -#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND > -#define GEN_FIX_1 1 > -#else > -#define GEN_FIX_1 0 > -#endif > > #define GET_IMAGE(cl_image, surface_id) \ > uint surface_id = (uint)cl_image > -OVERLOADABLE float __gen_compute_array_index(const float index, > image1d_array_t image) > + > +///////////////////////////////////////////////////////////////////////////// > // > +// helper functions to validate array index. > +///////////////////////////////////////////////////////////////////////////// > // > +INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, > image1d_array_t image) > { > GET_IMAGE(image, surface_id); > float array_size = __gen_ocl_get_image_depth(surface_id); > - return clamp(rint(index), 0.f, array_size - 1.f); > + coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f); > + return coord; > } > > -OVERLOADABLE float __gen_compute_array_index(float index, > image2d_array_t image) > +INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, > image2d_array_t image) > { > GET_IMAGE(image, surface_id); > float array_size = __gen_ocl_get_image_depth(surface_id); > - return clamp(rint(index), 0.f, array_size - 1.f); > + coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f); > + return coord; > } > > -OVERLOADABLE int __gen_compute_array_index(int index, > image1d_array_t image) > +INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, > image2d_array_t image) > +{ > + GET_IMAGE(image, surface_id); 
> + float array_size = __gen_ocl_get_image_depth(surface_id); > + coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f); > + return coord; > +} > + > +INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, > image1d_array_t image) > { > GET_IMAGE(image, surface_id); > int array_size = __gen_ocl_get_image_depth(surface_id); > - return clamp(index, 0, array_size - 1); > + coord.s1 = clamp(coord.s1, 0, array_size - 1); > + return coord; > } > > -OVERLOADABLE int __gen_compute_array_index(int index, > image2d_array_t image) > +INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, > image2d_array_t image) > { > GET_IMAGE(image, surface_id); > int array_size = __gen_ocl_get_image_depth(surface_id); > - return clamp(index, 0, array_size - 1); > -} > - > -#define DECL_READ_IMAGE0(int_clamping_fix, > \ > - image_type, type, suffix, coord_type, n) > \ > - OVERLOADABLE type read_image ##suffix(image_type cl_image, \ > - const sampler_t sampler, > \ > - coord_type coord) > \ > - { > \ > - GET_IMAGE(cl_image, surface_id); > \ > - GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai); > \ > - if (int_clamping_fix && > \ > - ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) && > \ > - ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) > \ > - return __gen_ocl_read_image ##suffix( > \ > - EXPEND_READ_COORD(surface_id, sampler, coord)); > \ > - return __gen_ocl_read_image ##suffix( > \ > - EXPEND_READ_COORDF(surface_id, sampler, coord), 0); > \ > - } > + coord.s2 = clamp(coord.s2, 0, array_size - 1); > + return coord; > +} > > -#define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix, > \ > - image_type, type, suffix, coord_type, n) > \ > - OVERLOADABLE type read_image ##suffix(image_type cl_image, \ > - const sampler_t sampler, > \ > - coord_type coord) > \ > - { > \ > - GET_IMAGE(cl_image, surface_id); > \ > - GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai) > \ > - coord_type tmpCoord = coord; > \ > - if (float_coord_rounding_fix | 
int_clamping_fix) { > \ > - if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) > \ > - && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) { > \ > - if (float_coord_rounding_fix > \ > - && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) { > \ > - FIXUP_FLOAT_COORD(tmpCoord); > \ > - } > \ > - if (int_clamping_fix) { > \ > - coord_type intCoord; > \ > - if (sampler & CLK_NORMALIZED_COORDS_TRUE) { > \ > - DENORMALIZE_COORD(surface_id, intCoord, tmpCoord); > \ > - } else > \ > - intCoord = tmpCoord; > \ > - return __gen_ocl_read_image ##suffix( > \ > - EXPEND_READ_COORDI(surface_id, sampler, intCoord));\ > - } > \ > - } > \ > - } > \ > - return __gen_ocl_read_image ##suffix( > \ > - EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), > 0);\ > - } > +INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, > image2d_array_t image) > +{ > + GET_IMAGE(image, surface_id); > + int array_size = __gen_ocl_get_image_depth(surface_id); > + coord.s2 = clamp(coord.s2, 0, array_size - 1); > + return coord; > +} > + > +// For non array image type, we need to do nothing. > +#define GEN_VALIDATE_ARRAY_INDEX(coord_type, image_type) \ > +INLINE_OVERLOADABLE coord_type > __gen_validate_array_index(coord_type coord, image_type image) \ > +{ \ > + return coord; \ > +} > + > +GEN_VALIDATE_ARRAY_INDEX(float, image1d_t) > +GEN_VALIDATE_ARRAY_INDEX(int, image1d_t) > +GEN_VALIDATE_ARRAY_INDEX(float2, image2d_t) > +GEN_VALIDATE_ARRAY_INDEX(int2, image2d_t) > +GEN_VALIDATE_ARRAY_INDEX(float4, image3d_t) > +GEN_VALIDATE_ARRAY_INDEX(int4, image3d_t) > +GEN_VALIDATE_ARRAY_INDEX(float3, image3d_t) > +GEN_VALIDATE_ARRAY_INDEX(int3, image3d_t) > +GEN_VALIDATE_ARRAY_INDEX(float, image1d_buffer_t) > +GEN_VALIDATE_ARRAY_INDEX(int, image1d_buffer_t) > + > +///////////////////////////////////////////////////////////////////////////// > // > +// Helper functions to work around some coordiate boundary issues. 
> +// The major issue on Gen7/Gen7.5 are the sample message could not > sampling > +// integer type surfaces correctly with CLK_ADDRESS_CLAMP and > CLK_FILTER_NEAREST. > +// The work around is to use a LD message instead of normal sample > message. > +///////////////////////////////////////////////////////////////////////////// > // > +bool __gen_sampler_need_fix(const sampler_t sampler) > +{ > + return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) > && > + ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)); > +} > + > +bool __gen_sampler_need_rounding_fix(const sampler_t sampler) > +{ > + return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0); > +} > + > + > +INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord) > +{ > + if (tmpCoord < 0 && tmpCoord > -0x1p-20f) > + tmpCoord += -0x1p-9f; > + return tmpCoord; > +} > + > +INLINE_OVERLOADABLE float2 __gen_fixup_float_coord(float2 tmpCoord) > +{ > + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) > + tmpCoord.s0 += -0x1p-9f; > + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) > + tmpCoord.s1 += -0x1p-9f; > + return tmpCoord; > +} > + > +INLINE_OVERLOADABLE float3 __gen_fixup_float_coord(float3 tmpCoord) > +{ > + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) > + tmpCoord.s0 += -0x1p-9f; > + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) > + tmpCoord.s1 += -0x1p-9f; > + if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f) > + tmpCoord.s2 += -0x1p-9f; > + return tmpCoord; > +} > + > +INLINE_OVERLOADABLE float4 __gen_fixup_float_coord(float4 tmpCoord) > +{ > + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) > + tmpCoord.s0 += -0x1p-9f; > + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) > + tmpCoord.s1 += -0x1p-9f; > + if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f) > + tmpCoord.s2 += -0x1p-9f; > + return tmpCoord; > +} > + > +// Functions to denormalize coordiates, it's needed when we need to use > LD > +// message (sampler offset is non-zero) and the coordiates are normalized > +// coordiates. 
> +INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_t > image, float srcCoord) > +{ > + GET_IMAGE(image, surface_id); > + return srcCoord * __gen_ocl_get_image_width(surface_id); > +} > + > +INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const > image1d_array_t image, float2 srcCoord) > +{ > + GET_IMAGE(image, surface_id); > + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); > + return srcCoord; > +} > + > +INLINE_OVERLOADABLE float __gen_denormalize_coord(const > image1d_buffer_t image, float srcCoord) > +{ > + GET_IMAGE(image, surface_id); > + return srcCoord * __gen_ocl_get_image_width(surface_id); > +} > + > +INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image2d_t > image, float2 srcCoord) > +{ > + GET_IMAGE(image, surface_id); > + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); > + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); > + return srcCoord; > +} > + > +INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const > image2d_array_t image, float3 srcCoord) > +{ > + GET_IMAGE(image, surface_id); > + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); > + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); > + return srcCoord; > +} > + > +INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image3d_t > image, float3 srcCoord) > +{ > + GET_IMAGE(image, surface_id); > + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); > + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); > + srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id); > + return srcCoord; > +} > + > +INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const > image2d_array_t image, float4 srcCoord) > +{ > + GET_IMAGE(image, surface_id); > + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); > + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); > + return srcCoord; > +} > + > +INLINE_OVERLOADABLE float4 
__gen_denormalize_coord(const image3d_t > image, float4 srcCoord) > +{ > + GET_IMAGE(image, surface_id); > + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); > + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); > + srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id); > + return srcCoord; > +} > + > +// After denormalize, we have to fixup the negative boundary. > +INLINE_OVERLOADABLE float __gen_fixup_neg_boundary(float coord) > +{ > + return coord < 0 ? -1 : coord; > +} > + > +INLINE_OVERLOADABLE float2 __gen_fixup_neg_boundary(float2 coord) > +{ > + coord.s0 = coord.s0 < 0 ? -1 : coord.s0; > + coord.s1 = coord.s1 < 0 ? -1 : coord.s1; > + return coord; > +} > + > +INLINE_OVERLOADABLE float4 __gen_fixup_neg_boundary(float4 coord) > +{ > + coord.s0 = coord.s0 < 0 ? -1 : coord.s0; > + coord.s1 = coord.s1 < 0 ? -1 : coord.s1; > + coord.s2 = coord.s2 < 0 ? -1 : coord.s2; > + return coord; > +} > > -#define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, > coord_type, n) \ > - OVERLOADABLE type read_image ##suffix(image_type cl_image, \ > - coord_type coord) > \ > - { > \ > - GET_IMAGE(cl_image, surface_id); > \ > - GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai) > \ > - return __gen_ocl_read_image ##suffix( > \ > - EXPEND_READ_COORDF(surface_id, > \ > - CLK_NORMALIZED_COORDS_FALSE > \ > - | CLK_ADDRESS_NONE > \ > - | CLK_FILTER_NEAREST, (float)coord), 0); > \ > +INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) > +{ > + coord.s0 = coord.s0 < 0 ? -1 : coord.s0; > + coord.s1 = coord.s1 < 0 ? -1 : coord.s1; > + coord.s2 = coord.s2 < 0 ? 
-1 : coord.s2; > + return coord; > +} > + > +///////////////////////////////////////////////////////////////////////////// > // > +// Built-in Image Read/Write Functions > +///////////////////////////////////////////////////////////////////////////// > // > + > +// 2D 3D Image Common Macro > +#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND > +#define GEN_FIX_FLOAT_ROUNDING 1 > +#define GEN_FIX_INT_CLAMPING 1 > +#else > +#define GEN_FIX_FLOAT_ROUNDING 0 > +#define GEN_FIX_INT_CLAMPING 0 > +#endif > + > +// For integer coordinates > +#define DECL_READ_IMAGE0(int_clamping_fix, image_type, > \ > + image_data_type, suffix, coord_type) > \ > + OVERLOADABLE image_data_type read_image ##suffix(image_type > cl_image, \ > + const sampler_t sampler, > \ > + coord_type coord) > \ > + { > \ > + GET_IMAGE(cl_image, surface_id); > \ > + coord = __gen_validate_array_index(coord, cl_image); > \ > + if (int_clamping_fix && __gen_sampler_need_fix(sampler)) > \ > + return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 1); > \ > + return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); > \ > } > > -#define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \ > - OVERLOADABLE void write_image ##suffix(image_type cl_image, > coord_type coord, type color)\ > - {\ > - GET_IMAGE(cl_image, surface_id);\ > - __gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id, > coord, color));\ > +// For float coordinates > +#define DECL_READ_IMAGE1(int_clamping_fix, image_type, > \ > + image_data_type, suffix, coord_type) > \ > + OVERLOADABLE image_data_type read_image ##suffix(image_type > cl_image, \ > + const sampler_t sampler, > \ > + coord_type coord) > \ > + { > \ > + GET_IMAGE(cl_image, surface_id); > \ > + coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); > \ > + if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { > \ > + if (__gen_sampler_need_fix(sampler)) { > \ > + if (GEN_FIX_FLOAT_ROUNDING && > \ > + __gen_sampler_need_rounding_fix(sampler)) > \ > 
+ tmpCoord = __gen_fixup_float_coord(tmpCoord); > \ > + if (int_clamping_fix) { > \ > + if (sampler & CLK_NORMALIZED_COORDS_TRUE) > \ > + tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); > \ > + tmpCoord = __gen_fixup_neg_boundary(tmpCoord); > \ > + return __gen_ocl_read_image ##suffix( > \ > + surface_id, sampler, tmpCoord, 1); > \ > + } > \ > + } > \ > + } > \ > + return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, > 0); \ > } > > -#define DECL_IMAGE_INFO_COMMON(image_type) \ > - OVERLOADABLE int get_image_channel_data_type(image_type image)\ > - { \ > - GET_IMAGE(image, surface_id);\ > - return __gen_ocl_get_image_channel_data_type(surface_id); \ > - }\ > - OVERLOADABLE int get_image_channel_order(image_type image)\ > - { \ > - GET_IMAGE(image, surface_id);\ > - return __gen_ocl_get_image_channel_order(surface_id); \ > - } \ > - OVERLOADABLE int get_image_width(image_type image) \ > - { \ > - GET_IMAGE(image, surface_id); \ > - return __gen_ocl_get_image_width(surface_id); \ > +#define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, > \ > + suffix, coord_type) > \ > + OVERLOADABLE image_data_type read_image ##suffix(image_type > cl_image, \ > + coord_type coord) > \ > + { > \ > + GET_IMAGE(cl_image, surface_id); > \ > + coord = __gen_validate_array_index(coord, cl_image); > \ > + return __gen_ocl_read_image ##suffix( > \ > + surface_id, CLK_NORMALIZED_COORDS_FALSE | > CLK_ADDRESS_NONE \ > + | CLK_FILTER_NEAREST, coord, 0); > \ > } > > -// 1D > -#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix) > \ > - DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int, 1) > \ > - DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, > suffix, float, 1) \ > - DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int, 1) > \ > - DECL_WRITE_IMAGE(image_type, type, suffix, int) > \ > - DECL_WRITE_IMAGE(image_type, type, suffix, float) > - > -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1 > -#define 
EXPEND_READ_COORDF(id, sampler, coord) id, sampler, > (float)coord > -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord > < 0 ? -1 : coord), 1 > -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = > srcCoord * __gen_ocl_get_image_width(id); > -#define EXPEND_WRITE_COORD(id, coord, color) id, coord, color > -#define GET_IMAGE_ARRAY_SIZE(a,b,c,d) > - > -#define FIXUP_FLOAT_COORD(tmpCoord) \ > - { \ > - if (tmpCoord < 0 && tmpCoord > -0x1p-20f) \ > - tmpCoord += -0x1p-9f; \ > +#define DECL_WRITE_IMAGE(image_type, image_data_type, suffix, > coord_type) \ > + OVERLOADABLE void write_image ##suffix(image_type cl_image, > \ > + coord_type coord, > \ > + image_data_type color) > \ > + { > \ > + GET_IMAGE(cl_image, surface_id); > \ > + coord_type fixedCoord = __gen_validate_array_index(coord, cl_image); > \ > + __gen_ocl_write_image ##suffix(surface_id, fixedCoord, color); > \ > } > > -DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i) > -DECL_IMAGE(GEN_FIX_1, image1d_t, uint4, ui) > -DECL_IMAGE(0, image1d_t, float4, f) > -DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, int4, i) > -DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, uint4, ui) > -DECL_IMAGE(0, image1d_buffer_t, float4, f) > +#define int1 int > +#define float1 float > > -// 1D Info > -DECL_IMAGE_INFO_COMMON(image1d_t) > -DECL_IMAGE_INFO_COMMON(image1d_buffer_t) > > -#undef EXPEND_READ_COORD > -#undef EXPEND_READ_COORDF > -#undef EXPEND_READ_COORDI > -#undef DENORMALIZE_COORD > -#undef EXPEND_WRITE_COORD > -#undef FIXUP_FLOAT_COORD > -#undef DECL_IMAGE > -// End of 1D > - > -#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n) > \ > - DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n) > \ > - DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, > suffix, float ##n, n) \ > - DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n) > \ > - DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) > \ > - DECL_WRITE_IMAGE(image_type, type, suffix, float ## n) > 
-// 2D > -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, > coord.s1, 1 > -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, > (float)coord.s0, (float)coord.s1 > -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, > (int)(coord.s0 < 0 ? -1 : coord.s0), \ > - (int)(coord.s1 < 0 ? -1 : > coord.s1), 1 > -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = > srcCoord.x * __gen_ocl_get_image_width(id); \ > - dstCoord.y = srcCoord.y * > __gen_ocl_get_image_height(id); > -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, > color > - > -#define FIXUP_FLOAT_COORD(tmpCoord) \ > - { \ > - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ > - tmpCoord.s0 += -0x1p-9f; \ > - if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \ > - tmpCoord.s1 += -0x1p-9f; \ > +#define DECL_IMAGE(int_clamping_fix, image_type, image_data_type, > suffix, n) \ > + DECL_READ_IMAGE0(int_clamping_fix, image_type, > \ > + image_data_type, suffix, int ##n) > \ > + DECL_READ_IMAGE1(int_clamping_fix, image_type, > \ > + image_data_type, suffix, float ##n) > \ > + DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, suffix, > int ##n) \ > + DECL_WRITE_IMAGE(image_type, image_data_type, suffix, int ## n) > \ > + > +// 1D > +#define DECL_IMAGE_TYPE(image_type, n) > \ > + DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, int4, i, n) > \ > + DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, uint4, ui, n) > \ > + DECL_IMAGE(0, image_type, float4, f, n) > + > +DECL_IMAGE_TYPE(image1d_t, 1) > +DECL_IMAGE_TYPE(image1d_buffer_t, 1) > +DECL_IMAGE_TYPE(image2d_t, 2) > +DECL_IMAGE_TYPE(image3d_t, 4) > +DECL_IMAGE_TYPE(image3d_t, 3) > +DECL_IMAGE_TYPE(image2d_array_t, 4) > +DECL_IMAGE_TYPE(image2d_array_t, 3) > + > +// For 1D Array: > +// fixup_1darray_coord functions are to convert 1d array coord to 2d array > coord > +// and the caller must set the sampler offset to 2 by using this converted > coord. 
> +// It is used to work around an image 1d array restrication which could not > set > +// ai in the LD message. We solve it by fake the same image as a 2D array, > and > +// then access it by LD message as a 3D sufface, treat the ai as the w > coordinate. > +INLINE_OVERLOADABLE float4 __gen_fixup_1darray_coord(float2 coord, > image1d_array_t image) > +{ > + float4 newCoord; > + newCoord.s0 = coord.s0 < 0 ? -1 : coord.s0; > + newCoord.s1 = 0; > + newCoord.s2 = coord.s1; > + newCoord.s3 = 0; > + return newCoord; > +} > + > +INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, > image1d_array_t image) > +{ > + int4 newCoord; > + newCoord.s0 = coord.s0; > + newCoord.s1 = 0; > + newCoord.s2 = coord.s1; > + newCoord.s3 = 0; > + return newCoord; > +} > + > +// For integer coordinates > +#define DECL_READ_IMAGE0_1DArray(int_clamping_fix, > \ > + image_data_type, suffix, coord_type) > \ > + OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t > cl_image, \ > + const sampler_t sampler, > \ > + coord_type coord) > \ > + { > \ > + GET_IMAGE(cl_image, surface_id); > \ > + coord = __gen_validate_array_index(coord, cl_image); > \ > + if (int_clamping_fix && __gen_sampler_need_fix(sampler)) { > \ > + int4 newCoord = __gen_fixup_1darray_coord(coord, cl_image); > \ > + return __gen_ocl_read_image ##suffix(surface_id, sampler, newCoord, > 2); \ > + } > \ > + return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); > \ > } > > -DECL_IMAGE(GEN_FIX_1, image2d_t, int4, i, 2) > -DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2) > -DECL_IMAGE(0, image2d_t, float4, f, 2) > - > -// 1D Array > -#undef GET_IMAGE_ARRAY_SIZE > -#undef EXPEND_READ_COORD > -#undef EXPEND_READ_COORDF > -#undef EXPEND_READ_COORDI > -#undef DENORMALIZE_COORD > -#undef EXPEND_WRITE_COORD > -#undef FIXUP_FLOAT_COORD > - > -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, > (int)0, ai, 2 > -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, > (float)coord.s0, 
(float)ai > -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, > (int)(coord.s0 < 0 ? -1 : coord.s0), 0, (int)ai, 2 > -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = > srcCoord.x * __gen_ocl_get_image_width(id); > -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, > __gen_compute_array_index(coord.s1, cl_image), color > -#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \ > - coord_type ai = __gen_compute_array_index(coord.s1, image); > - > -#define FIXUP_FLOAT_COORD(tmpCoord) \ > - { \ > - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ > - tmpCoord.s0 += -0x1p-9f; \ > +// For float coordiates > +#define DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type, > \ > + suffix, coord_type) > \ > + OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t > cl_image, \ > + const sampler_t sampler, > \ > + coord_type coord) > \ > + { > \ > + GET_IMAGE(cl_image, surface_id); > \ > + coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); > \ > + if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { > \ > + if (__gen_sampler_need_fix(sampler)) { > \ > + if (GEN_FIX_FLOAT_ROUNDING && > \ > + __gen_sampler_need_rounding_fix(sampler)) > \ > + tmpCoord = __gen_fixup_float_coord(tmpCoord); > \ > + if (int_clamping_fix) { > \ > + if (sampler & CLK_NORMALIZED_COORDS_TRUE) > \ > + tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); > \ > + float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image); > \ > + return __gen_ocl_read_image ##suffix( > \ > + surface_id, sampler, newCoord, 2); > \ > + } > \ > + } > \ > + } > \ > + return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, > 0); \ > } > > -DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2) > -DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2) > -DECL_IMAGE(0, image1d_array_t, float4, f, 2) > +#define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix) > \ > + DECL_READ_IMAGE0_1DArray(int_clamping_fix, image_data_type, suffix, > 
int2) \ > + DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type, > \ > + suffix, float2) > \ > + DECL_READ_IMAGE_NOSAMPLER(image1d_array_t, image_data_type, > suffix, int2) \ > + DECL_WRITE_IMAGE(image1d_array_t, image_data_type, suffix, int2) > \ > + > +DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, int4, i) > +DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, uint4, ui) > +DECL_IMAGE_1DArray(0, float4, f) > + > +///////////////////////////////////////////////////////////////////////////// > // > +// Built-in Image Query Functions > +///////////////////////////////////////////////////////////////////////////// > // > +#define DECL_IMAGE_INFO_COMMON(image_type) > \ > + OVERLOADABLE int get_image_channel_data_type(image_type image) > \ > + { > \ > + GET_IMAGE(image, surface_id); > \ > + return __gen_ocl_get_image_channel_data_type(surface_id); > \ > + } > \ > + OVERLOADABLE int get_image_channel_order(image_type image) > \ > + { > \ > + GET_IMAGE(image, surface_id); > \ > + return __gen_ocl_get_image_channel_order(surface_id); > \ > + } > \ > + OVERLOADABLE int get_image_width(image_type image) > \ > + { > \ > + GET_IMAGE(image, surface_id); > \ > + return __gen_ocl_get_image_width(surface_id); > \ > + } > > -// 2D Info > +DECL_IMAGE_INFO_COMMON(image1d_t) > +DECL_IMAGE_INFO_COMMON(image1d_buffer_t) > +DECL_IMAGE_INFO_COMMON(image1d_array_t) > DECL_IMAGE_INFO_COMMON(image2d_t) > +DECL_IMAGE_INFO_COMMON(image3d_t) > +DECL_IMAGE_INFO_COMMON(image2d_array_t) > + > +// 2D extra Info > OVERLOADABLE int get_image_height(image2d_t image) > { > GET_IMAGE(image, surface_id); > @@ -293,90 +578,9 @@ OVERLOADABLE int2 get_image_dim(image2d_t > image) > { > return (int2){get_image_width(image), get_image_height(image)}; > } > +// End of 2D > > -// 1D Array info > -DECL_IMAGE_INFO_COMMON(image1d_array_t) > -OVERLOADABLE size_t get_image_array_size(image1d_array_t image) > -{ > - GET_IMAGE(image, surface_id); > - return __gen_ocl_get_image_depth(surface_id); > -} > - > -#undef 
EXPEND_READ_COORD > -#undef EXPEND_READ_COORDI > -#undef EXPEND_READ_COORDF > -#undef DENORMALIZE_COORD > -#undef EXPEND_WRITE_COORD > -#undef FIXUP_FLOAT_COORD > -#undef GET_IMAGE_ARRAY_SIZE > -// End of 2D and 1D Array > - > -// 3D > -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, > coord.s1, coord.s2, 1 > -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, > (float)coord.s0, (float)coord.s1, (float)coord.s2 > -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) > (coord.s0 < 0 ? -1 : coord.s0), \ > - (int)(coord.s1 < 0 ? -1 : > coord.s1), (int)(coord.s2 < 0 ? > -1 : coord.s2), 1 > -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = > srcCoord.x * __gen_ocl_get_image_width(id); \ > - dstCoord.y = srcCoord.y * > __gen_ocl_get_image_height(id); \ > - dstCoord.z = srcCoord.z * > __gen_ocl_get_image_depth(id); > -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, > coord.s2, color > - > -#define FIXUP_FLOAT_COORD(tmpCoord) \ > - { \ > - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ > - tmpCoord.s0 += -0x1p-9f; \ > - if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \ > - tmpCoord.s1 += -0x1p-9f; \ > - if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f) \ > - tmpCoord.s2 += -0x1p-9f; \ > - } > -#define GET_IMAGE_ARRAY_SIZE(a,b,c,d) > - > -DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4) > -DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4) > -DECL_IMAGE(0, image3d_t, float4, f, 4) > - > -DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3) > -DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3) > -DECL_IMAGE(0, image3d_t, float4, f, 3) > - > -#undef EXPEND_READ_COORD > -#undef EXPEND_READ_COORDF > -#undef EXPEND_READ_COORDI > -#undef DENORMALIZE_COORD > -#undef EXPEND_WRITE_COORD > -#undef FIXUP_FLOAT_COORD > -#undef GET_IMAGE_ARRAY_SIZE > - > -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, > coord.s1, ai, 1 > -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, > (float)coord.s0, 
(float)coord.s1, (float)ai > -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) > (coord.s0 < 0 ? -1 : coord.s0), \ > - (int)(coord.s1 < 0 ? -1 : > coord.s1), (int)ai, 1 > -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = > srcCoord.x * __gen_ocl_get_image_width(id); \ > - dstCoord.y = srcCoord.y * > __gen_ocl_get_image_height(id); > -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, > __gen_compute_array_index(coord.s2, cl_image), color > - > -#define FIXUP_FLOAT_COORD(tmpCoord) \ > - { \ > - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ > - tmpCoord.s0 += -0x1p-9f; \ > - if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \ > - tmpCoord.s1 += -0x1p-9f; \ > - } > -#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \ > - coord_type ai = __gen_compute_array_index(coord.s2, image); > - > -// 2D Array > -DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4) > -DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4) > -DECL_IMAGE(0, image2d_array_t, float4, f, 4) > - > -DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 3) > -DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 3) > -DECL_IMAGE(0, image2d_array_t, float4, f, 3) > - > -// 3D Info > -DECL_IMAGE_INFO_COMMON(image3d_t) > +// 3D extra Info > OVERLOADABLE int get_image_height(image3d_t image) > { > GET_IMAGE(image, surface_id); > @@ -389,11 +593,13 @@ OVERLOADABLE int get_image_depth(image3d_t > image) > } > OVERLOADABLE int4 get_image_dim(image3d_t image) > { > - return (int4){get_image_width(image), get_image_height(image), > get_image_depth(image), 0}; > + return (int4) (get_image_width(image), > + get_image_height(image), > + get_image_depth(image), > + 0); > } > > -// 2D Array Info > -DECL_IMAGE_INFO_COMMON(image2d_array_t) > +// 2D Array extra Info > OVERLOADABLE int get_image_height(image2d_array_t image) > { > GET_IMAGE(image, surface_id); > @@ -409,21 +615,10 @@ OVERLOADABLE size_t > get_image_array_size(image2d_array_t image) > return 
__gen_ocl_get_image_depth(surface_id); > } > > -#undef EXPEND_READ_COORD > -#undef EXPEND_READ_COORDF > -#undef EXPEND_READ_COORDI > -#undef DENORMALIZE_COORD > -#undef EXPEND_WRITE_COORD > -#undef FIXUP_FLOAT_COORD > -#undef GET_IMAGE_ARRAY_SIZE > -// End of 3D and 2D Array > - > -#undef DECL_IMAGE > -#undef DECL_READ_IMAGE > -#undef DECL_READ_IMAGE_NOSAMPLER > -#undef DECL_WRITE_IMAGE > -#undef GEN_FIX_1 > -// End of Image > - > - > -#undef GET_IMAGE > +// 1D Array info > +OVERLOADABLE size_t get_image_array_size(image1d_array_t image) > +{ > + GET_IMAGE(image, surface_id); > + return __gen_ocl_get_image_depth(surface_id); > +} > +// End of 1DArray > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index a438f09..afaa4a5 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -257,9 +257,10 @@ namespace gbe > /*! Get number of element to process dealing either with a vector or a > scalar > * value > */ > - static ir::Type getVectorInfo(ir::Context &ctx, Type *llvmType, Value > *value, uint32_t &elemNum, bool useUnsigned = false) > + static ir::Type getVectorInfo(ir::Context &ctx, Value *value, uint32_t > &elemNum, bool useUnsigned = false) > { > ir::Type type; > + Type *llvmType = value->getType(); > if (llvmType->isVectorTy() == true) { > VectorType *vectorType = cast<VectorType>(llvmType); > Type *elementType = vectorType->getElementType(); > @@ -629,6 +630,7 @@ namespace gbe > void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode); > > uint8_t appendSampler(CallSite::arg_iterator AI); > + uint8_t getImageID(CallInst &I); > > // These instructions are not supported at all > void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;} > @@ -2526,8 +2528,8 @@ namespace gbe > Value *srcValue = I.getOperand(0); > Value *dstValue = &I; > uint32_t srcElemNum = 0, dstElemNum = 0 ; > - ir::Type srcType = getVectorInfo(ctx, srcValue->getType(), srcValue, > srcElemNum); > - 
ir::Type dstType = getVectorInfo(ctx, dstValue->getType(), dstValue, > dstElemNum); > + ir::Type srcType = getVectorInfo(ctx, srcValue, srcElemNum); > + ir::Type dstType = getVectorInfo(ctx, dstValue, dstElemNum); > // As long and double are not compatible in register storage > // and we do not support double yet, simply put an assert here > GBE_ASSERT(!(srcType == ir::TYPE_S64 && dstType == > ir::TYPE_DOUBLE)); > @@ -2927,7 +2929,7 @@ namespace gbe > { > // dst is a 4 elements vector. We allocate all 4 registers here. > uint32_t elemNum; > - (void)getVectorInfo(ctx, I.getType(), &I, elemNum); > + (void)getVectorInfo(ctx, &I, elemNum); > GBE_ASSERT(elemNum == 4); > this->newRegister(&I); > break; > @@ -3055,6 +3057,15 @@ namespace gbe > return index; > } > > + uint8_t GenWriter::getImageID(CallInst &I) { > + PtrOrigMapIter iter = pointerOrigMap.find(&I); > + GBE_ASSERT(iter != pointerOrigMap.end()); > + SmallVectorImpl<Value *> &origins = iter->second; > + GBE_ASSERT(origins.size() == 1); > + const ir::Register imageReg = this->getRegister(origins[0]); > + return ctx.getFunction().getImageSet()->getIdx(imageReg); > + } > + > void GenWriter::emitCallInst(CallInst &I) { > if (Function *F = I.getCalledFunction()) { > if (F->getIntrinsicID() != 0) { > @@ -3218,7 +3229,6 @@ namespace gbe > default: NOT_IMPLEMENTED; > } > } else { > - int image_dim; > // Get the name of the called function and handle it > Value *Callee = I.getCalledValue(); > const std::string fnName = Callee->getName(); > @@ -3334,13 +3344,13 @@ namespace gbe > case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE: > case GEN_OCL_GET_IMAGE_CHANNEL_ORDER: > { > - GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this- > >getRegister(*AI); ++AI; > + const uint8_t imageID = getImageID(I); > + GBE_ASSERT(AI != AE); ++AI; > const ir::Register reg = this->getRegister(&I, 0); > int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH; > - const uint8_t surfaceID = ctx.getFunction().getImageSet()- > >getIdx(surfaceReg); > - 
ir::ImageInfoKey key(surfaceID, infoType); > + ir::ImageInfoKey key(imageID, infoType); > const ir::Register infoReg = ctx.getFunction().getImageSet()- > >appendInfo(key, &ctx); > - ctx.GET_IMAGE_INFO(infoType, reg, surfaceID, infoReg); > + ctx.GET_IMAGE_INFO(infoType, reg, imageID, infoReg); > break; > } > > @@ -3350,69 +3360,75 @@ namespace gbe > case GEN_OCL_READ_IMAGE_I_1D_I: > case GEN_OCL_READ_IMAGE_UI_1D_I: > case GEN_OCL_READ_IMAGE_F_1D_I: > - image_dim = 1; > - goto handle_read_image; > case GEN_OCL_READ_IMAGE_I_2D: > case GEN_OCL_READ_IMAGE_UI_2D: > case GEN_OCL_READ_IMAGE_F_2D: > case GEN_OCL_READ_IMAGE_I_2D_I: > case GEN_OCL_READ_IMAGE_UI_2D_I: > case GEN_OCL_READ_IMAGE_F_2D_I: > - image_dim = 2; > - goto handle_read_image; > case GEN_OCL_READ_IMAGE_I_3D: > case GEN_OCL_READ_IMAGE_UI_3D: > case GEN_OCL_READ_IMAGE_F_3D: > case GEN_OCL_READ_IMAGE_I_3D_I: > case GEN_OCL_READ_IMAGE_UI_3D_I: > case GEN_OCL_READ_IMAGE_F_3D_I: > - image_dim = 3; > -handle_read_image: > { > - GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this- > >getRegister(*AI); ++AI; > - const uint8_t surfaceID = ctx.getFunction().getImageSet()- > >getIdx(surfaceReg); > + const uint8_t imageID = getImageID(I); > + GBE_ASSERT(AI != AE); ++AI; > GBE_ASSERT(AI != AE); > const uint8_t sampler = this->appendSampler(AI); > - ++AI; > - > - ir::Register ucoord; > - ir::Register vcoord; > - ir::Register wcoord; > - > - GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI; > - if (image_dim > 1) { > - GBE_ASSERT(AI != AE); > - vcoord = this->getRegister(*AI); > - ++AI; > - } else { > - vcoord = ir::ocl::invalid; > - } > - > - if (image_dim > 2) { > - GBE_ASSERT(AI != AE); > - wcoord = this->getRegister(*AI); > - ++AI; > - } else { > - wcoord = ir::ocl::invalid; > - } > + ++AI; GBE_ASSERT(AI != AE); > + uint32_t coordNum; > + (void)getVectorInfo(ctx, *AI, coordNum); > + if (coordNum == 4) > + coordNum = 3; > + const uint32_t imageDim = coordNum; > + GBE_ASSERT(imageDim >= 1 && imageDim 
<= 3); > > - vector<ir::Register> dstTupleData, srcTupleData; > - const uint32_t elemNum = 4; > - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { > - const ir::Register reg = this->getRegister(&I, elemID); > - dstTupleData.push_back(reg); > - } > - srcTupleData.push_back(ucoord); > - srcTupleData.push_back(vcoord); > - srcTupleData.push_back(wcoord); > uint8_t samplerOffset = 0; > + Value *coordVal = *AI; > + ++AI; GBE_ASSERT(AI != AE); > + Value *samplerOffsetVal = *AI; > #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND > - GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast<Constant>(*AI); > + Constant *CPV = dyn_cast<Constant>(samplerOffsetVal); > assert(CPV); > const ir::Immediate &x = processConstantImm(CPV); > GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() == > ir::TYPE_S32, "Invalid sampler type"); > samplerOffset = x.getIntegerValue(); > #endif > + bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D; > + bool requiredFloatCoord = samplerOffset == 0; > + > + vector<ir::Register> dstTupleData, srcTupleData; > + for (uint32_t elemID = 0; elemID < 3; elemID++) { > + ir::Register reg; > + > + if (elemID < imageDim) > + reg = this->getRegister(coordVal, elemID); > + else > + reg = ir::ocl::invalid; > + > + if (isFloatCoord == requiredFloatCoord) > + srcTupleData.push_back(reg); > + else if (!requiredFloatCoord) { > + ir::Register intCoordReg = > ctx.reg(ir::RegisterFamily::FAMILY_DWORD); > + ctx.CVT(ir::TYPE_S32, ir::TYPE_FLOAT, intCoordReg, reg); > + srcTupleData.push_back(intCoordReg); > + } else { > + ir::Register floatCoordReg = > ctx.reg(ir::RegisterFamily::FAMILY_DWORD); > + ctx.CVT(ir::TYPE_FLOAT, ir::TYPE_S32, floatCoordReg, reg); > + srcTupleData.push_back(floatCoordReg); > + } > + } > + > + uint32_t elemNum; > + (void)getVectorInfo(ctx, &I, elemNum); > + GBE_ASSERT(elemNum == 4); > + > + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { > + const ir::Register reg = this->getRegister(&I, elemID); > + dstTupleData.push_back(reg); > + } > 
const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], > elemNum); > const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3); > > @@ -3445,58 +3461,46 @@ handle_read_image: > GBE_ASSERT(0); // never been here. > } > > - bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D; > - > - ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == > ir::TYPE_FLOAT, > - isFloatCoord, sampler, samplerOffset); > + ctx.SAMPLE(imageID, dstTuple, srcTuple, dstType == > ir::TYPE_FLOAT, > + requiredFloatCoord, sampler, samplerOffset); > break; > } > > case GEN_OCL_WRITE_IMAGE_I_1D: > case GEN_OCL_WRITE_IMAGE_UI_1D: > case GEN_OCL_WRITE_IMAGE_F_1D: > - image_dim = 1; > - goto handle_write_image; > case GEN_OCL_WRITE_IMAGE_I_2D: > case GEN_OCL_WRITE_IMAGE_UI_2D: > case GEN_OCL_WRITE_IMAGE_F_2D: > - image_dim = 2; > - goto handle_write_image; > case GEN_OCL_WRITE_IMAGE_I_3D: > case GEN_OCL_WRITE_IMAGE_UI_3D: > case GEN_OCL_WRITE_IMAGE_F_3D: > - image_dim = 3; > -handle_write_image: > { > - GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this- > >getRegister(*AI); ++AI; > - const uint8_t surfaceID = ctx.getFunction().getImageSet()- > >getIdx(surfaceReg); > - ir::Register ucoord, vcoord, wcoord; > - > - GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI; > + const uint8_t imageID = getImageID(I); > + GBE_ASSERT(AI != AE); ++AI; GBE_ASSERT(AI != AE); > + uint32_t coordNum; > + (void)getVectorInfo(ctx, *AI, coordNum); > + if (coordNum == 4) > + coordNum = 3; > + const uint32_t imageDim = coordNum; > + vector<ir::Register> srcTupleData; > + GBE_ASSERT(imageDim >= 1 && imageDim <= 3); > > - if (image_dim > 1) { > - GBE_ASSERT(AI != AE); > - vcoord = this->getRegister(*AI); > - ++AI; > - } else > - vcoord = ir::ocl::invalid; > - > - if (image_dim > 2) { > - GBE_ASSERT(AI != AE); > - wcoord = this->getRegister(*AI); > - ++AI; > - } else { > - wcoord = ir::ocl::invalid; > - } > + for (uint32_t elemID = 0; elemID < 3; elemID++) { > + ir::Register reg; > > - 
GBE_ASSERT(AI != AE); > - vector<ir::Register> srcTupleData; > + if (elemID < imageDim) > + reg = this->getRegister(*AI, elemID); > + else > + reg = ir::ocl::invalid; > > - srcTupleData.push_back(ucoord); > - srcTupleData.push_back(vcoord); > - srcTupleData.push_back(wcoord); > + srcTupleData.push_back(reg); > + } > + ++AI; GBE_ASSERT(AI != AE); > + uint32_t elemNum; > + (void)getVectorInfo(ctx, *AI, elemNum); > + GBE_ASSERT(elemNum == 4); > > - const uint32_t elemNum = 4; > for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { > const ir::Register reg = this->getRegister(*AI, elemID); > srcTupleData.push_back(reg); > @@ -3523,7 +3527,7 @@ handle_write_image: > GBE_ASSERT(0); // never been here. > } > > - ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32); > + ctx.TYPED_WRITE(imageID, srcTuple, srcType, ir::TYPE_U32); > break; > } > case GEN_OCL_MUL_HI_INT: > diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index 7434c78..8d55c3f 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -49,35 +49,35 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, > __gen_ocl_force_simd16) > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D, > _Z21__gen_ocl_read_imageijtfj) > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D, > _Z22__gen_ocl_read_imageuijtfj) > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D, > _Z21__gen_ocl_read_imagefjtfj) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, > _Z21__gen_ocl_read_imageijtffj) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, > _Z22__gen_ocl_read_imageuijtffj) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, > _Z21__gen_ocl_read_imagefjtffj) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, > _Z21__gen_ocl_read_imageijtfffj) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, > _Z22__gen_ocl_read_imageuijtfffj) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, > _Z21__gen_ocl_read_imagefjtfffj) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, > _Z21__gen_ocl_read_imageijtDv2_fj) > 
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, > _Z22__gen_ocl_read_imageuijtDv2_fj) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, > _Z21__gen_ocl_read_imagefjtDv2_fj) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, > _Z21__gen_ocl_read_imageijtDv4_fj) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, > _Z22__gen_ocl_read_imageuijtDv4_fj) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, > _Z21__gen_ocl_read_imagefjtDv4_fj) > // work around read image with the LD message. The coords are integer > type. > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I, > _Z21__gen_ocl_read_imageijtij) > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I, > _Z22__gen_ocl_read_imageuijtij) > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I, > _Z21__gen_ocl_read_imagefjtij) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, > _Z21__gen_ocl_read_imageijtiij) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, > _Z22__gen_ocl_read_imageuijtiij) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, > _Z21__gen_ocl_read_imagefjtiij) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, > _Z21__gen_ocl_read_imageijtiiij) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, > _Z22__gen_ocl_read_imageuijtiiij) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, > _Z21__gen_ocl_read_imagefjtiiij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, > _Z21__gen_ocl_read_imageijtDv2_ij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, > _Z22__gen_ocl_read_imageuijtDv2_ij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, > _Z21__gen_ocl_read_imagefjtDv2_ij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, > _Z21__gen_ocl_read_imageijtDv4_ij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, > _Z22__gen_ocl_read_imageuijtDv4_ij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, > _Z21__gen_ocl_read_imagefjtDv4_ij) > > // To write_image functions. 
> DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D, > _Z22__gen_ocl_write_imageijiDv4_i) > DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D, > _Z23__gen_ocl_write_imageuijiDv4_j) > DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D, > _Z22__gen_ocl_write_imagefjiDv4_f) > > -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, > _Z22__gen_ocl_write_imageijiiDv4_i) > -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, > _Z23__gen_ocl_write_imageuijiiDv4_j) > -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, > _Z22__gen_ocl_write_imagefjiiDv4_f) > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, > _Z22__gen_ocl_write_imageijDv2_iDv4_i) > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, > _Z23__gen_ocl_write_imageuijDv2_iDv4_j) > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, > _Z22__gen_ocl_write_imagefjDv2_iDv4_f) > > -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, > _Z22__gen_ocl_write_imageijiiiDv4_i) > -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, > _Z23__gen_ocl_write_imageuijiiiDv4_j) > -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, > _Z22__gen_ocl_write_imagefjiiiDv4_f) > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, > _Z22__gen_ocl_write_imageijDv4_iS_) > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, > _Z23__gen_ocl_write_imageuijDv4_iDv4_j) > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, > _Z22__gen_ocl_write_imagefjDv4_iDv4_f) > > // To get image info function > DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH, > __gen_ocl_get_image_width) > diff --git a/backend/src/llvm/llvm_scalarize.cpp > b/backend/src/llvm/llvm_scalarize.cpp > index 5450a2b..baf526b 100644 > --- a/backend/src/llvm/llvm_scalarize.cpp > +++ b/backend/src/llvm/llvm_scalarize.cpp > @@ -648,7 +648,7 @@ namespace gbe { > > // Get the function arguments > CallSite CS(call); > - CallSite::arg_iterator CI = CS.arg_begin() + 2; > + CallSite::arg_iterator CI = CS.arg_begin() + 1; > > switch (it->second) { > default: break; > @@ -661,8 +661,7 @@ namespace gbe { > case GEN_OCL_READ_IMAGE_I_3D: > case GEN_OCL_READ_IMAGE_UI_3D: > case GEN_OCL_READ_IMAGE_F_3D: > - > - case GEN_OCL_READ_IMAGE_I_1D_I: > + case 
GEN_OCL_READ_IMAGE_I_1D_I: > case GEN_OCL_READ_IMAGE_UI_1D_I: > case GEN_OCL_READ_IMAGE_F_1D_I: > case GEN_OCL_READ_IMAGE_I_2D_I: > @@ -674,6 +673,9 @@ namespace gbe { > case GEN_OCL_GET_IMAGE_WIDTH: > case GEN_OCL_GET_IMAGE_HEIGHT: > { > + ++CI; > + if ((*CI)->getType()->isVectorTy()) > + *CI = InsertToVector(call, *CI); > setAppendPoint(call); > extractFromVector(call); > break; > @@ -681,15 +683,16 @@ namespace gbe { > case GEN_OCL_WRITE_IMAGE_I_3D: > case GEN_OCL_WRITE_IMAGE_UI_3D: > case GEN_OCL_WRITE_IMAGE_F_3D: > - CI++; > case GEN_OCL_WRITE_IMAGE_I_2D: > case GEN_OCL_WRITE_IMAGE_UI_2D: > case GEN_OCL_WRITE_IMAGE_F_2D: > - CI++; > case GEN_OCL_WRITE_IMAGE_I_1D: > case GEN_OCL_WRITE_IMAGE_UI_1D: > case GEN_OCL_WRITE_IMAGE_F_1D: > { > + if ((*CI)->getType()->isVectorTy()) > + *CI = InsertToVector(call, *CI); > + ++CI; > *CI = InsertToVector(call, *CI); > break; > } > -- > 1.8.3.2
_______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
