[Mesa-dev] [PATCH] fbo-depth-array: Check completeness with a color texture

2014-10-08 Thread Vincent Lejeune
---
 tests/all.py|  2 +-
 tests/fbo/fbo-depth-array.c | 36 ++--
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/tests/all.py b/tests/all.py
index 586cead..9aa600f 100644
--- a/tests/all.py
+++ b/tests/all.py
@@ -2831,7 +2831,7 @@ add_shader_test_dir(ext_texture_array,
 add_msaa_visual_plain_tests(ext_texture_array, 'copyteximage 1D_ARRAY')
 add_msaa_visual_plain_tests(ext_texture_array, 'copyteximage 2D_ARRAY')
 add_plain_test(ext_texture_array, 'fbo-array')
-for test in ('depth-clear', 'depth-layered-clear', 'depth-draw', 
'fs-writes-depth',
+for test in ('depth-clear', 'depth-layered-clear', 
'depth-stencil-color-clear', 'depth-draw', 'fs-writes-depth',
  'stencil-clear', 'stencil-layered-clear', 'stencil-draw', 
'fs-writes-stencil'):
 add_concurrent_test(ext_texture_array, 'fbo-depth-array ' + test)
 add_plain_test(ext_texture_array, 'array-texture')
diff --git a/tests/fbo/fbo-depth-array.c b/tests/fbo/fbo-depth-array.c
index 84370e4..dde807d 100644
--- a/tests/fbo/fbo-depth-array.c
+++ b/tests/fbo/fbo-depth-array.c
@@ -46,6 +46,7 @@
 enum {
CLEAR,
LAYERED_CLEAR,
+   LAYERED_DEPTH_STENCIL_COLOR_CLEAR,
DRAW,
FS_WRITES_VALUE,
 };
@@ -135,11 +136,13 @@ static GLuint program_stencil_output;
 static GLuint program_texdepth;
 static GLuint program_texstencil;
 
+static GLuint color_texture;
+
 
 static float
 get_depth_value(unsigned layer)
 {
-   if (test == LAYERED_CLEAR)
+   if (test == LAYERED_CLEAR || LAYERED_DEPTH_STENCIL_COLOR_CLEAR)
return 0.4; /* constant */
else
return (double)(layer+1) / (layers+1);
@@ -181,6 +184,10 @@ parse_args(int argc, char **argv)
test = LAYERED_CLEAR;
puts(Testing layered glClear);
}
+   else if (!strcmp(argv[i], depth-stencil-color-layered-clear)) 
{
+   test = LAYERED_DEPTH_STENCIL_COLOR_CLEAR;
+   puts(Testing depth stencil color layered glClear);
+   }
else if (!strcmp(argv[i], depth-draw)) {
test = DRAW;
puts(Testing drawing);
@@ -224,6 +231,14 @@ create_array_fbo(void)
int layer;
 
glGenTextures(1, tex);
+   glGenTextures(1, color_texture);
+
+   glBindTexture(GL_TEXTURE_2D_ARRAY, color_texture);
+   glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA,
+width, height, layers, 0,
+GL_RGBA, GL_UNSIGNED_INT, NULL);
+   assert(glGetError() == 0);
+
glBindTexture(GL_TEXTURE_2D_ARRAY, tex);
assert(glGetError() == 0);
 
@@ -241,11 +256,19 @@ create_array_fbo(void)
 
/* draw something into each layer of the array texture */
for (layer = 0; layer  layers; layer++) {
-   if (test == LAYERED_CLEAR) {
-   glFramebufferTexture(GL_FRAMEBUFFER,
-test_stencil ? 
GL_STENCIL_ATTACHMENT :
-   GL_DEPTH_ATTACHMENT,
-tex, 0);
+   if (test == LAYERED_CLEAR || test == 
LAYERED_DEPTH_STENCIL_COLOR_CLEAR) {
+   if (test == LAYERED_DEPTH_STENCIL_COLOR_CLEAR) {
+   glFramebufferTexture(GL_FRAMEBUFFER,
+GL_COLOR_ATTACHMENT0_EXT,
+color_texture, 0);
+   glFramebufferTexture(GL_FRAMEBUFFER,
+   
GL_DEPTH_STENCIL_ATTACHMENT,
+tex, 0);
+   } else
+   glFramebufferTexture(GL_FRAMEBUFFER,
+test_stencil ? 
GL_STENCIL_ATTACHMENT :
+   
GL_DEPTH_ATTACHMENT,
+tex, 0);
 
status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
if (status != GL_FRAMEBUFFER_COMPLETE) {
@@ -433,6 +456,7 @@ test_once(void)
}
 
glDeleteTextures(1, tex);
+   glDeleteTextures(1, color_texture);
assert(glGetError() == 0);
return pass;
 }
-- 
1.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] Copy Layered field in reuse_framebuffer_texture_attachment

2014-09-11 Thread Vincent Lejeune
Fix #83596
https://bugs.freedesktop.org/show_bug.cgi?id=83596
---
 src/mesa/main/fbobject.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index ae3a418..5eaf1a3 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -2299,6 +2299,7 @@ reuse_framebuffer_texture_attachment(struct 
gl_framebuffer *fb,
dst_att-Complete = src_att-Complete;
dst_att-TextureLevel = src_att-TextureLevel;
dst_att-Zoffset = src_att-Zoffset;
+   dst_att-Layered = src_att-Layered;
 }
 
 
-- 
1.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] PATCHES: R600: Implement work-around for CF stack HW bug

2013-12-20 Thread Vincent Lejeune
Some cosmetic comments below; otherwise the patches are:
Reviewed-by: Vincent Lejeune <vljn at ovi.com>

-OutStreamer.EmitRawText(
-  Twine(; Kernel info:\n) +
-  ; NumSgprs:  + Twine(KernelInfo.NumSGPR) + \n +
-  ; NumVgprs:  + Twine(KernelInfo.NumVGPR) + \n);
+if (STM.getGeneration()  AMDGPUSubtarget::NORTHERN_ISLANDS) {
+
I think it would look cleaner without the empty line here.
+  OutStreamer.EmitRawText(
+Twine(; Kernel info:\n) +
+; NumSgprs:  + Twine(KernelInfo.NumSGPR) + \n +
+; NumVgprs:  + Twine(KernelInfo.NumVGPR) + \n);
+} else {

+void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
+  CFStack::StackItem Item = CFStack::ENTRY;
+  switch(Opcode) {
+  case AMDGPU::CF_PUSH_EG:
+  case AMDGPU::CF_ALU_PUSH_BEFORE:
+if (!isWQM) {
+  if (!ST.hasCaymanISA()  
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
+Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on 
Evergreen/NI
+ // See comment in
+ // CFStack::getSubEntrySize()
+  else if (CurrentEntries  0 
+   ST.getGeneration()  AMDGPUSubtarget::EVERGREEN 
+   !ST.hasCaymanISA() 
+   !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
+Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
+  else
+Item = CFStack::SUB_ENTRY;
+} else {
+  Item = CFStack::ENTRY;
It's a single-line statement; I think it should be written without braces.
+}
+break;
 case AMDGPU::CF_ALU_PUSH_BEFORE:
-  CurrentStack++;
-  MaxStack = std::max(MaxStack, CurrentStack);
-  HasPush = true;
-  if (ST.hasCaymanISA()  CurrentLoopDepth  1) {
+  if (ST.hasCaymanISA()  CFStack.getLoopDepth()  1) {
 BuildMI(MBB, MI, MBB.findDebugLoc(MI), 
 TII-get(AMDGPU::CF_PUSH_EG))
 .addImm(CfCount + 1)
 .addImm(1);
 MI-setDesc(TII-get(AMDGPU::CF_ALU));
 CfCount++;
+CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
+  } else {
+CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
Here too
   }

+bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
+  if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE  ST.hasCaymanISA() 
+  getLoopDepth()  1) {
+return true;
And here too
+  }

Thanks for this patch set; stack bugs are really not easy to spot and fix.
Vincent

 Le Mercredi 11 décembre 2013 19h07, Tom Stellard t...@stellard.net a écrit :
  Hi,
 
 The attached patches implement a work-around for the CF stack HW bug
 that is present on some Evergreen and NI GPUs.
 
 Please Review.
 
 -Tom
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600/llvm: Allow arbitrary amount of temps in tgsi to llvm

2013-12-06 Thread Vincent Lejeune
---
 src/gallium/drivers/radeon/radeon_llvm.h   |  6 +++
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 43 --
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 2cab6b0..00714fb 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -112,6 +112,12 @@ struct radeon_llvm_context {
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
unsigned output_reg_count;
 
+   /** This pointer holds the temporary values.
+ * The number of temporaries used in TGSI cannot be bounded by a maximum
+ * value, so this array must be allocated at runtime.
+ */
+   LLVMValueRef *temps;
+   unsigned temps_count;
LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
 
/*=== Private Members ===*/
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 3bb01ec..4c30de4 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -142,6 +142,13 @@ emit_array_fetch(
return result;
 }
 
+static bool uses_temp_indirect_addressing(
+   struct lp_build_tgsi_context *bld_base)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   return (bld-indirect_files  (1  TGSI_FILE_TEMPORARY));
+}
+
 static LLVMValueRef
 emit_fetch(
struct lp_build_tgsi_context *bld_base,
@@ -184,7 +191,11 @@ emit_fetch(
break;
 
case TGSI_FILE_TEMPORARY:
-   ptr = lp_get_temp_ptr_soa(bld, reg-Register.Index, swizzle);
+   if (uses_temp_indirect_addressing(bld_base)) {
+   ptr = lp_get_temp_ptr_soa(bld, reg-Register.Index, 
swizzle);
+   break;
+   }
+   ptr = ctx-temps[reg-Register.Index * TGSI_NUM_CHANNELS + 
swizzle];
result = LLVMBuildLoad(builder, ptr, );
break;
 
@@ -216,6 +227,7 @@ static void emit_declaration(
const struct tgsi_full_declaration *decl)
 {
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+   unsigned first, last, i, idx;
switch(decl-Declaration.File) {
case TGSI_FILE_ADDRESS:
{
@@ -234,7 +246,23 @@ static void emit_declaration(
case TGSI_FILE_TEMPORARY:
if (decl-Declaration.Array  decl-Array.ArrayID = 
RADEON_LLVM_MAX_ARRAYS)
ctx-arrays[decl-Array.ArrayID - 1] = decl-Range;
-   lp_emit_declaration_soa(bld_base, decl);
+   if (uses_temp_indirect_addressing(bld_base)) {
+   lp_emit_declaration_soa(bld_base, decl);
+   break;
+   }
+   first = decl-Range.First;
+   last = decl-Range.Last;
+   if (!ctx-temps_count) {
+   ctx-temps_count = 
bld_base-info-file_max[TGSI_FILE_TEMPORARY] + 1;
+   ctx-temps = MALLOC(TGSI_NUM_CHANNELS * 
ctx-temps_count * sizeof(LLVMValueRef));
+   }
+   for (idx = first; idx = last; idx++) {
+   for (i = 0; i  TGSI_NUM_CHANNELS; i++) {
+   ctx-temps[idx * TGSI_NUM_CHANNELS + i] =
+   lp_build_alloca(bld_base-base.gallivm, 
bld_base-base.vec_type,
+   temp);
+   }
+   }
break;
 
case TGSI_FILE_INPUT:
@@ -284,6 +312,7 @@ emit_store(
const struct tgsi_opcode_info * info,
LLVMValueRef dst[4])
 {
+   struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld-bld_base.base.gallivm;
struct lp_build_context base = bld-bld_base.base;
@@ -359,7 +388,10 @@ emit_store(
break;
 
case TGSI_FILE_TEMPORARY:
-   temp_ptr = lp_get_temp_ptr_soa(bld, i + 
range.First, chan_index);
+   if 
(uses_temp_indirect_addressing(bld_base))
+   temp_ptr = 
lp_get_temp_ptr_soa(bld, i + range.First, chan_index);
+   else
+   temp_ptr = ctx-temps[(i + 
range.First) * TGSI_NUM_CHANNELS + chan_index];
break;
 
default:
@@ -377,7 +409,9 @@ emit_store(
break;
 
case TGSI_FILE_TEMPORARY:
-   temp_ptr = lp_get_temp_ptr_soa(bld, 

[Mesa-dev] [PATCH] r600/llvm: Allow arbitrary amount of temps in tgsi to llvm

2013-12-04 Thread Vincent Lejeune
---
 src/gallium/drivers/radeon/radeon_llvm.h   |  5 +++
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 41 +++---
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 2cab6b0..6d84f44 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -112,6 +112,11 @@ struct radeon_llvm_context {
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
unsigned output_reg_count;
 
+   /**
+* @brief system_values
+*/
+   LLVMValueRef *temps;
+   unsigned temps_count;
LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
 
/*=== Private Members ===*/
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 3bb01ec..c897b03 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -184,7 +184,11 @@ emit_fetch(
break;
 
case TGSI_FILE_TEMPORARY:
-   ptr = lp_get_temp_ptr_soa(bld, reg-Register.Index, swizzle);
+   if (false) {
+   ptr = lp_get_temp_ptr_soa(bld, reg-Register.Index, 
swizzle);
+   break;
+   }
+   ptr = ctx-temps[reg-Register.Index * TGSI_NUM_CHANNELS + 
swizzle];
result = LLVMBuildLoad(builder, ptr, );
break;
 
@@ -200,6 +204,13 @@ emit_fetch(
return bitcast(bld_base, type, result);
 }
 
+static bool uses_temp_indirect_addressing(
+   struct lp_build_tgsi_context *bld_base)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   return !(bld-indirect_files  (1  TGSI_FILE_TEMPORARY));
+}
+
 static LLVMValueRef fetch_system_value(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register *reg,
@@ -234,7 +245,22 @@ static void emit_declaration(
case TGSI_FILE_TEMPORARY:
if (decl-Declaration.Array  decl-Array.ArrayID = 
RADEON_LLVM_MAX_ARRAYS)
ctx-arrays[decl-Array.ArrayID - 1] = decl-Range;
-   lp_emit_declaration_soa(bld_base, decl);
+   if (uses_temp_indirect_addressing(bld_base)) {
+   lp_emit_declaration_soa(bld_base, decl);
+   break;
+   }
+   unsigned first = decl-Range.First, last = decl-Range.Last;
+   if (!ctx-temps_count) {
+   ctx-temps_count = 
bld_base-info-file_max[TGSI_FILE_TEMPORARY] + 1;
+   ctx-temps = MALLOC(TGSI_NUM_CHANNELS * 
ctx-temps_count * sizeof(LLVMValueRef));
+   }
+   for (unsigned idx = first; idx = last; idx++) {
+   for (unsigned i = 0; i  TGSI_NUM_CHANNELS; i++) {
+   ctx-temps[idx * TGSI_NUM_CHANNELS + i] =
+   lp_build_alloca(bld_base-base.gallivm, 
bld_base-base.vec_type,
+   temp);
+   }
+   }
break;
 
case TGSI_FILE_INPUT:
@@ -284,6 +310,7 @@ emit_store(
const struct tgsi_opcode_info * info,
LLVMValueRef dst[4])
 {
+   struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld-bld_base.base.gallivm;
struct lp_build_context base = bld-bld_base.base;
@@ -359,7 +386,10 @@ emit_store(
break;
 
case TGSI_FILE_TEMPORARY:
-   temp_ptr = lp_get_temp_ptr_soa(bld, i + 
range.First, chan_index);
+   if 
(uses_temp_indirect_addressing(bld_base))
+   temp_ptr = 
lp_get_temp_ptr_soa(bld, i + range.First, chan_index);
+   else
+   temp_ptr = ctx-temps[(i + 
range.First) * TGSI_NUM_CHANNELS + chan_index];
break;
 
default:
@@ -377,7 +407,9 @@ emit_store(
break;
 
case TGSI_FILE_TEMPORARY:
-   temp_ptr = lp_get_temp_ptr_soa(bld, 
reg-Register.Index, chan_index);
+   if (uses_temp_indirect_addressing(bld_base))
+   break;
+   temp_ptr = ctx-temps[ TGSI_NUM_CHANNELS * 
reg-Register.Index + chan_index];
break;
 
default:
@@ -1392,4 +1424,5 @@ void radeon_llvm_dispose(struct 

Re: [Mesa-dev] [PATCH] R600: Make sure OQAP defs and uses happen in the same clause

2013-11-14 Thread Vincent Lejeune
This patch is: Reviewed-by: Vincent Lejeune <vljn at ovi.com>



- Mail original -
 De : Tom Stellard t...@stellard.net
 À : Vincent Lejeune v...@ovi.com
 Cc : mesa-dev@lists.freedesktop.org mesa-dev@lists.freedesktop.org; 
 llvm-comm...@cs.uiuc.edu llvm-comm...@cs.uiuc.edu; Tom Stellard 
 thomas.stell...@amd.com
 Envoyé le : Jeudi 14 novembre 2013 1h53
 Objet : Re: [PATCH] R600: Make sure OQAP defs and uses happen in the same 
 clause
 
 Hi Vincent,
 
 I discovered a bug in the previous patch.  Here is an updated version.
 
 -Tom
 
 On Tue, Nov 12, 2013 at 03:01:42PM -0800, Tom Stellard wrote:
  Hi Vincent,
 
  Here is an updated patch where I added a call to
  SubstituteKCacheBank() in canClauseLocalKillFitInClause().  This should
  prevent OQAP uses and defs from being split because of constant bank
  limitations.
 
  Maybe we can leave the ScheduleDAGMutation optimization as a future
  TODO.
 
  -Tom
 
  On Sun, Nov 03, 2013 at 10:19:16AM -0800, Vincent Lejeune wrote:
   I have put some comments below but otherwise the patch is
   reviewed-by: Vincent Lejeune vljn at ovi.com
   
   
   -- next part --
   From 2eb4673e3184af0e077cbe30a594602441e8d98e Mon Sep 17 
 00:00:00 2001 From: Tom Stellard thomas.stellard at amd.com
   Date: Thu, 5 Sep 2013 08:59:32 -0700
   Subject: [PATCH] R600: Fix scheduling of instructions that use the 
 LDS output
queue
   
   The LDS output queue is accessed via the OQAP register.  The OQAP
   register cannot be live across clauses, so if value is written to 
 the
   output queue, it must be retrieved before the end of the clause.
   With the machine scheduler, we cannot satisfy this constraint, 
 because
   it lacks proper alias analysis and it will mark some LDS accesses 
 as
   having a chain dependency on vertex fetches.  Since vertex fetches
   
   We can customize the dependency graph before machine scheduling takes 
 place,
   using ScheduleDAGMutation.
   I already wrote some code to break artificial dependencies between 
 vector
   subregister read/write here :
   
 http://cgit.freedesktop.org/~vlj/llvm/commit/?h=vliw5&id=e91b16a22845d0a80ed348f158ae7ab293e003a8
   While I expect Matthias Braun's subregister patches, once 
 upstreamed,
   to obsolete most of this patch except the tests, it can be reworked so 
 that
   it parses all MEM dependencies and removes the ones between 
 instructions
   touching different memory pools (like VTX_FETCH and LDS_READ).
   
   require a new clause, the dependency may end up splitting OQAP 
 uses and
   defs so they end up in different clauses.  See the 
 lds-output-queue.ll
   test for a more detailed explanation.
   
   To work around this issue, we now combine the LDS read and the 
 OQAP
   copy into one instruction and expand it after register allocation.
   
   This patch also adds some checks to the EmitClauseMarker pass, so 
 that
   it doesn't end a clause with a value still in the output queue 
 and
   removes AR.X and OQAP handling from the scheduler (AR.X uses and 
 defs
   were already being expanded post-RA, so the scheduler will never 
 see
   them).
   ---
lib/Target/R600/R600EmitClauseMarkers.cpp     | 52 ++
lib/Target/R600/R600ExpandSpecialInstrs.cpp   | 17 +
lib/Target/R600/R600ISelLowering.cpp          | 20 +++---
lib/Target/R600/R600InstrInfo.cpp             |  8 +++
lib/Target/R600/R600InstrInfo.h               |  2 +
lib/Target/R600/R600MachineScheduler.cpp      | 32 -
lib/Target/R600/R600MachineScheduler.h        |  2 -
lib/Target/R600/R600RegisterInfo.cpp          | 13 
lib/Target/R600/R600RegisterInfo.h            |  2 +
test/CodeGen/R600/lds-output-queue.ll         | 99 
 +++
test/CodeGen/R600/local-memory-two-objects.ll |  8 ++-
11 files changed, 206 insertions(+), 49 deletions(-)
create mode 100644 test/CodeGen/R600/lds-output-queue.ll
   
   diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp 
 b/lib/Target/R600/R600EmitClauseMarkers.cpp
   
   
   +  bool canClauseLocalKillFitInClause(
   +                             unsigned AluInstCount,
   +                             MachineBasicBlock::iterator Def,
   +                             MachineBasicBlock::iterator BBEnd) 
 {
   +    const R600RegisterInfo TRI = TII-getRegisterInfo();
   +    for (MachineInstr::const_mop_iterator
   +           MOI = Def-operands_begin(),
   +           MOE = Def-operands_end(); MOI != MOE; ++MOI) 
 {
   +      if (!MOI-isReg() || !MOI-isDef() ||
   +          TRI.isPhysRegLiveAcrossClauses(MOI-getReg()))
   +        continue;
   +
   +      // Def defines a clause local register, so check that its 
 use will fit
   +      // in the clause.
   +      unsigned LastUseCount = 0;
   +      for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; 
 ++UseI) {
   +        AluInstCount += OccupiedDwords(UseI);
   +        // We have reached the maximum instruction limit before

[Mesa-dev] [PATCH] r600/llvm: Store inputs in function arguments

2013-11-11 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c | 119 +++
 src/gallium/drivers/r600/r600_shader.c   |   1 +
 src/gallium/drivers/radeon/radeon_llvm.h |   1 +
 3 files changed, 121 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 5afe3cb..a2ff0ec 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -77,6 +77,11 @@ static void llvm_load_system_value(
default: assert(!unknown system value);
}
 
+#if HAVE_LLVM = 0x0304
+   ctx-system_values[index] = 
LLVMBuildExtractElement(ctx-gallivm.builder,
+   LLVMGetParam(ctx-main_fn, 0), 
lp_build_const_int32((ctx-gallivm), chan),
+   );
+#else
LLVMValueRef reg = lp_build_const_int32(
ctx-soa.bld_base.base.gallivm, chan);
ctx-system_values[index] = build_intrinsic(
@@ -84,8 +89,49 @@ static void llvm_load_system_value(
llvm.R600.load.input,
ctx-soa.bld_base.base.elem_type, reg, 1,
LLVMReadNoneAttribute);
+#endif
 }
 
+#if HAVE_LLVM = 0x0304
+static LLVMValueRef
+llvm_load_input_vector(
+   struct radeon_llvm_context * ctx, unsigned location, unsigned ijregs,
+   boolean interp)
+{
+   LLVMTypeRef VecType;
+   LLVMValueRef Args[3] = {
+   lp_build_const_int32((ctx-gallivm), location)
+   };
+   unsigned ArgCount = 1;
+   if (interp) {
+   VecType = 
LLVMVectorType(ctx-soa.bld_base.base.elem_type, 2);
+   LLVMValueRef IJIndex = LLVMGetParam(ctx-main_fn, 
ijregs / 2);
+   Args[ArgCount++] = 
LLVMBuildExtractElement(ctx-gallivm.builder, IJIndex,
+   lp_build_const_int32((ctx-gallivm), 2 * 
(ijregs % 2)), );
+   Args[ArgCount++] = 
LLVMBuildExtractElement(ctx-gallivm.builder, IJIndex,
+   lp_build_const_int32((ctx-gallivm), 2 * 
(ijregs % 2) + 1), );
+   LLVMValueRef HalfVec[2] = {
+   build_intrinsic(ctx-gallivm.builder, 
llvm.R600.interp.xy,
+   VecType, Args, ArgCount, 
LLVMReadNoneAttribute),
+   build_intrinsic(ctx-gallivm.builder, 
llvm.R600.interp.zw,
+   VecType, Args, ArgCount, 
LLVMReadNoneAttribute)
+   };
+   LLVMValueRef MaskInputs[4] = {
+   lp_build_const_int32((ctx-gallivm), 0),
+   lp_build_const_int32((ctx-gallivm), 1),
+   lp_build_const_int32((ctx-gallivm), 2),
+   lp_build_const_int32((ctx-gallivm), 3)
+   };
+   LLVMValueRef Mask = LLVMConstVector(MaskInputs, 4);
+   return LLVMBuildShuffleVector(ctx-gallivm.builder, 
HalfVec[0], HalfVec[1],
+   Mask, );
+   } else {
+   VecType = 
LLVMVectorType(ctx-soa.bld_base.base.elem_type, 4);
+   return build_intrinsic(ctx-gallivm.builder, 
llvm.R600.interp.const,
+   VecType, Args, ArgCount, LLVMReadNoneAttribute);
+   }
+}
+#else
 static LLVMValueRef
 llvm_load_input_helper(
struct radeon_llvm_context * ctx,
@@ -110,7 +156,22 @@ llvm_load_input_helper(
return build_intrinsic(bb-gallivm-builder, intrinsic,
bb-elem_type, arg[0], arg_count, LLVMReadNoneAttribute);
 }
+#endif
 
+#if HAVE_LLVM = 0x0304
+static LLVMValueRef
+llvm_face_select_helper(
+   struct radeon_llvm_context * ctx,
+   LLVMValueRef face, LLVMValueRef front_color, LLVMValueRef back_color)
+{
+   const struct lp_build_context * bb = ctx-soa.bld_base.base;
+   LLVMValueRef is_front = LLVMBuildFCmp(
+   bb-gallivm-builder, LLVMRealUGT, face,
+   lp_build_const_float(bb-gallivm, 0.0f),);
+   return LLVMBuildSelect(bb-gallivm-builder, is_front,
+   front_color, back_color, );
+}
+#else
 static LLVMValueRef
 llvm_face_select_helper(
struct radeon_llvm_context * ctx,
@@ -124,6 +185,7 @@ llvm_face_select_helper(
return LLVMBuildSelect(bb-gallivm-builder, is_front,
front_color, back_color, );
 }
+#endif
 
 static void llvm_load_input(
struct radeon_llvm_context * ctx,
@@ -132,11 +194,55 @@ static void llvm_load_input(
 {
const struct r600_shader_io * input = ctx-r600_inputs[input_index];
unsigned chan;
+#if HAVE_LLVM  0x0304
unsigned interp = 0;
int ij_index;
+#endif
int two_side = (ctx-two_side  input-name == TGSI_SEMANTIC_COLOR);
LLVMValueRef v;
+#if HAVE_LLVM = 0x0304
+   boolean 

[Mesa-dev] [PATCH] r600/llvm: Store inputs in function arguments

2013-11-03 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c   | 125 -
 src/gallium/drivers/r600/r600_shader.c |   2 +
 src/gallium/drivers/radeon/radeon_llvm.h   |   1 +
 .../drivers/radeon/radeon_setup_tgsi_llvm.c|   2 +-
 4 files changed, 75 insertions(+), 55 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 5afe3cb..8dcda1a 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -87,37 +87,50 @@ static void llvm_load_system_value(
 }
 
 static LLVMValueRef
-llvm_load_input_helper(
-   struct radeon_llvm_context * ctx,
-   unsigned idx, int interp, int ij_index)
+llvm_load_input_vector(
+   struct radeon_llvm_context * ctx, unsigned location, unsigned ijregs,
+   boolean interp)
 {
-   const struct lp_build_context * bb = ctx-soa.bld_base.base;
-   LLVMValueRef arg[2];
-   int arg_count;
-   const char * intrinsic;
-
-   arg[0] = lp_build_const_int32(bb-gallivm, idx);
-
-   if (interp) {
-   intrinsic = llvm.R600.interp.input;
-   arg[1] = lp_build_const_int32(bb-gallivm, ij_index);
-   arg_count = 2;
-   } else {
-   intrinsic = llvm.R600.load.input;
-   arg_count = 1;
-   }
-
-   return build_intrinsic(bb-gallivm-builder, intrinsic,
-   bb-elem_type, arg[0], arg_count, LLVMReadNoneAttribute);
+   LLVMTypeRef VecType;
+   LLVMValueRef Args[2] = {
+   lp_build_const_int32((ctx-gallivm), location)
+   };
+   unsigned ArgCount = 1;
+   if (interp) {
+   VecType = 
LLVMVectorType(ctx-soa.bld_base.base.elem_type, 2);
+   LLVMValueRef IJIndex = LLVMGetParam(ctx-main_fn, 
ijregs / 2);
+   Args[ArgCount++] = 
LLVMBuildExtractElement(ctx-gallivm.builder, IJIndex,
+   lp_build_const_int32((ctx-gallivm), 2 * (ijregs % 
2)), );
+   Args[ArgCount++] = 
LLVMBuildExtractElement(ctx-gallivm.builder, IJIndex,
+   lp_build_const_int32((ctx-gallivm), 2 * (ijregs % 2) 
+ 1), );
+   LLVMValueRef HalfVec[2] = {
+   build_intrinsic(ctx-gallivm.builder, 
llvm.R600.interp.xy,
+   VecType, Args, ArgCount, 
LLVMReadNoneAttribute),
+   build_intrinsic(ctx-gallivm.builder, 
llvm.R600.interp.zw,
+   VecType, Args, ArgCount, 
LLVMReadNoneAttribute)
+   };
+   LLVMValueRef MaskInputs[4] = {
+   lp_build_const_int32((ctx-gallivm), 0),
+   lp_build_const_int32((ctx-gallivm), 1),
+   lp_build_const_int32((ctx-gallivm), 2),
+   lp_build_const_int32((ctx-gallivm), 3)
+   };
+   LLVMValueRef Mask = LLVMConstVector(MaskInputs, 4);
+   return LLVMBuildShuffleVector(ctx-gallivm.builder, 
HalfVec[0], HalfVec[1],
+   Mask, );
+   } else {
+   VecType = 
LLVMVectorType(ctx-soa.bld_base.base.elem_type, 4);
+   return build_intrinsic(ctx-gallivm.builder, 
llvm.R600.interp.const,
+   VecType, Args, ArgCount, LLVMReadNoneAttribute);
+   }
 }
 
 static LLVMValueRef
 llvm_face_select_helper(
struct radeon_llvm_context * ctx,
-   unsigned face_loc, LLVMValueRef front_color, LLVMValueRef back_color)
+   LLVMValueRef face, LLVMValueRef front_color, LLVMValueRef back_color)
 {
const struct lp_build_context * bb = ctx-soa.bld_base.base;
-   LLVMValueRef face = llvm_load_input_helper(ctx, face_loc, 0, 0);
LLVMValueRef is_front = LLVMBuildFCmp(
bb-gallivm-builder, LLVMRealUGT, face,
lp_build_const_float(bb-gallivm, 0.0f),);
@@ -132,50 +145,46 @@ static void llvm_load_input(
 {
const struct r600_shader_io * input = ctx-r600_inputs[input_index];
unsigned chan;
-   unsigned interp = 0;
-   int ij_index;
int two_side = (ctx-two_side  input-name == TGSI_SEMANTIC_COLOR);
LLVMValueRef v;
+   boolean require_interp_intrinsic = ctx-chip_class = EVERGREEN 
+   ctx-type == TGSI_PROCESSOR_FRAGMENT;
 
-   if (ctx-chip_class = EVERGREEN  ctx-type == 
TGSI_PROCESSOR_FRAGMENT 
-   input-spi_sid) {
-   interp = 1;
-   ij_index = (input-interpolate  0) ? input-ij_index : -1;
-   }
+   if (require_interp_intrinsic  input-spi_sid) {
+   v = llvm_load_input_vector(ctx, input-lds_pos, input-ij_index,
+   (input-interpolate  0));
+   

[Mesa-dev] [PATCH] R600: Use function inputs to represent data stored in gpr

2013-11-03 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPUCallingConv.td| 15 -
 lib/Target/R600/R600ISelLowering.cpp| 55 ++--
 lib/Target/R600/R600Instructions.td |  2 +-
 lib/Target/R600/R600Intrinsics.td   |  8 ++-
 test/CodeGen/R600/big_alu.ll| 85 -
 test/CodeGen/R600/complex-folding.ll|  9 +--
 test/CodeGen/R600/floor.ll  | 14 ++--
 test/CodeGen/R600/fmad.ll   | 20 +++---
 test/CodeGen/R600/fmax.ll   | 13 ++--
 test/CodeGen/R600/fmin.ll   | 13 ++--
 test/CodeGen/R600/llvm.AMDGPU.mul.ll| 16 ++---
 test/CodeGen/R600/llvm.cos.ll   | 12 ++--
 test/CodeGen/R600/llvm.pow.ll   | 16 ++---
 test/CodeGen/R600/llvm.sin.ll   | 12 ++--
 test/CodeGen/R600/load-input-fold.ll| 29 -
 test/CodeGen/R600/max-literals.ll   | 25 
 test/CodeGen/R600/pv-packing.ll | 25 +++-
 test/CodeGen/R600/pv.ll | 61 +-
 test/CodeGen/R600/r600-encoding.ll  | 15 +++--
 test/CodeGen/R600/r600-export-fix.ll| 14 ++--
 test/CodeGen/R600/r600cfg.ll| 14 ++--
 test/CodeGen/R600/reciprocal.ll | 13 ++--
 test/CodeGen/R600/rv7x0_count3.ll   | 19 +++---
 test/CodeGen/R600/schedule-fs-loop-nested-if.ll | 13 ++--
 test/CodeGen/R600/schedule-vs-if-nested-loop.ll | 14 ++--
 test/CodeGen/R600/shared-op-cycle.ll| 16 ++---
 test/CodeGen/R600/swizzle-export.ll | 32 --
 test/CodeGen/R600/tex-clause-antidep.ll | 13 ++--
 test/CodeGen/R600/texture-input-merge.ll| 13 ++--
 29 files changed, 285 insertions(+), 321 deletions(-)

diff --git a/lib/Target/R600/AMDGPUCallingConv.td 
b/lib/Target/R600/AMDGPUCallingConv.td
index a194e6d..bb7d6f8 100644
--- a/lib/Target/R600/AMDGPUCallingConv.td
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -42,6 +42,17 @@ def CC_SI : CallingConv[
 
 ];
 
+// Calling convention for R600
+def CC_R600 : CallingConv[
+  CCIfInRegCCIfType[v4f32, v4i32] , CCAssignToReg[
+T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
+T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, 
T15_XYZW,
+T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
+T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
+T30_XYZW, T31_XYZW, T32_XYZW
+  ]
+];
+
 // Calling convention for compute kernels
 def CC_AMDGPU_Kernel : CallingConv[
   CCIfType[v4i32, v4f32],   CCAssignToStack 16, 16,
@@ -61,5 +72,7 @@ def CC_AMDGPU : CallingConv[
State.getMachineFunction().getInfoR600MachineFunctionInfo()-
ShaderType == ShaderType::COMPUTE, CCDelegateToCC_AMDGPU_Kernel,
   CCIfState.getTarget().getSubtargetAMDGPUSubtarget()#
-   .getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS, 
CCDelegateToCC_SI
+   .getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS, 
CCDelegateToCC_SI,
+  CCIfState.getTarget().getSubtargetAMDGPUSubtarget()#
+   .getGeneration()  AMDGPUSubtarget::SOUTHERN_ISLANDS, 
CCDelegateToCC_R600
 ];
diff --git a/lib/Target/R600/R600ISelLowering.cpp 
b/lib/Target/R600/R600ISelLowering.cpp
index 3c2e388..deab985 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -554,51 +554,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, 
SelectionDAG DAG) const
 SDLoc DL(Op);
 switch(IntrinsicID) {
 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
-case AMDGPUIntrinsic::R600_load_input: {
-  int64_t RegIndex = 
castConstantSDNode(Op.getOperand(1))-getZExtValue();
-  unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
-  MachineFunction MF = DAG.getMachineFunction();
-  MachineRegisterInfo MRI = MF.getRegInfo();
-  MRI.addLiveIn(Reg);
-  return DAG.getCopyFromReg(DAG.getEntryNode(),
-  SDLoc(DAG.getEntryNode()), Reg, VT);
-}
-
-case AMDGPUIntrinsic::R600_interp_input: {
+case AMDGPUIntrinsic::R600_interp_xy:
+case AMDGPUIntrinsic::R600_interp_zw: {
   int slot = castConstantSDNode(Op.getOperand(1))-getZExtValue();
-  int ijb = castConstantSDNode(Op.getOperand(2))-getSExtValue();
   MachineSDNode *interp;
-  if (ijb  0) {
-const MachineFunction MF = DAG.getMachineFunction();
-const R600InstrInfo *TII =
-  static_castconst R600InstrInfo*(MF.getTarget().getInstrInfo());
-interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
-MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
-return DAG.getTargetExtractSubreg(
-TII-getRegisterInfo().getSubRegFromChannel(slot % 4),
-DL, MVT::f32, SDValue(interp, 0));
-  }
+  SDValue RegisterINode = Op.getOperand(2);
+  SDValue 

Re: [Mesa-dev] [PATCH] R600: Make sure OQAP defs and uses happen in the same clause

2013-11-03 Thread Vincent Lejeune
I have put some comments below, but otherwise the patch is
Reviewed-by: Vincent Lejeune <vljn at ovi.com>


-- next part --
From 2eb4673e3184af0e077cbe30a594602441e8d98e Mon Sep 17 00:00:00 2001 From: 
Tom Stellard thomas.stellard at amd.com
Date: Thu, 5 Sep 2013 08:59:32 -0700
Subject: [PATCH] R600: Fix scheduling of instructions that use the LDS output
 queue

The LDS output queue is accessed via the OQAP register.  The OQAP
register cannot be live across clauses, so if a value is written to the
output queue, it must be retrieved before the end of the clause.
With the machine scheduler, we cannot satisfy this constraint, because
it lacks proper alias analysis and it will mark some LDS accesses as
having a chain dependency on vertex fetches.  Since vertex fetches

We can customize the dependency graph before machine scheduling takes place,
using ScheduleDAGMutation.
I already wrote some code to break artificial dependencies between vector
subregister reads/writes here:
http://cgit.freedesktop.org/~vlj/llvm/commit/?h=vliw5&id=e91b16a22845d0a80ed348f158ae7ab293e003a8
While I expect Matthias Braun's subregister patches, once upstreamed, to
obsolete most of this patch except the tests, it can be reworked so that
it parses all MEM dependencies and removes the ones between instructions
touching different memory pools (like VTX_FETCH and LDS_READ).

require a new clause, the dependency may end up splitting OQAP uses and
defs so they end up in different clauses.  See the lds-output-queue.ll
test for a more detailed explanation.

To work around this issue, we now combine the LDS read and the OQAP
copy into one instruction and expand it after register allocation.

This patch also adds some checks to the EmitClauseMarker pass, so that
it doesn't end a clause with a value still in the output queue and
removes AR.X and OQAP handling from the scheduler (AR.X uses and defs
were already being expanded post-RA, so the scheduler will never see
them).
---
 lib/Target/R600/R600EmitClauseMarkers.cpp | 52 ++
 lib/Target/R600/R600ExpandSpecialInstrs.cpp   | 17 +
 lib/Target/R600/R600ISelLowering.cpp  | 20 +++---
 lib/Target/R600/R600InstrInfo.cpp |  8 +++
 lib/Target/R600/R600InstrInfo.h   |  2 +
 lib/Target/R600/R600MachineScheduler.cpp  | 32 -
 lib/Target/R600/R600MachineScheduler.h|  2 -
 lib/Target/R600/R600RegisterInfo.cpp  | 13 
 lib/Target/R600/R600RegisterInfo.h|  2 +
 test/CodeGen/R600/lds-output-queue.ll | 99 +++
 test/CodeGen/R600/local-memory-two-objects.ll |  8 ++-
 11 files changed, 206 insertions(+), 49 deletions(-)
 create mode 100644 test/CodeGen/R600/lds-output-queue.ll

diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp 
b/lib/Target/R600/R600EmitClauseMarkers.cpp


+  bool canClauseLocalKillFitInClause(
+ unsigned AluInstCount,
+ MachineBasicBlock::iterator Def,
+ MachineBasicBlock::iterator BBEnd) {
+const R600RegisterInfo TRI = TII-getRegisterInfo();
+for (MachineInstr::const_mop_iterator
+   MOI = Def-operands_begin(),
+   MOE = Def-operands_end(); MOI != MOE; ++MOI) {
+  if (!MOI-isReg() || !MOI-isDef() ||
+  TRI.isPhysRegLiveAcrossClauses(MOI-getReg()))
+continue;
+
+  // Def defines a clause local register, so check that its use will fit
+  // in the clause.
+  unsigned LastUseCount = 0;
+  for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
+AluInstCount += OccupiedDwords(UseI);
+// We have reached the maximum instruction limit before finding the
+// use that kills this register, so we cannot use this def in the
+// current clause.
+if (AluInstCount = TII-getMaxAlusPerClause())
+  return false;
+
+// Register kill flags have been cleared by the time we get to this
+// pass, but it is safe to assume that all uses of this register
+// occur in the same basic block as its definition, because
+// it is illegal for the scheduler to schedule them in
+// different blocks.
+if (UseI-findRegisterUseOperandIdx(MOI-getReg()))
+  LastUseCount = AluInstCount;
+
+if (UseI != Def  UseI-findRegisterDefOperandIdx(MOI-getReg()) != 
-1)
+  break;
+  }
+  if (LastUseCount)
+return LastUseCount = TII-getMaxAlusPerClause();
+  llvm_unreachable(Clause local register live at end of clause.);
+}
+return true;
+  }

This function does not check whether the current clause can hold all constant banks.
I think it's likely to be rare for a clause to be split because of constant
bank limitations,
but it would be better to have an assertion failure in such a case to make
debugging easier.
For instance, if SubstituteKCacheBank returns false, you can check that there
is no LDS
use

[Mesa-dev] [PATCH 1/2] r600/llvm: Fix texbuf for pre EG gen

2013-10-30 Thread Vincent Lejeune
R600/R700 implementation of tex buffer fetch requires the result of the VFETCH
instruction to be ANDed with R600_BUFFER_INFO_CONST_BUFFER, and the last channel
to be ORed with the same const buffer.
---
 src/gallium/drivers/r600/r600_llvm.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 34dd3ad..d7fa5f8 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -427,6 +427,35 @@ static void llvm_emit_tex(
emit_data-output[0] = build_intrinsic(gallivm-builder,
llvm.R600.load.texbuf,
emit_data-dst_type, 
args, 2, LLVMReadNoneAttribute);
+   if (ctx-chip_class = EVERGREEN)
+   return;
+   ctx-uses_tex_buffers = true;
+   LLVMDumpValue(emit_data-output[0]);
+   emit_data-output[0] = 
LLVMBuildBitCast(gallivm-builder,
+   emit_data-output[0], 
LLVMVectorType(bld_base-base.int_elem_type, 4),
+   );
+   LLVMValueRef Mask = llvm_load_const_buffer(bld_base,
+   lp_build_const_int32(gallivm, 0),
+   LLVM_R600_BUFFER_INFO_CONST_BUFFER);
+   Mask = LLVMBuildBitCast(gallivm-builder, Mask,
+   LLVMVectorType(bld_base-base.int_elem_type, 
4), );
+   emit_data-output[0] = 
lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND,
+   emit_data-output[0],
+   Mask);
+   LLVMValueRef WComponent = 
LLVMBuildExtractElement(gallivm-builder,
+   emit_data-output[0], 
lp_build_const_int32(gallivm, 3), );
+   Mask = llvm_load_const_buffer(bld_base, 
lp_build_const_int32(gallivm, 1),
+   LLVM_R600_BUFFER_INFO_CONST_BUFFER);
+   Mask = LLVMBuildExtractElement(gallivm-builder, Mask,
+   lp_build_const_int32(gallivm, 0), );
+   Mask = LLVMBuildBitCast(gallivm-builder, Mask,
+   bld_base-base.int_elem_type, );
+   WComponent = lp_build_emit_llvm_binary(bld_base, 
TGSI_OPCODE_OR,
+   WComponent, Mask);
+   emit_data-output[0] = 
LLVMBuildInsertElement(gallivm-builder,
+   emit_data-output[0], WComponent, 
lp_build_const_int32(gallivm, 3), );
+   emit_data-output[0] = 
LLVMBuildBitCast(gallivm-builder,
+   emit_data-output[0], 
LLVMVectorType(bld_base-base.elem_type, 4), );
}
return;
default:
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] r600/llvm: Fix isampleBuffer on preEG

2013-10-30 Thread Vincent Lejeune
On R600/R700 hw the data are stored from the channel 2 of the second dword.
---
 src/gallium/drivers/r600/r600_llvm.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index d7fa5f8..5afe3cb 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -415,9 +415,22 @@ static void llvm_emit_tex(
case TGSI_OPCODE_TXQ: {
struct radeon_llvm_context * ctx = 
radeon_llvm_context(bld_base);
ctx-uses_tex_buffers = true;
-   LLVMValueRef offset = 
lp_build_const_int32(bld_base-base.gallivm, 0);
+   bool isEgPlus = (ctx-chip_class = EVERGREEN);
+   LLVMValueRef offset = 
lp_build_const_int32(bld_base-base.gallivm,
+   isEgPlus ? 0 : 1);
LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, 
offset,
LLVM_R600_BUFFER_INFO_CONST_BUFFER);
+   if (!isEgPlus) {
+   LLVMValueRef maskval[4] = {
+   lp_build_const_int32(gallivm, 1),
+   lp_build_const_int32(gallivm, 2),
+   lp_build_const_int32(gallivm, 3),
+   lp_build_const_int32(gallivm, 0),
+   };
+   LLVMValueRef mask = LLVMConstVector(maskval, 4);
+   cvecval = 
LLVMBuildShuffleVector(gallivm-builder, cvecval, cvecval,
+   mask, );
+   }
emit_data-output[0] = cvecval;
return;
}
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] R600: Make sure OQAP defs and uses happen in the same clause

2013-10-25 Thread Vincent Lejeune
This patch should work when checking that no OQAP is used before being queued,
assuming that a value in OQAP is consumed
and cannot be read twice. However, I'm not sure I cover all LDS instructions
that queue a value; I only use LDS_RET_READ in the switch case.

Vincent



- Mail original -
 De : Tom Stellard t...@stellard.net
 À : Vincent Lejeune v...@ovi.com
 Cc : llvm-comm...@cs.uiuc.edu llvm-comm...@cs.uiuc.edu; 
 mesa-dev@lists.freedesktop.org mesa-dev@lists.freedesktop.org; Tom 
 Stellard thomas.stell...@amd.com
 Envoyé le : Mardi 22 octobre 2013 23h20
 Objet : Re: [PATCH] R600: Make sure OQAP defs and uses happen in the same 
 clause
 
 Hi Vincent,
 
 Here is an updated patch.  I wasn't sure where to put the assertion to
 check that UnscheduledNoLiveOut{Defs,Uses} is empty when switching to a
 new clause.  I tried adding it to R600SchedStrategy::schedNode() behind
 the if (NextInstKind != CurInstKind) condition, but it always failed.
 Any suggestions on where I should put it?
 
 -Tom
 
 
 On Mon, Oct 21, 2013 at 12:40:28PM -0700, Vincent Lejeune wrote:
 
 
 
 
  - Mail original -
   De : Tom Stellard t...@stellard.net
   À : llvm-comm...@cs.uiuc.edu
   Cc : mesa-dev@lists.freedesktop.org; Tom Stellard 
 thomas.stell...@amd.com
   Envoyé le : Vendredi 11 octobre 2013 20h10
   Objet : [PATCH] R600: Make sure OQAP defs and uses happen in the same 
 clause
   
   From: Tom Stellard thomas.stell...@amd.com
   
   Reading the special OQAP register pops the top value off the LDS
   input queue and returns it to the instruction.  This queue is
   invalidated at the end of an ALU clause and leaving values in the 
 queue
   can lead to GPU hangs.  This means that if we load a value into the 
 queue,
   we must use it before the end of the clause.
   
   This fixes some hangs in the OpenCV test suite.
   ---
   lib/Target/R600/R600MachineScheduler.cpp | 25 
 +
   lib/Target/R600/R600MachineScheduler.h   |  4 ++--
   test/CodeGen/R600/lds-input-queue.ll     | 26 
 ++
   3 files changed, 41 insertions(+), 14 deletions(-)
   create mode 100644 test/CodeGen/R600/lds-input-queue.ll
   
   diff --git a/lib/Target/R600/R600MachineScheduler.cpp 
   b/lib/Target/R600/R600MachineScheduler.cpp
   index 6c26d9e..611b7f4 100644
   --- a/lib/Target/R600/R600MachineScheduler.cpp
   +++ b/lib/Target/R600/R600MachineScheduler.cpp
   @@ -93,11 +93,12 @@ SUnit* R600SchedStrategy::pickNode(bool 
 IsTopNode) 
   {
      }
   
   
   -  // We want to scheduled AR defs as soon as possible to make sure 
 they 
   aren't
   -  // put in a different ALU clause from their uses.
   -  if (!SU  !UnscheduledARDefs.empty()) {
   -      SU = UnscheduledARDefs[0];
   -      UnscheduledARDefs.erase(UnscheduledARDefs.begin());
   +  // We want to scheduled defs that cannot be live outside of this 
 clause 
   +  // as soon as possible to make sure they aren't put in a 
 different
   +  // ALU clause from their uses.
   +  if (!SU  !UnscheduledNoLiveOutDefs.empty()) {
   +      SU = UnscheduledNoLiveOutDefs[0];
   +      
 UnscheduledNoLiveOutDefs.erase(UnscheduledNoLiveOutDefs.begin());
          NextInstKind = IDAlu;
      }
   
   @@ -132,9 +133,9 @@ SUnit* R600SchedStrategy::pickNode(bool 
 IsTopNode) 
   {
   
      // We want to schedule the AR uses as late as possible to make sure 
 that
      // the AR defs have been released.
   -  if (!SU  !UnscheduledARUses.empty()) {
   -      SU = UnscheduledARUses[0];
   -      UnscheduledARUses.erase(UnscheduledARUses.begin());
   +  if (!SU  !UnscheduledNoLiveOutUses.empty()) {
   +      SU = UnscheduledNoLiveOutUses[0];
   +      
 UnscheduledNoLiveOutUses.erase(UnscheduledNoLiveOutUses.begin());
 
  Can we use std::queue<SUnit*> instead of a std::vector for
 UnscheduledNoLiveOutUses?
  I had to use a vector because I needed to be able to pop a non-topmost SUnit
 in some cases
  (to fit the Instruction Group const read limitation), but I would rather avoid
 erase(iterator) calls
  when possible.
 
 
          NextInstKind = IDAlu;
      }
   
   @@ -217,15 +218,15 @@ void R600SchedStrategy::releaseBottomNode(SUnit 
 *SU) 
   {
   
      int IK = getInstKind(SU);
   
   -  // Check for AR register defines
   +  // Check for registers that do not live across ALU clauses.
      for (MachineInstr::const_mop_iterator I = 
   SU-getInstr()-operands_begin(),
                                            E = 
   SU-getInstr()-operands_end();
                                            I != E; ++I) {
   -    if (I-isReg()  I-getReg() == AMDGPU::AR_X) 
 {
   +    if (I-isReg()  (I-getReg() == AMDGPU::AR_X || 
   I-getReg() == AMDGPU::OQAP)) {
          if (I-isDef()) {
   -        UnscheduledARDefs.push_back(SU);
   +        UnscheduledNoLiveOutDefs.push_back(SU);
          } else {
   -        UnscheduledARUses.push_back(SU);
   +        UnscheduledNoLiveOutUses.push_back(SU);
          }
          return;
        }
   diff --git a/lib/Target/R600

[Mesa-dev] [PATCH 1/2] r600/llvm: Fix texbuf for pre EG gen

2013-10-21 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 34dd3ad..d7fa5f8 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -427,6 +427,35 @@ static void llvm_emit_tex(
emit_data-output[0] = build_intrinsic(gallivm-builder,
llvm.R600.load.texbuf,
emit_data-dst_type, 
args, 2, LLVMReadNoneAttribute);
+   if (ctx-chip_class = EVERGREEN)
+   return;
+   ctx-uses_tex_buffers = true;
+   LLVMDumpValue(emit_data-output[0]);
+   emit_data-output[0] = 
LLVMBuildBitCast(gallivm-builder,
+   emit_data-output[0], 
LLVMVectorType(bld_base-base.int_elem_type, 4),
+   );
+   LLVMValueRef Mask = llvm_load_const_buffer(bld_base,
+   lp_build_const_int32(gallivm, 0),
+   LLVM_R600_BUFFER_INFO_CONST_BUFFER);
+   Mask = LLVMBuildBitCast(gallivm-builder, Mask,
+   LLVMVectorType(bld_base-base.int_elem_type, 
4), );
+   emit_data-output[0] = 
lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND,
+   emit_data-output[0],
+   Mask);
+   LLVMValueRef WComponent = 
LLVMBuildExtractElement(gallivm-builder,
+   emit_data-output[0], 
lp_build_const_int32(gallivm, 3), );
+   Mask = llvm_load_const_buffer(bld_base, 
lp_build_const_int32(gallivm, 1),
+   LLVM_R600_BUFFER_INFO_CONST_BUFFER);
+   Mask = LLVMBuildExtractElement(gallivm-builder, Mask,
+   lp_build_const_int32(gallivm, 0), );
+   Mask = LLVMBuildBitCast(gallivm-builder, Mask,
+   bld_base-base.int_elem_type, );
+   WComponent = lp_build_emit_llvm_binary(bld_base, 
TGSI_OPCODE_OR,
+   WComponent, Mask);
+   emit_data-output[0] = 
LLVMBuildInsertElement(gallivm-builder,
+   emit_data-output[0], WComponent, 
lp_build_const_int32(gallivm, 3), );
+   emit_data-output[0] = 
LLVMBuildBitCast(gallivm-builder,
+   emit_data-output[0], 
LLVMVectorType(bld_base-base.elem_type, 4), );
}
return;
default:
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] r600/llvm: Fix isampleBuffer on preEG

2013-10-21 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index d7fa5f8..5afe3cb 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -415,9 +415,22 @@ static void llvm_emit_tex(
case TGSI_OPCODE_TXQ: {
struct radeon_llvm_context * ctx = 
radeon_llvm_context(bld_base);
ctx-uses_tex_buffers = true;
-   LLVMValueRef offset = 
lp_build_const_int32(bld_base-base.gallivm, 0);
+   bool isEgPlus = (ctx-chip_class = EVERGREEN);
+   LLVMValueRef offset = 
lp_build_const_int32(bld_base-base.gallivm,
+   isEgPlus ? 0 : 1);
LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, 
offset,
LLVM_R600_BUFFER_INFO_CONST_BUFFER);
+   if (!isEgPlus) {
+   LLVMValueRef maskval[4] = {
+   lp_build_const_int32(gallivm, 1),
+   lp_build_const_int32(gallivm, 2),
+   lp_build_const_int32(gallivm, 3),
+   lp_build_const_int32(gallivm, 0),
+   };
+   LLVMValueRef mask = LLVMConstVector(maskval, 4);
+   cvecval = 
LLVMBuildShuffleVector(gallivm-builder, cvecval, cvecval,
+   mask, );
+   }
emit_data-output[0] = cvecval;
return;
}
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: Do not set both inreg and byval

2013-10-11 Thread Vincent Lejeune
---
 src/gallium/drivers/radeonsi/radeonsi_shader.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c 
b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index ab996cc..209b77e 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -1655,16 +1655,19 @@ static void create_function(struct si_shader_context 
*si_shader_ctx)
 
for (i = 0; i = last_sgpr; ++i) {
LLVMValueRef P = 
LLVMGetParam(si_shader_ctx-radeon_bld.main_fn, i);
-   LLVMAddAttribute(P, LLVMInRegAttribute);
+   switch (i) {
+   default:
+   LLVMAddAttribute(P, LLVMInRegAttribute);
+   break;
+#if HAVE_LLVM = 0x0304
/* We tell llvm that array inputs are passed by value to allow 
Sinking pass
 * to move load. Inputs are constant so this is fine. */
-   switch (i) {
-   default: break;
case SI_PARAM_CONST:
case SI_PARAM_SAMPLER:
case SI_PARAM_RESOURCE:
LLVMAddAttribute(P, LLVMByValAttribute);
break;
+#endif
}
}
 
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] R600/SI: Support byval arguments

2013-10-10 Thread Vincent Lejeune
LLVM does not allow a function argument to be flagged both byval and inreg at the
same time.

It currently works because we don't verify our module in Mesa: our byval
inreg arguments
are considered byval by the sinking pass, and inreg by the ISel pass.


The patch : 
http://lists.freedesktop.org/archives/mesa-dev/2013-October/046022.html fixes 
the situation
but requires the backend to provide a way to lower byval arguments.
This patch provides such support.

Vincent

 De : Tom Stellard t...@stellard.net
À : Vincent Lejeune v...@ovi.com 
Cc : mesa-dev@lists.freedesktop.org 
Envoyé le : Jeudi 10 octobre 2013 15h19
Objet : Re: [Mesa-dev] [PATCH] R600/SI: Support byval arguments
 

On Thu, Oct 10, 2013 at 12:04:16AM +0200, Vincent Lejeune wrote:

What is the purpose of this change?

-Tom


 ---
  lib/Target/R600/AMDGPUCallingConv.td | 7 ++-
  lib/Target/R600/SIISelLowering.cpp   | 3 ++-
  2 files changed, 8 insertions(+), 2 deletions(-)
 
 diff --git a/lib/Target/R600/AMDGPUCallingConv.td 
 b/lib/Target/R600/AMDGPUCallingConv.td
 index d26be32..a194e6d 100644
 --- a/lib/Target/R600/AMDGPUCallingConv.td
 +++ b/lib/Target/R600/AMDGPUCallingConv.td
 @@ -33,7 +33,12 @@ def CC_SI : CallingConv[
      VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
      VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
      VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
 -  ]
 +  ],
 +
 +  CCIfByValCCIfType[i64] , CCAssignToRegWithShadow
 +    [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
 +    [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
 +  
  
  ];
  
 diff --git a/lib/Target/R600/SIISelLowering.cpp 
 b/lib/Target/R600/SIISelLowering.cpp
 index 2174753..cd18154 100644
 --- a/lib/Target/R600/SIISelLowering.cpp
 +++ b/lib/Target/R600/SIISelLowering.cpp
 @@ -158,7 +158,8 @@ SDValue SITargetLowering::LowerFormalArguments(
      const ISD::InputArg Arg = Ins[i];
  
      // First check if it's a PS input addr
 -    if (Info-ShaderType == ShaderType::PIXEL  !Arg.Flags.isInReg()) {
 +    if (Info-ShaderType == ShaderType::PIXEL  !Arg.Flags.isInReg() 
 +        !Arg.Flags.isByVal()) {
  
        assert((PSInputNum = 15)  Too many PS inputs!);
  
 -- 
 1.8.3.1
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600/SI: Support byval arguments

2013-10-09 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPUCallingConv.td | 7 ++-
 lib/Target/R600/SIISelLowering.cpp   | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/lib/Target/R600/AMDGPUCallingConv.td 
b/lib/Target/R600/AMDGPUCallingConv.td
index d26be32..a194e6d 100644
--- a/lib/Target/R600/AMDGPUCallingConv.td
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -33,7 +33,12 @@ def CC_SI : CallingConv[
 VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
 VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
 VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
-  ]
+  ],
+
+  CCIfByValCCIfType[i64] , CCAssignToRegWithShadow
+[ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+[ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+  
 
 ];
 
diff --git a/lib/Target/R600/SIISelLowering.cpp 
b/lib/Target/R600/SIISelLowering.cpp
index 2174753..cd18154 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -158,7 +158,8 @@ SDValue SITargetLowering::LowerFormalArguments(
 const ISD::InputArg Arg = Ins[i];
 
 // First check if it's a PS input addr
-if (Info-ShaderType == ShaderType::PIXEL  !Arg.Flags.isInReg()) {
+if (Info-ShaderType == ShaderType::PIXEL  !Arg.Flags.isInReg() 
+!Arg.Flags.isByVal()) {
 
   assert((PSInputNum = 15)  Too many PS inputs!);
 
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: Do not set both inreg and byval

2013-10-09 Thread Vincent Lejeune
---
 src/gallium/drivers/radeonsi/radeonsi_shader.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c 
b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index ab996cc..9d95997 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -1655,11 +1655,12 @@ static void create_function(struct si_shader_context 
*si_shader_ctx)
 
for (i = 0; i = last_sgpr; ++i) {
LLVMValueRef P = 
LLVMGetParam(si_shader_ctx-radeon_bld.main_fn, i);
-   LLVMAddAttribute(P, LLVMInRegAttribute);
+   switch (i) {
+   default:
+   LLVMAddAttribute(P, LLVMInRegAttribute);
+   break;
/* We tell llvm that array inputs are passed by value to allow 
Sinking pass
 * to move load. Inputs are constant so this is fine. */
-   switch (i) {
-   default: break;
case SI_PARAM_CONST:
case SI_PARAM_SAMPLER:
case SI_PARAM_RESOURCE:
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] PATCH: R600/SI: Enable the verifier on most lit tests

2013-10-08 Thread Vincent Lejeune
The 3rd patch is Reviewed-by: Vincent Lejeune <vljn at ovi.com>

The first one 


Subject: [PATCH 1/4] R600/SI: Mark the EXEC register as reserved

This prevents the machine verifier from complaining about uses of
an undefined physical register.
---
 lib/Target/R600/SIRegisterInfo.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
 a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 5d12564..279ff33 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -25,7 +25,8 @@ SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine tm)  
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction MF) const {
   BitVector Reserved(getNumRegs());
-  return Reserved;
+  Reserved.set(AMDGPU::EXEC);
+  return Reserved;
 } 
Looks like there is a tab/space indentation change there.
With this fixed, this patch is Reviewed-by: Vincent Lejeune <vljn at ovi.com>

I'd like somebody else to have a look at the 2 other patches; I'm not familiar
enough with the SI ISA for now.




- Mail original -
 De : Tom Stellard t...@stellard.net
 À : llvm-comm...@cs.uiuc.edu
 Cc : mesa-dev@lists.freedesktop.org
 Envoyé le : Vendredi 4 octobre 2013 2h36
 Objet : PATCH: R600/SI: Enable the verifier on most lit tests
 
 Hi,
 
 I would like to start using the machine verifier to help catch compiler
 bugs.  I think it will be especially useful for making sure all our
 instructions have legal operands.  The attached patches fix some simple
 machine verifier errors and enable it for most lit tests.
 
 Unfortunately, we cannot enable the machine verifier on tests that have
 branches, because the way IF and ELSE instructions are selected leaves us
 with a copy instruction following the IF and ELSE terminators, which
 violates one of the verifier rules.
 
 -Tom
 
 ___
 llvm-commits mailing list
 llvm-comm...@cs.uiuc.edu
 http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: Allow Sinking pass to move preloaded const/res/sampl

2013-10-06 Thread Vincent Lejeune
This fixes a crash in Unigine Heaven 3.0, and probably in some
other apps.
---
 src/gallium/drivers/radeonsi/radeonsi_shader.c | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c 
b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 97ed4e3..89c12c3 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -114,8 +114,12 @@ static LLVMValueRef build_indexed_load(
 {
struct lp_build_context * base = 
si_shader_ctx-radeon_bld.soa.bld_base.base;
 
+   LLVMValueRef indices[2] = {
+   LLVMConstInt(LLVMInt64TypeInContext(base-gallivm-context), 0, 
false),
+   offset
+   };
LLVMValueRef computed_ptr = LLVMBuildGEP(
-   base-gallivm-builder, base_ptr, offset, 1, );
+   base-gallivm-builder, base_ptr, indices, 2, );
 
LLVMValueRef result = LLVMBuildLoad(base-gallivm-builder, 
computed_ptr, );
LLVMSetMetadata(result, 1, si_shader_ctx-const_md);
@@ -1578,9 +1582,13 @@ static void create_function(struct si_shader_context 
*si_shader_ctx)
v2i32 = LLVMVectorType(i32, 2);
v3i32 = LLVMVectorType(i32, 3);
 
-   params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16), 
CONST_ADDR_SPACE);
-   params[SI_PARAM_SAMPLER] = params[SI_PARAM_CONST];
-   params[SI_PARAM_RESOURCE] = LLVMPointerType(LLVMVectorType(i8, 32), 
CONST_ADDR_SPACE);
+   params[SI_PARAM_CONST] = 
LLVMPointerType(LLVMArrayType(LLVMVectorType(i8, 16), 64), CONST_ADDR_SPACE);
+   /* We assume at most 16 textures per program at the moment.
+* This need probably need to be changed to support bindless textures */
+   params[SI_PARAM_SAMPLER] = LLVMPointerType(
+   LLVMArrayType(LLVMVectorType(i8, 16), 16), CONST_ADDR_SPACE);
+   params[SI_PARAM_RESOURCE] = LLVMPointerType(
+   LLVMArrayType(LLVMVectorType(i8, 32), 16), CONST_ADDR_SPACE);
 
switch (si_shader_ctx-type) {
case TGSI_PROCESSOR_VERTEX:
@@ -1647,6 +1655,10 @@ static void create_function(struct si_shader_context 
*si_shader_ctx)
for (i = 0; i = last_sgpr; ++i) {
LLVMValueRef P = 
LLVMGetParam(si_shader_ctx-radeon_bld.main_fn, i);
LLVMAddAttribute(P, LLVMInRegAttribute);
+   /* We tell llvm that array inputs are passed by value to allow 
Sinking pass
+* to move load. Inputs are constant so this is fine. */
+   if (i  3)
+   LLVMAddAttribute(P, LLVMByValAttribute);
}
 
 #if HAVE_LLVM = 0x0304
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600/SI: Add SinkingPass before ISel

2013-10-06 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPUTargetMachine.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
b/lib/Target/R600/AMDGPUTargetMachine.cpp
index d77cddd..f28f27a 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -120,6 +120,7 @@ bool
 AMDGPUPassConfig::addPreISel() {
   const AMDGPUSubtarget ST = TM-getSubtargetAMDGPUSubtarget();
   addPass(createFlattenCFGPass());
+  addPass(createSinkingPass());
   if (ST.getGeneration()  AMDGPUSubtarget::NORTHERN_ISLANDS) {
 addPass(createSITypeRewriter());
 addPass(createStructurizeCFGPass());
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: Allow Sinking pass to move preloaded const/res/sampl

2013-10-06 Thread Vincent Lejeune
This fixes a crash in Unigine Heaven 3.0, and probably in some
other apps.
---
 src/gallium/drivers/radeonsi/radeonsi_shader.c | 27 ++
 src/gallium/drivers/radeonsi/si_state.h|  1 +
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c 
b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 97ed4e3..ab996cc 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -114,8 +114,12 @@ static LLVMValueRef build_indexed_load(
 {
struct lp_build_context * base = 
si_shader_ctx-radeon_bld.soa.bld_base.base;
 
+   LLVMValueRef indices[2] = {
+   LLVMConstInt(LLVMInt64TypeInContext(base-gallivm-context), 0, 
false),
+   offset
+   };
LLVMValueRef computed_ptr = LLVMBuildGEP(
-   base-gallivm-builder, base_ptr, offset, 1, );
+   base-gallivm-builder, base_ptr, indices, 2, );
 
LLVMValueRef result = LLVMBuildLoad(base-gallivm-builder, 
computed_ptr, );
LLVMSetMetadata(result, 1, si_shader_ctx-const_md);
@@ -1578,9 +1582,14 @@ static void create_function(struct si_shader_context 
*si_shader_ctx)
v2i32 = LLVMVectorType(i32, 2);
v3i32 = LLVMVectorType(i32, 3);
 
-   params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16), 
CONST_ADDR_SPACE);
-   params[SI_PARAM_SAMPLER] = params[SI_PARAM_CONST];
-   params[SI_PARAM_RESOURCE] = LLVMPointerType(LLVMVectorType(i8, 32), 
CONST_ADDR_SPACE);
+   params[SI_PARAM_CONST] = LLVMPointerType(
+   LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), 
CONST_ADDR_SPACE);
+   /* We assume at most 16 textures per program at the moment.
+* This need probably need to be changed to support bindless textures */
+   params[SI_PARAM_SAMPLER] = LLVMPointerType(
+   LLVMArrayType(LLVMVectorType(i8, 16), NUM_SAMPLER_VIEWS), 
CONST_ADDR_SPACE);
+   params[SI_PARAM_RESOURCE] = LLVMPointerType(
+   LLVMArrayType(LLVMVectorType(i8, 32), NUM_SAMPLER_STATES), 
CONST_ADDR_SPACE);
 
switch (si_shader_ctx-type) {
case TGSI_PROCESSOR_VERTEX:
@@ -1647,6 +1656,16 @@ static void create_function(struct si_shader_context 
*si_shader_ctx)
for (i = 0; i = last_sgpr; ++i) {
LLVMValueRef P = 
LLVMGetParam(si_shader_ctx-radeon_bld.main_fn, i);
LLVMAddAttribute(P, LLVMInRegAttribute);
+   /* We tell llvm that array inputs are passed by value to allow 
Sinking pass
+* to move load. Inputs are constant so this is fine. */
+   switch (i) {
+   default: break;
+   case SI_PARAM_CONST:
+   case SI_PARAM_SAMPLER:
+   case SI_PARAM_RESOURCE:
+   LLVMAddAttribute(P, LLVMByValAttribute);
+   break;
+   }
}
 
 #if HAVE_LLVM = 0x0304
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 94a1521..6dbf880 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -107,6 +107,7 @@ union si_state {
  */
 #define FMASK_TEX_OFFSET   NUM_TEX_UNITS
 #define NUM_SAMPLER_VIEWS  (FMASK_TEX_OFFSET+NUM_TEX_UNITS)
+#define NUM_SAMPLER_STATES NUM_TEX_UNITS
 
 #define NUM_CONST_BUFFERS 2
 
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600: Add a ldptr intrinsic to support MSAA.

2013-09-30 Thread Vincent Lejeune
---
 lib/Target/R600/R600ISelLowering.cpp | 6 +-
 lib/Target/R600/R600Instructions.td  | 4 
 lib/Target/R600/R600Intrinsics.td| 1 +
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/Target/R600/R600ISelLowering.cpp 
b/lib/Target/R600/R600ISelLowering.cpp
index 126db73..a6778a4 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -590,7 +590,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, 
SelectionDAG DAG) const
 case AMDGPUIntrinsic::R600_txf:
 case AMDGPUIntrinsic::R600_txq:
 case AMDGPUIntrinsic::R600_ddx:
-case AMDGPUIntrinsic::R600_ddy: {
+case AMDGPUIntrinsic::R600_ddy:
+case AMDGPUIntrinsic::R600_ldptr: {
   unsigned TextureOp;
   switch (IntrinsicID) {
   case AMDGPUIntrinsic::R600_tex:
@@ -623,6 +624,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, 
SelectionDAG DAG) const
   case AMDGPUIntrinsic::R600_ddy:
 TextureOp = 9;
 break;
+  case AMDGPUIntrinsic::R600_ldptr:
+TextureOp = 10;
+break;
   default:
 llvm_unreachable(Unknow Texture Operation);
   }
diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index 82ecbad..9dc9303 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -881,6 +881,9 @@ def TEX_SAMPLE_C_L : R600_TEX 0x19, TEX_SAMPLE_C_L;
 def TEX_SAMPLE_LB : R600_TEX 0x12, TEX_SAMPLE_LB;
 def TEX_SAMPLE_C_LB : R600_TEX 0x1A, TEX_SAMPLE_C_LB;
 def TEX_LD : R600_TEX 0x03, TEX_LD;
+def TEX_LDPTR : R600_TEX 0x03, TEX_LDPTR {
+  let Inst{6-5} = 1;
+}
 def TEX_GET_TEXTURE_RESINFO : R600_TEX 0x04, TEX_GET_TEXTURE_RESINFO;
 def TEX_GET_GRADIENTS_H : R600_TEX 0x07, TEX_GET_GRADIENTS_H;
 def TEX_GET_GRADIENTS_V : R600_TEX 0x08, TEX_GET_GRADIENTS_V;
@@ -899,6 +902,7 @@ defm : TexPattern6, TEX_LD, v4i32;
 defm : TexPattern7, TEX_GET_TEXTURE_RESINFO, v4i32;
 defm : TexPattern8, TEX_GET_GRADIENTS_H;
 defm : TexPattern9, TEX_GET_GRADIENTS_V;
+defm : TexPattern10, TEX_LDPTR, v4i32;
 
 
//===--===//
 // Helper classes for common instructions
diff --git a/lib/Target/R600/R600Intrinsics.td 
b/lib/Target/R600/R600Intrinsics.td
index 58d86b6..b5cb369 100644
--- a/lib/Target/R600/R600Intrinsics.td
+++ b/lib/Target/R600/R600Intrinsics.td
@@ -52,6 +52,7 @@ let TargetPrefix = R600, isTarget = 1 in {
   def int_R600_txb : TextureIntrinsicFloatInput;
   def int_R600_txbc : TextureIntrinsicFloatInput;
   def int_R600_txf : TextureIntrinsicInt32Input;
+  def int_R600_ldptr : TextureIntrinsicInt32Input;
   def int_R600_txq : TextureIntrinsicInt32Input;
   def int_R600_ddx : TextureIntrinsicFloatInput;
   def int_R600_ddy : TextureIntrinsicFloatInput;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] r600g/llvm: fix txq for texture buffer

2013-09-30 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c | 7 +--
 src/gallium/drivers/r600/r600_shader.c   | 1 +
 src/gallium/drivers/radeon/radeon_llvm.h | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 03a68e4..54291a1 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -23,6 +23,7 @@
 #define CONSTANT_BUFFER_0_ADDR_SPACE 8
 #define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + 
R600_UCP_CONST_BUFFER)
 #define CONSTANT_TXQ_BUFFER (CONSTANT_BUFFER_0_ADDR_SPACE + 
R600_TXQ_CONST_BUFFER)
+#define LLVM_R600_BUFFER_INFO_CONST_BUFFER (CONSTANT_BUFFER_0_ADDR_SPACE + 
R600_BUFFER_INFO_CONST_BUFFER)
 
 static LLVMValueRef llvm_load_const_buffer(
struct lp_build_tgsi_context * bld_base,
@@ -410,8 +411,10 @@ static void llvm_emit_tex(
if (emit_data-inst-Texture.Texture == TGSI_TEXTURE_BUFFER) {
switch (emit_data-inst-Instruction.Opcode) {
case TGSI_OPCODE_TXQ: {
-   LLVMValueRef offset = 
lp_build_const_int32(bld_base-base.gallivm, 1);
-   LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, 
offset, R600_BUFFER_INFO_CONST_BUFFER);
+   struct radeon_llvm_context * ctx = 
radeon_llvm_context(bld_base);
+   ctx-uses_tex_buffers = true;
+   LLVMValueRef offset = 
lp_build_const_int32(bld_base-base.gallivm, 0);
+   LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, 
offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER);
emit_data-output[0] = cvecval;
return;
}
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index ce15cd7..e8e1333 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1139,6 +1139,7 @@ static int r600_shader_from_tgsi(struct r600_screen 
*rscreen,
radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
mod = r600_tgsi_llvm(radeon_llvm_ctx, tokens);
ctx.shader-has_txq_cube_array_z_comp = 
radeon_llvm_ctx.has_txq_cube_array_z_comp;
+   ctx.shader-uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers;
 
if (r600_llvm_compile(mod, rscreen-b.family, ctx.bc, 
use_kill, dump)) {
radeon_llvm_dispose(radeon_llvm_ctx);
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 14a8c34..345ae70 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -67,6 +67,7 @@ struct radeon_llvm_context {
unsigned fs_color_all;
unsigned alpha_to_one;
unsigned has_txq_cube_array_z_comp;
+   unsigned uses_tex_buffers;
 
/*=== Front end configuration ===*/
 
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] r600g/llvm: fix sample cube shadow

2013-09-30 Thread Vincent Lejeune
---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 8ff9abd..ac2e511 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -654,7 +654,8 @@ void radeon_llvm_emit_prepare_cube_coords(
opcode == TGSI_OPCODE_TXB2 ||
opcode == TGSI_OPCODE_TXL2) {
coords[3] = coords_arg[4];
-   } else if (opcode == TGSI_OPCODE_TXB ||
+   } else if (opcode == TGSI_OPCODE_TEX ||
+   opcode == TGSI_OPCODE_TXB ||
opcode == TGSI_OPCODE_TXL) {
coords[3] = coords_arg[3];
}
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] R600: Don't use trans slot for instructions that read LDS source registers

2013-09-08 Thread Vincent Lejeune
A few comments below; otherwise:

Reviewed-by: Vincent Lejeune <vljn at ovi.com>



- Mail original -
 De : Tom Stellard t...@stellard.net
 À : llvm-comm...@cs.uiuc.edu
 Cc : mesa-dev@lists.freedesktop.org; Tom Stellard thomas.stell...@amd.com
 Envoyé le : Vendredi 6 septembre 2013 0h23
 Objet : [PATCH] R600: Don't use trans slot for instructions that read LDS 
 source registers
 
 From: Tom Stellard thomas.stell...@amd.com
 
 This fixes some regressions in the piglit local memory store tests
 introduced by recent commits which made the scheduler aware of the trans
 slot.
 
 It's not possible to test this using lit, because there is no way to
 determine from the assembly dumps whether or not an instruction is in
 the trans slot.
 
 Even if this were possible, the test would be highly sensitive to
 changes in the scheduler and might generate confusing false negatives.
 ---
 lib/Target/R600/R600InstrInfo.cpp        | 17 +
 lib/Target/R600/R600InstrInfo.h          |  1 +
 lib/Target/R600/R600MachineScheduler.cpp |  5 +
 lib/Target/R600/R600Packetizer.cpp       |  5 +
 lib/Target/R600/R600RegisterInfo.td      | 10 +-
 5 files changed, 37 insertions(+), 1 deletion(-)
 
 diff --git a/lib/Target/R600/R600InstrInfo.cpp 
 b/lib/Target/R600/R600InstrInfo.cpp
 index 0e7cfb4..60a3f7d 100644
 --- a/lib/Target/R600/R600InstrInfo.cpp
 +++ b/lib/Target/R600/R600InstrInfo.cpp
 @@ -204,6 +204,23 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) 
 const {
    }
 }
 
 +bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
 +  if (!isALUInstr(MI-getOpcode())) {
 +    return false;
 +  }
 +  for (MachineInstr::const_mop_iterator I = MI-operands_begin(),
 +                                        E = MI-operands_end(); I != E; ++I) 
 {
 +    if (!I-isReg() || !I-isUse() ||
 +        TargetRegisterInfo::isVirtualRegister(I-getReg())) {
 +      continue;
 +    }
 +    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I-getReg())) {
 +      return true;
 +    }

The braces around this if statement and the previous one are unneeded.

 +  }
 +  return false;
 +}
 +
 int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
    static const unsigned OpTable[] = {
      AMDGPU::OpName::src0,
 diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
 index 24cc43d..0d1ffc8 100644
 --- a/lib/Target/R600/R600InstrInfo.h
 +++ b/lib/Target/R600/R600InstrInfo.h
 @@ -78,6 +78,7 @@ namespace llvm {
    bool usesTextureCache(const MachineInstr *MI) const;
 
    bool mustBeLastInClause(unsigned Opcode) const;
 +  bool readsLDSSrcReg(const MachineInstr *MI) const;
 
    /// \returns The operand index for the given source number.  Legal values
    /// for SrcNum are 0, 1, and 2.
 diff --git a/lib/Target/R600/R600MachineScheduler.cpp 
 b/lib/Target/R600/R600MachineScheduler.cpp
 index 0499dd5..f67ba89 100644
 --- a/lib/Target/R600/R600MachineScheduler.cpp
 +++ b/lib/Target/R600/R600MachineScheduler.cpp
 @@ -314,6 +314,11 @@ R600SchedStrategy::AluKind 
 R600SchedStrategy::getAluKind(SUnit *SU) const {
      if (regBelongsToClass(DestReg, AMDGPU::R600_Reg128RegClass))
        return AluT_XYZW;
 
 +    // LDS src registers cannot be used in the Trans slot.
 +    if (TII-readsLDSSrcReg(MI)) {
 +      return AluT_XYZW;
 +    }

Here too

 +
      return AluAny;
 
 }
 diff --git a/lib/Target/R600/R600Packetizer.cpp 
 b/lib/Target/R600/R600Packetizer.cpp
 index 6c70052..ee256d5 100644
 --- a/lib/Target/R600/R600Packetizer.cpp
 +++ b/lib/Target/R600/R600Packetizer.cpp
 @@ -272,6 +272,11 @@ public:
        return false;
      }
 
 +    // We cannot read LDS source registrs from the Trans slot.
 +    if (isTransSlot  TII-readsLDSSrcReg(MI)) {
 +      return false;
 +    }

And here too

 +
      CurrentPacketMIs.pop_back();
      return true;
    }
 diff --git a/lib/Target/R600/R600RegisterInfo.td 
 b/lib/Target/R600/R600RegisterInfo.td
 index fa987cf..514427e 100644
 --- a/lib/Target/R600/R600RegisterInfo.td
 +++ b/lib/Target/R600/R600RegisterInfo.td
 @@ -95,6 +95,12 @@ foreach Index = 448-480 in {
 
 // Special Registers
 
 +def OQA : R600RegOQA, 219;
 +def OQB : R600RegOQB, 220;
 +def OQAP : R600RegOQAP, 221;
 +def OQBP : R600RegOQAP, 222;
 +def LDS_DIRECT_A : R600RegLDS_DIRECT_A, 223;
 +def LDS_DIRECT_B : R600RegLDS_DIRECT_B, 224;
 def ZERO : R600Reg0.0, 248;
 def ONE : R600Reg1.0, 249;
 def NEG_ONE : R600Reg-1.0, 249;
 @@ -115,7 +121,6 @@ def PRED_SEL_OFF: R600RegPred_sel_off, 
 0;
 def PRED_SEL_ZERO : R600RegPred_sel_zero, 2;
 def PRED_SEL_ONE : R600RegPred_sel_one, 3;
 def AR_X : R600RegAR.x, 0;
 -def OQAP : R600RegOQAP, 221;
 
 def R600_ArrayBase : RegisterClass AMDGPU, [f32, i32], 32,
                            (add (sequence ArrayBase%u, 448, 
 480));
 @@ -130,6 +135,9 @@ let isAllocatable = 0 in {
 // XXX: Only use the X channel, until we support wider stack widths
 def R600_Addr : RegisterClass AMDGPU, [i32], 127, (add (sequence 
 Addr%u_X, 0, 127));
 

Re: [Mesa-dev] R600 Patches: KCache kernel arguments and 24-bit arithmetic

2013-06-27 Thread Vincent Lejeune
For the whole series: Reviewed-by: Vincent Lejeune <vljn at ovi.com>

In a future patch we might also remove the ISD::BUILD_VECTOR case in the 
Select() function and use a tablegen pattern instead. I wrote that case 
because we used to lower the r600.load.input intrinsic to a raw register; 
however, we now lower it to a copy from a register, which should be 
convertible to a REG_SEQUENCE.

Vincent




- Mail original -
 De : Tom Stellard t...@stellard.net
 À : llvm-comm...@cs.uiuc.edu
 Cc : mesa-dev@lists.freedesktop.org
 Envoyé le : Mardi 25 juin 2013 23h37
 Objet : [Mesa-dev] R600 Patches: KCache kernel arguments and 24-bit
arithmetic
 
 Hi,
 
 The attached patches clean up kernel argument handling for both R600 and
 SI and for R600 makes it possible to read arguments through the KCache.
 There are also patches that add support for the 24-bit arithmetic instructions
 (MAD_UINT24, MAD_INT24, MUL_UINT24, and MUL_INT24).  In order to test
 these patches with you will also need to apply the corresponding Mesa
 patches which will be on the mailing list soon.
 
 -Tom
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] R600: Bugfixes

2013-06-17 Thread Vincent Lejeune
Hi,

These patches fix two bugs in the R600 backend.
The first one uses the correct rv710/rv730 encoding for TEX clauses with more 
than 8 instructions.
This bug was spotted here:

https://bugs.freedesktop.org/show_bug.cgi?id=64257
The other patch fixes a typo that causes instructions not to use the PV/PS 
registers when R600Packetizer evaluates read port limitations.
The typo prevents some bundling opportunities in some (not so frequent) situations.

Vincent


0001-R600-Properly-set-COUNT_3-bit-in-TEX-clause-initiati.patch
Description: Binary data


0002-R600-PV-stores-Reg-id-not-index.patch
Description: Binary data
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] R600 Patches: Add support for the local address space

2013-06-14 Thread Vincent Lejeune
Hi,

Thanks for your work on this!
Patches 2, 4 and 5 have my Reviewed-by.


diff --git a/lib/Target/R600/R600InstrInfo.cpp 
b/lib/Target/R600/R600InstrInfo.cpp
index b9da74c..6de47f7 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -133,6 +133,12 @@ bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
 bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
   unsigned TargetFlags = get(Opcode).TSFlags; +  return (TargetFlags  
 R600_InstFlag::ALU_INST);
+}
+
+bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
+  unsigned TargetFlags = get(Opcode).TSFlags;
+
   return ((TargetFlags  R600_InstFlag::OP1) |
   (TargetFlags  R600_InstFlag::OP2) |
   (TargetFlags  R600_InstFlag::OP3));
The prototype of this function is not declared in this patch (it is declared in patch 5).



diff --git a/lib/Target/R600/R600MachineScheduler.cpp 
b/lib/Target/R600/R600MachineScheduler.cpp
index a330d88..acc1b4d 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -269,10 +269,14 @@ R600SchedStrategy::AluKind 
R600SchedStrategy::getAluKind(SUnit *SU) const {
 }
 
 // Does the instruction take a whole IG ?
+// XXX: Is it possible to add a helper function in R600InstrInfo that can
+// be used here and in R600PacketizerList::isSoloInstruction() ?
 if(TII-isVector(*MI) ||
 TII-isCubeOp(MI-getOpcode()) ||
-TII-isReductionOp(MI-getOpcode()))
+TII-isReductionOp(MI-getOpcode()) ||
+MI-getOpcode() == AMDGPU::GROUP_BARRIER) {
   return AluT_XYZW;
+}

I'm not sure it would factor out that much code: R600Packetizer runs after 
cube/reduction ops are lowered
by the R600Expand pass, so the isVector/isReductionOp checks are useless there. I may 
have left some debug code in
isSoloInstruction, though.



- Mail original -
 De : Tom Stellard t...@stellard.net
 À : llvm-comm...@cs.uiuc.edu
 Cc : mesa-dev@lists.freedesktop.org
 Envoyé le : Jeudi 13 juin 2013 2h42
 Objet : [Mesa-dev] R600 Patches: Add support for the local address space
 
 Hi,
 
 The attached patches add support for local address space on
 Evergreen / Northern Islands GPUs.
 
 Please Review.
 
 -Tom
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] r600g/llvm: fix txq for texture buffer

2013-05-21 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c | 7 +--
 src/gallium/drivers/r600/r600_shader.c   | 1 +
 src/gallium/drivers/radeon/radeon_llvm.h | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index c1809b3..77c6abb 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -23,6 +23,7 @@
 #define CONSTANT_BUFFER_0_ADDR_SPACE 8
 #define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + 
R600_UCP_CONST_BUFFER)
 #define CONSTANT_TXQ_BUFFER (CONSTANT_BUFFER_0_ADDR_SPACE + 
R600_TXQ_CONST_BUFFER)
+#define LLVM_R600_BUFFER_INFO_CONST_BUFFER (CONSTANT_BUFFER_0_ADDR_SPACE + 
R600_BUFFER_INFO_CONST_BUFFER)
 
 static LLVMValueRef llvm_load_const_buffer(
struct lp_build_tgsi_context * bld_base,
@@ -410,8 +411,10 @@ static void llvm_emit_tex(
if (emit_data-inst-Texture.Texture == TGSI_TEXTURE_BUFFER) {
switch (emit_data-inst-Instruction.Opcode) {
case TGSI_OPCODE_TXQ: {
-   LLVMValueRef offset = 
lp_build_const_int32(bld_base-base.gallivm, 1);
-   LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, 
offset, R600_BUFFER_INFO_CONST_BUFFER);
+   struct radeon_llvm_context * ctx = 
radeon_llvm_context(bld_base);
+   ctx-uses_tex_buffers = true;
+   LLVMValueRef offset = 
lp_build_const_int32(bld_base-base.gallivm, 0);
+   LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, 
offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER);
emit_data-output[0] = cvecval;
return;
}
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 81ed3ce..2f126c6 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1170,6 +1170,7 @@ static int r600_shader_from_tgsi(struct r600_screen 
*rscreen,
radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
mod = r600_tgsi_llvm(radeon_llvm_ctx, tokens);
ctx.shader-has_txq_cube_array_z_comp = 
radeon_llvm_ctx.has_txq_cube_array_z_comp;
+   ctx.shader-uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers;
 
if (r600_llvm_compile(mod, rscreen-family, ctx.bc, use_kill, 
dump)) {
radeon_llvm_dispose(radeon_llvm_ctx);
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 14a8c34..345ae70 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -67,6 +67,7 @@ struct radeon_llvm_context {
unsigned fs_color_all;
unsigned alpha_to_one;
unsigned has_txq_cube_array_z_comp;
+   unsigned uses_tex_buffers;
 
/*=== Front end configuration ===*/
 
-- 
1.8.2.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] r600g/llvm: fix sample cube shadow

2013-05-21 Thread Vincent Lejeune
---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 3f7e79f..f49170d 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -654,7 +654,8 @@ void radeon_llvm_emit_prepare_cube_coords(
opcode == TGSI_OPCODE_TXB2 ||
opcode == TGSI_OPCODE_TXL2) {
coords[3] = coords_arg[4];
-   } else if (opcode == TGSI_OPCODE_TXB ||
+   } else if (opcode == TGSI_OPCODE_TEX ||
+   opcode == TGSI_OPCODE_TXB ||
opcode == TGSI_OPCODE_TXL) {
coords[3] = coords_arg[3];
}
-- 
1.8.2.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] r600g/llvm: Factorize code loading from const buffer.

2013-05-20 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c | 51 +---
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index c6c9123..26d40a2 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -23,30 +23,40 @@
 #define CONSTANT_BUFFER_0_ADDR_SPACE 8
 #define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + 
R600_UCP_CONST_BUFFER)
 
+static LLVMValueRef llvm_load_const_buffer(
+   struct lp_build_tgsi_context * bld_base,
+   LLVMValueRef OffsetValue,
+   unsigned ConstantAddressSpace)
+{
+   LLVMValueRef offset[2] = {
+   
LLVMConstInt(LLVMInt64TypeInContext(bld_base-base.gallivm-context), 0, false),
+   OffsetValue
+   };
+
+   LLVMTypeRef const_ptr_type = 
LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base-base.elem_type, 4), 
1024),
+   ConstantAddressSpace);
+   LLVMValueRef const_ptr = 
LLVMBuildIntToPtr(bld_base-base.gallivm-builder, 
lp_build_const_int32(bld_base-base.gallivm, 0), const_ptr_type, );
+   LLVMValueRef ptr = LLVMBuildGEP(bld_base-base.gallivm-builder, 
const_ptr, offset, 2, );
+   return LLVMBuildLoad(bld_base-base.gallivm-builder, ptr, );
+}
+
 static LLVMValueRef llvm_fetch_const(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle)
 {
-   LLVMValueRef offset[2] = {
-   
LLVMConstInt(LLVMInt64TypeInContext(bld_base-base.gallivm-context), 0, false),
-   lp_build_const_int32(bld_base-base.gallivm, 
reg-Register.Index)
-   };
+   LLVMValueRef offset = lp_build_const_int32(bld_base-base.gallivm, 
reg-Register.Index);
if (reg-Register.Indirect) {
struct lp_build_tgsi_soa_context *bld = 
lp_soa_context(bld_base);
LLVMValueRef index = 
LLVMBuildLoad(bld_base-base.gallivm-builder, 
bld-addr[reg-Indirect.Index][reg-Indirect.Swizzle], );
-   offset[1] = LLVMBuildAdd(bld_base-base.gallivm-builder, 
offset[1], index, );
+   offset = LLVMBuildAdd(bld_base-base.gallivm-builder, offset, 
index, );
}
unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ;
if (reg-Register.Dimension) {
ConstantAddressSpace += reg-Dimension.Index;
}
-   LLVMTypeRef const_ptr_type = 
LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base-base.elem_type, 4), 
1024),
-   ConstantAddressSpace);
-   LLVMValueRef const_ptr = 
LLVMBuildIntToPtr(bld_base-base.gallivm-builder, 
lp_build_const_int32(bld_base-base.gallivm, 0), const_ptr_type, );
-   LLVMValueRef ptr = LLVMBuildGEP(bld_base-base.gallivm-builder, 
const_ptr, offset, 2, );
-   LLVMValueRef cvecval = LLVMBuildLoad(bld_base-base.gallivm-builder, 
ptr, );
+   LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, 
ConstantAddressSpace);
LLVMValueRef cval = 
LLVMBuildExtractElement(bld_base-base.gallivm-builder, cvecval, 
lp_build_const_int32(bld_base-base.gallivm, swizzle), );
return bitcast(bld_base, type, cval);
 }
@@ -250,14 +260,8 @@ static void llvm_emit_epilogue(struct 
lp_build_tgsi_context * bld_base)
LLVMValueRef adjusted_elements[4];
for (reg_index = 0; reg_index  2; reg_index 
++) {
for (chan = 0; chan  
TGSI_NUM_CHANNELS; chan++) {
-   LLVMValueRef offset[2] = {
-   
LLVMConstInt(LLVMInt64TypeInContext(bld_base-base.gallivm-context), 0, false),
-   
lp_build_const_int32(bld_base-base.gallivm, reg_index * 4 + chan)
-   };
-   LLVMTypeRef const_ptr_type = 
LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base-base.elem_type, 4), 
1024), CONSTANT_BUFFER_1_ADDR_SPACE);
-   LLVMValueRef const_ptr = 
LLVMBuildIntToPtr(bld_base-base.gallivm-builder, 
lp_build_const_int32(bld_base-base.gallivm, 0), const_ptr_type, );
-   LLVMValueRef ptr = 
LLVMBuildGEP(bld_base-base.gallivm-builder, const_ptr, offset, 2, );
-   LLVMValueRef base_vector = 
LLVMBuildLoad(bld_base-base.gallivm-builder, ptr, );
+   LLVMValueRef offset = 
lp_build_const_int32(bld_base-base.gallivm, reg_index * 4 + chan);
+   LLVMValueRef base_vector = 
llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE);

[Mesa-dev] [PATCH 2/4] r600g/llvm: Fix cubearray textureSize

2013-05-20 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c | 15 +++
 src/gallium/drivers/r600/r600_shader.c   |  1 +
 src/gallium/drivers/radeon/radeon_llvm.h |  1 +
 3 files changed, 17 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 26d40a2..3d2c492 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -22,6 +22,7 @@
 
 #define CONSTANT_BUFFER_0_ADDR_SPACE 8
 #define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + 
R600_UCP_CONST_BUFFER)
+#define CONSTANT_TXQ_BUFFER (CONSTANT_BUFFER_0_ADDR_SPACE + 
R600_TXQ_CONST_BUFFER)
 
 static LLVMValueRef llvm_load_const_buffer(
struct lp_build_tgsi_context * bld_base,
@@ -471,6 +472,20 @@ static void llvm_emit_tex(
emit_data-output[0] = build_intrinsic(gallivm-builder,
action-intr_name,
emit_data-dst_type, args, c, 
LLVMReadNoneAttribute);
+
+   if (emit_data-inst-Instruction.Opcode == TGSI_OPCODE_TXQ 
+   ((emit_data-inst-Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
+   emit_data-inst-Texture.Texture == 
TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
+   if (emit_data-inst-Dst[0].Register.WriteMask  4) {
+   LLVMValueRef offset = 
lp_build_const_int32(bld_base-base.gallivm, 0);
+   LLVMValueRef ZLayer = 
LLVMBuildExtractElement(gallivm-builder,
+   llvm_load_const_buffer(bld_base, offset, 
CONSTANT_TXQ_BUFFER),
+   lp_build_const_int32(gallivm, 0), );
+
+   emit_data-output[0] = 
LLVMBuildInsertElement(gallivm-builder, emit_data-output[0], ZLayer, 
lp_build_const_int32(gallivm, 2), );
+   struct radeon_llvm_context * ctx = 
radeon_llvm_context(bld_base);
+   ctx-has_txq_cube_array_z_comp = true;
+   }
 }
 
 static void emit_cndlt(
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 4d74db0..81ed3ce 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1169,6 +1169,7 @@ static int r600_shader_from_tgsi(struct r600_screen 
*rscreen,
radeon_llvm_ctx.clip_vertex = ctx.cv_output;
radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
mod = r600_tgsi_llvm(radeon_llvm_ctx, tokens);
+   ctx.shader-has_txq_cube_array_z_comp = 
radeon_llvm_ctx.has_txq_cube_array_z_comp;
 
if (r600_llvm_compile(mod, rscreen-family, ctx.bc, use_kill, 
dump)) {
radeon_llvm_dispose(radeon_llvm_ctx);
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 1d4bd45..14a8c34 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -66,6 +66,7 @@ struct radeon_llvm_context {
unsigned color_buffer_count;
unsigned fs_color_all;
unsigned alpha_to_one;
+   unsigned has_txq_cube_array_z_comp;
 
/*=== Front end configuration ===*/
 
-- 
1.8.2.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] r600g/llvm: Factorize code loading from const buffer.

2013-05-20 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c | 51 +---
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index c6c9123..26d40a2 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -23,30 +23,40 @@
 #define CONSTANT_BUFFER_0_ADDR_SPACE 8
 #define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + 
R600_UCP_CONST_BUFFER)
 
+static LLVMValueRef llvm_load_const_buffer(
+   struct lp_build_tgsi_context * bld_base,
+   LLVMValueRef OffsetValue,
+   unsigned ConstantAddressSpace)
+{
+   LLVMValueRef offset[2] = {
+   
LLVMConstInt(LLVMInt64TypeInContext(bld_base-base.gallivm-context), 0, false),
+   OffsetValue
+   };
+
+   LLVMTypeRef const_ptr_type = 
LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base-base.elem_type, 4), 
1024),
+   ConstantAddressSpace);
+   LLVMValueRef const_ptr = 
LLVMBuildIntToPtr(bld_base-base.gallivm-builder, 
lp_build_const_int32(bld_base-base.gallivm, 0), const_ptr_type, );
+   LLVMValueRef ptr = LLVMBuildGEP(bld_base-base.gallivm-builder, 
const_ptr, offset, 2, );
+   return LLVMBuildLoad(bld_base-base.gallivm-builder, ptr, );
+}
+
 static LLVMValueRef llvm_fetch_const(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle)
 {
-   LLVMValueRef offset[2] = {
-   
LLVMConstInt(LLVMInt64TypeInContext(bld_base-base.gallivm-context), 0, false),
-   lp_build_const_int32(bld_base-base.gallivm, 
reg-Register.Index)
-   };
+   LLVMValueRef offset = lp_build_const_int32(bld_base-base.gallivm, 
reg-Register.Index);
if (reg-Register.Indirect) {
struct lp_build_tgsi_soa_context *bld = 
lp_soa_context(bld_base);
LLVMValueRef index = 
LLVMBuildLoad(bld_base-base.gallivm-builder, 
bld-addr[reg-Indirect.Index][reg-Indirect.Swizzle], );
-   offset[1] = LLVMBuildAdd(bld_base-base.gallivm-builder, 
offset[1], index, );
+   offset = LLVMBuildAdd(bld_base-base.gallivm-builder, offset, 
index, );
}
unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ;
if (reg-Register.Dimension) {
ConstantAddressSpace += reg-Dimension.Index;
}
-   LLVMTypeRef const_ptr_type = 
LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base-base.elem_type, 4), 
1024),
-   ConstantAddressSpace);
-   LLVMValueRef const_ptr = 
LLVMBuildIntToPtr(bld_base-base.gallivm-builder, 
lp_build_const_int32(bld_base-base.gallivm, 0), const_ptr_type, );
-   LLVMValueRef ptr = LLVMBuildGEP(bld_base-base.gallivm-builder, 
const_ptr, offset, 2, );
-   LLVMValueRef cvecval = LLVMBuildLoad(bld_base-base.gallivm-builder, 
ptr, );
+   LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, 
ConstantAddressSpace);
LLVMValueRef cval = 
LLVMBuildExtractElement(bld_base-base.gallivm-builder, cvecval, 
lp_build_const_int32(bld_base-base.gallivm, swizzle), );
return bitcast(bld_base, type, cval);
 }
@@ -250,14 +260,8 @@ static void llvm_emit_epilogue(struct 
lp_build_tgsi_context * bld_base)
LLVMValueRef adjusted_elements[4];
for (reg_index = 0; reg_index  2; reg_index 
++) {
for (chan = 0; chan  
TGSI_NUM_CHANNELS; chan++) {
-   LLVMValueRef offset[2] = {
-   
LLVMConstInt(LLVMInt64TypeInContext(bld_base-base.gallivm-context), 0, false),
-   
lp_build_const_int32(bld_base-base.gallivm, reg_index * 4 + chan)
-   };
-   LLVMTypeRef const_ptr_type = 
LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base-base.elem_type, 4), 
1024), CONSTANT_BUFFER_1_ADDR_SPACE);
-   LLVMValueRef const_ptr = 
LLVMBuildIntToPtr(bld_base-base.gallivm-builder, 
lp_build_const_int32(bld_base-base.gallivm, 0), const_ptr_type, );
-   LLVMValueRef ptr = 
LLVMBuildGEP(bld_base-base.gallivm-builder, const_ptr, offset, 2, );
-   LLVMValueRef base_vector = 
LLVMBuildLoad(bld_base-base.gallivm-builder, ptr, );
+   LLVMValueRef offset = 
lp_build_const_int32(bld_base-base.gallivm, reg_index * 4 + chan);
+   LLVMValueRef base_vector = 
llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE);

[Mesa-dev] [PATCH 4/4] r600g/llvm: fix cubemap lod/bias

2013-05-20 Thread Vincent Lejeune
---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 0629b89..3f7e79f 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -654,6 +654,9 @@ void radeon_llvm_emit_prepare_cube_coords(
opcode == TGSI_OPCODE_TXB2 ||
opcode == TGSI_OPCODE_TXL2) {
coords[3] = coords_arg[4];
+   } else if (opcode == TGSI_OPCODE_TXB ||
+   opcode == TGSI_OPCODE_TXL) {
+   coords[3] = coords_arg[3];
}
}
 
-- 
1.8.2.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] r600g/llvm: Fix texelFetchOffset-2D

2013-05-20 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 3d2c492..c1809b3 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -460,6 +460,12 @@ static void llvm_emit_tex(
for (c = 1; c  emit_data-arg_count; ++c)
args[c] = emit_data-args[c];
 
+   if (emit_data-inst-Instruction.Opcode == TGSI_OPCODE_TXF) {
+   args[1] = LLVMBuildShl(gallivm-builder, args[1], 
lp_build_const_int32(gallivm, 1), );
+   args[2] = LLVMBuildShl(gallivm-builder, args[2], 
lp_build_const_int32(gallivm, 1), );
+   args[3] = LLVMBuildShl(gallivm-builder, args[3], 
lp_build_const_int32(gallivm, 1), );
+   }
+
sampler_src = emit_data-inst-Instruction.NumSrcRegs-1;
 
args[c++] = lp_build_const_int32(gallivm,
-- 
1.8.2.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] R600/SI Patches: A few cleanups for compute

2013-05-16 Thread Vincent Lejeune
Hi,


-- next part --
From dc547a89dac5039ce521f3c27fb23346251d488d Mon Sep 17 00:00:00 2001 From: 
Tom Stellard thomas.stellard at amd.com
Date: Tue, 7 May 2013 16:26:26 -0400
Subject: [PATCH 4/7] R600: Swap the legality of rotl and rotr

The hardware supports rotr and not rotl.
---
 lib/Target/R600/AMDGPUISelLowering.cpp |  3 +++
 lib/Target/R600/AMDGPUISelLowering.h   |  1 -
 lib/Target/R600/AMDGPUInstrInfo.td |  6 --
 lib/Target/R600/AMDGPUInstructions.td  |  6 ++
 lib/Target/R600/AMDILISelLowering.cpp  |  2 --
 lib/Target/R600/R600ISelLowering.cpp   | 15 ---
 lib/Target/R600/R600Instructions.td|  6 ++
 test/CodeGen/R600/rotr.ll  | 29 +
 8 files changed, 40 insertions(+), 28 deletions(-)
 create mode 100644 test/CodeGen/R600/rotr.ll

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp 
b/lib/Target/R600/AMDGPUISelLowering.cpp
index a266df5..b3c51e3 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -46,6 +46,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine 
TM) :
   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
 
+  // The hardware supports ROTR, but not ROTL
+  setOperationAction(ISD::ROTL, MVT::i32, Expand);
+
   // Lower floating point store/load to integer store/load to reduce the 
 number
   // of patterns in tablegen.
   setOperationAction(ISD::STORE, MVT::f32, Promote);
diff --git a/lib/Target/R600/AMDGPUISelLowering.h 
b/lib/Target/R600/AMDGPUISelLowering.h
index c2a79ea..6f8ab8b 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -115,7 +115,6 @@ enum {
   RET_FLAG,
   BRANCH_COND,
   // End AMDIL ISD Opcodes
-  BITALIGN,
   BUFFER_STORE,
   DWORDADDR,
   FRACT,
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td 
b/lib/Target/R600/AMDGPUInstrInfo.td
index b66ae87..a0a3410 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -23,12 +23,6 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile1, 3, [
 // AMDGPU DAG Nodes
 //
 
-// out = ((a  32) | b)  c)
-//
-// Can be used to optimize rtol:
-// rotl(a, b) = bitalign(a, a, 32 - b)
-def AMDGPUbitalign : SDNodeAMDGPUISD::BITALIGN, AMDGPUDTIntTernaryOp;
-
 // This argument to this node is a dword address.
 def AMDGPUdwordaddr : SDNodeAMDGPUISD::DWORDADDR, SDTIntUnaryOp;
 
diff --git a/lib/Target/R600/AMDGPUInstructions.td 
b/lib/Target/R600/AMDGPUInstructions.td
index d2620b2..54df7d0 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -295,6 +295,12 @@ class BFEPattern Instruction BFE : Pat 
   (BFE $x, $y, $z)
 ;
 
+// rotr pattern
+class ROTRPattern Instruction BIT_ALIGN : Pat 
+  (rotr i32:$src0, i32:$src1),
+  (BIT_ALIGN $src0, $src0, $src1)
+;
+
 include R600Instructions.td
 
 include SIInstrInfo.td
diff --git a/lib/Target/R600/AMDILISelLowering.cpp 
b/lib/Target/R600/AMDILISelLowering.cpp
index 922cac1..e20dbe0 100644
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -138,8 +138,6 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 
-// GPU doesn't have a rotl, rotr, or byteswap instruction
-setOperationAction(ISD::ROTR, VT, Expand);
 setOperationAction(ISD::BSWAP, VT, Expand);
 
 // GPU doesn't have any counting operators
diff --git a/lib/Target/R600/R600ISelLowering.cpp 
b/lib/Target/R600/R600ISelLowering.cpp
index 7252235..e58a8dd 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -72,8 +72,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine TM) :
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
 
-  setOperationAction(ISD::ROTL, MVT::i32, Custom);
-
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
 
@@ -327,7 +325,6 @@ using namespace llvm::AMDGPUIntrinsic;
 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG DAG) 
 const {
   switch (Op.getOpcode()) {
   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
-  case ISD::ROTL: return LowerROTL(Op, DAG);
   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
   case ISD::SELECT: return LowerSELECT(Op, DAG);
   case ISD::STORE: return LowerSTORE(Op, DAG);
@@ -518,18 +515,6 @@ SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, 
SelectionDAG DAG) const
   return DAG.getConstant(Offset * 4 * TFL-getStackWidth(MF), MVT::i32);
 }
 
-SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
-  EVT VT = Op.getValueType();
-
-  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
- Op.getOperand(0),
-

Re: [Mesa-dev] [PATCH 2/2] R600: Fix encoding for R600 family GPUs

2013-05-14 Thread Vincent Lejeune
Thanks for fixing this!
Both patches are reviewed-by: vljn at ovi.com




- Mail original -
 De : Tom Stellard t...@stellard.net
 À : llvm-comm...@cs.uiuc.edu
 Cc : mesa-dev@lists.freedesktop.org; Tom Stellard thomas.stell...@amd.com
 Envoyé le : Mercredi 15 mai 2013 1h03
 Objet : [Mesa-dev] [PATCH 2/2] R600: Fix encoding for R600 family GPUs
 
 From: Tom Stellard thomas.stell...@amd.com
 
 https://bugs.freedesktop.org/show_bug.cgi?id=64193
 https://bugs.freedesktop.org/show_bug.cgi?id=64257
 https://bugs.freedesktop.org/show_bug.cgi?id=64320
 
 NOTE: This is a candidate for the 3.3 branch.
 ---
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp |  7 +++
 test/CodeGen/R600/r600-encoding.ll                 | 24 ++
 2 files changed, 31 insertions(+)
 create mode 100644 test/CodeGen/R600/r600-encoding.ll
 
 diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 
 b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
 index c5bd01a..cb4cf0c 100644
 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
 +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
 @@ -179,6 +179,13 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
 MI, raw_ostream OS,
      Emit((u_int32_t) 0, OS);
    } else {
      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
 +    if ((STI.getFeatureBits()  AMDGPU::FeatureR600ALUInst) 
 +       ((Desc.TSFlags  R600_InstFlag::OP1) ||
 +         Desc.TSFlags  R600_InstFlag::OP2)) {
 +      uint64_t ISAOpCode = Inst  (0x3FFULL  39);
 +      Inst = ~(0x3FFULL  39);
 +      Inst |= ISAOpCode  1;
 +    }
      Emit(Inst, OS);
    }
 }
 diff --git a/test/CodeGen/R600/r600-encoding.ll 
 b/test/CodeGen/R600/r600-encoding.ll
 new file mode 100644
 index 000..c8040a1
 --- /dev/null
 +++ b/test/CodeGen/R600/r600-encoding.ll
 @@ -0,0 +1,24 @@
 +; RUN: llc  %s -march=r600 -show-mc-encoding -mcpu=redwood | FileCheck 
 --check-prefix=EG-CHECK %s
 +; RUN: llc  %s -march=r600 -show-mc-encoding -mcpu=rs880 | FileCheck 
 --check-prefix=R600-CHECK %s
 +
 +; The earliest R600 GPUs have a slightly different encoding than the rest of
 +; the VLIW4/5 GPUs.
 +
 +; EG-CHECK: @test
 +; EG-CHECK: MUL_IEEE {{[ *TXYZW.,0-9]+}} ; encoding: 
 [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x01,0x[0-9a-f]+,0x[0-9a-f]+}}]
 +
 +; R600-CHECK: @test
 +; R600-CHECK: MUL_IEEE {{[ *TXYZW.,0-9]+}} ; encoding: 
 [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
 +
 +define void @test() {
 +entry:
 +  %0 = call float @llvm.R600.load.input(i32 0)
 +  %1 = call float @llvm.R600.load.input(i32 1)
 +  %2 = fmul float %0, %1
 +  call void @llvm.AMDGPU.store.output(float %2, i32 0)
 +  ret void
 +}
 +
 +declare float @llvm.R600.load.input(i32) readnone
 +
 +declare void @llvm.AMDGPU.store.output(float, i32)
 -- 
 1.7.11.4
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] R600 Patchset: Emit true ISA

2013-05-06 Thread Vincent Lejeune
Reviewed-by: Vincent Lejeune <vljn at ovi.com>


- Mail original -
 De : Tom Stellard t...@stellard.net
 À : Vincent Lejeune v...@ovi.com
 Cc : llvm-comm...@cs.uiuc.edu llvm-comm...@cs.uiuc.edu; 
 mesa-dev@lists.freedesktop.org mesa-dev@lists.freedesktop.org
 Envoyé le : Lundi 6 mai 2013 17h02
 Objet : Re: R600 Patchset: Emit true ISA
 
 On Sat, May 04, 2013 at 09:09:25AM -0700, Vincent Lejeune wrote:
  Hi,
 
  Thank for doing this.
  Patches 1 2 and 3 have my rb.
  For patch 4:
 
 
 Hi Vincent,
 
 Attached is an updated version of patch 4.
 
 -Tom
 
  @@ -125,9 +106,7 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const 
 MCInstrInfo MCII,
   
   void R600MCCodeEmitter::EncodeInstruction(const MCInst MI, 
 raw_ostream OS,
                                          SmallVectorImplMCFixup 
 Fixups) const {
  -  if (isFCOp(MI.getOpcode())){
  -    EmitFCInstr(MI, OS);
  -  } else if (MI.getOpcode() == AMDGPU::RETURN ||
  +  if (MI.getOpcode() == AMDGPU::RETURN ||
       MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
       MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
       MI.getOpcode() == AMDGPU::BUNDLE ||
  @@ -135,12 +114,6 @@ void R600MCCodeEmitter::EncodeInstruction(const 
 MCInst MI, raw_ostream OS,
       return;
     } else {
       switch(MI.getOpcode()) {
  -    case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  -    case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
  -      uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
  -      Emit(inst, OS);
  -      break;
  -    }
       case AMDGPU::CONSTANT_LOAD_eg:
       case AMDGPU::VTX_READ_PARAM_8_eg:
       case AMDGPU::VTX_READ_PARAM_16_eg:
  Is it possible to use R600_InstFlag::VTX_INST and R600_InstFlag::TEX_INST 
 instead and to remove the switch() statement ?
  @@ -234,44 +207,6 @@ void R600MCCodeEmitter::EncodeInstruction(const 
 MCInst MI, raw_ostream OS,
         Emit((u_int32_t) 0, OS);
         break;
       } 
  - Mail original -
   De : Tom Stellard t...@stellard.net
   À : llvm-comm...@cs.uiuc.edu
   Cc : mesa-dev@lists.freedesktop.org
   Envoyé le : Samedi 4 mai 2013 0h53
   Objet : R600 Patchset: Emit true ISA
   
   Hi,
   
   The attached patches modify the CodeEmitter to emit true ISA.
   Previously, we were prefixing all instructions with an instruction 
 type
   byte.
   
   Vincent did most of the work to convert the CodeEmitter to true ISA,
   these patches are just the last few cleanups that are needed to finish
   the project.
   
   Please test/review.
   
   Thanks,
   Tom
   
   ___
   llvm-commits mailing list
   llvm-comm...@cs.uiuc.edu
   http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
  
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] R600 Patchset: Emit true ISA

2013-05-04 Thread Vincent Lejeune
Hi,

Thanks for doing this.
Patches 1, 2 and 3 have my Reviewed-by.
For patch 4:

@@ -125,9 +106,7 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const 
MCInstrInfo MCII,
 
 void R600MCCodeEmitter::EncodeInstruction(const MCInst MI, raw_ostream OS,
SmallVectorImplMCFixup Fixups) 
 const {
-  if (isFCOp(MI.getOpcode())){
-EmitFCInstr(MI, OS);
-  } else if (MI.getOpcode() == AMDGPU::RETURN ||
+  if (MI.getOpcode() == AMDGPU::RETURN ||
 MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
 MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
 MI.getOpcode() == AMDGPU::BUNDLE ||
@@ -135,12 +114,6 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
MI, raw_ostream OS,
 return;
   } else {
 switch(MI.getOpcode()) {
-case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
-case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
-  uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
-  Emit(inst, OS);
-  break;
-}
 case AMDGPU::CONSTANT_LOAD_eg:
 case AMDGPU::VTX_READ_PARAM_8_eg:
 case AMDGPU::VTX_READ_PARAM_16_eg:
Would it be possible to check R600_InstFlag::VTX_INST and R600_InstFlag::TEX_INST
instead, and remove the switch() statement?
@@ -234,44 +207,6 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
MI, raw_ostream OS,
   Emit((u_int32_t) 0, OS);
   break;
 } 
- Mail original -
 De : Tom Stellard t...@stellard.net
 À : llvm-comm...@cs.uiuc.edu
 Cc : mesa-dev@lists.freedesktop.org
 Envoyé le : Samedi 4 mai 2013 0h53
 Objet : R600 Patchset: Emit true ISA
 
 Hi,
 
 The attached patches modify the CodeEmitter to emit true ISA.
 Previously, we were prefixing all instructions with an instruction type
 byte.
 
 Vincent did most of the work to convert the CodeEmitter to true ISA,
 these patches are just the last few cleanups that are needed to finish
 the project.
 
 Please test/review.
 
 Thanks,
 Tom
 
 ___
 llvm-commits mailing list
 llvm-comm...@cs.uiuc.edu
 http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g/llvm: Undefines unrequired texture coord values

2013-04-30 Thread Vincent Lejeune
This is a port of the "r600g: mask unused source components for SAMPLE"
patch from Vadim Girlin.
---
 src/gallium/drivers/r600/r600_llvm.c | 25 -
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 83d7340..a94faf2 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -429,9 +429,32 @@ static void llvm_emit_tex(
}
}
 
+   if (emit_data-inst-Instruction.Opcode == TGSI_OPCODE_TEX) {
+   LLVMValueRef Vector[4] = {
+   LLVMBuildExtractElement(gallivm-builder, 
emit_data-args[0], lp_build_const_int32(gallivm, 0), ),
+   LLVMBuildExtractElement(gallivm-builder, 
emit_data-args[0], lp_build_const_int32(gallivm, 1), ),
+   LLVMBuildExtractElement(gallivm-builder, 
emit_data-args[0], lp_build_const_int32(gallivm, 2), ),
+   LLVMBuildExtractElement(gallivm-builder, 
emit_data-args[0], lp_build_const_int32(gallivm, 3), ),
+   };
+   switch (emit_data-inst-Texture.Texture) {
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   Vector[2] = Vector[3] = 
LLVMGetUndef(bld_base-base.elem_type);
+   break;
+   case TGSI_TEXTURE_1D:
+   Vector[1] = Vector[2] = Vector[3] = 
LLVMGetUndef(bld_base-base.elem_type);
+   break;
+   default:
+   break;
+   }
+   args[0] = lp_build_gather_values(gallivm, Vector, 4);
+   } else {
+   args[0] = emit_data-args[0];
+   }
+
assert(emit_data-arg_count + 2 = Elements(args));
 
-   for (c = 0; c  emit_data-arg_count; ++c)
+   for (c = 1; c  emit_data-arg_count; ++c)
args[c] = emit_data-args[c];
 
sampler_src = emit_data-inst-Instruction.NumSrcRegs-1;
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600/llvm: use killgt info from llvm

2013-04-29 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c   | 2 ++
 src/gallium/drivers/r600/r600_llvm.h   | 1 +
 src/gallium/drivers/r600/r600_shader.c | 8 ++--
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 2050be2..83d7340 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -556,6 +556,7 @@ unsigned r600_llvm_compile(
unsigned * inst_byte_count,
enum radeon_family family,
struct r600_bytecode *bc,
+   boolean *use_kill,
unsigned dump)
 {
unsigned r;
@@ -566,6 +567,7 @@ unsigned r600_llvm_compile(
*inst_byte_count = binary.code_size;
bc-ngpr = util_le32_to_cpu(*(uint32_t*)binary.config);
bc-nstack = util_le32_to_cpu(*(uint32_t*)(binary.config + 4));
+   *use_kill = util_le32_to_cpu(*(uint32_t*)(binary.config + 8));
return r;
 }
 
diff --git a/src/gallium/drivers/r600/r600_llvm.h 
b/src/gallium/drivers/r600/r600_llvm.h
index 919dd24..50bbca6 100644
--- a/src/gallium/drivers/r600/r600_llvm.h
+++ b/src/gallium/drivers/r600/r600_llvm.h
@@ -22,6 +22,7 @@ unsigned r600_llvm_compile(
unsigned * inst_byte_count,
enum radeon_family family,
struct r600_bytecode *bc,
+   boolean *use_kill,
unsigned dump);
 
 #endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 0204f80..25f900f 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -272,6 +272,7 @@ int r600_compute_shader_create(struct pipe_context * ctx,
unsigned byte_count;
struct r600_shader_ctx shader_ctx;
bool dump = (r600_ctx-screen-debug_flags  DBG_CS) != 0;
+   boolean use_kill;
 
shader_ctx.bc = bytecode;
r600_bytecode_init(shader_ctx.bc, r600_ctx-chip_class, 
r600_ctx-family,
@@ -279,7 +280,7 @@ int r600_compute_shader_create(struct pipe_context * ctx,
shader_ctx.bc-type = TGSI_PROCESSOR_COMPUTE;
shader_ctx.bc-isa = r600_ctx-isa;
r600_llvm_compile(mod, bytes, byte_count, r600_ctx-family,
-   shader_ctx.bc, dump);
+   shader_ctx.bc, use_kill, dump);
r600_bytecode_from_byte_stream(shader_ctx, bytes, byte_count);
if (shader_ctx.bc-chip_class == CAYMAN) {
cm_bytecode_add_cf_end(shader_ctx.bc);
@@ -1444,6 +1445,7 @@ static int r600_shader_from_tgsi(struct r600_screen 
*rscreen,
if (use_llvm) {
struct radeon_llvm_context radeon_llvm_ctx;
LLVMModuleRef mod;
+   boolean use_kill;
bool dump = r600_can_dump_shader(rscreen, ctx.type);
 
memset(radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
@@ -1461,13 +1463,15 @@ static int r600_shader_from_tgsi(struct r600_screen 
*rscreen,
mod = r600_tgsi_llvm(radeon_llvm_ctx, tokens);
 
if (r600_llvm_compile(mod, inst_bytes, inst_byte_count,
- rscreen-family, ctx.bc, dump)) {
+ rscreen-family, ctx.bc, use_kill, 
dump)) {
FREE(inst_bytes);
radeon_llvm_dispose(radeon_llvm_ctx);
use_llvm = 0;
fprintf(stderr, R600 LLVM backend failed to compile 
shader.  Falling back to TGSI\n);
} else {
+   if (use_kill)
+   ctx.shader-uses_kill = use_kill;
ctx.file_offset[TGSI_FILE_OUTPUT] =
ctx.file_offset[TGSI_FILE_INPUT];
}
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] R600: Emit used GPRs count

2013-04-10 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPUAsmPrinter.cpp | 35 +--
 lib/Target/R600/AMDGPUAsmPrinter.h   |  3 ++-
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp 
b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index dacb033..580cfb4 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -21,6 +21,7 @@
 #include AMDGPU.h
 #include SIMachineFunctionInfo.h
 #include SIRegisterInfo.h
+#include R600RegisterInfo.h
 #include llvm/MC/MCStreamer.h
 #include llvm/Support/TargetRegistry.h
 #include llvm/Target/TargetLoweringObjectFile.h
@@ -52,13 +53,43 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction 
MF) {
   }
   OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
   if (STM.device()-getGeneration()  AMDGPUDeviceInfo::HD6XXX) {
-EmitProgramInfo(MF);
+EmitProgramInfoSI(MF);
+  } else {
+EmitProgramInfoR600(MF);
   }
   EmitFunctionBody();
   return false;
 }
 
-void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction MF) {
+void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction MF) {
+  unsigned MaxGPR = 0;
+  const R600RegisterInfo * RI =
+static_castconst R600RegisterInfo*(TM.getRegisterInfo());
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+  BB != BB_E; ++BB) {
+MachineBasicBlock MBB = *BB;
+for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+I != E; ++I) {
+  MachineInstr MI = *I;
+  unsigned numOperands = MI.getNumOperands();
+  for (unsigned op_idx = 0; op_idx  numOperands; op_idx++) {
+MachineOperand  MO = MI.getOperand(op_idx);
+if (!MO.isReg())
+  continue;
+unsigned HWReg = RI-getEncodingValue(MO.getReg())  0xff;
+
+// Register with value > 127 aren't GPR
+if (HWReg  127)
+  continue;
+MaxGPR = std::max(MaxGPR, HWReg);
+  }
+}
+  }
+  OutStreamer.EmitIntValue(MaxGPR + 1, 4);
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction MF) {
   unsigned MaxSGPR = 0;
   unsigned MaxVGPR = 0;
   bool VCCUsed = false;
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h 
b/lib/Target/R600/AMDGPUAsmPrinter.h
index 3812282..f425ef4 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -33,7 +33,8 @@ public:
 
   /// \brief Emit register usage information so that the GPU driver
   /// can correctly setup the GPU state.
-  void EmitProgramInfo(MachineFunction MF);
+  void EmitProgramInfoR600(MachineFunction MF);
+  void EmitProgramInfoSI(MachineFunction MF);
 
   /// Implemented in AMDGPUMCInstLower.cpp
   virtual void EmitInstruction(const MachineInstr *MI);
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] R600: Export is emitted as a CF_NATIVE inst

2013-04-10 Thread Vincent Lejeune
---
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 15 +--
 lib/Target/R600/R600Instructions.td|  8 
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 
b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 469a8ad..416d710 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -250,15 +250,6 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
MI, raw_ostream OS,
   Emit(Word2, OS);
   break;
 }
-case AMDGPU::EG_ExportSwz:
-case AMDGPU::R600_ExportSwz:
-case AMDGPU::EG_ExportBuf:
-case AMDGPU::R600_ExportBuf: {
-  uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
-  EmitByte(INSTR_EXPORT, OS);
-  Emit(Inst, OS);
-  break;
-}
 case AMDGPU::CF_ALU:
 case AMDGPU::CF_ALU_PUSH_BEFORE: {
   uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
@@ -286,7 +277,11 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
MI, raw_ostream OS,
 case AMDGPU::CF_CONTINUE_R600:
 case AMDGPU::CF_JUMP_R600:
 case AMDGPU::CF_ELSE_R600:
-case AMDGPU::POP_R600: {
+case AMDGPU::POP_R600:
+case AMDGPU::EG_ExportSwz:
+case AMDGPU::R600_ExportSwz:
+case AMDGPU::EG_ExportBuf:
+case AMDGPU::R600_ExportBuf: {
   uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
   EmitByte(INSTR_NATIVE, OS);
   Emit(Inst, OS);
diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index b4c45e1..2e9a8a3 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1423,7 +1423,7 @@ let Predicates = [isR600] in {
 (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src));
 
   def R600_ExportSwz : ExportSwzInst {
-let Word1{20-17} = 1; // BURST_COUNT
+let Word1{20-17} = 0; // BURST_COUNT
 let Word1{21} = eop;
 let Word1{22} = 1; // VALID_PIXEL_MODE
 let Word1{30-23} = inst;
@@ -1432,7 +1432,7 @@ let Predicates = [isR600] in {
   defm : ExportPatternR600_ExportSwz, 39;
 
   def R600_ExportBuf : ExportBufInst {
-let Word1{20-17} = 1; // BURST_COUNT
+let Word1{20-17} = 0; // BURST_COUNT
 let Word1{21} = eop;
 let Word1{22} = 1; // VALID_PIXEL_MODE
 let Word1{30-23} = inst;
@@ -1622,7 +1622,7 @@ let hasSideEffects = 1 in {
 (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0));
 
   def EG_ExportSwz : ExportSwzInst {
-let Word1{19-16} = 1; // BURST_COUNT
+let Word1{19-16} = 0; // BURST_COUNT
 let Word1{20} = 1; // VALID_PIXEL_MODE
 let Word1{21} = eop;
 let Word1{29-22} = inst;
@@ -1632,7 +1632,7 @@ let hasSideEffects = 1 in {
   defm : ExportPatternEG_ExportSwz, 83;
 
   def EG_ExportBuf : ExportBufInst {
-let Word1{19-16} = 1; // BURST_COUNT
+let Word1{19-16} = 0; // BURST_COUNT
 let Word1{20} = 1; // VALID_PIXEL_MODE
 let Word1{21} = eop;
 let Word1{29-22} = inst;
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] R600: Make Export Instruction not duplicable

2013-04-10 Thread Vincent Lejeune
---
 lib/Target/R600/R600Instructions.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index 2e9a8a3..1c292bb 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -738,7 +738,7 @@ multiclass SteamOutputExportPatternInstruction ExportInst,
   4095, imm:$mask, buf3inst, 0);
 }
 
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, isNotDuplicable = 1 in {
 
 class ExportSwzInst : InstR600ISA(
 outs),
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600: Add VTX_READ_* and RAT_WRITE_CACHELESS_* when computing cf addr

2013-04-09 Thread Vincent Lejeune
---
 lib/Target/R600/R600ControlFlowFinalizer.cpp | 11 ++-
 test/CodeGen/R600/loop-adress.ll | 44 
 2 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/R600/loop-adress.ll

diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp 
b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index cfaa36e..2350130 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -67,6 +67,13 @@ private:
 case AMDGPU::TEX_SAMPLE_C_G:
 case AMDGPU::TXD:
 case AMDGPU::TXD_SHADOW:
+case AMDGPU::VTX_READ_GLOBAL_8_eg:
+case AMDGPU::VTX_READ_GLOBAL_32_eg:
+case AMDGPU::VTX_READ_GLOBAL_128_eg:
+case AMDGPU::VTX_READ_PARAM_8_eg:
+case AMDGPU::VTX_READ_PARAM_16_eg:
+case AMDGPU::VTX_READ_PARAM_32_eg:
+case AMDGPU::VTX_READ_PARAM_128_eg:
  return true;
 default:
   return false;
@@ -207,6 +214,8 @@ public:
 case AMDGPU::EG_ExportSwz:
 case AMDGPU::R600_ExportBuf:
 case AMDGPU::R600_ExportSwz:
+case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
   DEBUG(dbgs()  CfCount  :; MI-dump(););
   CfCount++;
   break;
@@ -215,7 +224,7 @@ public:
   MaxStack = std::max(MaxStack, CurrentStack);
   MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
   getHWInstrDesc(CF_WHILE_LOOP))
-  .addImm(2);
+  .addImm(1);
   std::pairunsigned, std::setMachineInstr *  Pair(CfCount,
   std::setMachineInstr *());
   Pair.second.insert(MIb);
diff --git a/test/CodeGen/R600/loop-adress.ll b/test/CodeGen/R600/loop-adress.ll
new file mode 100644
index 000..dc9295e
--- /dev/null
+++ b/test/CodeGen/R600/loop-adress.ll
@@ -0,0 +1,44 @@
+;RUN: llc  %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: TEX
+;CHECK: ALU_PUSH
+;CHECK: JUMP @4
+;CHECK: ELSE @16
+;CHECK: TEX
+;CHECK: LOOP_START_DX10 @15
+;CHECK: LOOP_BREAK @14
+;CHECK: POP @16
+
+target datalayout = 
e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64
+target triple = r600--
+
+define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) #0 {
+entry:
+  %cmp5 = icmp sgt i32 %iterations, 0
+  br i1 %cmp5, label %for.body, label %for.end
+
+for.body: ; preds = %for.body, %entry
+  %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ]
+  %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %i.07 = add nsw i32 %i.07.in, -1
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06
+  store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4, !tbaa !4
+  %add = add nsw i32 %ai.06, 1
+  %exitcond = icmp eq i32 %add, %iterations
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:  ; preds = %for.body, %entry
+  ret void
+}
+
+attributes #0 = { nounwind fp-contract-model=standard 
relocation-model=pic ssp-buffers-size=8 }
+
+!opencl.kernels = !{!0, !1, !2, !3}
+
+!0 = metadata !{void (i32 addrspace(1)*, i32)* @loop_ge}
+!1 = metadata !{null}
+!2 = metadata !{null}
+!3 = metadata !{null}
+!4 = metadata !{metadata !int, metadata !5}
+!5 = metadata !{metadata !omnipotent char, metadata !6}
+!6 = metadata !{metadata !Simple C/C++ TBAA}
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600: Control Flow support for pre EG gen

2013-04-07 Thread Vincent Lejeune
---
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp |  30 ++--
 lib/Target/R600/R600ControlFlowFinalizer.cpp   |  84 +++--
 lib/Target/R600/R600Instructions.td| 198 +++--
 3 files changed, 240 insertions(+), 72 deletions(-)

diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 
b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 927bcbd..469a8ad 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -266,17 +266,27 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
MI, raw_ostream OS,
   Emit(Inst, OS);
   break;
 }
-case AMDGPU::CF_TC:
-case AMDGPU::CF_VC:
-case AMDGPU::CF_CALL_FS:
+case AMDGPU::CF_TC_EG:
+case AMDGPU::CF_VC_EG:
+case AMDGPU::CF_CALL_FS_EG:
+case AMDGPU::CF_TC_R600:
+case AMDGPU::CF_VC_R600:
+case AMDGPU::CF_CALL_FS_R600:
   return;
-case AMDGPU::WHILE_LOOP:
-case AMDGPU::END_LOOP:
-case AMDGPU::LOOP_BREAK:
-case AMDGPU::CF_CONTINUE:
-case AMDGPU::CF_JUMP:
-case AMDGPU::CF_ELSE:
-case AMDGPU::POP: {
+case AMDGPU::WHILE_LOOP_EG:
+case AMDGPU::END_LOOP_EG:
+case AMDGPU::LOOP_BREAK_EG:
+case AMDGPU::CF_CONTINUE_EG:
+case AMDGPU::CF_JUMP_EG:
+case AMDGPU::CF_ELSE_EG:
+case AMDGPU::POP_EG:
+case AMDGPU::WHILE_LOOP_R600:
+case AMDGPU::END_LOOP_R600:
+case AMDGPU::LOOP_BREAK_R600:
+case AMDGPU::CF_CONTINUE_R600:
+case AMDGPU::CF_JUMP_R600:
+case AMDGPU::CF_ELSE_R600:
+case AMDGPU::POP_R600: {
   uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
   EmitByte(INSTR_NATIVE, OS);
   Emit(Inst, OS);
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp 
b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 3a6c7ea..cfaa36e 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -30,9 +30,22 @@ namespace llvm {
 class R600ControlFlowFinalizer : public MachineFunctionPass {
 
 private:
+  enum ControlFlowInstruction {
+CF_TC,
+CF_CALL_FS,
+CF_WHILE_LOOP,
+CF_END_LOOP,
+CF_LOOP_BREAK,
+CF_LOOP_CONTINUE,
+CF_JUMP,
+CF_ELSE,
+CF_POP
+  };
+  
   static char ID;
   const R600InstrInfo *TII;
   unsigned MaxFetchInst;
+  const AMDGPUSubtarget ST;
 
   bool isFetch(const MachineInstr *MI) const {
 switch (MI-getOpcode()) {
@@ -70,6 +83,52 @@ private:
 }
   }
 
+  const MCInstrDesc getHWInstrDesc(ControlFlowInstruction CFI) const {
+if (ST.device()-getGeneration() = AMDGPUDeviceInfo::HD4XXX) {
+  switch (CFI) {
+  case CF_TC:
+return TII-get(AMDGPU::CF_TC_R600);
+  case CF_CALL_FS:
+return TII-get(AMDGPU::CF_CALL_FS_R600);
+  case CF_WHILE_LOOP:
+return TII-get(AMDGPU::WHILE_LOOP_R600);
+  case CF_END_LOOP:
+return TII-get(AMDGPU::END_LOOP_R600);
+  case CF_LOOP_BREAK:
+return TII-get(AMDGPU::LOOP_BREAK_R600);
+  case CF_LOOP_CONTINUE:
+return TII-get(AMDGPU::CF_CONTINUE_R600);
+  case CF_JUMP:
+return TII-get(AMDGPU::CF_JUMP_R600);
+  case CF_ELSE:
+return TII-get(AMDGPU::CF_ELSE_R600);
+  case CF_POP:
+return TII-get(AMDGPU::POP_R600);
+  }
+} else {
+  switch (CFI) {
+  case CF_TC:
+return TII-get(AMDGPU::CF_TC_EG);
+  case CF_CALL_FS:
+return TII-get(AMDGPU::CF_CALL_FS_EG);
+  case CF_WHILE_LOOP:
+return TII-get(AMDGPU::WHILE_LOOP_EG);
+  case CF_END_LOOP:
+return TII-get(AMDGPU::END_LOOP_EG);
+  case CF_LOOP_BREAK:
+return TII-get(AMDGPU::LOOP_BREAK_EG);
+  case CF_LOOP_CONTINUE:
+return TII-get(AMDGPU::CF_CONTINUE_EG);
+  case CF_JUMP:
+return TII-get(AMDGPU::CF_JUMP_EG);
+  case CF_ELSE:
+return TII-get(AMDGPU::CF_ELSE_EG);
+  case CF_POP:
+return TII-get(AMDGPU::POP_EG);
+  }
+}
+  }
+
   MachineBasicBlock::iterator
   MakeFetchClause(MachineBasicBlock MBB, MachineBasicBlock::iterator I,
   unsigned CfAddress) const {
@@ -85,7 +144,7 @@ private:
 break;
 }
 BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
-TII-get(AMDGPU::CF_TC))
+getHWInstrDesc(CF_TC))
 .addImm(CfAddress) // ADDR
 .addImm(AluInstCount); // COUNT
 return I;
@@ -104,7 +163,8 @@ private:
 
 public:
   R600ControlFlowFinalizer(TargetMachine tm) : MachineFunctionPass(ID),
-TII (static_castconst R600InstrInfo *(tm.getInstrInfo())) {
+TII (static_castconst R600InstrInfo *(tm.getInstrInfo())),
+ST(tm.getSubtargetAMDGPUSubtarget()) {
   const AMDGPUSubtarget ST = tm.getSubtargetAMDGPUSubtarget();
   if (ST.device()-getGeneration() = AMDGPUDeviceInfo::HD4XXX)
 MaxFetchInst = 8;
@@ -124,7 +184,7 @@ public:
   R600MachineFunctionInfo *MFI = MF.getInfoR600MachineFunctionInfo();
   if (MFI-ShaderType == 1) {
 BuildMI(MBB, 

[Mesa-dev] [PATCH] r600g/llvm: Add support for native isa for pre EG

2013-04-07 Thread Vincent Lejeune
This fixes bug 62756 :
https://bugs.freedesktop.org/show_bug.cgi?id=62756#c12

(Requires corresponding llvm commit)
---
 src/gallium/drivers/r600/r600_asm.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index a0dc1de..26a848a 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1494,7 +1494,11 @@ static int r600_bytecode_cf_build(struct r600_bytecode 
*bc, struct r600_bytecode
const struct cf_op_info *cfop = r600_isa_cf(cf-op);
unsigned opcode = r600_isa_cf_opcode(bc-isa-hw_class, cf-op);
 
-   if (cfop-flags  CF_ALU) {
+
+   if (cf-op == CF_NATIVE) {
+   bc-bytecode[id++] = cf-isa[0];
+   bc-bytecode[id++] = cf-isa[1];
+   } else if (cfop-flags  CF_ALU) {
bc-bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf-addr  1) |
S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf-kcache[0].mode) |
S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf-kcache[0].bank) |
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600: Add support for native control flow

2013-04-01 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPU.h   |   1 +
 lib/Target/R600/AMDGPUTargetMachine.cpp|   1 +
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp |  21 +-
 lib/Target/R600/R600ControlFlowFinalizer.cpp   | 264 +
 lib/Target/R600/R600Instructions.td| 100 
 .../CodeGen/R600/disconnected-predset-break-bug.ll |   5 +-
 test/CodeGen/R600/predicates.ll|  12 +-
 7 files changed, 397 insertions(+), 7 deletions(-)
 create mode 100644 lib/Target/R600/R600ControlFlowFinalizer.cpp

diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 3cd792a..0b01433 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -24,6 +24,7 @@ class AMDGPUTargetMachine;
 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine tm);
 FunctionPass *createR600EmitClauseMarkers(TargetMachine tm);
+FunctionPass *createR600ControlFlowFinalizer(TargetMachine tm);
 
 // SI Passes
 FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 45b1be0..e7ea876 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -153,6 +153,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
 addPass(createAMDGPUCFGStructurizerPass(*TM));
 addPass(createR600EmitClauseMarkers(*TM));
 addPass(createR600ExpandSpecialInstrsPass(*TM));
+addPass(createR600ControlFlowFinalizer(*TM));
 addPass(FinalizeMachineBundlesID);
   } else {
 addPass(createSILowerControlFlowPass(*TM));
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 
b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 1bf87fc..6ef4d40 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -147,6 +147,10 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
MI, raw_ostream OS,
 return;
   } else {
 switch(MI.getOpcode()) {
+case AMDGPU::STACK_SIZE: {
+  EmitByte(MI.getOperand(0).getImm(), OS);
+  break;
+}
 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
   uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
@@ -259,7 +263,22 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
MI, raw_ostream OS,
   Emit(Inst, OS);
   break;
 }
-
+case AMDGPU::CF_TC:
+case AMDGPU::CF_VC:
+case AMDGPU::CF_CALL_FS:
+  return;
+case AMDGPU::WHILE_LOOP:
+case AMDGPU::END_LOOP:
+case AMDGPU::LOOP_BREAK:
+case AMDGPU::CF_CONTINUE:
+case AMDGPU::CF_JUMP:
+case AMDGPU::CF_ELSE:
+case AMDGPU::POP: {
+  uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+  EmitByte(INSTR_NATIVE, OS);
+  Emit(Inst, OS);
+  break;
+}
 default:
   EmitALUInstr(MI, Fixups, OS);
   break;
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp 
b/lib/Target/R600/R600ControlFlowFinalizer.cpp
new file mode 100644
index 000..bd87d74
--- /dev/null
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -0,0 +1,264 @@
+//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow 
Inst--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+/// \file
+/// This pass turns all control flow pseudo instructions into native ones,
+/// computing their addresses on the fly; it also sets STACK_SIZE info.
+//===--===//
+
+#include AMDGPU.h
+#include R600Defines.h
+#include R600InstrInfo.h
+#include R600MachineFunctionInfo.h
+#include R600RegisterInfo.h
+#include llvm/CodeGen/MachineFunctionPass.h
+#include llvm/CodeGen/MachineInstrBuilder.h
+#include llvm/CodeGen/MachineRegisterInfo.h
+
+namespace llvm {
+
+class R600ControlFlowFinalizer : public MachineFunctionPass {
+
+private:
+  static char ID;
+  const R600InstrInfo *TII;
+  unsigned MaxFetchInst;
+
+  bool isFetch(const MachineInstr *MI) const {
+switch (MI-getOpcode()) {
+case AMDGPU::TEX_VTX_CONSTBUF:
+case AMDGPU::TEX_VTX_TEXBUF:
+case AMDGPU::TEX_LD:
+case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+case AMDGPU::TEX_GET_GRADIENTS_H:
+case AMDGPU::TEX_GET_GRADIENTS_V:
+case AMDGPU::TEX_SET_GRADIENTS_H:
+case AMDGPU::TEX_SET_GRADIENTS_V:
+case AMDGPU::TEX_SAMPLE:
+case AMDGPU::TEX_SAMPLE_C:
+case AMDGPU::TEX_SAMPLE_L:
+case AMDGPU::TEX_SAMPLE_C_L:
+case AMDGPU::TEX_SAMPLE_LB:
+case AMDGPU::TEX_SAMPLE_C_LB:
+case AMDGPU::TEX_SAMPLE_G:
+case AMDGPU::TEX_SAMPLE_C_G:
+case AMDGPU::TXD:
+case AMDGPU::TXD_SHADOW:
+ return true;
+default:
+   

Re: [Mesa-dev] [PATCH] r600g: don't reserve more stack space than required v4

2013-04-01 Thread Vincent Lejeune
Btw, where can I find more information on stack_size?
I assumed it represents the maximum number of stacked exec_masks,
but it looks like it is possible to have many more manually pushed exec_mask
levels than reported by nstack (IIUC a push counts for as much as 1/4 of a
loop level).




- Mail original -
 De : Vadim Girlin vadimgir...@gmail.com
 À : Vincent Lejeune v...@ovi.com
 Cc : Alex Deucher alexdeuc...@gmail.com; mesa-dev@lists.freedesktop.org 
 mesa-dev@lists.freedesktop.org
 Envoyé le : Dimanche 31 mars 2013 22h34
 Objet : Re: [Mesa-dev] [PATCH] r600g: don't reserve more stack space than 
 required v4
 
 On 04/01/2013 12:00 AM, Vincent Lejeune wrote:
  Hi Vadim,
 
  Does this patch work ? (It's still not pushed)
 
 It works for me on evergreen, but I'm not sure about other chip generations. 
 I wanted to ask somebody to test it, but the problem is that the piglit 
 coverage 
 for this is not enough (e.g. initial version of this patch had no regressions 
 with piglit but resulted in artifacts with Heaven). I thought about adding 
 more 
 control flow tests but haven't written them yet. The same algorithm 
 seemingly works in my r600-sb branch with other chips, but the test coverage 
 with that branch is even lower due to the if-conversion that eliminates most 
 of 
 the conditional control flow.
 
 I usually prefer not to push any patches until I'm sure that they are not 
 breaking anything. But well, possibly in this case it's easier to simply 
 push it and wait for the bug reports. I think I'll check if it needs 
 rebasing and push it in a day or two if there are no objections.
 
 Vadim
 
  I'm working on doing native control flow for llvm and intend to port 
 your patch on the control flow reservation.
 
  Vincent
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: don't reserve more stack space than required v4

2013-03-31 Thread Vincent Lejeune
Hi Vadim,

Does this patch work ? (It's still not pushed)
I'm working on native control flow for LLVM and intend to port your patch
on the control flow stack reservation.

Vincent




- Mail original -
 De : Vadim Girlin vadimgir...@gmail.com
 À : Alex Deucher alexdeuc...@gmail.com
 Cc : mesa-dev@lists.freedesktop.org
 Envoyé le : Vendredi 22 février 2013 1h37
 Objet : Re: [Mesa-dev] [PATCH] r600g: don't reserve more stack space than 
 required v4
 
 On 02/22/2013 04:23 AM, Alex Deucher wrote:
  On Thu, Feb 21, 2013 at 6:52 PM, Vadim Girlin vadimgir...@gmail.com 
 wrote:
  v4: implement exact computation taking into account wavefront size
 
  Signed-off-by: Vadim Girlin vadimgir...@gmail.com
  ---
    src/gallium/drivers/r600/r600_asm.c    |  44 +--
    src/gallium/drivers/r600/r600_asm.h    |  24 --
    src/gallium/drivers/r600/r600_shader.c | 131 
 ++---
    3 files changed, 142 insertions(+), 57 deletions(-)
 
  diff --git a/src/gallium/drivers/r600/r600_asm.c 
 b/src/gallium/drivers/r600/r600_asm.c
  index 3632aa5..f041e27 100644
  --- a/src/gallium/drivers/r600/r600_asm.c
  +++ b/src/gallium/drivers/r600/r600_asm.c
  @@ -86,6 +86,38 @@ static struct r600_bytecode_tex 
 *r600_bytecode_tex(void)
           return tex;
    }
 
  +static unsigned stack_entry_size(enum radeon_family chip) {
  +       /* Wavefront size:
  +        *   64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/
  +        *       Aruba/Sumo/Sumo2/redwood/juniper
  +        *   32: R630/R730/R710/Palm/Cedar
  +        *   16: R610/Rs780
  +        *
  +        * Stack row size:
  +        *      Wavefront Size                        16  32  48  64
  +        *      Columns per Row (R6xx/R7xx/R8xx only)  8   8   4   4
  +        *      Columns per Row (R9xx+)                8   4   4   4 */
  +
  +       switch (chip) {
  +       /* FIXME: are some chips missing here? */
  +       /* wavefront size 16 */
  +       case CHIP_RV610:
  +       case CHIP_RS780:
 
  RV620
  RS880
 
  Should be 16 as well.
 
 Thanks, I'll add them.
 
 Vadim
 
 
  +       /* wavefront size 32 */
  +       case CHIP_RV630:
  +       case CHIP_RV635:
  +       case CHIP_RV730:
  +       case CHIP_RV710:
  +       case CHIP_PALM:
  +       case CHIP_CEDAR:
  +               return 8;
  +
  +       /* wavefront size 64 */
  +       default:
  +               return 4;
  +       }
  +}
  +
    void r600_bytecode_init(struct r600_bytecode *bc,
                           enum chip_class chip_class,
                           enum radeon_family family,
  @@ -103,6 +135,7 @@ void r600_bytecode_init(struct r600_bytecode *bc,
           LIST_INITHEAD(bc-cf);
           bc-chip_class = chip_class;
           bc-msaa_texture_mode = msaa_texture_mode;
  +       bc-stack.entry_size = stack_entry_size(family);
    }
 
    static int r600_bytecode_add_cf(struct r600_bytecode *bc)
  @@ -1524,8 +1557,8 @@ int r600_bytecode_build(struct r600_bytecode *bc)
           unsigned addr;
           int i, r;
 
  -       if (bc-callstack[0].max  0)
  -               bc-nstack = ((bc-callstack[0].max + 3)  
 2) + 2;
  +       bc-nstack = bc-stack.max_entries;
  +
           if (bc-type == TGSI_PROCESSOR_VERTEX  
 !bc-nstack) {
                   bc-nstack = 1;
           }
  @@ -1826,8 +1859,8 @@ void r600_bytecode_disasm(struct r600_bytecode 
 *bc)
                   chip = '6';
                   break;
           }
  -       fprintf(stderr, bytecode %d dw -- %d gprs 
 -\n,
  -               bc-ndw, bc-ngpr);
  +       fprintf(stderr, bytecode %d dw -- %d gprs -- %d nstack 
 -\n,
  +               bc-ndw, bc-ngpr, bc-nstack);
           fprintf(stderr, shader %d -- %c\n, index++, 
 chip);
 
           LIST_FOR_EACH_ENTRY(cf, bc-cf, list) {
  @@ -2105,7 +2138,8 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
                   chip = '6';
                   break;
           }
  -       fprintf(stderr, bytecode %d dw -- %d gprs 
 -\n, bc-ndw, bc-ngpr);
  +       fprintf(stderr, bytecode %d dw -- %d gprs -- %d nstack 
 -\n,
  +               bc-ndw, bc-ngpr, bc-nstack);
           fprintf(stderr,      %c\n, chip);
 
           LIST_FOR_EACH_ENTRY(cf, bc-cf, list) {
  diff --git a/src/gallium/drivers/r600/r600_asm.h 
 b/src/gallium/drivers/r600/r600_asm.h
  index 03cd238..5a9869d 100644
  --- a/src/gallium/drivers/r600/r600_asm.h
  +++ b/src/gallium/drivers/r600/r600_asm.h
  @@ -173,16 +173,25 @@ struct r600_cf_stack_entry {
    };
 
    #define SQ_MAX_CALL_DEPTH 0x0020
  -struct r600_cf_callstack {
  -       unsigned                        fc_sp_before_entry;
  -       int                             sub_desc_index;
  -       int                             current;
  -       int                             max;
  -};
 
    #define AR_HANDLE_NORMAL 0
    #define AR_HANDLE_RV6XX 1 /* except RV670 */
 
  +struct r600_stack_info {
  +       /* current level of 

[Mesa-dev] [PATCH] R600: Emit CF_ALU and use true kcache register.

2013-03-28 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPU.h   |   1 +
 lib/Target/R600/AMDGPUTargetMachine.cpp|   1 +
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp |  10 +-
 lib/Target/R600/R600EmitClauseMarkers.cpp  | 250 +
 lib/Target/R600/R600Instructions.td|  79 ++-
 lib/Target/R600/R600RegisterInfo.td|  63 ++
 test/CodeGen/R600/kcache-fold.ll   |   2 +-
 7 files changed, 395 insertions(+), 11 deletions(-)
 create mode 100644 lib/Target/R600/R600EmitClauseMarkers.cpp

diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index e099a9f..3cd792a 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
 // R600 Passes
 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine tm);
+FunctionPass *createR600EmitClauseMarkers(TargetMachine tm);
 
 // SI Passes
 FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 0185747..45b1be0 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -151,6 +151,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
   if (ST.device()-getGeneration() = AMDGPUDeviceInfo::HD6XXX) {
 addPass(createAMDGPUCFGPreparationPass(*TM));
 addPass(createAMDGPUCFGStructurizerPass(*TM));
+addPass(createR600EmitClauseMarkers(*TM));
 addPass(createR600ExpandSpecialInstrsPass(*TM));
 addPass(FinalizeMachineBundlesID);
   } else {
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 
b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index a945fe9..1bf87fc 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -101,7 +101,8 @@ enum InstrTypes {
   INSTR_FC,
   INSTR_NATIVE,
   INSTR_VTX,
-  INSTR_EXPORT
+  INSTR_EXPORT,
+  INSTR_CFALU
 };
 
 enum FCInstr {
@@ -251,6 +252,13 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
MI, raw_ostream OS,
   Emit(Inst, OS);
   break;
 }
+case AMDGPU::CF_ALU:
+case AMDGPU::CF_ALU_PUSH_BEFORE: {
+  uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+  EmitByte(INSTR_CFALU, OS);
+  Emit(Inst, OS);
+  break;
+}
 
 default:
   EmitALUInstr(MI, Fixups, OS);
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp 
b/lib/Target/R600/R600EmitClauseMarkers.cpp
new file mode 100644
index 000..3c0e86e
--- /dev/null
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -0,0 +1,250 @@
+//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU 
---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+/// \file
+/// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold
+/// 128 ALU instructions; these instructions can access up to 4 prefetched
+/// lines of 16 registers from constant buffers. Such ALU clauses are
+/// initiated by CF_ALU instructions.
+//===--===//
+
+#include AMDGPU.h
+#include R600Defines.h
+#include R600InstrInfo.h
+#include R600MachineFunctionInfo.h
+#include R600RegisterInfo.h
+#include llvm/CodeGen/MachineFunctionPass.h
+#include llvm/CodeGen/MachineInstrBuilder.h
+#include llvm/CodeGen/MachineRegisterInfo.h
+
+namespace llvm {
+
+class R600EmitClauseMarkersPass : public MachineFunctionPass {
+
+private:
+  static char ID;
+  const R600InstrInfo *TII;
+
+  unsigned OccupiedDwords(MachineInstr *MI) const {
+switch (MI-getOpcode()) {
+case AMDGPU::INTERP_PAIR_XY:
+case AMDGPU::INTERP_PAIR_ZW:
+case AMDGPU::INTERP_VEC_LOAD:
+case AMDGPU::DOT4_eg_pseudo:
+case AMDGPU::DOT4_r600_pseudo:
+  return 4;
+case AMDGPU::KILL:
+  return 0;
+default:
+  break;
+}
+
+if(TII-isVector(*MI) ||
+TII-isCubeOp(MI-getOpcode()) ||
+TII-isReductionOp(MI-getOpcode()))
+  return 4;
+
+unsigned NumLiteral = 0;
+for (MachineInstr::mop_iterator It = MI-operands_begin(),
+E = MI-operands_end(); It != E; ++It) {
+  MachineOperand MO = *It;
+  if (MO.isReg()  MO.getReg() == AMDGPU::ALU_LITERAL_X)
+++NumLiteral;
+}
+return 1 + NumLiteral;
+  }
+
+  bool isALU(const MachineInstr *MI) const {
+if (TII-isALUInstr(MI-getOpcode()))
+  return true;
+if (TII-isVector(*MI) || TII-isCubeOp(MI-getOpcode()))
+  return true;
+switch (MI-getOpcode()) {
+case AMDGPU::INTERP_PAIR_XY:
+case AMDGPU::INTERP_PAIR_ZW:
+case AMDGPU::INTERP_VEC_LOAD:
+case AMDGPU::COPY:
+case AMDGPU::DOT4_eg_pseudo:
+case 

[Mesa-dev] [PATCH 1/2] R600: Emit native instructions for tex

2013-03-27 Thread Vincent Lejeune
---
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 169 +
 lib/Target/R600/R600Instructions.td| 156 +++
 2 files changed, 196 insertions(+), 129 deletions(-)

diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 
b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index d207160..00ebb44 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -66,8 +66,6 @@ private:
   void EmitSrcISA(const MCInst MI, unsigned RegOpIdx, unsigned SelOpIdx,
 raw_ostream OS) const;
   void EmitDst(const MCInst MI, raw_ostream OS) const;
-  void EmitTexInstr(const MCInst MI, SmallVectorImplMCFixup Fixups,
-raw_ostream OS) const;
   void EmitFCInstr(const MCInst MI, raw_ostream OS) const;
 
   void EmitNullBytes(unsigned int byteCount, raw_ostream OS) const;
@@ -140,9 +138,7 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const 
MCInstrInfo MCII,
 
 void R600MCCodeEmitter::EncodeInstruction(const MCInst MI, raw_ostream OS,
SmallVectorImplMCFixup Fixups) const 
{
-  if (isTexOp(MI.getOpcode())) {
-EmitTexInstr(MI, Fixups, OS);
-  } else if (isFCOp(MI.getOpcode())){
+  if (isFCOp(MI.getOpcode())){
 EmitFCInstr(MI, OS);
   } else if (MI.getOpcode() == AMDGPU::RETURN ||
 MI.getOpcode() == AMDGPU::BUNDLE ||
@@ -175,6 +171,76 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
MI, raw_ostream OS,
   Emit(InstWord2, OS);
   break;
 }
+case AMDGPU::TEX_LD:
+case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+case AMDGPU::TEX_SAMPLE:
+case AMDGPU::TEX_SAMPLE_C:
+case AMDGPU::TEX_SAMPLE_L:
+case AMDGPU::TEX_SAMPLE_C_L:
+case AMDGPU::TEX_SAMPLE_LB:
+case AMDGPU::TEX_SAMPLE_C_LB:
+case AMDGPU::TEX_SAMPLE_G:
+case AMDGPU::TEX_SAMPLE_C_G:
+case AMDGPU::TEX_GET_GRADIENTS_H:
+case AMDGPU::TEX_GET_GRADIENTS_V:
+case AMDGPU::TEX_SET_GRADIENTS_H:
+case AMDGPU::TEX_SET_GRADIENTS_V: {
+  unsigned Opcode = MI.getOpcode();
+  bool hasOffsets = (Opcode == AMDGPU::TEX_LD);
+  unsigned OpOffset = hasOffsets ? 3 : 0;
+  int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
+  int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
+
+  uint32_t srcSelect[4] = {0, 1, 2, 3};
+  uint32_t Offsets[3] = {0 , 0, 0};
+  uint64_t coordType[4] = {1, 1, 1, 1};
+
+  if (hasOffsets)
+for (unsigned i = 0; i  3; i++)
+  Offsets[i] = MI.getOperand(i + 2).getImm();
+
+  if (TextureType == TEXTURE_RECT
+|| TextureType == TEXTURE_SHADOWRECT) {
+coordType[ELEMENT_X] = 0;
+coordType[ELEMENT_Y] = 0;
+  }
+
+  if (TextureType == TEXTURE_1D_ARRAY
+  || TextureType == TEXTURE_SHADOW1D_ARRAY) {
+if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == 
AMDGPU::TEX_SAMPLE_C_LB) {
+  coordType[ELEMENT_Y] = 0;
+} else {
+  coordType[ELEMENT_Z] = 0;
+  srcSelect[ELEMENT_Z] = ELEMENT_Y;
+}
+  } else if (TextureType == TEXTURE_2D_ARRAY
+ || TextureType == TEXTURE_SHADOW2D_ARRAY) {
+coordType[ELEMENT_Z] = 0;
+  }
+
+
+  if ((TextureType == TEXTURE_SHADOW1D
+  || TextureType == TEXTURE_SHADOW2D
+  || TextureType == TEXTURE_SHADOWRECT
+  || TextureType == TEXTURE_SHADOW1D_ARRAY)
+   Opcode != AMDGPU::TEX_SAMPLE_C_L
+   Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
+srcSelect[ELEMENT_W] = ELEMENT_Z;
+  }
+
+  uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
+  coordType[ELEMENT_X]  60 | coordType[ELEMENT_Y]  61 |
+  coordType[ELEMENT_Z]  62 | coordType[ELEMENT_W]  63;
+  uint32_t Word2 = Sampler  15 | srcSelect[ELEMENT_X]  20 |
+  srcSelect[ELEMENT_Y]  23 | srcSelect[ELEMENT_Z]  26 |
+  srcSelect[ELEMENT_W]  29 | Offsets[0]  0 | Offsets[1]  5 |
+  Offsets[2]  10;
+
+  EmitByte(INSTR_TEX, OS);
+  Emit(Word01, OS);
+  Emit(Word2, OS);
+  break;
+}
 case AMDGPU::EG_ExportSwz:
 case AMDGPU::R600_ExportSwz:
 case AMDGPU::EG_ExportBuf:
@@ -334,99 +400,6 @@ void R600MCCodeEmitter::EmitSrcISA(const MCInst MI, 
unsigned RegOpIdx,
   Emit(InlineConstant.i, OS);
 }
 
-void R600MCCodeEmitter::EmitTexInstr(const MCInst MI,
- SmallVectorImplMCFixup Fixups,
- raw_ostream OS) const {
-
-  unsigned Opcode = MI.getOpcode();
-  bool hasOffsets = (Opcode == AMDGPU::TEX_LD);
-  unsigned OpOffset = hasOffsets ? 3 : 0;
-  int64_t Resource = MI.getOperand(OpOffset + 2).getImm();
-  int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
-  int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
-  unsigned srcSelect[4] = {0, 1, 2, 3};
-
-  // Emit instruction type
-  EmitByte(1, OS);
-
-  // Emit instruction
-  EmitByte(getBinaryCodeForInstr(MI, 

[Mesa-dev] [PATCH 2/2] R600: Emit CF_ALU and use true kcache register.

2013-03-27 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPU.h   |   1 +
 lib/Target/R600/AMDGPUTargetMachine.cpp|   1 +
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp |  10 +-
 lib/Target/R600/R600EmitClauseMarkers.cpp  | 243 +
 lib/Target/R600/R600Instructions.td|  83 ++-
 lib/Target/R600/R600RegisterInfo.td|  63 ++
 6 files changed, 389 insertions(+), 12 deletions(-)
 create mode 100644 lib/Target/R600/R600EmitClauseMarkers.cpp

diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index e099a9f..3cd792a 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
 // R600 Passes
 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine tm);
+FunctionPass *createR600EmitClauseMarkers(TargetMachine tm);
 
 // SI Passes
 FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 0185747..45b1be0 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -151,6 +151,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
   if (ST.device()-getGeneration() = AMDGPUDeviceInfo::HD6XXX) {
 addPass(createAMDGPUCFGPreparationPass(*TM));
 addPass(createAMDGPUCFGStructurizerPass(*TM));
+addPass(createR600EmitClauseMarkers(*TM));
 addPass(createR600ExpandSpecialInstrsPass(*TM));
 addPass(FinalizeMachineBundlesID);
   } else {
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 
b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 00ebb44..cf43f3f 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -101,7 +101,8 @@ enum InstrTypes {
   INSTR_FC,
   INSTR_NATIVE,
   INSTR_VTX,
-  INSTR_EXPORT
+  INSTR_EXPORT,
+  INSTR_CFALU
 };
 
 enum FCInstr {
@@ -250,6 +251,13 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
MI, raw_ostream OS,
   Emit(Inst, OS);
   break;
 }
+case AMDGPU::CF_ALU:
+case AMDGPU::CF_ALU_PUSH_BEFORE: {
+  uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+  EmitByte(INSTR_CFALU, OS);
+  Emit(Inst, OS);
+  break;
+}
 
 default:
   EmitALUInstr(MI, Fixups, OS);
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp 
b/lib/Target/R600/R600EmitClauseMarkers.cpp
new file mode 100644
index 000..b869c88
--- /dev/null
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -0,0 +1,243 @@
+//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU 
---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+/// \file
+/// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold
+/// 128 ALU instructions; these instructions can access up to 4 prefetched
+/// lines of 16 registers from constant buffers. Such ALU clauses are
+/// initiated by CF_ALU instructions.
+//===--===//
+
+#include AMDGPU.h
+#include R600Defines.h
+#include R600InstrInfo.h
+#include R600MachineFunctionInfo.h
+#include R600RegisterInfo.h
+#include llvm/CodeGen/MachineFunctionPass.h
+#include llvm/CodeGen/MachineInstrBuilder.h
+#include llvm/CodeGen/MachineRegisterInfo.h
+
+namespace llvm {
+
+class R600EmitClauseMarkersPass : public MachineFunctionPass {
+
+private:
+  static char ID;
+  const R600InstrInfo *TII;
+
+  unsigned OccupiedDwords(MachineInstr *MI) const {
+switch (MI-getOpcode()) {
+case AMDGPU::INTERP_PAIR_XY:
+case AMDGPU::INTERP_PAIR_ZW:
+case AMDGPU::INTERP_VEC_LOAD:
+case AMDGPU::DOT4_eg_pseudo:
+case AMDGPU::DOT4_r600_pseudo:
+  return 4;
+case AMDGPU::KILL:
+  return 0;
+default:
+  break;
+}
+
+if(TII-isVector(*MI) ||
+TII-isCubeOp(MI-getOpcode()) ||
+TII-isReductionOp(MI-getOpcode()))
+  return 4;
+
+unsigned NumLiteral = 0;
+for (MachineInstr::mop_iterator It = MI-operands_begin(),
+E = MI-operands_end(); It != E; ++It) {
+  MachineOperand MO = *It;
+  if (MO.isReg()  MO.getReg() == AMDGPU::ALU_LITERAL_X)
+++NumLiteral;
+}
+return 1 + NumLiteral;
+  }
+
+  bool isALU(const MachineInstr *MI) const {
+if (TII-isALUInstr(MI-getOpcode()))
+  return true;
+if (TII-isVector(*MI) || TII-isCubeOp(MI-getOpcode()))
+  return true;
+switch (MI-getOpcode()) {
+case AMDGPU::INTERP_PAIR_XY:
+case AMDGPU::INTERP_PAIR_ZW:
+case AMDGPU::INTERP_VEC_LOAD:
+case AMDGPU::COPY:
+case AMDGPU::DOT4_eg_pseudo:
+case AMDGPU::DOT4_r600_pseudo:
+  return true;
+default:
+  

[Mesa-dev] [PATCH 1/2] r600g/llvm: use native encode for tex

2013-03-27 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_shader.c | 50 ++
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 29facf7..1e21559 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -489,29 +489,33 @@ static unsigned r600_tex_from_byte_stream(struct 
r600_shader_ctx *ctx,
 {
struct r600_bytecode_tex tex;
 
-   tex.op = r600_isa_fetch_by_opcode(ctx-bc-isa, bytes[bytes_read++]);
-   tex.resource_id = bytes[bytes_read++];
-   tex.src_gpr = bytes[bytes_read++];
-   tex.src_rel = bytes[bytes_read++];
-   tex.dst_gpr = bytes[bytes_read++];
-   tex.dst_rel = bytes[bytes_read++];
-   tex.dst_sel_x = bytes[bytes_read++];
-   tex.dst_sel_y = bytes[bytes_read++];
-   tex.dst_sel_z = bytes[bytes_read++];
-   tex.dst_sel_w = bytes[bytes_read++];
-   tex.lod_bias = bytes[bytes_read++];
-   tex.coord_type_x = bytes[bytes_read++];
-   tex.coord_type_y = bytes[bytes_read++];
-   tex.coord_type_z = bytes[bytes_read++];
-   tex.coord_type_w = bytes[bytes_read++];
-   tex.offset_x = bytes[bytes_read++];
-   tex.offset_y = bytes[bytes_read++];
-   tex.offset_z = bytes[bytes_read++];
-   tex.sampler_id = bytes[bytes_read++];
-   tex.src_sel_x = bytes[bytes_read++];
-   tex.src_sel_y = bytes[bytes_read++];
-   tex.src_sel_z = bytes[bytes_read++];
-   tex.src_sel_w = bytes[bytes_read++];
+   uint32_t word0 = i32_from_byte_stream(bytes, bytes_read);
+   uint32_t word1 = i32_from_byte_stream(bytes, bytes_read);
+   uint32_t word2 = i32_from_byte_stream(bytes, bytes_read);
+
+   tex.op = r600_isa_fetch_by_opcode(ctx-bc-isa, 
G_SQ_TEX_WORD0_TEX_INST(word0));
+   tex.resource_id = G_SQ_TEX_WORD0_RESOURCE_ID(word0);
+   tex.src_gpr = G_SQ_TEX_WORD0_SRC_GPR(word0);
+   tex.src_rel = G_SQ_TEX_WORD0_SRC_REL(word0);
+   tex.dst_gpr = G_SQ_TEX_WORD1_DST_GPR(word1);
+   tex.dst_rel = G_SQ_TEX_WORD1_DST_REL(word1);
+   tex.dst_sel_x = G_SQ_TEX_WORD1_DST_SEL_X(word1);
+   tex.dst_sel_y = G_SQ_TEX_WORD1_DST_SEL_Y(word1);
+   tex.dst_sel_z = G_SQ_TEX_WORD1_DST_SEL_Z(word1);
+   tex.dst_sel_w = G_SQ_TEX_WORD1_DST_SEL_W(word1);
+   tex.lod_bias = G_SQ_TEX_WORD1_LOD_BIAS(word1);
+   tex.coord_type_x = G_SQ_TEX_WORD1_COORD_TYPE_X(word1);
+   tex.coord_type_y = G_SQ_TEX_WORD1_COORD_TYPE_Y(word1);
+   tex.coord_type_z = G_SQ_TEX_WORD1_COORD_TYPE_Z(word1);
+   tex.coord_type_w = G_SQ_TEX_WORD1_COORD_TYPE_W(word1);
+   tex.offset_x = G_SQ_TEX_WORD2_OFFSET_X(word2);
+   tex.offset_y = G_SQ_TEX_WORD2_OFFSET_Y(word2);
+   tex.offset_z = G_SQ_TEX_WORD2_OFFSET_Z(word2);
+   tex.sampler_id = G_SQ_TEX_WORD2_SAMPLER_ID(word2);
+   tex.src_sel_x = G_SQ_TEX_WORD2_SRC_SEL_X(word2);
+   tex.src_sel_y = G_SQ_TEX_WORD2_SRC_SEL_Y(word2);
+   tex.src_sel_z = G_SQ_TEX_WORD2_SRC_SEL_Z(word2);
+   tex.src_sel_w = G_SQ_TEX_WORD2_SRC_SEL_W(word2);
 
tex.inst_mod = 0;
 
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] r600g/llvm: Add support for cf_alu native encode

2013-03-27 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_asm.c|  2 +-
 src/gallium/drivers/r600/r600_asm.h|  1 +
 src/gallium/drivers/r600/r600_shader.c | 14 ++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 0d570ca..65c705d 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -106,7 +106,7 @@ void r600_bytecode_init(struct r600_bytecode *bc,
bc-msaa_texture_mode = msaa_texture_mode;
 }
 
-static int r600_bytecode_add_cf(struct r600_bytecode *bc)
+int r600_bytecode_add_cf(struct r600_bytecode *bc)
 {
struct r600_bytecode_cf *cf = r600_bytecode_cf();
 
diff --git a/src/gallium/drivers/r600/r600_asm.h 
b/src/gallium/drivers/r600/r600_asm.h
index 1465c31..c1aa3ba 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -227,6 +227,7 @@ int r600_bytecode_add_tex(struct r600_bytecode *bc,
 int r600_bytecode_add_output(struct r600_bytecode *bc,
const struct r600_bytecode_output *output);
 int r600_bytecode_build(struct r600_bytecode *bc);
+int r600_bytecode_add_cf(struct r600_bytecode *bc);
 int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
unsigned op);
 int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 1e21559..6fd1f42 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -626,6 +626,20 @@ static void r600_bytecode_from_byte_stream(struct 
r600_shader_ctx *ctx,
 bytes_read = r600_export_from_byte_stream(ctx, bytes,
 bytes_read);
 break;
+   case 6: {
+   int32_t word0 = i32_from_byte_stream(bytes, 
bytes_read);
+   int32_t word1 = i32_from_byte_stream(bytes, 
bytes_read);
+
+   r600_bytecode_add_cf(ctx-bc);
+   ctx-bc-cf_last-op = 
r600_isa_cf_by_opcode(ctx-bc-isa, G_SQ_CF_ALU_WORD1_CF_INST(word1), 1);
+   ctx-bc-cf_last-kcache[0].bank = 
G_SQ_CF_ALU_WORD0_KCACHE_BANK0(word0);
+   ctx-bc-cf_last-kcache[0].addr = 
G_SQ_CF_ALU_WORD1_KCACHE_ADDR0(word1);
+   ctx-bc-cf_last-kcache[0].mode = 
G_SQ_CF_ALU_WORD0_KCACHE_MODE0(word0);
+   ctx-bc-cf_last-kcache[1].bank = 
G_SQ_CF_ALU_WORD0_KCACHE_BANK1(word0);
+   ctx-bc-cf_last-kcache[1].addr = 
G_SQ_CF_ALU_WORD1_KCACHE_ADDR1(word1);
+   ctx-bc-cf_last-kcache[1].mode = 
G_SQ_CF_ALU_WORD1_KCACHE_MODE1(word1);
+   break;
+  }
default:
/* XXX: Error here */
break;
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Add get/set to handle ALLOC_EXPORT_RAT_WORD0

2013-03-19 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/eg_asm.c  |  38 +++
 src/gallium/drivers/r600/eg_sq.h   |  59 
 src/gallium/drivers/r600/r600_asm.c| 119 +
 src/gallium/drivers/r600/r600_asm.h|   8 ++-
 src/gallium/drivers/r600/r600_shader.c |  34 +++---
 5 files changed, 248 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c 
b/src/gallium/drivers/r600/eg_asm.c
index fffc436..cacb82f 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -106,6 +106,22 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct 
r600_bytecode_cf *cf)
if (bc-chip_class == EVERGREEN) /* no EOP on cayman */
bc-bytecode[id] |= 
S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf-output.end_of_program);
id++;
+   } else if (cfop-flags  CF_MEM) {
+   /* MEM_RAT_CACHELESS instructions */
+   bc-bytecode[id++] = 
S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(cf-output.gpr) |
+   
S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(cf-output.elem_size) |
+   
S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(cf-output.rat_id) |
+   
S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(cf-output.rat_inst) |
+   
S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(cf-output.index_gpr) |
+   
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf-output.type);
+   bc-bytecode[id] = 
S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf-output.burst_count - 1) |
+   
S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf-output.barrier) |
+   
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
+   
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf-output.comp_mask) |
+   
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf-output.array_size);
+   if (bc-chip_class == EVERGREEN) /* no EOP on cayman */
+   bc-bytecode[id] |= 
S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf-output.end_of_program);
+   id++;
} else {
/* branch, loop, call, return instructions */
bc-bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf-cf_addr  
1);
@@ -118,6 +134,28 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct 
r600_bytecode_cf *cf)
return 0;
 }
 
+void eg_bytecode_export_rat_read(struct r600_bytecode *bc,
+   struct r600_bytecode_output *output, uint32_t word0, uint32_t 
word1) {
+   output-rat_id = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(word0);
+   output-rat_inst = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(word0);
+   output-type = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_TYPE(word0);
+   output-gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(word0);
+   output-elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(word0);
+   output-index_gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(word0);
+
+   output-swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
+   output-swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
+   output-swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
+   output-swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
+   output-burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
+   output-end_of_program = 
G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
+   output-op = r600_isa_cf_by_opcode(bc-isa,
+   G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1), /* is_cf_alu 
= */ 0 );
+   output-barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+   output-array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
+   output-comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
+}
+
 void eg_bytecode_export_read(struct r600_bytecode *bc,
struct r600_bytecode_output *output, uint32_t word0, uint32_t 
word1)
 {
diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h
index b534872..83588de 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -176,6 +176,65 @@
 #define   G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(x)(((x)  
30)  0x3)
 #define   C_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE   0x3FFF
 /* done */
+#define P_SQ_CF_ALLOC_EXPORT_WORD0_RAT
+#define   S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(x)   (((x)  
0xF)  0)
+#define   G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(x)   (((x)  
0)  0xF)
+#define   S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(x) (((x)  
0x3F)  4)
+#define   G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(x) (((x)  
4)  0x3F)
+#define 

[Mesa-dev] [PATCH] R600: Use CONSTANT_BUFFER_0 address space for Implicit Parameters

2013-03-16 Thread Vincent Lejeune
It allows the backend to generate reads from the constant cache, which
are faster than VTX_READ.
---
 lib/Target/R600/R600ISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Target/R600/R600ISelLowering.cpp 
b/lib/Target/R600/R600ISelLowering.cpp
index a0e27ea..caa1899 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -522,7 +522,7 @@ SDValue 
R600TargetLowering::LowerImplicitParameter(SelectionDAG DAG, EVT VT,
unsigned DwordOffset) const 
{
   unsigned ByteOffset = DwordOffset * 4;
   PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-  AMDGPUAS::PARAM_I_ADDRESS);
+  AMDGPUAS::CONSTANT_BUFFER_0);
 
   // We shouldn't be using an offset wider than 16-bits for implicit 
parameters.
   assert(isInt16(ByteOffset));
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] R600: Relax some vector constraints on Dot4.

2013-03-15 Thread Vincent Lejeune
Hi Christian,

LLVM does indeed coalesce registers for R600 targets; I was, however, thinking of 
copies between vectors.
For instance, let's say you have 4 vectors coming from instructions that only 
emit vectors (like TEX_SAMPLE, iirc).

If the shader wants to mix them before doing a dp4, you end up with something 
like:

T0_XYZW = TEX_SAMPLE
T1_XYZW = TEX_SAMPLE
T2_XYZW = TEX_SAMPLE
T3_XYZW = TEX_SAMPLE
T0_W = COPY T4_W
T1_Z =  COPY T3_Z
DOT4 T0_XYZW, T1_XYZW

From the hardware point of view, the 2 copies are not necessary because a DOT4 
instruction does not require that its operands belong to the same 128-bit 
register.
It's perfectly legal to have a bundle like this one:

Dot4_eg_real T0_X T1_X
Dot4_eg_real T0_Y T1_Y
Dot4_eg_real T0_Z T3_Z
Dot4_eg_real T4_W T1_W

(In fact, it is even possible to remove the R600_TReg32_* constraints on the 
inputs, but then you have to ensure the bundle does not read more than
3 GPRs from a channel, which needs much more work.)

The previous case may not seem very frequent, but it still occurs in Lightmark and 
Unigine Heaven.

We represent dot4 inputs as vectors, but using 8 scalar inputs is closer to the hw 
capabilities; that's why I wrote this patch. Besides, scalar values usually
have shorter live intervals, lowering register pressure. Shaders that have a dp4 
instruction often end up consuming fewer registers with this patch.

Vincent




- Mail original -
 De : Christian König deathsim...@vodafone.de
 À : Vincent Lejeune v...@ovi.com
 Cc : llvm-comm...@cs.uiuc.edu; mesa-dev@lists.freedesktop.org
 Envoyé le : Vendredi 15 mars 2013 11h18
 Objet : Re: [Mesa-dev] [PATCH] R600: Relax some vector constraints on Dot4.
 
 Hi Vincent,
 
 while I really appreciate your work, I think your development is going 
 in the wrong direction here. Those copies you're trying to avoid (not only 
 with this patch, but also with the previous REG_SEQUENCE patches) shouldn't 
 happen in the first place. I'm not so deeply into the R600 part of our LLVM 
 backend that I can say that I'm 100% sure, but to me that just looks like 
 workarounds for an incorrectly defined register space.
 
 Here is a simple example from SI that should show how things are intended 
 to 
 work. It's a simple 2D texture fetch; the coordinates of this fetch are 
 usually provided in a two-element vector built of VGPRs (I use a 2D fetch 
 just 
 for simplicity; a 3D fetch with explicit LOD would work the same way and 
 would 
 use a four-element vector).
 
 After ISel the assembler code starts with something like this (simplified):
 ...
 %vreg13def,tied1 = V_INTERP_P2_F32 ...
 ...
 %vreg17def,tied1 = V_INTERP_P2_F32 ...
 ...
 %vreg22def = IMPLICIT_DEF; VReg_64:%vreg22
 %vreg21def,tied1 = INSERT_SUBREG %vreg22tied0, 
 %vreg13kill, sub0; VReg_64:%vreg21,%vreg22 VReg_32:%vreg13
 %vreg23def,tied1 = INSERT_SUBREG %vreg21tied0, 
 %vreg17kill, sub1; VReg_64:%vreg23,%vreg21 VReg_32:%vreg17
 %vreg24def = IMAGE_SAMPLE 15, 0, 0, 0, 0, 0, 0, 0, %vreg23kill, 
 
 
 As you can see, the subcomponents of the vectors are inserted/extracted just 
 like it happens on R600, but the register allocator is capable of handling 
 that 
 much better than on R600, and so avoids the (sometimes quite expensive) COPY 
 operations in the first place. The resulting code looks like this:
 
 ...
 %vreg23:sub0def,tied1 = V_INTERP_P2_F32 ...
 ...
 %vreg23:sub1def,tied1 = V_INTERP_P2_F32 ...
 ...
 %vreg24def = IMAGE_SAMPLE 15, 0, 0, 0, 0, 0, 0, 0, %vreg23, ...
 
 So INSERT_SUBREG isn't replaced with a COPY like on R600, but instead the 
 V_INTERP_P2_F32 instructions can write directly to the appropriate sub 
 register 
 component.
 
 I'm not 100% sure why this doesn't work the same way on R600, but I 
 think it might be a good idea to figure that out.
 
 Cheers,
 Christian.
 
 Am 14.03.2013 21:51, schrieb Vincent Lejeune:
  Dot4 now uses 8 scalar operands instead of 2 vector ones, which allows the 
 register
  coalescer to remove some unneeded COPYs.
  This patch also defines some structures/functions that can be used to 
 handle
  every vector instructions (CUBE, Cayman special instructions...) in a 
 similar
  fashion.
  ---
    lib/Target/R600/AMDGPUISelLowering.h        |  1 +
    lib/Target/R600/R600Defines.h               | 74 
    lib/Target/R600/R600ExpandSpecialInstrs.cpp | 25 
    lib/Target/R600/R600ISelLowering.cpp        | 21 +++
    lib/Target/R600/R600InstrInfo.cpp           | 88 
 +
    lib/Target/R600/R600InstrInfo.h             |  5 ++
    lib/Target/R600/R600Instructions.td         | 51 -
    lib/Target/R600/R600MachineScheduler.cpp    |  2 +
    8 files changed, 266 insertions(+), 1 deletion(-)
 
  diff --git a/lib/Target/R600/AMDGPUISelLowering.h 
 b/lib/Target/R600/AMDGPUISelLowering.h
  index f31b646..f9f5a60 100644
  --- a/lib/Target/R600/AMDGPUISelLowering.h
  +++ b/lib/Target/R600/AMDGPUISelLowering.h
  @@ -125,6 +125,7 @@ enum {
      SMIN,
      UMIN

Re: [Mesa-dev] Google Summer of Code ideas needed

2013-03-15 Thread Vincent Lejeune
Hi,

If LLVM backend development is allowed, maybe a student could work on improving 
VLIW5 scheduling for R600 hardware.
So far I have focused on the VLIW4 architecture, but extending the scheduler to 
support the Trans ALU wouldn't be too hard.
This would require a way to represent Trans slot compatibility for instructions 
in R600Instructions.td, checks for additional constant read/literal limitations 
on this slot, and modifications to a couple of functions inside 
R600MachineScheduler.cpp.
This may look like a short task, but the student would also need some time to 
get used to all the tools we use, like piglit, and to understand the LLVM 
codebase.



- Mail original -
 De : Tom Stellard t...@stellard.net
 À : mesa-dev@lists.freedesktop.org
 Cc : 
 Envoyé le : Mercredi 13 mars 2013 18h11
 Objet : [Mesa-dev] Google Summer of Code ideas needed
 
 Hi,
 
 It's time again for Google Summer of Code, so we need to start updating
 the X.Org ideas page (http://www.x.org/wiki/SummerOfCodeIdeas) with new
 ideas.  Since there have been a few issues with the wikis lately, if you
 have any ideas please respond to this thread, and I will make sure they
 get onto the official ideas page (but still feel free to update the wiki
 page yourself if you can).  A good project description should contain:
 
 - A brief description of the project
 - A difficulty rating (e.g. easy, medium, hard)
 - The skills / programming languages required
 
 Also, I am going to purge all the old ideas from the ideas page in the
 next week, so if there are any of the old ideas that you think are
 still relevant, let me know and I will keep it.
 
 The ideas page is used as one of the criteria by Google for selecting
 mentoring organizations and part of the reason X.Org was not selected
 last year was that the ideas page was not up to par, so if we want to
 participate in Google Summer of Code this year, it is important we
 have a good ideas page with lots of ideas.
 
 Thanks,
 Tom Stellard
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600: Relax some vector constraints on Dot4.

2013-03-14 Thread Vincent Lejeune
Dot4 now uses 8 scalar operands instead of 2 vector ones, which allows the register
coalescer to remove some unneeded COPYs.
This patch also defines some structures/functions that can be used to handle
every vector instruction (CUBE, Cayman special instructions...) in a similar
fashion.
---
 lib/Target/R600/AMDGPUISelLowering.h|  1 +
 lib/Target/R600/R600Defines.h   | 74 
 lib/Target/R600/R600ExpandSpecialInstrs.cpp | 25 
 lib/Target/R600/R600ISelLowering.cpp| 21 +++
 lib/Target/R600/R600InstrInfo.cpp   | 88 +
 lib/Target/R600/R600InstrInfo.h |  5 ++
 lib/Target/R600/R600Instructions.td | 51 -
 lib/Target/R600/R600MachineScheduler.cpp|  2 +
 8 files changed, 266 insertions(+), 1 deletion(-)

diff --git a/lib/Target/R600/AMDGPUISelLowering.h 
b/lib/Target/R600/AMDGPUISelLowering.h
index f31b646..f9f5a60 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -125,6 +125,7 @@ enum {
   SMIN,
   UMIN,
   URECIP,
+  DOT4,
   EXPORT,
   CONST_ADDRESS,
   REGISTER_LOAD,
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
index 16cfcf5..72d83b0 100644
--- a/lib/Target/R600/R600Defines.h
+++ b/lib/Target/R600/R600Defines.h
@@ -92,6 +92,80 @@ namespace R600Operands {
 {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
   };
 
+  enum VecOps {
+UPDATE_EXEC_MASK_X,
+UPDATE_PREDICATE_X,
+WRITE_X,
+OMOD_X,
+DST_REL_X,
+CLAMP_X,
+SRC0_X,
+SRC0_NEG_X,
+SRC0_REL_X,
+SRC0_ABS_X,
+SRC0_SEL_X,
+SRC1_X,
+SRC1_NEG_X,
+SRC1_REL_X,
+SRC1_ABS_X,
+SRC1_SEL_X,
+PRED_SEL_X,
+UPDATE_EXEC_MASK_Y,
+UPDATE_PREDICATE_Y,
+WRITE_Y,
+OMOD_Y,
+DST_REL_Y,
+CLAMP_Y,
+SRC0_Y,
+SRC0_NEG_Y,
+SRC0_REL_Y,
+SRC0_ABS_Y,
+SRC0_SEL_Y,
+SRC1_Y,
+SRC1_NEG_Y,
+SRC1_REL_Y,
+SRC1_ABS_Y,
+SRC1_SEL_Y,
+PRED_SEL_Y,
+UPDATE_EXEC_MASK_Z,
+UPDATE_PREDICATE_Z,
+WRITE_Z,
+OMOD_Z,
+DST_REL_Z,
+CLAMP_Z,
+SRC0_Z,
+SRC0_NEG_Z,
+SRC0_REL_Z,
+SRC0_ABS_Z,
+SRC0_SEL_Z,
+SRC1_Z,
+SRC1_NEG_Z,
+SRC1_REL_Z,
+SRC1_ABS_Z,
+SRC1_SEL_Z,
+PRED_SEL_Z,
+UPDATE_EXEC_MASK_W,
+UPDATE_PREDICATE_W,
+WRITE_W,
+OMOD_W,
+DST_REL_W,
+CLAMP_W,
+SRC0_W,
+SRC0_NEG_W,
+SRC0_REL_W,
+SRC0_ABS_W,
+SRC0_SEL_W,
+SRC1_W,
+SRC1_NEG_W,
+SRC1_REL_W,
+SRC1_ABS_W,
+SRC1_SEL_W,
+PRED_SEL_W,
+IMM_0,
+IMM_1,
+VEC_COUNT
+ };
+
 }
 
 #endif // R600DEFINES_H_
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp 
b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
index f8c900f..993bdad 100644
--- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -182,6 +182,31 @@ bool 
R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction MF) {
 MI.eraseFromParent();
 continue;
 }
+  case AMDGPU::DOT_4: {
+
+const R600RegisterInfo TRI = TII-getRegisterInfo();
+
+unsigned DstReg = MI.getOperand(0).getReg();
+unsigned DstBase = TRI.getEncodingValue(DstReg)  HW_REG_MASK;
+
+for (unsigned Chan = 0; Chan  4; ++Chan) {
+  bool Mask = (Chan != TRI.getHWRegChan(DstReg));
+  unsigned SubDstReg =
+  AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+  MachineInstr *BMI =
+  TII-buildSlotOfVectorInstruction(MBB, MI, Chan, SubDstReg);
+  if (Chan  0) {
+BMI-bundleWithPred();
+  }
+  if (Mask) {
+TII-addFlag(BMI, 0, MO_FLAG_MASK);
+  }
+  if (Chan != 3)
+TII-addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+}
+MI.eraseFromParent();
+continue;
+  }
   }
 
   bool IsReduction = TII-isReductionOp(MI.getOpcode());
diff --git a/lib/Target/R600/R600ISelLowering.cpp 
b/lib/Target/R600/R600ISelLowering.cpp
index a73691d..4868dc7 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -394,6 +394,27 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, 
SelectionDAG DAG) const
 
   return SDValue(interp, slot % 2);
 }
+case AMDGPUIntrinsic::AMDGPU_dp4: {
+  SDValue Args[8] = {
+  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+  DAG.getConstant(0, MVT::i32)),
+  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+  DAG.getConstant(0, MVT::i32)),
+  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+  DAG.getConstant(1, MVT::i32)),
+  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+  DAG.getConstant(1, MVT::i32)),
+  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+  

[Mesa-dev] [PATCH] R600: Factorize code handling Const Read Port limitation

2013-03-13 Thread Vincent Lejeune
---
 lib/Target/R600/AMDILISelDAGToDAG.cpp| 34 ++
 lib/Target/R600/R600InstrInfo.cpp| 54 ++
 lib/Target/R600/R600InstrInfo.h  |  3 ++
 lib/Target/R600/R600MachineScheduler.cpp | 77 
 lib/Target/R600/R600MachineScheduler.h   |  3 +-
 test/CodeGen/R600/kcache-fold-2.ll   | 52 +
 6 files changed, 144 insertions(+), 79 deletions(-)
 create mode 100644 test/CodeGen/R600/kcache-fold-2.ll

diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp 
b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 0c7880d..05a1ea7 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -336,6 +336,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   return Result;
 }
 
+
 bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
 const R600InstrInfo *TII, std::vectorSDValue Ops) {
   int OperandIdx[] = {
@@ -365,17 +366,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
 SDValue Operand = Ops[OperandIdx[i] - 1];
 switch (Operand.getOpcode()) {
 case AMDGPUISD::CONST_ADDRESS: {
-  if (i == 2)
-break;
   SDValue CstOffset;
-  if (!Operand.getValueType().isVector() 
-  SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
-Ops[OperandIdx[i] - 1] = CurDAG-getRegister(AMDGPU::ALU_CONST, 
MVT::f32);
-Ops[SelIdx[i] - 1] = CstOffset;
-return true;
+  if (Operand.getValueType().isVector() ||
+  !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
+break;
+
+  // Gather others constants values
+  std::vectorunsigned Consts;
+  for (unsigned j = 0; j  3; j++) {
+int SrcIdx = OperandIdx[j];
+if (SrcIdx  0)
+  break;
+if (RegisterSDNode *Reg = dyn_castRegisterSDNode(Ops[SrcIdx - 1])) {
+  if (Reg-getReg() == AMDGPU::ALU_CONST) {
+ConstantSDNode *Cst = dyn_castConstantSDNode(Ops[SelIdx[j] - 1]);
+Consts.push_back(Cst-getZExtValue());
+  }
+}
   }
+
+  ConstantSDNode *Cst = dyn_castConstantSDNode(CstOffset);
+  Consts.push_back(Cst-getZExtValue());
+  if (!TII-fitsConstReadLimitations(Consts))
+break;
+
+  Ops[OperandIdx[i] - 1] = CurDAG-getRegister(AMDGPU::ALU_CONST, 
MVT::f32);
+  Ops[SelIdx[i] - 1] = CstOffset;
+  return true;
   }
-  break;
 case ISD::FNEG:
   if (NegIdx[i]  0)
 break;
diff --git a/lib/Target/R600/R600InstrInfo.cpp 
b/lib/Target/R600/R600InstrInfo.cpp
index be3318a..0865098 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
   (TargetFlags  R600_InstFlag::OP3));
 }
 
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vectorunsigned Consts)
+const {
+  assert (Consts.size() = 12  Too many operands in instructions group);
+  unsigned Pair1 = 0, Pair2 = 0;
+  for (unsigned i = 0, n = Consts.size(); i  n; ++i) {
+unsigned ReadConstHalf = Consts[i]  2;
+unsigned ReadConstIndex = Consts[i]  (~3);
+unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
+if (!Pair1) {
+  Pair1 = ReadHalfConst;
+  continue;
+}
+if (Pair1 == ReadHalfConst)
+  continue;
+if (!Pair2) {
+  Pair2 = ReadHalfConst;
+  continue;
+}
+if (Pair2 != ReadHalfConst)
+  return false;
+  }
+  return true;
+}
+
+bool
+R600InstrInfo::canBundle(const std::vectorMachineInstr * MIs) const {
+  std::vectorunsigned Consts;
+  for (unsigned i = 0, n = MIs.size(); i  n; i++) {
+const MachineInstr *MI = MIs[i];
+
+const R600Operands::Ops OpTable[3][2] = {
+  {R600Operands::SRC0, R600Operands::SRC0_SEL},
+  {R600Operands::SRC1, R600Operands::SRC1_SEL},
+  {R600Operands::SRC2, R600Operands::SRC2_SEL},
+};
+
+if (!isALUInstr(MI-getOpcode()))
+  continue;
+
+for (unsigned j = 0; j  3; j++) {
+  int SrcIdx = getOperandIdx(MI-getOpcode(), OpTable[j][0]);
+  if (SrcIdx  0)
+break;
+  if (MI-getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+unsigned Const = MI-getOperand(
+getOperandIdx(MI-getOpcode(), OpTable[j][1])).getImm();
+Consts.push_back(Const);
+  }
+}
+  }
+  return fitsConstReadLimitations(Consts);
+}
+
 DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine 
*TM,
 const ScheduleDAG *DAG) const {
   const InstrItineraryData *II = TM-getInstrItineraryData();
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index efe721c..bf9569e 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -53,6 +53,9 @@ namespace llvm {
   /// \returns true if this \p Opcode represents an ALU instruction.
   bool isALUInstr(unsigned Opcode) const;
 
+  bool fitsConstReadLimitations(const std::vectorunsigned) const;
+ 

[Mesa-dev] [PATCH] R600: Lower clamp constant to constant

2013-03-13 Thread Vincent Lejeune
---
 lib/Target/R600/R600ISelLowering.cpp | 23 +++
 test/CodeGen/R600/clamp-constants.ll | 20 
 2 files changed, 43 insertions(+)
 create mode 100644 test/CodeGen/R600/clamp-constants.ll

diff --git a/lib/Target/R600/R600ISelLowering.cpp 
b/lib/Target/R600/R600ISelLowering.cpp
index a73691d..96686e6 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -394,6 +394,29 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, 
SelectionDAG DAG) const
 
   return SDValue(interp, slot % 2);
 }
+case AMDGPUIntrinsic::AMDIL_clamp: {
+  ConstantFPSDNode *Min = dyn_castConstantFPSDNode(Op.getOperand(2));
+  ConstantFPSDNode *Max = dyn_castConstantFPSDNode(Op.getOperand(3));
+  if (ConstantFPSDNode *C = dyn_castConstantFPSDNode(Op.getOperand(1))) {
+switch (C-getValueAPF().compare(Max-getValueAPF())) {
+case APFloat::cmpGreaterThan:
+case APFloat::cmpEqual:
+  return Op.getOperand(3);
+default:
+  break;
+}
+
+switch (C-getValueAPF().compare(Min-getValueAPF())) {
+case APFloat::cmpLessThan:
+case APFloat::cmpEqual:
+  return Op.getOperand(2);
+default:
+  break;
+}
+return Op.getOperand(1);
+  }
+  break;
+}
 
 case r600_read_ngroups_x:
   return LowerImplicitParameter(DAG, VT, DL, 0);
diff --git a/test/CodeGen/R600/clamp-constants.ll 
b/test/CodeGen/R600/clamp-constants.ll
new file mode 100644
index 000..cf4d35f
--- /dev/null
+++ b/test/CodeGen/R600/clamp-constants.ll
@@ -0,0 +1,20 @@
+;RUN: llc  %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK-NOT: MOV
+
+define void @main() {
+main_body:
+  %0 = call float @llvm.AMDIL.clamp.(float 1.50e+00, float 0.00e+00, 
float 1.00e+00)
+  %1 = call float @llvm.AMDIL.clamp.(float 0.00e+00, float 0.00e+00, 
float 1.00e+00)
+  %2 = call float @llvm.AMDIL.clamp.(float 1.00e+00, float 0.00e+00, 
float 1.00e+00)
+  %3 = call float @llvm.AMDIL.clamp.(float -0.50e+00, float 0.00e+00, 
float 1.00e+00)
+  %4 = insertelement 4 x float undef, float %0, i32 0
+  %5 = insertelement 4 x float %4, float %1, i32 1
+  %6 = insertelement 4 x float %5, float %2, i32 2
+  %7 = insertelement 4 x float %6, float %3, i32 3
+  call void @llvm.R600.store.swizzle(4 x float %7, i32 0, i32 0)
+  ret void
+}
+
+declare float @llvm.AMDIL.clamp.(float, float, float) readnone
+declare void @llvm.R600.store.swizzle(4 x float, i32, i32)
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] R600: Factorize code handling Const Read Port limitation

2013-03-13 Thread Vincent Lejeune
I fixed the coding style issue.
The iostream include was a debug leftover line, it shouldn't be there.


- Mail original -
 De : Tom Stellard t...@stellard.net
 À : Vincent Lejeune v...@ovi.com
 Cc : llvm-comm...@cs.uiuc.edu; mesa-dev@lists.freedesktop.org
 Envoyé le : Mercredi 13 mars 2013 21h49
 Objet : Re: [PATCH] R600: Factorize code handling Const Read Port limitation
 
 On Wed, Mar 13, 2013 at 09:12:41PM +0100, Vincent Lejeune wrote:
  ---
   lib/Target/R600/AMDILISelDAGToDAG.cpp    | 34 ++
   lib/Target/R600/R600InstrInfo.cpp        | 54 ++
   lib/Target/R600/R600InstrInfo.h          |  3 ++
   lib/Target/R600/R600MachineScheduler.cpp | 77 
 
   lib/Target/R600/R600MachineScheduler.h   |  3 +-
   test/CodeGen/R600/kcache-fold-2.ll       | 52 +
   6 files changed, 144 insertions(+), 79 deletions(-)
   create mode 100644 test/CodeGen/R600/kcache-fold-2.ll
 
  diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp 
 b/lib/Target/R600/AMDILISelDAGToDAG.cpp
  index 0c7880d..05a1ea7 100644
  --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
  +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
  @@ -336,6 +336,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     return Result;
   }
   
  +
 
 Whitespace
   bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
       const R600InstrInfo *TII, std::vectorSDValue Ops) {
     int OperandIdx[] = {
  @@ -365,17 +366,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned 
 Opcode,
       SDValue Operand = Ops[OperandIdx[i] - 1];
       switch (Operand.getOpcode()) {
       case AMDGPUISD::CONST_ADDRESS: {
  -      if (i == 2)
  -        break;
         SDValue CstOffset;
  -      if (!Operand.getValueType().isVector() 
  -          SelectGlobalValueConstantOffset(Operand.getOperand(0), 
 CstOffset)) {
  -        Ops[OperandIdx[i] - 1] = CurDAG-getRegister(AMDGPU::ALU_CONST, 
 MVT::f32);
  -        Ops[SelIdx[i] - 1] = CstOffset;
  -        return true;
  +      if (Operand.getValueType().isVector() ||
  +          !SelectGlobalValueConstantOffset(Operand.getOperand(0), 
 CstOffset))
  +        break;
  +
  +      // Gather others constants values
  +      std::vectorunsigned Consts;
  +      for (unsigned j = 0; j  3; j++) {
  +        int SrcIdx = OperandIdx[j];
  +        if (SrcIdx  0)
  +          break;
  +        if (RegisterSDNode *Reg = 
 dyn_castRegisterSDNode(Ops[SrcIdx - 1])) {
  +          if (Reg-getReg() == AMDGPU::ALU_CONST) {
  +            ConstantSDNode *Cst = 
 dyn_castConstantSDNode(Ops[SelIdx[j] - 1]);
  +            Consts.push_back(Cst-getZExtValue());
  +          }
  +        }
         }
  +
  +      ConstantSDNode *Cst = dyn_castConstantSDNode(CstOffset);
  +      Consts.push_back(Cst-getZExtValue());
  +      if (!TII-fitsConstReadLimitations(Consts))
  +        break;
  +
  +      Ops[OperandIdx[i] - 1] = CurDAG-getRegister(AMDGPU::ALU_CONST, 
 MVT::f32);
  +      Ops[SelIdx[i] - 1] = CstOffset;
  +      return true;
         }
  -      break;
       case ISD::FNEG:
         if (NegIdx[i]  0)
           break;
  diff --git a/lib/Target/R600/R600InstrInfo.cpp 
 b/lib/Target/R600/R600InstrInfo.cpp
  index be3318a..0865098 100644
  --- a/lib/Target/R600/R600InstrInfo.cpp
  +++ b/lib/Target/R600/R600InstrInfo.cpp
  @@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const 
 {
             (TargetFlags  R600_InstFlag::OP3));
   }
   
  +bool
  +R600InstrInfo::fitsConstReadLimitations(const std::vectorunsigned 
 Consts)
  +    const {
  +  assert (Consts.size() = 12  Too many operands in 
 instructions group);
  +  unsigned Pair1 = 0, Pair2 = 0;
  +  for (unsigned i = 0, n = Consts.size(); i  n; ++i) {
  +    unsigned ReadConstHalf = Consts[i]  2;
  +    unsigned ReadConstIndex = Consts[i]  (~3);
  +    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
  +    if (!Pair1) {
  +      Pair1 = ReadHalfConst;
  +      continue;
  +    }
  +    if (Pair1 == ReadHalfConst)
  +      continue;
  +    if (!Pair2) {
  +      Pair2 = ReadHalfConst;
  +      continue;
  +    }
  +    if (Pair2 != ReadHalfConst)
  +      return false;
  +  }
  +  return true;
  +}
  +
  +bool
  +R600InstrInfo::canBundle(const std::vectorMachineInstr * MIs) 
 const {
  +  std::vectorunsigned Consts;
  +  for (unsigned i = 0, n = MIs.size(); i  n; i++) {
  +    const MachineInstr *MI = MIs[i];
  +
  +    const R600Operands::Ops OpTable[3][2] = {
  +      {R600Operands::SRC0, R600Operands::SRC0_SEL},
  +      {R600Operands::SRC1, R600Operands::SRC1_SEL},
  +      {R600Operands::SRC2, R600Operands::SRC2_SEL},
  +    };
  +
  +    if (!isALUInstr(MI-getOpcode()))
  +      continue;
  +
  +    for (unsigned j = 0; j  3; j++) {
  +      int SrcIdx = getOperandIdx(MI-getOpcode(), OpTable[j][0]);
  +      if (SrcIdx  0)
  +        break;
  +      if (MI-getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
  +        unsigned Const = MI-getOperand(
  +            getOperandIdx(MI

[Mesa-dev] [PATCH] R600: Fix JUMP handling so that MachineInstr verification can occur

2013-03-08 Thread Vincent Lejeune
This allows R600 Target to use the newly created -verify-misched llc flag
---
 lib/Target/R600/AMDILCFGStructurizer.cpp|   8 +-
 lib/Target/R600/R600ISelLowering.cpp|   7 +-
 lib/Target/R600/R600InstrInfo.cpp   |  66 ++--
 lib/Target/R600/R600Instructions.td |  26 +++--
 test/CodeGen/R600/schedule-fs-loop-nested-if.ll |  82 +++
 test/CodeGen/R600/schedule-fs-loop-nested.ll|  87 
 test/CodeGen/R600/schedule-fs-loop.ll   |  54 ++
 test/CodeGen/R600/schedule-vs-if-nested-loop.ll | 133 
 8 files changed, 418 insertions(+), 45 deletions(-)
 create mode 100644 test/CodeGen/R600/schedule-fs-loop-nested-if.ll
 create mode 100644 test/CodeGen/R600/schedule-fs-loop-nested.ll
 create mode 100644 test/CodeGen/R600/schedule-fs-loop.ll
 create mode 100644 test/CodeGen/R600/schedule-vs-if-nested-loop.ll

diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp 
b/lib/Target/R600/AMDILCFGStructurizer.cpp
index aa8ab6b..b0cd0f9 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -2595,6 +2595,7 @@ struct CFGStructTraitsAMDGPUCFGStructurizer {
 
   static int getBranchNzeroOpcode(int oldOpcode) {
 switch(oldOpcode) {
+case AMDGPU::JUMP_COND:
 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
 case AMDGPU::BRANCH_COND_i32:
 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
@@ -2606,6 +2607,7 @@ struct CFGStructTraitsAMDGPUCFGStructurizer {
 
   static int getBranchZeroOpcode(int oldOpcode) {
 switch(oldOpcode) {
+case AMDGPU::JUMP_COND:
 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
 case AMDGPU::BRANCH_COND_i32:
 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
@@ -2617,6 +2619,7 @@ struct CFGStructTraitsAMDGPUCFGStructurizer {
 
   static int getContinueNzeroOpcode(int oldOpcode) {
 switch(oldOpcode) {
+case AMDGPU::JUMP_COND:
 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
 default:
   assert(0  internal error);
@@ -2626,6 +2629,7 @@ struct CFGStructTraitsAMDGPUCFGStructurizer {
 
   static int getContinueZeroOpcode(int oldOpcode) {
 switch(oldOpcode) {
+case AMDGPU::JUMP_COND:
 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
 default:
   assert(0  internal error);
@@ -2654,8 +2658,7 @@ struct CFGStructTraitsAMDGPUCFGStructurizer {
 
   static bool isCondBranch(MachineInstr *instr) {
 switch (instr-getOpcode()) {
-  case AMDGPU::JUMP:
-return instr-getOperand(instr-findFirstPredOperandIdx()).getReg() != 
0;
+  case AMDGPU::JUMP_COND:
   case AMDGPU::BRANCH_COND_i32:
   case AMDGPU::BRANCH_COND_f32:
   break;
@@ -2668,7 +2671,6 @@ struct CFGStructTraitsAMDGPUCFGStructurizer {
   static bool isUncondBranch(MachineInstr *instr) {
 switch (instr-getOpcode()) {
 case AMDGPU::JUMP:
-  return instr-getOperand(instr-findFirstPredOperandIdx()).getReg() == 0;
 case AMDGPU::BRANCH:
   return true;
 default:
diff --git a/lib/Target/R600/R600ISelLowering.cpp 
b/lib/Target/R600/R600ISelLowering.cpp
index 6ee4c8f..a73691d 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -221,8 +221,7 @@ MachineBasicBlock * 
R600TargetLowering::EmitInstrWithCustomInserter(
 
   case AMDGPU::BRANCH:
   BuildMI(*BB, I, BB-findDebugLoc(I), TII-get(AMDGPU::JUMP))
-  .addOperand(MI-getOperand(0))
-  .addReg(0);
+  .addOperand(MI-getOperand(0));
   break;
 
   case AMDGPU::BRANCH_COND_f32: {
@@ -233,7 +232,7 @@ MachineBasicBlock * 
R600TargetLowering::EmitInstrWithCustomInserter(
   .addImm(OPCODE_IS_NOT_ZERO)
   .addImm(0); // Flags
 TII-addFlag(NewMI, 0, MO_FLAG_PUSH);
-BuildMI(*BB, I, BB-findDebugLoc(I), TII-get(AMDGPU::JUMP))
+BuildMI(*BB, I, BB-findDebugLoc(I), TII-get(AMDGPU::JUMP_COND))
 .addOperand(MI-getOperand(0))
 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
 break;
@@ -247,7 +246,7 @@ MachineBasicBlock * 
R600TargetLowering::EmitInstrWithCustomInserter(
 .addImm(OPCODE_IS_NOT_ZERO_INT)
 .addImm(0); // Flags
 TII-addFlag(NewMI, 0, MO_FLAG_PUSH);
-BuildMI(*BB, I, BB-findDebugLoc(I), TII-get(AMDGPU::JUMP))
+BuildMI(*BB, I, BB-findDebugLoc(I), TII-get(AMDGPU::JUMP_COND))
.addOperand(MI-getOperand(0))
 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
 break;
diff --git a/lib/Target/R600/R600InstrInfo.cpp 
b/lib/Target/R600/R600InstrInfo.cpp
index 106bbc0..be3318a 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -168,6 +168,11 @@ findFirstPredicateSetterFrom(MachineBasicBlock MBB,
   return NULL;
 }
 
+static
+bool isJump(unsigned Opcode) {
+  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
+}
+
 bool
 

Re: [Mesa-dev] [PATCH 7/7] radeon/llvm: enable LICM and DCE pass

2013-03-05 Thread Vincent Lejeune
Reviewed-by: Vincent Lejeune vljn at ovi.com




- Mail original -
 De : Christian König deathsim...@vodafone.de
 À : mesa-dev@lists.freedesktop.org
 Cc : mic...@daenzer.net
 Envoyé le : Mardi 5 mars 2013 15h27
 Objet : [Mesa-dev] [PATCH 7/7] radeon/llvm: enable LICM and DCE pass
 
 From: Christian König christian.koe...@amd.com
 
 Signed-off-by: Christian König christian.koe...@amd.com
 ---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c        |    2 ++
 1 file changed, 2 insertions(+)
 
 diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
 b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
 index f7b7586..f017b87 100644
 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
 +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
 @@ -1281,6 +1281,8 @@ void radeon_llvm_finalize_module(struct 
 radeon_llvm_context * ctx)
 
     /* Add some optimization passes */
     LLVMAddScalarReplAggregatesPass(gallivm-passmgr);
 +    LLVMAddLICMPass(gallivm-passmgr);
 +    LLVMAddAggressiveDCEPass(gallivm-passmgr);
     LLVMAddCFGSimplificationPass(gallivm-passmgr);
 
     /* Run the passs */
 -- 
 1.7.9.5
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] radeon/llvm: enable LICM and DCE pass

2013-03-05 Thread Vincent Lejeune
LICM stands for Loop Invariant Code Motion.
Instructions that do not depend on the loop index are moved outside of the loop body.
(This solves one of the LLVM-generated code issues Vadim pointed out in another thread.)

DCE is Dead Code Elimination... I don't know the difference between classic DCE 
and aggressive DCE, though.




- Mail original -
 De : Michel Dänzer mic...@daenzer.net
 À : Christian König deathsim...@vodafone.de
 Cc : mesa-dev@lists.freedesktop.org
 Envoyé le : Mardi 5 mars 2013 18h20
 Objet : Re: [Mesa-dev] [PATCH 7/7] radeon/llvm: enable LICM and DCE pass
 
 On Die, 2013-03-05 at 15:27 +0100, Christian König wrote: 
  From: Christian König christian.koe...@amd.com
 
  Signed-off-by: Christian König christian.koe...@amd.com
 
 This could use a little more information, e.g.: What are LICM and DCE?
 Why is it a good idea to enable them?
 
 
 -- 
 Earthling Michel Dänzer           |                  http://www.amd.com
 Libre software enthusiast         |          Debian, X and DRI developer
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] R600: Use MUL_IEEE for trig/fdiv intrinsic

2013-02-18 Thread Vincent Lejeune
---
 lib/Target/R600/R600Instructions.td | 8 
 test/CodeGen/R600/fdiv.v4f32.ll | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index 0a01400..e4cc06e 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1090,12 +1090,12 @@ class COS_Common bits11 inst : R600_1OP 
 multiclass DIV_Common InstR600 recip_ieee {
 def : Pat
   (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
-  (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+  (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
 ;
 
 def : Pat
   (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
-  (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+  (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
 ;
 }
 
@@ -1169,12 +1169,12 @@ let Predicates = [isR600] in {
 // cards.
 class COS_PAT InstR600 trig : Pat
   (fcos R600_Reg32:$src),
-  (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
 ;
 
 class SIN_PAT InstR600 trig : Pat
   (fsin R600_Reg32:$src),
-  (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
 ;
 
 
//===--===//
diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll
index b013fd6..459fd11 100644
--- a/test/CodeGen/R600/fdiv.v4f32.ll
+++ b/test/CodeGen/R600/fdiv.v4f32.ll
@@ -1,13 +1,13 @@
 ;RUN: llc  %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test(4 x float addrspace(1)* %out, 4 x float addrspace(1)* 
%in) {
   %b_ptr = getelementptr 4 x float addrspace(1)* %in, i32 1
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] R600: CONST_ADDRESS node is not marked as mayLoad anymore

2013-02-18 Thread Vincent Lejeune
mayLoad complicates scheduling and does not bring any useful info,
as the location is not writable at all.
---
 lib/Target/R600/R600Instructions.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index e4cc06e..0a777f1 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -513,7 +513,7 @@ def INTERP_PAIR_ZW :  AMDGPUShaderInst 
 
 def CONST_ADDRESS: SDNodeAMDGPUISD::CONST_ADDRESS,
   SDTypeProfile1, -1, [SDTCisInt0, SDTCisPtrTy1],
-  [SDNPMayLoad, SDNPVariadic]
+  [SDNPVariadic]
 ;
 
 
//===--===//
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] R600: Turn BUILD_VECTOR into Reg_Sequence

2013-02-18 Thread Vincent Lejeune
---
 lib/Target/R600/AMDILISelDAGToDAG.cpp | 29 +
 1 file changed, 29 insertions(+)

diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp 
b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 2e726e9..6b24117 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -160,6 +160,35 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   }
   switch (Opc) {
   default: break;
+  case ISD::BUILD_VECTOR: {
+const AMDGPUSubtarget ST = TM.getSubtargetAMDGPUSubtarget();
+if (ST.device()-getGeneration()  AMDGPUDeviceInfo::HD6XXX) {
+  break;
+}
+// BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+// that adds a 128 bits reg copy when going through TwoAddressInstructions
+// pass. We want to avoid 128 bits copies as much as possible because they
+// can't be bundled by our scheduler.
+SDValue RegSeqArgs[9] = {
+  CurDAG-getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
+  SDValue(), CurDAG-getTargetConstant(AMDGPU::sub0, MVT::i32),
+  SDValue(), CurDAG-getTargetConstant(AMDGPU::sub1, MVT::i32),
+  SDValue(), CurDAG-getTargetConstant(AMDGPU::sub2, MVT::i32),
+  SDValue(), CurDAG-getTargetConstant(AMDGPU::sub3, MVT::i32)
+};
+bool IsRegSeq = true;
+for (unsigned i = 0; i  N-getNumOperands(); i++) {
+  if (dyn_castRegisterSDNode(N-getOperand(i))) {
+IsRegSeq = false;
+break;
+  }
+  RegSeqArgs[2 * i + 1] = N-getOperand(i);
+}
+if (!IsRegSeq)
+  break;
+return CurDAG-SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N-getVTList(),
+RegSeqArgs, 2 * N-getNumOperands() + 1);
+  }
   case ISD::ConstantFP:
   case ISD::Constant: {
 const AMDGPUSubtarget ST = TM.getSubtargetAMDGPUSubtarget();
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] R600: Fix for Unigine when MachineSched is enabled

2013-02-18 Thread Vincent Lejeune
---
 lib/Target/R600/R600Instructions.td | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index 0a777f1..74106c9 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1587,6 +1587,7 @@ def PRED_X : InstR600 
   (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
   , [], NullALU {
   let FlagOperandIdx = 3;
+  let isTerminator = 1;
 }
 
 let isTerminator = 1, isBranch = 1, isBarrier = 1 in {
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] R600: Remove LowerConstCopyPass and lower CONST_COPY right after ISel.

2013-02-18 Thread Vincent Lejeune
Maintaining CONST_COPY instructions until Pre Emit may prevent some ifcvt cases,
and taking them into account for scheduling is difficult for no real benefit.
---
 lib/Target/R600/AMDGPU.h|   1 -
 lib/Target/R600/AMDGPUTargetMachine.cpp |   1 -
 lib/Target/R600/R600ISelLowering.cpp|   8 +-
 lib/Target/R600/R600Instructions.td |   7 +-
 lib/Target/R600/R600LowerConstCopy.cpp  | 222 
 5 files changed, 11 insertions(+), 228 deletions(-)
 delete mode 100644 lib/Target/R600/R600LowerConstCopy.cpp

diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index ba87918..67073ab 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,7 +23,6 @@ class AMDGPUTargetMachine;
 // R600 Passes
 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine tm);
-FunctionPass *createR600LowerConstCopy(TargetMachine tm);
 
 // SI Passes
 FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
b/lib/Target/R600/AMDGPUTargetMachine.cpp
index e2f00be..70b34b0 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -143,7 +143,6 @@ bool AMDGPUPassConfig::addPreEmitPass() {
 addPass(createAMDGPUCFGStructurizerPass(*TM));
 addPass(createR600ExpandSpecialInstrsPass(*TM));
 addPass(FinalizeMachineBundlesID);
-addPass(createR600LowerConstCopy(*TM));
   } else {
 addPass(createSILowerControlFlowPass(*TM));
   }
diff --git a/lib/Target/R600/R600ISelLowering.cpp 
b/lib/Target/R600/R600ISelLowering.cpp
index ece0b9a..f25ced1 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -150,7 +150,13 @@ MachineBasicBlock * 
R600TargetLowering::EmitInstrWithCustomInserter(
 TII-buildMovImm(*BB, I, MI-getOperand(0).getReg(),
  MI-getOperand(1).getImm());
 break;
-
+  case AMDGPU::CONST_COPY: {
+MachineInstr *NewMI = TII-buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
+MI-getOperand(0).getReg(), AMDGPU::ALU_CONST);
+TII-setImmOperand(NewMI, R600Operands::SRC0_SEL,
+MI-getOperand(1).getImm());
+break;
+  }
 
   case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
   case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index 74106c9..10bcdcf 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1650,17 +1650,18 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, 
hasCtrlDep = 1,
 // Constant Buffer Addressing Support
 
//===--===//
 
-let isCodeGenOnly = 1, isPseudo = 1, Namespace = AMDGPU  in {
+let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = 
AMDGPU  in {
 def CONST_COPY : Instruction {
   let OutOperandList = (outs R600_Reg32:$dst);
   let InOperandList = (ins i32imm:$src);
-  let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS 
ADDRGA_CONST_OFFSET:$src))];
+  let Pattern =
+  [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
   let AsmString = CONST_COPY;
   let neverHasSideEffects = 1;
   let isAsCheapAsAMove = 1;
   let Itinerary = NullALU;
 }
-} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = AMDGPU
+} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = 
AMDGPU
 
 def TEX_VTX_CONSTBUF :
   InstR600ISA (outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), 
VTX_READ_eg $dst, $ptr,
diff --git a/lib/Target/R600/R600LowerConstCopy.cpp 
b/lib/Target/R600/R600LowerConstCopy.cpp
deleted file mode 100644
index 3ebe653..000
--- a/lib/Target/R600/R600LowerConstCopy.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to 
MOV---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===--===//
-//
-/// \file
-/// This pass is intended to handle remaining ConstCopy pseudo MachineInstr.
-/// ISel will fold each Const Buffer read inside scalar ALU. However it cannot
-/// fold them inside vector instruction, like DOT4 or Cube ; ISel emits
-/// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will 
try
-/// to fold them if possible or replace them by MOV otherwise.
-//
-//===--===//
-
-#include AMDGPU.h
-#include R600InstrInfo.h
-#include llvm/CodeGen/MachineFunction.h
-#include llvm/CodeGen/MachineFunctionPass.h
-#include llvm/CodeGen/MachineInstrBuilder.h
-#include llvm/IR/GlobalValue.h
-
-namespace llvm {
-
-class R600LowerConstCopy : public MachineFunctionPass {
-private:
-  static char ID;
-  const 

[Mesa-dev] [PATCH 6/6] R600: initial scheduler code

2013-02-18 Thread Vincent Lejeune
From: Vadim Girlin vadimgir...@gmail.com

This is a skeleton for a pre-RA MachineInstr scheduler strategy. Currently
it only tries to expose more parallelism for ALU instructions (this also
makes the distribution of GPR channels more uniform and increases the
chances of ALU instructions to be packed together in a single VLIW group).
Also it tries to reduce clause switching by grouping instruction of the
same kind (ALU/FETCH/CF) together.

Vincent Lejeune:
 - Support for VLIW4 Slot assignment
 - Recomputation of ScheduleDAG to get more parallelism opportunities

Tom Stellard:
 - Fix assertion failure when trying to determine an instruction's slot
   based on its destination register's class
 - Fix some compiler warnings

Vincent Lejeune: [v2]
 - Remove recomputation of ScheduleDAG (will be provided in a later patch)
 - Improve estimation of an ALU clause size so that heuristic does not emit cf
 instructions at the wrong position.
 - Make schedule heuristic smarter using SUnit Depth
 - Take constant read limitations into account
---
 lib/Target/R600/AMDGPUTargetMachine.cpp  |  17 +-
 lib/Target/R600/R600MachineScheduler.cpp | 483 +++
 lib/Target/R600/R600MachineScheduler.h   | 121 
 test/CodeGen/R600/fdiv.v4f32.ll  |   6 +-
 4 files changed, 623 insertions(+), 4 deletions(-)
 create mode 100644 lib/Target/R600/R600MachineScheduler.cpp
 create mode 100644 lib/Target/R600/R600MachineScheduler.h

diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 70b34b0..eb58853 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -17,6 +17,7 @@
 #include AMDGPU.h
 #include R600ISelLowering.h
 #include R600InstrInfo.h
+#include R600MachineScheduler.h
 #include SIISelLowering.h
 #include SIInstrInfo.h
 #include llvm/Analysis/Passes.h
@@ -39,6 +40,14 @@ extern C void LLVMInitializeR600Target() {
   RegisterTargetMachineAMDGPUTargetMachine X(TheAMDGPUTarget);
 }
 
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+  return new ScheduleDAGMI(C, new R600SchedStrategy());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry(r600, Run R600's custom scheduler,
+createR600MachineScheduler);
+
 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target T, StringRef TT,
 StringRef CPU, StringRef FS,
   TargetOptions Options,
@@ -70,7 +79,13 @@ namespace {
 class AMDGPUPassConfig : public TargetPassConfig {
 public:
   AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase PM)
-: TargetPassConfig(TM, PM) {}
+: TargetPassConfig(TM, PM) {
+const AMDGPUSubtarget ST = TM-getSubtargetAMDGPUSubtarget();
+if (ST.device()-getGeneration() = AMDGPUDeviceInfo::HD6XXX) {
+  enablePass(MachineSchedulerID);
+  MachineSchedRegistry::setDefault(createR600MachineScheduler);
+}
+  }
 
   AMDGPUTargetMachine getAMDGPUTargetMachine() const {
 return getTMAMDGPUTargetMachine();
diff --git a/lib/Target/R600/R600MachineScheduler.cpp 
b/lib/Target/R600/R600MachineScheduler.cpp
new file mode 100644
index 000..efd9490
--- /dev/null
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -0,0 +1,483 @@
+//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ 
-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS 
slot
+//
+//===--===//
+
+#define DEBUG_TYPE misched
+
+#include R600MachineScheduler.h
+#include llvm/CodeGen/MachineRegisterInfo.h
+#include llvm/CodeGen/LiveIntervalAnalysis.h
+#include llvm/Pass.h
+#include llvm/PassManager.h
+#include set
+#include iostream
+using namespace llvm;
+
+void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
+
+  DAG = dag;
+  TII = static_castconst R600InstrInfo*(DAG-TII);
+  TRI = static_castconst R600RegisterInfo*(DAG-TRI);
+  MRI = DAG-MRI;
+  Available[IDAlu]-clear();
+  Available[IDFetch]-clear();
+  Available[IDOther]-clear();
+  CurInstKind = IDOther;
+  CurEmitted = 0;
+  memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
+  InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
+
+
+  const AMDGPUSubtarget ST = DAG-TM.getSubtargetAMDGPUSubtarget();
+  if (ST.device()-getGeneration() = AMDGPUDeviceInfo::HD5XXX) {
+InstKindLimit[IDFetch] = 7; // 8 minus 1 for security
+  } else {
+InstKindLimit[IDFetch] = 15; // 16 minus 1 for security
+  }
+}
+
+void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst)
+{
+  if (QSrc-empty())
+return;
+  for (ReadyQueue::iterator I = QSrc-begin(),
+  E = QSrc-end

[Mesa-dev] Re : Re: r600g: status of my work on the shader optimization

2013-02-15 Thread Vincent Lejeune
I think the bad result of LLVM can be explained by the current lack of muladd 
support. Unigine 3.0 has a lot of geometry, and I suspect that vertex shaders 
being almost twice as big as they are in the TGSI case does not help.

FWIW, with an HD 6950 I get the same performance in Unigine 3 at high, medium 
textures, no SSAO (it seems to use indirect addressing) with the LLVM backend as 
at high, high textures, no SSAO with fglrx under Windows. It's not a fair 
comparison, but I think the 3.8 kernel may provide the necessary boost to catch up 
with fglrx. Anyway, I see some FPS peaks at 60 fps that do not show up with the 
TGSI backend but that I also see with fglrx, which makes me think the LLVM backend 
generates rather efficient code; note, however, that I always cherry-pick the muladd patches.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] R600: Increase number of ArrayBase Reg to 32

2013-02-14 Thread Vincent Lejeune
---
 lib/Target/R600/R600RegisterInfo.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/Target/R600/R600RegisterInfo.td 
b/lib/Target/R600/R600RegisterInfo.td
index 3812eb7..0718854 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -44,7 +44,7 @@ foreach Index = 0-127 in {
 }
 
 // Array Base Register holding input in FS
-foreach Index = 448-464 in {
+foreach Index = 448-480 in {
   def ArrayBase#Index :  R600RegARRAY_BASE, Index;
 }
 
@@ -66,7 +66,7 @@ def PRED_SEL_ONE : R600RegPred_sel_one, 3;
 def AR_X : R600RegAR.x, 0;
 
 def R600_ArrayBase : RegisterClass AMDGPU, [f32, i32], 32,
-  (add (sequence ArrayBase%u, 448, 464));
+  (add (sequence ArrayBase%u, 448, 480));
 // special registers for ALU src operands
 // const buffer reference, SRCx_SEL contains index
 def ALU_CONST : R600RegCBuf, 0;
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] R600: Support for TBO

2013-02-14 Thread Vincent Lejeune
NOTE: This is a candidate for the Mesa stable branch.
---
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp |  3 +-
 lib/Target/R600/R600Instructions.td| 54 ++
 lib/Target/R600/R600Intrinsics.td  |  2 +
 3 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 
b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index e061b18..7ec783f 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -165,7 +165,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst MI, 
raw_ostream OS,
 case AMDGPU::VTX_READ_GLOBAL_8_eg:
 case AMDGPU::VTX_READ_GLOBAL_32_eg:
 case AMDGPU::VTX_READ_GLOBAL_128_eg:
-case AMDGPU::TEX_VTX_CONSTBUF: {
+case AMDGPU::TEX_VTX_CONSTBUF:
+case AMDGPU::TEX_VTX_TEXBUF : {
   uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
   uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
 
diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index 529a4ed..e7efd0b 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1710,6 +1710,60 @@ def TEX_VTX_CONSTBUF :
 // Inst{127-96} = 0;
 }
 
+def TEX_VTX_TEXBUF:
+  InstR600ISA (outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), 
TEX_VTX_EXPLICIT_READ $dst, $ptr,
+  [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, 
imm:$BUFFER_ID))],
+VTX_WORD1_GPR, VTX_WORD0 {
+
+let VC_INST = 0;
+let FETCH_TYPE = 2;
+let FETCH_WHOLE_QUAD = 0;
+let SRC_REL = 0;
+let SRC_SEL_X = 0;
+let DST_REL = 0;
+let USE_CONST_FIELDS = 1;
+let NUM_FORMAT_ALL = 0;
+let FORMAT_COMP_ALL = 0;
+let SRF_MODE_ALL = 1;
+let MEGA_FETCH_COUNT = 16;
+let DST_SEL_X= 0;
+let DST_SEL_Y= 1;
+let DST_SEL_Z= 2;
+let DST_SEL_W= 3;
+let DATA_FORMAT  = 0;
+
+let Inst{31-0} = Word0;
+let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits16 OFFSET;
+// bits2  ENDIAN_SWAP = 0;
+// bits1  CONST_BUF_NO_STRIDE = 0;
+// bits1  MEGA_FETCH = 0;
+// bits1  ALT_CONST = 0;
+// bits2  BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82}= CONST_BUF_NO_STRIDE;
+// Inst{83}= MEGA_FETCH;
+// Inst{84}= ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-86} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+
 
 //======//
 // Instructions support
diff --git a/lib/Target/R600/R600Intrinsics.td 
b/lib/Target/R600/R600Intrinsics.td
index b5e4f1e..dc8980a 100644
--- a/lib/Target/R600/R600Intrinsics.td
+++ b/lib/Target/R600/R600Intrinsics.td
@@ -16,6 +16,8 @@ let TargetPrefix = R600, isTarget = 1 in {
 Intrinsic[llvm_float_ty], [llvm_i32_ty], [IntrNoMem];
   def int_R600_interp_input :
 Intrinsic[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem];
+  def int_R600_load_texbuf :
+Intrinsic[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem];
   def int_R600_store_swizzle :
 Intrinsic[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [];
   def int_R600_store_stream_output :
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] r600g/llvm: Add support for UBO

2013-02-14 Thread Vincent Lejeune
NOTE: This is a candidate for the Mesa stable branch.
---
 src/gallium/drivers/r600/r600_llvm.c|  6 +-
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 17 +
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index fa66fcc..7a41688 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -38,8 +38,12 @@ static LLVMValueRef llvm_fetch_const(
LLVMValueRef index = 
LLVMBuildLoad(bld_base-base.gallivm-builder, 
bld-addr[reg-Indirect.Index][reg-Indirect.SwizzleX], );
offset[1] = LLVMBuildAdd(bld_base-base.gallivm-builder, 
offset[1], index, );
}
+   unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ;
+   if (reg-Register.Dimension) {
+   ConstantAddressSpace += reg-Dimension.Index;
+   }
LLVMTypeRef const_ptr_type = 
LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base-base.elem_type, 4), 
1024),
-   
CONSTANT_BUFFER_0_ADDR_SPACE);
+   ConstantAddressSpace);
LLVMValueRef const_ptr = 
LLVMBuildIntToPtr(bld_base-base.gallivm-builder, 
lp_build_const_int32(bld_base-base.gallivm, 0), const_ptr_type, );
LLVMValueRef ptr = LLVMBuildGEP(bld_base-base.gallivm-builder, 
const_ptr, offset, 2, );
LLVMValueRef cvecval = LLVMBuildLoad(bld_base-base.gallivm-builder, 
ptr, );
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 0f90991..8902ae4 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -766,6 +766,22 @@ static void emit_icmp(
emit_data-output[emit_data-chan] = v;
 }
 
+static void emit_ucmp(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   unsigned pred;
+   LLVMBuilderRef builder = bld_base-base.gallivm-builder;
+   LLVMContextRef context = bld_base-base.gallivm-context;
+
+
+   LLVMValueRef v = LLVMBuildFCmp(builder, LLVMRealUGE,
+   emit_data-args[0], 
lp_build_const_float(bld_base-base.gallivm, 0.), );
+
+   emit_data-output[emit_data-chan] = LLVMBuildSelect(builder, v, 
emit_data-args[2], emit_data-args[1], );
+}
+
 static void emit_cmp(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context * bld_base,
@@ -1241,6 +1257,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context 
* ctx)
bld_base-op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
bld_base-op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
bld_base-op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
+   bld_base-op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
 
bld_base-rsq_action.emit = build_tgsi_intrinsic_nomem;
bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq;
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] r600g/llvm: Fix alpha_to_one piglit tests

2013-02-14 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c | 2 ++
 src/gallium/drivers/r600/r600_shader.c   | 1 +
 src/gallium/drivers/radeon/radeon_llvm.h | 1 +
 3 files changed, 4 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 7a41688..59047e7 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -234,6 +234,8 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context 
* bld_base)
elements[chan] = LLVMBuildLoad(base-gallivm-builder,
ctx-soa.outputs[i][chan], );
}
+   if (ctx-alpha_to_one  ctx-type == TGSI_PROCESSOR_FRAGMENT 
 ctx-r600_outputs[i].name == TGSI_SEMANTIC_COLOR)
+   elements[3] = lp_build_const_float(base-gallivm, 1.0f);
LLVMValueRef output = lp_build_gather_values(base-gallivm, 
elements, 4);
 
if (ctx-type == TGSI_PROCESSOR_VERTEX) {
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 59a7f92..8642463 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1428,6 +1428,7 @@ static int r600_shader_from_tgsi(struct r600_screen 
*rscreen,
radeon_llvm_ctx.fs_color_all = shader-fs_write_all  
(rscreen-chip_class = EVERGREEN);
radeon_llvm_ctx.stream_outputs = so;
radeon_llvm_ctx.clip_vertex = ctx.cv_output;
+   radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
mod = r600_tgsi_llvm(radeon_llvm_ctx, tokens);
if (debug_get_bool_option(R600_DUMP_SHADERS, FALSE)) {
dump = 1;
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 21360e2..bfeacb5 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -64,6 +64,7 @@ struct radeon_llvm_context {
struct pipe_stream_output_info *stream_outputs;
unsigned color_buffer_count;
unsigned fs_color_all;
+   unsigned alpha_to_one;
 
/*=== Front end configuration ===*/
 
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] r600g/llvm: Set Inputs/Outputs count to 32 (api reported value)

2013-02-14 Thread Vincent Lejeune
---
 src/gallium/drivers/radeon/radeon_llvm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index bfeacb5..b1e025b 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -31,8 +31,8 @@
 #include gallivm/lp_bld_init.h
 #include gallivm/lp_bld_tgsi.h
 
-#define RADEON_LLVM_MAX_INPUTS 16 * 4
-#define RADEON_LLVM_MAX_OUTPUTS 16 * 4
+#define RADEON_LLVM_MAX_INPUTS 32 * 4
+#define RADEON_LLVM_MAX_OUTPUTS 32 * 4
 #define RADEON_LLVM_MAX_BRANCH_DEPTH 16
 #define RADEON_LLVM_MAX_LOOP_DEPTH 16
 
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] r600g/llvm: Support for TBO

2013-02-14 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_llvm.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 59047e7..89bcb79 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -412,6 +412,35 @@ static void llvm_emit_tex(
LLVMValueRef args[6];
unsigned c, sampler_src;
 
+   if (emit_data-inst-Texture.Texture == TGSI_TEXTURE_BUFFER) {
+   switch (emit_data-inst-Instruction.Opcode) {
+   case TGSI_OPCODE_TXQ: {
+   LLVMValueRef offset[2] = {
+   
LLVMConstInt(LLVMInt64TypeInContext(bld_base-base.gallivm-context), 0, false),
+   lp_build_const_int32(bld_base-base.gallivm, 1)
+   };
+   LLVMTypeRef const_ptr_type = 
LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base-base.elem_type, 4), 
1024),
+   
R600_BUFFER_INFO_CONST_BUFFER);
+   LLVMValueRef const_ptr = 
LLVMBuildIntToPtr(bld_base-base.gallivm-builder, 
lp_build_const_int32(bld_base-base.gallivm, 0), const_ptr_type, );
+   LLVMValueRef ptr = 
LLVMBuildGEP(bld_base-base.gallivm-builder, const_ptr, offset, 2, );
+   LLVMValueRef cvecval = 
LLVMBuildLoad(bld_base-base.gallivm-builder, ptr, );
+   emit_data-output[0] = cvecval;
+   break;
+   }
+   case TGSI_OPCODE_TXF: {
+   args[0] = LLVMBuildExtractElement(gallivm-builder, 
emit_data-args[0], lp_build_const_int32(gallivm, 0), );
+   args[1] = lp_build_const_int32(gallivm, 
R600_MAX_CONST_BUFFERS);
+   emit_data-output[0] = build_intrinsic(gallivm-builder,
+   llvm.R600.load.texbuf,
+   emit_data-dst_type, 
args, 2, LLVMReadNoneAttribute);
+   }
+   break;
+   default:
+   assert(0  Unknow Texture Buffer Instruction !);
+   }
+   return;
+   }
+
assert(emit_data-arg_count + 2 = Elements(args));
 
for (c = 0; c  emit_data-arg_count; ++c)
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Report Instructions Group count with R600_DUMP_SHADERS=1

2013-02-13 Thread Vincent Lejeune
---
 src/gallium/drivers/r600/r600_asm.c | 7 ++-
 src/gallium/drivers/r600/r600_asm.h | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 3632aa5..eacdb0c 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -812,6 +812,8 @@ static int merge_inst_groups(struct r600_bytecode *bc, 
struct r600_bytecode_alu
 
/* looks like everything worked out right, apply the changes */
 
+   bc-nig --;
+
/* undo adding previus literals */
bc-cf_last-ndw -= align(prev_nliteral, 2);
 
@@ -1140,6 +1142,9 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
if (nalu-dst.sel = bc-ngpr) {
bc-ngpr = nalu-dst.sel + 1;
}
+   if (nalu-last) {
+   bc-nig ++;
+   }
LIST_ADDTAIL(nalu-list, bc-cf_last-alu);
/* each alu use 2 dwords */
bc-cf_last-ndw += 2;
@@ -2105,7 +2110,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
chip = '6';
break;
}
-   fprintf(stderr, bytecode %d dw -- %d gprs -\n, 
bc-ndw, bc-ngpr);
+   fprintf(stderr, bytecode %d dw -- %d gprs -- %d ig-\n, 
bc-ndw, bc-ngpr, bc-nig);
fprintf(stderr,  %c\n, chip);
 
LIST_FOR_EACH_ENTRY(cf, bc-cf, list) {
diff --git a/src/gallium/drivers/r600/r600_asm.h 
b/src/gallium/drivers/r600/r600_asm.h
index 03cd238..1638ca0 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -192,6 +192,7 @@ struct r600_bytecode {
struct r600_bytecode_cf *cf_last;
unsignedndw;
unsignedncf;
+   unsignednig; // Number of Instructions Group
unsignedngpr;
unsignednstack;
unsignednresource;
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600/SI: Do not fold single instruction with more than 3 kcache reads

2013-02-12 Thread Vincent Lejeune
It fixes around 100 tfb piglit tests and 16 glean tests.

NOTE: This is a candidate for the Mesa stable branch.
---
 lib/Target/R600/AMDILISelDAGToDAG.cpp  | 2 ++
 lib/Target/R600/R600LowerConstCopy.cpp | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp 
b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 2f34fe3..858eb5d 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -358,6 +358,8 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
 SDValue Operand = Ops[OperandIdx[i] - 1];
 switch (Operand.getOpcode()) {
 case AMDGPUISD::CONST_ADDRESS: {
+  if (i == 2)
+break;
   SDValue CstOffset;
   if (!Operand.getValueType().isVector() 
   SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
diff --git a/lib/Target/R600/R600LowerConstCopy.cpp 
b/lib/Target/R600/R600LowerConstCopy.cpp
index 2557e8f..c8c27a8 100644
--- a/lib/Target/R600/R600LowerConstCopy.cpp
+++ b/lib/Target/R600/R600LowerConstCopy.cpp
@@ -180,7 +180,7 @@ bool 
R600LowerConstCopy::runOnMachineFunction(MachineFunction MF) {
 int ConstMovSel =
 TII-getOperandIdx(CstMov-getOpcode(), 
R600Operands::SRC0_SEL);
 unsigned ConstIndex = CstMov-getOperand(ConstMovSel).getImm();
-if (canFoldInBundle(CP, ConstIndex)) {
+if (MI-isInsideBundle()  canFoldInBundle(CP, ConstIndex)) {
   TII-setImmOperand(MI, OpTable[SrcOp][1], ConstIndex);
   MI-getOperand(SrcIdx).setReg(AMDGPU::ALU_CONST);
 } else {
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600/SI: Add support for indirect addressing of non default const buffer

2013-02-12 Thread Vincent Lejeune
NOTE: This is a candidate for the Mesa stable branch.
---
 lib/Target/R600/R600ISelLowering.cpp | 6 --
 lib/Target/R600/R600Instructions.td  | 9 -
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/lib/Target/R600/R600ISelLowering.cpp 
b/lib/Target/R600/R600ISelLowering.cpp
index 21d301c..c4cb870 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -911,7 +911,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, 
SelectionDAG DAG) const
   if (ConstantBlock  -1) {
 SDValue Result;
 if (dyn_castConstantExpr(LoadNode-getSrcValue()) ||
-dyn_castConstant(LoadNode-getSrcValue())) {
+dyn_castConstant(LoadNode-getSrcValue()) ||
+dyn_castConstantSDNode(Ptr)) {
   SDValue Slots[4];
   for (unsigned i = 0; i  4; i++) {
 // We want Const position encoded with the following formula :
@@ -927,7 +928,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, 
SelectionDAG DAG) const
 } else {
   // non constant ptr cant be folded, keeps it as a v4f32 load
   Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
-  DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, 
MVT::i32))
+  DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, 
MVT::i32)),
+  DAG.getConstant(LoadNode-getAddressSpace() - 9, MVT::i32)
   );
 }
 
diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index 50ff6aa..529a4ed 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -512,8 +512,8 @@ def INTERP_PAIR_ZW :  AMDGPUShaderInst 
   [];
 
 def CONST_ADDRESS: SDNodeAMDGPUISD::CONST_ADDRESS,
-  SDTypeProfile1, 1, [SDTCisInt0, SDTCisPtrTy1],
-  [SDNPMayLoad]
+  SDTypeProfile1, -1, [SDTCisInt0, SDTCisPtrTy1],
+  [SDNPMayLoad, SDNPVariadic]
 ;
 
 
//===--===//
@@ -1658,14 +1658,13 @@ def CONST_COPY : Instruction {
 } // end isCodeGenOnly = 1, isPseudo = 1, Namespace = AMDGPU
 
 def TEX_VTX_CONSTBUF :
-  InstR600ISA (outs R600_Reg128:$dst), (ins MEMxi:$ptr), VTX_READ_eg $dst, 
$ptr,
-  [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))],
+  InstR600ISA (outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), 
VTX_READ_eg $dst, $ptr,
+  [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 
imm:$BUFFER_ID)))],
   VTX_WORD1_GPR, VTX_WORD0 {
 
   let VC_INST = 0;
   let FETCH_TYPE = 2;
   let FETCH_WHOLE_QUAD = 0;
-  let BUFFER_ID = 0;
   let SRC_REL = 0;
   let SRC_SEL_X = 0;
   let DST_REL = 0;
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g/llvm: Add support for UBO

2013-02-12 Thread Vincent Lejeune
NOTE: This is a candidate for the Mesa stable branch.
---
 src/gallium/drivers/r600/r600_llvm.c| 10 +++---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 17 +
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c   |  4 ++--
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index fa66fcc..e8b4679 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -18,7 +18,7 @@
 
 #include stdio.h
 
-#if defined R600_USE_LLVM || defined HAVE_OPENCL
+//#if defined R600_USE_LLVM || defined HAVE_OPENCL
 
 #define CONSTANT_BUFFER_0_ADDR_SPACE 9
 #define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + 
R600_UCP_CONST_BUFFER)
@@ -38,8 +38,12 @@ static LLVMValueRef llvm_fetch_const(
LLVMValueRef index = 
LLVMBuildLoad(bld_base-base.gallivm-builder, 
bld-addr[reg-Indirect.Index][reg-Indirect.SwizzleX], );
offset[1] = LLVMBuildAdd(bld_base-base.gallivm-builder, 
offset[1], index, );
}
+   unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ;
+   if (reg-Register.Dimension) {
+   ConstantAddressSpace += reg-Dimension.Index;
+   }
LLVMTypeRef const_ptr_type = 
LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base-base.elem_type, 4), 
1024),
-   
CONSTANT_BUFFER_0_ADDR_SPACE);
+   ConstantAddressSpace);
LLVMValueRef const_ptr = 
LLVMBuildIntToPtr(bld_base-base.gallivm-builder, 
lp_build_const_int32(bld_base-base.gallivm, 0), const_ptr_type, );
LLVMValueRef ptr = LLVMBuildGEP(bld_base-base.gallivm-builder, 
const_ptr, offset, 2, );
LLVMValueRef cvecval = LLVMBuildLoad(bld_base-base.gallivm-builder, 
ptr, );
@@ -602,4 +606,4 @@ unsigned r600_llvm_compile(
gpu_family, dump);
 }
 
-#endif
+//#endif
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 0f90991..8902ae4 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -766,6 +766,22 @@ static void emit_icmp(
emit_data-output[emit_data-chan] = v;
 }
 
+static void emit_ucmp(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   unsigned pred;
+   LLVMBuilderRef builder = bld_base-base.gallivm-builder;
+   LLVMContextRef context = bld_base-base.gallivm-context;
+
+
+   LLVMValueRef v = LLVMBuildFCmp(builder, LLVMRealUGE,
+   emit_data-args[0], 
lp_build_const_float(bld_base-base.gallivm, 0.), );
+
+   emit_data-output[emit_data-chan] = LLVMBuildSelect(builder, v, 
emit_data-args[2], emit_data-args[1], );
+}
+
 static void emit_cmp(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context * bld_base,
@@ -1241,6 +1257,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context 
* ctx)
bld_base-op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
bld_base-op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
bld_base-op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
+   bld_base-op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
 
bld_base-rsq_action.emit = build_tgsi_intrinsic_nomem;
bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 62ba4b1..bbfe664 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -372,7 +372,7 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
 }
 
 ws-info.r600_virtual_address = FALSE;
-if (ws-info.drm_minor = 13) {
+/*if (ws-info.drm_minor = 13) {
 ws-info.r600_virtual_address = TRUE;
 if (!radeon_get_drm_value(ws-fd, RADEON_INFO_VA_START, NULL,
   ws-info.r600_va_start))
@@ -380,7 +380,7 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
 if (!radeon_get_drm_value(ws-fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
   ws-info.r600_ib_vm_max_size))
 ws-info.r600_virtual_address = FALSE;
-}
+}*/
 }
 
 /* Get max pipes, this is only needed for compute shaders.  All evergreen+
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600: Do not fold single instruction with more than 3 kcache reads

2013-02-12 Thread Vincent Lejeune
It fixes around 100 tfb piglit tests and 16 glean tests.

NOTE: This is a candidate for the Mesa stable branch.
---
 lib/Target/R600/AMDILISelDAGToDAG.cpp  |  2 ++
 lib/Target/R600/R600LowerConstCopy.cpp |  2 +-
 test/CodeGen/R600/kcache-fold.ll   | 52 ++
 3 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/R600/kcache-fold.ll

diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp 
b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 2f34fe3..858eb5d 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -358,6 +358,8 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
 SDValue Operand = Ops[OperandIdx[i] - 1];
 switch (Operand.getOpcode()) {
 case AMDGPUISD::CONST_ADDRESS: {
+  if (i == 2)
+break;
   SDValue CstOffset;
   if (!Operand.getValueType().isVector() 
   SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
diff --git a/lib/Target/R600/R600LowerConstCopy.cpp 
b/lib/Target/R600/R600LowerConstCopy.cpp
index 2557e8f..c8c27a8 100644
--- a/lib/Target/R600/R600LowerConstCopy.cpp
+++ b/lib/Target/R600/R600LowerConstCopy.cpp
@@ -180,7 +180,7 @@ bool 
R600LowerConstCopy::runOnMachineFunction(MachineFunction MF) {
 int ConstMovSel =
 TII-getOperandIdx(CstMov-getOpcode(), 
R600Operands::SRC0_SEL);
 unsigned ConstIndex = CstMov-getOperand(ConstMovSel).getImm();
-if (canFoldInBundle(CP, ConstIndex)) {
+if (MI-isInsideBundle()  canFoldInBundle(CP, ConstIndex)) {
   TII-setImmOperand(MI, OpTable[SrcOp][1], ConstIndex);
   MI-getOperand(SrcIdx).setReg(AMDGPU::ALU_CONST);
 } else {
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
new file mode 100644
index 000..382f78c
--- /dev/null
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -0,0 +1,52 @@
+;RUN: llc  %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: MOV T{{[0-9]+\.[XYZW], CBuf0\[[0-9]+\]\.[XYZW]}}
+
+define void @main() {
+main_body:
+  %0 = load 4 x float addrspace(9)* null
+  %1 = extractelement 4 x float %0, i32 0
+  %2 = load 4 x float addrspace(9)* getelementptr ([1024 x 4 x float] 
addrspace(9)* null, i64 0, i32 1)
+  %3 = extractelement 4 x float %2, i32 0
+  %4 = load 4 x float addrspace(9)* getelementptr ([1024 x 4 x float] 
addrspace(9)* null, i64 0, i32 2)
+  %5 = extractelement 4 x float %4, i32 0
+  %6 = fcmp ult float %1, 0.00e+00
+  %7 = select i1 %6, float %3, float %5
+  %8 = load 4 x float addrspace(9)* null
+  %9 = extractelement 4 x float %8, i32 1
+  %10 = load 4 x float addrspace(9)* getelementptr ([1024 x 4 x float] 
addrspace(9)* null, i64 0, i32 1)
+  %11 = extractelement 4 x float %10, i32 1
+  %12 = load 4 x float addrspace(9)* getelementptr ([1024 x 4 x float] 
addrspace(9)* null, i64 0, i32 2)
+  %13 = extractelement 4 x float %12, i32 1
+  %14 = fcmp ult float %9, 0.00e+00
+  %15 = select i1 %14, float %11, float %13
+  %16 = load 4 x float addrspace(9)* null
+  %17 = extractelement 4 x float %16, i32 2
+  %18 = load 4 x float addrspace(9)* getelementptr ([1024 x 4 x float] 
addrspace(9)* null, i64 0, i32 1)
+  %19 = extractelement 4 x float %18, i32 2
+  %20 = load 4 x float addrspace(9)* getelementptr ([1024 x 4 x float] 
addrspace(9)* null, i64 0, i32 2)
+  %21 = extractelement 4 x float %20, i32 2
+  %22 = fcmp ult float %17, 0.00e+00
+  %23 = select i1 %22, float %19, float %21
+  %24 = load 4 x float addrspace(9)* null
+  %25 = extractelement 4 x float %24, i32 3
+  %26 = load 4 x float addrspace(9)* getelementptr ([1024 x 4 x float] 
addrspace(9)* null, i64 0, i32 1)
+  %27 = extractelement 4 x float %26, i32 3
+  %28 = load 4 x float addrspace(9)* getelementptr ([1024 x 4 x float] 
addrspace(9)* null, i64 0, i32 2)
+  %29 = extractelement 4 x float %28, i32 3
+  %30 = fcmp ult float %25, 0.00e+00
+  %31 = select i1 %30, float %27, float %29
+  %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.00e+00, float 
1.00e+00)
+  %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.00e+00, float 
1.00e+00)
+  %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.00e+00, float 
1.00e+00)
+  %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.00e+00, float 
1.00e+00)
+  %36 = insertelement 4 x float undef, float %32, i32 0
+  %37 = insertelement 4 x float %36, float %33, i32 1
+  %38 = insertelement 4 x float %37, float %34, i32 2
+  %39 = insertelement 4 x float %38, float %35, i32 3
+  call void @llvm.R600.store.swizzle(4 x float %39, i32 0, i32 0)
+  ret void
+}
+
+declare float @llvm.AMDIL.clamp.(float, float, float) readnone
+declare void @llvm.R600.store.swizzle(4 x float, i32, i32)
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] R600/SI: Turn BUILD_VECTOR into Reg_Sequence

2013-02-11 Thread Vincent Lejeune
---
 lib/Target/R600/AMDILISelDAGToDAG.cpp | 24 
 1 file changed, 24 insertions(+)

diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp 
b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index b125ba8..2f34fe3 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -160,6 +160,30 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   }
   switch (Opc) {
   default: break;
+  case ISD::BUILD_VECTOR: {
+// BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+// that adds a 128 bits reg copy when going through TwoAddressInstructions
+// pass. We want to avoid 128 bits copies as much as possible because they
+// can't be bundled by our scheduler.
+SDValue RegSeqArgs[9] = {
+  CurDAG-getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
+  SDValue(), CurDAG-getTargetConstant(AMDGPU::sub0, MVT::i32),
+  SDValue(), CurDAG-getTargetConstant(AMDGPU::sub1, MVT::i32),
+  SDValue(), CurDAG-getTargetConstant(AMDGPU::sub2, MVT::i32),
+  SDValue(), CurDAG-getTargetConstant(AMDGPU::sub3, MVT::i32)
+};
+bool IsRegSeq = true;
+for (unsigned i = 0; i  N-getNumOperands(); i++) {
+  if (dyn_castRegisterSDNode(N-getOperand(i))) {
+IsRegSeq = false;
+break;
+  }
+  RegSeqArgs[2 * i + 1] = N-getOperand(i);
+}
+if (!IsRegSeq)
+  break;
+return CurDAG-SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N-getVTList(), 
RegSeqArgs, 2 * N-getNumOperands() + 1);
+  }
   case ISD::ConstantFP:
   case ISD::Constant: {
 const AMDGPUSubtarget ST = TM.getSubtargetAMDGPUSubtarget();
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] R600: initial scheduler code

2013-02-11 Thread Vincent Lejeune
From: Vadim Girlin vadimgir...@gmail.com

This is a skeleton for a pre-RA MachineInstr scheduler strategy. Currently
it only tries to expose more parallelism for ALU instructions (this also
makes the distribution of GPR channels more uniform and increases the
chances of ALU instructions being packed together in a single VLIW group).
It also tries to reduce clause switching by grouping instructions of the
same kind (ALU/FETCH/CF) together.

Vincent Lejeune:
 - Support for VLIW4 Slot assignment
 - Recomputation of ScheduleDAG to get more parallelism opportunities
---
 lib/Target/R600/AMDGPUTargetMachine.cpp  |  17 +-
 lib/Target/R600/R600MachineScheduler.cpp | 452 +++
 lib/Target/R600/R600MachineScheduler.h   | 119 
 3 files changed, 587 insertions(+), 1 deletion(-)
 create mode 100644 lib/Target/R600/R600MachineScheduler.cpp
 create mode 100644 lib/Target/R600/R600MachineScheduler.h

diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 821e864..e6070cd 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -17,6 +17,7 @@
 #include AMDGPU.h
 #include R600ISelLowering.h
 #include R600InstrInfo.h
+#include R600MachineScheduler.h
 #include SIISelLowering.h
 #include SIInstrInfo.h
 #include llvm/Analysis/Passes.h
@@ -39,6 +40,14 @@ extern C void LLVMInitializeR600Target() {
   RegisterTargetMachineAMDGPUTargetMachine X(TheAMDGPUTarget);
 }
 
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+  return new ScheduleDAGMI(C, new R600SchedStrategy());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry(r600, Run R600's custom scheduler,
+createR600MachineScheduler);
+
 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target T, StringRef TT,
 StringRef CPU, StringRef FS,
   TargetOptions Options,
@@ -70,7 +79,13 @@ namespace {
 class AMDGPUPassConfig : public TargetPassConfig {
 public:
   AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase PM)
-: TargetPassConfig(TM, PM) {}
+: TargetPassConfig(TM, PM) {
+const AMDGPUSubtarget ST = TM-getSubtargetAMDGPUSubtarget();
+if (ST.device()-getGeneration() = AMDGPUDeviceInfo::HD6XXX) {
+  enablePass(MachineSchedulerID);
+  MachineSchedRegistry::setDefault(createR600MachineScheduler);
+}
+  }
 
   AMDGPUTargetMachine getAMDGPUTargetMachine() const {
 return getTMAMDGPUTargetMachine();
diff --git a/lib/Target/R600/R600MachineScheduler.cpp 
b/lib/Target/R600/R600MachineScheduler.cpp
new file mode 100644
index 000..229374c
--- /dev/null
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -0,0 +1,452 @@
+//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ 
-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS 
slot
+//
+//===--===//
+
+#define DEBUG_TYPE misched
+
+#include R600MachineScheduler.h
+#include llvm/CodeGen/MachineRegisterInfo.h
+#include llvm/CodeGen/LiveIntervalAnalysis.h
+#include llvm/Pass.h
+#include llvm/PassManager.h
+#include set
+#include iostream
+using namespace llvm;
+
+/// \brief Recompute Output and Anti dependencies of incoming dag
+/// ScheduleDAGInstrs has a conservative policy about subregisters 
dependencies.
+/// All subreg write of a same superreg will be chained by Output/Anti deps.
+/// These artificial deps delay releases of MI and thus reduce parallelism
+/// opportunities. This function recomputes the ScheduleDag to produce proper
+/// subreg aware dependencies.
+// Todo : It should also recompute Data dependencies
+static
+void RecomputeScheduleDAGMI(ScheduleDAGMI *dag) {
+
+  // Remove all Output/Anti deps
+  for (unsigned i = 0; i  dag-SUnits.size(); ++i) {
+SUnit SU = dag-SUnits[i];
+for (SUnit::pred_iterator SUIt = SU.Preds.begin(), SUE = SU.Preds.end(); 
+SUIt != SUE; ++SUIt) {
+  SDep SD = *SUIt;
+  SUnit *SUPred = SD.getSUnit();
+  if (SD.getKind() == SDep::Output) {
+SUPred-removePred(SD);
+  }
+}
+  }
+
+// Now recompute output/anti dependencies
+  for (unsigned i = 0; i  dag-SUnits.size(); ++i) {
+SUnit SU = dag-SUnits[i];
+MachineOperand DestMO = SU.getInstr()-getOperand(0);
+unsigned DestReg = SU.getInstr()-getOperand(0).getReg();
+DEBUG(dbgs()  Recomputing deps for ; SU.dump(dag); dbgs()  \n;);
+// Using LiveInterval should make things a lot more efficient, but we
+// can't access them inside a MachineSchedStrategy.
+// Scheduling occurs on a per MBB basis, so it is sufficient to get deps

Re: [Mesa-dev] [PATCH] R600: Fix regression with shadow array sampler on pre-SI GPUs.

2013-02-11 Thread Vincent Lejeune




- Mail original -
 De : Michel Dänzer mic...@daenzer.net
 À : Vincent Lejeune v...@ovi.com
 Cc : mesa-dev@lists.freedesktop.org
 Envoyé le : Lundi 11 février 2013 17h53
 Objet : [PATCH] R600: Fix regression with shadow array sampler on pre-SI GPUs.
 
 From: Michel Dänzer michel.daen...@amd.com
 
 'R600/SI: Use proper instructions for array/shadow samplers.' removed 
 two
 cases from TEX_SHADOW. Vincent Lejeune reported on IRC that this broke some
 shadow array piglit tests with the r600g driver. Reinstating the removed
 cases should fix this, and still works with radeonsi as well.
 
 Signed-off-by: Michel Dänzer michel.daen...@amd.com
 ---
 
 Vincent, can you confirm this fixes the regression?

It does, thanks!

 
 lib/Target/R600/R600Instructions.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/lib/Target/R600/R600Instructions.td 
 b/lib/Target/R600/R600Instructions.td
 index d307ed2..1069570 100644
 --- a/lib/Target/R600/R600Instructions.td
 +++ b/lib/Target/R600/R600Instructions.td
 @@ -399,7 +399,7 @@ class R600_TEX bits11 inst, string opName, 
 listdag pattern,
 def TEX_SHADOW : PatLeaf
    (imm),
    [{uint32_t TType = (uint32_t)N-getZExtValue();
 -    return (TType = 6  TType = 8) || TType == 13;
 +    return (TType = 6  TType = 8) || (TType = 11 
  TType = 13);
    }]
 ;
 
 -- 
 1.8.1.3
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600/SI: Use MULADD_IEEE/V_MAD_F32 instruction for mad pattern

2013-02-10 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPUISelLowering.cpp | 10 +++---
 lib/Target/R600/AMDGPUISelLowering.h   |  1 -
 lib/Target/R600/AMDILISelLowering.cpp  |  3 ++-
 lib/Target/R600/AMDILInstrInfo.td  |  1 -
 lib/Target/R600/AMDILIntrinsics.td | 10 --
 lib/Target/R600/R600Instructions.td|  9 -
 lib/Target/R600/SIInstructions.td  |  4 ++--
 test/CodeGen/R600/fmad.ll  | 19 +++
 8 files changed, 34 insertions(+), 23 deletions(-)
 create mode 100644 test/CodeGen/R600/fmad.ll

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp 
b/lib/Target/R600/AMDGPUISelLowering.cpp
index d0d23d6..0a33264 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -127,9 +127,6 @@ SDValue 
AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   return LowerIntrinsicLRP(Op, DAG);
 case AMDGPUIntrinsic::AMDIL_fraction:
   return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
-case AMDGPUIntrinsic::AMDIL_mad:
-  return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-  Op.getOperand(2), Op.getOperand(3));
 case AMDGPUIntrinsic::AMDIL_max:
   return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
   Op.getOperand(2));
@@ -176,9 +173,9 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
 Op.getOperand(1));
   SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
 Op.getOperand(3));
-  return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-   Op.getOperand(2),
-   OneSubAC);
+  return DAG.getNode(ISD::FADD, DL, VT,
+  DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
+  OneSubAC);
 }
 
 /// \brief Generate Min/Max node
@@ -393,7 +390,6 @@ const char* 
AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch (Opcode) {
   default: return 0;
   // AMDIL DAG nodes
-  NODE_NAME_CASE(MAD);
   NODE_NAME_CASE(CALL);
   NODE_NAME_CASE(UMUL);
   NODE_NAME_CASE(DIV_INF);
diff --git a/lib/Target/R600/AMDGPUISelLowering.h 
b/lib/Target/R600/AMDGPUISelLowering.h
index 4b844a3..f27b5db 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -108,7 +108,6 @@ namespace AMDGPUISD {
 enum {
   // AMDIL ISD Opcodes
   FIRST_NUMBER = ISD::BUILTIN_OP_END,
-  MAD, // 32bit Fused Multiply Add instruction
   CALL,// Function call based on a single integer
   UMUL,// 32bit unsigned multiplication
   DIV_INF,  // Divide with infinity returned on zero divisor
diff --git a/lib/Target/R600/AMDILISelLowering.cpp 
b/lib/Target/R600/AMDILISelLowering.cpp
index 2e60adc..3480ac8 100644
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -451,7 +451,8 @@ AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG 
DAG) const {
   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
 
   // float fr = mad(fqneg, fb, fa);
-  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
+  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+  DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
 
   // int iq = (int)fq;
   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
diff --git a/lib/Target/R600/AMDILInstrInfo.td 
b/lib/Target/R600/AMDILInstrInfo.td
index e969bbf..110f147 100644
--- a/lib/Target/R600/AMDILInstrInfo.td
+++ b/lib/Target/R600/AMDILInstrInfo.td
@@ -116,7 +116,6 @@ def IL_retflag   : SDNodeAMDGPUISD::RET_FLAG, 
SDTNone,
 //======//
 // Floating point math functions
 def IL_div_inf  : SDNodeAMDGPUISD::DIV_INF, SDTIL_GenBinaryOp;
-def IL_mad  : SDNodeAMDGPUISD::MAD, SDTIL_GenTernaryOp;
 
 
//===--===//
 // Integer functions
diff --git a/lib/Target/R600/AMDILIntrinsics.td 
b/lib/Target/R600/AMDILIntrinsics.td
index 3f9e20f..6ec3559 100644
--- a/lib/Target/R600/AMDILIntrinsics.td
+++ b/lib/Target/R600/AMDILIntrinsics.td
@@ -92,12 +92,6 @@ let TargetPrefix = AMDIL, isTarget = 1 in {
   TernaryIntInt;
   def int_AMDIL_bfm : GCCBuiltin__amdil_bfm,
   BinaryIntInt;
-  def int_AMDIL_mad_i32 : GCCBuiltin__amdil_imad,
-  TernaryIntInt;
-  def int_AMDIL_mad_u32 : GCCBuiltin__amdil_umad,
-  TernaryIntInt;
-  def int_AMDIL_mad : GCCBuiltin__amdil_mad,
-  TernaryIntFloat;
   def int_AMDIL_mulhi_i32 : GCCBuiltin__amdil_imul_high,
   BinaryIntInt;
   def int_AMDIL_mulhi_u32 : GCCBuiltin__amdil_umul_high,
@@ -110,10 +104,6 @@ let TargetPrefix = AMDIL, isTarget = 1 in {
   BinaryIntInt;
   def int_AMDIL_mulhi24_u32 : GCCBuiltin__amdil_umul24_high,
   BinaryIntInt;
-  def int_AMDIL_mad24_i32 

[Mesa-dev] [PATCH] R600: Use MULADD_IEEE instruction for mad pattern

2013-02-07 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPUISelLowering.cpp | 10 +++---
 lib/Target/R600/AMDGPUISelLowering.h   |  1 -
 lib/Target/R600/AMDILISelLowering.cpp  |  3 ++-
 lib/Target/R600/AMDILInstrInfo.td  |  1 -
 lib/Target/R600/AMDILIntrinsics.td | 10 --
 lib/Target/R600/R600Instructions.td|  9 -
 lib/Target/R600/SIInstructions.td  |  2 +-
 test/CodeGen/R600/fmad.ll  | 19 +++
 8 files changed, 33 insertions(+), 22 deletions(-)
 create mode 100644 test/CodeGen/R600/fmad.ll

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp 
b/lib/Target/R600/AMDGPUISelLowering.cpp
index f3a047a..530da5a 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -127,9 +127,6 @@ SDValue 
AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   return LowerIntrinsicLRP(Op, DAG);
 case AMDGPUIntrinsic::AMDIL_fraction:
   return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
-case AMDGPUIntrinsic::AMDIL_mad:
-  return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-  Op.getOperand(2), Op.getOperand(3));
 case AMDGPUIntrinsic::AMDIL_max:
   return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
   Op.getOperand(2));
@@ -176,9 +173,9 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
 Op.getOperand(1));
   SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
 Op.getOperand(3));
-  return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-   Op.getOperand(2),
-   OneSubAC);
+  return DAG.getNode(ISD::FADD, DL, VT,
+  DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
+  OneSubAC);
 }
 
 /// \brief Generate Min/Max node
@@ -393,7 +390,6 @@ const char* 
AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch (Opcode) {
   default: return 0;
   // AMDIL DAG nodes
-  NODE_NAME_CASE(MAD);
   NODE_NAME_CASE(CALL);
   NODE_NAME_CASE(UMUL);
   NODE_NAME_CASE(DIV_INF);
diff --git a/lib/Target/R600/AMDGPUISelLowering.h 
b/lib/Target/R600/AMDGPUISelLowering.h
index 0584d39..e4d77e3 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -103,7 +103,6 @@ namespace AMDGPUISD {
 enum {
   // AMDIL ISD Opcodes
   FIRST_NUMBER = ISD::BUILTIN_OP_END,
-  MAD, // 32bit Fused Multiply Add instruction
   CALL,// Function call based on a single integer
   UMUL,// 32bit unsigned multiplication
   DIV_INF,  // Divide with infinity returned on zero divisor
diff --git a/lib/Target/R600/AMDILISelLowering.cpp 
b/lib/Target/R600/AMDILISelLowering.cpp
index 8bfd30c..1dd0270 100644
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -451,7 +451,8 @@ AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG 
DAG) const {
   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
 
   // float fr = mad(fqneg, fb, fa);
-  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
+  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+  DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
 
   // int iq = (int)fq;
   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
diff --git a/lib/Target/R600/AMDILInstrInfo.td 
b/lib/Target/R600/AMDILInstrInfo.td
index e969bbf..110f147 100644
--- a/lib/Target/R600/AMDILInstrInfo.td
+++ b/lib/Target/R600/AMDILInstrInfo.td
@@ -116,7 +116,6 @@ def IL_retflag   : SDNodeAMDGPUISD::RET_FLAG, 
SDTNone,
 //======//
 // Floating point math functions
 def IL_div_inf  : SDNodeAMDGPUISD::DIV_INF, SDTIL_GenBinaryOp;
-def IL_mad  : SDNodeAMDGPUISD::MAD, SDTIL_GenTernaryOp;
 
 
//===--===//
 // Integer functions
diff --git a/lib/Target/R600/AMDILIntrinsics.td 
b/lib/Target/R600/AMDILIntrinsics.td
index 3f9e20f..6ec3559 100644
--- a/lib/Target/R600/AMDILIntrinsics.td
+++ b/lib/Target/R600/AMDILIntrinsics.td
@@ -92,12 +92,6 @@ let TargetPrefix = AMDIL, isTarget = 1 in {
   TernaryIntInt;
   def int_AMDIL_bfm : GCCBuiltin__amdil_bfm,
   BinaryIntInt;
-  def int_AMDIL_mad_i32 : GCCBuiltin__amdil_imad,
-  TernaryIntInt;
-  def int_AMDIL_mad_u32 : GCCBuiltin__amdil_umad,
-  TernaryIntInt;
-  def int_AMDIL_mad : GCCBuiltin__amdil_mad,
-  TernaryIntFloat;
   def int_AMDIL_mulhi_i32 : GCCBuiltin__amdil_imul_high,
   BinaryIntInt;
   def int_AMDIL_mulhi_u32 : GCCBuiltin__amdil_umul_high,
@@ -110,10 +104,6 @@ let TargetPrefix = AMDIL, isTarget = 1 in {
   BinaryIntInt;
   def int_AMDIL_mulhi24_u32 : GCCBuiltin__amdil_umul24_high,
   BinaryIntInt;
-  def int_AMDIL_mad24_i32 : 

[Mesa-dev] [PATCH] R600: Do not fold modifiers/literals in vector instructions

2013-02-06 Thread Vincent Lejeune
This fixes a couple of regressions on Cayman (and probably on other chips too).
---
 lib/Target/R600/AMDILISelDAGToDAG.cpp | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp 
b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 84223f6..7fc3a2f 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -229,7 +229,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 continue;
   }
   } else {
-if (!TII-isALUInstr(Use-getMachineOpcode())) {
+if (!TII-isALUInstr(Use-getMachineOpcode()) ||
+(TII-get(Use-getMachineOpcode()).TSFlags 
+R600_InstFlag::VECTOR)) {
   continue;
 }
 
@@ -272,7 +274,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   if (ST.device()-getGeneration() = AMDGPUDeviceInfo::HD6XXX) {
 const R600InstrInfo *TII =
 static_castconst R600InstrInfo*(TM.getInstrInfo());
-if (Result  Result-isMachineOpcode()
+if (Result  Result-isMachineOpcode() 
+!(TII-get(Result-getMachineOpcode()).TSFlags  R600_InstFlag::VECTOR)
  TII-isALUInstr(Result-getMachineOpcode())) {
   // Fold FNEG/FABS/CONST_ADDRESS
   // TODO: Isel can generate multiple MachineInst, we need to recursively
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] R600: Use MULADD_IEEE instruction for mad pattern

2013-02-03 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPUISelLowering.cpp | 6 +++---
 lib/Target/R600/AMDILISelLowering.cpp  | 3 ++-
 lib/Target/R600/R600Instructions.td| 8 
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp 
b/lib/Target/R600/AMDGPUISelLowering.cpp
index f3a047a..40c2f5f 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -176,9 +176,9 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
 Op.getOperand(1));
   SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
 Op.getOperand(3));
-  return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-   Op.getOperand(2),
-   OneSubAC);
+  return DAG.getNode(ISD::FADD, DL, VT,
+  DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
+  OneSubAC);
 }
 
 /// \brief Generate Min/Max node
diff --git a/lib/Target/R600/AMDILISelLowering.cpp 
b/lib/Target/R600/AMDILISelLowering.cpp
index 8bfd30c..1dd0270 100644
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -451,7 +451,8 @@ AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG 
DAG) const {
   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
 
   // float fr = mad(fqneg, fb, fa);
-  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
+  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+  DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
 
   // int iq = (int)fq;
   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index bcbb5a1..d3cee56 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -916,6 +916,12 @@ class MULADD_Common bits5 inst : R600_3OP 
(IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
 ;
 
+class MULADD_IEEE_Common bits5 inst : R600_3OP 
+  inst, MULADD_IEEE,
+  [(set (f32 R600_Reg32:$dst),
+   (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
+;
+
 class CNDE_Common bits5 inst : R600_3OP 
   inst, CNDE,
   [(set R600_Reg32:$dst,
@@ -1070,6 +1076,7 @@ let Predicates = [isR600] in {
 
   def MUL_LIT_r600 : MUL_LIT_Common0x0C;
   def MULADD_r600 : MULADD_Common0x10;
+  def MULADD_IEEE_r600 : MULADD_IEEE_Common0x14;
   def CNDE_r600 : CNDE_Common0x18;
   def CNDGT_r600 : CNDGT_Common0x19;
   def CNDGE_r600 : CNDGE_Common0x1A;
@@ -1209,6 +1216,7 @@ let Predicates = [isEGorCayman] in {
   ;
 
   def MULADD_eg : MULADD_Common0x14;
+  def MULADD_IEEE_eg : MULADD_IEEE_Common0x18;
   def ASHR_eg : ASHR_Common0x15;
   def LSHR_eg : LSHR_Common0x16;
   def LSHL_eg : LSHL_Common0x17;
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] R600: Do not fold modifiers/literals in vector instructions

2013-02-03 Thread Vincent Lejeune
This fixes a couple of regressions on Cayman (and probably on other chips too).
---
 lib/Target/R600/AMDILISelDAGToDAG.cpp | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp 
b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 84223f6..7fc3a2f 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -229,7 +229,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 continue;
   }
   } else {
-if (!TII-isALUInstr(Use-getMachineOpcode())) {
+if (!TII-isALUInstr(Use-getMachineOpcode()) ||
+(TII-get(Use-getMachineOpcode()).TSFlags 
+R600_InstFlag::VECTOR)) {
   continue;
 }
 
@@ -272,7 +274,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   if (ST.device()-getGeneration() = AMDGPUDeviceInfo::HD6XXX) {
 const R600InstrInfo *TII =
 static_castconst R600InstrInfo*(TM.getInstrInfo());
-if (Result  Result-isMachineOpcode()
+if (Result  Result-isMachineOpcode() 
+!(TII-get(Result-getMachineOpcode()).TSFlags  R600_InstFlag::VECTOR)
  TII-isALUInstr(Result-getMachineOpcode())) {
   // Fold FNEG/FABS/CONST_ADDRESS
   // TODO: Isel can generate multiple MachineInst, we need to recursively
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] R600: Fold zero/one in export instructions

2013-02-03 Thread Vincent Lejeune
---
 lib/Target/R600/R600ISelLowering.cpp | 111 ---
 lib/Target/R600/R600Instructions.td  |  20 ++-
 lib/Target/R600/R600Intrinsics.td|   3 -
 3 files changed, 55 insertions(+), 79 deletions(-)

diff --git a/lib/Target/R600/R600ISelLowering.cpp 
b/lib/Target/R600/R600ISelLowering.cpp
index 4dc6729..f796738 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -279,57 +279,6 @@ MachineBasicBlock * 
R600TargetLowering::EmitInstrWithCustomInserter(
 using namespace llvm::Intrinsic;
 using namespace llvm::AMDGPUIntrinsic;
 
-static SDValue
-InsertScalarToRegisterExport(SelectionDAG DAG, DebugLoc DL, SDNode 
**ExportMap,
-unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
-SDValue Scalar, SDValue Chain) {
-  if (!ExportMap[Slot]) {
-SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
-  DL, MVT::v4f32,
-  DAG.getUNDEF(MVT::v4f32),
-  Scalar,
-  DAG.getConstant(Channel, MVT::i32));
-
-unsigned Mask = 1  Channel;
-
-const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
-DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
-DAG.getConstant(Mask, MVT::i32)};
-
-SDValue Res =  DAG.getNode(
-AMDGPUISD::EXPORT,
-DL,
-MVT::Other,
-Ops, 6);
- ExportMap[Slot] = Res.getNode();
- return Res;
-  }
-
-  SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ;
-  SDValue PreviousVector = ExportInstruction-getOperand(1);
-  SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
-  DL, MVT::v4f32,
-  PreviousVector,
-  Scalar,
-  DAG.getConstant(Channel, MVT::i32));
-
-  unsigned Mask = dyn_castConstantSDNode(ExportInstruction-getOperand(5))
-  -getZExtValue();
-  Mask |= (1  Channel);
-
-  const SDValue Ops[] = {ExportInstruction-getOperand(0), Vector,
-  DAG.getConstant(Inst, MVT::i32),
-  DAG.getConstant(Type, MVT::i32),
-  DAG.getConstant(Slot, MVT::i32),
-  DAG.getConstant(Mask, MVT::i32)};
-
-  DAG.UpdateNodeOperands(ExportInstruction,
-  Ops, 6);
-
-  return Chain;
-
-}
-
 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG DAG) 
const {
   switch (Op.getOpcode()) {
   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
@@ -356,16 +305,19 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, 
SelectionDAG DAG) const
   }
   return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
 }
-case AMDGPUIntrinsic::R600_store_pixel_color: {
-  MachineFunction MF = DAG.getMachineFunction();
-  R600MachineFunctionInfo *MFI = MF.getInfoR600MachineFunctionInfo();
-  int64_t RegIndex = 
castConstantSDNode(Op.getOperand(3))-getZExtValue();
-
-  SDNode **OutputsMap = MFI-Outputs;
-  return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
-  RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
-  Chain);
-
+case AMDGPUIntrinsic::R600_store_swizzle: {
+  const SDValue Args[8] = {
+Chain,
+Op.getOperand(2), // Export Value
+Op.getOperand(3), // ArrayBase
+Op.getOperand(4), // Type
+DAG.getConstant(0, MVT::i32), // SWZ_X
+DAG.getConstant(1, MVT::i32), // SWZ_Y
+DAG.getConstant(2, MVT::i32), // SWZ_Z
+DAG.getConstant(3, MVT::i32) // SWZ_W
+  };
+  return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), 
Op.getValueType(),
+  Args, 8);
 }
 
 // default for switch(IntrinsicID)
@@ -962,6 +914,43 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
   }
 }
   }
+  case AMDGPUISD::EXPORT: {
+SDValue Arg = N-getOperand(1);
+if (Arg.getOpcode() != ISD::BUILD_VECTOR)
+  break;
+SDValue NewBldVec[4] = {
+DAG.getUNDEF(MVT::f32),
+DAG.getUNDEF(MVT::f32),
+DAG.getUNDEF(MVT::f32),
+DAG.getUNDEF(MVT::f32)
+  };
+SDValue NewArgs[8] = {
+  N-getOperand(0), // Chain
+  SDValue(),
+  N-getOperand(2), // ArrayBase
+  N-getOperand(3), // Type
+  N-getOperand(4), // SWZ_X
+  N-getOperand(5), // SWZ_Y
+  N-getOperand(6), // SWZ_Z
+  N-getOperand(7) // SWZ_W
+};
+for (unsigned i = 0; i  Arg.getNumOperands(); i++) {
+  if (ConstantFPSDNode *C = dyn_castConstantFPSDNode(Arg.getOperand(i))) 
{
+if (C-isZero()) {
+  NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
+} else if (C-isExactlyValue(1.0)) {
+  NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_1
+} else {
+  NewBldVec[i] = Arg.getOperand(i);
+}
+  } else {
+NewBldVec[i] = Arg.getOperand(i);
+  }
+}
+DebugLoc DL = N-getDebugLoc();
+NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
+return DAG.getNode(AMDGPUISD::EXPORT, DL, N-getVTList(), NewArgs, 8);
+  }
   }
   return SDValue();
 }
diff --git 

[Mesa-dev] [PATCH 4/4] R600: Export instructions are no longer terminators

2013-02-03 Thread Vincent Lejeune
This allows the MachineInstScheduler to reorder them, and thus makes scheduling
more efficient.
---
 lib/Target/R600/R600Instructions.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index 3c043aa..82a63df 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -625,7 +625,7 @@ multiclass SteamOutputExportPatternInstruction ExportInst,
   4095, imm:$mask, buf3inst, 0);
 }
 
-let isTerminator = 1, usesCustomInserter = 1 in {
+let usesCustomInserter = 1 in {
 
 class ExportSwzInst : InstR600ISA(
 outs),
@@ -639,7 +639,7 @@ class ExportSwzInst : InstR600ISA(
   let Inst{63-32} = Word1;
 }
 
-} // End isTerminator = 1, usesCustomInserter = 1
+} // End usesCustomInserter = 1
 
 class ExportBufInst : InstR600ISA(
 outs),
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600: Make store_dummy intrinsic more general by passing export type

2013-01-25 Thread Vincent Lejeune
---
 lib/Target/R600/R600Instructions.td | 9 +++--
 lib/Target/R600/R600Intrinsics.td   | 4 ++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td
index 13293b6..3537906 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -608,9 +608,14 @@ multiclass ExportPatternInstruction ExportInst, bits8 
cf_inst {
 0, 61, 7, 0, 7, 7, cf_inst, 0)
   ;
 
-  def : Pat(int_R600_store_pixel_dummy),
+  def : Pat(int_R600_store_dummy (i32 imm:$type)),
 (ExportInst
-(v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0)
+(v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
+  ;
+
+  def : Pat(int_R600_store_dummy 1),
+(ExportInst
+(v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
   ;
 
   def : Pat(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
diff --git a/lib/Target/R600/R600Intrinsics.td 
b/lib/Target/R600/R600Intrinsics.td
index 4c652a6..b5e4f1e 100644
--- a/lib/Target/R600/R600Intrinsics.td
+++ b/lib/Target/R600/R600Intrinsics.td
@@ -24,6 +24,6 @@ let TargetPrefix = R600, isTarget = 1 in {
   Intrinsic[], [llvm_float_ty], [];
   def int_R600_store_pixel_stencil :
   Intrinsic[], [llvm_float_ty], [];
-  def int_R600_store_pixel_dummy :
-  Intrinsic[], [], [];
+  def int_R600_store_dummy :
+  Intrinsic[], [llvm_i32_ty], [];
 }
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g/llvm: Add dummy export for vs output

2013-01-25 Thread Vincent Lejeune
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=59588
---
 src/gallium/drivers/r600/r600_llvm.c | 22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 32b8e56..913dccc 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -374,9 +374,27 @@ static void llvm_emit_epilogue(struct 
lp_build_tgsi_context * bld_base)
}
}
}
+   // Add dummy exports
+   if (ctx-type == TGSI_PROCESSOR_VERTEX) {
+   if (!next_param) {
+   lp_build_intrinsic_unary(base-gallivm-builder, 
llvm.R600.store.dummy,
+   LLVMVoidTypeInContext(base-gallivm-context),
+   lp_build_const_int32(base-gallivm, 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM));
+   }
+   if (!(next_pos-60)) {
+   lp_build_intrinsic_unary(base-gallivm-builder, 
llvm.R600.store.dummy,
+   LLVMVoidTypeInContext(base-gallivm-context),
+   lp_build_const_int32(base-gallivm, 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS));
+   }
+   }
+   if (ctx-type == TGSI_PROCESSOR_FRAGMENT) {
+   if (!has_color) {
+   lp_build_intrinsic_unary(base-gallivm-builder, 
llvm.R600.store.dummy,
+   LLVMVoidTypeInContext(base-gallivm-context),
+   lp_build_const_int32(base-gallivm, 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL));
+   }
+   }
 
-   if (!has_color  ctx-type == TGSI_PROCESSOR_FRAGMENT)
-   lp_build_intrinsic(base-gallivm-builder, 
llvm.R600.store.pixel.dummy, LLVMVoidTypeInContext(base-gallivm-context), 
0, 0);
 }
 
 static void llvm_emit_tex(
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600: Fold remaining CONST_COPY after expand pseudo inst

2013-01-25 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPUTargetMachine.cpp |   2 +-
 lib/Target/R600/R600LowerConstCopy.cpp  | 170 +---
 2 files changed, 160 insertions(+), 12 deletions(-)

diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 7b069e7..2185be3 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -136,8 +136,8 @@ bool AMDGPUPassConfig::addPreEmitPass() {
 addPass(createAMDGPUCFGPreparationPass(*TM));
 addPass(createAMDGPUCFGStructurizerPass(*TM));
 addPass(createR600ExpandSpecialInstrsPass(*TM));
-addPass(createR600LowerConstCopy(*TM));
 addPass(FinalizeMachineBundlesID);
+addPass(createR600LowerConstCopy(*TM));
   } else {
 addPass(createSILowerLiteralConstantsPass(*TM));
 addPass(createSILowerControlFlowPass(*TM));
diff --git a/lib/Target/R600/R600LowerConstCopy.cpp 
b/lib/Target/R600/R600LowerConstCopy.cpp
index d14ae20..2557e8f 100644
--- a/lib/Target/R600/R600LowerConstCopy.cpp
+++ b/lib/Target/R600/R600LowerConstCopy.cpp
@@ -13,7 +13,6 @@
 /// fold them inside vector instruction, like DOT4 or Cube ; ISel emits
 /// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will 
try
 /// to fold them if possible or replace them by MOV otherwise.
-/// TODO : Implement the folding part, using Copy Propagation algorithm.
 //
 
//===--===//
 
@@ -30,6 +29,13 @@ class R600LowerConstCopy : public MachineFunctionPass {
 private:
   static char ID;
   const R600InstrInfo *TII;
+
+  struct ConstPairs {
+unsigned XYPair;
+unsigned ZWPair;
+  };
+
+  bool canFoldInBundle(ConstPairs UsedConst, unsigned ReadConst) const;
 public:
   R600LowerConstCopy(TargetMachine tm);
   virtual bool runOnMachineFunction(MachineFunction MF);
@@ -39,27 +45,169 @@ public:
 
 char R600LowerConstCopy::ID = 0;
 
-
 R600LowerConstCopy::R600LowerConstCopy(TargetMachine tm) :
 MachineFunctionPass(ID),
 TII (static_castconst R600InstrInfo *(tm.getInstrInfo()))
 {
 }
 
+bool R600LowerConstCopy::canFoldInBundle(ConstPairs UsedConst,
+unsigned ReadConst) const {
+  unsigned ReadConstChan = ReadConst  3;
+  unsigned ReadConstIndex = ReadConst  (~3);
+  if (ReadConstChan  2) {
+if (!UsedConst.XYPair) {
+  UsedConst.XYPair = ReadConstIndex;
+}
+return UsedConst.XYPair == ReadConstIndex;
+  } else {
+if (!UsedConst.ZWPair) {
+  UsedConst.ZWPair = ReadConstIndex;
+}
+return UsedConst.ZWPair == ReadConstIndex;
+  }
+}
+
+static bool isControlFlow(const MachineInstr MI) {
+  return (MI.getOpcode() == AMDGPU::IF_PREDICATE_SET) ||
+  (MI.getOpcode() == AMDGPU::ENDIF) ||
+  (MI.getOpcode() == AMDGPU::ELSE) ||
+  (MI.getOpcode() == AMDGPU::WHILELOOP) ||
+  (MI.getOpcode() == AMDGPU::BREAK);
+}
+
 bool R600LowerConstCopy::runOnMachineFunction(MachineFunction MF) {
+
   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
   BB != BB_E; ++BB) {
 MachineBasicBlock MBB = *BB;
-for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-  I != E;) {
-  MachineInstr MI = *I;
-  I = llvm::next(I);
-  if (MI.getOpcode() != AMDGPU::CONST_COPY)
+DenseMapunsigned, MachineInstr * RegToConstIndex;
+for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
+E = MBB.instr_end(); I != E;) {
+
+  if (I-getOpcode() == AMDGPU::CONST_COPY) {
+MachineInstr MI = *I;
+I = llvm::next(I);
+unsigned DstReg = MI.getOperand(0).getReg();
+DenseMapunsigned, MachineInstr *::iterator SrcMI =
+RegToConstIndex.find(DstReg);
+if (SrcMI != RegToConstIndex.end()) {
+  SrcMI-second-eraseFromParent();
+  RegToConstIndex.erase(SrcMI);
+}
+MachineInstr *NewMI = 
+TII-buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
+TII-setImmOperand(NewMI, R600Operands::SRC0_SEL,
+MI.getOperand(1).getImm());
+RegToConstIndex[DstReg] = NewMI;
+MI.eraseFromParent();
 continue;
-  MachineInstr *NewMI = TII-buildDefaultInstruction(MBB, I, AMDGPU::MOV,
-  MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
-  NewMI-getOperand(9).setImm(MI.getOperand(1).getImm());
-  MI.eraseFromParent();
+  }
+
+  std::vectorunsigned Defs;
+  // We treat every instruction as a bundle because the algorithm that handles
+  // const read port limitations inside an IG is still valid for single
+  // instructions.
+  std::vectorMachineInstr * Bundle;
+
+  if (I-isBundle()) {
+unsigned BundleSize = I-getBundleSize();
+for (unsigned i = 0; i  BundleSize; i++) {
+  I = llvm::next(I);
+  Bundle.push_back(I);
+}
+  } else if 

[Mesa-dev] [PATCH 1/2] R600: Fold remaining CONST_COPY after expand pseudo inst

2013-01-23 Thread Vincent Lejeune
v2: fix a bug with write-masked instructions
---
 lib/Target/R600/AMDGPUTargetMachine.cpp |   2 +-
 lib/Target/R600/R600LowerConstCopy.cpp  | 164 +---
 2 files changed, 154 insertions(+), 12 deletions(-)

diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 7b069e7..2185be3 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -136,8 +136,8 @@ bool AMDGPUPassConfig::addPreEmitPass() {
 addPass(createAMDGPUCFGPreparationPass(*TM));
 addPass(createAMDGPUCFGStructurizerPass(*TM));
 addPass(createR600ExpandSpecialInstrsPass(*TM));
-addPass(createR600LowerConstCopy(*TM));
 addPass(FinalizeMachineBundlesID);
+addPass(createR600LowerConstCopy(*TM));
   } else {
 addPass(createSILowerLiteralConstantsPass(*TM));
 addPass(createSILowerControlFlowPass(*TM));
diff --git a/lib/Target/R600/R600LowerConstCopy.cpp 
b/lib/Target/R600/R600LowerConstCopy.cpp
index d14ae20..74260ad 100644
--- a/lib/Target/R600/R600LowerConstCopy.cpp
+++ b/lib/Target/R600/R600LowerConstCopy.cpp
@@ -13,7 +13,6 @@
 /// fold them inside vector instruction, like DOT4 or Cube ; ISel emits
 /// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will 
try
 /// to fold them if possible or replace them by MOV otherwise.
-/// TODO : Implement the folding part, using Copy Propagation algorithm.
 //
 
//===--===//
 
@@ -28,8 +27,16 @@ namespace llvm {
 
 class R600LowerConstCopy : public MachineFunctionPass {
 private:
+  typedef DenseMapunsigned, MachineInstr * SourceMap;
   static char ID;
   const R600InstrInfo *TII;
+
+  struct ConstPairs {
+unsigned XYPair;
+unsigned ZWPair;
+  };
+
+  bool canFoldInBundle(ConstPairs UsedConst, unsigned ReadConst) const;
 public:
   R600LowerConstCopy(TargetMachine tm);
   virtual bool runOnMachineFunction(MachineFunction MF);
@@ -39,27 +46,162 @@ public:
 
 char R600LowerConstCopy::ID = 0;
 
-
 R600LowerConstCopy::R600LowerConstCopy(TargetMachine tm) :
 MachineFunctionPass(ID),
 TII (static_castconst R600InstrInfo *(tm.getInstrInfo()))
 {
 }
 
+bool R600LowerConstCopy::canFoldInBundle(ConstPairs UsedConst,
+unsigned ReadConst) const {
+  unsigned ReadConstChan = ReadConst  3;
+  unsigned ReadConstIndex = ReadConst  (~3);
+  if (ReadConstChan  2) {
+if (!UsedConst.XYPair) {
+  UsedConst.XYPair = ReadConstIndex;
+}
+return UsedConst.XYPair == ReadConstIndex;
+  } else {
+if (!UsedConst.ZWPair) {
+  UsedConst.ZWPair = ReadConstIndex;
+}
+return UsedConst.ZWPair == ReadConstIndex;
+  }
+}
+
+static bool isControlFlow(const MachineInstr MI) {
+  return (MI.getOpcode() == AMDGPU::IF_PREDICATE_SET) ||
+  (MI.getOpcode() == AMDGPU::ENDIF) ||
+  (MI.getOpcode() == AMDGPU::ELSE) ||
+  (MI.getOpcode() == AMDGPU::WHILELOOP) ||
+  (MI.getOpcode() == AMDGPU::BREAK);
+}
+
 bool R600LowerConstCopy::runOnMachineFunction(MachineFunction MF) {
+
   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
   BB != BB_E; ++BB) {
 MachineBasicBlock MBB = *BB;
-for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-  I != E;) {
-  MachineInstr MI = *I;
-  I = llvm::next(I);
-  if (MI.getOpcode() != AMDGPU::CONST_COPY)
+SourceMap RegToConstIndex;
+for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
+E = MBB.instr_end(); I != E;) {
+
+  if (I-getOpcode() == AMDGPU::CONST_COPY) {
+MachineInstr MI = *I;
+I = llvm::next(I);
+unsigned DstReg = MI.getOperand(0).getReg();
+SourceMap::iterator SrcMI = RegToConstIndex.find(DstReg);
+if (SrcMI != RegToConstIndex.end()) {
+  SrcMI-second-eraseFromParent();
+  RegToConstIndex.erase(SrcMI);
+}
+MachineInstr *NewMI = 
+TII-buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
+NewMI-getOperand(9).setImm(MI.getOperand(1).getImm());
+RegToConstIndex[DstReg] = NewMI;
+MI.eraseFromParent();
 continue;
-  MachineInstr *NewMI = TII-buildDefaultInstruction(MBB, I, AMDGPU::MOV,
-  MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
-  NewMI-getOperand(9).setImm(MI.getOperand(1).getImm());
-  MI.eraseFromParent();
+  }
+
+  std::vectorunsigned Defs;
+  // We treat every instruction as a bundle because the algorithm that handles
+  // const read port limitations inside an IG is still valid for single
+  // instructions.
+  std::vectorMachineInstr * Bundle;
+
+  if (I-isBundle()) {
+unsigned BundleSize = I-getBundleSize();
+for (unsigned i = 0; i  BundleSize; i++) {
+  I = llvm::next(I);
+  Bundle.push_back(I);
+}
+  } 

  1   2   3   4   >