Module: Mesa
Branch: main
Commit: ed6204eb06cb559d9ed354aca00f1ddb0a6f68f7
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ed6204eb06cb559d9ed354aca00f1ddb0a6f68f7

Author: Gert Wollny <[email protected]>
Date:   Thu Sep 15 18:25:43 2022 +0200

r600/sfn: only use 3 channels on Cayman for trans ops

Signed-off-by: Gert Wollny <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18619>

---

 src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp    | 16 ++++++++++------
 src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp | 18 +++++++++++-------
 src/gallium/drivers/r600/sfn/sfn_valuefactory.h   | 10 ++++++----
 3 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
index e54d3342a43..9f46587a892 100644
--- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
@@ -1808,9 +1808,10 @@ static bool emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shad
 
    for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
       if (alu.dest.write_mask & (1 << i)){
+         auto src = value_factory.src(alu.src[0], i);
          ir = new AluInstr(op2_and_int,
                            value_factory.dest(alu.dest, i, pin),
-                           value_factory.src(alu.src[0], i),
+                           src,
                            value_factory.inline_const(mask, 0),
                            {alu_write});
          if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
@@ -2396,17 +2397,20 @@ static bool emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, S
 
    auto pin = pin_for_components(alu);
 
+   unsigned ncomp = nir_dest_num_components(alu.dest.dest) == 4 ? 4 : 3;
+
    /* todo: Actually we need only three channels, but then we have
     * to make sure that we don't hava w dest */
-   for (unsigned j = 0; j < 4; ++j) {
+   for (unsigned j = 0; j < ncomp; ++j) {
       if (alu.dest.write_mask & (1 << j)) {
-         AluInstr::SrcValues srcs(4);
-         PRegister dest = value_factory.dest(alu.dest.dest, j, pin);
+         AluInstr::SrcValues srcs(ncomp);
+         PRegister dest = value_factory.dest(alu.dest.dest, j, pin,
+                                             (1 << ncomp) - 1);
 
-         for (unsigned i = 0; i < 4; ++i)
+         for (unsigned i = 0; i < ncomp; ++i)
             srcs[i] = value_factory.src(src0, j);
 
-         auto ir = new AluInstr(opcode, dest,  srcs,  AluInstr::last_write, 4);
+         auto ir = new AluInstr(opcode, dest,  srcs,  AluInstr::last_write, ncomp);
 
          if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
          if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
diff --git a/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp
index 74fca92b7a2..1b2aae5f38f 100644
--- a/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp
@@ -162,10 +162,10 @@ void ValueFactory::inject_value(const nir_dest& dest, int chan, PVirtualValue va
 }
 
 PRegister ValueFactory::dest(const nir_alu_dest& dst, int chan,
-                             Pin pin_channel)
+                             Pin pin_channel, uint8_t chan_mask)
 {
    sfn_log << SfnLog::reg << "Search (ref) " << &dst << "\n";
-   return dest(dst.dest, chan, pin_channel);
+   return dest(dst.dest, chan, pin_channel, chan_mask);
 }
 
 class TranslateRegister: public RegisterVisitor {
@@ -214,10 +214,11 @@ PRegister ValueFactory::resolve_array(nir_register *reg, nir_src *indirect,
    }
 }
 
-PRegister ValueFactory::dest(const nir_dest& dst, int chan, Pin pin_channel)
+PRegister ValueFactory::dest(const nir_dest& dst, int chan, Pin pin_channel,
+                             uint8_t chan_mask)
 {
    if (dst.is_ssa) {
-      return dest(dst.ssa, chan, pin_channel);
+      return dest(dst.ssa, chan, pin_channel, chan_mask);
    } else {
       return resolve_array(dst.reg.reg, dst.reg.indirect,
                            dst.reg.base_offset, chan);
@@ -253,7 +254,7 @@ PRegister ValueFactory::temp_register(int pinned_channel, bool is_ssa)
 {
    int sel = m_next_register_index++;
    int chan = (pinned_channel >= 0) ?
-            pinned_channel : m_channel_counts.least_used();
+            pinned_channel : m_channel_counts.least_used(0xf);
 
    auto reg = new Register( sel, chan,
                             pinned_channel >= 0 ? pin_chan : pin_free);
@@ -348,7 +349,8 @@ PRegister ValueFactory::dummy_dest(unsigned chan)
 }
 
 PRegister
-ValueFactory::dest(const nir_ssa_def& ssa, int chan, Pin pin_channel)
+ValueFactory::dest(const nir_ssa_def& ssa, int chan, Pin pin_channel,
+                   uint8_t chan_mask)
 {
    RegisterKey key(ssa.index, chan, vp_ssa);
 
@@ -364,11 +366,13 @@ ValueFactory::dest(const nir_ssa_def& ssa, int chan, Pin pin_channel)
       sel = isel->second;
    else {
       sel = m_next_register_index++;
+      sfn_log << SfnLog::reg << "Assign " << sel << " to index "
+              << ssa.index << " in " << &m_ssa_index_to_sel << "\n";
       m_ssa_index_to_sel[ssa.index] = sel;
    }
 
    if (pin_channel == pin_free)
-      chan = m_channel_counts.least_used();
+      chan = m_channel_counts.least_used(chan_mask);
 
    auto vreg = new Register( sel, chan, pin_channel);
    m_channel_counts.inc_count(chan);
diff --git a/src/gallium/drivers/r600/sfn/sfn_valuefactory.h b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h
index 74ee1a5496e..51fd66e080d 100644
--- a/src/gallium/drivers/r600/sfn/sfn_valuefactory.h
+++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h
@@ -167,10 +167,12 @@ struct register_key_hash {
 class ChannelCounts {
 public:
    void inc_count(int chan) {++m_counts[chan];}
-   int least_used() const  {
+   int least_used(uint8_t mask) const  {
       int least_used = 0;
       uint32_t count = m_counts[0];
       for (int i = 1; i < 4; ++i) {
+         if (!((1 << i) & mask))
+            continue;
          if (count > m_counts[i]) {
             count = m_counts[i];
             least_used = i;
@@ -214,9 +216,9 @@ public:
                                                               const std::vector<int>& components);
 
 
-    PRegister dest(const nir_alu_dest& dest, int chan, Pin pin_channel);
-    PRegister dest(const nir_dest& dest, int chan, Pin pin_channel);
-    PRegister dest(const nir_ssa_def& dest, int chan, Pin pin_channel);
+    PRegister dest(const nir_alu_dest& dest, int chan, Pin pin_channel, uint8_t chan_mask = 0xf);
+    PRegister dest(const nir_dest& dest, int chan, Pin pin_channel, uint8_t chan_mask = 0xf);
+    PRegister dest(const nir_ssa_def& dest, int chan, Pin pin_channel, uint8_t chan_mask = 0xf);
 
     PRegister dummy_dest(unsigned chan);
     PRegister temp_register(int pinned_channel = -1, bool is_ssa = true);

Reply via email to