Reduce latency to memory during TPC kernel execution.

Signed-off-by: Oded Gabbay <oded.gab...@gmail.com>
---
 drivers/misc/habanalabs/goya/goya.c  | 3 +++
 drivers/misc/habanalabs/habanalabs.h | 7 ++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 0b40915bede2..d49f5ecd903b 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -1457,6 +1457,9 @@ static void goya_init_golden_registers(struct hl_device *hdev)
                                1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
                WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
                                1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
+
+               WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
+                               ICACHE_FETCH_LINE_NUM, 2);
        }
 
        WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 371d1ec15697..91445371b08b 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -1062,9 +1062,10 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 
 #define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
 #define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
-#define WREG32_FIELD(reg, field, val)  \
-       WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \
-                       (val) << REG_FIELD_SHIFT(reg, field))
+#define WREG32_FIELD(reg, offset, field, val)  \
+       WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \
+                               ~REG_FIELD_MASK(reg, field)) | \
+                               (val) << REG_FIELD_SHIFT(reg, field))
 
 /* Timeout should be longer when working with simulator but cap the
  * increased timeout to some maximum
-- 
2.17.1

Reply via email to