On Tue, Sep 06, 2016 at 10:54:14AM +0530, Praveen Paneri wrote:
> Decoupled MMIO is an alternative way to access forcewake domain
> registers, which requires less cycles and avoids frequent software
> forcewake.

How about when forcewake is already held? You'll note that we still
require irq-spinlocks so the mmio access is still not great. And we
still will have to frequently take forcewake manually, apparently.

Do you have any statistics to say that we do reduce grabbing the fw
wakelock and that the busywait you add instead is negligible? You are
still using a 50ms timeout, so there is some doubt about "less cycles".

> +/*
> + * Decoupled MMIO access for only 1 DWORD
> + */
> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
> +                                      uint32_t reg, u32 *ptr_data,
> +                                      enum power_domains pd, int operation)
> +{
> +     u32 ctrl_reg_data = 0;
> +
> +     if (operation == GEN9_DECOUPLED_OP_WRITE)
> +             __raw_i915_write32(dev_priv,
> +                             GEN9_DECOUPLED_REG0_DW0,
> +                             *ptr_data);
> +
> +     ctrl_reg_data |= reg;
> +     ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> +     ctrl_reg_data |= (pd << GEN9_DECOUPLED_PD_SHIFT);
> +     __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +     ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> +     __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +     if (wait_for_atomic((__raw_i915_read32(dev_priv,
> +                     GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
> +                     FORCEWAKE_ACK_TIMEOUT_MS))
> +             DRM_ERROR("Decoupled MMIO wait timed out\n");
> +
> +     if (operation == GEN9_DECOUPLED_OP_READ)
> +             *ptr_data = __raw_i915_read32(dev_priv,
> +                             GEN9_DECOUPLED_REG0_DW0);
> +}
> +
>  #define GEN2_READ_HEADER(x) \
>       u##x val = 0; \
>       assert_rpm_wakelock_held(dev_priv);
> @@ -932,12 +997,27 @@ chv_read##x(struct drm_i915_private *dev_priv, 
> i915_reg_t reg, bool trace) { \
>  static u##x \
>  gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) 
> { \
>       enum forcewake_domains fw_engine; \
> +     enum power_domains pd_engine; \
>       GEN6_READ_HEADER(x); \
> -     fw_engine = __gen9_reg_read_fw_domains(offset); \
> -     if (fw_engine) \
> -             __force_wake_auto(dev_priv, fw_engine); \
> -     val = __raw_i915_read##x(dev_priv, reg); \
> -     GEN6_READ_FOOTER; \
> +     pd_engine = __gen9_reg_read_power_domains(offset); \
> +     if (HAS_DECOUPLED_MMIO(dev_priv) && pd_engine && x%32 == 0) { \

Move the platform test out of here (since it is already a per-platform
vfunc) and then skip the duplicated gen9 functions.

> +             u32 *ptr_data = (u32 *) &val; \
> +             unsigned i = 0; \
> +             for (i = 0; i < x/32; i++) { \

And tidy up the reassignments.

> +                     __gen9_decoupled_mmio_access(dev_priv, \
> +                                     (offset + i*4), \
> +                                     ptr_data + i, \
> +                                     pd_engine, \
> +                                     GEN9_DECOUPLED_OP_READ); \
> +                     ptr_data++; \
> +             } \
> +     } else { \
> +             fw_engine = __gen9_reg_read_fw_domains(offset); \
> +             if (fw_engine) \
> +                     __force_wake_auto(dev_priv, fw_engine); \
> +             val = __raw_i915_read##x(dev_priv, reg); \
> +     } \
> +             GEN6_READ_FOOTER; \

Misleading indentation.

>  }
>  
>  __gen9_read(8)
> @@ -1101,11 +1181,26 @@ static void \
>  gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
>               bool trace) { \
>       enum forcewake_domains fw_engine; \
> +     enum power_domains pd_engine; \
>       GEN6_WRITE_HEADER; \
> -     fw_engine = __gen9_reg_write_fw_domains(offset); \
> -     if (fw_engine) \
> -             __force_wake_auto(dev_priv, fw_engine); \
> -     __raw_i915_write##x(dev_priv, reg, val); \
> +     pd_engine = __gen9_reg_write_power_domains(offset); \
> +     if (HAS_DECOUPLED_MMIO(dev_priv) && pd_engine && x%32 == 0) { \
> +             u32 *ptr_data = (u32 *) &val; \
> +             unsigned i = 0; \
> +             for (i = 0; i < x/32; i++) { \
> +                     __gen9_decoupled_mmio_access(dev_priv, \
> +                                     (offset + i*4), \
> +                                     ptr_data + i, \
> +                                     pd_engine, \
> +                                     GEN9_DECOUPLED_OP_WRITE); \
> +                     ptr_data++; \
> +             } \

This is scary for a 64bit write. They are assumed to be an atomic
transaction with hw - when they are not we encounter fun races where the
hardware operates on the intermediate state. Hence we avoid them.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to