[Nouveau] [PATCH v2 2/7] clk: Remove dstate

2017-07-01 Thread Karol Herbst
We no longer need dstate, because later on the clocks will be adjusted
based on engine load or a static lookup table anyway. Removing it also
simplifies the clocking logic.

This code was never used anywhere anyway and was just a mock-up.

v2: fixed typo in commit message

Signed-off-by: Karol Herbst 
Reviewed-by: Martin Peres 
---
 drm/nouveau/include/nvkm/subdev/clk.h |  2 --
 drm/nouveau/nvkm/subdev/clk/base.c| 16 ++--
 2 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/drm/nouveau/include/nvkm/subdev/clk.h 
b/drm/nouveau/include/nvkm/subdev/clk.h
index ce3bbcfe..1340f5b8 100644
--- a/drm/nouveau/include/nvkm/subdev/clk.h
+++ b/drm/nouveau/include/nvkm/subdev/clk.h
@@ -99,7 +99,6 @@ struct nvkm_clk {
int ustate_ac; /* user-requested (-1 disabled, -2 perfmon) */
int ustate_dc; /* user-requested (-1 disabled, -2 perfmon) */
int astate; /* perfmon adjustment (base) */
-   int dstate; /* display adjustment (min+) */
u8  temp;
 
bool allow_reclock;
@@ -121,7 +120,6 @@ struct nvkm_clk {
 int nvkm_clk_read(struct nvkm_clk *, enum nv_clk_src);
 int nvkm_clk_ustate(struct nvkm_clk *, int req, int pwr);
 int nvkm_clk_astate(struct nvkm_clk *, int req, int rel, bool wait);
-int nvkm_clk_dstate(struct nvkm_clk *, int req, int rel);
 int nvkm_clk_tstate(struct nvkm_clk *, u8 temperature);
 int nvkm_clk_update(struct nvkm_clk *, bool wait);
 
diff --git a/drm/nouveau/nvkm/subdev/clk/base.c 
b/drm/nouveau/nvkm/subdev/clk/base.c
index ecff3ff3..07d530ed 100644
--- a/drm/nouveau/nvkm/subdev/clk/base.c
+++ b/drm/nouveau/nvkm/subdev/clk/base.c
@@ -306,15 +306,14 @@ nvkm_clk_update_work(struct work_struct *work)
return;
clk->pwrsrc = power_supply_is_system_supplied();
 
-   nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d T %d°C D %d\n",
+   nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d T %d°C\n",
   clk->pstate, clk->pwrsrc, clk->ustate_ac, clk->ustate_dc,
-  clk->astate, clk->temp, clk->dstate);
+  clk->astate, clk->temp);
 
pstate = clk->pwrsrc ? clk->ustate_ac : clk->ustate_dc;
if (clk->state_nr && pstate != -1) {
pstate = (pstate < 0) ? clk->astate : pstate;
pstate = min(pstate, clk->state_nr - 1);
-   pstate = max(pstate, clk->dstate);
} else {
pstate = clk->pstate = -1;
}
@@ -554,16 +553,6 @@ nvkm_clk_tstate(struct nvkm_clk *clk, u8 temp)
return nvkm_clk_update(clk, false);
 }
 
-int
-nvkm_clk_dstate(struct nvkm_clk *clk, int req, int rel)
-{
-   if (!rel) clk->dstate  = req;
-   if ( rel) clk->dstate += rel;
-   clk->dstate = min(clk->dstate, clk->state_nr - 1);
-   clk->dstate = max(clk->dstate, 0);
-   return nvkm_clk_update(clk, true);
-}
-
 static int
 nvkm_clk_pwrsrc(struct nvkm_notify *notify)
 {
@@ -621,7 +610,6 @@ nvkm_clk_init(struct nvkm_subdev *subdev)
return clk->func->init(clk);
 
clk->astate = clk->state_nr - 1;
-   clk->dstate = 0;
clk->pstate = -1;
clk->temp = 90; /* reasonable default value */
nvkm_clk_update(clk, true);
-- 
2.13.2

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH v2 3/7] clk: Make pstate a pointer to nvkm_pstate

2017-07-01 Thread Karol Herbst
We will access the current cstate at least every second and this saves us
some CPU cycles looking them up every second.

v2: Rewording commit message.

Signed-off-by: Karol Herbst 
Reviewed-by: Martin Peres 
---
 drm/nouveau/include/nvkm/subdev/clk.h |  4 +++-
 drm/nouveau/nvkm/engine/device/ctrl.c |  5 -
 drm/nouveau/nvkm/subdev/clk/base.c| 17 -
 drm/nouveau/nvkm/subdev/pmu/gk20a.c   | 18 +++---
 4 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/drm/nouveau/include/nvkm/subdev/clk.h 
b/drm/nouveau/include/nvkm/subdev/clk.h
index 1340f5b8..ec537e08 100644
--- a/drm/nouveau/include/nvkm/subdev/clk.h
+++ b/drm/nouveau/include/nvkm/subdev/clk.h
@@ -10,6 +10,8 @@ struct nvkm_pll_vals;
 #define NVKM_CLK_CSTATE_BASE-2 /* pstate base */
 #define NVKM_CLK_CSTATE_HIGHEST -3 /* highest possible */
 
+#define NVKM_CLK_PSTATE_DEFAULT -1
+
 enum nv_clk_src {
nv_clk_src_crystal,
nv_clk_src_href,
@@ -95,7 +97,7 @@ struct nvkm_clk {
 
struct nvkm_notify pwrsrc_ntfy;
int pwrsrc;
-   int pstate; /* current */
+   struct nvkm_pstate *pstate; /* current */
int ustate_ac; /* user-requested (-1 disabled, -2 perfmon) */
int ustate_dc; /* user-requested (-1 disabled, -2 perfmon) */
int astate; /* perfmon adjustment (base) */
diff --git a/drm/nouveau/nvkm/engine/device/ctrl.c 
b/drm/nouveau/nvkm/engine/device/ctrl.c
index b0ece71a..da70626c 100644
--- a/drm/nouveau/nvkm/engine/device/ctrl.c
+++ b/drm/nouveau/nvkm/engine/device/ctrl.c
@@ -52,7 +52,10 @@ nvkm_control_mthd_pstate_info(struct nvkm_control *ctrl, 
void *data, u32 size)
args->v0.ustate_ac = clk->ustate_ac;
args->v0.ustate_dc = clk->ustate_dc;
args->v0.pwrsrc = clk->pwrsrc;
-   args->v0.pstate = clk->pstate;
+   if (clk->pstate)
+   args->v0.pstate = clk->pstate->pstate;
+   else
+   args->v0.pstate = NVKM_CLK_PSTATE_DEFAULT;
} else {
args->v0.count = 0;
args->v0.ustate_ac = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE;
diff --git a/drm/nouveau/nvkm/subdev/clk/base.c 
b/drm/nouveau/nvkm/subdev/clk/base.c
index 07d530ed..0d4d9fdf 100644
--- a/drm/nouveau/nvkm/subdev/clk/base.c
+++ b/drm/nouveau/nvkm/subdev/clk/base.c
@@ -271,13 +271,16 @@ nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei)
struct nvkm_pstate *pstate;
int ret, idx = 0;
 
+   if (pstatei == NVKM_CLK_PSTATE_DEFAULT)
+   return 0;
+
list_for_each_entry(pstate, >states, head) {
if (idx++ == pstatei)
break;
}
 
nvkm_debug(subdev, "setting performance state %d\n", pstatei);
-   clk->pstate = pstatei;
+   clk->pstate = pstate;
 
nvkm_pcie_set_link(pci, pstate->pcie_speed, pstate->pcie_width);
 
@@ -306,8 +309,12 @@ nvkm_clk_update_work(struct work_struct *work)
return;
clk->pwrsrc = power_supply_is_system_supplied();
 
+   if (clk->pstate)
+   pstate = clk->pstate->pstate;
+   else
+   pstate = NVKM_CLK_PSTATE_DEFAULT;
nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d T %d°C\n",
-  clk->pstate, clk->pwrsrc, clk->ustate_ac, clk->ustate_dc,
+  pstate, clk->pwrsrc, clk->ustate_ac, clk->ustate_dc,
   clk->astate, clk->temp);
 
pstate = clk->pwrsrc ? clk->ustate_ac : clk->ustate_dc;
@@ -315,11 +322,11 @@ nvkm_clk_update_work(struct work_struct *work)
pstate = (pstate < 0) ? clk->astate : pstate;
pstate = min(pstate, clk->state_nr - 1);
} else {
-   pstate = clk->pstate = -1;
+   pstate = NVKM_CLK_PSTATE_DEFAULT;
}
 
nvkm_trace(subdev, "-> %d\n", pstate);
-   if (pstate != clk->pstate) {
+   if (!clk->pstate || pstate != clk->pstate->pstate) {
int ret = nvkm_pstate_prog(clk, pstate);
if (ret) {
nvkm_error(subdev, "error setting pstate %d: %d\n",
@@ -610,7 +617,7 @@ nvkm_clk_init(struct nvkm_subdev *subdev)
return clk->func->init(clk);
 
clk->astate = clk->state_nr - 1;
-   clk->pstate = -1;
+   clk->pstate = NULL;
clk->temp = 90; /* reasonable default value */
nvkm_clk_update(clk, true);
return 0;
diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c 
b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
index 978aae3c..3dd550c3 100644
--- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c
+++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
@@ -55,24 +55,22 @@ gk20a_pmu_dvfs_target(struct gk20a_pmu *pmu, int *state)
return nvkm_clk_astate(clk, *state, 0, false);
 }
 
-static void
-gk20a_pmu_dvfs_get_cur_state(struct gk20a_pmu *pmu, int *state)
-{
-   struct nvkm_clk *clk = 

[Nouveau] [PATCH v2 4/7] clk: Hold information about the current cstate status

2017-07-01 Thread Karol Herbst
Later we will have situations where the expected and the current states
aren't the same.

Signed-off-by: Karol Herbst 
Reviewed-by: Martin Peres 
---
 drm/nouveau/include/nvkm/subdev/clk.h |  2 ++
 drm/nouveau/nvkm/subdev/clk/base.c| 32 +---
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/drm/nouveau/include/nvkm/subdev/clk.h 
b/drm/nouveau/include/nvkm/subdev/clk.h
index ec537e08..f35518c3 100644
--- a/drm/nouveau/include/nvkm/subdev/clk.h
+++ b/drm/nouveau/include/nvkm/subdev/clk.h
@@ -101,6 +101,8 @@ struct nvkm_clk {
int ustate_ac; /* user-requested (-1 disabled, -2 perfmon) */
int ustate_dc; /* user-requested (-1 disabled, -2 perfmon) */
int astate; /* perfmon adjustment (base) */
+   struct nvkm_cstate *cstate;
+   int exp_cstateid;
u8  temp;
 
bool allow_reclock;
diff --git a/drm/nouveau/nvkm/subdev/clk/base.c 
b/drm/nouveau/nvkm/subdev/clk/base.c
index 0d4d9fdf..d37c13b7 100644
--- a/drm/nouveau/nvkm/subdev/clk/base.c
+++ b/drm/nouveau/nvkm/subdev/clk/base.c
@@ -146,9 +146,14 @@ static struct nvkm_cstate *
 nvkm_cstate_get(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei)
 {
struct nvkm_cstate *cstate;
-   if (cstatei == NVKM_CLK_CSTATE_HIGHEST)
+   switch (cstatei) {
+   case NVKM_CLK_CSTATE_HIGHEST:
return list_last_entry(>list, typeof(*cstate), head);
-   else {
+   case NVKM_CLK_CSTATE_BASE:
+   return >base;
+   case NVKM_CLK_CSTATE_DEFAULT:
+   return NULL;
+   default:
list_for_each_entry(cstate, >list, head) {
if (cstate->id == cstatei)
return cstate;
@@ -167,6 +172,9 @@ nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate 
*pstate, int cstatei)
struct nvkm_cstate *cstate;
int ret;
 
+   if (cstatei == NVKM_CLK_CSTATE_DEFAULT)
+   return 0;
+
if (!list_empty(>list)) {
cstate = nvkm_cstate_get(clk, pstate, cstatei);
cstate = nvkm_cstate_find_best(clk, pstate, cstate);
@@ -193,6 +201,7 @@ nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate 
*pstate, int cstatei)
 
ret = clk->func->calc(clk, cstate);
if (ret == 0) {
+   clk->cstate = cstate;
ret = clk->func->prog(clk);
clk->func->tidy(clk);
}
@@ -295,7 +304,7 @@ nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei)
ram->func->tidy(ram);
}
 
-   return nvkm_cstate_prog(clk, pstate, NVKM_CLK_CSTATE_HIGHEST);
+   return nvkm_cstate_prog(clk, pstate, clk->exp_cstateid);
 }
 
 static void
@@ -313,9 +322,9 @@ nvkm_clk_update_work(struct work_struct *work)
pstate = clk->pstate->pstate;
else
pstate = NVKM_CLK_PSTATE_DEFAULT;
-   nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d T %d°C\n",
+   nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d C %d T %d°C\n",
   pstate, clk->pwrsrc, clk->ustate_ac, clk->ustate_dc,
-  clk->astate, clk->temp);
+  clk->astate, clk->exp_cstateid, clk->temp);
 
pstate = clk->pwrsrc ? clk->ustate_ac : clk->ustate_dc;
if (clk->state_nr && pstate != -1) {
@@ -536,6 +545,7 @@ nvkm_clk_ustate(struct nvkm_clk *clk, int req, int pwr)
if (ret >= 0) {
if (ret -= 2, pwr) clk->ustate_ac = ret;
else   clk->ustate_dc = ret;
+   clk->exp_cstateid = NVKM_CLK_CSTATE_HIGHEST;
return nvkm_clk_update(clk, true);
}
return ret;
@@ -548,6 +558,7 @@ nvkm_clk_astate(struct nvkm_clk *clk, int req, int rel, 
bool wait)
if ( rel) clk->astate += rel;
clk->astate = min(clk->astate, clk->state_nr - 1);
clk->astate = max(clk->astate, 0);
+   clk->exp_cstateid = NVKM_CLK_CSTATE_BASE;
return nvkm_clk_update(clk, wait);
 }
 
@@ -618,6 +629,8 @@ nvkm_clk_init(struct nvkm_subdev *subdev)
 
clk->astate = clk->state_nr - 1;
clk->pstate = NULL;
+   clk->exp_cstateid = NVKM_CLK_CSTATE_DEFAULT;
+   clk->cstate = NULL;
clk->temp = 90; /* reasonable default value */
nvkm_clk_update(clk, true);
return 0;
@@ -701,15 +714,20 @@ nvkm_clk_ctor(const struct nvkm_clk_func *func, struct 
nvkm_device *device,
if (mode) {
clk->ustate_ac = nvkm_clk_nstate(clk, mode, arglen);
clk->ustate_dc = nvkm_clk_nstate(clk, mode, arglen);
+   clk->exp_cstateid = NVKM_CLK_CSTATE_HIGHEST;
}
 
mode = nvkm_stropt(device->cfgopt, "NvClkModeAC", );
-   if (mode)
+   if (mode) {
clk->ustate_ac = nvkm_clk_nstate(clk, mode, arglen);
+   clk->exp_cstateid = NVKM_CLK_CSTATE_HIGHEST;
+   }
 
mode = 

[Nouveau] [PATCH v2 7/7] clk: Check pm_runtime status before reclocking

2017-07-01 Thread Karol Herbst
We don't want to change anything on the GPU if it's suspended. Also we
need to increase the refcount on the pm_runtime counter so that the GPU
won't be suspended while reclocking.

v2: convert to C style comments

Signed-off-by: Karol Herbst 
---
 drm/nouveau/nvkm/subdev/clk/base.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drm/nouveau/nvkm/subdev/clk/base.c 
b/drm/nouveau/nvkm/subdev/clk/base.c
index 54188d2b..81093e13 100644
--- a/drm/nouveau/nvkm/subdev/clk/base.c
+++ b/drm/nouveau/nvkm/subdev/clk/base.c
@@ -315,6 +315,7 @@ nvkm_clk_update_work(struct work_struct *work)
 {
struct nvkm_clk *clk = container_of(work, typeof(*clk), work);
struct nvkm_subdev *subdev = >subdev;
+   struct device *dev = subdev->device->dev;
int pstate;
 
if (!atomic_xchg(>waiting, 0))
@@ -337,8 +338,17 @@ nvkm_clk_update_work(struct work_struct *work)
}
 
nvkm_trace(subdev, "-> %d\n", pstate);
-   if (!clk->pstate || pstate != clk->pstate->pstate) {
-   int ret = nvkm_pstate_prog(clk, pstate);
+
+   /* only call into the code if the GPU is powered on */
+   if ((!clk->pstate || pstate != clk->pstate->pstate)
+&& !pm_runtime_suspended(dev)) {
+   int ret;
+   /* it would be a shame if the GPU goes into suspend while doing
+* the reclock
+*/
+   pm_runtime_get_sync(dev);
+   ret = nvkm_pstate_prog(clk, pstate);
+   pm_runtime_put(dev);
if (ret) {
nvkm_error(subdev, "error setting pstate %d: %d\n",
   pstate, ret);
-- 
2.13.2

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH v2 5/7] clk: We should pass the pstate id around not the index in the list

2017-07-01 Thread Karol Herbst
This makes the code simpler, because we can compare the id with
pstate->pstate, and it saves us the trouble of iterating over the pstates
to match the index.

v2: reword commit message

Signed-off-by: Karol Herbst 
Reviewed-by: Martin Peres 
---
 drm/nouveau/nouveau_debugfs.c  |  6 +--
 drm/nouveau/nvkm/subdev/clk/base.c | 78 +++---
 2 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/drm/nouveau/nouveau_debugfs.c b/drm/nouveau/nouveau_debugfs.c
index fd64dfdc..b114a429 100644
--- a/drm/nouveau/nouveau_debugfs.c
+++ b/drm/nouveau/nouveau_debugfs.c
@@ -96,11 +96,11 @@ nouveau_debugfs_pstate_get(struct seq_file *m, void *data)
} while (attr.index);
 
if (state >= 0) {
-   if (info.ustate_ac == state)
+   if (info.ustate_ac == attr.state)
seq_printf(m, " AC");
-   if (info.ustate_dc == state)
+   if (info.ustate_dc == attr.state)
seq_printf(m, " DC");
-   if (info.pstate == state)
+   if (info.pstate == attr.state)
seq_printf(m, " *");
} else {
if (info.ustate_ac < -1)
diff --git a/drm/nouveau/nvkm/subdev/clk/base.c 
b/drm/nouveau/nvkm/subdev/clk/base.c
index d37c13b7..1d71bf09 100644
--- a/drm/nouveau/nvkm/subdev/clk/base.c
+++ b/drm/nouveau/nvkm/subdev/clk/base.c
@@ -272,23 +272,26 @@ nvkm_cstate_new(struct nvkm_clk *clk, int idx, struct 
nvkm_pstate *pstate)
  * P-States
  */
 static int
-nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei)
+nvkm_pstate_prog(struct nvkm_clk *clk, int pstateid)
 {
struct nvkm_subdev *subdev = >subdev;
struct nvkm_fb *fb = subdev->device->fb;
struct nvkm_pci *pci = subdev->device->pci;
struct nvkm_pstate *pstate;
-   int ret, idx = 0;
+   int ret;
 
-   if (pstatei == NVKM_CLK_PSTATE_DEFAULT)
+   if (pstateid == NVKM_CLK_PSTATE_DEFAULT)
return 0;
 
list_for_each_entry(pstate, >states, head) {
-   if (idx++ == pstatei)
+   if (pstate->pstate == pstateid)
break;
}
 
-   nvkm_debug(subdev, "setting performance state %d\n", pstatei);
+   if (!pstate)
+   return -EINVAL;
+
+   nvkm_debug(subdev, "setting performance state %x\n", pstateid);
clk->pstate = pstate;
 
nvkm_pcie_set_link(pci, pstate->pcie_speed, pstate->pcie_width);
@@ -329,7 +332,6 @@ nvkm_clk_update_work(struct work_struct *work)
pstate = clk->pwrsrc ? clk->ustate_ac : clk->ustate_dc;
if (clk->state_nr && pstate != -1) {
pstate = (pstate < 0) ? clk->astate : pstate;
-   pstate = min(pstate, clk->state_nr - 1);
} else {
pstate = NVKM_CLK_PSTATE_DEFAULT;
}
@@ -491,32 +493,9 @@ nvkm_pstate_new(struct nvkm_clk *clk, int idx)
  * Adjustment triggers
  */
 static int
-nvkm_clk_ustate_update(struct nvkm_clk *clk, int req)
-{
-   struct nvkm_pstate *pstate;
-   int i = 0;
-
-   if (!clk->allow_reclock)
-   return -ENOSYS;
-
-   if (req != -1 && req != -2) {
-   list_for_each_entry(pstate, >states, head) {
-   if (pstate->pstate == req)
-   break;
-   i++;
-   }
-
-   if (pstate->pstate != req)
-   return -EINVAL;
-   req = i;
-   }
-
-   return req + 2;
-}
-
-static int
 nvkm_clk_nstate(struct nvkm_clk *clk, const char *mode, int arglen)
 {
+   struct nvkm_pstate *pstate;
int ret = 1;
 
if (clk->allow_reclock && !strncasecmpz(mode, "auto", arglen))
@@ -528,27 +507,46 @@ nvkm_clk_nstate(struct nvkm_clk *clk, const char *mode, 
int arglen)
 
((char *)mode)[arglen] = '\0';
if (!kstrtol(mode, 0, )) {
-   ret = nvkm_clk_ustate_update(clk, v);
+   ret = v;
if (ret < 0)
ret = 1;
}
((char *)mode)[arglen] = save;
}
 
-   return ret - 2;
+   if (ret < 0)
+   return ret;
+
+   list_for_each_entry(pstate, >states, head) {
+   if (pstate->pstate == ret)
+   return ret;
+   }
+   return -EINVAL;
 }
 
 int
 nvkm_clk_ustate(struct nvkm_clk *clk, int req, int pwr)
 {
-   int ret = nvkm_clk_ustate_update(clk, req);
-   if (ret >= 0) {
-   if (ret -= 2, pwr) clk->ustate_ac = ret;
-   else   clk->ustate_dc = ret;
- 

[Nouveau] [PATCH v2 1/7] clk: Rename nvkm_pstate_calc to nvkm_clk_update and export it

2017-07-01 Thread Karol Herbst
This function will be used to update the current clock state.

This will happen for various reasons:
  * Temperature changes
  * User changes clocking state
  * Load changes

v2: remove parameter name

Signed-off-by: Karol Herbst 
---
 drm/nouveau/include/nvkm/subdev/clk.h |  1 +
 drm/nouveau/nvkm/subdev/clk/base.c| 26 --
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drm/nouveau/include/nvkm/subdev/clk.h 
b/drm/nouveau/include/nvkm/subdev/clk.h
index e5275f74..ce3bbcfe 100644
--- a/drm/nouveau/include/nvkm/subdev/clk.h
+++ b/drm/nouveau/include/nvkm/subdev/clk.h
@@ -123,6 +123,7 @@ int nvkm_clk_ustate(struct nvkm_clk *, int req, int pwr);
 int nvkm_clk_astate(struct nvkm_clk *, int req, int rel, bool wait);
 int nvkm_clk_dstate(struct nvkm_clk *, int req, int rel);
 int nvkm_clk_tstate(struct nvkm_clk *, u8 temperature);
+int nvkm_clk_update(struct nvkm_clk *, bool wait);
 
 int nv04_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
 int nv40_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
diff --git a/drm/nouveau/nvkm/subdev/clk/base.c 
b/drm/nouveau/nvkm/subdev/clk/base.c
index e4c8d310..ecff3ff3 100644
--- a/drm/nouveau/nvkm/subdev/clk/base.c
+++ b/drm/nouveau/nvkm/subdev/clk/base.c
@@ -296,7 +296,7 @@ nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei)
 }
 
 static void
-nvkm_pstate_work(struct work_struct *work)
+nvkm_clk_update_work(struct work_struct *work)
 {
struct nvkm_clk *clk = container_of(work, typeof(*clk), work);
struct nvkm_subdev *subdev = >subdev;
@@ -332,9 +332,15 @@ nvkm_pstate_work(struct work_struct *work)
nvkm_notify_get(>pwrsrc_ntfy);
 }
 
-static int
-nvkm_pstate_calc(struct nvkm_clk *clk, bool wait)
+int
+nvkm_clk_update(struct nvkm_clk *clk, bool wait)
 {
+   if (!clk)
+   return -EINVAL;
+
+   if (!clk->allow_reclock)
+   return -ENODEV;
+
atomic_set(>waiting, 1);
schedule_work(>work);
if (wait)
@@ -524,7 +530,7 @@ nvkm_clk_ustate(struct nvkm_clk *clk, int req, int pwr)
if (ret >= 0) {
if (ret -= 2, pwr) clk->ustate_ac = ret;
else   clk->ustate_dc = ret;
-   return nvkm_pstate_calc(clk, true);
+   return nvkm_clk_update(clk, true);
}
return ret;
 }
@@ -536,7 +542,7 @@ nvkm_clk_astate(struct nvkm_clk *clk, int req, int rel, 
bool wait)
if ( rel) clk->astate += rel;
clk->astate = min(clk->astate, clk->state_nr - 1);
clk->astate = max(clk->astate, 0);
-   return nvkm_pstate_calc(clk, wait);
+   return nvkm_clk_update(clk, wait);
 }
 
 int
@@ -545,7 +551,7 @@ nvkm_clk_tstate(struct nvkm_clk *clk, u8 temp)
if (clk->temp == temp)
return 0;
clk->temp = temp;
-   return nvkm_pstate_calc(clk, false);
+   return nvkm_clk_update(clk, false);
 }
 
 int
@@ -555,7 +561,7 @@ nvkm_clk_dstate(struct nvkm_clk *clk, int req, int rel)
if ( rel) clk->dstate += rel;
clk->dstate = min(clk->dstate, clk->state_nr - 1);
clk->dstate = max(clk->dstate, 0);
-   return nvkm_pstate_calc(clk, true);
+   return nvkm_clk_update(clk, true);
 }
 
 static int
@@ -563,7 +569,7 @@ nvkm_clk_pwrsrc(struct nvkm_notify *notify)
 {
struct nvkm_clk *clk =
container_of(notify, typeof(*clk), pwrsrc_ntfy);
-   nvkm_pstate_calc(clk, false);
+   nvkm_clk_update(clk, false);
return NVKM_NOTIFY_DROP;
 }
 
@@ -618,7 +624,7 @@ nvkm_clk_init(struct nvkm_subdev *subdev)
clk->dstate = 0;
clk->pstate = -1;
clk->temp = 90; /* reasonable default value */
-   nvkm_pstate_calc(clk, true);
+   nvkm_clk_update(clk, true);
return 0;
 }
 
@@ -675,7 +681,7 @@ nvkm_clk_ctor(const struct nvkm_clk_func *func, struct 
nvkm_device *device,
clk->ustate_dc = -1;
clk->allow_reclock = allow_reclock;
 
-   INIT_WORK(>work, nvkm_pstate_work);
+   INIT_WORK(>work, nvkm_clk_update_work);
init_waitqueue_head(>wait);
atomic_set(>waiting, 0);
 
-- 
2.13.2

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH v2 6/7] clk: Set clocks to pre suspend state after suspend

2017-07-01 Thread Karol Herbst
The idea is to clear out the saved state, because after a resume we can't
know what the GPU is clocked to. The reclock is triggered by the call to
nvkm_clk_update later in nvkm_clk_init.

v2: convert to C style comments

Signed-off-by: Karol Herbst 
Reviewed-by: Martin Peres 
---
 drm/nouveau/nvkm/subdev/clk/base.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drm/nouveau/nvkm/subdev/clk/base.c 
b/drm/nouveau/nvkm/subdev/clk/base.c
index 1d71bf09..54188d2b 100644
--- a/drm/nouveau/nvkm/subdev/clk/base.c
+++ b/drm/nouveau/nvkm/subdev/clk/base.c
@@ -625,11 +625,10 @@ nvkm_clk_init(struct nvkm_subdev *subdev)
if (clk->func->init)
return clk->func->init(clk);
 
-   clk->astate = NVKM_CLK_PSTATE_DEFAULT;
+   /* after a resume we have no idea what clocks are set, reset the state
+*/
clk->pstate = NULL;
-   clk->exp_cstateid = NVKM_CLK_CSTATE_DEFAULT;
clk->cstate = NULL;
-   clk->temp = 90; /* reasonable default value */
nvkm_clk_update(clk, true);
return 0;
 }
@@ -683,8 +682,13 @@ nvkm_clk_ctor(const struct nvkm_clk_func *func, struct 
nvkm_device *device,
clk->func = func;
INIT_LIST_HEAD(>states);
clk->domains = func->domains;
+
+   clk->astate = NVKM_CLK_PSTATE_DEFAULT;
clk->ustate_ac = -1;
clk->ustate_dc = -1;
+   clk->exp_cstateid = NVKM_CLK_CSTATE_DEFAULT;
+   clk->temp = 90; /* reasonable default value */
+
clk->allow_reclock = allow_reclock;
 
INIT_WORK(>work, nvkm_clk_update_work);
-- 
2.13.2

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH v2 0/7] clk subdev updates

2017-07-01 Thread Karol Herbst
This series addresses various issues inside the reclocking code:
1. after resume the set clocks are reset
2. reclocking not possible while GPU is suspended

Some of the patches were part of the bigger reclocking series I sent months
ago, some things have changed though.

This is also preparation work for changing the clock state in response to
temperature changes and for dynamic reclocking.

v2: remove commits to support partial reclocks

Karol Herbst (7):
  clk: Rename nvkm_pstate_calc to nvkm_clk_update and export it
  clk: Remove dstate
  clk: Make pstate a pointer to nvkm_pstate
  clk: Hold information about the current cstate status
  clk: We should pass the pstate id around not the index in the list
  clk: Set clocks to pre suspend state after suspend
  clk: Check pm_runtime status before reclocking

 drm/nouveau/include/nvkm/subdev/clk.h |   9 +-
 drm/nouveau/nouveau_debugfs.c |   6 +-
 drm/nouveau/nvkm/engine/device/ctrl.c |   5 +-
 drm/nouveau/nvkm/subdev/clk/base.c| 175 --
 drm/nouveau/nvkm/subdev/pmu/gk20a.c   |  18 ++--
 5 files changed, 123 insertions(+), 90 deletions(-)

-- 
2.13.2

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 1/2] nv110/exa: Remove depbars

2017-07-01 Thread Aaryaman Vasishta
Removed explicit depbar instructions as they're not used by the blob anymore.

Signed-off-by: Aaryaman Vasishta 
---
 src/shader/exac8nv110.fp  |  5 ++---
 src/shader/exac8nv110.fpc | 10 --
 src/shader/exacanv110.fp  |  5 ++---
 src/shader/exacanv110.fpc | 10 --
 src/shader/exacmnv110.fp  |  5 ++---
 src/shader/exacmnv110.fpc | 10 --
 src/shader/exas8nv110.fp  |  4 +---
 src/shader/exas8nv110.fpc |  8 ++--
 src/shader/exasanv110.fp  |  5 ++---
 src/shader/exasanv110.fpc | 10 --
 src/shader/exascnv110.fp  |  2 --
 src/shader/exascnv110.fpc |  4 
 src/shader/videonv110.fp  |  9 +++--
 src/shader/videonv110.fpc | 18 ++
 14 files changed, 36 insertions(+), 69 deletions(-)

diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
index ce78036..220d7e5 100644
--- a/src/shader/exac8nv110.fp
+++ b/src/shader/exac8nv110.fp
@@ -36,12 +36,11 @@ ipa $r3 a[0x84] $r0 0x0 0x1
 sched (st 0x0) (st 0x0) (st 0x0)
 ipa $r2 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r2 0x0 0x0 t2d 0x8
-depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
 fmul ftz $r3 $r0 $r1
+sched (st 0x0) (st 0x0) (st 0x0)
 mov $r2 $r3 0xf
 mov $r1 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
 mov $r0 $r3 0xf
+sched (st 0x0) (st 0x0) (st 0x0)
 exit
 #endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
index 4aa1368..d8d5517 100644
--- a/src/shader/exac8nv110.fpc
+++ b/src/shader/exac8nv110.fpc
@@ -20,19 +20,17 @@
 0xe043ff88,
 0x2ff70200,
 0xc03a0004,
-0x3407,
-0xf0f0,
-0xfc0007e0,
-0x001f8000,
 0x00170003,
 0x5c681000,
+0xfc0007e0,
+0x001f8000,
 0x00370002,
 0x5c980780,
 0x00370001,
 0x5c980780,
-0xfc0007e0,
-0x001f8000,
 0x0037,
 0x5c980780,
+0xfc0007e0,
+0x001f8000,
 0x0007000f,
 0xe300,
diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
index a70d5c5..bad34c9 100644
--- a/src/shader/exacanv110.fp
+++ b/src/shader/exacanv110.fp
@@ -36,12 +36,11 @@ ipa $r1 a[0x84] $r0 0x0 0x1
 sched (st 0x0) (st 0x0) (st 0x0)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0xf
-depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
 fmul ftz $r3 $r3 $r7
+sched (st 0x0) (st 0x0) (st 0x0)
 fmul ftz $r2 $r2 $r6
 fmul ftz $r1 $r1 $r5
-sched (st 0x0) (st 0x0) (st 0x0)
 fmul ftz $r0 $r0 $r4
+sched (st 0x0) (st 0x0) (st 0x0)
 exit
 #endif
diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
index 7c0ca5e..1a151a2 100644
--- a/src/shader/exacanv110.fpc
+++ b/src/shader/exacanv110.fpc
@@ -20,19 +20,17 @@
 0xe043ff88,
 0xaff7,
 0xc03a0007,
-0x3407,
-0xf0f0,
-0xfc0007e0,
-0x001f8000,
 0x00770303,
 0x5c681000,
+0xfc0007e0,
+0x001f8000,
 0x00670202,
 0x5c681000,
 0x00570101,
 0x5c681000,
-0xfc0007e0,
-0x001f8000,
 0x0047,
 0x5c681000,
+0xfc0007e0,
+0x001f8000,
 0x0007000f,
 0xe300,
diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
index fe5c294..c1eb38e 100644
--- a/src/shader/exacmnv110.fp
+++ b/src/shader/exacmnv110.fp
@@ -36,12 +36,11 @@ ipa $r1 a[0x84] $r0 0x0 0x1
 sched (st 0x0) (st 0x0) (st 0x0)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0xf
-depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
 fmul ftz $r3 $r3 $r4
+sched (st 0x0) (st 0x0) (st 0x0)
 fmul ftz $r2 $r2 $r4
 fmul ftz $r1 $r1 $r4
-sched (st 0x0) (st 0x0) (st 0x0)
 fmul ftz $r0 $r0 $r4
+sched (st 0x0) (st 0x0) (st 0x0)
 exit
 #endif
diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
index 9d62c1a..0b21a0a 100644
--- a/src/shader/exacmnv110.fpc
+++ b/src/shader/exacmnv110.fpc
@@ -20,19 +20,17 @@
 0xe043ff88,
 0xaff7,
 0xc03a0007,
-0x3407,
-0xf0f0,
-0xfc0007e0,
-0x001f8000,
 0x00470303,
 0x5c681000,
+0xfc0007e0,
+0x001f8000,
 0x00470202,
 0x5c681000,
 0x00470101,
 0x5c681000,
-0xfc0007e0,
-0x001f8000,
 0x0047,
 0x5c681000,
+0xfc0007e0,
+0x001f8000,
 0x0007000f,
 0xe300,
diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
index 4fe2e19..c5349d9 100644
--- a/src/shader/exas8nv110.fp
+++ b/src/shader/exas8nv110.fp
@@ -32,11 +32,9 @@ ipa $r1 a[0x84] $r0 0x0 0x1
 sched (st 0x0) (st 0x0) (st 0x0)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0x8
-depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
 mov $r3 $r0 0xf
+sched (st 0x0) (st 0x0) (st 0x0)
 mov $r2 $r0 0xf
 mov $r1 $r0 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
 exit
 #endif
diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
index 1181c41..fabf980 100644
--- a/src/shader/exas8nv110.fpc
+++ b/src/shader/exas8nv110.fpc
@@ -12,17 +12,13 @@
 0xe043ff88,
 0x2ff7,
 0xc03a0004,
-0x3407,
-0xf0f0,
-0xfc0007e0,
-0x001f8000,
 0x00070003,
 0x5c980780,
+0xfc0007e0,
+0x001f8000,
 0x00070002,
 0x5c980780,
 0x00070001,
 0x5c980780,
-0xfc0007e0,
-0x001f8000,
 0x0007000f,
 0xe300,
diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
index 61374a6..4392c1f 100644
--- a/src/shader/exasanv110.fp
+++ b/src/shader/exasanv110.fp
@@ -36,12 +36,11 @@ ipa $r1 a[0x94] $r0 

[Nouveau] [PATCH v5 2/2] nv110/exa: update sched codes

2017-07-01 Thread Aaryaman Vasishta
v5: Rebased on depbar removal patch; removed a redundant read dep-bar.

This patch adds proper delays to the Maxwell EXA shaders. The rendercheck
results seem consistent with and without this patch, though I haven't
tested them extensively.

Trello:
https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays

Signed-off-by: Aaryaman Vasishta 
---
 src/shader/exac8nv110.fp  | 10 +-
 src/shader/exac8nv110.fpc | 18 +-
 src/shader/exacanv110.fp  | 10 +-
 src/shader/exacanv110.fpc | 18 +-
 src/shader/exacmnv110.fp  | 10 +-
 src/shader/exacmnv110.fpc | 18 +-
 src/shader/exas8nv110.fp  |  6 +++---
 src/shader/exas8nv110.fpc | 12 ++--
 src/shader/exasanv110.fp  | 10 +-
 src/shader/exasanv110.fpc | 18 +-
 src/shader/exascnv110.fp  |  4 ++--
 src/shader/exascnv110.fpc |  8 
 src/shader/videonv110.fp  | 12 ++--
 src/shader/videonv110.fpc | 22 +++---
 14 files changed, 88 insertions(+), 88 deletions(-)

diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
index 220d7e5..7797ef4 100644
--- a/src/shader/exac8nv110.fp
+++ b/src/shader/exac8nv110.fp
@@ -25,22 +25,22 @@ NV110FP_Composite_A8[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x0 wt 0x1)
 ipa $r2 a[0x90] $r0 0x0 0x1
 tex nodep $r1 $r2 0x0 0x1 t2d 0x8
 ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0x6 wt 0x1)
 ipa $r2 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r2 0x0 0x0 t2d 0x8
 fmul ftz $r3 $r0 $r1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0x1) (st 0x1)
 mov $r2 $r3 0xf
 mov $r1 $r3 0xf
 mov $r0 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x0) (st 0x0)
 exit
 #endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
index d8d5517..7eb9c02 100644
--- a/src/shader/exac8nv110.fpc
+++ b/src/shader/exac8nv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x0047,
 0x5080,
 0x4007ff03,
 0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x003c3c03,
 0x0007ff02,
 0xe043ff89,
 0x2ff70201,
 0xc03a0014,
 0x4007ff03,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x003f9803,
 0x0007ff02,
 0xe043ff88,
 0x2ff70200,
 0xc03a0004,
 0x00170003,
 0x5c681000,
-0xfc0007e0,
-0x001f8000,
+0xfc2007e1,
+0x001f8400,
 0x00370002,
 0x5c980780,
 0x00370001,
 0x5c980780,
 0x0037,
 0x5c980780,
-0xfc0007e0,
+0xfc0007ef,
 0x001f8000,
 0x0007000f,
 0xe300,
diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
index bad34c9..912f630 100644
--- a/src/shader/exacanv110.fp
+++ b/src/shader/exacanv110.fp
@@ -25,22 +25,22 @@ NV110FP_CAComposite[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
 ipa $r2 a[0x90] $r0 0x0 0x1
 tex nodep $r4 $r2 0x0 0x1 t2d 0xf
 ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0x1 wt 0x3)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0xf
 fmul ftz $r3 $r3 $r7
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0x1) (st 0x1)
 fmul ftz $r2 $r2 $r6
 fmul ftz $r1 $r1 $r5
 fmul ftz $r0 $r0 $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x0) (st 0x0)
 exit
 #endif
diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
index 1a151a2..4c35845 100644
--- a/src/shader/exacanv110.fpc
+++ b/src/shader/exacanv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x0047,
 0x5080,
 0x4007ff03,
 0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001cbc03,
 0x0007ff02,
 0xe043ff89,
 0xaff70204,
 0xc03a0017,
 0x4007ff01,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe5e0172f,
+0x007f8402,
 0x0007ff00,
 0xe043ff88,
 0xaff7,
 0xc03a0007,
 0x00770303,
 0x5c681000,
-0xfc0007e0,
-0x001f8000,
+0xfc2007e1,
+0x001f8400,
 0x00670202,
 0x5c681000,
 0x00570101,
 0x5c681000,
 0x0047,
 0x5c681000,
-0xfc0007e0,
+0xfc0007ef,
 0x001f8000,
 0x0007000f,
 0xe300,
diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
index c1eb38e..02a0835 100644
--- a/src/shader/exacmnv110.fp
+++ b/src/shader/exacmnv110.fp
@@ -25,22 +25,22 @@ NV110FP_Composite[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 

Re: [Nouveau] [PATCH v4] nv110/exa: update sched codes

2017-07-01 Thread Aaryaman Vasishta
Hi,

On Fri, Jun 30, 2017 at 6:26 AM, Samuel Pitoiset 
wrote:

> Do you still have some glitches or does it work correctly now?

No visible glitches on my machine so far (Pascal 1080, Debian stretch GNOME
desktop).
I used "rendercheck -f a8r8g8b8" to make sure there are no differences
between the test results before and after the patch. The tests have also
helped me in my debugging to an extent.

> Did you also remove the spurious wait dep bars between v3 and v4?

There's a redundant read dep-bar on "sched (st 0xf wr 0x1) (st 0xf wr 0x0
rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)" which I've removed in v5 of this
patch. I'll be sending them to the ML now.

Cheers,
Aaryaman

>
> On 06/27/2017 05:16 PM, Aaryaman Vasishta wrote:
>
>> v4: Updated the wait dependency bars based on tex component masks.
>>
>> This patch adds proper delays to maxwell exa shaders. Tested with
>> rendercheck -f a8r8g8b8.
>>
>> I am still wondering whether the rd's are required. We could
>> still wait on the write bars instead. eg. see
>> "sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)" in
>> exacmnv110.fp
>>
>> Trello:
>> https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays
>>
>> Signed-off-by: Aaryaman Vasishta 
>> ---
>>   src/shader/exac8nv110.fp  | 10 +-
>>   src/shader/exac8nv110.fpc | 18 +-
>>   src/shader/exacanv110.fp  | 10 +-
>>   src/shader/exacanv110.fpc | 18 +-
>>   src/shader/exacmnv110.fp  | 10 +-
>>   src/shader/exacmnv110.fpc | 18 +-
>>   src/shader/exas8nv110.fp  |  6 +++---
>>   src/shader/exas8nv110.fpc | 12 ++--
>>   src/shader/exasanv110.fp  | 10 +-
>>   src/shader/exasanv110.fpc | 18 +-
>>   src/shader/exascnv110.fp  |  6 +++---
>>   src/shader/exascnv110.fpc | 10 +-
>>   src/shader/videonv110.fp  | 14 +++---
>>   src/shader/videonv110.fpc | 26 +-
>>   14 files changed, 93 insertions(+), 93 deletions(-)
>>
>> diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
>> index ce78036..101b67f 100644
>> --- a/src/shader/exac8nv110.fp
>> +++ b/src/shader/exac8nv110.fp
>> @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
>>   };
>>   #else
>>   -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>>   ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>>   mufu rcp $r0 $r0
>>   ipa $r3 a[0x94] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt
>> 0x2)
>>   ipa $r2 a[0x90] $r0 0x0 0x1
>>   tex nodep $r1 $r2 0x0 0x1 t2d 0x8
>>   ipa $r3 a[0x84] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
>>   ipa $r2 a[0x80] $r0 0x0 0x1
>>   tex nodep $r0 $r2 0x0 0x0 t2d 0x8
>>   depbar le 0x5 0x0 0x0
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
>>   fmul ftz $r3 $r0 $r1
>>   mov $r2 $r3 0xf
>>   mov $r1 $r3 0xf
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x1) (st 0xf) (st 0x0)
>>   mov $r0 $r3 0xf
>>   exit
>>   #endif
>> diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
>> index 4aa1368..1f7d649 100644
>> --- a/src/shader/exac8nv110.fpc
>> +++ b/src/shader/exac8nv110.fpc
>> @@ -1,36 +1,36 @@
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1a0070f,
>> +0x003c3c01,
>>   0xcff7ff00,
>>   0xe003ff87,
>>   0x0047,
>>   0x5080,
>>   0x4007ff03,
>>   0xe043ff89,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0x21e0072f,
>> +0x005cbc03,
>>   0x0007ff02,
>>   0xe043ff89,
>>   0x2ff70201,
>>   0xc03a0014,
>>   0x4007ff03,
>>   0xe043ff88,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe5e0074f,
>> +0x001fbc06,
>>   0x0007ff02,
>>   0xe043ff88,
>>   0x2ff70200,
>>   0xc03a0004,
>>   0x3407,
>>   0xf0f0,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xfc201fe6,
>> +0x001f8400,
>>   0x00170003,
>>   0x5c681000,
>>   0x00370002,
>>   0x5c980780,
>>   0x00370001,
>>   0x5c980780,
>> -0xfc0007e0,
>> +0xfde007e1,
>>   0x001f8000,
>>   0x0037,
>>   0x5c980780,
>> diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
>> index a70d5c5..fe55fcd 100644
>> --- a/src/shader/exacanv110.fp
>> +++ b/src/shader/exacanv110.fp
>> @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
>>   };
>>   #else
>>   -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>>   ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>>   mufu rcp $r0 $r0
>>   ipa $r3 a[0x94] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
>>   ipa $r2 a[0x90] $r0 0x0 0x1
>>   tex nodep $r4 $r2 0x0 0x1 t2d 0xf
>>   ipa $r1 a[0x84] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
>>   ipa $r0 a[0x80] $r0 0x0 0x1
>>   tex nodep $r0 $r0 0x0 0x0 t2d 0xf
>>   depbar le 0x5 0x0 0x0
>> -sched (st 0x0) (st 0x0) (st 0x0)
>>