[Freedreno] [PATCH 3/9] drm/msm: Support multiple ringbuffers

2017-10-11 Thread Jordan Crouse
Add the infrastructure to support the idea of multiple ringbuffers.
Assign each ringbuffer an id and use that as an index for the various
ring-specific operations.

The biggest delta is supporting legacy fences. Each ring now has its own
fence sequence numbers, but the legacy functions expect a single unique
integer per device. To handle this we return a unique identifier for each
submission and map it to a specific ring/sequence under the covers. Newer
users pass a dma_fence pointer anyway, so they don't care about the actual
sequence ID or ring.
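
As a rough illustration of that mapping (not the driver's actual
implementation; every name below is made up for the sketch), the idea is a
device-wide counter handed back to legacy callers plus a lookup back to the
ring and its per-ring seqno:

/*
 * Toy model of the legacy fence mapping described above.  A real
 * implementation would handle wraparound, collisions and locking;
 * this just shows the shape of the idea.
 */
#include <stdint.h>

#define NR_RINGS 4

struct fence_map_entry {
	uint32_t ring_id;	/* which ringbuffer the submit went to */
	uint32_t seqno;		/* per-ring sequence number */
};

static uint32_t next_global_id = 1;		/* integer handed to legacy userspace */
static uint32_t ring_seqno[NR_RINGS];		/* per-ring counters */
static struct fence_map_entry fence_map[1024];	/* global id -> ring/seqno */

/* At submit time: allocate both identifiers and remember the mapping */
static uint32_t submit_alloc_fence(uint32_t ring_id)
{
	uint32_t id = next_global_id++;

	fence_map[id % 1024] = (struct fence_map_entry) {
		.ring_id = ring_id,
		.seqno   = ++ring_seqno[ring_id],
	};

	return id;	/* what a legacy wait-fence caller passes back in */
}

/* On a legacy wait: recover which ring/seqno the integer referred to */
static struct fence_map_entry fence_lookup(uint32_t id)
{
	return fence_map[id % 1024];
}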

The actual mechanics for multiple ringbuffers are very target specific
so this code just allows for the possibility but still only defines
one ringbuffer for each target family.

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c   |   9 +-
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c   |   9 +-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   |  54 ++-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.h   |   2 +-
 drivers/gpu/drm/msm/adreno/a5xx_power.c |   6 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 137 ---
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  20 ++--
 drivers/gpu/drm/msm/msm_drv.c   |  23 +++--
 drivers/gpu/drm/msm/msm_drv.h   |   8 +-
 drivers/gpu/drm/msm/msm_fence.c |   2 +-
 drivers/gpu/drm/msm/msm_fence.h |   2 +-
 drivers/gpu/drm/msm/msm_gem.h   |   4 +-
 drivers/gpu/drm/msm/msm_gem_submit.c|  12 ++-
 drivers/gpu/drm/msm/msm_gpu.c   | 163 ++--
 drivers/gpu/drm/msm/msm_gpu.h   |  42 
 drivers/gpu/drm/msm/msm_ringbuffer.c|  35 +--
 drivers/gpu/drm/msm/msm_ringbuffer.h|  20 +++-
 drivers/gpu/drm/msm/msm_submitqueue.c   |  27 +-
 include/uapi/drm/msm_drm.h  |   1 +
 19 files changed, 366 insertions(+), 210 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 789f7fb..4baef27 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -44,7 +44,7 @@
 
 static bool a3xx_me_init(struct msm_gpu *gpu)
 {
-   struct msm_ringbuffer *ring = gpu->rb;
+   struct msm_ringbuffer *ring = gpu->rb[0];
 
OUT_PKT3(ring, CP_ME_INIT, 17);
OUT_RING(ring, 0x03f7);
@@ -65,7 +65,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu)
OUT_RING(ring, 0x);
OUT_RING(ring, 0x);
 
-   gpu->funcs->flush(gpu);
+   gpu->funcs->flush(gpu, ring);
return a3xx_idle(gpu);
 }
 
@@ -339,7 +339,7 @@ static void a3xx_destroy(struct msm_gpu *gpu)
 static bool a3xx_idle(struct msm_gpu *gpu)
 {
/* wait for ringbuffer to drain: */
-   if (!adreno_idle(gpu))
+   if (!adreno_idle(gpu, gpu->rb[0]))
return false;
 
/* then wait for GPU to finish: */
@@ -446,6 +446,7 @@ static void a3xx_dump(struct msm_gpu *gpu)
.recover = a3xx_recover,
.submit = adreno_submit,
.flush = adreno_flush,
+   .active_ring = adreno_active_ring,
.irq = a3xx_irq,
.destroy = a3xx_destroy,
 #ifdef CONFIG_DEBUG_FS
@@ -491,7 +492,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = a3xx_registers;
adreno_gpu->reg_offsets = a3xx_register_offsets;
 
-   ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
+   ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret)
goto fail;
 
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index f87c4312..8199a4b 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -116,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu)
 
 static bool a4xx_me_init(struct msm_gpu *gpu)
 {
-   struct msm_ringbuffer *ring = gpu->rb;
+   struct msm_ringbuffer *ring = gpu->rb[0];
 
OUT_PKT3(ring, CP_ME_INIT, 17);
OUT_RING(ring, 0x03f7);
@@ -137,7 +137,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu)
OUT_RING(ring, 0x);
OUT_RING(ring, 0x);
 
-   gpu->funcs->flush(gpu);
+   gpu->funcs->flush(gpu, ring);
return a4xx_idle(gpu);
 }
 
@@ -337,7 +337,7 @@ static void a4xx_destroy(struct msm_gpu *gpu)
 static bool a4xx_idle(struct msm_gpu *gpu)
 {
/* wait for ringbuffer to drain: */
-   if (!adreno_idle(gpu))
+   if (!adreno_idle(gpu, gpu->rb[0]))
return false;
 
/* then wait for GPU to finish: */
@@ -534,6 +534,7 @@ static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t 
*value)
.recover = a4xx_recover,
.submit = adreno_submit,
.flush = adreno_flush,
+   .active_ring = adreno_active_ring,
.irq = a4xx_irq,
.destroy = a4xx_destroy,
 #ifdef CONFIG_DEBUG_FS
@@ -573,7 +574,7 @@ struct msm_gpu 

[Freedreno] [PATCH 8/9] drm/msm: Removed unused struct_mutex_task

2017-10-11 Thread Jordan Crouse
Recent changes to locking have rendered struct_mutex_task
unused.

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/msm_drv.h| 6 --
 drivers/gpu/drm/msm/msm_gem_submit.c | 2 --
 2 files changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 9853e3e..c46dc12 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -154,12 +154,6 @@ struct msm_drm_private {
struct shrinker shrinker;
 
struct msm_vblank_ctrl vblank_ctrl;
-
-   /* task holding struct_mutex.. currently only used in submit path
-* to detect and reject faults from copy_from_user() for submit
-* ioctl.
-*/
-   struct task_struct *struct_mutex_task;
 };
 
 struct msm_format {
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c 
b/drivers/gpu/drm/msm/msm_gem_submit.c
index 9e087cf..d68bfa6 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -449,7 +449,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out_unlock;
}
}
-   priv->struct_mutex_task = current;
 
submit = submit_create(dev, gpu, queue, args->nr_bos, args->nr_cmds);
if (!submit) {
@@ -569,7 +568,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 out_unlock:
if (ret && (out_fence_fd >= 0))
put_unused_fd(out_fence_fd);
-   priv->struct_mutex_task = NULL;
mutex_unlock(&dev->struct_mutex);
return ret;
 }
-- 
1.9.1



[Freedreno] [PATCH 6/9] drm/msm: Make the value of RB_CNTL (almost) generic

2017-10-11 Thread Jordan Crouse
We use a global ringbuffer size and block size for all targets, and at
least for 5XX preemption we need to know the value of RB_CNTL in several
locations, so it makes sense to calculate it once and use it everywhere.

The only monkey wrench is that we need to disable the RPTR shadow for
A430 targets, but that only needs to be done once and doesn't affect
A5XX, so we can OR in the A430 value at init time.
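
For reference, a quick userspace check of the two fields that end up in
MSM_GPU_RB_CNTL_DEFAULT, assuming the 32K ring and 32-byte block size used
by the patch; __builtin_ctz stands in for the kernel's ilog2() since both
values are powers of two:

#include <assert.h>

int main(void)
{
	unsigned int ring_sz = 32768;	/* MSM_GPU_RINGBUFFER_SZ (SZ_32K) */
	unsigned int blk_sz  = 32;	/* MSM_GPU_RINGBUFFER_BLKSIZE */

	/* BUFSZ is log2 of the ring size in quad-words: ilog2(32768 / 8) == 12 */
	assert(__builtin_ctz(ring_sz / 8) == 12);

	/* BLKSZ is log2 of the block size in quad-words: ilog2(32 / 8) == 2 */
	assert(__builtin_ctz(blk_sz / 8) == 2);

	return 0;
}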

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 12 +++-
 drivers/gpu/drm/msm/msm_gpu.h   |  5 +
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 77eaa46..72f1132 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -21,7 +21,6 @@
 #include "msm_gem.h"
 #include "msm_mmu.h"
 
-#define RB_BLKSIZE 32
 
 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
 {
@@ -96,11 +95,14 @@ int adreno_hw_init(struct msm_gpu *gpu)
ring->memptrs->rptr = 0;
}
 
-   /* Setup REG_CP_RB_CNTL: */
+   /*
+* Setup REG_CP_RB_CNTL.  The same value is used across targets (with
+* the exception of A430 that disables the RPTR shadow) - the calculation
+* for the ringbuffer size and block size is moved to msm_gpu.h for the
+* pre-processor to deal with and the A430 variant is ORed in here
+*/
adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
-   /* size is log2(quad-words): */
-   AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) |
-   AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) |
+   MSM_GPU_RB_CNTL_DEFAULT |
(adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));
 
/* Setup ringbuffer address - use ringbuffer[0] for GPU init */
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 1be0317..e113d64 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -129,6 +129,11 @@ struct msm_gpu {
 
 /* It turns out that all targets use the same ringbuffer size */
 #define MSM_GPU_RINGBUFFER_SZ SZ_32K
+#define MSM_GPU_RINGBUFFER_BLKSIZE 32
+
+#define MSM_GPU_RB_CNTL_DEFAULT \
+   (AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \
+   AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))
 
 static inline bool msm_gpu_active(struct msm_gpu *gpu)
 {
-- 
1.9.1



[Freedreno] [PATCH 7/9] drm/msm: Implement preemption for A5XX targets

2017-10-11 Thread Jordan Crouse
Implement preemption for A5XX targets - this allows multiple
ringbuffers at different priorities, with automatic preemption
of a lower-priority ringbuffer if a higher-priority one is ready.
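
The scheduling decision boils down to something like the sketch below (a
simplified model, not the a5xx code itself; ring 0 is assumed to be the
highest priority and the first non-empty ring wins):

#include <stdbool.h>
#include <stddef.h>

struct ring_state {
	unsigned int id;
	bool empty;		/* wptr has caught up with rptr */
};

/* Lowest-numbered (highest-priority) ring with pending commands, if any */
static struct ring_state *pick_next_ring(struct ring_state *rings,
					 unsigned int nr_rings)
{
	unsigned int i;

	for (i = 0; i < nr_rings; i++)
		if (!rings[i].empty)
			return &rings[i];

	return NULL;
}

/* Preempt only when a different (higher-priority) ring has work */
static bool should_preempt(struct ring_state *cur, struct ring_state *next)
{
	return next && next != cur;
}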

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/Makefile  |   1 +
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 176 -
 drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 107 ++-
 drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 305 ++
 drivers/gpu/drm/msm/adreno/adreno_gpu.c   |  14 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.h   |   7 +-
 drivers/gpu/drm/msm/msm_drv.h |   2 +-
 drivers/gpu/drm/msm/msm_gpu.c |   5 +-
 drivers/gpu/drm/msm/msm_ringbuffer.c  |   1 +
 drivers/gpu/drm/msm/msm_ringbuffer.h  |   1 +
 10 files changed, 599 insertions(+), 20 deletions(-)
 create mode 100644 drivers/gpu/drm/msm/adreno/a5xx_preempt.c

diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 3c234e7..d0b26dd 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -8,6 +8,7 @@ msm-y := \
adreno/a4xx_gpu.o \
adreno/a5xx_gpu.o \
adreno/a5xx_power.o \
+   adreno/a5xx_preempt.o \
hdmi/hdmi.o \
hdmi/hdmi_audio.o \
hdmi/hdmi_bridge.o \
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index b7b9f4f..a0f69b2 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -95,13 +95,65 @@ static int zap_shader_load_mdt(struct device *dev, const 
char *fwname)
return ret;
 }
 
+static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+   uint32_t wptr;
+   unsigned long flags;
+
+   spin_lock_irqsave(&ring->lock, flags);
+
+   /* Copy the shadow to the actual register */
+   ring->cur = ring->next;
+
+   /* Make sure to wrap wptr if we need to */
+   wptr = get_wptr(ring);
+
+   spin_unlock_irqrestore(&ring->lock, flags);
+
+   /* Make sure everything is posted before making a decision */
+   mb();
+
+   /* Update HW if this is the current ring and we are not in preempt */
+   if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
+   gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
+}
+
 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx)
 {
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
struct msm_drm_private *priv = gpu->dev->dev_private;
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
 
+   OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
+   OUT_RING(ring, 0x02);
+
+   /* Turn off protected mode to write to special registers */
+   OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+   OUT_RING(ring, 0);
+
+   /* Set the save preemption record for the ring/command */
+   OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
+   OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
+   OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
+
+   /* Turn back on protected mode */
+   OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+   OUT_RING(ring, 1);
+
+   /* Enable local preemption for finegrain preemption */
+   OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
+   OUT_RING(ring, 0x02);
+
+   /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
+   OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
+   OUT_RING(ring, 0x02);
+
+   /* Submit the commands */
for (i = 0; i < submit->nr_cmds; i++) {
switch (submit->cmd[i].type) {
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
@@ -119,16 +171,54 @@ static void a5xx_submit(struct msm_gpu *gpu, struct 
msm_gem_submit *submit,
}
}
 
+   /*
+* Write the render mode to NULL (0) to indicate to the CP that the IBs
+* are done rendering - otherwise a lucky preemption would start
+* replaying from the last checkpoint
+*/
+   OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
+   OUT_RING(ring, 0);
+   OUT_RING(ring, 0);
+   OUT_RING(ring, 0);
+   OUT_RING(ring, 0);
+   OUT_RING(ring, 0);
+
+   /* Turn off IB level preemptions */
+   OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
+   OUT_RING(ring, 0x01);
+
+   /* Write the fence to the scratch register */
OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
OUT_RING(ring, submit->seqno);
 
+   /*
+* Execute a CACHE_FLUSH_TS event. This will ensure that the
+* timestamp is written to the memory and then triggers the interrupt
+*/
OUT_PKT7(ring, CP_EVENT_WRITE, 4);

[Freedreno] [PATCH 5/9] drm/msm: Shadow current pointer in the ring until command is complete

2017-10-11 Thread Jordan Crouse
Add a shadow pointer to track the current command being written into
the ring. Don't commit it as 'cur' until the command is submitted.
Because 'cur' is used to construct the software copy of the wptr, this
ensures that somebody peeking in on the ring doesn't assume that a
command is in flight while it is being written. This isn't a huge deal
with a single ring (though technically the hangcheck could assume
the system is prematurely busy when it isn't) but it will be rather
important for preemption where the decision to preempt is based
on a non-empty ringbuffer. Without a shadow an aggressive preemption
scheme could assume that the ringbuffer is non-empty and switch to it
before the CPU is done writing the command and boom.

Even though preemption won't be supported for all targets, the way the
code is organized makes it simpler to keep this generic for all targets.
The extra load for non-preemption targets should be minimal.
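
A toy model of the cur/next split (the real changes are in the diff
below): writers stage words through 'next', and observers such as
hangcheck or a preempt decision only ever see 'cur', which is published
at flush time, so a half-written command never makes the ring look busy:

#include <stdbool.h>
#include <stdint.h>

struct ring_model {
	uint32_t buf[64];
	uint32_t *start, *end, *cur, *next;
	uint32_t rptr;		/* stand-in for the GPU read pointer */
};

static void ring_emit(struct ring_model *r, uint32_t data)
{
	if (r->next == r->end)
		r->next = r->start;
	*(r->next++) = data;	/* staged, not yet visible to observers */
}

static void ring_flush(struct ring_model *r)
{
	r->cur = r->next;	/* publish: the command is now in flight */
}

static bool ring_busy(const struct ring_model *r)
{
	/* observers compare rptr against the *published* write pointer */
	return (uint32_t)(r->cur - r->start) != r->rptr;
}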

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c |  9 +++--
 drivers/gpu/drm/msm/msm_ringbuffer.c|  1 +
 drivers/gpu/drm/msm/msm_ringbuffer.h| 12 
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index cf91840..77eaa46 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -89,6 +89,7 @@ int adreno_hw_init(struct msm_gpu *gpu)
}
 
ring->cur = ring->start;
+   ring->next = ring->start;
 
/* reset completed fence seqno: */
ring->memptrs->fence = ring->seqno;
@@ -233,12 +234,15 @@ void adreno_flush(struct msm_gpu *gpu, struct 
msm_ringbuffer *ring)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
uint32_t wptr;
 
+   /* Copy the shadow to the actual register */
+   ring->cur = ring->next;
+
/*
 * Mask wptr value that we calculate to fit in the HW range. This is
 * to account for the possibility that the last command fit exactly into
 * the ringbuffer and rb->next hasn't wrapped to zero yet
 */
-   wptr = get_wptr(ring) % (MSM_GPU_RINGBUFFER_SZ >> 2);
+   wptr = (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2);
 
/* ensure writes to ringbuffer have hit system memory: */
mb();
@@ -350,7 +354,8 @@ static uint32_t ring_freewords(struct msm_ringbuffer *ring)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu);
uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2;
-   uint32_t wptr = get_wptr(ring);
+   /* Use ring->next to calculate free size */
+   uint32_t wptr = ring->next - ring->start;
uint32_t rptr = get_rptr(adreno_gpu, ring);
return (rptr + (size - 1) - wptr) % size;
 }
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c 
b/drivers/gpu/drm/msm/msm_ringbuffer.c
index febf2541..6015959 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -46,6 +46,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu 
*gpu, int id,
goto fail;
}
ring->end   = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2);
+   ring->next  = ring->start;
ring->cur   = ring->start;
 
ring->memptrs = memptrs;
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h 
b/drivers/gpu/drm/msm/msm_ringbuffer.h
index ec44251..3749764 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
@@ -32,7 +32,7 @@ struct msm_ringbuffer {
struct msm_gpu *gpu;
int id;
struct drm_gem_object *bo;
-   uint32_t *start, *end, *cur;
+   uint32_t *start, *end, *cur, *next;
struct list_head submits;
uint64_t iova;
uint32_t seqno;
@@ -51,9 +51,13 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu 
*gpu, int id,
 static inline void
 OUT_RING(struct msm_ringbuffer *ring, uint32_t data)
 {
-   if (ring->cur == ring->end)
-   ring->cur = ring->start;
-   *(ring->cur++) = data;
+   /*
+* ring->next points to the current command being written - it won't be
+* committed as ring->cur until the flush
+*/
+   if (ring->next == ring->end)
+   ring->next = ring->start;
+   *(ring->next++) = data;
 }
 
 #endif /* __MSM_RINGBUFFER_H__ */
-- 
1.9.1



[Freedreno] [PATCH 9/9] drm/msm: dump a rd GPUADDR header for all buffers in the command

2017-10-11 Thread Jordan Crouse
Currently the rd dump avoids any buffers marked as WRITE under the
assumption that the contents are not interesting.  While it is true
that the contents are uninteresting, we should still print the iova
and size for all buffers so that any listening replay tools can
correctly construct the submission.

Print the header for all buffers but only dump the contents for
buffers marked as READ.

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/msm_rd.c | 30 +++---
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 0366b80..4c858d8 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -268,10 +268,6 @@ static void snapshot_buf(struct msm_rd_state *rd,
struct msm_gem_object *obj = submit->bos[idx].obj;
const char *buf;
 
-   buf = msm_gem_get_vaddr(&obj->base);
-   if (IS_ERR(buf))
-   return;
-
if (iova) {
buf += iova - submit->bos[idx].iova;
} else {
@@ -279,8 +275,21 @@ static void snapshot_buf(struct msm_rd_state *rd,
size = obj->base.size;
}
 
+   /*
+* Always write the GPUADDR header so can get a complete list of all the
+* buffers in the cmd
+*/
rd_write_section(rd, RD_GPUADDR,
(uint32_t[3]){ iova, size, iova >> 32 }, 12);
+
+   /* But only dump the contents of buffers marked READ */
+   if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ))
+   return;
+
+   buf = msm_gem_get_vaddr(&obj->base);
+   if (IS_ERR(buf))
+   return;
+
rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size);
 
msm_gem_put_vaddr(&obj->base);
@@ -309,17 +318,8 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit)
 
rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));
 
-   if (rd_full) {
-   for (i = 0; i < submit->nr_bos; i++) {
-   /* buffers that are written to probably don't start out
-* with anything interesting:
-*/
-   if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
-   continue;
-
-   snapshot_buf(rd, submit, i, 0, 0);
-   }
-   }
+   for (i = 0; rd_full && i < submit->nr_bos; i++)
+   snapshot_buf(rd, submit, i, 0, 0);
 
for (i = 0; i < submit->nr_cmds; i++) {
uint64_t iova = submit->cmd[i].iova;
-- 
1.9.1



[Freedreno] [PATCH 1/9] drm/msm: Add per-instance submit queues

2017-10-11 Thread Jordan Crouse
Currently the behavior of a command stream is provided by the user
application at submission time, and the application is expected to internally
maintain the settings for each 'context' or 'rendering queue' and specify
the correct ones.

This works okay for simple cases but as applications become more
complex we will want to set context-specific flags and do various
permission checks to allow certain contexts to enable additional
privileges.

Add kernel-side submit queues to be analogous to 'contexts' or
'rendering queues' on the application side. Each file descriptor
instance will maintain its own list of queues. Queues cannot be
shared between file descriptors.

For backwards compatibility context id '0' is defined as a default
context specifying no priority and no special flags. This is
intended to be the usual configuration for 99% of applications so
that a garden-variety application can function correctly without
creating a queue. Only those applications requiring the specific
benefit of different queues need to create one.
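
From the application side, creating and closing a queue looks roughly like
this. This is a sketch assuming the uapi names that eventually landed
(struct drm_msm_submitqueue, DRM_IOCTL_MSM_SUBMITQUEUE_NEW/CLOSE and the
queueid field of the submit ioctl); names and header paths may differ in
this revision of the series:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/msm_drm.h>

static int create_queue(int fd, uint32_t prio, uint32_t *queue_id)
{
	struct drm_msm_submitqueue req;
	int ret;

	memset(&req, 0, sizeof(req));
	req.prio = prio;	/* 0..nr_rings-1, 0 being the highest priority */

	ret = drmIoctl(fd, DRM_IOCTL_MSM_SUBMITQUEUE_NEW, &req);
	if (ret)
		return ret;

	*queue_id = req.id;	/* later passed as the submit ioctl's queueid */
	return 0;
}

static int close_queue(int fd, uint32_t queue_id)
{
	return drmIoctl(fd, DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE, &queue_id);
}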

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/Makefile  |   3 +-
 drivers/gpu/drm/msm/msm_drv.c |  56 --
 drivers/gpu/drm/msm/msm_drv.h |  20 +++--
 drivers/gpu/drm/msm/msm_gem.h |   1 +
 drivers/gpu/drm/msm/msm_gem_submit.c  |  14 +++-
 drivers/gpu/drm/msm/msm_gpu.h |  15 
 drivers/gpu/drm/msm/msm_submitqueue.c | 135 ++
 include/uapi/drm/msm_drm.h|  22 ++
 8 files changed, 250 insertions(+), 16 deletions(-)
 create mode 100644 drivers/gpu/drm/msm/msm_submitqueue.c

diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 33008fa..3c234e7 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -57,7 +57,8 @@ msm-y := \
msm_iommu.o \
msm_perf.o \
msm_rd.o \
-   msm_ringbuffer.o
+   msm_ringbuffer.o \
+   msm_submitqueue.o
 
 msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o
 msm-$(CONFIG_COMMON_CLK) += mdp/mdp4/mdp4_lvds_pll.o
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index dda55b6..cf743c7 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -29,9 +29,12 @@
  * - 1.0.0 - initial interface
  * - 1.1.0 - adds madvise, and support for submits with > 4 cmd buffers
  * - 1.2.0 - adds explicit fence support for submit ioctl
+ * - 1.3.0 - adds GMEM_BASE + NR_RINGS params, SUBMITQUEUE_NEW +
+ *   SUBMITQUEUE_CLOSE ioctls, and MSM_INFO_IOVA flag for
+ *   MSM_GEM_INFO ioctl.
  */
 #define MSM_VERSION_MAJOR  1
-#define MSM_VERSION_MINOR  2
+#define MSM_VERSION_MINOR  3
 #define MSM_VERSION_PATCHLEVEL 0
 
 static void msm_fb_output_poll_changed(struct drm_device *dev)
@@ -514,24 +517,37 @@ static void load_gpu(struct drm_device *dev)
mutex_unlock(&init_lock);
 }
 
-static int msm_open(struct drm_device *dev, struct drm_file *file)
+static int context_init(struct drm_file *file)
 {
struct msm_file_private *ctx;
 
-   /* For now, load gpu on open.. to avoid the requirement of having
-* firmware in the initrd.
-*/
-   load_gpu(dev);
-
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
 
+   msm_submitqueue_init(ctx);
+
file->driver_priv = ctx;
 
return 0;
 }
 
+static int msm_open(struct drm_device *dev, struct drm_file *file)
+{
+   /* For now, load gpu on open.. to avoid the requirement of having
+* firmware in the initrd.
+*/
+   load_gpu(dev);
+
+   return context_init(file);
+}
+
+static void context_close(struct msm_file_private *ctx)
+{
+   msm_submitqueue_close(ctx);
+   kfree(ctx);
+}
+
 static void msm_postclose(struct drm_device *dev, struct drm_file *file)
 {
struct msm_drm_private *priv = dev->dev_private;
@@ -542,7 +558,7 @@ static void msm_postclose(struct drm_device *dev, struct 
drm_file *file)
priv->lastctx = NULL;
mutex_unlock(&dev->struct_mutex);
 
-   kfree(ctx);
+   context_close(ctx);
 }
 
 static void msm_lastclose(struct drm_device *dev)
@@ -787,6 +803,28 @@ static int msm_ioctl_gem_madvise(struct drm_device *dev, 
void *data,
return ret;
 }
 
+
+static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data,
+   struct drm_file *file)
+{
+   struct drm_msm_submitqueue *args = data;
+
+   if (args->flags & ~MSM_SUBMITQUEUE_FLAGS)
+   return -EINVAL;
+
+   return msm_submitqueue_create(file->driver_priv, args->prio,
+   args->flags, &args->id);
+}
+
+
+static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data,
+   struct drm_file *file)
+{
+   u32 id = *(u32 *) data;
+
+   return msm_submitqueue_remove(file->driver_priv, id);
+}
+
 static const struct drm_ioctl_desc msm_ioctls[] = {

[Freedreno] [PATCH v2 0/9] drm/msm for 4.15

2017-10-11 Thread Jordan Crouse
Here are the refreshed submitqueue/ringbuffer/preemption changes for 4.15.
These are the original changes with bug fixes, improvements and suggestions
squashed in:
 
 - Moved SUBMITQUEUE_CLOSE param to a u32 instead of reusing the struct
 - Changed to use per-ring fence contexts
 - Squashed in Rob's change to the driver version
 - Squashed in fix for preemption race condition

These apply against drm-msm-next-2017-08-22 from ~robclark/linux.


Jordan Crouse (9):
  drm/msm: Add per-instance submit queues
  drm/msm: Move memptrs to msm_gpu
  drm/msm: Support multiple ringbuffers
  drm/msm: Add a parameter query for the number of ringbuffers
  drm/msm: Shadow current pointer in the ring until command is complete
  drm/msm: Make the value of RB_CNTL (almost) generic
  drm/msm: Implement preemption for A5XX targets
  drm/msm: Removed unused struct_mutex_task
  drm/msm: dump a rd GPUADDR header for all buffers in the command

 drivers/gpu/drm/msm/Makefile  |   4 +-
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c |  10 +-
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c |  10 +-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 222 +++---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 109 ++-
 drivers/gpu/drm/msm/adreno/a5xx_power.c   |   6 +-
 drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 305 ++
 drivers/gpu/drm/msm/adreno/adreno_gpu.c   | 183 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.h   |  43 ++---
 drivers/gpu/drm/msm/msm_drv.c |  71 ++-
 drivers/gpu/drm/msm/msm_drv.h |  28 +--
 drivers/gpu/drm/msm/msm_fence.c   |   2 +-
 drivers/gpu/drm/msm/msm_fence.h   |   2 +-
 drivers/gpu/drm/msm/msm_gem.h |   5 +-
 drivers/gpu/drm/msm/msm_gem_submit.c  |  26 ++-
 drivers/gpu/drm/msm/msm_gpu.c | 174 -
 drivers/gpu/drm/msm/msm_gpu.h |  51 +++--
 drivers/gpu/drm/msm/msm_rd.c  |  30 +--
 drivers/gpu/drm/msm/msm_ringbuffer.c  |  37 +++-
 drivers/gpu/drm/msm/msm_ringbuffer.h  |  33 +++-
 drivers/gpu/drm/msm/msm_submitqueue.c | 152 +++
 include/uapi/drm/msm_drm.h|  24 +++
 22 files changed, 1249 insertions(+), 278 deletions(-)
 create mode 100644 drivers/gpu/drm/msm/adreno/a5xx_preempt.c
 create mode 100644 drivers/gpu/drm/msm/msm_submitqueue.c

-- 
1.9.1



[Freedreno] [PATCH 2/9] drm/msm: Move memptrs to msm_gpu

2017-10-11 Thread Jordan Crouse
When we move to multiple ringbuffers we're going to store the data
in the memptrs on a per-ring basis. In order to prepare for that,
move the current memptrs from the adreno namespace into msm_gpu.
This is way cleaner and immediately lets us kill off some sub-functions,
so there is much less cost later when we do move to per-ring structs.
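
After the move, the shared memptrs end up looking something like the sketch
below (field and macro names here are illustrative; the diff shows the
rbmemptr(gpu, ...) usage, not these exact definitions):

#include <stddef.h>
#include <stdint.h>

struct msm_memptrs {
	volatile uint32_t rptr;		/* CP read pointer shadow */
	volatile uint32_t fence;	/* last completed fence seqno */
};

struct msm_gpu_sketch {
	struct msm_memptrs *memptrs;	/* CPU-side mapping */
	uint64_t memptrs_iova;		/* GPU address of the same buffer */
};

/* GPU-visible address of one member, e.g. rbmemptr(gpu, fence) */
#define rbmemptr(gpu, member) \
	((gpu)->memptrs_iova + offsetof(struct msm_memptrs, member))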

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c   |  1 -
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c   |  1 -
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   |  8 ++---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 52 -
 drivers/gpu/drm/msm/adreno/adreno_gpu.h | 16 --
 drivers/gpu/drm/msm/msm_gpu.c   | 30 +--
 drivers/gpu/drm/msm/msm_gpu.h   | 17 +--
 7 files changed, 57 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 7791313..789f7fb 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -444,7 +444,6 @@ static void a3xx_dump(struct msm_gpu *gpu)
.pm_suspend = msm_gpu_pm_suspend,
.pm_resume = msm_gpu_pm_resume,
.recover = a3xx_recover,
-   .last_fence = adreno_last_fence,
.submit = adreno_submit,
.flush = adreno_flush,
.irq = a3xx_irq,
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 58341ef..f87c4312 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -532,7 +532,6 @@ static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t 
*value)
.pm_suspend = a4xx_pm_suspend,
.pm_resume = a4xx_pm_resume,
.recover = a4xx_recover,
-   .last_fence = adreno_last_fence,
.submit = adreno_submit,
.flush = adreno_flush,
.irq = a4xx_irq,
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 17c59d8..32a5b55 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -98,7 +98,6 @@ static int zap_shader_load_mdt(struct device *dev, const char 
*fwname)
 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx)
 {
-   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct msm_drm_private *priv = gpu->dev->dev_private;
struct msm_ringbuffer *ring = gpu->rb;
unsigned int i, ibs = 0;
@@ -125,8 +124,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct 
msm_gem_submit *submit,
 
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
-   OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, fence)));
-   OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, fence)));
+   OUT_RING(ring, lower_32_bits(rbmemptr(gpu, fence)));
+   OUT_RING(ring, upper_32_bits(rbmemptr(gpu, fence)));
OUT_RING(ring, submit->fence->seqno);
 
gpu->funcs->flush(gpu);
@@ -804,7 +803,7 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
struct msm_drm_private *priv = dev->dev_private;
 
dev_err(dev->dev, "gpu fault fence %x status %8.8X rb %4.4x/%4.4x ib1 
%16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
-   gpu->funcs->last_fence(gpu),
+   gpu->memptrs->fence,
gpu_read(gpu, REG_A5XX_RBBM_STATUS),
gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
@@ -992,7 +991,6 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file 
*m)
.pm_suspend = a5xx_pm_suspend,
.pm_resume = a5xx_pm_resume,
.recover = a5xx_recover,
-   .last_fence = adreno_last_fence,
.submit = a5xx_submit,
.flush = adreno_flush,
.irq = a5xx_irq,
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index c8b4ac2..b9238ac 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -82,8 +82,8 @@ int adreno_hw_init(struct msm_gpu *gpu)
gpu->rb->cur = gpu->rb->start;
 
/* reset completed fence seqno: */
-   adreno_gpu->memptrs->fence = gpu->fctx->completed_fence;
-   adreno_gpu->memptrs->rptr  = 0;
+   gpu->memptrs->fence = gpu->fctx->completed_fence;
+   gpu->memptrs->rptr  = 0;
 
/* Setup REG_CP_RB_CNTL: */
adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
@@ -98,8 +98,7 @@ int adreno_hw_init(struct msm_gpu *gpu)
 
if (!adreno_is_a430(adreno_gpu)) {
adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
-   REG_ADRENO_CP_RB_RPTR_ADDR_HI,
-   rbmemptr(adreno_gpu, rptr));
+

[Freedreno] [PATCH 4/9] drm/msm: Add a parameter query for the number of ringbuffers

2017-10-11 Thread Jordan Crouse
In order to manage ringbuffer priority to its fullest userspace
should know how many ringbuffers it has to work with. Add a
parameter to return the number of active rings.
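
On the userspace side the new parameter is read through the existing
GET_PARAM ioctl; a minimal sketch (error handling trimmed, falling back to
one ring on kernels that don't know the parameter):

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/msm_drm.h>

static uint32_t get_nr_rings(int fd)
{
	struct drm_msm_param req;

	memset(&req, 0, sizeof(req));
	req.pipe = MSM_PIPE_3D0;
	req.param = MSM_PARAM_NR_RINGS;

	if (drmIoctl(fd, DRM_IOCTL_MSM_GET_PARAM, &req))
		return 1;	/* older kernel: assume a single ring */

	return (uint32_t)req.value;
}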

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 +++
 include/uapi/drm/msm_drm.h  | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index fad946946..cf91840 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -57,6 +57,9 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, 
uint64_t *value)
return ret;
}
return -EINVAL;
+   case MSM_PARAM_NR_RINGS:
+   *value = gpu->nr_rings;
+   return 0;
default:
DBG("%s: invalid param: %u", gpu->name, param);
return -EINVAL;
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 42fa781..6363d94 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -73,6 +73,7 @@ struct drm_msm_timespec {
 #define MSM_PARAM_MAX_FREQ   0x04
 #define MSM_PARAM_TIMESTAMP  0x05
 #define MSM_PARAM_GMEM_BASE  0x06
+#define MSM_PARAM_NR_RINGS   0x07
 
 struct drm_msm_param {
__u32 pipe;   /* in, MSM_PIPE_x */
-- 
1.9.1



[Freedreno] [PATCH] drm/msm: fix return value check in _msm_gem_kernel_new()

2017-10-11 Thread Wei Yongjun
In case of error, the function msm_gem_get_vaddr() returns ERR_PTR()
and never returns NULL. The NULL test in the return value check should
be replaced with IS_ERR().
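
For reference, the general pattern for callers of an ERR_PTR()-returning
function (linux/err.h), which is what the fix applies to
msm_gem_get_vaddr():

#include <linux/err.h>

static void *use_err_ptr_api(void *(*getter)(void))
{
	void *vaddr = getter();

	if (IS_ERR(vaddr))		/* never NULL-check an ERR_PTR() API */
		return ERR_CAST(vaddr);	/* propagate the encoded errno */

	return vaddr;
}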

Fixes: 8223286d62e2 ("drm/msm: Add a helper function for in-kernel
buffer allocations")
Signed-off-by: Wei Yongjun 
---
 drivers/gpu/drm/msm/msm_gem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index f15821a0..0b338fb 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -1045,10 +1045,10 @@ static void *_msm_gem_kernel_new(struct drm_device 
*dev, uint32_t size,
}
 
vaddr = msm_gem_get_vaddr(obj);
-   if (!vaddr) {
+   if (IS_ERR(vaddr)) {
msm_gem_put_iova(obj, aspace);
drm_gem_object_unreference(obj);
-   return ERR_PTR(-ENOMEM);
+   return ERR_CAST(vaddr);
}
 
if (bo)



Re: [Freedreno] [PATCH] drm/msm/mdp5: remove less than 0 comparison for unsigned value

2017-10-11 Thread Rob Clark
On Tue, Oct 10, 2017 at 2:42 PM, Aishwarya Pant  wrote:
> pipe is an unsigned int and less than zero comparison for unsigned
> values is always false.
>
> Detected using the following cocci script:
>
> @@
> unsigned int i;
> @@
> * i < 0
>

Thanks

Reviewed-by: Rob Clark 

> Signed-off-by: Aishwarya Pant 
> ---
>  drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c 
> b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
> index f7c0698fec40..7e829a8d1cb1 100644
> --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
> +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
> @@ -599,7 +599,7 @@ static u32 mdp5_get_vblank_counter(struct drm_device 
> *dev, unsigned int pipe)
> struct drm_crtc *crtc;
> struct drm_encoder *encoder;
>
> -   if (pipe < 0 || pipe >= priv->num_crtcs)
> +   if (pipe >= priv->num_crtcs)
> return 0;
>
> crtc = priv->crtcs[pipe];
> --
> 2.11.0
>