[PATCH v3 1/2] media: tc358743: fix connected/active CSI-2 lane reporting

2018-12-05 Thread Philipp Zabel
g_mbus_config was supposed to indicate all supported lane numbers, not
only the number of those currently in active use. Since the TC358743
can dynamically reduce the number of active lanes if the required
bandwidth allows for it, report all lane numbers up to the connected
number of lanes as supported in pdata mode.
In device tree mode, do not report lane count and clock mode at all, as
the receiver driver can determine these from the device tree.

To allow communicating the number of currently active lanes, add a new
bitfield to the v4l2_mbus_config flags. This is a temporary fix, to be
used only until a better solution is found.

Signed-off-by: Philipp Zabel 
Tested-by: Dave Stevenson 
---
Changes since v2 [1]:
 - Rebased onto media/master

[1] https://patchwork.kernel.org/patch/9964141/
---
 drivers/media/i2c/tc358743.c  | 30 --
 include/media/v4l2-mediabus.h |  9 +
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c
index 00dc930e049f..b1e1ed4d9e0c 100644
--- a/drivers/media/i2c/tc358743.c
+++ b/drivers/media/i2c/tc358743.c
@@ -1606,28 +1606,29 @@ static int tc358743_g_mbus_config(struct v4l2_subdev 
*sd,
 struct v4l2_mbus_config *cfg)
 {
struct tc358743_state *state = to_state(sd);
+   const u32 mask = V4L2_MBUS_CSI2_LANE_MASK;
+
+   if (state->csi_lanes_in_use > state->bus.num_data_lanes)
+   return -EINVAL;
 
cfg->type = V4L2_MBUS_CSI2_DPHY;
+   cfg->flags = (state->csi_lanes_in_use << __ffs(mask)) & mask;
 
-   /* Support for non-continuous CSI-2 clock is missing in the driver */
-   cfg->flags = V4L2_MBUS_CSI2_CONTINUOUS_CLOCK;
+   /* In DT mode, only report the number of active lanes */
+   if (sd->dev->of_node)
+   return 0;
 
-   switch (state->csi_lanes_in_use) {
-   case 1:
+   /* Support for non-continuous CSI-2 clock is missing in pdata mode */
+   cfg->flags |= V4L2_MBUS_CSI2_CONTINUOUS_CLOCK;
+
+   if (state->bus.num_data_lanes > 0)
cfg->flags |= V4L2_MBUS_CSI2_1_LANE;
-   break;
-   case 2:
+   if (state->bus.num_data_lanes > 1)
cfg->flags |= V4L2_MBUS_CSI2_2_LANE;
-   break;
-   case 3:
+   if (state->bus.num_data_lanes > 2)
cfg->flags |= V4L2_MBUS_CSI2_3_LANE;
-   break;
-   case 4:
+   if (state->bus.num_data_lanes > 3)
cfg->flags |= V4L2_MBUS_CSI2_4_LANE;
-   break;
-   default:
-   return -EINVAL;
-   }
 
return 0;
 }
@@ -2053,6 +2054,7 @@ static int tc358743_probe(struct i2c_client *client,
if (pdata) {
state->pdata = *pdata;
state->bus.flags = V4L2_MBUS_CSI2_CONTINUOUS_CLOCK;
+   state->bus.num_data_lanes = 4;
} else {
err = tc358743_probe_of(state);
if (err == -ENODEV)
diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h
index 66cb746ceeb5..e127e3d1740e 100644
--- a/include/media/v4l2-mediabus.h
+++ b/include/media/v4l2-mediabus.h
@@ -71,6 +71,15 @@
 V4L2_MBUS_CSI2_CHANNEL_2 | \
 V4L2_MBUS_CSI2_CHANNEL_3)
 
+/*
+ * Number of lanes in use, 0 == use all available lanes (default)
+ *
+ * This is a temporary fix for devices that need to reduce the number of active
+ * lanes for certain modes, until g_mbus_config() can be replaced with a better
+ * solution.
+ */
+#define V4L2_MBUS_CSI2_LANE_MASK(0xf << 10)
+
 /**
  * enum v4l2_mbus_type - media bus type
  * @V4L2_MBUS_UNKNOWN: unknown bus type, no V4L2 mediabus configuration
-- 
2.19.1



[PATCH v3 2/2] media: imx: ask source subdevice for number of active data lanes

2018-12-05 Thread Philipp Zabel
Temporarily use g_mbus_config() to determine the number of active data
lanes used by the transmitter. If g_mbus_config is not supported or
does not return the number of active lanes, default to using all
connected data lanes.

Signed-off-by: Philipp Zabel 
Acked-by: Steve Longerbeam 
---
Changes since v2 [1]:
 - Rebased onto media/master

[1] https://patchwork.kernel.org/patch/9964151/
---
 drivers/staging/media/imx/imx6-mipi-csi2.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/media/imx/imx6-mipi-csi2.c 
b/drivers/staging/media/imx/imx6-mipi-csi2.c
index 6a1cee55a49b..ae91f0d138f3 100644
--- a/drivers/staging/media/imx/imx6-mipi-csi2.c
+++ b/drivers/staging/media/imx/imx6-mipi-csi2.c
@@ -135,10 +135,8 @@ static void csi2_enable(struct csi2_dev *csi2, bool enable)
}
 }
 
-static void csi2_set_lanes(struct csi2_dev *csi2)
+static void csi2_set_lanes(struct csi2_dev *csi2, int lanes)
 {
-   int lanes = csi2->bus.num_data_lanes;
-
writel(lanes - 1, csi2->base + CSI2_N_LANES);
 }
 
@@ -301,6 +299,9 @@ static void csi2ipu_gasket_init(struct csi2_dev *csi2)
 
 static int csi2_start(struct csi2_dev *csi2)
 {
+   const u32 mask = V4L2_MBUS_CSI2_LANE_MASK;
+   struct v4l2_mbus_config cfg;
+   int lanes = 0;
int ret;
 
ret = clk_prepare_enable(csi2->pix_clk);
@@ -316,7 +317,10 @@ static int csi2_start(struct csi2_dev *csi2)
goto err_disable_clk;
 
/* Step 4 */
-   csi2_set_lanes(csi2);
+   ret = v4l2_subdev_call(csi2->src_sd, video, g_mbus_config, );
+   if (ret == 0)
+   lanes = (cfg.flags & mask) >> __ffs(mask);
+   csi2_set_lanes(csi2, lanes ?: csi2->bus.num_data_lanes);
csi2_enable(csi2, true);
 
/* Step 5 */
-- 
2.19.1



[PATCH v5] media: imx: add mem2mem device

2018-12-03 Thread Philipp Zabel
Add a single imx-media mem2mem video device that uses the IPU IC PP
(image converter post processing) task for scaling and colorspace
conversion.
On i.MX6Q/DL SoCs with two IPUs currently only the first IPU is used.

The hardware only supports writing to destination buffers up to
1024x1024 pixels in a single pass, arbitrary sizes can be achieved
by rendering multiple tiles per frame.

Signed-off-by: Philipp Zabel 
[slongerb...@gmail.com: use ipu_image_convert_adjust(), fix
 device_run() error handling]
Signed-off-by: Steve Longerbeam 
---
Changes since v4:
 - No functional changes.
 - Dropped deprecated TODO comment. This driver has no interaction with
   the IC task v4l2 subdevices.
 - Dropped ipu-v3 patches, those are merged independently via imx-drm.
---
 drivers/staging/media/imx/Kconfig |   1 +
 drivers/staging/media/imx/Makefile|   1 +
 drivers/staging/media/imx/imx-media-dev.c |  10 +
 drivers/staging/media/imx/imx-media-mem2mem.c | 873 ++
 drivers/staging/media/imx/imx-media.h |  10 +
 5 files changed, 895 insertions(+)
 create mode 100644 drivers/staging/media/imx/imx-media-mem2mem.c

diff --git a/drivers/staging/media/imx/Kconfig 
b/drivers/staging/media/imx/Kconfig
index bfc17de56b17..07013cb3cb66 100644
--- a/drivers/staging/media/imx/Kconfig
+++ b/drivers/staging/media/imx/Kconfig
@@ -6,6 +6,7 @@ config VIDEO_IMX_MEDIA
depends on HAS_DMA
select VIDEOBUF2_DMA_CONTIG
select V4L2_FWNODE
+   select V4L2_MEM2MEM_DEV
---help---
  Say yes here to enable support for video4linux media controller
  driver for the i.MX5/6 SOC.
diff --git a/drivers/staging/media/imx/Makefile 
b/drivers/staging/media/imx/Makefile
index 698a4210316e..f2e722d0fa19 100644
--- a/drivers/staging/media/imx/Makefile
+++ b/drivers/staging/media/imx/Makefile
@@ -6,6 +6,7 @@ imx-media-ic-objs := imx-ic-common.o imx-ic-prp.o 
imx-ic-prpencvf.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-common.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-capture.o
+obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-mem2mem.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-vdic.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-ic.o
 
diff --git a/drivers/staging/media/imx/imx-media-dev.c 
b/drivers/staging/media/imx/imx-media-dev.c
index 4b344a4a3706..0376b52cb784 100644
--- a/drivers/staging/media/imx/imx-media-dev.c
+++ b/drivers/staging/media/imx/imx-media-dev.c
@@ -318,6 +318,16 @@ static int imx_media_probe_complete(struct 
v4l2_async_notifier *notifier)
goto unlock;
 
ret = v4l2_device_register_subdev_nodes(>v4l2_dev);
+   if (ret)
+   goto unlock;
+
+   imxmd->m2m_vdev = imx_media_mem2mem_device_init(imxmd);
+   if (IS_ERR(imxmd->m2m_vdev)) {
+   ret = PTR_ERR(imxmd->m2m_vdev);
+   goto unlock;
+   }
+
+   ret = imx_media_mem2mem_device_register(imxmd->m2m_vdev);
 unlock:
mutex_unlock(>mutex);
if (ret)
diff --git a/drivers/staging/media/imx/imx-media-mem2mem.c 
b/drivers/staging/media/imx/imx-media-mem2mem.c
new file mode 100644
index ..a2a4dca017ce
--- /dev/null
+++ b/drivers/staging/media/imx/imx-media-mem2mem.c
@@ -0,0 +1,873 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * i.MX IPUv3 mem2mem Scaler/CSC driver
+ *
+ * Copyright (C) 2011 Pengutronix, Sascha Hauer
+ * Copyright (C) 2018 Pengutronix, Philipp Zabel
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "imx-media.h"
+
+#define fh_to_ctx(__fh)container_of(__fh, struct mem2mem_ctx, fh)
+
+enum {
+   V4L2_M2M_SRC = 0,
+   V4L2_M2M_DST = 1,
+};
+
+struct mem2mem_priv {
+   struct imx_media_video_dev vdev;
+
+   struct v4l2_m2m_dev   *m2m_dev;
+   struct device *dev;
+
+   struct imx_media_dev  *md;
+
+   struct mutex  mutex;   /* mem2mem device mutex */
+
+   atomic_t  num_inst;
+};
+
+#define to_mem2mem_priv(v) container_of(v, struct mem2mem_priv, vdev)
+
+/* Per-queue, driver-specific private data */
+struct mem2mem_q_data {
+   struct v4l2_pix_format  cur_fmt;
+   struct v4l2_rectrect;
+};
+
+struct mem2mem_ctx {
+   struct mem2mem_priv *priv;
+
+   struct v4l2_fh  fh;
+   struct mem2mem_q_data   q_data[2];
+   int error;
+   struct ipu_image_convert_ctx *icc;
+
+   struct v4l2_ctrl_handler ctrl_hdlr;
+   int rotate;
+   bool hflip;
+   bool vflip;
+   enum ipu_rotate_moderot_mode;
+};
+
+static struct mem2mem_q_data *get_q_data(struct mem2mem_ctx *ctx,
+enum v4l2_buf_type type)
+{
+   if (V4L2_TYPE_IS_OUTPUT(type))
+   return >q_data[V4L2_M2M_SRC];
+   else

[PATCH 0/2] Clarify H.264 loop filter offset controls and fix them for coda

2018-11-28 Thread Philipp Zabel
Hi,

the coda driver handles the H.264 loop filter alpha/beta offset controls
incorrectly. When trying to fix them, I noticed that the documentation
is not clear about what these values actually are.

From the value range of -6 to +6 used in the existing drivers (s5p-mfc,
venus), it looks like they currently correspond directly to the values
stored into the slice headers: slice_alpha_c0_offset_div2 and
slice_beta_offset_div2. These are only half of the actual alpha/beta
filter offsets.

The ITU-T Rec. H.264 (02/2016) states:

  slice_alpha_c0_offset_div2 specifies the offset used in accessing the
  α [...] deblocking filter tables for filtering operations controlled
  by the macroblocks within the slice. From this value, the offset that
  shall be applied when addressing these tables shall be computed as

  FilterOffsetA = slice_alpha_c0_offset_div2 << 1 (7-32)

  The value of slice_alpha_c0_offset_div2 shall be in the range of −6 to
  +6, inclusive. When slice_alpha_c0_offset_div2 is not present in the
  slice header, the value of slice_alpha_c0_offset_div2 shall be inferred
  to be equal to 0.

And the same for slice_beta_offset_div2 / FilterOffsetB.

Do the s5p-mfc and venus drivers use the controls
V4L2_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA and _BETA directly as slice
header fields, and thus their values are to be interpreted as half of
FilterOffsetA/B defined in the H.264 spec, respectively?

regards
Philipp

Philipp Zabel (2):
  media: v4l2: clarify H.264 loop filter offset controls
  media: coda: fix H.264 deblocking filter controls

 .../media/uapi/v4l/extended-controls.rst  |  6 ++
 drivers/media/platform/coda/coda-bit.c| 19 +--
 drivers/media/platform/coda/coda-common.c | 15 +++
 drivers/media/platform/coda/coda.h|  6 +++---
 drivers/media/platform/coda/coda_regs.h   |  2 +-
 5 files changed, 26 insertions(+), 22 deletions(-)

-- 
2.19.1



[PATCH 2/2] media: coda: fix H.264 deblocking filter controls

2018-11-28 Thread Philipp Zabel
Add support for the third loop filter mode
V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED_AT_SLICE_BOUNDARY,
and fix V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA and
V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA controls.

The filter offset controls are signed values in the -6 to 6 range and
are stored into the slice header fields slice_alpha_c0_offset_div2 and
slice_beta_offset_div2. The actual filter offsets FilterOffsetA/B are
double their value, in range of -12 to 12.

Rename variables to more closely match the nomenclature in the H.264
specification.

Signed-off-by: Philipp Zabel 
---
This is under the assumption that loop filter alpha/beta controls should
store the slice header values in the -6 to 6 range. If they should store
the actual filter offset, the controls should be changed to the range
of -12 to 12, step 2, and the values halved before passing them to the
firmware.
---
 drivers/media/platform/coda/coda-bit.c| 19 +--
 drivers/media/platform/coda/coda-common.c | 15 +++
 drivers/media/platform/coda/coda.h|  6 +++---
 drivers/media/platform/coda/coda_regs.h   |  2 +-
 4 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/drivers/media/platform/coda/coda-bit.c 
b/drivers/media/platform/coda/coda-bit.c
index f2c0aa261c9b..8e0194993a52 100644
--- a/drivers/media/platform/coda/coda-bit.c
+++ b/drivers/media/platform/coda/coda-bit.c
@@ -1002,16 +1002,15 @@ static int coda_start_encoding(struct coda_ctx *ctx)
else
coda_write(dev, CODA_STD_H264,
   CODA_CMD_ENC_SEQ_COD_STD);
-   if (ctx->params.h264_deblk_enabled) {
-   value = ((ctx->params.h264_deblk_alpha &
- CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
-CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
-   ((ctx->params.h264_deblk_beta &
- CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
-CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
-   } else {
-   value = 1 << CODA_264PARAM_DISABLEDEBLK_OFFSET;
-   }
+   value = ((ctx->params.h264_disable_deblocking_filter_idc &
+ CODA_264PARAM_DISABLEDEBLK_MASK) <<
+CODA_264PARAM_DISABLEDEBLK_OFFSET) |
+   ((ctx->params.h264_slice_alpha_c0_offset_div2 &
+ CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
+CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
+   ((ctx->params.h264_slice_beta_offset_div2 &
+ CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
+CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
break;
case V4L2_PIX_FMT_JPEG:
diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index a62c47843d2f..7518f01c48f7 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -1831,14 +1831,13 @@ static int coda_s_ctrl(struct v4l2_ctrl *ctrl)
ctx->params.h264_max_qp = ctrl->val;
break;
case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA:
-   ctx->params.h264_deblk_alpha = ctrl->val;
+   ctx->params.h264_slice_alpha_c0_offset_div2 = ctrl->val;
break;
case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA:
-   ctx->params.h264_deblk_beta = ctrl->val;
+   ctx->params.h264_slice_beta_offset_div2 = ctrl->val;
break;
case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE:
-   ctx->params.h264_deblk_enabled = (ctrl->val ==
-   V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_ENABLED);
+   ctx->params.h264_disable_deblocking_filter_idc = ctrl->val;
break;
case V4L2_CID_MPEG_VIDEO_H264_PROFILE:
/* TODO: switch between baseline and constrained baseline */
@@ -1919,13 +1918,13 @@ static void coda_encode_ctrls(struct coda_ctx *ctx)
v4l2_ctrl_new_std(>ctrls, _ctrl_ops,
V4L2_CID_MPEG_VIDEO_H264_MAX_QP, 0, 51, 1, 51);
v4l2_ctrl_new_std(>ctrls, _ctrl_ops,
-   V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA, 0, 15, 1, 0);
+   V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA, -6, 6, 1, 0);
v4l2_ctrl_new_std(>ctrls, _ctrl_ops,
-   V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA, 0, 15, 1, 0);
+   V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA, -6, 6, 1, 0);
v4l2_ctrl_new_std_menu(>ctrls, _ctrl_ops

[PATCH 1/2] media: v4l2: clarify H.264 loop filter offset controls

2018-11-28 Thread Philipp Zabel
The venus and s5p-mfc drivers add the loop filter alpha/beta offset
controls V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA/BETA with a range of
-6 to +6, inclusive. This is exactly the range specified for the slice
header fields slice_alpha_c0_offset_div2 and slice_beta_offset_div2,
which store half the actual filter offsets FilterOffsetA/B.

Clarify that this control contains the halved offsets.

Signed-off-by: Philipp Zabel 
---
I assume that the venus and s5p-mfc drivers use the loop filter control
values directly as halved filter offsets, because of the ranges. If this
is not the case, the documentation should be changed to clarify that the
control values correspond to FilterOffsetA/B directly, instead.
---
 Documentation/media/uapi/v4l/extended-controls.rst | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/media/uapi/v4l/extended-controls.rst 
b/Documentation/media/uapi/v4l/extended-controls.rst
index 65a1d873196b..8dff21391c1f 100644
--- a/Documentation/media/uapi/v4l/extended-controls.rst
+++ b/Documentation/media/uapi/v4l/extended-controls.rst
@@ -1110,10 +1110,16 @@ enum v4l2_mpeg_video_h264_loop_filter_mode -
 
 ``V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA (integer)``
 Loop filter alpha coefficient, defined in the H264 standard.
+This value corresponds to the slice_alpha_c0_offset_div2 slice header
+field, and should be in the range of -6 to +6, inclusive. The actual alpha
+offset FilterOffsetA is twice this value.
 Applicable to the H264 encoder.
 
 ``V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA (integer)``
 Loop filter beta coefficient, defined in the H264 standard.
+This corresponds to the slice_beta_offset_div2 slice header field, and
+should be in the range of -6 to +6, inclusive. The actual beta offset
+FilterOffsetB is twice this value.
 Applicable to the H264 encoder.
 
 .. _v4l2-mpeg-video-h264-entropy-mode:
-- 
2.19.1



Re: [PATCH v3 6/6] media: mt9m111: allow to setup pixclk polarity

2018-11-27 Thread Philipp Zabel
Hi Sakari,

On Tue, 2018-11-27 at 15:50 +0200, Sakari Ailus wrote:
> Hi Philipp,
> 
> On Tue, Nov 27, 2018 at 02:39:27PM +0100, Philipp Zabel wrote:
> > Hi Sakari,
> > 
> > On Tue, 2018-11-27 at 15:19 +0200, Sakari Ailus wrote:
> > > Hi Marco,
> > > 
> > > On Tue, Nov 27, 2018 at 11:02:53AM +0100, Marco Felsch wrote:
> > > > From: Enrico Scholz 
> > > > 
> > > > The chip can be configured to output data transitions on the
> > > > rising or falling edge of PIXCLK (Datasheet R58:1[9]), default is on the
> > > > falling edge.
> > > > 
> > > > Parsing the fw-node is made in a subfunction to bundle all (future)
> > > > dt-parsing / fw-parsing stuff.
> > > > 
> > > > Signed-off-by: Enrico Scholz 
> > > > (m.grzesc...@pengutronix.de: Fix inverting clock. INV_PIX_CLOCK bit is 
> > > > set
> > > > per default. Set bit to 0 (enable mask bit without value) to enable
> > > > falling edge sampling.)
> > > > Signed-off-by: Michael Grzeschik 
> > > > (m.fel...@pengutronix.de: use fwnode helpers)
> > > > (m.fel...@pengutronix.de: mv fw parsing into own function)
> > > > (m.fel...@pengutronix.de: adapt commit msg)
> > > > Signed-off-by: Marco Felsch 
> > > 
> > > Applied with the following diff:
> > > 
> > > diff --git a/drivers/media/i2c/mt9m111.c b/drivers/media/i2c/mt9m111.c
> > > index 2ef332b9b914..b6011bfddde8 100644
> > > --- a/drivers/media/i2c/mt9m111.c
> > > +++ b/drivers/media/i2c/mt9m111.c
> > > @@ -1172,24 +1172,24 @@ static int mt9m111_video_probe(struct i2c_client 
> > > *client)
> > >  
> > >  static int mt9m111_probe_fw(struct i2c_client *client, struct mt9m111 
> > > *mt9m111)
> > >  {
> > > - struct v4l2_fwnode_endpoint *bus_cfg;
> > > + struct v4l2_fwnode_endpoint bus_cfg = {
> > > + .bus_type = V4L2_MBUS_PARALLEL
> > > + };
> > >   struct fwnode_handle *np;
> > > - int ret = 0;
> > > + int ret;
> > >  
> > >   np = fwnode_graph_get_next_endpoint(dev_fwnode(>dev), NULL);
> > >   if (!np)
> > >   return -EINVAL;
> > >  
> > > - bus_cfg = v4l2_fwnode_endpoint_alloc_parse(np);
> > > - if (IS_ERR(bus_cfg)) {
> > > - ret = PTR_ERR(bus_cfg);
> > > + ret = v4l2_fwnode_endpoint_alloc_parse(np, _cfg);
> > 
> > Should that be
> > 
> > +   ret = v4l2_fwnode_endpoint_parse(np, _cfg);
> > 
> > instead?
> 
> Could be. I'd expect the driver to need the link frequency at some point
> after which you'd need the variable size properties anyway. But that's not
> the case now.

I don't think the link-frequencies property will be used, this is just a
parallel device. But Marco chose to use _alloc_parse because of what the
v4l2_fwnode_endpoint_parse() documentation says:

/*
 * NOTE: This function does not parse properties the size of which is variable
 * without a low fixed limit. Please use v4l2_fwnode_endpoint_alloc_parse() in
 * new drivers instead.
 */

So maybe we want to use v4l2_fwnode_endpoint_alloc_parse() always. There
is no unnecessary allocation, just a lookup of the non-existing link-
frequencies property.

regards
Philipp


Re: [PATCH v3 6/6] media: mt9m111: allow to setup pixclk polarity

2018-11-27 Thread Philipp Zabel
Hi Sakari,

On Tue, 2018-11-27 at 15:19 +0200, Sakari Ailus wrote:
> Hi Marco,
> 
> On Tue, Nov 27, 2018 at 11:02:53AM +0100, Marco Felsch wrote:
> > From: Enrico Scholz 
> > 
> > The chip can be configured to output data transitions on the
> > rising or falling edge of PIXCLK (Datasheet R58:1[9]), default is on the
> > falling edge.
> > 
> > Parsing the fw-node is made in a subfunction to bundle all (future)
> > dt-parsing / fw-parsing stuff.
> > 
> > Signed-off-by: Enrico Scholz 
> > (m.grzesc...@pengutronix.de: Fix inverting clock. INV_PIX_CLOCK bit is set
> > per default. Set bit to 0 (enable mask bit without value) to enable
> > falling edge sampling.)
> > Signed-off-by: Michael Grzeschik 
> > (m.fel...@pengutronix.de: use fwnode helpers)
> > (m.fel...@pengutronix.de: mv fw parsing into own function)
> > (m.fel...@pengutronix.de: adapt commit msg)
> > Signed-off-by: Marco Felsch 
> 
> Applied with the following diff:
> 
> diff --git a/drivers/media/i2c/mt9m111.c b/drivers/media/i2c/mt9m111.c
> index 2ef332b9b914..b6011bfddde8 100644
> --- a/drivers/media/i2c/mt9m111.c
> +++ b/drivers/media/i2c/mt9m111.c
> @@ -1172,24 +1172,24 @@ static int mt9m111_video_probe(struct i2c_client 
> *client)
>  
>  static int mt9m111_probe_fw(struct i2c_client *client, struct mt9m111 
> *mt9m111)
>  {
> - struct v4l2_fwnode_endpoint *bus_cfg;
> + struct v4l2_fwnode_endpoint bus_cfg = {
> + .bus_type = V4L2_MBUS_PARALLEL
> + };
>   struct fwnode_handle *np;
> - int ret = 0;
> + int ret;
>  
>   np = fwnode_graph_get_next_endpoint(dev_fwnode(>dev), NULL);
>   if (!np)
>   return -EINVAL;
>  
> - bus_cfg = v4l2_fwnode_endpoint_alloc_parse(np);
> - if (IS_ERR(bus_cfg)) {
> - ret = PTR_ERR(bus_cfg);
> + ret = v4l2_fwnode_endpoint_alloc_parse(np, _cfg);

Should that be

+   ret = v4l2_fwnode_endpoint_parse(np, _cfg);

instead?

> + if (ret)
>   goto out_put_fw;
> - }
>  
> - mt9m111->pclk_sample = !!(bus_cfg->bus.parallel.flags &
> + mt9m111->pclk_sample = !!(bus_cfg.bus.parallel.flags &
> V4L2_MBUS_PCLK_SAMPLE_RISING);
>  
> - v4l2_fwnode_endpoint_free(bus_cfg);
> + v4l2_fwnode_endpoint_free(_cfg);
>  
>  out_put_fw:
>   fwnode_handle_put(np);
> 
> Please base on current media tree master on the next time. Thanks.

regards
Philipp


Re: 'bad remote port parent' warnings

2018-11-22 Thread Philipp Zabel
On Tue, 2018-11-20 at 10:10 -0200, Fabio Estevam wrote:
> Hi,
> 
> On an imx6q-wandboard running linux-next 20181120 there are the following warnings:
> 
> [4.327794] video-mux 20e.iomuxc-gpr:ipu1_csi0_mux: bad remote
> port parent
> [4.336118] video-mux 20e.iomuxc-gpr:ipu2_csi1_mux: bad remote
> port parent
> 
> Is there anything we should do to prevent this from happening?

There are empty endpoint nodes (without remote-endpoint property)
labeled ipu1_csi[01]_mux_from_parallel_sensor in the i.MX6 device trees
for board DT implementers' convenience. See commit 2539f517acbdc ("ARM:
dts: imx6qdl: Add video multiplexers, mipi_csi, and their connections").

We had a discussion about this issue in February when this caused a
probing error: https://patchwork.kernel.org/patch/10234469/

We could demote the warning to a debug message, make the wording a bit
less misleading (there is no bad remote port parent, there is just no
remote endpoint at all), or we could just accept the error message for
old DTBs and mark these empty endpoint nodes with the /omit-if-no-ref/
keyword to let dtc remove them if they are unused.

regards
Philipp


[PATCH v4] media: vb2: Allow reqbufs(0) with "in use" MMAP buffers

2018-11-15 Thread Philipp Zabel
From: John Sheu 

Videobuf2 presently does not allow VIDIOC_REQBUFS to destroy outstanding
buffers if the queue is of type V4L2_MEMORY_MMAP, and if the buffers are
considered "in use".  This is different behavior than for other memory
types and prevents us from deallocating buffers in the following two cases:

1) There are outstanding mmap()ed views on the buffer. However even if
   we put the buffer in reqbufs(0), there will be remaining references,
   due to vma .open/close() adjusting vb2 buffer refcount appropriately.
   This means that the buffer will be in fact freed only when the last
   mmap()ed view is unmapped.

2) Buffer has been exported as a DMABUF. Refcount of the vb2 buffer
   is managed properly by VB2 DMABUF ops, i.e. incremented on DMABUF
   get and decremented on DMABUF release. This means that the buffer
   will be alive until all importers release it.

Considering both cases above, there does not seem to be any need to
prevent reqbufs(0) operation, because buffer lifetime is already
properly managed by both mmap() and DMABUF code paths. Let's remove it
and allow userspace freeing the queue (and potentially allocating a new
one) even though old buffers might be still in processing.

To let userspace know that the kernel now supports orphaning buffers
that are still in use, add a new V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS
to be set by reqbufs and create_bufs.

Signed-off-by: John Sheu 
Reviewed-by: Pawel Osciak 
Reviewed-by: Tomasz Figa 
Signed-off-by: Tomasz Figa 
[p.za...@pengutronix.de: added V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS,
 updated documentation, and added back debug message]
Signed-off-by: Philipp Zabel 
Acked-by: Sakari Ailus 
---
Changes since v3:
 - Rephrased documentation
 - Added debug message
---
 Documentation/media/uapi/v4l/vidioc-reqbufs.rst | 16 +---
 drivers/media/common/videobuf2/videobuf2-core.c |  8 +++-
 drivers/media/common/videobuf2/videobuf2-v4l2.c |  2 +-
 include/uapi/linux/videodev2.h  |  1 +
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/Documentation/media/uapi/v4l/vidioc-reqbufs.rst 
b/Documentation/media/uapi/v4l/vidioc-reqbufs.rst
index d40c60e8..fb1e643fda5f 100644
--- a/Documentation/media/uapi/v4l/vidioc-reqbufs.rst
+++ b/Documentation/media/uapi/v4l/vidioc-reqbufs.rst
@@ -59,9 +59,14 @@ When the I/O method is not supported the ioctl returns an 
``EINVAL`` error
 code.
 
 Applications can call :ref:`VIDIOC_REQBUFS` again to change the number of
-buffers, however this cannot succeed when any buffers are still mapped.
-A ``count`` value of zero frees all buffers, after aborting or finishing
-any DMA in progress, an implicit
+buffers. Note that if any buffers are still mapped or exported via DMABUF,
+then :ref:`VIDIOC_REQBUFS` can only succeed if the
+``V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS`` capability is set. Otherwise
+:ref:`VIDIOC_REQBUFS` will return the ``EBUSY`` error code.
+If ``V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS`` is set, then these buffers are
+orphaned and will be freed when they are unmapped or when the exported DMABUF
+fds are closed. A ``count`` value of zero frees or orphans all buffers, after
+aborting or finishing any DMA in progress, an implicit
 :ref:`VIDIOC_STREAMOFF `.
 
 
@@ -132,6 +137,11 @@ any DMA in progress, an implicit
 * - ``V4L2_BUF_CAP_SUPPORTS_REQUESTS``
   - 0x0008
   - This buffer type supports :ref:`requests `.
+* - ``V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS``
+  - 0x0010
+  - The kernel allows calling :ref:`VIDIOC_REQBUFS` while buffers are still
+mapped or exported via DMABUF. These orphaned buffers will be freed
+when they are unmapped or when the exported DMABUF fds are closed.
 
 Return Value
 
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c 
b/drivers/media/common/videobuf2/videobuf2-core.c
index 975ff5669f72..7329cafc080a 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -679,11 +679,9 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory 
memory,
 * are not in use and can be freed.
 */
mutex_lock(>mmap_lock);
-   if (q->memory == VB2_MEMORY_MMAP && __buffers_in_use(q)) {
-   mutex_unlock(>mmap_lock);
-   dprintk(1, "memory in use, cannot free\n");
-   return -EBUSY;
-   }
+   if (debug && q->memory == VB2_MEMORY_MMAP &&
+   __buffers_in_use(q))
+   dprintk(1, "memory in use, orphaning buffers\n");
 
/*
 * Call queue_cancel to clean up any buffers in the
diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c 
b/drivers/media/common/videobuf2/videobuf2-v4l2.c
index a17033ab2c22..f02d452ceeb9 100644
--- a/drivers/media/common/videobuf2/vide

[PATCH v2] v4l2-compliance: test orphaned buffer support

2018-11-15 Thread Philipp Zabel
Test that V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS is reported equally for
both MMAP and DMABUF memory types. If supported, try to orphan buffers
by calling reqbufs(0) before unmapping or closing DMABUF fds.

Also close exported DMABUF fds and free buffers in testDmaBuf if
orphaned buffers are not supported.

Signed-off-by: Philipp Zabel 
---
Changes since v1:
 - Rename has_orphaned_bufs to supports_orphaned_bufs
 - Check that capabilities are independent of memory type
 - Check that orphaned buffer support is independent of queue for M2M
 - Check that reqbufs(0) returns -EBUSY without orphaned buffer support
---
 contrib/freebsd/include/linux/videodev2.h   |  1 +
 include/linux/videodev2.h   |  1 +
 utils/common/v4l2-info.cpp  |  1 +
 utils/v4l2-compliance/v4l2-compliance.h |  1 +
 utils/v4l2-compliance/v4l2-test-buffers.cpp | 51 ++---
 5 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/contrib/freebsd/include/linux/videodev2.h 
b/contrib/freebsd/include/linux/videodev2.h
index 9928c00e4b68..33153b53c175 100644
--- a/contrib/freebsd/include/linux/videodev2.h
+++ b/contrib/freebsd/include/linux/videodev2.h
@@ -907,6 +907,7 @@ struct v4l2_requestbuffers {
 #define V4L2_BUF_CAP_SUPPORTS_USERPTR  (1 << 1)
 #define V4L2_BUF_CAP_SUPPORTS_DMABUF   (1 << 2)
 #define V4L2_BUF_CAP_SUPPORTS_REQUESTS (1 << 3)
+#define V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS (1 << 4)
 
 /**
  * struct v4l2_plane - plane info for multi-planar buffers
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 79418cd39480..a39300cacb6a 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -873,6 +873,7 @@ struct v4l2_requestbuffers {
 #define V4L2_BUF_CAP_SUPPORTS_USERPTR  (1 << 1)
 #define V4L2_BUF_CAP_SUPPORTS_DMABUF   (1 << 2)
 #define V4L2_BUF_CAP_SUPPORTS_REQUESTS (1 << 3)
+#define V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS (1 << 4)
 
 /**
  * struct v4l2_plane - plane info for multi-planar buffers
diff --git a/utils/common/v4l2-info.cpp b/utils/common/v4l2-info.cpp
index 258e5446f030..3699c35cb9d6 100644
--- a/utils/common/v4l2-info.cpp
+++ b/utils/common/v4l2-info.cpp
@@ -200,6 +200,7 @@ static const flag_def bufcap_def[] = {
{ V4L2_BUF_CAP_SUPPORTS_USERPTR, "userptr" },
{ V4L2_BUF_CAP_SUPPORTS_DMABUF, "dmabuf" },
{ V4L2_BUF_CAP_SUPPORTS_REQUESTS, "requests" },
+   { V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS, "orphaned-bufs" },
{ 0, NULL }
 };
 
diff --git a/utils/v4l2-compliance/v4l2-compliance.h 
b/utils/v4l2-compliance/v4l2-compliance.h
index def185f17261..02d616f0b47c 100644
--- a/utils/v4l2-compliance/v4l2-compliance.h
+++ b/utils/v4l2-compliance/v4l2-compliance.h
@@ -119,6 +119,7 @@ struct base_node {
__u32 valid_buftypes;
__u32 valid_buftype;
__u32 valid_memorytype;
+   bool supports_orphaned_bufs;
 };
 
 struct node : public base_node, public cv4l_fd {
diff --git a/utils/v4l2-compliance/v4l2-test-buffers.cpp 
b/utils/v4l2-compliance/v4l2-test-buffers.cpp
index a84be0ab799a..42e743fef43b 100644
--- a/utils/v4l2-compliance/v4l2-test-buffers.cpp
+++ b/utils/v4l2-compliance/v4l2-test-buffers.cpp
@@ -400,14 +400,18 @@ int testReqBufs(struct node *node)
mmap_valid = !ret;
if (mmap_valid)
caps = q.g_capabilities();
-   if (caps)
+   if (caps) {
fail_on_test(mmap_valid ^ !!(caps & 
V4L2_BUF_CAP_SUPPORTS_MMAP));
+   if (caps & V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS)
+   node->supports_orphaned_bufs = true;
+   }
 
q.init(i, V4L2_MEMORY_USERPTR);
ret = q.reqbufs(node, 0);
fail_on_test(ret && ret != EINVAL);
userptr_valid = !ret;
fail_on_test(!mmap_valid && userptr_valid);
+   fail_on_test(userptr_valid && (caps != q.g_capabilities()));
if (caps)
fail_on_test(userptr_valid ^ !!(caps & 
V4L2_BUF_CAP_SUPPORTS_USERPTR));
 
@@ -416,6 +420,7 @@ int testReqBufs(struct node *node)
fail_on_test(ret && ret != EINVAL);
dmabuf_valid = !ret;
fail_on_test(!mmap_valid && dmabuf_valid);
+   fail_on_test(dmabuf_valid && (caps != q.g_capabilities()));
if (caps)
fail_on_test(dmabuf_valid ^ !!(caps & 
V4L2_BUF_CAP_SUPPORTS_DMABUF));
 
@@ -754,9 +759,13 @@ static int captureBufs(struct node *node, const cv4l_queue 
,
 
 static int setupM2M(struct node *node, cv4l_queue &q)
 {
+   __u32 caps;
+
last_m2m_seq.init();
 
fail_on_test(q.reqbufs(node, 2));
+   caps = q.g_capabilities();
+   fail_on_test(node->supports_orphaned_bufs ^ !!(ca

Re: [PATCH v4l-utils] v4l2-compliance: test orphaned buffer support

2018-11-15 Thread Philipp Zabel
On Thu, 2018-11-15 at 11:21 +0100, Hans Verkuil wrote:
> On 11/14/18 15:38, Philipp Zabel wrote:
> > Test that V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS is reported equally for
> > both MMAP and DMABUF memory types. If supported, try to orphan buffers
> > by calling reqbufs(0) before unmapping or closing DMABUF fds.
> > 
> > Also close exported DMABUF fds and free buffers in testDmaBuf if
> > orphaned buffers are not supported.
> > 
> > Signed-off-by: Philipp Zabel 
> > ---
> >  contrib/freebsd/include/linux/videodev2.h   |  1 +
> >  include/linux/videodev2.h   |  1 +
> >  utils/common/v4l2-info.cpp  |  1 +
> >  utils/v4l2-compliance/v4l2-compliance.h |  1 +
> >  utils/v4l2-compliance/v4l2-test-buffers.cpp | 35 +
> >  5 files changed, 33 insertions(+), 6 deletions(-)
> > 
> > diff --git a/contrib/freebsd/include/linux/videodev2.h 
> > b/contrib/freebsd/include/linux/videodev2.h
> > index 9928c00e4b68..33153b53c175 100644
> > --- a/contrib/freebsd/include/linux/videodev2.h
> > +++ b/contrib/freebsd/include/linux/videodev2.h
> > @@ -907,6 +907,7 @@ struct v4l2_requestbuffers {
> >  #define V4L2_BUF_CAP_SUPPORTS_USERPTR  (1 << 1)
> >  #define V4L2_BUF_CAP_SUPPORTS_DMABUF   (1 << 2)
> >  #define V4L2_BUF_CAP_SUPPORTS_REQUESTS (1 << 3)
> > +#define V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS (1 << 4)
> >  
> >  /**
> >   * struct v4l2_plane - plane info for multi-planar buffers
> > diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
> > index 79418cd39480..a39300cacb6a 100644
> > --- a/include/linux/videodev2.h
> > +++ b/include/linux/videodev2.h
> > @@ -873,6 +873,7 @@ struct v4l2_requestbuffers {
> >  #define V4L2_BUF_CAP_SUPPORTS_USERPTR  (1 << 1)
> >  #define V4L2_BUF_CAP_SUPPORTS_DMABUF   (1 << 2)
> >  #define V4L2_BUF_CAP_SUPPORTS_REQUESTS (1 << 3)
> > +#define V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS (1 << 4)
> >  
> >  /**
> >   * struct v4l2_plane - plane info for multi-planar buffers
> > diff --git a/utils/common/v4l2-info.cpp b/utils/common/v4l2-info.cpp
> > index 258e5446f030..3699c35cb9d6 100644
> > --- a/utils/common/v4l2-info.cpp
> > +++ b/utils/common/v4l2-info.cpp
> > @@ -200,6 +200,7 @@ static const flag_def bufcap_def[] = {
> > { V4L2_BUF_CAP_SUPPORTS_USERPTR, "userptr" },
> > { V4L2_BUF_CAP_SUPPORTS_DMABUF, "dmabuf" },
> > { V4L2_BUF_CAP_SUPPORTS_REQUESTS, "requests" },
> > +   { V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS, "orphaned-bufs" },
> > { 0, NULL }
> >  };
> >  
> > diff --git a/utils/v4l2-compliance/v4l2-compliance.h 
> > b/utils/v4l2-compliance/v4l2-compliance.h
> > index def185f17261..88ec260a9bcc 100644
> > --- a/utils/v4l2-compliance/v4l2-compliance.h
> > +++ b/utils/v4l2-compliance/v4l2-compliance.h
> > @@ -119,6 +119,7 @@ struct base_node {
> > __u32 valid_buftypes;
> > __u32 valid_buftype;
> > __u32 valid_memorytype;
> > +   bool has_orphaned_bufs;
> 
> I'd rename that to supports_orphaned_bufs.

Ok.

> >  };
> >  
> >  struct node : public base_node, public cv4l_fd {
> > diff --git a/utils/v4l2-compliance/v4l2-test-buffers.cpp 
> > b/utils/v4l2-compliance/v4l2-test-buffers.cpp
> > index c59a56d9ced7..6174015cb4e7 100644
> > --- a/utils/v4l2-compliance/v4l2-test-buffers.cpp
> > +++ b/utils/v4l2-compliance/v4l2-test-buffers.cpp
> > @@ -400,8 +400,11 @@ int testReqBufs(struct node *node)
> > mmap_valid = !ret;
> > if (mmap_valid)
> > caps = q.g_capabilities();
> > -   if (caps)
> > +   if (caps) {
> > fail_on_test(mmap_valid ^ !!(caps & 
> > V4L2_BUF_CAP_SUPPORTS_MMAP));
> > +   if (caps & V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS)
> > +   node->has_orphaned_bufs = true;
> > +   }
> >  
> > q.init(i, V4L2_MEMORY_USERPTR);
> > ret = q.reqbufs(node, 0);
> > @@ -418,8 +421,11 @@ int testReqBufs(struct node *node)
> > fail_on_test(!mmap_valid && dmabuf_valid);
> > // Note: dmabuf is only supported with vb2, so we can assume a
> > // non-0 caps value if dmabuf is supported.
> > -   if (caps || dmabuf_valid)
> > +   if (caps || dmabuf_valid) {
> > fail_on_test(dmabuf_valid ^ !!(caps & 
> > V4L2_BUF_CAP_S

[PATCH v4l-utils] v4l2-compliance: limit acceptable width/height to 65536 in VIDIOC_SUBDEV_G/S_FMT test

2018-11-14 Thread Philipp Zabel
Fail if the driver returns unrealistically large frame sizes.

Signed-off-by: Philipp Zabel 
---
 utils/v4l2-compliance/v4l2-test-subdevs.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/v4l2-compliance/v4l2-test-subdevs.cpp 
b/utils/v4l2-compliance/v4l2-test-subdevs.cpp
index 031fd6e78c56..29987b310448 100644
--- a/utils/v4l2-compliance/v4l2-test-subdevs.cpp
+++ b/utils/v4l2-compliance/v4l2-test-subdevs.cpp
@@ -308,8 +308,8 @@ int testSubDevFrameInterval(struct node *node, unsigned pad)
 static int checkMBusFrameFmt(struct node *node, struct v4l2_mbus_framefmt &fmt)
 {
fail_on_test(check_0(fmt.reserved, sizeof(fmt.reserved)));
-   fail_on_test(fmt.width == 0 || fmt.width == ~0U);
-   fail_on_test(fmt.height == 0 || fmt.height == ~0U);
+   fail_on_test(fmt.width == 0 || fmt.width > 65536);
+   fail_on_test(fmt.height == 0 || fmt.height > 65536);
fail_on_test(fmt.code == 0 || fmt.code == ~0U);
fail_on_test(fmt.field == ~0U);
if (!node->is_passthrough_subdev) {
-- 
2.19.1



[PATCH v3] media: vb2: Allow reqbufs(0) with "in use" MMAP buffers

2018-11-14 Thread Philipp Zabel
From: John Sheu 

Videobuf2 presently does not allow VIDIOC_REQBUFS to destroy outstanding
buffers if the queue is of type V4L2_MEMORY_MMAP, and if the buffers are
considered "in use".  This is different behavior than for other memory
types and prevents us from deallocating buffers in the following two cases:

1) There are outstanding mmap()ed views on the buffer. However even if
   we put the buffer in reqbufs(0), there will be remaining references,
   due to vma .open/close() adjusting vb2 buffer refcount appropriately.
   This means that the buffer will be in fact freed only when the last
   mmap()ed view is unmapped.

2) Buffer has been exported as a DMABUF. Refcount of the vb2 buffer
   is managed properly by VB2 DMABUF ops, i.e. incremented on DMABUF
   get and decremented on DMABUF release. This means that the buffer
   will be alive until all importers release it.

Considering both cases above, there does not seem to be any need to
prevent reqbufs(0) operation, because buffer lifetime is already
properly managed by both mmap() and DMABUF code paths. Let's remove it
and allow userspace to free the queue (and potentially allocate a new
one) even though old buffers might still be in processing.

To let userspace know that the kernel now supports orphaning buffers
that are still in use, add a new V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS
to be set by reqbufs and create_bufs.

Signed-off-by: John Sheu 
Reviewed-by: Pawel Osciak 
Reviewed-by: Tomasz Figa 
Signed-off-by: Tomasz Figa 
[p.za...@pengutronix.de: moved __vb2_queue_cancel out of the mmap_lock
 and added V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS]
Signed-off-by: Philipp Zabel 
Acked-by: Sakari Ailus 
---
Changes since v2:
 - Added documentation for V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS
---
 .../media/uapi/v4l/vidioc-reqbufs.rst | 15 ---
 .../media/common/videobuf2/videobuf2-core.c   | 26 +--
 .../media/common/videobuf2/videobuf2-v4l2.c   |  2 +-
 include/uapi/linux/videodev2.h|  1 +
 4 files changed, 15 insertions(+), 29 deletions(-)

diff --git a/Documentation/media/uapi/v4l/vidioc-reqbufs.rst 
b/Documentation/media/uapi/v4l/vidioc-reqbufs.rst
index d40c60e8..d53006b938ac 100644
--- a/Documentation/media/uapi/v4l/vidioc-reqbufs.rst
+++ b/Documentation/media/uapi/v4l/vidioc-reqbufs.rst
@@ -59,9 +59,12 @@ When the I/O method is not supported the ioctl returns an 
``EINVAL`` error
 code.
 
 Applications can call :ref:`VIDIOC_REQBUFS` again to change the number of
-buffers, however this cannot succeed when any buffers are still mapped.
-A ``count`` value of zero frees all buffers, after aborting or finishing
-any DMA in progress, an implicit
+buffers. Note that if any buffers are still mapped or exported via DMABUF,
+this can only succeed if the ``V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS`` flag
+is set. In that case these buffers are orphaned and will be freed when they
+are unmapped or when the exported DMABUF fds are closed.
+A ``count`` value of zero frees or orphans all buffers, after aborting or
+finishing any DMA in progress, an implicit
 :ref:`VIDIOC_STREAMOFF <VIDIOC_STREAMON>`.
 
 
@@ -132,6 +135,12 @@ any DMA in progress, an implicit
 * - ``V4L2_BUF_CAP_SUPPORTS_REQUESTS``
   - 0x0008
   - This buffer type supports :ref:`requests <media-request-api>`.
+* - ``V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS``
+  - 0x0010
+  - The kernel allows calling :ref:`VIDIOC_REQBUFS` with a ``count`` value
+of zero while buffers are still mapped or exported via DMABUF. These
+orphaned buffers will be freed when they are unmapped or when the
+exported DMABUF fds are closed.
 
 Return Value
 
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c 
b/drivers/media/common/videobuf2/videobuf2-core.c
index 975ff5669f72..608459450c1e 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -553,20 +553,6 @@ bool vb2_buffer_in_use(struct vb2_queue *q, struct 
vb2_buffer *vb)
 }
 EXPORT_SYMBOL(vb2_buffer_in_use);
 
-/*
- * __buffers_in_use() - return true if any buffers on the queue are in use and
- * the queue cannot be freed (by the means of REQBUFS(0)) call
- */
-static bool __buffers_in_use(struct vb2_queue *q)
-{
-   unsigned int buffer;
-   for (buffer = 0; buffer < q->num_buffers; ++buffer) {
-   if (vb2_buffer_in_use(q, q->bufs[buffer]))
-   return true;
-   }
-   return false;
-}
-
 void vb2_core_querybuf(struct vb2_queue *q, unsigned int index, void *pb)
 {
call_void_bufop(q, fill_user_buffer, q->bufs[index], pb);
@@ -674,23 +660,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory 
memory,
 
if (*count == 0 || q->num_buffers != 0 ||
(q->memory != VB2_MEMORY_UNKNOWN && q->memory != memory)) {
-   /*
-* We already have buffers allocated, so first check if they
-  

Re: [PATCH 07/15] media: coda: don't disable IRQs across buffer meta handling

2018-11-14 Thread Philipp Zabel
On Mon, 2018-11-05 at 16:25 +0100, Philipp Zabel wrote:
> From: Lucas Stach 
> 
> The CODA driver uses threaded IRQs only, so there is nothing happening
> in hardirq context that could interfere with the buffer meta handling.
> 
> Signed-off-by: Lucas Stach 

Signed-off-by: Philipp Zabel 

regards
Philipp


Re: [PATCH 04/15] media: coda: limit queueing into internal bitstream buffer

2018-11-14 Thread Philipp Zabel
Hi,

I forgot to add the proper SoB tag:

On Mon, 2018-11-05 at 16:25 +0100, Philipp Zabel wrote:
> From: Lucas Stach 
> 
> The ringbuffer used to hold the bitstream is very conservatively sized,
> as keyframes can get very large and still need to fit into this buffer.
> This means that the buffer is way oversized for the average stream to
> the extent that it will hold a few hundred frames when the video data
> is compressing well.
> 
> The current strategy of queueing as much bitstream data as possible
> leads to large delays when draining the decoder. In order to keep the
> drain latency to a reasonable bound, try to only queue a full reorder
> window of buffers. We can't always hit this low target for very well
> compressible video data, as we might end up with less than the minimum
> amount of data that needs to be available to the bitstream prefetcher,
> so we must take this into account and allow more buffers to be queued
> in this case.
> 
> Signed-off-by: Lucas Stach 

Signed-off-by: Philipp Zabel 

regards
Philipp


[PATCH v4l-utils] v4l2-compliance: test orphaned buffer support

2018-11-14 Thread Philipp Zabel
Test that V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS is reported equally for
both MMAP and DMABUF memory types. If supported, try to orphan buffers
by calling reqbufs(0) before unmapping or closing DMABUF fds.

Also close exported DMABUF fds and free buffers in testDmaBuf if
orphaned buffers are not supported.

Signed-off-by: Philipp Zabel 
---
 contrib/freebsd/include/linux/videodev2.h   |  1 +
 include/linux/videodev2.h   |  1 +
 utils/common/v4l2-info.cpp  |  1 +
 utils/v4l2-compliance/v4l2-compliance.h |  1 +
 utils/v4l2-compliance/v4l2-test-buffers.cpp | 35 +
 5 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/contrib/freebsd/include/linux/videodev2.h 
b/contrib/freebsd/include/linux/videodev2.h
index 9928c00e4b68..33153b53c175 100644
--- a/contrib/freebsd/include/linux/videodev2.h
+++ b/contrib/freebsd/include/linux/videodev2.h
@@ -907,6 +907,7 @@ struct v4l2_requestbuffers {
 #define V4L2_BUF_CAP_SUPPORTS_USERPTR  (1 << 1)
 #define V4L2_BUF_CAP_SUPPORTS_DMABUF   (1 << 2)
 #define V4L2_BUF_CAP_SUPPORTS_REQUESTS (1 << 3)
+#define V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS (1 << 4)
 
 /**
  * struct v4l2_plane - plane info for multi-planar buffers
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 79418cd39480..a39300cacb6a 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -873,6 +873,7 @@ struct v4l2_requestbuffers {
 #define V4L2_BUF_CAP_SUPPORTS_USERPTR  (1 << 1)
 #define V4L2_BUF_CAP_SUPPORTS_DMABUF   (1 << 2)
 #define V4L2_BUF_CAP_SUPPORTS_REQUESTS (1 << 3)
+#define V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS (1 << 4)
 
 /**
  * struct v4l2_plane - plane info for multi-planar buffers
diff --git a/utils/common/v4l2-info.cpp b/utils/common/v4l2-info.cpp
index 258e5446f030..3699c35cb9d6 100644
--- a/utils/common/v4l2-info.cpp
+++ b/utils/common/v4l2-info.cpp
@@ -200,6 +200,7 @@ static const flag_def bufcap_def[] = {
{ V4L2_BUF_CAP_SUPPORTS_USERPTR, "userptr" },
{ V4L2_BUF_CAP_SUPPORTS_DMABUF, "dmabuf" },
{ V4L2_BUF_CAP_SUPPORTS_REQUESTS, "requests" },
+   { V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS, "orphaned-bufs" },
{ 0, NULL }
 };
 
diff --git a/utils/v4l2-compliance/v4l2-compliance.h 
b/utils/v4l2-compliance/v4l2-compliance.h
index def185f17261..88ec260a9bcc 100644
--- a/utils/v4l2-compliance/v4l2-compliance.h
+++ b/utils/v4l2-compliance/v4l2-compliance.h
@@ -119,6 +119,7 @@ struct base_node {
__u32 valid_buftypes;
__u32 valid_buftype;
__u32 valid_memorytype;
+   bool has_orphaned_bufs;
 };
 
 struct node : public base_node, public cv4l_fd {
diff --git a/utils/v4l2-compliance/v4l2-test-buffers.cpp 
b/utils/v4l2-compliance/v4l2-test-buffers.cpp
index c59a56d9ced7..6174015cb4e7 100644
--- a/utils/v4l2-compliance/v4l2-test-buffers.cpp
+++ b/utils/v4l2-compliance/v4l2-test-buffers.cpp
@@ -400,8 +400,11 @@ int testReqBufs(struct node *node)
mmap_valid = !ret;
if (mmap_valid)
caps = q.g_capabilities();
-   if (caps)
+   if (caps) {
fail_on_test(mmap_valid ^ !!(caps & 
V4L2_BUF_CAP_SUPPORTS_MMAP));
+   if (caps & V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS)
+   node->has_orphaned_bufs = true;
+   }
 
q.init(i, V4L2_MEMORY_USERPTR);
ret = q.reqbufs(node, 0);
@@ -418,8 +421,11 @@ int testReqBufs(struct node *node)
fail_on_test(!mmap_valid && dmabuf_valid);
// Note: dmabuf is only supported with vb2, so we can assume a
// non-0 caps value if dmabuf is supported.
-   if (caps || dmabuf_valid)
+   if (caps || dmabuf_valid) {
fail_on_test(dmabuf_valid ^ !!(caps & 
V4L2_BUF_CAP_SUPPORTS_DMABUF));
+   if (node->has_orphaned_bufs)
+   fail_on_test(userptr_valid ^ !!(caps & 
V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS));
+   }
 
fail_on_test((can_stream && !is_overlay) && !mmap_valid && 
!userptr_valid && !dmabuf_valid);
fail_on_test((!can_stream || is_overlay) && (mmap_valid || 
userptr_valid || dmabuf_valid));
@@ -967,12 +973,22 @@ int testMmap(struct node *node, unsigned frame_count)
fail_on_test(captureBufs(node, q, m2m_q, frame_count, true));
fail_on_test(node->streamoff(q.g_type()));
fail_on_test(node->streamoff(q.g_type()));
-   q.munmap_bufs(node);
-   fail_on_test(q.reqbufs(node, 0));
+   if (node->has_orphaned_bufs) {
+   fail_on_test(q.reqbufs(node, 0));
+   q.munmap_bufs(node);
+  

Re: [PATCH] media: vb2: Allow reqbufs(0) with "in use" MMAP buffers

2018-11-14 Thread Philipp Zabel
Hi Hans,

On Tue, 2018-11-13 at 16:43 +0100, Hans Verkuil wrote:
> Hi Philipp,
> 
> On 11/13/18 16:06, Philipp Zabel wrote:
> > From: John Sheu 
> > 
> > Videobuf2 presently does not allow VIDIOC_REQBUFS to destroy outstanding
> > buffers if the queue is of type V4L2_MEMORY_MMAP, and if the buffers are
> > considered "in use".  This is different behavior than for other memory
> > > types and prevents us from deallocating buffers in the following two cases:
> > 
> > 1) There are outstanding mmap()ed views on the buffer. However even if
> >we put the buffer in reqbufs(0), there will be remaining references,
> >due to vma .open/close() adjusting vb2 buffer refcount appropriately.
> >This means that the buffer will be in fact freed only when the last
> >mmap()ed view is unmapped.
> > 
> > 2) Buffer has been exported as a DMABUF. Refcount of the vb2 buffer
> >is managed properly by VB2 DMABUF ops, i.e. incremented on DMABUF
> >get and decremented on DMABUF release. This means that the buffer
> >will be alive until all importers release it.
> > 
> > Considering both cases above, there does not seem to be any need to
> > prevent reqbufs(0) operation, because buffer lifetime is already
> > properly managed by both mmap() and DMABUF code paths. Let's remove it
> > and allow userspace freeing the queue (and potentially allocating a new
> > one) even though old buffers might be still in processing.
> > 
> > To let userspace know that the kernel now supports orphaning buffers
> > that are still in use, add a new V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS
> > to be set by reqbufs and create_bufs.
> 
> Looks good, but I have some questions:
> 
> 1) does v4l2-compliance together with vivid (easiest to test) still work?
>I don't think I have a proper test for this in v4l2-compliance, but
>I'm not 100% certain. If it fails with this patch, then please provide
>a fix for v4l2-compliance as well.

I have tested on v4.20-rc2 with 92539d3eda2c ("media: v4l: event: Add
subscription to list before calling "add" operation") and this patch
applied:

$ modprobe vivid no_error_inj=1
vivid-000: V4L2 capture device registered as video15
vivid-000: V4L2 output device registered as video16

$ v4l2-compliance -d 15 -s 1 --expbuf-device 16
v4l2-compliance SHA: 98b4c9f276a18535b5691e5f350f59ffbf5a9aa5, 32 bits
...
Total: 112, Succeeded: 112, Failed: 0, Warnings: 4

The warnings are:
warn: v4l2-test-formats.cpp(1426): doioctl(node, 
VIDIOC_CROPCAP, )
test Cropping: OK
(one per input) and:
warn: v4l2-test-controls.cpp(845): V4L2_CID_DV_RX_POWER_PRESENT 
not found for input 3
test VIDIOC_(UN)SUBSCRIBE_EVENT/DQEVENT: OK

> 2) I would like to see a new test in v4l2-compliance for this: i.e. if
>the capability is set, then check that you can call REQBUFS(0) before
>unmapping all buffers. Ditto with dmabuffers.
>
> I said during the media summit that I wanted to be more strict about
> requiring compliance tests before adding new features, so you're the
> unlucky victim of that :-)

That's fair. The SHA above is actually a lie, I had one patch applied.

regards
Philipp


Re: [PATCH] media: vb2: Allow reqbufs(0) with "in use" MMAP buffers

2018-11-14 Thread Philipp Zabel
Hi Sakari,

On Wed, 2018-11-14 at 00:27 +0200, Sakari Ailus wrote:
[...]
> This lets the user allocate lots of mmap'ed buffers that are pinned in
> physical memory.

This is already possible without this patch, by closing the fd instead
of calling reqbufs(0).

> Considering that we don't really have a proper mechanism
> to limit that anyway,
> 
> Acked-by: Sakari Ailus 
>
> That said, the patch must be accompanied by the documentation change in
> Documentation/media/uapi/v4l/vidioc-reqbufs.rst .

Oh right, thanks. I'll add V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS to
_v4l2-buf-capabilities in v2.

regards
Philipp


Re: [PATCH] media: vb2: Allow reqbufs(0) with "in use" MMAP buffers

2018-11-13 Thread Philipp Zabel
Sorry, that should have said [PATCH v2].

regards
Philipp


[PATCH] media: vb2: Allow reqbufs(0) with "in use" MMAP buffers

2018-11-13 Thread Philipp Zabel
From: John Sheu 

Videobuf2 presently does not allow VIDIOC_REQBUFS to destroy outstanding
buffers if the queue is of type V4L2_MEMORY_MMAP, and if the buffers are
considered "in use".  This is different behavior than for other memory
types and prevents us from deallocating buffers in the following two cases:

1) There are outstanding mmap()ed views on the buffer. However even if
   we put the buffer in reqbufs(0), there will be remaining references,
   due to vma .open/close() adjusting vb2 buffer refcount appropriately.
   This means that the buffer will be in fact freed only when the last
   mmap()ed view is unmapped.

2) Buffer has been exported as a DMABUF. Refcount of the vb2 buffer
   is managed properly by VB2 DMABUF ops, i.e. incremented on DMABUF
   get and decremented on DMABUF release. This means that the buffer
   will be alive until all importers release it.

Considering both cases above, there does not seem to be any need to
prevent reqbufs(0) operation, because buffer lifetime is already
properly managed by both mmap() and DMABUF code paths. Let's remove it
and allow userspace to free the queue (and potentially allocate a new
one) even though old buffers might still be in processing.

To let userspace know that the kernel now supports orphaning buffers
that are still in use, add a new V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS
to be set by reqbufs and create_bufs.

Signed-off-by: John Sheu 
Reviewed-by: Pawel Osciak 
Reviewed-by: Tomasz Figa 
Signed-off-by: Tomasz Figa 
[p.za...@pengutronix.de: moved __vb2_queue_cancel out of the mmap_lock
 and added V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS]
Signed-off-by: Philipp Zabel 
---
 .../media/common/videobuf2/videobuf2-core.c   | 26 +--
 .../media/common/videobuf2/videobuf2-v4l2.c   |  2 +-
 include/uapi/linux/videodev2.h|  1 +
 3 files changed, 3 insertions(+), 26 deletions(-)

diff --git a/drivers/media/common/videobuf2/videobuf2-core.c 
b/drivers/media/common/videobuf2/videobuf2-core.c
index 975ff5669f72..608459450c1e 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -553,20 +553,6 @@ bool vb2_buffer_in_use(struct vb2_queue *q, struct 
vb2_buffer *vb)
 }
 EXPORT_SYMBOL(vb2_buffer_in_use);
 
-/*
- * __buffers_in_use() - return true if any buffers on the queue are in use and
- * the queue cannot be freed (by the means of REQBUFS(0)) call
- */
-static bool __buffers_in_use(struct vb2_queue *q)
-{
-   unsigned int buffer;
-   for (buffer = 0; buffer < q->num_buffers; ++buffer) {
-   if (vb2_buffer_in_use(q, q->bufs[buffer]))
-   return true;
-   }
-   return false;
-}
-
 void vb2_core_querybuf(struct vb2_queue *q, unsigned int index, void *pb)
 {
call_void_bufop(q, fill_user_buffer, q->bufs[index], pb);
@@ -674,23 +660,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory 
memory,
 
if (*count == 0 || q->num_buffers != 0 ||
(q->memory != VB2_MEMORY_UNKNOWN && q->memory != memory)) {
-   /*
-* We already have buffers allocated, so first check if they
-* are not in use and can be freed.
-*/
-   mutex_lock(&q->mmap_lock);
-   if (q->memory == VB2_MEMORY_MMAP && __buffers_in_use(q)) {
-   mutex_unlock(&q->mmap_lock);
-   dprintk(1, "memory in use, cannot free\n");
-   return -EBUSY;
-   }
-
/*
 * Call queue_cancel to clean up any buffers in the
 * QUEUED state which is possible if buffers were prepared or
 * queued without ever calling STREAMON.
 */
__vb2_queue_cancel(q);
+   mutex_lock(&q->mmap_lock);
ret = __vb2_queue_free(q, q->num_buffers);
mutex_unlock(&q->mmap_lock);
if (ret)
diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c 
b/drivers/media/common/videobuf2/videobuf2-v4l2.c
index a17033ab2c22..f02d452ceeb9 100644
--- a/drivers/media/common/videobuf2/videobuf2-v4l2.c
+++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c
@@ -624,7 +624,7 @@ EXPORT_SYMBOL(vb2_querybuf);
 
 static void fill_buf_caps(struct vb2_queue *q, u32 *caps)
 {
-   *caps = 0;
+   *caps = V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS;
if (q->io_modes & VB2_MMAP)
*caps |= V4L2_BUF_CAP_SUPPORTS_MMAP;
if (q->io_modes & VB2_USERPTR)
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index c8e8ff810190..2a223835214c 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -879,6 +879,7 @@ struct v4l2_requestbuffers {
 #define V4L2_BUF_CAP_SUPPORTS_USERPTR  (1 << 1)
 #define V4L2_BUF_CAP_SUPPORTS_DMABUF   (1 << 2)

Re: [RFP] Which V4L2 ioctls could be replaced by better versions?

2018-11-12 Thread Philipp Zabel
Hi Tomasz,

On Sun, 2018-11-11 at 12:43 +0900, Tomasz Figa wrote:
> On Sat, Nov 10, 2018 at 6:06 AM Nicolas Dufresne  wrote:
> > 
> > Le jeudi 08 novembre 2018 à 16:45 +0900, Tomasz Figa a écrit :
> > > > In this patch we should consider a way to tell userspace that this has
> > > > been opt in, otherwise existing userspace will have to remain using
> > > > sub-optimal copy based reclaiming in order to ensure that renegotiation
> > > > can work on older kernel tool. At worst someone could probably do trial
> > > > and error (reqbufs(1)/mmap/reqbufs(0)) but on CMA with large buffers
> > > > this introduces extra startup time.
> > > 
> > > Would such REQBUFS dance be really needed? Couldn't one simply try
> > > reqbufs(0) when it's really needed and if it fails then do the copy,
> > > otherwise just proceed normally?
> > 
> > In simple program, maybe, in modularized code, where the consumer of
> > these buffer (the one that is forced to make a copy) does not know the
> > origin of the DMABuf, it's a bit complicated.
> > 
> > In GStreamer as an example, the producer is a plugin called
> > libgstvideo4linux2.so, while the common consumer would be libgstkms.so.
> > They don't know each other. The pipeline would be described as:
> > 
> >   v4l2src ! kmssink
> > 
> > GStreamer does not have an explicit reclaiming mechanism. No one knew
> > about V4L2 restrictions when this was designed, DMABuf didn't exist and
> > GStreamer didn't have OMX support.
> > 
> > What we ended up crafting, as a plaster, is that when upstream element
> > (v4l2src) query a new allocation from downstream (kmssink), we always
> > copy and return any ancient buffers by copying. kmssink holds on a
> > buffer because we can't remove the scanout buffer on the display. This
> > is slow and inefficient, and also totally unneeded if the dmabuf
> > originate from other kernel subsystems (like DRM).
> > 
> > So what I'd like to be able to do, to support this in a more optimal
> > and generic way, is to mark the buffers that needs reclaiming before
> > letting them go. But for that, I would need a flag somewhere to tell me
> > this kernel allow this.
> 
> Okay, got it. Thanks for explaining it.
> 
> > 
> > You got the context, maybe the conclusion is that I should simply do
> > kernel version check, though I'm sure a lot of people will backport
> > this, which means that check won't work so well.
> > 
> > Let me know, I understand adding more API is not fun, but as nothing is
> > ever versioned in the linux-media world, it's really hard to detect
> > and use new behaviour while supporting what everyone currently run on
> > their systems.
> > 
> > I would probably try and find a way to implement your suggestion, and
> > then introduce a flag in the query itself, but I would need to think
> > about it a little more. It's not as simple as it looks,
> > unfortunately.
> 
> It sounds like a good fit for a new capability in v4l2_requestbuffers
> and v4l2_create_buffers structs [1]. Perhaps something like
> V4L2_BUF_CAP_SUPPORTS_FREE_AFTER_EXPORT? Hans, what do you think?

Maybe V4L2_BUF_CAP_SUPPORTS_ORPHANS? With this patch, while the buffers
are in use, reqbufs(0) doesn't free them, they are orphaned. Also, this
patch allows reqbufs(0) not only after export, but also while mmapped.

regards
Philipp


Re: [PATCH v5 8/9] media: uvcvideo: Rename uvc_{un,}init_video()

2018-11-09 Thread Philipp Zabel
On Wed, 2018-11-07 at 22:25 +0200, Laurent Pinchart wrote:
> Hi Kieran,
> 
> On Wednesday, 7 November 2018 16:30:46 EET Kieran Bingham wrote:
> > On 06/11/2018 23:13, Laurent Pinchart wrote:
> > > On Tuesday, 6 November 2018 23:27:19 EET Kieran Bingham wrote:
> > > > From: Kieran Bingham 
> > > > 
> > > > We have both uvc_init_video() and uvc_video_init() calls which can be
> > > > quite confusing to determine the process for each. Now that video
> > > > uvc_video_enable() has been renamed to uvc_video_start_streaming(),
> > > > adapt these calls to suit the new flow.
> > > > 
> > > > Rename uvc_init_video() to uvc_video_start() and uvc_uninit_video() to
> > > > uvc_video_stop().
> > > 
> > > I agree that these functions are badly named and should be renamed. We are
> > > however entering the nitpicking territory :-) The two functions do more
> > > > than starting and stopping, they also allocate and free URBs and the
> > > associated buffers. It could also be argued that they don't actually
> > > start and stop anything, as beyond URB management, they just queue the
> > > URBs initially and kill them. I thus wonder if we could come up with
> > > better names.
> > 
> > Well the act of killing (poisoning now) the URBs will certainly stop the
> > stream, but I guess submitting the URBs isn't necessarily the key act to
> > starting the stream.
> > 
> > I believe that needs the interface to be set correctly, and the buffers
> > to be available?
> > 
> > Although - I've just double-checked uvc_{video_start,init_video}() and
> > that is indeed what it does?
> > 
> >  - start stats
> >  - Initialise endpoints
> >- Perform allocations
> >  - Submit URBs
> > 
> > Am I missing something? Is there another step that is pivotal to
> > starting the USB packet/urb stream flow after this point ?
> > 
> > 
> > Is it not true that the USB stack will start processing data at
> > submitting URB completion callbacks after the end of uvc_video_start();
> > and will no longer process data at the end of uvc_video_stop() (and thus
> > no more completion callbacks)?
> > 
> >  (That's a real question to verify my interpretation)
> > 
> > To me - these functions feel like the real 'start' and 'stop' components
> > of the data stream - hence my choice in naming.
> 
> The other part of the start operation is committing the streaming parameters 
> (see uvc_video_start_streaming()). For the stop operation it's issuing a 
> SET_INTERFACE or CLEAR_FEATURE(HALT) request (see uvc_video_stop_streaming()).
> 
> > Is your concern that you would like the functions to be more descriptive
> > over their other actions such as? :
> > 
> >   uvc_video_initialise_start()
> >   uvc_video_allocate_init_start()
> > 
> > Or something else? (I don't think those two are good names though)
> 
> Probably something else :-) A possibly equally bad proposal would be 
> uvc_video_start_transfer() and uvc_video_stop_transfer().

I think this is still better than what we have now.

At least it contains "transfer" to make it clear it deals with the
isoc/bulk transfer setup/teardown part of streaming, not actually
starting or stopping the device streaming.

regards
Philipp


Re: [PATCH 3/3] media: imx: lift CSI width alignment restriction

2018-11-09 Thread Philipp Zabel
On Thu, 2018-11-08 at 21:46 -0800, Steve Longerbeam wrote:
> On 11/5/18 7:20 AM, Philipp Zabel wrote:
> > The CSI subdevice shouldn't have to care about IDMAC line start
> > address alignment. With compose rectangle support in the capture
> > driver, it doesn't have to anymore.
> > 
> > Signed-off-by: Philipp Zabel 
> > ---
> >   drivers/staging/media/imx/imx-media-capture.c |  9 -
> >   drivers/staging/media/imx/imx-media-csi.c |  2 +-
> >   drivers/staging/media/imx/imx-media-utils.c   | 15 ---
> >   3 files changed, 17 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/staging/media/imx/imx-media-capture.c 
> > b/drivers/staging/media/imx/imx-media-capture.c
> > index 2d49d9573056..f87d6e8019e5 100644
> > --- a/drivers/staging/media/imx/imx-media-capture.c
> > +++ b/drivers/staging/media/imx/imx-media-capture.c
> > @@ -204,10 +204,9 @@ static int capture_g_fmt_vid_cap(struct file *file, 
> > void *fh,
> >   }
> >   
> >   static int __capture_try_fmt_vid_cap(struct capture_priv *priv,
> > -struct v4l2_subev_format *fmt_src,
> > +struct v4l2_subdev_format *fmt_src,
> >  struct v4l2_format *f)
> >   {
> > -   struct capture_priv *priv = video_drvdata(file);
> > const struct imx_media_pixfmt *cc, *cc_src;
> >   
> > cc_src = imx_media_find_ipu_format(fmt_src->format.code, CS_SEL_ANY);
> > @@ -250,7 +249,7 @@ static int capture_try_fmt_vid_cap(struct file *file, 
> > void *fh,
> > if (ret)
> > return ret;
> >   
> > -   return __capture_try_fmt(priv, _src, f);
> > +   return __capture_try_fmt_vid_cap(priv, _src, f);
> >   }
> >   
> >   static int capture_s_fmt_vid_cap(struct file *file, void *fh,
> > @@ -280,8 +279,8 @@ static int capture_s_fmt_vid_cap(struct file *file, 
> > void *fh,
> >   CS_SEL_ANY, true);
> > priv->vdev.compose.left = 0;
> > priv->vdev.compose.top = 0;
> > -   priv->vdev.compose.width = fmt_src.width;
> > -   priv->vdev.compose.height = fmt_src.height;
> > +   priv->vdev.compose.width = fmt_src.format.width;
> > +   priv->vdev.compose.height = fmt_src.format.height;
> >   
> > return 0;
> >   }
> > diff --git a/drivers/staging/media/imx/imx-media-csi.c 
> > b/drivers/staging/media/imx/imx-media-csi.c
> > index c4523afe7b48..d39682192a67 100644
> > --- a/drivers/staging/media/imx/imx-media-csi.c
> > +++ b/drivers/staging/media/imx/imx-media-csi.c
> > @@ -41,7 +41,7 @@
> >   #define MIN_H   144
> >   #define MAX_W  4096
> >   #define MAX_H  4096
> > -#define W_ALIGN4 /* multiple of 16 pixels */
> > +#define W_ALIGN1 /* multiple of 2 pixels */
> 
> 
> This works for the IDMAC output pad because the channel's cpmem width 
> and stride can be rounded up, but width align at the CSI sink still 
> needs to be 8 pixels when directed to the IC via the CSI_SRC_PAD_DIRECT 
> pad, in order to support the 8x8 block rotator in the IC PRP, and 
> there's no way AFAIK to do the same trick of rounding up width and 
> stride for non-IDMAC direct paths through the IPU.

Actually, this is not necessary at all. csi_try_crop takes care of this
by setting:
crop->width &= ~0x7;
Which is then used to set compose rectangle and source pad formats.

So this should be relaxed as well, if the SRC_DIRECT pad is not enabled.
And further, I think there is no reason to align crop->left to multiples
of 4 pixels?

regards
Philipp


Re: [PATCH 3/3] media: imx: lift CSI width alignment restriction

2018-11-09 Thread Philipp Zabel
Hi Steve,

On Thu, 2018-11-08 at 21:46 -0800, Steve Longerbeam wrote:
> > diff --git a/drivers/staging/media/imx/imx-media-csi.c 
> > b/drivers/staging/media/imx/imx-media-csi.c
> > index c4523afe7b48..d39682192a67 100644
> > --- a/drivers/staging/media/imx/imx-media-csi.c
> > +++ b/drivers/staging/media/imx/imx-media-csi.c
> > @@ -41,7 +41,7 @@
> >   #define MIN_H   144
> >   #define MAX_W  4096
> >   #define MAX_H  4096
> > -#define W_ALIGN4 /* multiple of 16 pixels */
> > +#define W_ALIGN1 /* multiple of 2 pixels */
> 
> This works for the IDMAC output pad because the channel's cpmem width 
> and stride can be rounded up, but width align at the CSI sink still 
> needs to be 8 pixels when directed to the IC via the CSI_SRC_PAD_DIRECT 
> pad, in order to support the 8x8 block rotator in the IC PRP, and 
> there's no way AFAIK to do the same trick of rounding up width and 
> stride for non-IDMAC direct paths through the IPU.

Can't we just disallow rotation on prp subdevs if sink format is not
aligned to 2^3? Another possibility would be to align sink pad format
width to 2^3 only if the PAD_DIRECT link is enabled.

> Also, the imx-ic-prpencvf.c W_ALIGN_SRC can be relaxed to 2 pixels as
> well.

True, added for v2.

regards
Philipp


Re: [PATCH 2/3] media: imx: set compose rectangle to mbus format

2018-11-09 Thread Philipp Zabel
On Thu, 2018-11-08 at 21:33 -0800, Steve Longerbeam wrote:
> Hi Philipp,
> 
> On 11/5/18 7:20 AM, Philipp Zabel wrote:
> > Prepare for mbus format being smaller than the written rectangle
> > due to burst size.
> > 
> > Signed-off-by: Philipp Zabel 
> > ---
> >   drivers/staging/media/imx/imx-media-capture.c | 55 +--
> >   1 file changed, 38 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/staging/media/imx/imx-media-capture.c 
> > b/drivers/staging/media/imx/imx-media-capture.c
> > index cace8a51aca8..2d49d9573056 100644
> > --- a/drivers/staging/media/imx/imx-media-capture.c
> > +++ b/drivers/staging/media/imx/imx-media-capture.c
> > @@ -203,21 +203,14 @@ static int capture_g_fmt_vid_cap(struct file *file, 
> > void *fh,
> > return 0;
> >   }
> >   
> > -static int capture_try_fmt_vid_cap(struct file *file, void *fh,
> > -  struct v4l2_format *f)
> > +static int __capture_try_fmt_vid_cap(struct capture_priv *priv,
> > +struct v4l2_subev_format *fmt_src,
> 
> 
> typo: struct v4l2_subdev_format *fmt_src,

Fixed, thanks.

[...]
> > 
> > +   return __capture_try_fmt(priv, &fmt_src, f);
> 
> 
> typo: return __capture_try_fmt_vid_cap(priv, &fmt_src, f);

And thanks. Looks like I've misplaced a fixup! patch.

regards
Philipp


Re: [PATCH 1/3] media: imx: add capture compose rectangle

2018-11-09 Thread Philipp Zabel
Hi Steve,

thank you for the review.

On Thu, 2018-11-08 at 21:33 -0800, Steve Longerbeam wrote:
[...]
> > --- a/drivers/staging/media/imx/imx-media-capture.c
> > +++ b/drivers/staging/media/imx/imx-media-capture.c
> > @@ -262,6 +262,10 @@ static int capture_s_fmt_vid_cap(struct file *file, 
> > void *fh,
> > priv->vdev.fmt.fmt.pix = f->fmt.pix;
> > priv->vdev.cc = imx_media_find_format(f->fmt.pix.pixelformat,
> >   CS_SEL_ANY, true);
> > +   priv->vdev.compose.left = 0;
> > +   priv->vdev.compose.top = 0;
> > +   priv->vdev.compose.width = f->fmt.fmt.pix.width;
> > +   priv->vdev.compose.height = f->fmt.fmt.pix.height;
> 
> this should be:
> 
> priv->vdev.compose.width = fmt_src.format.width;
> priv->vdev.compose.height = fmt_src.format.height;
> 
> (corrected in the next patches but needs to be corrected here).

Thanks for catching this, it should be

+   priv->vdev.compose.width = f->fmt.pix.width;
+   priv->vdev.compose.height = f->fmt.pix.height;

though, fmt_src is only introduced in patch 2.

regards
Philipp


Re: [PATCH 1/3] media: imx: add capture compose rectangle

2018-11-06 Thread Philipp Zabel
Hi Sakari,

On Tue, 2018-11-06 at 16:01 +0200, Sakari Ailus wrote:
[...]
> @@ -290,6 +294,35 @@ static int capture_s_std(struct file *file, void *fh, 
> v4l2_std_id std)
> > return v4l2_subdev_call(priv->src_sd, video, s_std, std);
> >  }
> >  
> > +static int capture_g_selection(struct file *file, void *fh,
> > +  struct v4l2_selection *s)
> > +{
> > +   struct capture_priv *priv = video_drvdata(file);
> > +
> > +   switch (s->target) {
> > +   case V4L2_SEL_TGT_CROP:
> > +   case V4L2_SEL_TGT_CROP_DEFAULT:
> > +   case V4L2_SEL_TGT_CROP_BOUNDS:
> > +   case V4L2_SEL_TGT_NATIVE_SIZE:
> 
> The NATIVE_SIZE is for devices such as sensors. It doesn't make sense here.

Should this be documented in
Documentation/media/uapi/v4l/v4l2-selection-targets.rst? There it only
mentions when to make it writeable.

> With that removed,
> 
> Acked-by: Sakari Ailus 

Thank you, I'll remove that line.

regards
Philipp


[PATCH v2] media: coda: fix memory corruption in case more than 32 instances are opened

2018-11-06 Thread Philipp Zabel
The ffz() return value is undefined if the instance mask does not
contain any zeros. If it returned 32, the following set_bit would
corrupt the debugfs_root pointer.
Switch to IDA for context index allocation. This also removes the
artificial 32 instance limit for all except CodaDx6.

Signed-off-by: Philipp Zabel 
---
Changes since v1:
 - #include <linux/idr.h> explicitly where struct ida or ida_*
   functions are used, reported by Ian Arkver
---
 drivers/media/platform/coda/coda-common.c | 26 +--
 drivers/media/platform/coda/coda.h|  3 ++-
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index 2848ea5f464d..547acf80c89d 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2099,17 +2100,6 @@ int coda_decoder_queue_init(void *priv, struct vb2_queue 
*src_vq,
return coda_queue_init(priv, dst_vq);
 }
 
-static int coda_next_free_instance(struct coda_dev *dev)
-{
-   int idx = ffz(dev->instance_mask);
-
-   if ((idx < 0) ||
-   (dev->devtype->product == CODA_DX6 && idx > CODADX6_MAX_INSTANCES))
-   return -EBUSY;
-
-   return idx;
-}
-
 /*
  * File operations
  */
@@ -2118,7 +2108,8 @@ static int coda_open(struct file *file)
 {
struct video_device *vdev = video_devdata(file);
struct coda_dev *dev = video_get_drvdata(vdev);
-   struct coda_ctx *ctx = NULL;
+   struct coda_ctx *ctx;
+   unsigned int max = ~0;
char *name;
int ret;
int idx;
@@ -2127,12 +2118,13 @@ static int coda_open(struct file *file)
if (!ctx)
return -ENOMEM;
 
-   idx = coda_next_free_instance(dev);
+   if (dev->devtype->product == CODA_DX6)
+   max = CODADX6_MAX_INSTANCES - 1;
+   idx = ida_alloc_max(>ida, max, GFP_KERNEL);
if (idx < 0) {
ret = idx;
goto err_coda_max;
}
-   set_bit(idx, >instance_mask);
 
name = kasprintf(GFP_KERNEL, "context%d", idx);
if (!name) {
@@ -2241,8 +2233,8 @@ static int coda_open(struct file *file)
 err_pm_get:
v4l2_fh_del(>fh);
v4l2_fh_exit(>fh);
-   clear_bit(ctx->idx, >instance_mask);
 err_coda_name_init:
+   ida_free(>ida, ctx->idx);
 err_coda_max:
kfree(ctx);
return ret;
@@ -2284,7 +2276,7 @@ static int coda_release(struct file *file)
pm_runtime_put_sync(>plat_dev->dev);
v4l2_fh_del(>fh);
v4l2_fh_exit(>fh);
-   clear_bit(ctx->idx, >instance_mask);
+   ida_free(>ida, ctx->idx);
if (ctx->ops->release)
ctx->ops->release(ctx);
debugfs_remove_recursive(ctx->debugfs_entry);
@@ -2745,6 +2737,7 @@ static int coda_probe(struct platform_device *pdev)
 
mutex_init(>dev_mutex);
mutex_init(>coda_mutex);
+   ida_init(>ida);
 
dev->debugfs_root = debugfs_create_dir("coda", NULL);
if (!dev->debugfs_root)
@@ -2832,6 +2825,7 @@ static int coda_remove(struct platform_device *pdev)
coda_free_aux_buf(dev, >tempbuf);
coda_free_aux_buf(dev, >workbuf);
debugfs_remove_recursive(dev->debugfs_root);
+   ida_destroy(>ida);
return 0;
 }
 
diff --git a/drivers/media/platform/coda/coda.h 
b/drivers/media/platform/coda/coda.h
index 19ac0b9dc6eb..680c7035c9d4 100644
--- a/drivers/media/platform/coda/coda.h
+++ b/drivers/media/platform/coda/coda.h
@@ -16,6 +16,7 @@
 #define __CODA_H__
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -95,7 +96,7 @@ struct coda_dev {
struct workqueue_struct *workqueue;
struct v4l2_m2m_dev *m2m_dev;
struct list_headinstances;
-   unsigned long   instance_mask;
+   struct ida  ida;
struct dentry   *debugfs_root;
 };
 
-- 
2.19.1



Re: [PATCH 01/15] media: coda: fix memory corruption in case more than 32 instances are opened

2018-11-06 Thread Philipp Zabel
On Mon, 2018-11-05 at 16:32 +, Ian Arkver wrote:
> Hi Philipp,
> 
> On 05/11/2018 15:24, Philipp Zabel wrote:
> > The ffz() return value is undefined if the instance mask does not
> > contain any zeros. If it returned 32, the following set_bit would
> > corrupt the debugfs_root pointer.
> > Switch to IDA for context index allocation. This also removes the
> > artificial 32 instance limit for all except CodaDx6.
> > 
> > Signed-off-by: Philipp Zabel 
> > ---
> >   drivers/media/platform/coda/coda-common.c | 25 ---
> >   drivers/media/platform/coda/coda.h|  2 +-
> >   2 files changed, 10 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/media/platform/coda/coda-common.c 
> > b/drivers/media/platform/coda/coda-common.c
> > index 2848ea5f464d..cbb59c2f3a82 100644
> > --- a/drivers/media/platform/coda/coda-common.c
> > +++ b/drivers/media/platform/coda/coda-common.c
> > @@ -2099,17 +2099,6 @@ int coda_decoder_queue_init(void *priv, struct 
> > vb2_queue *src_vq,
> > return coda_queue_init(priv, dst_vq);
> >   }
> >   
> > -static int coda_next_free_instance(struct coda_dev *dev)
> > -{
> > -   int idx = ffz(dev->instance_mask);
> > -
> > -   if ((idx < 0) ||
> > -   (dev->devtype->product == CODA_DX6 && idx > CODADX6_MAX_INSTANCES))
> > -   return -EBUSY;
> > -
> > -   return idx;
> > -}
> > -
> >   /*
> >* File operations
> >*/
> > @@ -2118,7 +2107,8 @@ static int coda_open(struct file *file)
> >   {
> > struct video_device *vdev = video_devdata(file);
> > struct coda_dev *dev = video_get_drvdata(vdev);
> > -   struct coda_ctx *ctx = NULL;
> > +   struct coda_ctx *ctx;
> > +   unsigned int max = ~0;
> > char *name;
> > int ret;
> > int idx;
> > @@ -2127,12 +2117,13 @@ static int coda_open(struct file *file)
> > if (!ctx)
> > return -ENOMEM;
> >   
> > -   idx = coda_next_free_instance(dev);
> > +   if (dev->devtype->product == CODA_DX6)
> > +   max = CODADX6_MAX_INSTANCES - 1;
> > +   idx = ida_alloc_max(>ida, max, GFP_KERNEL);
> > if (idx < 0) {
> > ret = idx;
> > goto err_coda_max;
> > }
> > -   set_bit(idx, >instance_mask);
> >   
> > name = kasprintf(GFP_KERNEL, "context%d", idx);
> > if (!name) {
> > @@ -2241,8 +2232,8 @@ static int coda_open(struct file *file)
> >   err_pm_get:
> > v4l2_fh_del(>fh);
> > v4l2_fh_exit(>fh);
> > -   clear_bit(ctx->idx, >instance_mask);
> >   err_coda_name_init:
> > +   ida_free(>ida, ctx->idx);
> >   err_coda_max:
> > kfree(ctx);
> > return ret;
> > @@ -2284,7 +2275,7 @@ static int coda_release(struct file *file)
> > pm_runtime_put_sync(>plat_dev->dev);
> > v4l2_fh_del(>fh);
> > v4l2_fh_exit(>fh);
> > -   clear_bit(ctx->idx, >instance_mask);
> > +   ida_free(>ida, ctx->idx);
> > if (ctx->ops->release)
> > ctx->ops->release(ctx);
> > debugfs_remove_recursive(ctx->debugfs_entry);
> > @@ -2745,6 +2736,7 @@ static int coda_probe(struct platform_device *pdev)
> >   
> > mutex_init(>dev_mutex);
> > mutex_init(>coda_mutex);
> > +   ida_init(>ida);
> >   
> > dev->debugfs_root = debugfs_create_dir("coda", NULL);
> > if (!dev->debugfs_root)
> > @@ -2832,6 +2824,7 @@ static int coda_remove(struct platform_device *pdev)
> > coda_free_aux_buf(dev, >tempbuf);
> > coda_free_aux_buf(dev, >workbuf);
> > debugfs_remove_recursive(dev->debugfs_root);
> > +   ida_destroy(>ida);
> > return 0;
> >   }
> >   
> > diff --git a/drivers/media/platform/coda/coda.h 
> > b/drivers/media/platform/coda/coda.h
> > index 19ac0b9dc6eb..b6cd14ee91ea 100644
> > --- a/drivers/media/platform/coda/coda.h
> > +++ b/drivers/media/platform/coda/coda.h
> 
> Should you add:
> #include <linux/idr.h>
> to this header?

Yes, thanks. It currently is pulled in indirectly. I'll send a v2 with
the #include added for the first patch.

regards
Philipp


Re: [PATCH v2 3/3] media: imx-pxp: Improve pxp_soft_reset() error message

2018-11-06 Thread Philipp Zabel
On Mon, 2018-11-05 at 18:45 -0200, Fabio Estevam wrote:
> Improve the pxp_soft_reset() error message by moving it to the
> caller function, associating it with a proper device and also
> by displaying the error code.
> 
> Signed-off-by: Fabio Estevam 
> ---
> Changes since v1:
> - Newly introduced in this version
> 
>  drivers/media/platform/imx-pxp.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/media/platform/imx-pxp.c 
> b/drivers/media/platform/imx-pxp.c
> index b3700b8..1b765c9 100644
> --- a/drivers/media/platform/imx-pxp.c
> +++ b/drivers/media/platform/imx-pxp.c
> @@ -1619,10 +1619,8 @@ static int pxp_soft_reset(struct pxp_dev *dev)
>  
>   ret = readl_poll_timeout(dev->mmio + HW_PXP_CTRL, val,
>val & BM_PXP_CTRL_CLKGATE, 0, 100);
> - if (ret < 0) {
> - pr_err("PXP reset timeout\n");
> + if (ret < 0)
>   return ret;
> - }
>  
>   writel(BM_PXP_CTRL_SFTRST, dev->mmio + HW_PXP_CTRL_CLR);
>   writel(BM_PXP_CTRL_CLKGATE, dev->mmio + HW_PXP_CTRL_CLR);
> @@ -1675,8 +1673,10 @@ static int pxp_probe(struct platform_device *pdev)
>   return ret;
>  
>   ret = pxp_soft_reset(dev);
> - if (ret < 0)
> + if (ret < 0) {
> + dev_err(>dev, "PXP reset timeout: %d\n", ret);
>       return ret;
> + }
>  
>   spin_lock_init(>irqlock);

This should be rebased onto the fixed 2/2 or squashed into it,
but otherwise
Reviewed-by: Philipp Zabel 

regards
Philipp


Re: [PATCH v2 2/3] media: imx-pxp: Check for pxp_soft_reset() error

2018-11-06 Thread Philipp Zabel
On Mon, 2018-11-05 at 18:45 -0200, Fabio Estevam wrote:
> pxp_soft_reset() may fail with a timeout, so it is better to propagate
> the error in this case.
> 
> Signed-off-by: Fabio Estevam 
> ---
> Changes since v1:
> - None
> 
>  drivers/media/platform/imx-pxp.c | 12 +---
>  1 file changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/media/platform/imx-pxp.c 
> b/drivers/media/platform/imx-pxp.c
> index 27780f1..b3700b8 100644
> --- a/drivers/media/platform/imx-pxp.c
> +++ b/drivers/media/platform/imx-pxp.c
> @@ -1607,7 +1607,7 @@ static const struct v4l2_m2m_ops m2m_ops = {
>   .job_abort  = pxp_job_abort,
>  };
>  
> -static void pxp_soft_reset(struct pxp_dev *dev)
> +static int pxp_soft_reset(struct pxp_dev *dev)
>  {
>   int ret;
>   u32 val;
> @@ -1619,11 +1619,15 @@ static void pxp_soft_reset(struct pxp_dev *dev)
>  
>   ret = readl_poll_timeout(dev->mmio + HW_PXP_CTRL, val,
>val & BM_PXP_CTRL_CLKGATE, 0, 100);
> - if (ret < 0)
> + if (ret < 0) {
>   pr_err("PXP reset timeout\n");
> + return ret;
> + }
>  
>   writel(BM_PXP_CTRL_SFTRST, dev->mmio + HW_PXP_CTRL_CLR);

I'm not sure if we should clear SFTRST again after a timeout. It
probably doesn't matter as something went wrong anyway and the next
probe will try to clear it again.

>   writel(BM_PXP_CTRL_CLKGATE, dev->mmio + HW_PXP_CTRL_CLR);

Clearing CLKGATE if it was not set by the SFTRST in time should have no
effect, so we could do this unconditionally as well.

> +
> + return 0;

So you could just "return ret;" here instead of breaking out above.
I have no preference either way.

>  }
>  
>  static int pxp_probe(struct platform_device *pdev)
> @@ -1670,7 +1674,9 @@ static int pxp_probe(struct platform_device *pdev)
>   if (ret < 0)
>   return ret;
>  
> - pxp_soft_reset(dev);
> + ret = pxp_soft_reset(dev);
> + if (ret < 0)
> + return ret;

This should "goto err_clk;" instead, though. With that changed,

Reviewed-by: Philipp Zabel 

regards
Philipp


Re: [PATCH v2 1/3] media: imx-pxp: Check the return value from clk_prepare_enable()

2018-11-06 Thread Philipp Zabel
Hi Fabio,

thank you for the fixes!

On Mon, 2018-11-05 at 18:45 -0200, Fabio Estevam wrote:
> clk_prepare_enable() may fail, so we should better check its return value
> and propagate it in the case of error.
> 
> Signed-off-by: Fabio Estevam 
> ---
> Changes since v1:
> - Properly enumerate the series
> 
>  drivers/media/platform/imx-pxp.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/media/platform/imx-pxp.c 
> b/drivers/media/platform/imx-pxp.c
> index b76cd0e..27780f1 100644
> --- a/drivers/media/platform/imx-pxp.c
> +++ b/drivers/media/platform/imx-pxp.c
> @@ -1666,7 +1666,10 @@ static int pxp_probe(struct platform_device *pdev)
>   return ret;
>   }
>  
> - clk_prepare_enable(dev->clk);
> + ret = clk_prepare_enable(dev->clk);
> + if (ret < 0)
> + return ret;
> +
>   pxp_soft_reset(dev);
>  
>   spin_lock_init(>irqlock);

Reviewed-by: Philipp Zabel 

regards
Philipp


[PATCH 10/15] media: coda: never set infinite timeperframe

2018-11-05 Thread Philipp Zabel
v4l2-compliance complains if G_PARM returns 0 in the denominator.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-common.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index 32998da39cac..c4d48069606c 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -1112,10 +1112,10 @@ static void coda_approximate_timeperframe(struct 
v4l2_fract *timeperframe)
return;
}
 
-   /* Upper bound is 65536/1, map everything above to infinity */
+   /* Upper bound is 65536/1 */
if (s.denominator == 0 || s.numerator / s.denominator > 65536) {
-   timeperframe->numerator = 1;
-   timeperframe->denominator = 0;
+   timeperframe->numerator = 65536;
+   timeperframe->denominator = 1;
return;
}
 
-- 
2.19.1



[PATCH 05/15] media: coda: reduce minimum frame size to 48x16 pixels.

2018-11-05 Thread Philipp Zabel
Three macroblocks seem to be the minimum resolution that can be encoded
and decoded by the CODA960 h.264 codec. Picture run commands fail for
smaller resolutions.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index c7a274c60ff9..01deb454e60b 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -50,8 +50,8 @@
 
 #define CODA_ISRAM_SIZE(2048 * 2)
 
-#define MIN_W 176
-#define MIN_H 144
+#define MIN_W 48
+#define MIN_H 16
 
 #define S_ALIGN1 /* multiple of 2 */
 #define W_ALIGN1 /* multiple of 2 */
-- 
2.19.1



[PATCH 04/15] media: coda: limit queueing into internal bitstream buffer

2018-11-05 Thread Philipp Zabel
From: Lucas Stach 

The ringbuffer used to hold the bitstream is very conservatively sized,
as keyframes can get very large and still need to fit into this buffer.
This means that the buffer is way oversized for the average stream to
the extend that it will hold a few hundred frames when the video data
is compressing well.

The current strategy of queueing as much bitstream data as possible
leads to large delays when draining the decoder. In order to keep the
drain latency to a reasonable bound, try to only queue a full reorder
window of buffers. We can't always hit this low target for very well
compressible video data, as we might end up with less than the minimum
amount of data that needs to be available to the bitstream prefetcher,
so we must take this into account and allow more buffers to be queued
in this case.

Signed-off-by: Lucas Stach 
---
 drivers/media/platform/coda/coda-bit.c | 28 ++
 1 file changed, 28 insertions(+)

diff --git a/drivers/media/platform/coda/coda-bit.c 
b/drivers/media/platform/coda/coda-bit.c
index e5ce0bec8ec3..ee9d2a402ccd 100644
--- a/drivers/media/platform/coda/coda-bit.c
+++ b/drivers/media/platform/coda/coda-bit.c
@@ -269,6 +269,23 @@ void coda_fill_bitstream(struct coda_ctx *ctx, struct 
list_head *buffer_list)
ctx->num_metas > 1)
break;
 
+   if (ctx->num_internal_frames &&
+   ctx->num_metas >= ctx->num_internal_frames) {
+   meta = list_first_entry(>buffer_meta_list,
+   struct coda_buffer_meta, list);
+
+   /*
+* If we managed to fill in at least a full reorder
+* window of buffers (num_internal_frames is a
+* conservative estimate for this) and the bitstream
+* prefetcher has at least 2 256 bytes periods beyond
+* the first buffer to fetch, we can safely stop queuing
+* in order to limit the decoder drain latency.
+*/
+   if (coda_bitstream_can_fetch_past(ctx, meta->end))
+   break;
+   }
+
src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
 
/* Drop frames that do not start/end with a SOI/EOI markers */
@@ -2252,6 +2269,17 @@ static void coda_finish_decode(struct coda_ctx *ctx)
 
/* The rotator will copy the current display frame next time */
ctx->display_idx = display_idx;
+
+   /*
+* The current decode run might have brought the bitstream fill level
+* below the size where we can start the next decode run. As userspace
+* might have filled the output queue completely and might thus be
+* blocked, we can't rely on the next qbuf to trigger the bitstream
+* refill. Check if we have data to refill the bitstream now.
+*/
+   mutex_lock(>bitstream_mutex);
+   coda_fill_bitstream(ctx, NULL);
+   mutex_unlock(>bitstream_mutex);
 }
 
 static void coda_decode_timeout(struct coda_ctx *ctx)
-- 
2.19.1



[PATCH 15/15] media: coda: debug output when setting visible size via crop selection

2018-11-05 Thread Philipp Zabel
In addition to the S_FMT debug output, S_SELECTION (SEL_TGT_CROP) is
relevant to determine encoded size. Add debug output for it.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-common.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index 2a0e0d04c67a..bf4b21b7cdb3 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -961,6 +961,9 @@ static int coda_s_selection(struct file *file, void *fh,
 
q_data->rect = s->r;
 
+   coda_dbg(1, ctx, "Setting crop rectangle: %dx%d\n",
+s->r.width, s->r.height);
+
return 0;
}
/* else fall through */
-- 
2.19.1



[PATCH 07/15] media: coda: don't disable IRQs across buffer meta handling

2018-11-05 Thread Philipp Zabel
From: Lucas Stach 

The CODA driver uses threaded IRQs only, so there is nothing happening
in hardirq context that could interfere with the buffer meta handling.

Signed-off-by: Lucas Stach 
---
 drivers/media/platform/coda/coda-bit.c| 19 +++
 drivers/media/platform/coda/coda-common.c |  5 ++---
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/media/platform/coda/coda-bit.c 
b/drivers/media/platform/coda/coda-bit.c
index ee9d2a402ccd..348b17140715 100644
--- a/drivers/media/platform/coda/coda-bit.c
+++ b/drivers/media/platform/coda/coda-bit.c
@@ -253,7 +253,6 @@ void coda_fill_bitstream(struct coda_ctx *ctx, struct 
list_head *buffer_list)
 {
struct vb2_v4l2_buffer *src_buf;
struct coda_buffer_meta *meta;
-   unsigned long flags;
u32 start;
 
if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)
@@ -332,13 +331,11 @@ void coda_fill_bitstream(struct coda_ctx *ctx, struct 
list_head *buffer_list)
meta->timestamp = src_buf->vb2_buf.timestamp;
meta->start = start;
meta->end = ctx->bitstream_fifo.kfifo.in;
-   spin_lock_irqsave(>buffer_meta_lock,
- flags);
+   spin_lock(>buffer_meta_lock);
list_add_tail(>list,
  >buffer_meta_list);
ctx->num_metas++;
-   spin_unlock_irqrestore(>buffer_meta_lock,
-  flags);
+   spin_unlock(>buffer_meta_lock);
 
trace_coda_bit_queue(ctx, src_buf, meta);
}
@@ -1894,7 +1891,6 @@ static int coda_prepare_decode(struct coda_ctx *ctx)
struct coda_dev *dev = ctx->dev;
struct coda_q_data *q_data_dst;
struct coda_buffer_meta *meta;
-   unsigned long flags;
u32 rot_mode = 0;
u32 reg_addr, reg_stride;
 
@@ -1988,7 +1984,7 @@ static int coda_prepare_decode(struct coda_ctx *ctx)
coda_write(dev, ctx->iram_info.axi_sram_use,
CODA7_REG_BIT_AXI_SRAM_USE);
 
-   spin_lock_irqsave(>buffer_meta_lock, flags);
+   spin_lock(>buffer_meta_lock);
meta = list_first_entry_or_null(>buffer_meta_list,
struct coda_buffer_meta, list);
 
@@ -2007,7 +2003,7 @@ static int coda_prepare_decode(struct coda_ctx *ctx)
kfifo_in(>bitstream_fifo, buf, pad);
}
}
-   spin_unlock_irqrestore(>buffer_meta_lock, flags);
+   spin_unlock(>buffer_meta_lock);
 
coda_kfifo_sync_to_device_full(ctx);
 
@@ -2029,7 +2025,6 @@ static void coda_finish_decode(struct coda_ctx *ctx)
struct vb2_v4l2_buffer *dst_buf;
struct coda_buffer_meta *meta;
unsigned long payload;
-   unsigned long flags;
int width, height;
int decoded_idx;
int display_idx;
@@ -2161,13 +2156,13 @@ static void coda_finish_decode(struct coda_ctx *ctx)
} else {
val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM) - 1;
val -= ctx->sequence_offset;
-   spin_lock_irqsave(>buffer_meta_lock, flags);
+   spin_lock(>buffer_meta_lock);
if (!list_empty(>buffer_meta_list)) {
meta = list_first_entry(>buffer_meta_list,
  struct coda_buffer_meta, list);
list_del(>list);
ctx->num_metas--;
-   spin_unlock_irqrestore(>buffer_meta_lock, flags);
+   spin_unlock(>buffer_meta_lock);
/*
 * Clamp counters to 16 bits for comparison, as the HW
 * counter rolls over at this point for h.264. This
@@ -2184,7 +2179,7 @@ static void coda_finish_decode(struct coda_ctx *ctx)
ctx->frame_metas[decoded_idx] = *meta;
kfree(meta);
} else {
-   spin_unlock_irqrestore(>buffer_meta_lock, flags);
+   spin_unlock(>buffer_meta_lock);
v4l2_err(>v4l2_dev, "empty timestamp list!\n");
memset(>frame_metas[decoded_idx], 0,
   sizeof(struct coda_buffer_meta));
diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index 54b7344231c0..60b866160094 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -1689,7 +1689,6 @@ static void coda_stop_streaming(struct vb2_queue *q)
struct coda_ctx *ctx = vb2_get_drv_priv(q);
struct coda_dev *dev = 

[PATCH 13/15] media: coda: improve queue busy error message

2018-11-05 Thread Philipp Zabel
Use v4l2_type_names to indicate which of the two queues is busy.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index fd9bc19cd79b..b3d73965614a 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -703,7 +703,8 @@ static int coda_s_fmt(struct coda_ctx *ctx, struct 
v4l2_format *f,
return -EINVAL;
 
if (vb2_is_busy(vq)) {
-   v4l2_err(>dev->v4l2_dev, "%s queue busy\n", __func__);
+   v4l2_err(>dev->v4l2_dev, "%s: %s queue busy: %d\n",
+__func__, v4l2_type_names[f->type], vq->num_buffers);
return -EBUSY;
}
 
-- 
2.19.1



[PATCH 06/15] media: coda: remove unused instances list

2018-11-05 Thread Philipp Zabel
The per-device instance list is unused, remove it.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-common.c | 9 -
 drivers/media/platform/coda/coda.h| 2 --
 2 files changed, 11 deletions(-)

diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index 01deb454e60b..54b7344231c0 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -2214,10 +2214,6 @@ static int coda_open(struct file *file)
INIT_LIST_HEAD(>buffer_meta_list);
spin_lock_init(>buffer_meta_lock);
 
-   mutex_lock(>dev_mutex);
-   list_add(>list, >instances);
-   mutex_unlock(>dev_mutex);
-
v4l2_dbg(1, coda_debug, >v4l2_dev, "Created instance %d (%p)\n",
 ctx->idx, ctx);
 
@@ -2264,10 +2260,6 @@ static int coda_release(struct file *file)
flush_work(>seq_end_work);
}
 
-   mutex_lock(>dev_mutex);
-   list_del(>list);
-   mutex_unlock(>dev_mutex);
-
if (ctx->dev->devtype->product == CODA_DX6)
coda_free_aux_buf(dev, >workbuf);
 
@@ -2672,7 +2664,6 @@ static int coda_probe(struct platform_device *pdev)
return -EINVAL;
 
spin_lock_init(>irqlock);
-   INIT_LIST_HEAD(>instances);
 
dev->plat_dev = pdev;
dev->clk_per = devm_clk_get(>dev, "per");
diff --git a/drivers/media/platform/coda/coda.h 
b/drivers/media/platform/coda/coda.h
index 6cb19f47cbed..aaa90c3d9a16 100644
--- a/drivers/media/platform/coda/coda.h
+++ b/drivers/media/platform/coda/coda.h
@@ -94,7 +94,6 @@ struct coda_dev {
struct mutexcoda_mutex;
struct workqueue_struct *workqueue;
struct v4l2_m2m_dev *m2m_dev;
-   struct list_headinstances;
struct ida  ida;
struct dentry   *debugfs_root;
 };
@@ -192,7 +191,6 @@ struct coda_context_ops {
 struct coda_ctx {
struct coda_dev *dev;
struct mutexbuffer_mutex;
-   struct list_headlist;
struct work_struct  pic_run_work;
struct work_struct  seq_end_work;
struct completion   completion;
-- 
2.19.1



[PATCH 08/15] media: coda: set V4L2_CAP_TIMEPERFRAME flag in coda_s_parm

2018-11-05 Thread Philipp Zabel
The flag is already set in coda_g_parm, but v4l2-compliance complains
about it not being set during S_PARM.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-common.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index 60b866160094..1ba3301b35de 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -1135,6 +1135,7 @@ static int coda_s_parm(struct file *file, void *fh, 
struct v4l2_streamparm *a)
if (a->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
return -EINVAL;
 
+   a->parm.output.capability = V4L2_CAP_TIMEPERFRAME;
tpf = >parm.output.timeperframe;
coda_approximate_timeperframe(tpf);
ctx->params.framerate = coda_timeperframe_to_frate(tpf);
-- 
2.19.1



[PATCH 02/15] media: coda: store unmasked fifo position in meta

2018-11-05 Thread Philipp Zabel
Storing the unmasked kfifo->in position as meta->start and meta->end
makes it easier to compare a point past meta->end with the current
kfifo->in.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-bit.c|  9 +++--
 drivers/media/platform/coda/coda-common.c |  1 -
 drivers/media/platform/coda/coda.h|  4 ++--
 drivers/media/platform/coda/trace.h   | 10 ++
 4 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/media/platform/coda/coda-bit.c 
b/drivers/media/platform/coda/coda-bit.c
index d26c2d85a009..e5ce0bec8ec3 100644
--- a/drivers/media/platform/coda/coda-bit.c
+++ b/drivers/media/platform/coda/coda-bit.c
@@ -299,8 +299,7 @@ void coda_fill_bitstream(struct coda_ctx *ctx, struct 
list_head *buffer_list)
}
 
/* Buffer start position */
-   start = ctx->bitstream_fifo.kfifo.in &
-   ctx->bitstream_fifo.kfifo.mask;
+   start = ctx->bitstream_fifo.kfifo.in;
 
if (coda_bitstream_try_queue(ctx, src_buf)) {
/*
@@ -315,8 +314,7 @@ void coda_fill_bitstream(struct coda_ctx *ctx, struct 
list_head *buffer_list)
meta->timecode = src_buf->timecode;
meta->timestamp = src_buf->vb2_buf.timestamp;
meta->start = start;
-   meta->end = ctx->bitstream_fifo.kfifo.in &
-   ctx->bitstream_fifo.kfifo.mask;
+   meta->end = ctx->bitstream_fifo.kfifo.in;
spin_lock_irqsave(>buffer_meta_lock,
  flags);
list_add_tail(>list,
@@ -1980,8 +1978,7 @@ static int coda_prepare_decode(struct coda_ctx *ctx)
if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) {
 
/* If this is the last buffer in the bitstream, add padding */
-   if (meta->end == (ctx->bitstream_fifo.kfifo.in &
- ctx->bitstream_fifo.kfifo.mask)) {
+   if (meta->end == ctx->bitstream_fifo.kfifo.in) {
static unsigned char buf[512];
unsigned int pad;
 
diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index cbb59c2f3a82..c53ecc884e15 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -1297,7 +1297,6 @@ static int coda_job_ready(void *m2m_priv)
return 0;
}
 
-
if (!src_bufs && !stream_end &&
(coda_get_bitstream_payload(ctx) < 512)) {
v4l2_dbg(1, coda_debug, >dev->v4l2_dev,
diff --git a/drivers/media/platform/coda/coda.h 
b/drivers/media/platform/coda/coda.h
index b6cd14ee91ea..00d0fa50bcd1 100644
--- a/drivers/media/platform/coda/coda.h
+++ b/drivers/media/platform/coda/coda.h
@@ -144,8 +144,8 @@ struct coda_buffer_meta {
u32 sequence;
struct v4l2_timecodetimecode;
u64 timestamp;
-   u32 start;
-   u32 end;
+   unsigned intstart;
+   unsigned intend;
 };
 
 /* Per-queue, driver-specific private data */
diff --git a/drivers/media/platform/coda/trace.h 
b/drivers/media/platform/coda/trace.h
index ca671e315ad0..a672bfc4c6ba 100644
--- a/drivers/media/platform/coda/trace.h
+++ b/drivers/media/platform/coda/trace.h
@@ -97,8 +97,8 @@ DECLARE_EVENT_CLASS(coda_buf_meta_class,
TP_fast_assign(
__entry->minor = ctx->fh.vdev->minor;
__entry->index = buf->vb2_buf.index;
-   __entry->start = meta->start;
-   __entry->end = meta->end;
+   __entry->start = meta->start & ctx->bitstream_fifo.kfifo.mask;
+   __entry->end = meta->end & ctx->bitstream_fifo.kfifo.mask;
__entry->ctx = ctx->idx;
),
 
@@ -127,8 +127,10 @@ DECLARE_EVENT_CLASS(coda_meta_class,
 
TP_fast_assign(
__entry->minor = ctx->fh.vdev->minor;
-   __entry->start = meta ? meta->start : 0;
-   __entry->end = meta ? meta->end : 0;
+   __entry->start = meta ? (meta->start &
+ctx->bitstream_fifo.kfifo.mask) : 0;
+   __entry->end = meta ? (meta->end &
+  ctx->bitstream_fifo.kfifo.mask) : 0;
__entry->ctx = ctx->idx;
),
 
-- 
2.19.1



[PATCH 11/15] media: coda: fail S_SELECTION for read-only targets

2018-11-05 Thread Philipp Zabel
v4l2-compliance complains if S_SELECTION returns 0 for read-only targets.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-common.c | 51 +--
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index c4d48069606c..fd9bc19cd79b 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -938,32 +938,39 @@ static int coda_s_selection(struct file *file, void *fh,
struct coda_ctx *ctx = fh_to_ctx(fh);
struct coda_q_data *q_data;
 
-   if (ctx->inst_type == CODA_INST_ENCODER &&
-   s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT &&
-   s->target == V4L2_SEL_TGT_CROP) {
-   q_data = get_q_data(ctx, s->type);
-   if (!q_data)
-   return -EINVAL;
-
-   s->r.left = 0;
-   s->r.top = 0;
-   s->r.width = clamp(s->r.width, 2U, q_data->width);
-   s->r.height = clamp(s->r.height, 2U, q_data->height);
+   switch (s->target) {
+   case V4L2_SEL_TGT_CROP:
+   if (ctx->inst_type == CODA_INST_ENCODER &&
+   s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
+   q_data = get_q_data(ctx, s->type);
+   if (!q_data)
+   return -EINVAL;
 
-   if (s->flags & V4L2_SEL_FLAG_LE) {
-   s->r.width = round_up(s->r.width, 2);
-   s->r.height = round_up(s->r.height, 2);
-   } else {
-   s->r.width = round_down(s->r.width, 2);
-   s->r.height = round_down(s->r.height, 2);
-   }
+   s->r.left = 0;
+   s->r.top = 0;
+   s->r.width = clamp(s->r.width, 2U, q_data->width);
+   s->r.height = clamp(s->r.height, 2U, q_data->height);
+
+   if (s->flags & V4L2_SEL_FLAG_LE) {
+   s->r.width = round_up(s->r.width, 2);
+   s->r.height = round_up(s->r.height, 2);
+   } else {
+   s->r.width = round_down(s->r.width, 2);
+   s->r.height = round_down(s->r.height, 2);
+   }
 
-   q_data->rect = s->r;
+   q_data->rect = s->r;
 
-   return 0;
+   return 0;
+   }
+   /* else fall through */
+   case V4L2_SEL_TGT_NATIVE_SIZE:
+   case V4L2_SEL_TGT_COMPOSE:
+   return coda_g_selection(file, fh, s);
+   default:
+   /* v4l2-compliance expects this to fail for read-only targets */
+   return -EINVAL;
}
-
-   return coda_g_selection(file, fh, s);
 }
 
 static int coda_try_encoder_cmd(struct file *file, void *fh,
-- 
2.19.1



[PATCH 12/15] media: coda: print SEQ_INIT error code as hex value

2018-11-05 Thread Philipp Zabel
From: Michael Tretter 

The error code looks much more like a bit field than an error value.
Print it as hex rather than decimal.

Signed-off-by: Michael Tretter 
Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-bit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/platform/coda/coda-bit.c 
b/drivers/media/platform/coda/coda-bit.c
index 348b17140715..53f1a83e72a9 100644
--- a/drivers/media/platform/coda/coda-bit.c
+++ b/drivers/media/platform/coda/coda-bit.c
@@ -1748,7 +1748,7 @@ static int __coda_start_decoding(struct coda_ctx *ctx)
 
if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
v4l2_err(>v4l2_dev,
-   "CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
+   "CODA_COMMAND_SEQ_INIT failed, error code = 0x%x\n",
coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
return -EAGAIN;
}
-- 
2.19.1



[PATCH 14/15] media: coda: normalise debug output

2018-11-05 Thread Philipp Zabel
Consistently add the context index to debug output, which otherwise is
impossible to make sense of when two contexts are running concurrently.
For this purpose, add a convenience macro coda_dbg(). Use the function
name with the coda_ prefix stripped as keyword where applicable, and
consistently use vid-out and vid-cap names for the queues. Add sequence
counters to the decoder job finished message and correctly indicate B
frames. Add a start streaming message to complement the stop streaming
message and a start encoding message to complement the existing start
decoding message.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-bit.c| 55 +++-
 drivers/media/platform/coda/coda-common.c | 79 ++-
 drivers/media/platform/coda/coda.h|  7 ++
 3 files changed, 65 insertions(+), 76 deletions(-)

diff --git a/drivers/media/platform/coda/coda-bit.c 
b/drivers/media/platform/coda/coda-bit.c
index 53f1a83e72a9..f2c0aa261c9b 100644
--- a/drivers/media/platform/coda/coda-bit.c
+++ b/drivers/media/platform/coda/coda-bit.c
@@ -725,8 +725,7 @@ static void coda_setup_iram(struct coda_ctx *ctx)
 
 out:
if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
-   v4l2_dbg(1, coda_debug, >dev->v4l2_dev,
-"IRAM smaller than needed\n");
+   coda_dbg(1, ctx, "IRAM smaller than needed\n");
 
if (dev->devtype->product == CODA_HX4 ||
dev->devtype->product == CODA_7541) {
@@ -1213,6 +1212,12 @@ static int coda_start_encoding(struct coda_ctx *ctx)
goto out;
}
 
+   coda_dbg(1, ctx, "start encoding %dx%d %4.4s->%4.4s @ %d/%d Hz\n",
+q_data_src->rect.width, q_data_src->rect.height,
+(char *)>codec->src_fourcc, (char *)_fourcc,
+ctx->params.framerate & 0x,
+(ctx->params.framerate >> 16) + 1);
+
/* Save stream headers */
buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
switch (dst_fourcc) {
@@ -1474,8 +1479,7 @@ static void coda_finish_encode(struct coda_ctx *ctx)
vb2_set_plane_payload(_buf->vb2_buf, 0, wr_ptr - start_ptr);
}
 
-   v4l2_dbg(1, coda_debug, >dev->v4l2_dev, "frame size = %u\n",
-wr_ptr - start_ptr);
+   coda_dbg(1, ctx, "frame size = %u\n", wr_ptr - start_ptr);
 
coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM);
coda_read(dev, CODA_RET_ENC_PIC_FLAG);
@@ -1504,11 +1508,9 @@ static void coda_finish_encode(struct coda_ctx *ctx)
if (ctx->gopcounter < 0)
ctx->gopcounter = ctx->params.gop_size - 1;
 
-   v4l2_dbg(1, coda_debug, >v4l2_dev,
-   "job finished: encoding frame (%d) (%s)\n",
-   dst_buf->sequence,
-   (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
-   "KEYFRAME" : "PFRAME");
+   coda_dbg(1, ctx, "job finished: encoded %c frame (%d)\n",
+(dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ? 'I' : 'P',
+dst_buf->sequence);
 }
 
 static void coda_seq_end_work(struct work_struct *work)
@@ -1522,9 +1524,7 @@ static void coda_seq_end_work(struct work_struct *work)
if (ctx->initialized == 0)
goto out;
 
-   v4l2_dbg(1, coda_debug, >v4l2_dev,
-"%d: %s: sent command 'SEQ_END' to coda\n", ctx->idx,
-__func__);
+   coda_dbg(1, ctx, "%s: sent command 'SEQ_END' to coda\n", __func__);
if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
v4l2_err(>v4l2_dev,
 "CODA_COMMAND_SEQ_END failed\n");
@@ -1667,8 +1667,7 @@ static int __coda_start_decoding(struct coda_ctx *ctx)
u32 val;
int ret;
 
-   v4l2_dbg(1, coda_debug, >v4l2_dev,
-"Video Data Order Adapter: %s\n",
+   coda_dbg(1, ctx, "Video Data Order Adapter: %s\n",
 ctx->use_vdoa ? "Enabled" : "Disabled");
 
/* Start decoding */
@@ -1772,8 +1771,7 @@ static int __coda_start_decoding(struct coda_ctx *ctx)
width = round_up(width, 16);
height = round_up(height, 16);
 
-   v4l2_dbg(1, coda_debug, >v4l2_dev, "%s instance %d now: %dx%d\n",
-__func__, ctx->idx, width, height);
+   coda_dbg(1, ctx, "start decoding: %dx%d\n", width, height);
 
ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
/*
@@ -1904,8 +1902,7 @@ static int coda_prepare_decode(struct coda_ctx *ctx)
 
if (coda_get_bitstream_payload(ctx) < 512 &&
(!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
-   v4l2_dbg(1, c

[PATCH 03/15] media: coda: always hold back decoder jobs until we have enough bitstream payload

2018-11-05 Thread Philipp Zabel
The bitstream prefetch unit reads data in 256-byte blocks with some kind
of queueing. For the decoder to see data up to a desired position in the
next run, the bitstream has to be filled for two 256-byte blocks past
that position, aligned up to the next 256-byte boundary.
This should make sure we never run into a buffer underrun condition if
userspace does not supply new input buffers fast enough.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-common.c | 13 -
 drivers/media/platform/coda/coda.h| 12 
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index c53ecc884e15..c7a274c60ff9 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -1272,6 +1272,7 @@ static int coda_job_ready(void *m2m_priv)
bool stream_end = ctx->bit_stream_param &
  CODA_BIT_STREAM_END_FLAG;
int num_metas = ctx->num_metas;
+   struct coda_buffer_meta *meta;
unsigned int count;
 
count = hweight32(ctx->frm_dis_flg);
@@ -1292,16 +1293,18 @@ static int coda_job_ready(void *m2m_priv)
 
if (!stream_end && (num_metas + src_bufs) < 2) {
v4l2_dbg(1, coda_debug, >dev->v4l2_dev,
-"%d: not ready: need 2 buffers available (%d, 
%d)\n",
+"%d: not ready: need 2 buffers available 
(queue:%d + bitstream:%d)\n",
 ctx->idx, num_metas, src_bufs);
return 0;
}
 
-   if (!src_bufs && !stream_end &&
-   (coda_get_bitstream_payload(ctx) < 512)) {
+   meta = list_first_entry(>buffer_meta_list,
+   struct coda_buffer_meta, list);
+   if (!coda_bitstream_can_fetch_past(ctx, meta->end) &&
+   !stream_end) {
v4l2_dbg(1, coda_debug, >dev->v4l2_dev,
-"%d: not ready: not enough bitstream data 
(%d).\n",
-ctx->idx, coda_get_bitstream_payload(ctx));
+"not ready: not enough bitstream data to read 
past %u (%u)\n",
+meta->end, ctx->bitstream_fifo.kfifo.in);
return 0;
}
}
diff --git a/drivers/media/platform/coda/coda.h 
b/drivers/media/platform/coda/coda.h
index 00d0fa50bcd1..6cb19f47cbed 100644
--- a/drivers/media/platform/coda/coda.h
+++ b/drivers/media/platform/coda/coda.h
@@ -295,6 +295,18 @@ static inline unsigned int 
coda_get_bitstream_payload(struct coda_ctx *ctx)
return kfifo_len(>bitstream_fifo);
 }
 
+/*
+ * The bitstream prefetcher needs to read at least 2 256 byte periods past
+ * the desired bitstream position for all data to reach the decoder.
+ */
+static inline bool coda_bitstream_can_fetch_past(struct coda_ctx *ctx,
+unsigned int pos)
+{
+   return (int)(ctx->bitstream_fifo.kfifo.in - ALIGN(pos, 256)) > 512;
+}
+
+bool coda_bitstream_can_fetch_past(struct coda_ctx *ctx, unsigned int pos);
+
 void coda_bit_stream_end_flag(struct coda_ctx *ctx);
 
 void coda_m2m_buf_done(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf,
-- 
2.19.1



[PATCH 01/15] media: coda: fix memory corruption in case more than 32 instances are opened

2018-11-05 Thread Philipp Zabel
The ffz() return value is undefined if the instance mask does not
contain any zeros. If it returned 32, the following set_bit would
corrupt the debugfs_root pointer.
Switch to IDA for context index allocation. This also removes the
artificial 32 instance limit for all except CodaDx6.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-common.c | 25 ---
 drivers/media/platform/coda/coda.h|  2 +-
 2 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index 2848ea5f464d..cbb59c2f3a82 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -2099,17 +2099,6 @@ int coda_decoder_queue_init(void *priv, struct vb2_queue 
*src_vq,
return coda_queue_init(priv, dst_vq);
 }
 
-static int coda_next_free_instance(struct coda_dev *dev)
-{
-   int idx = ffz(dev->instance_mask);
-
-   if ((idx < 0) ||
-   (dev->devtype->product == CODA_DX6 && idx > CODADX6_MAX_INSTANCES))
-   return -EBUSY;
-
-   return idx;
-}
-
 /*
  * File operations
  */
@@ -2118,7 +2107,8 @@ static int coda_open(struct file *file)
 {
struct video_device *vdev = video_devdata(file);
struct coda_dev *dev = video_get_drvdata(vdev);
-   struct coda_ctx *ctx = NULL;
+   struct coda_ctx *ctx;
+   unsigned int max = ~0;
char *name;
int ret;
int idx;
@@ -2127,12 +2117,13 @@ static int coda_open(struct file *file)
if (!ctx)
return -ENOMEM;
 
-   idx = coda_next_free_instance(dev);
+   if (dev->devtype->product == CODA_DX6)
+   max = CODADX6_MAX_INSTANCES - 1;
+   idx = ida_alloc_max(>ida, max, GFP_KERNEL);
if (idx < 0) {
ret = idx;
goto err_coda_max;
}
-   set_bit(idx, >instance_mask);
 
name = kasprintf(GFP_KERNEL, "context%d", idx);
if (!name) {
@@ -2241,8 +2232,8 @@ static int coda_open(struct file *file)
 err_pm_get:
v4l2_fh_del(>fh);
v4l2_fh_exit(>fh);
-   clear_bit(ctx->idx, >instance_mask);
 err_coda_name_init:
+   ida_free(>ida, ctx->idx);
 err_coda_max:
kfree(ctx);
return ret;
@@ -2284,7 +2275,7 @@ static int coda_release(struct file *file)
pm_runtime_put_sync(>plat_dev->dev);
v4l2_fh_del(>fh);
v4l2_fh_exit(>fh);
-   clear_bit(ctx->idx, >instance_mask);
+   ida_free(>ida, ctx->idx);
if (ctx->ops->release)
ctx->ops->release(ctx);
debugfs_remove_recursive(ctx->debugfs_entry);
@@ -2745,6 +2736,7 @@ static int coda_probe(struct platform_device *pdev)
 
mutex_init(>dev_mutex);
mutex_init(>coda_mutex);
+   ida_init(>ida);
 
dev->debugfs_root = debugfs_create_dir("coda", NULL);
if (!dev->debugfs_root)
@@ -2832,6 +2824,7 @@ static int coda_remove(struct platform_device *pdev)
coda_free_aux_buf(dev, >tempbuf);
coda_free_aux_buf(dev, >workbuf);
debugfs_remove_recursive(dev->debugfs_root);
+   ida_destroy(>ida);
return 0;
 }
 
diff --git a/drivers/media/platform/coda/coda.h 
b/drivers/media/platform/coda/coda.h
index 19ac0b9dc6eb..b6cd14ee91ea 100644
--- a/drivers/media/platform/coda/coda.h
+++ b/drivers/media/platform/coda/coda.h
@@ -95,7 +95,7 @@ struct coda_dev {
struct workqueue_struct *workqueue;
struct v4l2_m2m_dev *m2m_dev;
struct list_headinstances;
-   unsigned long   instance_mask;
+   struct ida  ida;
struct dentry   *debugfs_root;
 };
 
-- 
2.19.1



[PATCH 09/15] media: coda: implement ENUM_FRAMEINTERVALS

2018-11-05 Thread Philipp Zabel
v4l2-compliance complains about S_PARM being supported, but not
ENUM_FRAMEINTERVALS.
Report a continuous frame interval even though the hardware only
supports 16-bit numerator and denominator, with min/max values
that can be programmed into the mailbox registers.

Signed-off-by: Philipp Zabel 
---
 drivers/media/platform/coda/coda-common.c | 34 +++
 1 file changed, 34 insertions(+)

diff --git a/drivers/media/platform/coda/coda-common.c 
b/drivers/media/platform/coda/coda-common.c
index 1ba3301b35de..32998da39cac 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -1044,6 +1044,38 @@ static int coda_decoder_cmd(struct file *file, void *fh,
return 0;
 }
 
+static int coda_enum_frameintervals(struct file *file, void *fh,
+   struct v4l2_frmivalenum *f)
+{
+   struct coda_ctx *ctx = fh_to_ctx(fh);
+   int i;
+
+   if (f->index)
+   return -EINVAL;
+
+   /* Disallow YUYV if the vdoa is not available */
+   if (!ctx->vdoa && f->pixel_format == V4L2_PIX_FMT_YUYV)
+   return -EINVAL;
+
+   for (i = 0; i < CODA_MAX_FORMATS; i++) {
+   if (f->pixel_format == ctx->cvd->src_formats[i] ||
+   f->pixel_format == ctx->cvd->dst_formats[i])
+   break;
+   }
+   if (i == CODA_MAX_FORMATS)
+   return -EINVAL;
+
+   f->type = V4L2_FRMIVAL_TYPE_CONTINUOUS;
+   f->stepwise.min.numerator = 1;
+   f->stepwise.min.denominator = 65535;
+   f->stepwise.max.numerator = 65536;
+   f->stepwise.max.denominator = 1;
+   f->stepwise.step.numerator = 1;
+   f->stepwise.step.denominator = 1;
+
+   return 0;
+}
+
 static int coda_g_parm(struct file *file, void *fh, struct v4l2_streamparm *a)
 {
struct coda_ctx *ctx = fh_to_ctx(fh);
@@ -1190,6 +1222,8 @@ static const struct v4l2_ioctl_ops coda_ioctl_ops = {
.vidioc_g_parm  = coda_g_parm,
.vidioc_s_parm  = coda_s_parm,
 
+   .vidioc_enum_frameintervals = coda_enum_frameintervals,
+
.vidioc_subscribe_event = coda_subscribe_event,
.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
 };
-- 
2.19.1



[PATCH 1/3] media: imx: add capture compose rectangle

2018-11-05 Thread Philipp Zabel
Allowing captured images to be composed into larger memory buffers
will let us lift alignment restrictions on the CSI crop width.

Signed-off-by: Philipp Zabel 
---
 drivers/staging/media/imx/imx-ic-prpencvf.c   |  3 +-
 drivers/staging/media/imx/imx-media-capture.c | 38 +++
 drivers/staging/media/imx/imx-media-csi.c |  3 +-
 drivers/staging/media/imx/imx-media-vdic.c|  4 +-
 drivers/staging/media/imx/imx-media.h |  2 +
 5 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/media/imx/imx-ic-prpencvf.c 
b/drivers/staging/media/imx/imx-ic-prpencvf.c
index 28f41caba05d..fe5a77baa592 100644
--- a/drivers/staging/media/imx/imx-ic-prpencvf.c
+++ b/drivers/staging/media/imx/imx-ic-prpencvf.c
@@ -366,8 +366,7 @@ static int prp_setup_channel(struct prp_priv *priv,
 
memset(, 0, sizeof(image));
image.pix = vdev->fmt.fmt.pix;
-   image.rect.width = image.pix.width;
-   image.rect.height = image.pix.height;
+   image.rect = vdev->compose;
 
if (rot_swap_width_height) {
swap(image.pix.width, image.pix.height);
diff --git a/drivers/staging/media/imx/imx-media-capture.c 
b/drivers/staging/media/imx/imx-media-capture.c
index b37e1186eb2f..cace8a51aca8 100644
--- a/drivers/staging/media/imx/imx-media-capture.c
+++ b/drivers/staging/media/imx/imx-media-capture.c
@@ -262,6 +262,10 @@ static int capture_s_fmt_vid_cap(struct file *file, void 
*fh,
priv->vdev.fmt.fmt.pix = f->fmt.pix;
priv->vdev.cc = imx_media_find_format(f->fmt.pix.pixelformat,
  CS_SEL_ANY, true);
+   priv->vdev.compose.left = 0;
+   priv->vdev.compose.top = 0;
+   priv->vdev.compose.width = f->fmt.fmt.pix.width;
+   priv->vdev.compose.height = f->fmt.fmt.pix.height;
 
return 0;
 }
@@ -290,6 +294,35 @@ static int capture_s_std(struct file *file, void *fh, 
v4l2_std_id std)
return v4l2_subdev_call(priv->src_sd, video, s_std, std);
 }
 
+static int capture_g_selection(struct file *file, void *fh,
+  struct v4l2_selection *s)
+{
+   struct capture_priv *priv = video_drvdata(file);
+
+   switch (s->target) {
+   case V4L2_SEL_TGT_CROP:
+   case V4L2_SEL_TGT_CROP_DEFAULT:
+   case V4L2_SEL_TGT_CROP_BOUNDS:
+   case V4L2_SEL_TGT_NATIVE_SIZE:
+   case V4L2_SEL_TGT_COMPOSE:
+   case V4L2_SEL_TGT_COMPOSE_DEFAULT:
+   case V4L2_SEL_TGT_COMPOSE_BOUNDS:
+   case V4L2_SEL_TGT_COMPOSE_PADDED:
+   s->r = priv->vdev.compose;
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int capture_s_selection(struct file *file, void *fh,
+  struct v4l2_selection *s)
+{
+   return capture_g_selection(file, fh, s);
+}
+
 static int capture_g_parm(struct file *file, void *fh,
  struct v4l2_streamparm *a)
 {
@@ -350,6 +383,9 @@ static const struct v4l2_ioctl_ops capture_ioctl_ops = {
.vidioc_g_std   = capture_g_std,
.vidioc_s_std   = capture_s_std,
 
+   .vidioc_g_selection = capture_g_selection,
+   .vidioc_s_selection = capture_s_selection,
+
.vidioc_g_parm  = capture_g_parm,
.vidioc_s_parm  = capture_s_parm,
 
@@ -687,6 +723,8 @@ int imx_media_capture_device_register(struct 
imx_media_video_dev *vdev)
vdev->fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
imx_media_mbus_fmt_to_pix_fmt(>fmt.fmt.pix,
  _src.format, NULL);
+   vdev->compose.width = fmt_src.format.width;
+   vdev->compose.height = fmt_src.format.height;
vdev->cc = imx_media_find_format(vdev->fmt.fmt.pix.pixelformat,
 CS_SEL_ANY, false);
 
diff --git a/drivers/staging/media/imx/imx-media-csi.c 
b/drivers/staging/media/imx/imx-media-csi.c
index 4223f8d418ae..c4523afe7b48 100644
--- a/drivers/staging/media/imx/imx-media-csi.c
+++ b/drivers/staging/media/imx/imx-media-csi.c
@@ -413,8 +413,7 @@ static int csi_idmac_setup_channel(struct csi_priv *priv)
 
memset(, 0, sizeof(image));
image.pix = vdev->fmt.fmt.pix;
-   image.rect.width = image.pix.width;
-   image.rect.height = image.pix.height;
+   image.rect = vdev->compose;
 
csi_idmac_setup_vb2_buf(priv, phys);
 
diff --git a/drivers/staging/media/imx/imx-media-vdic.c 
b/drivers/staging/media/imx/imx-media-vdic.c
index 482250d47e7c..e08d296cf4eb 100644
--- a/drivers/staging/media/imx/imx-media-vdic.c
+++ b/drivers/staging/media/imx/imx-media-vdic.c
@@ -263,10 +263,10 @@ static int setup_vdi_channel(struct vdic_priv *priv,
 
memset(, 0, sizeof(image));
image.pix = vdev->fmt.fmt.pix;
+   image.rect = vdev->compose;
/* one field to VDIC channels */
image.pix.heig

[PATCH 3/3] media: imx: lift CSI width alignment restriction

2018-11-05 Thread Philipp Zabel
The CSI subdevice shouldn't have to care about IDMAC line start
address alignment. With compose rectangle support in the capture
driver, it doesn't have to anymore.

Signed-off-by: Philipp Zabel 
---
 drivers/staging/media/imx/imx-media-capture.c |  9 -
 drivers/staging/media/imx/imx-media-csi.c |  2 +-
 drivers/staging/media/imx/imx-media-utils.c   | 15 ---
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/media/imx/imx-media-capture.c 
b/drivers/staging/media/imx/imx-media-capture.c
index 2d49d9573056..f87d6e8019e5 100644
--- a/drivers/staging/media/imx/imx-media-capture.c
+++ b/drivers/staging/media/imx/imx-media-capture.c
@@ -204,10 +204,9 @@ static int capture_g_fmt_vid_cap(struct file *file, void 
*fh,
 }
 
 static int __capture_try_fmt_vid_cap(struct capture_priv *priv,
-struct v4l2_subev_format *fmt_src,
+struct v4l2_subdev_format *fmt_src,
 struct v4l2_format *f)
 {
-   struct capture_priv *priv = video_drvdata(file);
const struct imx_media_pixfmt *cc, *cc_src;
 
cc_src = imx_media_find_ipu_format(fmt_src->format.code, CS_SEL_ANY);
@@ -250,7 +249,7 @@ static int capture_try_fmt_vid_cap(struct file *file, void 
*fh,
if (ret)
return ret;
 
-   return __capture_try_fmt(priv, _src, f);
+   return __capture_try_fmt_vid_cap(priv, _src, f);
 }
 
 static int capture_s_fmt_vid_cap(struct file *file, void *fh,
@@ -280,8 +279,8 @@ static int capture_s_fmt_vid_cap(struct file *file, void 
*fh,
  CS_SEL_ANY, true);
priv->vdev.compose.left = 0;
priv->vdev.compose.top = 0;
-   priv->vdev.compose.width = fmt_src.width;
-   priv->vdev.compose.height = fmt_src.height;
+   priv->vdev.compose.width = fmt_src.format.width;
+   priv->vdev.compose.height = fmt_src.format.height;
 
return 0;
 }
diff --git a/drivers/staging/media/imx/imx-media-csi.c 
b/drivers/staging/media/imx/imx-media-csi.c
index c4523afe7b48..d39682192a67 100644
--- a/drivers/staging/media/imx/imx-media-csi.c
+++ b/drivers/staging/media/imx/imx-media-csi.c
@@ -41,7 +41,7 @@
 #define MIN_H   144
 #define MAX_W  4096
 #define MAX_H  4096
-#define W_ALIGN4 /* multiple of 16 pixels */
+#define W_ALIGN1 /* multiple of 2 pixels */
 #define H_ALIGN1 /* multiple of 2 lines */
 #define S_ALIGN1 /* multiple of 2 */
 
diff --git a/drivers/staging/media/imx/imx-media-utils.c 
b/drivers/staging/media/imx/imx-media-utils.c
index 0eaa353d5cb3..5f110d90a4ef 100644
--- a/drivers/staging/media/imx/imx-media-utils.c
+++ b/drivers/staging/media/imx/imx-media-utils.c
@@ -580,6 +580,7 @@ int imx_media_mbus_fmt_to_pix_fmt(struct v4l2_pix_format 
*pix,
  struct v4l2_mbus_framefmt *mbus,
  const struct imx_media_pixfmt *cc)
 {
+   u32 width;
u32 stride;
 
if (!cc) {
@@ -602,9 +603,16 @@ int imx_media_mbus_fmt_to_pix_fmt(struct v4l2_pix_format 
*pix,
cc = imx_media_find_mbus_format(code, CS_SEL_YUV, false);
}
 
-   stride = cc->planar ? mbus->width : (mbus->width * cc->bpp) >> 3;
+   /* Round up width for minimum burst size */
+   width = round_up(mbus->width, 8);
 
-   pix->width = mbus->width;
+   /* Round up stride for IDMAC line start address alignment */
+   if (cc->planar)
+   stride = round_up(width, 16);
+   else
+   stride = round_up((width * cc->bpp) >> 3, 8);
+
+   pix->width = width;
pix->height = mbus->height;
pix->pixelformat = cc->fourcc;
pix->colorspace = mbus->colorspace;
@@ -613,7 +621,8 @@ int imx_media_mbus_fmt_to_pix_fmt(struct v4l2_pix_format 
*pix,
pix->quantization = mbus->quantization;
pix->field = mbus->field;
pix->bytesperline = stride;
-   pix->sizeimage = (pix->width * pix->height * cc->bpp) >> 3;
+   pix->sizeimage = cc->planar ? ((stride * pix->height * cc->bpp) >> 3) :
+stride * pix->height;
 
return 0;
 }
-- 
2.19.1



[PATCH 2/3] media: imx: set compose rectangle to mbus format

2018-11-05 Thread Philipp Zabel
Prepare for mbus format being smaller than the written rectangle
due to burst size.

Signed-off-by: Philipp Zabel 
---
 drivers/staging/media/imx/imx-media-capture.c | 55 +--
 1 file changed, 38 insertions(+), 17 deletions(-)

diff --git a/drivers/staging/media/imx/imx-media-capture.c 
b/drivers/staging/media/imx/imx-media-capture.c
index cace8a51aca8..2d49d9573056 100644
--- a/drivers/staging/media/imx/imx-media-capture.c
+++ b/drivers/staging/media/imx/imx-media-capture.c
@@ -203,21 +203,14 @@ static int capture_g_fmt_vid_cap(struct file *file, void 
*fh,
return 0;
 }
 
-static int capture_try_fmt_vid_cap(struct file *file, void *fh,
-  struct v4l2_format *f)
+static int __capture_try_fmt_vid_cap(struct capture_priv *priv,
+struct v4l2_subev_format *fmt_src,
+struct v4l2_format *f)
 {
struct capture_priv *priv = video_drvdata(file);
-   struct v4l2_subdev_format fmt_src;
const struct imx_media_pixfmt *cc, *cc_src;
-   int ret;
-
-   fmt_src.pad = priv->src_sd_pad;
-   fmt_src.which = V4L2_SUBDEV_FORMAT_ACTIVE;
-   ret = v4l2_subdev_call(priv->src_sd, pad, get_fmt, NULL, _src);
-   if (ret)
-   return ret;
 
-   cc_src = imx_media_find_ipu_format(fmt_src.format.code, CS_SEL_ANY);
+   cc_src = imx_media_find_ipu_format(fmt_src->format.code, CS_SEL_ANY);
if (cc_src) {
u32 fourcc, cs_sel;
 
@@ -231,7 +224,7 @@ static int capture_try_fmt_vid_cap(struct file *file, void 
*fh,
cc = imx_media_find_format(fourcc, cs_sel, false);
}
} else {
-   cc_src = imx_media_find_mbus_format(fmt_src.format.code,
+   cc_src = imx_media_find_mbus_format(fmt_src->format.code,
CS_SEL_ANY, true);
if (WARN_ON(!cc_src))
return -EINVAL;
@@ -239,15 +232,32 @@ static int capture_try_fmt_vid_cap(struct file *file, 
void *fh,
cc = cc_src;
}
 
-   imx_media_mbus_fmt_to_pix_fmt(>fmt.pix, _src.format, cc);
+   imx_media_mbus_fmt_to_pix_fmt(>fmt.pix, _src->format, cc);
 
return 0;
 }
 
+static int capture_try_fmt_vid_cap(struct file *file, void *fh,
+  struct v4l2_format *f)
+{
+   struct capture_priv *priv = video_drvdata(file);
+   struct v4l2_subdev_format fmt_src;
+   int ret;
+
+   fmt_src.pad = priv->src_sd_pad;
+   fmt_src.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+   ret = v4l2_subdev_call(priv->src_sd, pad, get_fmt, NULL, _src);
+   if (ret)
+   return ret;
+
+   return __capture_try_fmt(priv, _src, f);
+}
+
 static int capture_s_fmt_vid_cap(struct file *file, void *fh,
 struct v4l2_format *f)
 {
struct capture_priv *priv = video_drvdata(file);
+   struct v4l2_subdev_format fmt_src;
int ret;
 
if (vb2_is_busy(>q)) {
@@ -255,7 +265,13 @@ static int capture_s_fmt_vid_cap(struct file *file, void 
*fh,
return -EBUSY;
}
 
-   ret = capture_try_fmt_vid_cap(file, priv, f);
+   fmt_src.pad = priv->src_sd_pad;
+   fmt_src.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+   ret = v4l2_subdev_call(priv->src_sd, pad, get_fmt, NULL, _src);
+   if (ret)
+   return ret;
+
+   ret = __capture_try_fmt_vid_cap(priv, _src, f);
if (ret)
return ret;
 
@@ -264,8 +280,8 @@ static int capture_s_fmt_vid_cap(struct file *file, void 
*fh,
  CS_SEL_ANY, true);
priv->vdev.compose.left = 0;
priv->vdev.compose.top = 0;
-   priv->vdev.compose.width = f->fmt.fmt.pix.width;
-   priv->vdev.compose.height = f->fmt.fmt.pix.height;
+   priv->vdev.compose.width = fmt_src.width;
+   priv->vdev.compose.height = fmt_src.height;
 
return 0;
 }
@@ -307,9 +323,14 @@ static int capture_g_selection(struct file *file, void *fh,
case V4L2_SEL_TGT_COMPOSE:
case V4L2_SEL_TGT_COMPOSE_DEFAULT:
case V4L2_SEL_TGT_COMPOSE_BOUNDS:
-   case V4L2_SEL_TGT_COMPOSE_PADDED:
s->r = priv->vdev.compose;
break;
+   case V4L2_SEL_TGT_COMPOSE_PADDED:
+   s->r.left = 0;
+   s->r.top = 0;
+   s->r.width = priv->vdev.fmt.fmt.pix.width;
+   s->r.height = priv->vdev.fmt.fmt.pix.height;
+   break;
default:
return -EINVAL;
}
-- 
2.19.1



[RFC] media: imx: queue subdevice events on the video device in the same pipeline

2018-11-05 Thread Philipp Zabel
While subdevice and video device are in the same pipeline, pass
subdevice events on to userspace via the video device node.

Signed-off-by: Philipp Zabel 
---
This would allow userspace to see source change events from the source subdevice
on the video device node, for example.
---
 drivers/staging/media/imx/imx-media-dev.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/staging/media/imx/imx-media-dev.c 
b/drivers/staging/media/imx/imx-media-dev.c
index 4b344a4a3706..2fe6fdf2faf1 100644
--- a/drivers/staging/media/imx/imx-media-dev.c
+++ b/drivers/staging/media/imx/imx-media-dev.c
@@ -442,6 +442,23 @@ static const struct media_device_ops imx_media_md_ops = {
.link_notify = imx_media_link_notify,
 };
 
+static void imx_media_notify(struct v4l2_subdev *sd, unsigned int notification,
+void *arg)
+{
+   struct imx_media_dev *imxmd;
+   struct imx_media_video_dev *vdev;
+
+   imxmd = container_of(sd->v4l2_dev, struct imx_media_dev, v4l2_dev);
+   list_for_each_entry(vdev, >vdev_list, list) {
+   if (sd->entity.pipe &&
+   sd->entity.pipe == vdev->vfd->entity.pipe &&
+   notification == V4L2_DEVICE_NOTIFY_EVENT) {
+   v4l2_event_queue(vdev->vfd, arg);
+   break;
+   }
+   }
+}
+
 static int imx_media_probe(struct platform_device *pdev)
 {
struct device *dev = >dev;
@@ -464,6 +481,7 @@ static int imx_media_probe(struct platform_device *pdev)
imxmd->v4l2_dev.mdev = >md;
strscpy(imxmd->v4l2_dev.name, "imx-media",
sizeof(imxmd->v4l2_dev.name));
+   imxmd->v4l2_dev.notify = imx_media_notify;
 
media_device_init(>md);
 
-- 
2.19.1



Re: [PATCH] media: imx: csi: fix enum_mbus_code for unknown mbus format codes

2018-11-05 Thread Philipp Zabel
Hi Steve,

On Fri, 2018-02-09 at 17:43 -0800, Steve Longerbeam wrote:
[...]
> I *think* by implementing init_cfg in the CSI, it will prevent the
> NULL deref in csi_enum_mbus_code(). However I think this patch
> is a good idea in any case.

Ack on both. Can we still get this patch applied?

regards
Philipp


Re: [PATCH v4 00/22] i.MX media mem2mem scaler

2018-10-30 Thread Philipp Zabel
Hi Steve,

On Fri, 2018-10-19 at 11:51 -0700, Steve Longerbeam wrote:
> Awesome, thanks Philipp.
> 
> For the whole series:
> 
> Acked-by: Steve Longerbeam 
> Tested-by: Steve Longerbeam 
> on i.MX6q SabreSD.

Thank you. I have added the tags where it made sense to me and applied
the ipu-v3 patches (all except 1/22) to imx-drm/next.

regards
Philipp


Re: [RFP] Which V4L2 ioctls could be replaced by better versions?

2018-10-23 Thread Philipp Zabel
Hi Tomasz,

On Mon, Oct 22, 2018 at 12:37:57PM +0900, Tomasz Figa wrote:
[...]
> On Mon, Oct 22, 2018 at 1:28 AM Philipp Zabel  wrote:
[...]
> > REQBUFS 0 fails if the vb2 buffer is still in use, including from dmabuf
> > attachments: vb2_buffer_in_use checks the num_users memop. The refcount
> > returned by num_users shared between the vmarea handler and dmabuf ops,
> > so any dmabuf attachment counts towards in_use.
> 
> Ah, right. I've managed to completely forget about it, since we have a
> downstream patch that we attempted to upstream earlier [1], but didn't
> have a chance to follow up on the comments and there wasn't much
> interest in it in general.
> 
> [1] https://lore.kernel.org/patchwork/patch/607853/
> 
> Perhaps it would be worth reviving?

Yes, thanks for the pointer. I've completely missed that patch.

I was under the mistaken impression that there was some technical reason
to keep the queue around until after the last dmabuf attachment is gone,
but everything is properly refcounted.

regards
Philipp


Re: [PATCH v3 01/16] media: imx: add mem2mem device

2018-10-21 Thread Philipp Zabel
On Fri, Oct 19, 2018 at 01:19:10PM -0700, Steve Longerbeam wrote:
> 
> On 10/19/18 2:53 AM, Philipp Zabel wrote:
> > Hi Tim,
> > 
> > On Thu, 2018-10-18 at 15:53 -0700, Tim Harvey wrote:
> > [...]
> > > Philipp,
> > > 
> > > Thanks for submitting this!
> > > 
> > > I'm hoping this lets us use non-IMX capture devices along with the IMX
> > > media controller entities so we can use hardware
> > > CSC,scaling,pixel-format-conversions and ultimately coda based encode.
> > > 
> > > I've built this on top of linux-media and see that it registers as
> > > /dev/video8 but I'm not clear how to use it? I don't see it within the
> > > media controller graph.
> > It's a V4L2 mem2mem device that can be handled by the GstV4l2Transform
> > element, for example. GStreamer should create a v4l2video8convert
> > element of that type.
> > 
> > The mem2mem device is not part of the media controller graph on purpose.
> > There is no interaction with any of the entities in the media controller
> > graph apart from the fact that the IC PP task we are using for mem2mem
> > scaling is sharing hardware resources with the IC PRP tasks used for the
> > media controller scaler entities.
> 
> It would be nice in the future to link mem2mem output-side to the ipu_vdic:1
> pad, to make use of h/w VDIC de-interlace as part of mem2mem operations.
> The progressive output from a new ipu_vdic:3 pad can then be sent to the
> image_convert APIs by the mem2mem driver for further tiled scaling, CSC,
> and rotation by the IC PP task. The ipu_vdic:1 pad makes use of pure
> DMA-based de-interlace, that is, all input frames (N-1, N, N+1) to the
> VDIC are sent from DMA buffers, and this VDIC mode of operation is
> well understood and produces clean de-interlace output. The risk is
> that this would require iDMAC channel 5 for ipu_vdic:3, which AFAIK is
> not verified to work yet.

Tiled mem2mem deinterlacing support would be nice, I'm not sure yet how
though. I'd limit media controller links to marking VDIC as unavailable
for the capture pipeline. The V4L2 subdev API is too lowlevel for tiling
mem2mem purposes, as we'd need to change the subdev format multiple
times per frame.
Also I'd like to keep the option of scheduling tile jobs to both IPUs on
i.MX6Q, which will become difficult to describe via MC, as both IPUs'
ipu_vdics would have to be involved.

> The other problem with that currently is that mem2mem would have to be split
> into separate device nodes: a /dev/videoN for output-side (linked to
> ipu_vdic:1), and a /dev/videoM for capture-side (linked from
> ipu_vdic:3). And then it no longer presents to userspace as a mem2mem
> device with a single device node for both output and capture sides.

I don't understand why we'd need separate video devices for output and
capture, deinterlacing is still single input single (double rate)
output. As soon as we begin tiling, we are one layer of abstraction
away from the hardware pads anyway. Now if we want to support combining
on the other hand...

> Or is there another way? I recall work to integrate mem2mem with media
> control. There is v4l2_m2m_register_media_controller(), but that
> creates three
> entities:
> source, processing, and sink. The VDIC entity would be part of mem2mem
> processing but this entity already exists for the current graph. This
> function could however be used as a guide to incorporate the VDIC
> entity into m2m device.

I'm not sure if this is the right abstraction. Without tiling or
multi-IPU scheduling, sure. But the mem2mem driver does not directly
describe hardware operation anyway.

regards
Philipp


Re: [RFP] Which V4L2 ioctls could be replaced by better versions?

2018-10-21 Thread Philipp Zabel
On Wed, Oct 03, 2018 at 05:24:39PM +0900, Tomasz Figa wrote:
[...]
> > Yes, but that would fall in a complete redesign I guess. The buffer
> > allocation scheme is very inflexible. You can't have buffers of two
> > dimensions allocated at the same time for the same queue. Worse, you
> > cannot leave even 1 buffer as your scanout buffer while reallocating
> > new buffers, this is not permitted by the framework (in software). As a
> > side effect, there is no way to optimize the resolution changes, you
> > even have to copy your scanout buffer on the CPU, to free it in order
> > to proceed. Resolution changes are thus painfully slow, by design.
[...]
> Also, I fail to understand the scanout issue. If one exports a vb2
> buffer to a DMA-buf and import it to the scanout engine, it can keep
> scanning out from it as long as it want, because the DMA-buf will hold
> a reference on the buffer, even if it's removed from the vb2 queue.

REQBUFS 0 fails if the vb2 buffer is still in use, including from dmabuf
attachments: vb2_buffer_in_use checks the num_users memop. The refcount
returned by num_users is shared between the vmarea handler and dmabuf ops,
so any dmabuf attachment counts towards in_use.

regards
Philipp


Re: [RFP] Which V4L2 ioctls could be replaced by better versions?

2018-10-21 Thread Philipp Zabel
On Thu, Sep 20, 2018 at 02:14:07PM -0400, Nicolas Dufresne wrote:
> > Do we have more ioctls that could use a refresh? S/G/TRY_FMT perhaps, again 
> > in
> > order to improve single vs multiplanar handling.
> 
> Yes, but that would fall in a complete redesign I guess. The buffer
> allocation scheme is very inflexible. You can't have buffers of two
> dimensions allocated at the same time for the same queue. Worse, you
> cannot leave even 1 buffer as your scanout buffer while reallocating
> new buffers, this is not permitted by the framework (in software). As a
> side effect, there is no way to optimize the resolution changes, you
> even have to copy your scanout buffer on the CPU, to free it in order
> to proceed. Resolution changes are thus painfully slow, by design.

I've seen the same issue when exporting dmabufs from a V4L2 decoder and
importing them into OpenGL textures. Mesa caches state so aggressively,
even destroying all textures and flushing OpenGL is not enough to remove
all references to the imported resource. Only after another render step
the dmabuf fds are closed and thus make REQBUFS 0 possible on the
exporting capture queue.
This leads to a catch-22 situation during a resolution change, because
we'd already need the new buffers to do an OpenGL render without the old
buffers, so that the old buffers can be released back to V4L2, so that
V4L2 can allocate the new buffers...
It would be very helpful in this situation if exported dmabufs could
just be orphaned by REQBUFS 0.

regards
Philipp


Re: [PATCH v3 00/16] i.MX media mem2mem scaler

2018-10-19 Thread Philipp Zabel
Hi Steve,

On Wed, 2018-10-17 at 16:46 -0700, Steve Longerbeam wrote:
> Hi Philipp,
> 
> On 10/12/18 5:29 PM, Steve Longerbeam wrote:
> > 
> > 
> > But one last thing. Conversions to and from YV12 are producing images
> > with wrong colors, it looks like the .uv_swapped boolean needs to be 
> > checked
> > additionally somewhere. Any ideas?
> 
> 
> Sorry, this was my fault. I fixed this in
> 
> "gpu: ipu-v3: Add chroma plane offset overrides to ipu_cpmem_set_image()"
> 
> in my fork g...@github.com:slongerbeam/mediatree.git, branch imx-mem2mem.3.
> 
> Steve

Thanks a lot for testing, fixes, and integration. Basically I've just
resubmitted that branch as v4.
After this round I'll pick up all non-controversial ipu-v3 / image-
convert patches.

regards
Philipp


[PATCH v4 02/22] gpu: ipu-cpmem: add WARN_ON_ONCE() for unaligned dma buffers

2018-10-19 Thread Philipp Zabel
From: Steve Longerbeam 

Add a WARN_ON_ONCE() if either the Y/packed buffer, or the U/V offsets,
are not aligned on 8-byte boundaries. This will catch alignment
bugs in DRM, V4L2.

Signed-off-by: Steve Longerbeam 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-cpmem.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c
index a9d2501500a1..7e65954f13c2 100644
--- a/drivers/gpu/ipu-v3/ipu-cpmem.c
+++ b/drivers/gpu/ipu-v3/ipu-cpmem.c
@@ -259,6 +259,8 @@ EXPORT_SYMBOL_GPL(ipu_cpmem_set_high_priority);
 
 void ipu_cpmem_set_buffer(struct ipuv3_channel *ch, int bufnum, dma_addr_t buf)
 {
+   WARN_ON_ONCE(buf & 0x7);
+
if (bufnum)
ipu_ch_param_write_field(ch, IPU_FIELD_EBA1, buf >> 3);
else
@@ -268,6 +270,8 @@ EXPORT_SYMBOL_GPL(ipu_cpmem_set_buffer);
 
 void ipu_cpmem_set_uv_offset(struct ipuv3_channel *ch, u32 u_off, u32 v_off)
 {
+   WARN_ON_ONCE((u_off & 0x7) || (v_off & 0x7));
+
ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_off / 8);
ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_off / 8);
 }
@@ -435,6 +439,8 @@ void ipu_cpmem_set_yuv_planar_full(struct ipuv3_channel *ch,
   unsigned int uv_stride,
   unsigned int u_offset, unsigned int v_offset)
 {
+   WARN_ON_ONCE((u_offset & 0x7) || (v_offset & 0x7));
+
ipu_ch_param_write_field(ch, IPU_FIELD_SLUV, uv_stride - 1);
ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_offset / 8);
ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_offset / 8);
-- 
2.19.0



[PATCH v4 16/22] gpu: ipu-v3: image-convert: select optimal seam positions

2018-10-19 Thread Philipp Zabel
Select seam positions that minimize distortions during seam hiding while
satisfying input and output IDMAC, rotator, and image format constraints.

This code looks for aligned output seam positions that minimize the
difference between the fractional corresponding ideal input positions
and the input positions rounded to alignment requirements.

Since now tiles can be sized differently, alignment restrictions of the
complete image can be relaxed in the next step.

Signed-off-by: Philipp Zabel 
---
Changes since v3:
 - Fix tile_left_align for 24-bit RGB formats and reduce alignment
   restrictions for U/V packed planar YUV formats.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 343 -
 1 file changed, 337 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index a407ca3b367b..a674241dd0b8 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -432,6 +432,126 @@ static int calc_image_resize_coefficients(struct 
ipu_image_convert_ctx *ctx,
return 0;
 }
 
+#define round_closest(x, y) round_down((x) + (y)/2, (y))
+
+/*
+ * Find the best aligned seam position in the interval [out_start, out_end].
+ * Rotation and image offsets are out of scope.
+ *
+ * @out_start: start of interval, must be within 1024 pixels / lines
+ * of out_end
+ * @out_end: end of interval, smaller than or equal to out_edge
+ * @in_edge: input right / bottom edge
+ * @out_edge: output right / bottom edge
+ * @in_align: input alignment, either horizontal 8-byte line start address
+ *alignment, or pixel alignment due to image format
+ * @out_align: output alignment, either horizontal 8-byte line start address
+ * alignment, or pixel alignment due to image format or rotator
+ * block size
+ * @in_burst: horizontal input burst size in case of horizontal flip
+ * @out_burst: horizontal output burst size or rotator block size
+ * @downsize_coeff: downsizing section coefficient
+ * @resize_coeff: main processing section resizing coefficient
+ * @_in_seam: aligned input seam position return value
+ * @_out_seam: aligned output seam position return value
+ */
+static void find_best_seam(struct ipu_image_convert_ctx *ctx,
+  unsigned int out_start,
+  unsigned int out_end,
+  unsigned int in_edge,
+  unsigned int out_edge,
+  unsigned int in_align,
+  unsigned int out_align,
+  unsigned int in_burst,
+  unsigned int out_burst,
+  unsigned int downsize_coeff,
+  unsigned int resize_coeff,
+  u32 *_in_seam,
+  u32 *_out_seam)
+{
+   struct device *dev = ctx->chan->priv->ipu->dev;
+   unsigned int out_pos;
+   /* Input / output seam position candidates */
+   unsigned int out_seam = 0;
+   unsigned int in_seam = 0;
+   unsigned int min_diff = UINT_MAX;
+
+   /*
+* Output tiles must start at a multiple of 8 bytes horizontally and
+* possibly at an even line vertically depending on the pixel format.
+* Only consider output aligned positions for the seam.
+*/
+   out_start = round_up(out_start, out_align);
+   for (out_pos = out_start; out_pos < out_end; out_pos += out_align) {
+   unsigned int in_pos;
+   unsigned int in_pos_aligned;
+   unsigned int abs_diff;
+
+   /*
+* Tiles in the right row / bottom column may not be allowed to
+* overshoot horizontally / vertically. out_burst may be the
+* actual DMA burst size, or the rotator block size.
+*/
+   if ((out_burst > 1) && (out_edge - out_pos) % out_burst)
+   continue;
+
+   /*
+* Input sample position, corresponding to out_pos, 19.13 fixed
+* point.
+*/
+   in_pos = (out_pos * resize_coeff) << downsize_coeff;
+   /*
+* The closest input sample position that we could actually
+* start the input tile at, 19.13 fixed point.
+*/
+   in_pos_aligned = round_closest(in_pos, 8192U * in_align);
+
+   if ((in_burst > 1) &&
+   (in_edge - in_pos_aligned / 8192U) % in_burst)
+   continue;
+
+   if (in_pos < in_pos_aligned)
+   abs_diff = in_pos_aligned - in_pos;
+   else
+   abs_diff = in_pos - in_pos_aligned;
+
+   if (abs_diff < min_diff) {
+   in_seam = in_pos_aligned;
+   out

[PATCH v4 14/22] gpu: ipu-v3: image-convert: calculate tile dimensions and offsets outside fill_image

2018-10-19 Thread Philipp Zabel
This will allow calculating seam positions after initializing the
ipu_image base structure but before calculating tile dimensions.

Signed-off-by: Philipp Zabel 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index d14ee7b303a1..542c091cfef1 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1467,9 +1467,7 @@ static int fill_image(struct ipu_image_convert_ctx *ctx,
else
ic_image->stride  = ic_image->base.pix.bytesperline;
 
-   calc_tile_dimensions(ctx, ic_image);
-
-   return calc_tile_offsets(ctx, ic_image);
+   return 0;
 }
 
 /* borrowed from drivers/media/v4l2-core/v4l2-common.c */
@@ -1673,14 +1671,24 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum 
ipu_ic_task ic_task,
ctx->num_tiles = d_image->num_cols * d_image->num_rows;
ctx->rot_mode = rot_mode;
 
+   ret = fill_image(ctx, s_image, in, IMAGE_CONVERT_IN);
+   if (ret)
+   goto out_free;
+   ret = fill_image(ctx, d_image, out, IMAGE_CONVERT_OUT);
+   if (ret)
+   goto out_free;
+
ret = calc_image_resize_coefficients(ctx, in, out);
if (ret)
goto out_free;
 
-   ret = fill_image(ctx, s_image, in, IMAGE_CONVERT_IN);
+   calc_tile_dimensions(ctx, s_image);
+   ret = calc_tile_offsets(ctx, s_image);
if (ret)
goto out_free;
-   ret = fill_image(ctx, d_image, out, IMAGE_CONVERT_OUT);
+
+   calc_tile_dimensions(ctx, d_image);
+   ret = calc_tile_offsets(ctx, d_image);
if (ret)
goto out_free;
 
-- 
2.19.0



[PATCH v4 03/22] gpu: ipu-v3: Add chroma plane offset overrides to ipu_cpmem_set_image()

2018-10-19 Thread Philipp Zabel
From: Steve Longerbeam 

Allow the caller of ipu_cpmem_set_image() to override the latter's
calculation of the chroma plane offsets, by adding override U/V
plane offsets to 'struct ipu_image'.

Signed-off-by: Steve Longerbeam 
---
New since v3.
---
 drivers/gpu/ipu-v3/ipu-cpmem.c | 46 +++---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 10 +++---
 include/video/imx-ipu-v3.h |  3 ++
 3 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c
index 7e65954f13c2..163fadb8a33a 100644
--- a/drivers/gpu/ipu-v3/ipu-cpmem.c
+++ b/drivers/gpu/ipu-v3/ipu-cpmem.c
@@ -745,48 +745,56 @@ int ipu_cpmem_set_image(struct ipuv3_channel *ch, struct 
ipu_image *image)
switch (pix->pixelformat) {
case V4L2_PIX_FMT_YUV420:
offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
-   u_offset = U_OFFSET(pix, image->rect.left,
-   image->rect.top) - offset;
-   v_offset = V_OFFSET(pix, image->rect.left,
-   image->rect.top) - offset;
+   u_offset = image->u_offset ?
+   image->u_offset : U_OFFSET(pix, image->rect.left,
+  image->rect.top) - offset;
+   v_offset = image->v_offset ?
+   image->v_offset : V_OFFSET(pix, image->rect.left,
+  image->rect.top) - offset;
 
ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2,
  u_offset, v_offset);
break;
case V4L2_PIX_FMT_YVU420:
offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
-   u_offset = U_OFFSET(pix, image->rect.left,
-   image->rect.top) - offset;
-   v_offset = V_OFFSET(pix, image->rect.left,
-   image->rect.top) - offset;
+   u_offset = image->u_offset ?
+   image->u_offset : V_OFFSET(pix, image->rect.left,
+  image->rect.top) - offset;
+   v_offset = image->v_offset ?
+   image->v_offset : U_OFFSET(pix, image->rect.left,
+  image->rect.top) - offset;
 
ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2,
- v_offset, u_offset);
+ u_offset, v_offset);
break;
case V4L2_PIX_FMT_YUV422P:
offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
-   u_offset = U2_OFFSET(pix, image->rect.left,
-image->rect.top) - offset;
-   v_offset = V2_OFFSET(pix, image->rect.left,
-image->rect.top) - offset;
+   u_offset = image->u_offset ?
+   image->u_offset : U2_OFFSET(pix, image->rect.left,
+   image->rect.top) - offset;
+   v_offset = image->v_offset ?
+   image->v_offset : V2_OFFSET(pix, image->rect.left,
+   image->rect.top) - offset;
 
ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2,
  u_offset, v_offset);
break;
case V4L2_PIX_FMT_NV12:
offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
-   u_offset = UV_OFFSET(pix, image->rect.left,
-image->rect.top) - offset;
-   v_offset = 0;
+   u_offset = image->u_offset ?
+   image->u_offset : UV_OFFSET(pix, image->rect.left,
+   image->rect.top) - offset;
+   v_offset = image->v_offset ? image->v_offset : 0;
 
ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline,
  u_offset, v_offset);
break;
case V4L2_PIX_FMT_NV16:
offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
-   u_offset = UV2_OFFSET(pix, image->rect.left,
- image->rect.top) - offset;
-   v_offset = 0;
+   u_offset = image->u_offset ?
+   image->u_offset : UV2_OFFSET(pix, image->rect.left,
+image->rect.top) - offset;
+   v_offset = image->v_offset ? image->v_offset : 0;
 
ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline,
  u_offset, v_offset);
diff --git 

[PATCH v4 13/22] gpu: ipu-v3: image-convert: store tile top/left position

2018-10-19 Thread Philipp Zabel
Store tile top/left position in pixels in the tile structure.
This will allow overlapping tiles with different sizes later.

Signed-off-by: Philipp Zabel 
---
No functional changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 27 ++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index cb47981741b4..d14ee7b303a1 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -84,6 +84,8 @@ struct ipu_image_convert_dma_chan {
 struct ipu_image_tile {
u32 width;
u32 height;
+   u32 left;
+   u32 top;
/* size and strides are in bytes */
u32 size;
u32 stride;
@@ -433,13 +435,17 @@ static int calc_image_resize_coefficients(struct 
ipu_image_convert_ctx *ctx,
 static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
 struct ipu_image_convert_image *image)
 {
-   int i;
+   unsigned int i;
 
for (i = 0; i < ctx->num_tiles; i++) {
struct ipu_image_tile *tile = >tile[i];
+   const unsigned int row = i / image->num_cols;
+   const unsigned int col = i % image->num_cols;
 
tile->height = image->base.pix.height / image->num_rows;
tile->width = image->base.pix.width / image->num_cols;
+   tile->left = col * tile->width;
+   tile->top = row * tile->height;
tile->size = ((tile->height * image->fmt->bpp) >> 3) *
tile->width;
 
@@ -535,7 +541,7 @@ static int calc_tile_offsets_planar(struct 
ipu_image_convert_ctx *ctx,
struct ipu_image_convert_priv *priv = chan->priv;
const struct ipu_image_pixfmt *fmt = image->fmt;
unsigned int row, col, tile = 0;
-   u32 H, w, h, y_stride, uv_stride;
+   u32 H, top, y_stride, uv_stride;
u32 uv_row_off, uv_col_off, uv_off, u_off, v_off, tmp;
u32 y_row_off, y_col_off, y_off;
u32 y_size, uv_size;
@@ -552,13 +558,12 @@ static int calc_tile_offsets_planar(struct 
ipu_image_convert_ctx *ctx,
uv_size = y_size / (fmt->uv_width_dec * fmt->uv_height_dec);
 
for (row = 0; row < image->num_rows; row++) {
-   w = image->tile[tile].width;
-   h = image->tile[tile].height;
-   y_row_off = row * h * y_stride;
-   uv_row_off = (row * h * uv_stride) / fmt->uv_height_dec;
+   top = image->tile[tile].top;
+   y_row_off = top * y_stride;
+   uv_row_off = (top * uv_stride) / fmt->uv_height_dec;
 
for (col = 0; col < image->num_cols; col++) {
-   y_col_off = col * w;
+   y_col_off = image->tile[tile].left;
uv_col_off = y_col_off / fmt->uv_width_dec;
if (fmt->uv_packed)
uv_col_off *= 2;
@@ -601,7 +606,7 @@ static int calc_tile_offsets_packed(struct 
ipu_image_convert_ctx *ctx,
struct ipu_image_convert_priv *priv = chan->priv;
const struct ipu_image_pixfmt *fmt = image->fmt;
unsigned int row, col, tile = 0;
-   u32 w, h, bpp, stride, offset;
+   u32 bpp, stride, offset;
u32 row_off, col_off;
 
/* setup some convenience vars */
@@ -609,12 +614,10 @@ static int calc_tile_offsets_packed(struct 
ipu_image_convert_ctx *ctx,
bpp = fmt->bpp;
 
for (row = 0; row < image->num_rows; row++) {
-   w = image->tile[tile].width;
-   h = image->tile[tile].height;
-   row_off = row * h * stride;
+   row_off = image->tile[tile].top * stride;
 
for (col = 0; col < image->num_cols; col++) {
-   col_off = (col * w * bpp) >> 3;
+   col_off = (image->tile[tile].left * bpp) >> 3;
 
offset = row_off + col_off;
 
-- 
2.19.0



[PATCH v4 19/22] gpu: ipu-v3: image-convert: fix bytesperline adjustment

2018-10-19 Thread Philipp Zabel
For planar formats, bytesperline does not depend on BPP. It must always
be at least as large as the width and satisfy the tile width alignment
restrictions.

The input bytesperline to ipu_image_convert_adjust() may be
uninitialized, so don't rely on input bytesperline as the
minimum value for clamp_align(). Use 2 << w_align as the minimum
instead.

Signed-off-by: Philipp Zabel 
[slongerb...@gmail.com: clamp input bytesperline]
Signed-off-by: Steve Longerbeam 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 0829723a7599..b735065fe288 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1915,10 +1915,18 @@ void ipu_image_convert_adjust(struct ipu_image *in, 
struct ipu_image *out,
out->pix.height = clamp_align(out->pix.height, MIN_H, MAX_H, h_align);
 
/* set input/output strides and image sizes */
-   in->pix.bytesperline = (in->pix.width * infmt->bpp) >> 3;
-   in->pix.sizeimage = in->pix.height * in->pix.bytesperline;
-   out->pix.bytesperline = (out->pix.width * outfmt->bpp) >> 3;
-   out->pix.sizeimage = out->pix.height * out->pix.bytesperline;
+   in->pix.bytesperline = infmt->planar ?
+   clamp_align(in->pix.width, 2 << w_align, MAX_W, w_align) :
+   clamp_align((in->pix.width * infmt->bpp) >> 3,
+   2 << w_align, MAX_W, w_align);
+   in->pix.sizeimage = infmt->planar ?
+   (in->pix.height * in->pix.bytesperline * infmt->bpp) >> 3 :
+   in->pix.height * in->pix.bytesperline;
+   out->pix.bytesperline = outfmt->planar ? out->pix.width :
+   (out->pix.width * outfmt->bpp) >> 3;
+   out->pix.sizeimage = outfmt->planar ?
+   (out->pix.height * out->pix.bytesperline * outfmt->bpp) >> 3 :
+   out->pix.height * out->pix.bytesperline;
 }
 EXPORT_SYMBOL_GPL(ipu_image_convert_adjust);
 
-- 
2.19.0



[PATCH v4 15/22] gpu: ipu-v3: image-convert: move tile alignment helpers

2018-10-19 Thread Philipp Zabel
Move tile_width_align and tile_height_align up so they
can be used by the tile edge position calculation code.

Signed-off-by: Philipp Zabel 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 54 +-
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 542c091cfef1..a407ca3b367b 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -432,6 +432,33 @@ static int calc_image_resize_coefficients(struct 
ipu_image_convert_ctx *ctx,
return 0;
 }
 
+/*
+ * We have to adjust the tile width such that the tile physaddrs and
+ * U and V plane offsets are multiples of 8 bytes as required by
+ * the IPU DMA Controller. For the planar formats, this corresponds
+ * to a pixel alignment of 16 (but use a more formal equation since
+ * the variables are available). For all the packed formats, 8 is
+ * good enough.
+ */
+static inline u32 tile_width_align(const struct ipu_image_pixfmt *fmt)
+{
+   return fmt->planar ? 8 * fmt->uv_width_dec : 8;
+}
+
+/*
+ * For tile height alignment, we have to ensure that the output tile
+ * heights are multiples of 8 lines if the IRT is required by the
+ * given rotation mode (the IRT performs rotations on 8x8 blocks
+ * at a time). If the IRT is not used, or for input image tiles,
+ * 2 lines are good enough.
+ */
+static inline u32 tile_height_align(enum ipu_image_convert_type type,
+   enum ipu_rotate_mode rot_mode)
+{
+   return (type == IMAGE_CONVERT_OUT &&
+   ipu_rot_mode_is_irt(rot_mode)) ? 8 : 2;
+}
+
 static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
 struct ipu_image_convert_image *image)
 {
@@ -1487,33 +1514,6 @@ static unsigned int clamp_align(unsigned int x, unsigned 
int min,
return x;
 }
 
-/*
- * We have to adjust the tile width such that the tile physaddrs and
- * U and V plane offsets are multiples of 8 bytes as required by
- * the IPU DMA Controller. For the planar formats, this corresponds
- * to a pixel alignment of 16 (but use a more formal equation since
- * the variables are available). For all the packed formats, 8 is
- * good enough.
- */
-static inline u32 tile_width_align(const struct ipu_image_pixfmt *fmt)
-{
-   return fmt->planar ? 8 * fmt->uv_width_dec : 8;
-}
-
-/*
- * For tile height alignment, we have to ensure that the output tile
- * heights are multiples of 8 lines if the IRT is required by the
- * given rotation mode (the IRT performs rotations on 8x8 blocks
- * at a time). If the IRT is not used, or for input image tiles,
- * 2 lines are good enough.
- */
-static inline u32 tile_height_align(enum ipu_image_convert_type type,
-   enum ipu_rotate_mode rot_mode)
-{
-   return (type == IMAGE_CONVERT_OUT &&
-   ipu_rot_mode_is_irt(rot_mode)) ? 8 : 2;
-}
-
 /* Adjusts input/output images to IPU restrictions */
 void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out,
  enum ipu_rotate_mode rot_mode)
-- 
2.19.0



[PATCH v4 00/22] i.MX media mem2mem scaler

2018-10-19 Thread Philipp Zabel
Hi,

this is the fourth version of the i.MX mem2mem scaler series.

An alignment issue with 24-bit RGB formats has been corrected in the
seam position selection patch and a few new fixes by Steve have been
added. If there are no more issues, I'll pick up the ipu-v3 patches
via imx-drm/next. The first patch could be merged via the media tree
independently.

Changes since v3:
 - Fix tile_left_align for 24-bit RGB formats and reduce alignment
   restrictions for U/V packed planar YUV formats
 - Catch unaligned tile offsets in image-convert
 - Add chroma plane offset overrides to ipu_cpmem_set_image() to
   prevent a false positive warning in some cases
 - Fix a race between run and unprepare and make abort reentrant.


Changes since v2:
 - Rely on ipu_image_convert_adjust() in mem2mem_try_fmt() for format
   adjustments. This makes the mem2mem driver mostly a V4L2 mem2mem API
   wrapper around the IPU image converter, and independent of the
   internal image converter implementation.
 - Remove the source and destination buffers on error in device_run().
   Otherwise the conversion is re-attempted apparently over and over
   again (with WARN() backtraces).
 - Allow subscribing to control changes.
 - Fix seam position selection for more corner cases:
- Switch width/height properly and align tile top left positions to 8x8
  IRT block size when rotating.
- Align input width to input burst length in case the scaling step
  flips horizontally.
- Fix bottom edge calculation.

Changes since v1:
 - Fix inverted allow_overshoot logic
 - Correctly switch horizontal / vertical tile alignment when
   determining seam positions with the 90° rotator active.
 - Fix SPDX-License-Identifier and remove superfluous license
   text.
 - Fix uninitialized walign in try_fmt

Previous cover letter:

we have had image conversion code for scaling and colorspace conversion in
the IPUv3 base driver for a while. Since the IC hardware can only write
up to 1024x1024 pixel buffers, it scales to larger output buffers by
splitting the input and output frame into similarly sized tiles.

This causes the issue that the bilinear interpolation resets at the tile
boundary: instead of smoothly interpolating across the seam, there is a
jump in the input sample position that is very apparent for high
upscaling factors. This can be avoided by slightly changing the scaling
coefficients to let the left/top tiles overshoot their input sampling
into the first pixel / line of their right / bottom neighbors. The error
can be further reduced by letting tiles be differently sized and by
selecting seam positions that minimize the input sampling position error
at tile boundaries.
This is complicated by different DMA start address, burst size, and
rotator block size alignment requirements, depending on the input and
output pixel formats, and the fact that flipping happens in different
places depending on the rotation.

This series implements optimal seam position selection and seam hiding
with per-tile resizing coefficients and adds a scaling mem2mem device
to the imx-media driver.

regards
Philipp

Philipp Zabel (15):
  media: imx: add mem2mem device
  gpu: ipu-v3: ipu-ic: allow to manually set resize coefficients
  gpu: ipu-v3: image-convert: prepare for per-tile configuration
  gpu: ipu-v3: image-convert: calculate per-tile resize coefficients
  gpu: ipu-v3: image-convert: reconfigure IC per tile
  gpu: ipu-v3: image-convert: store tile top/left position
  gpu: ipu-v3: image-convert: calculate tile dimensions and offsets
outside fill_image
  gpu: ipu-v3: image-convert: move tile alignment helpers
  gpu: ipu-v3: image-convert: select optimal seam positions
  gpu: ipu-v3: image-convert: fix debug output for varying tile sizes
  gpu: ipu-v3: image-convert: relax alignment restrictions
  gpu: ipu-v3: image-convert: fix bytesperline adjustment
  gpu: ipu-v3: image-convert: add some ASCII art to the exposition
  gpu: ipu-v3: image-convert: disable double buffering if necessary
  gpu: ipu-v3: image-convert: allow three rows or columns

Steve Longerbeam (7):
  gpu: ipu-cpmem: add WARN_ON_ONCE() for unaligned dma buffers
  gpu: ipu-v3: Add chroma plane offset overrides to
ipu_cpmem_set_image()
  gpu: ipu-v3: image-convert: Prevent race between run and unprepare
  gpu: ipu-v3: image-convert: Only wait for abort completion if active
run
  gpu: ipu-v3: image-convert: Allow reentrancy into abort
  gpu: ipu-v3: image-convert: Remove need_abort flag
  gpu: ipu-v3: image-convert: Catch unaligned tile offsets

 drivers/gpu/ipu-v3/ipu-cpmem.c|   52 +-
 drivers/gpu/ipu-v3/ipu-ic.c   |   52 +-
 drivers/gpu/ipu-v3/ipu-image-convert.c| 1019 ++---
 drivers/staging/media/imx/Kconfig |1 +
 drivers/staging/media/imx/Makefile|1 +
 drivers/staging/media/imx/imx-media-dev.c |   11 +
 drivers/staging/media/imx/imx-media-mem2mem.c |  873 ++
 drivers/staging/media

[PATCH v4 12/22] gpu: ipu-v3: image-convert: reconfigure IC per tile

2018-10-19 Thread Philipp Zabel
For differently sized tiles or if the resizing coefficients change,
we have to stop, reconfigure, and restart the IC between tiles.

Signed-off-by: Philipp Zabel 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 65 +-
 1 file changed, 44 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 31e7186bcc00..cb47981741b4 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1151,6 +1151,24 @@ static irqreturn_t do_bh(int irq, void *dev_id)
return IRQ_HANDLED;
 }
 
+static bool ic_settings_changed(struct ipu_image_convert_ctx *ctx)
+{
+   unsigned int cur_tile = ctx->next_tile - 1;
+   unsigned int next_tile = ctx->next_tile;
+
+   if (ctx->resize_coeffs_h[cur_tile % ctx->in.num_cols] !=
+   ctx->resize_coeffs_h[next_tile % ctx->in.num_cols] ||
+   ctx->resize_coeffs_v[cur_tile / ctx->in.num_cols] !=
+   ctx->resize_coeffs_v[next_tile / ctx->in.num_cols] ||
+   ctx->in.tile[cur_tile].width != ctx->in.tile[next_tile].width ||
+   ctx->in.tile[cur_tile].height != ctx->in.tile[next_tile].height ||
+   ctx->out.tile[cur_tile].width != ctx->out.tile[next_tile].width ||
+   ctx->out.tile[cur_tile].height != ctx->out.tile[next_tile].height)
+   return true;
+
+   return false;
+}
+
 /* hold irqlock when calling */
 static irqreturn_t do_irq(struct ipu_image_convert_run *run)
 {
@@ -1194,27 +1212,32 @@ static irqreturn_t do_irq(struct ipu_image_convert_run 
*run)
 * not done, place the next tile buffers.
 */
if (!ctx->double_buffering) {
-
-   src_tile = _image->tile[ctx->next_tile];
-   dst_idx = ctx->out_tile_map[ctx->next_tile];
-   dst_tile = _image->tile[dst_idx];
-
-   ipu_cpmem_set_buffer(chan->in_chan, 0,
-s_image->base.phys0 + src_tile->offset);
-   ipu_cpmem_set_buffer(outch, 0,
-d_image->base.phys0 + dst_tile->offset);
-   if (s_image->fmt->planar)
-   ipu_cpmem_set_uv_offset(chan->in_chan,
-   src_tile->u_off,
-   src_tile->v_off);
-   if (d_image->fmt->planar)
-   ipu_cpmem_set_uv_offset(outch,
-   dst_tile->u_off,
-   dst_tile->v_off);
-
-   ipu_idmac_select_buffer(chan->in_chan, 0);
-   ipu_idmac_select_buffer(outch, 0);
-
+   if (ic_settings_changed(ctx)) {
+   convert_stop(run);
+   convert_start(run, ctx->next_tile);
+   } else {
+   src_tile = _image->tile[ctx->next_tile];
+   dst_idx = ctx->out_tile_map[ctx->next_tile];
+   dst_tile = _image->tile[dst_idx];
+
+   ipu_cpmem_set_buffer(chan->in_chan, 0,
+s_image->base.phys0 +
+src_tile->offset);
+   ipu_cpmem_set_buffer(outch, 0,
+d_image->base.phys0 +
+dst_tile->offset);
+   if (s_image->fmt->planar)
+   ipu_cpmem_set_uv_offset(chan->in_chan,
+   src_tile->u_off,
+   src_tile->v_off);
+   if (d_image->fmt->planar)
+   ipu_cpmem_set_uv_offset(outch,
+   dst_tile->u_off,
+   dst_tile->v_off);
+
+   ipu_idmac_select_buffer(chan->in_chan, 0);
+   ipu_idmac_select_buffer(outch, 0);
+   }
} else if (ctx->next_tile < ctx->num_tiles - 1) {
 
src_tile = _image->tile[ctx->next_tile + 1];
-- 
2.19.0



[PATCH v4 18/22] gpu: ipu-v3: image-convert: relax alignment restrictions

2018-10-19 Thread Philipp Zabel
For the planar but U/V-packed formats NV12 and NV16, 8 pixel width
alignment is good enough to fulfill the 8 byte stride requirement.
If we allow the input 8-pixel DMA bursts to overshoot the end of the
line, the only input alignment restrictions are dictated by the pixel
format and 8-byte aligned line start address.
Since different tile sizes are allowed, the output tile width / height
alignment doesn't need to be multiplied by number of columns / rows.

Signed-off-by: Philipp Zabel 
[slongerb...@gmail.com: Bring in the fixes to format width and
 height alignment restrictions from imx-media-mem2mem.c.]
Signed-off-by: Steve Longerbeam 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 81 +-
 1 file changed, 41 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 0451d699f515..0829723a7599 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -551,31 +551,46 @@ static inline u32 tile_top_align(const struct 
ipu_image_pixfmt *fmt)
return fmt->uv_height_dec > 1 ? 2 : 1;
 }
 
-/*
- * We have to adjust the tile width such that the tile physaddrs and
- * U and V plane offsets are multiples of 8 bytes as required by
- * the IPU DMA Controller. For the planar formats, this corresponds
- * to a pixel alignment of 16 (but use a more formal equation since
- * the variables are available). For all the packed formats, 8 is
- * good enough.
- */
-static inline u32 tile_width_align(const struct ipu_image_pixfmt *fmt)
+static inline u32 tile_width_align(enum ipu_image_convert_type type,
+  const struct ipu_image_pixfmt *fmt,
+  enum ipu_rotate_mode rot_mode)
 {
-   return fmt->planar ? 8 * fmt->uv_width_dec : 8;
+   if (type == IMAGE_CONVERT_IN) {
+   /*
+* The IC burst reads 8 pixels at a time. Reading beyond the
+* end of the line is usually acceptable. Those pixels are
+* ignored, unless the IC has to write the scaled line in
+* reverse.
+*/
+   return (!ipu_rot_mode_is_irt(rot_mode) &&
+   (rot_mode & IPU_ROT_BIT_HFLIP)) ? 8 : 2;
+   }
+
+   /*
+* Align to 16x16 pixel blocks for planar 4:2:0 chroma subsampled
+* formats to guarantee 8-byte aligned line start addresses in the
+* chroma planes when IRT is used. Align to 8x8 pixel IRT block size
+* for all other formats.
+*/
+   return (ipu_rot_mode_is_irt(rot_mode) &&
+   fmt->planar && !fmt->uv_packed) ?
+   8 * fmt->uv_width_dec : 8;
 }
 
-/*
- * For tile height alignment, we have to ensure that the output tile
- * heights are multiples of 8 lines if the IRT is required by the
- * given rotation mode (the IRT performs rotations on 8x8 blocks
- * at a time). If the IRT is not used, or for input image tiles,
- * 2 lines are good enough.
- */
 static inline u32 tile_height_align(enum ipu_image_convert_type type,
+   const struct ipu_image_pixfmt *fmt,
enum ipu_rotate_mode rot_mode)
 {
-   return (type == IMAGE_CONVERT_OUT &&
-   ipu_rot_mode_is_irt(rot_mode)) ? 8 : 2;
+   if (type == IMAGE_CONVERT_IN || !ipu_rot_mode_is_irt(rot_mode))
+   return 2;
+
+   /*
+* Align to 16x16 pixel blocks for planar 4:2:0 chroma subsampled
+* formats to guarantee 8-byte aligned line start addresses in the
+* chroma planes when IRT is used. Align to 8x8 pixel IRT block size
+* for all other formats.
+*/
+   return (fmt->planar && !fmt->uv_packed) ? 8 * fmt->uv_width_dec : 8;
 }
 
 /*
@@ -661,8 +676,9 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
unsigned int in_top_align = tile_top_align(in->fmt);
unsigned int out_left_align = tile_left_align(out->fmt);
unsigned int out_top_align = tile_top_align(out->fmt);
-   unsigned int out_width_align = tile_width_align(out->fmt);
-   unsigned int out_height_align = tile_height_align(out->type,
+   unsigned int out_width_align = tile_width_align(out->type, out->fmt,
+   ctx->rot_mode);
+   unsigned int out_height_align = tile_height_align(out->type, out->fmt,
  ctx->rot_mode);
unsigned int in_right = in->base.rect.width;
unsigned int in_bottom = in->base.rect.height;
@@ -1855,8 +1871,6 @@ void ipu_image_convert_adjust(struct ipu_image *in, 
struct ipu_image *out,
  enum ipu_rotate_mode rot_mode)
 {
const struct ipu_image_pixfmt *infmt, *ou

[PATCH v4 07/22] gpu: ipu-v3: image-convert: Allow reentrancy into abort

2018-10-19 Thread Philipp Zabel
From: Steve Longerbeam 

Allow reentrancy into ipu_image_convert_abort() by moving the re-init
of the ctx->aborted completion under the spin lock, doing it only if there
is an active run, and completing all waiters in do_bh(). Note:
ipu_image_convert_unprepare() is still _not_ reentrant, and can't
be made reentrant.

Signed-off-by: Steve Longerbeam 
---
New since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index e3e032252604..abd8afb22b48 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -896,7 +896,7 @@ static irqreturn_t do_bh(int irq, void *dev_id)
dev_dbg(priv->ipu->dev,
"%s: task %u: signaling abort for ctx %p\n",
__func__, chan->ic_task, ctx);
-   complete(>aborted);
+   complete_all(>aborted);
}
}
 
@@ -1533,8 +1533,6 @@ static void __ipu_image_convert_abort(struct 
ipu_image_convert_ctx *ctx)
int run_count, ret;
bool need_abort;
 
-   reinit_completion(>aborted);
-
spin_lock_irqsave(>irqlock, flags);
 
/* move all remaining pending runs in this context to done_q */
@@ -1549,6 +1547,9 @@ static void __ipu_image_convert_abort(struct 
ipu_image_convert_ctx *ctx)
active_run = (chan->current_run && chan->current_run->ctx == ctx) ?
chan->current_run : NULL;
 
+   if (active_run)
+   reinit_completion(>aborted);
+
need_abort = (run_count || active_run);
 
ctx->aborting = true;
-- 
2.19.0



[PATCH v4 04/22] gpu: ipu-v3: ipu-ic: allow to manually set resize coefficients

2018-10-19 Thread Philipp Zabel
For tiled scaling, we want to compute the scaling coefficients
externally in such a way that the interpolation overshoots tile
boundaries and samples up to the first pixel of the next tile.
Prepare to override the resizing coefficients from the image
conversion code.

Signed-off-by: Philipp Zabel 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-ic.c | 52 +++--
 include/video/imx-ipu-v3.h  |  6 +
 2 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c
index 67cc820253a9..594c3cbc8291 100644
--- a/drivers/gpu/ipu-v3/ipu-ic.c
+++ b/drivers/gpu/ipu-v3/ipu-ic.c
@@ -442,36 +442,40 @@ int ipu_ic_task_graphics_init(struct ipu_ic *ic,
 }
 EXPORT_SYMBOL_GPL(ipu_ic_task_graphics_init);
 
-int ipu_ic_task_init(struct ipu_ic *ic,
-int in_width, int in_height,
-int out_width, int out_height,
-enum ipu_color_space in_cs,
-enum ipu_color_space out_cs)
+int ipu_ic_task_init_rsc(struct ipu_ic *ic,
+int in_width, int in_height,
+int out_width, int out_height,
+enum ipu_color_space in_cs,
+enum ipu_color_space out_cs,
+u32 rsc)
 {
struct ipu_ic_priv *priv = ic->priv;
-   u32 reg, downsize_coeff, resize_coeff;
+   u32 downsize_coeff, resize_coeff;
unsigned long flags;
int ret = 0;
 
-   /* Setup vertical resizing */
-   ret = calc_resize_coeffs(ic, in_height, out_height,
-_coeff, _coeff);
-   if (ret)
-   return ret;
+   if (!rsc) {
+   /* Setup vertical resizing */
 
-   reg = (downsize_coeff << 30) | (resize_coeff << 16);
+   ret = calc_resize_coeffs(ic, in_height, out_height,
+_coeff, _coeff);
+   if (ret)
+   return ret;
+
+   rsc = (downsize_coeff << 30) | (resize_coeff << 16);
 
-   /* Setup horizontal resizing */
-   ret = calc_resize_coeffs(ic, in_width, out_width,
-_coeff, _coeff);
-   if (ret)
-   return ret;
+   /* Setup horizontal resizing */
+   ret = calc_resize_coeffs(ic, in_width, out_width,
+_coeff, _coeff);
+   if (ret)
+   return ret;
 
-   reg |= (downsize_coeff << 14) | resize_coeff;
+   rsc |= (downsize_coeff << 14) | resize_coeff;
+   }
 
spin_lock_irqsave(>lock, flags);
 
-   ipu_ic_write(ic, reg, ic->reg->rsc);
+   ipu_ic_write(ic, rsc, ic->reg->rsc);
 
/* Setup color space conversion */
ic->in_cs = in_cs;
@@ -487,6 +491,16 @@ int ipu_ic_task_init(struct ipu_ic *ic,
spin_unlock_irqrestore(>lock, flags);
return ret;
 }
+
+int ipu_ic_task_init(struct ipu_ic *ic,
+int in_width, int in_height,
+int out_width, int out_height,
+enum ipu_color_space in_cs,
+enum ipu_color_space out_cs)
+{
+   return ipu_ic_task_init_rsc(ic, in_width, in_height, out_width,
+   out_height, in_cs, out_cs, 0);
+}
 EXPORT_SYMBOL_GPL(ipu_ic_task_init);
 
 int ipu_ic_task_idma_init(struct ipu_ic *ic, struct ipuv3_channel *channel,
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index 8bb163cd9314..e582e8e7527a 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -390,6 +390,12 @@ int ipu_ic_task_init(struct ipu_ic *ic,
 int out_width, int out_height,
 enum ipu_color_space in_cs,
 enum ipu_color_space out_cs);
+int ipu_ic_task_init_rsc(struct ipu_ic *ic,
+int in_width, int in_height,
+int out_width, int out_height,
+enum ipu_color_space in_cs,
+enum ipu_color_space out_cs,
+u32 rsc);
 int ipu_ic_task_graphics_init(struct ipu_ic *ic,
  enum ipu_color_space in_g_cs,
  bool galpha_en, u32 galpha,
-- 
2.19.0



[PATCH v4 11/22] gpu: ipu-v3: image-convert: calculate per-tile resize coefficients

2018-10-19 Thread Philipp Zabel
Slightly modifying the resize coefficients per tile makes it possible to
completely hide the seams between tiles and to sample the correct input
pixels at the bottom and right edges of the image.

Tiling requires a bilinear interpolator reset at each tile start, which
causes the image to be slightly shifted if the starting pixel should not
have been sampled from an integer pixel position in the source image
according to the full image resizing ratio. To work around this
hardware limitation, calculate per-tile resizing coefficients that make
sure that the correct input pixels are sampled at the tile end.

Signed-off-by: Philipp Zabel 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 236 -
 1 file changed, 234 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index c2f82d681c48..31e7186bcc00 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -135,6 +135,12 @@ struct ipu_image_convert_ctx {
struct ipu_image_convert_image in;
struct ipu_image_convert_image out;
enum ipu_rotate_mode rot_mode;
+   u32 downsize_coeff_h;
+   u32 downsize_coeff_v;
+   u32 image_resize_coeff_h;
+   u32 image_resize_coeff_v;
+   u32 resize_coeffs_h[MAX_STRIPES_W];
+   u32 resize_coeffs_v[MAX_STRIPES_H];
 
/* intermediate buffer for rotation */
struct ipu_image_convert_dma_buf rot_intermediate[2];
@@ -361,6 +367,69 @@ static inline int num_stripes(int dim)
return 4;
 }
 
+/*
+ * Calculate downsizing coefficients, which are the same for all tiles,
+ * and bilinear resizing coefficients, which are used to find the best
+ * seam positions.
+ */
+static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
+ struct ipu_image *in,
+ struct ipu_image *out)
+{
+   u32 downsized_width = in->rect.width;
+   u32 downsized_height = in->rect.height;
+   u32 downsize_coeff_v = 0;
+   u32 downsize_coeff_h = 0;
+   u32 resized_width = out->rect.width;
+   u32 resized_height = out->rect.height;
+   u32 resize_coeff_h;
+   u32 resize_coeff_v;
+
+   if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
+   resized_width = out->rect.height;
+   resized_height = out->rect.width;
+   }
+
+   /* Do not let invalid input lead to an endless loop below */
+   if (WARN_ON(resized_width == 0 || resized_height == 0))
+   return -EINVAL;
+
+   while (downsized_width >= resized_width * 2) {
+   downsized_width >>= 1;
+   downsize_coeff_h++;
+   }
+
+   while (downsized_height >= resized_height * 2) {
+   downsized_height >>= 1;
+   downsize_coeff_v++;
+   }
+
+   /*
+* Calculate the bilinear resizing coefficients that could be used if
+* we were converting with a single tile. The bottom right output pixel
+* should sample as close as possible to the bottom right input pixel
+* out of the decimator, but not overshoot it:
+*/
+   resize_coeff_h = 8192 * (downsized_width - 1) / (resized_width - 1);
+   resize_coeff_v = 8192 * (downsized_height - 1) / (resized_height - 1);
+
+   dev_dbg(ctx->chan->priv->ipu->dev,
+   "%s: hscale: >>%u, *8192/%u vscale: >>%u, *8192/%u, %ux%u 
tiles\n",
+   __func__, downsize_coeff_h, resize_coeff_h, downsize_coeff_v,
+   resize_coeff_v, ctx->in.num_cols, ctx->in.num_rows);
+
+   if (downsize_coeff_h > 2 || downsize_coeff_v  > 2 ||
+   resize_coeff_h > 0x3fff || resize_coeff_v > 0x3fff)
+   return -EINVAL;
+
+   ctx->downsize_coeff_h = downsize_coeff_h;
+   ctx->downsize_coeff_v = downsize_coeff_v;
+   ctx->image_resize_coeff_h = resize_coeff_h;
+   ctx->image_resize_coeff_v = resize_coeff_v;
+
+   return 0;
+}
+
 static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
 struct ipu_image_convert_image *image)
 {
@@ -578,6 +647,149 @@ static int calc_tile_offsets(struct ipu_image_convert_ctx 
*ctx,
return calc_tile_offsets_packed(ctx, image);
 }
 
+/*
+ * Calculate the resizing ratio for the IC main processing section given input
+ * size, fixed downsizing coefficient, and output size.
+ * Either round to closest for the next tile's first pixel to minimize seams
+ * and distortion (for all but right column / bottom row), or round down to
+ * avoid sampling beyond the edges of the input image for this tile's last
+ * pixel.
+ * Returns the resizing coefficient, resizing ratio is 8192.0 / resize_coeff.
+ */
+static u32 calc_resize_coeff(u32 input_size, u32 downsize_coeff,
+   

[PATCH v4 09/22] gpu: ipu-v3: image-convert: Catch unaligned tile offsets

2018-10-19 Thread Philipp Zabel
From: Steve Longerbeam 

Catch calculated tile offsets that are not 8-byte aligned as required by the
IDMAC engine and return an error from calc_tile_offsets().

Signed-off-by: Steve Longerbeam 
---
New since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 61 --
 1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index b8a400182a00..5fccba176e39 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -459,8 +459,8 @@ static void calc_out_tile_map(struct ipu_image_convert_ctx 
*ctx)
}
 }
 
-static void calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx,
-struct ipu_image_convert_image *image)
+static int calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx,
+   struct ipu_image_convert_image *image)
 {
struct ipu_image_convert_chan *chan = ctx->chan;
struct ipu_image_convert_priv *priv = chan->priv;
@@ -509,24 +509,30 @@ static void calc_tile_offsets_planar(struct 
ipu_image_convert_ctx *ctx,
image->tile[tile].u_off = u_off;
image->tile[tile++].v_off = v_off;
 
-   dev_dbg(priv->ipu->dev,
-   "task %u: ctx %p: %s@[%d,%d]: y_off %08x, u_off 
%08x, v_off %08x\n",
-   chan->ic_task, ctx,
-   image->type == IMAGE_CONVERT_IN ?
-   "Input" : "Output", row, col,
-   y_off, u_off, v_off);
+   if ((y_off & 0x7) || (u_off & 0x7) || (v_off & 0x7)) {
+   dev_err(priv->ipu->dev,
+   "task %u: ctx %p: %s@[%d,%d]: "
+   "y_off %08x, u_off %08x, v_off %08x\n",
+   chan->ic_task, ctx,
+   image->type == IMAGE_CONVERT_IN ?
+   "Input" : "Output", row, col,
+   y_off, u_off, v_off);
+   return -EINVAL;
+   }
}
}
+
+   return 0;
 }
 
-static void calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx,
-struct ipu_image_convert_image *image)
+static int calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx,
+   struct ipu_image_convert_image *image)
 {
struct ipu_image_convert_chan *chan = ctx->chan;
struct ipu_image_convert_priv *priv = chan->priv;
const struct ipu_image_pixfmt *fmt = image->fmt;
unsigned int row, col, tile = 0;
-   u32 w, h, bpp, stride;
+   u32 w, h, bpp, stride, offset;
u32 row_off, col_off;
 
/* setup some convenience vars */
@@ -541,27 +547,35 @@ static void calc_tile_offsets_packed(struct 
ipu_image_convert_ctx *ctx,
for (col = 0; col < image->num_cols; col++) {
col_off = (col * w * bpp) >> 3;
 
-   image->tile[tile].offset = row_off + col_off;
+   offset = row_off + col_off;
+
+   image->tile[tile].offset = offset;
image->tile[tile].u_off = 0;
image->tile[tile++].v_off = 0;
 
-   dev_dbg(priv->ipu->dev,
-   "task %u: ctx %p: %s@[%d,%d]: phys %08x\n",
-   chan->ic_task, ctx,
-   image->type == IMAGE_CONVERT_IN ?
-   "Input" : "Output", row, col,
-   row_off + col_off);
+   if (offset & 0x7) {
+   dev_err(priv->ipu->dev,
+   "task %u: ctx %p: %s@[%d,%d]: "
+   "phys %08x\n",
+   chan->ic_task, ctx,
+   image->type == IMAGE_CONVERT_IN ?
+   "Input" : "Output", row, col,
+   row_off + col_off);
+   return -EINVAL;
+   }
}
}
+
+   return 0;
 }
 
-static void calc_tile_offsets(struct ipu_image_convert_ctx *ctx,
+static int calc_tile_offsets(struct ipu_image_convert_ctx *ctx,
  struct ipu_image_convert_image *image)
 {
if (image->fmt->planar)
-   calc_tile_offsets_planar(ctx, image);
-   else
-   calc_tile_offsets_packed(ctx, image);
+   return calc_tile_offsets_planar(ctx, image);
+
+   return calc_tile_offsets_packed(ctx, image);
 }
 
 /*
@@ -1199,9 +1213,8 @@ 

[PATCH v4 20/22] gpu: ipu-v3: image-convert: add some ASCII art to the exposition

2018-10-19 Thread Philipp Zabel
Visualize the scaling and rotation pipeline with some ASCII art
diagrams. Remove the FIXME comment about missing seam prevention.

Signed-off-by: Philipp Zabel 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 39 +++---
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index b735065fe288..91fe8f1672b4 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -37,17 +37,36 @@
  * when double_buffering boolean is set).
  *
  * Note that the input frame must be split up into the same number
- * of tiles as the output frame.
+ * of tiles as the output frame:
  *
- * FIXME: at this point there is no attempt to deal with visible seams
- * at the tile boundaries when upscaling. The seams are caused by a reset
- * of the bilinear upscale interpolation when starting a new tile. The
- * seams are barely visible for small upscale factors, but become
- * increasingly visible as the upscale factor gets larger, since more
- * interpolated pixels get thrown out at the tile boundaries. A possilble
- * fix might be to overlap tiles of different sizes, but this must be done
- * while also maintaining the IDMAC dma buffer address alignment and 8x8 IRT
- * alignment restrictions of each tile.
+ *   +-+-+
+ *   +-+---+ |  A  | B   |
+ *   | A   | B | | | |
+ *   +-+---+   -->   +-+-+
+ *   | C   | D | |  C  | D   |
+ *   +-+---+ | | |
+ *   +-+-+
+ *
+ * Clockwise 90° rotations are handled by first rescaling into a
+ * reusable temporary tile buffer and then rotating with the 8x8
+ * block rotator, writing to the correct destination:
+ *
+ * +-+-+
+ * | | |
+ *   +-+---+ +-+   | C   | A   |
+ *   | A   | B | | A,B, |  |   | | |
+ *   +-+---+   -->   | C,D  |  |  -->  | | |
+ *   | C   | D | +-+   +-+-+
+ *   +-+---+   | D   | B   |
+ * | | |
+ * +-+-+
+ *
+ * If the 8x8 block rotator is used, horizontal or vertical flipping
+ * is done during the rotation step, otherwise flipping is done
+ * during the scaling step.
+ * With rotation or flipping, tile order changes between input and
+ * output image. Tiles are numbered row major from top left to bottom
+ * right for both input and output image.
  */
 
 #define MAX_STRIPES_W4
-- 
2.19.0



[PATCH v4 05/22] gpu: ipu-v3: image-convert: Prevent race between run and unprepare

2018-10-19 Thread Philipp Zabel
From: Steve Longerbeam 

Prevent possible race by parallel threads between ipu_image_convert_run()
and ipu_image_convert_unprepare(). This involves setting ctx->aborting
to true unconditionally so that no new job runs can be queued during
unprepare, and holding the ctx->aborting flag until the context is freed.

Note that the "normal" ipu_image_convert_abort() case (i.e. not during
context unprepare) should clear the ctx->aborting flag after aborting
any active run and clearing the context's pending queue. This is because
it should be possible to continue to use the conversion context and queue
more runs after an abort.

Signed-off-by: Steve Longerbeam 
---
New since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 41fb62b88c54..6c15bf8efaa2 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1524,7 +1524,7 @@ int ipu_image_convert_queue(struct ipu_image_convert_run 
*run)
 EXPORT_SYMBOL_GPL(ipu_image_convert_queue);
 
 /* Abort any active or pending conversions for this context */
-void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx)
+static void __ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx)
 {
struct ipu_image_convert_chan *chan = ctx->chan;
struct ipu_image_convert_priv *priv = chan->priv;
@@ -1551,7 +1551,7 @@ void ipu_image_convert_abort(struct ipu_image_convert_ctx 
*ctx)
 
need_abort = (run_count || active_run);
 
-   ctx->aborting = need_abort;
+   ctx->aborting = true;
 
spin_unlock_irqrestore(>irqlock, flags);
 
@@ -1572,7 +1572,11 @@ void ipu_image_convert_abort(struct 
ipu_image_convert_ctx *ctx)
dev_warn(priv->ipu->dev, "%s: timeout\n", __func__);
force_abort(ctx);
}
+}
 
+void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx)
+{
+   __ipu_image_convert_abort(ctx);
ctx->aborting = false;
 }
 EXPORT_SYMBOL_GPL(ipu_image_convert_abort);
@@ -1586,7 +1590,7 @@ void ipu_image_convert_unprepare(struct 
ipu_image_convert_ctx *ctx)
bool put_res;
 
/* make sure no runs are hanging around */
-   ipu_image_convert_abort(ctx);
+   __ipu_image_convert_abort(ctx);
 
dev_dbg(priv->ipu->dev, "%s: task %u: removing ctx %p\n", __func__,
chan->ic_task, ctx);
-- 
2.19.0



[PATCH v4 17/22] gpu: ipu-v3: image-convert: fix debug output for varying tile sizes

2018-10-19 Thread Philipp Zabel
Since tile dimensions now vary between tiles, add debug output for each
tile's position and dimensions.

Signed-off-by: Philipp Zabel 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index a674241dd0b8..0451d699f515 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -308,12 +308,11 @@ static void dump_format(struct ipu_image_convert_ctx *ctx,
struct ipu_image_convert_priv *priv = chan->priv;
 
dev_dbg(priv->ipu->dev,
-   "task %u: ctx %p: %s format: %dx%d (%dx%d tiles of size %dx%d), 
%c%c%c%c\n",
+   "task %u: ctx %p: %s format: %dx%d (%dx%d tiles), %c%c%c%c\n",
chan->ic_task, ctx,
ic_image->type == IMAGE_CONVERT_OUT ? "Output" : "Input",
ic_image->base.pix.width, ic_image->base.pix.height,
ic_image->num_cols, ic_image->num_rows,
-   ic_image->tile[0].width, ic_image->tile[0].height,
ic_image->fmt->fourcc & 0xff,
(ic_image->fmt->fourcc >> 8) & 0xff,
(ic_image->fmt->fourcc >> 16) & 0xff,
@@ -789,6 +788,8 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
 static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
 struct ipu_image_convert_image *image)
 {
+   struct ipu_image_convert_chan *chan = ctx->chan;
+   struct ipu_image_convert_priv *priv = chan->priv;
unsigned int i;
 
for (i = 0; i < ctx->num_tiles; i++) {
@@ -813,6 +814,13 @@ static void calc_tile_dimensions(struct 
ipu_image_convert_ctx *ctx,
tile->rot_stride =
(image->fmt->bpp * tile->height) >> 3;
}
+
+   dev_dbg(priv->ipu->dev,
+   "task %u: ctx %p: %s@[%u,%u]: %ux%u@%u,%u\n",
+   chan->ic_task, ctx,
+   image->type == IMAGE_CONVERT_IN ? "Input" : "Output",
+   row, col,
+   tile->width, tile->height, tile->left, tile->top);
}
 }
 
-- 
2.19.0



[PATCH v4 22/22] gpu: ipu-v3: image-convert: allow three rows or columns

2018-10-19 Thread Philipp Zabel
If width or height is in the [2049, 3072] range, allow using
just three tiles in this dimension instead of four.

Signed-off-by: Philipp Zabel 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 3e73494d5930..13103ab86050 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -379,12 +379,7 @@ static int alloc_dma_buf(struct ipu_image_convert_priv 
*priv,
 
 static inline int num_stripes(int dim)
 {
-   if (dim <= 1024)
-   return 1;
-   else if (dim <= 2048)
-   return 2;
-   else
-   return 4;
+   return (dim - 1) / 1024 + 1;
 }
 
 /*
-- 
2.19.0



[PATCH v4 01/22] media: imx: add mem2mem device

2018-10-19 Thread Philipp Zabel
Add a single imx-media mem2mem video device that uses the IPU IC PP
(image converter post processing) task for scaling and colorspace
conversion.
On i.MX6Q/DL SoCs with two IPUs currently only the first IPU is used.

The hardware only supports writing to destination buffers up to
1024x1024 pixels in a single pass, arbitrary sizes can be achieved
by rendering multiple tiles per frame.

Signed-off-by: Philipp Zabel 
[slongerb...@gmail.com: use ipu_image_convert_adjust(), fix
 device_run() error handling]
Signed-off-by: Steve Longerbeam 
---
No changes since v3.
---
 drivers/staging/media/imx/Kconfig |   1 +
 drivers/staging/media/imx/Makefile|   1 +
 drivers/staging/media/imx/imx-media-dev.c |  11 +
 drivers/staging/media/imx/imx-media-mem2mem.c | 873 ++
 drivers/staging/media/imx/imx-media.h |  10 +
 5 files changed, 896 insertions(+)
 create mode 100644 drivers/staging/media/imx/imx-media-mem2mem.c

diff --git a/drivers/staging/media/imx/Kconfig 
b/drivers/staging/media/imx/Kconfig
index bfc17de56b17..07013cb3cb66 100644
--- a/drivers/staging/media/imx/Kconfig
+++ b/drivers/staging/media/imx/Kconfig
@@ -6,6 +6,7 @@ config VIDEO_IMX_MEDIA
depends on HAS_DMA
select VIDEOBUF2_DMA_CONTIG
select V4L2_FWNODE
+   select V4L2_MEM2MEM_DEV
---help---
  Say yes here to enable support for video4linux media controller
  driver for the i.MX5/6 SOC.
diff --git a/drivers/staging/media/imx/Makefile 
b/drivers/staging/media/imx/Makefile
index 698a4210316e..f2e722d0fa19 100644
--- a/drivers/staging/media/imx/Makefile
+++ b/drivers/staging/media/imx/Makefile
@@ -6,6 +6,7 @@ imx-media-ic-objs := imx-ic-common.o imx-ic-prp.o 
imx-ic-prpencvf.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-common.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-capture.o
+obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-mem2mem.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-vdic.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-ic.o
 
diff --git a/drivers/staging/media/imx/imx-media-dev.c 
b/drivers/staging/media/imx/imx-media-dev.c
index 481840195071..c4324df54c2e 100644
--- a/drivers/staging/media/imx/imx-media-dev.c
+++ b/drivers/staging/media/imx/imx-media-dev.c
@@ -309,6 +309,17 @@ static int imx_media_probe_complete(struct 
v4l2_async_notifier *notifier)
goto unlock;
 
ret = v4l2_device_register_subdev_nodes(>v4l2_dev);
+   if (ret)
+   goto unlock;
+
+   /* TODO: check whether we have IC subdevices first */
+   imxmd->m2m_vdev = imx_media_mem2mem_device_init(imxmd);
+   if (IS_ERR(imxmd->m2m_vdev)) {
+   ret = PTR_ERR(imxmd->m2m_vdev);
+   goto unlock;
+   }
+
+   ret = imx_media_mem2mem_device_register(imxmd->m2m_vdev);
 unlock:
mutex_unlock(>mutex);
if (ret)
diff --git a/drivers/staging/media/imx/imx-media-mem2mem.c 
b/drivers/staging/media/imx/imx-media-mem2mem.c
new file mode 100644
index ..a2a4dca017ce
--- /dev/null
+++ b/drivers/staging/media/imx/imx-media-mem2mem.c
@@ -0,0 +1,873 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * i.MX IPUv3 mem2mem Scaler/CSC driver
+ *
+ * Copyright (C) 2011 Pengutronix, Sascha Hauer
+ * Copyright (C) 2018 Pengutronix, Philipp Zabel
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "imx-media.h"
+
+#define fh_to_ctx(__fh)container_of(__fh, struct mem2mem_ctx, fh)
+
+enum {
+   V4L2_M2M_SRC = 0,
+   V4L2_M2M_DST = 1,
+};
+
+struct mem2mem_priv {
+   struct imx_media_video_dev vdev;
+
+   struct v4l2_m2m_dev   *m2m_dev;
+   struct device *dev;
+
+   struct imx_media_dev  *md;
+
+   struct mutex  mutex;   /* mem2mem device mutex */
+
+   atomic_t  num_inst;
+};
+
+#define to_mem2mem_priv(v) container_of(v, struct mem2mem_priv, vdev)
+
+/* Per-queue, driver-specific private data */
+struct mem2mem_q_data {
+   struct v4l2_pix_format  cur_fmt;
+   struct v4l2_rectrect;
+};
+
+struct mem2mem_ctx {
+   struct mem2mem_priv *priv;
+
+   struct v4l2_fh  fh;
+   struct mem2mem_q_data   q_data[2];
+   int error;
+   struct ipu_image_convert_ctx *icc;
+
+   struct v4l2_ctrl_handler ctrl_hdlr;
+   int rotate;
+   bool hflip;
+   bool vflip;
+   enum ipu_rotate_moderot_mode;
+};
+
+static struct mem2mem_q_data *get_q_data(struct mem2mem_ctx *ctx,
+enum v4l2_buf_type type)
+{
+   if (V4L2_TYPE_IS_OUTPUT(type))
+   return >q_data[V4L2_M2M_SRC];
+   else
+   return >q_data[V4L2_M2M_DST];
+}
+
+/*
+ * mem2mem callbacks
+ */
+
+static void job_abort(void *_ctx)
+{
+   

[PATCH v4 10/22] gpu: ipu-v3: image-convert: prepare for per-tile configuration

2018-10-19 Thread Philipp Zabel
Let convert_start start from a given tile index, allocate intermediate
tile with maximum tile size.

Signed-off-by: Philipp Zabel 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 60 +++---
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 5fccba176e39..c2f82d681c48 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -625,7 +625,8 @@ static void init_idmac_channel(struct ipu_image_convert_ctx 
*ctx,
   struct ipuv3_channel *channel,
   struct ipu_image_convert_image *image,
   enum ipu_rotate_mode rot_mode,
-  bool rot_swap_width_height)
+  bool rot_swap_width_height,
+  unsigned int tile)
 {
struct ipu_image_convert_chan *chan = ctx->chan;
unsigned int burst_size;
@@ -635,23 +636,23 @@ static void init_idmac_channel(struct 
ipu_image_convert_ctx *ctx,
unsigned int tile_idx[2];
 
if (image->type == IMAGE_CONVERT_OUT) {
-   tile_idx[0] = ctx->out_tile_map[0];
+   tile_idx[0] = ctx->out_tile_map[tile];
tile_idx[1] = ctx->out_tile_map[1];
} else {
-   tile_idx[0] = 0;
+   tile_idx[0] = tile;
tile_idx[1] = 1;
}
 
if (rot_swap_width_height) {
-   width = image->tile[0].height;
-   height = image->tile[0].width;
-   stride = image->tile[0].rot_stride;
+   width = image->tile[tile_idx[0]].height;
+   height = image->tile[tile_idx[0]].width;
+   stride = image->tile[tile_idx[0]].rot_stride;
addr0 = ctx->rot_intermediate[0].phys;
if (ctx->double_buffering)
addr1 = ctx->rot_intermediate[1].phys;
} else {
-   width = image->tile[0].width;
-   height = image->tile[0].height;
+   width = image->tile[tile_idx[0]].width;
+   height = image->tile[tile_idx[0]].height;
stride = image->stride;
addr0 = image->base.phys0 +
image->tile[tile_idx[0]].offset;
@@ -701,7 +702,7 @@ static void init_idmac_channel(struct ipu_image_convert_ctx 
*ctx,
ipu_idmac_set_double_buffer(channel, ctx->double_buffering);
 }
 
-static int convert_start(struct ipu_image_convert_run *run)
+static int convert_start(struct ipu_image_convert_run *run, unsigned int tile)
 {
struct ipu_image_convert_ctx *ctx = run->ctx;
struct ipu_image_convert_chan *chan = ctx->chan;
@@ -709,28 +710,29 @@ static int convert_start(struct ipu_image_convert_run 
*run)
struct ipu_image_convert_image *s_image = >in;
struct ipu_image_convert_image *d_image = >out;
enum ipu_color_space src_cs, dest_cs;
+   unsigned int dst_tile = ctx->out_tile_map[tile];
unsigned int dest_width, dest_height;
int ret;
 
-   dev_dbg(priv->ipu->dev, "%s: task %u: starting ctx %p run %p\n",
-   __func__, chan->ic_task, ctx, run);
+   dev_dbg(priv->ipu->dev, "%s: task %u: starting ctx %p run %p tile %u -> 
%u\n",
+   __func__, chan->ic_task, ctx, run, tile, dst_tile);
 
src_cs = ipu_pixelformat_to_colorspace(s_image->fmt->fourcc);
dest_cs = ipu_pixelformat_to_colorspace(d_image->fmt->fourcc);
 
if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
/* swap width/height for resizer */
-   dest_width = d_image->tile[0].height;
-   dest_height = d_image->tile[0].width;
+   dest_width = d_image->tile[dst_tile].height;
+   dest_height = d_image->tile[dst_tile].width;
} else {
-   dest_width = d_image->tile[0].width;
-   dest_height = d_image->tile[0].height;
+   dest_width = d_image->tile[dst_tile].width;
+   dest_height = d_image->tile[dst_tile].height;
}
 
/* setup the IC resizer and CSC */
ret = ipu_ic_task_init(chan->ic,
-  s_image->tile[0].width,
-  s_image->tile[0].height,
+  s_image->tile[tile].width,
+  s_image->tile[tile].height,
   dest_width,
   dest_height,
   src_cs, dest_cs);
@@ -741,27 +743,27 @@ static int convert_start(struct ipu_image_convert_run 
*run)
 
/* init the source MEM-->IC PP IDMAC channel */
init_idmac_channel(ctx, ch

[PATCH v4 21/22] gpu: ipu-v3: image-convert: disable double buffering if necessary

2018-10-19 Thread Philipp Zabel
Double-buffering only works if tile sizes are the same and the resizing
coefficient does not change between tiles, even for non-planar formats.

Signed-off-by: Philipp Zabel 
---
No changes since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 27 --
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 91fe8f1672b4..3e73494d5930 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1990,6 +1990,7 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum 
ipu_ic_task ic_task,
struct ipu_image_convert_chan *chan;
struct ipu_image_convert_ctx *ctx;
unsigned long flags;
+   unsigned int i;
bool get_res;
int ret;
 
@@ -2077,15 +2078,37 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum 
ipu_ic_task ic_task,
 * for every tile, and therefore would have to be updated for
 * each buffer which is not possible. So double-buffering is
 * impossible when either the source or destination images are
-* a planar format (YUV420, YUV422P, etc.).
+* a planar format (YUV420, YUV422P, etc.). Further, differently
+* sized tiles or different resizing coefficients per tile
+* prevent double-buffering as well.
 */
ctx->double_buffering = (ctx->num_tiles > 1 &&
 !s_image->fmt->planar &&
 !d_image->fmt->planar);
+   for (i = 1; i < ctx->num_tiles; i++) {
+   if (ctx->in.tile[i].width != ctx->in.tile[0].width ||
+   ctx->in.tile[i].height != ctx->in.tile[0].height ||
+   ctx->out.tile[i].width != ctx->out.tile[0].width ||
+   ctx->out.tile[i].height != ctx->out.tile[0].height) {
+   ctx->double_buffering = false;
+   break;
+   }
+   }
+   for (i = 1; i < ctx->in.num_cols; i++) {
+   if (ctx->resize_coeffs_h[i] != ctx->resize_coeffs_h[0]) {
+   ctx->double_buffering = false;
+   break;
+   }
+   }
+   for (i = 1; i < ctx->in.num_rows; i++) {
+   if (ctx->resize_coeffs_v[i] != ctx->resize_coeffs_v[0]) {
+   ctx->double_buffering = false;
+   break;
+   }
+   }
 
if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
unsigned long intermediate_size = d_image->tile[0].size;
-   unsigned int i;
 
for (i = 1; i < ctx->num_tiles; i++) {
if (d_image->tile[i].size > intermediate_size)
-- 
2.19.0



[PATCH v4 06/22] gpu: ipu-v3: image-convert: Only wait for abort completion if active run

2018-10-19 Thread Philipp Zabel
From: Steve Longerbeam 

Only wait for the ctx->aborted completion if there is an active run
in progress, otherwise the wait will just time out after 10 seconds.
If there is no active run in progress, the done queue just needs to
be emptied.

Signed-off-by: Steve Longerbeam 
---
New since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 6c15bf8efaa2..e3e032252604 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1562,9 +1562,14 @@ static void __ipu_image_convert_abort(struct 
ipu_image_convert_ctx *ctx)
return;
}
 
+   if (!active_run) {
+   empty_done_q(chan);
+   return;
+   }
+
dev_dbg(priv->ipu->dev,
-   "%s: task %u: wait for completion: %d runs, active run %p\n",
-   __func__, chan->ic_task, run_count, active_run);
+   "%s: task %u: wait for completion: %d runs\n",
+   __func__, chan->ic_task, run_count);
 
ret = wait_for_completion_timeout(>aborted,
  msecs_to_jiffies(1));
-- 
2.19.0



[PATCH v4 08/22] gpu: ipu-v3: image-convert: Remove need_abort flag

2018-10-19 Thread Philipp Zabel
From: Steve Longerbeam 

The need_abort flag is not really needed anymore in
__ipu_image_convert_abort(), remove it.
No functional changes.

Signed-off-by: Steve Longerbeam 
---
New since v3.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index abd8afb22b48..b8a400182a00 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1531,7 +1531,6 @@ static void __ipu_image_convert_abort(struct 
ipu_image_convert_ctx *ctx)
struct ipu_image_convert_run *run, *active_run, *tmp;
unsigned long flags;
int run_count, ret;
-   bool need_abort;
 
spin_lock_irqsave(>irqlock, flags);
 
@@ -1550,13 +1549,11 @@ static void __ipu_image_convert_abort(struct 
ipu_image_convert_ctx *ctx)
if (active_run)
reinit_completion(>aborted);
 
-   need_abort = (run_count || active_run);
-
ctx->aborting = true;
 
spin_unlock_irqrestore(>irqlock, flags);
 
-   if (!need_abort) {
+   if (!run_count && !active_run) {
dev_dbg(priv->ipu->dev,
"%s: task %u: no abort needed for ctx %p\n",
__func__, chan->ic_task, ctx);
-- 
2.19.0



Re: [PATCH v3 01/16] media: imx: add mem2mem device

2018-10-19 Thread Philipp Zabel
Hi Tim,

On Thu, 2018-10-18 at 15:53 -0700, Tim Harvey wrote:
[...]
> Philipp,
> 
> Thanks for submitting this!
> 
> I'm hoping this lets us use non-IMX capture devices along with the IMX
> media controller entities so we can use hardware
> CSC, scaling, pixel-format conversions and ultimately coda based encode.
> 
> I've built this on top of linux-media and see that it registers as
> /dev/video8 but I'm not clear how to use it? I don't see it within the
> media controller graph.

It's a V4L2 mem2mem device that can be handled by the GstV4l2Transform
element, for example. GStreamer should create a v4l2video8convert
element of that type.

The mem2mem device is not part of the media controller graph on purpose.
There is no interaction with any of the entities in the media controller
graph apart from the fact that the IC PP task we are using for mem2mem
scaling is sharing hardware resources with the IC PRP tasks used for the
media controller scaler entities.

regards
Philipp


Re: i.MX6 IPU CSI analog video input on Ventana

2018-10-19 Thread Philipp Zabel
On Wed, 2018-10-17 at 14:33 -0700, Steve Longerbeam wrote:
[...]
> > I'm also interested in looking at Philipps' 'i.MX media mem2mem
> > scaler' series (https://patchwork.kernel.org/cover/10603881/) and am
> > wondering if anyone has some example pipelines showing that in use.
> > I'm hoping that is what is needed to be able to use hardware
> > scaling/CSC and coda based encoding on streams from v4l2 PCI capture
> > devices.
> 
> Yes exactly, I'll let Philipp answer. I'm also interested in the gstreamer
> element needed to make use of h/w scaling/CSC from the mem2mem
> device.

GStreamer should create a GstV4l2Transform element "v4l2videoXconvert"
for the /dev/videoX mem2mem scaler device.

> For coda encode, my understanding is that the v4l2h264enc element will
> make use of coda h/w encode, something like this example which encodes to
> a h.264 file (I haven't verified this works, still need to build a later 
> version of gst-plugins-good that has the v4l2h264enc support):
> 
> gst-launch-1.0 v4l2src io-mode=dmabuf device=/dev/video$dev !\ 
> "video/x-raw,format=$fmt,width=$w,height=$h"  ! \
> v4l2h264enc output-io-mode=dmabuf-import  ! queue ! matroskamux ! \
> filesink location=$filename

With GStreamer 1.14 the capture side io-mode parameter is not necessary
anymore to export dmabufs.
The output-io-mode parameter is currently still needed though, as the
V4L2 elements don't support negotiating dmabuf using caps via
video/x-raw(memory:DMABuf) yet.

Also there's a h264parse missing to convert the video/x-h264,stream-
format=byte-stream from v4l2h264enc to video/x-h264,stream-format=avc as
required by matroskamux:

gst-launch-1.0 \
v4l2src ! \
v4l2video10convert output-io-mode=dmabuf-import ! \
v4l2h264enc output-io-mode=dmabuf-import ! \
h264parse ! \
matroskamux ! \
filesink

> > Lastly, is there any hope to use IMX6 hardware compositing to say
> > stitch together multiple streams from a v4l2 PCI capture device into a
> > single stream for coda based hw encoding?
> 
> The IPUv3 Image Converter has a combining unit that can combine pixels from
> two images, but there is no support for that in mainline AFAIK.

I don't think there is any V4L2 API for compositing yet.

regards
Philipp


Re: [PATCH v3 10/16] gpu: ipu-v3: image-convert: select optimal seam positions

2018-10-17 Thread Philipp Zabel
On Fri, 2018-10-12 at 17:33 -0700, Steve Longerbeam wrote:
> 
> On 09/18/2018 02:34 AM, Philipp Zabel wrote:
> 
> 
> > +/*
> > + * Tile left edges are required to be aligned to multiples of 8 bytes
> > + * by the IDMAC.
> > + */
> > +static inline u32 tile_left_align(const struct ipu_image_pixfmt *fmt)
> > +{
> > +   return fmt->planar ? 8 * fmt->uv_width_dec : 64 / fmt->bpp;
> > +}
> 
> 
> 
> As I indicated, shouldn't this be
> 
> return fmt->planar ? 8 * fmt->uv_width_dec : 8;
> 
> ?
>
> Just from a unit analysis perspective, "64 / fmt->bpp" has
> units of pixels / 8-bytes, it should have units of bytes.

The tile alignment is in pixels, not in bytes. For 16-bit and 32-bit
packed formats, we only need to align to 4 or 2 pixels, respectively,
as the LCM of 8-byte alignment and 2-byte or 4-byte pixel size is
always 8 bytes.

But now that you pointed it out, it is quite obvious that this can't
work for 24-bit packed formats. Here the LCM of 8-byte alignment and 3-
byte pixels is 24 bytes, or 8 pixels.

How about:

if (fmt->planar)
return fmt->uv_packed ? 8 : 8 * fmt->uv_width_dec;
else
return fmt->bpp == 32 ? 2 : fmt->bpp == 16 ? 4 : 8;

regards
Philipp


Re: [RFC] Informal meeting during ELCE to discuss userspace support for stateless codecs

2018-10-11 Thread Philipp Zabel
On Mon, 2018-10-08 at 13:53 +0200, Hans Verkuil wrote:
> Hi all,
> 
> I would like to meet up somewhere during the ELCE to discuss userspace support
> for stateless (and perhaps stateful as well?) codecs.
> 
> It is also planned as a topic during the summit, but I would prefer to prepare
> for that in advance, esp. since I myself do not have any experience writing
> userspace SW for such devices.
> 
> Nicolas, it would be really great if you can participate in this meeting
> since you probably have the most experience with this by far.
> 
> Looking through the ELCE program I found two timeslots that are likely to work
> for most of us (because the topics in the program appear to be boring for us
> media types!):
> 
> Tuesday from 10:50-15:50
> 
> or:
> 
> Monday from 15:45 onward
> 
> My guess is that we need 2-3 hours or so. Hard to predict.
> 
> The basic question that I would like to have answered is what the userspace
> component should look like? libv4l-like plugin or a library that userspace can
> link with? Do we want more general support for stateful codecs as well that
> deals with resolution changes and the more complex parts of the codec API?
> 
> I've mailed this directly to those that I expect are most interested in this,
> but if someone wants to join in let me know.
> 
> I want to keep the group small though, so you need to bring relevant
> experience to the table.

I'd like to join in as well. CODA960 on i.MX6 has a stateless JPEG codec
that I'd like to support using the stateless codec API.

regards
Philipp


Re: [PATCH -next] media: imx-pxp: remove duplicated include from imx-pxp.c

2018-10-08 Thread Philipp Zabel
On Sat, 2018-10-06 at 07:36 +, YueHaibing wrote:
> Remove duplicated include.
> 
> Signed-off-by: YueHaibing 
> ---
>  drivers/media/platform/imx-pxp.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/drivers/media/platform/imx-pxp.c 
> b/drivers/media/platform/imx-pxp.c
> index b76cd0e..229c23a 100644
> --- a/drivers/media/platform/imx-pxp.c
> +++ b/drivers/media/platform/imx-pxp.c
> @@ -16,7 +16,6 @@
>  #include 
>  #include 
>  #include 
> -#include 
>  #include 
>  #include 
>  #include 

This reverts a41d203a1d34, which was a duplicate of already applied
b4fbf423cef9: https://patchwork.linuxtv.org/patch/52243/

Acked-by: Philipp Zabel 

regards
Philipp


[PATCH] media: imx: use well defined 32-bit RGB pixel format

2018-09-18 Thread Philipp Zabel
The documentation in Documentation/media/uapi/v4l/pixfmt-packed-rgb.rst
tells us that the V4L2_PIX_FMT_RGB32 format is deprecated and must not
be used by new drivers. Replace it with V4L2_PIX_FMT_XRGB32.

Signed-off-by: Philipp Zabel 
---
 drivers/staging/media/imx/imx-media-utils.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/imx/imx-media-utils.c 
b/drivers/staging/media/imx/imx-media-utils.c
index 8aa13403b09d..0eaa353d5cb3 100644
--- a/drivers/staging/media/imx/imx-media-utils.c
+++ b/drivers/staging/media/imx/imx-media-utils.c
@@ -88,7 +88,7 @@ static const struct imx_media_pixfmt rgb_formats[] = {
.cs = IPUV3_COLORSPACE_RGB,
.bpp= 24,
}, {
-   .fourcc = V4L2_PIX_FMT_RGB32,
+   .fourcc = V4L2_PIX_FMT_XRGB32,
.codes  = {MEDIA_BUS_FMT_ARGB_1X32},
.cs = IPUV3_COLORSPACE_RGB,
.bpp= 32,
@@ -212,7 +212,7 @@ static const struct imx_media_pixfmt ipu_yuv_formats[] = {
 
 static const struct imx_media_pixfmt ipu_rgb_formats[] = {
{
-   .fourcc = V4L2_PIX_FMT_RGB32,
+   .fourcc = V4L2_PIX_FMT_XRGB32,
.codes  = {MEDIA_BUS_FMT_ARGB_1X32},
.cs = IPUV3_COLORSPACE_RGB,
.bpp= 32,
-- 
2.19.0



[PATCH v3 11/16] gpu: ipu-v3: image-convert: fix debug output for varying tile sizes

2018-09-18 Thread Philipp Zabel
Since tile dimensions now vary between tiles, add debug output for each
tile's position and dimensions.

Signed-off-by: Philipp Zabel 
---
No changes since v2.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 4a513dea7913..aba973aedb75 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -308,12 +308,11 @@ static void dump_format(struct ipu_image_convert_ctx *ctx,
struct ipu_image_convert_priv *priv = chan->priv;
 
dev_dbg(priv->ipu->dev,
-   "task %u: ctx %p: %s format: %dx%d (%dx%d tiles of size %dx%d), 
%c%c%c%c\n",
+   "task %u: ctx %p: %s format: %dx%d (%dx%d tiles), %c%c%c%c\n",
chan->ic_task, ctx,
ic_image->type == IMAGE_CONVERT_OUT ? "Output" : "Input",
ic_image->base.pix.width, ic_image->base.pix.height,
ic_image->num_cols, ic_image->num_rows,
-   ic_image->tile[0].width, ic_image->tile[0].height,
ic_image->fmt->fourcc & 0xff,
(ic_image->fmt->fourcc >> 8) & 0xff,
(ic_image->fmt->fourcc >> 16) & 0xff,
@@ -786,6 +785,8 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
 static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
 struct ipu_image_convert_image *image)
 {
+   struct ipu_image_convert_chan *chan = ctx->chan;
+   struct ipu_image_convert_priv *priv = chan->priv;
unsigned int i;
 
for (i = 0; i < ctx->num_tiles; i++) {
@@ -810,6 +811,13 @@ static void calc_tile_dimensions(struct 
ipu_image_convert_ctx *ctx,
tile->rot_stride =
(image->fmt->bpp * tile->height) >> 3;
}
+
+   dev_dbg(priv->ipu->dev,
+   "task %u: ctx %p: %s@[%u,%u]: %ux%u@%u,%u\n",
+   chan->ic_task, ctx,
+   image->type == IMAGE_CONVERT_IN ? "Input" : "Output",
+   row, col,
+   tile->width, tile->height, tile->left, tile->top);
}
 }
 
-- 
2.19.0



[PATCH v3 14/16] gpu: ipu-v3: image-convert: add some ASCII art to the exposition

2018-09-18 Thread Philipp Zabel
Visualize the scaling and rotation pipeline with some ASCII art
diagrams. Remove the FIXME comment about missing seam prevention.

Signed-off-by: Philipp Zabel 
---
No changes since v2.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 39 +++---
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 16d400b2b3d2..6179d8bd123c 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -37,17 +37,36 @@
  * when double_buffering boolean is set).
  *
  * Note that the input frame must be split up into the same number
- * of tiles as the output frame.
+ * of tiles as the output frame:
  *
- * FIXME: at this point there is no attempt to deal with visible seams
- * at the tile boundaries when upscaling. The seams are caused by a reset
- * of the bilinear upscale interpolation when starting a new tile. The
- * seams are barely visible for small upscale factors, but become
- * increasingly visible as the upscale factor gets larger, since more
- * interpolated pixels get thrown out at the tile boundaries. A possilble
- * fix might be to overlap tiles of different sizes, but this must be done
- * while also maintaining the IDMAC dma buffer address alignment and 8x8 IRT
- * alignment restrictions of each tile.
+ *   +-+-+
+ *   +-+---+ |  A  | B   |
+ *   | A   | B | | | |
+ *   +-+---+   -->   +-+-+
+ *   | C   | D | |  C  | D   |
+ *   +-+---+ | | |
+ *   +-+-+
+ *
+ * Clockwise 90° rotations are handled by first rescaling into a
+ * reusable temporary tile buffer and then rotating with the 8x8
+ * block rotator, writing to the correct destination:
+ *
+ * +-+-+
+ * | | |
+ *   +-+---+ +-+   | C   | A   |
+ *   | A   | B | | A,B, |  |   | | |
+ *   +-+---+   -->   | C,D  |  |  -->  | | |
+ *   | C   | D | +-+   +-+-+
+ *   +-+---+   | D   | B   |
+ * | | |
+ * +-+-+
+ *
+ * If the 8x8 block rotator is used, horizontal or vertical flipping
+ * is done during the rotation step, otherwise flipping is done
+ * during the scaling step.
+ * With rotation or flipping, tile order changes between input and
+ * output image. Tiles are numbered row major from top left to bottom
+ * right for both input and output image.
  */
 
 #define MAX_STRIPES_W4
-- 
2.19.0



[PATCH v3 15/16] gpu: ipu-v3: image-convert: disable double buffering if necessary

2018-09-18 Thread Philipp Zabel
Double-buffering only works if tile sizes are the same and the resizing
coefficient does not change between tiles, even for non-planar formats.

Signed-off-by: Philipp Zabel 
---
No changes since v2.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 27 --
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 6179d8bd123c..6ab880416919 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1973,6 +1973,7 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum 
ipu_ic_task ic_task,
struct ipu_image_convert_chan *chan;
struct ipu_image_convert_ctx *ctx;
unsigned long flags;
+   unsigned int i;
bool get_res;
int ret;
 
@@ -2056,15 +2057,37 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum 
ipu_ic_task ic_task,
 * for every tile, and therefore would have to be updated for
 * each buffer which is not possible. So double-buffering is
 * impossible when either the source or destination images are
-* a planar format (YUV420, YUV422P, etc.).
+* a planar format (YUV420, YUV422P, etc.). Further, differently
+* sized tiles or different resizing coefficients per tile
+* prevent double-buffering as well.
 */
ctx->double_buffering = (ctx->num_tiles > 1 &&
 !s_image->fmt->planar &&
 !d_image->fmt->planar);
+   for (i = 1; i < ctx->num_tiles; i++) {
+   if (ctx->in.tile[i].width != ctx->in.tile[0].width ||
+   ctx->in.tile[i].height != ctx->in.tile[0].height ||
+   ctx->out.tile[i].width != ctx->out.tile[0].width ||
+   ctx->out.tile[i].height != ctx->out.tile[0].height) {
+   ctx->double_buffering = false;
+   break;
+   }
+   }
+   for (i = 1; i < ctx->in.num_cols; i++) {
+   if (ctx->resize_coeffs_h[i] != ctx->resize_coeffs_h[0]) {
+   ctx->double_buffering = false;
+   break;
+   }
+   }
+   for (i = 1; i < ctx->in.num_rows; i++) {
+   if (ctx->resize_coeffs_v[i] != ctx->resize_coeffs_v[0]) {
+   ctx->double_buffering = false;
+   break;
+   }
+   }
 
if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
unsigned long intermediate_size = d_image->tile[0].size;
-   unsigned int i;
 
for (i = 1; i < ctx->num_tiles; i++) {
if (d_image->tile[i].size > intermediate_size)
-- 
2.19.0



[PATCH v3 01/16] media: imx: add mem2mem device

2018-09-18 Thread Philipp Zabel
Add a single imx-media mem2mem video device that uses the IPU IC PP
(image converter post processing) task for scaling and colorspace
conversion.
On i.MX6Q/DL SoCs with two IPUs currently only the first IPU is used.

The hardware only supports writing to destination buffers up to
1024x1024 pixels in a single pass; arbitrary sizes can be achieved
by rendering multiple tiles per frame.

Signed-off-by: Philipp Zabel 
[steve_longerb...@mentor.com: use ipu_image_convert_adjust(), fix
 device_run() error handling]
Signed-off-by: Steve Longerbeam 
---
Changes since v2:
 - Rely on ipu_image_convert_adjust() in mem2mem_try_fmt() for format
   adjustments. This makes the mem2mem driver mostly a V4L2 mem2mem API
   wrapper around the IPU image converter, and independent of the
   internal image converter implementation.
 - Remove the source and destination buffers on error in device_run().
   Otherwise the conversion is re-attempted apparently over and over
   again (with WARN() backtraces).
 - Allow subscribing to control changes.
---
 drivers/staging/media/imx/Kconfig |   1 +
 drivers/staging/media/imx/Makefile|   1 +
 drivers/staging/media/imx/imx-media-dev.c |  11 +
 drivers/staging/media/imx/imx-media-mem2mem.c | 873 ++
 drivers/staging/media/imx/imx-media.h |  10 +
 5 files changed, 896 insertions(+)
 create mode 100644 drivers/staging/media/imx/imx-media-mem2mem.c

diff --git a/drivers/staging/media/imx/Kconfig 
b/drivers/staging/media/imx/Kconfig
index bfc17de56b17..07013cb3cb66 100644
--- a/drivers/staging/media/imx/Kconfig
+++ b/drivers/staging/media/imx/Kconfig
@@ -6,6 +6,7 @@ config VIDEO_IMX_MEDIA
depends on HAS_DMA
select VIDEOBUF2_DMA_CONTIG
select V4L2_FWNODE
+   select V4L2_MEM2MEM_DEV
---help---
  Say yes here to enable support for video4linux media controller
  driver for the i.MX5/6 SOC.
diff --git a/drivers/staging/media/imx/Makefile 
b/drivers/staging/media/imx/Makefile
index 698a4210316e..f2e722d0fa19 100644
--- a/drivers/staging/media/imx/Makefile
+++ b/drivers/staging/media/imx/Makefile
@@ -6,6 +6,7 @@ imx-media-ic-objs := imx-ic-common.o imx-ic-prp.o 
imx-ic-prpencvf.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-common.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-capture.o
+obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-mem2mem.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-vdic.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-ic.o
 
diff --git a/drivers/staging/media/imx/imx-media-dev.c 
b/drivers/staging/media/imx/imx-media-dev.c
index 1931d1b038dc..9c59687612a0 100644
--- a/drivers/staging/media/imx/imx-media-dev.c
+++ b/drivers/staging/media/imx/imx-media-dev.c
@@ -359,6 +359,17 @@ static int imx_media_probe_complete(struct 
v4l2_async_notifier *notifier)
goto unlock;
 
ret = v4l2_device_register_subdev_nodes(>v4l2_dev);
+   if (ret)
+   goto unlock;
+
+   /* TODO: check whether we have IC subdevices first */
+   imxmd->m2m_vdev = imx_media_mem2mem_device_init(imxmd);
+   if (IS_ERR(imxmd->m2m_vdev)) {
+   ret = PTR_ERR(imxmd->m2m_vdev);
+   goto unlock;
+   }
+
+   ret = imx_media_mem2mem_device_register(imxmd->m2m_vdev);
 unlock:
mutex_unlock(>mutex);
if (ret)
diff --git a/drivers/staging/media/imx/imx-media-mem2mem.c 
b/drivers/staging/media/imx/imx-media-mem2mem.c
new file mode 100644
index ..a2a4dca017ce
--- /dev/null
+++ b/drivers/staging/media/imx/imx-media-mem2mem.c
@@ -0,0 +1,873 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * i.MX IPUv3 mem2mem Scaler/CSC driver
+ *
+ * Copyright (C) 2011 Pengutronix, Sascha Hauer
+ * Copyright (C) 2018 Pengutronix, Philipp Zabel
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "imx-media.h"
+
+#define fh_to_ctx(__fh)container_of(__fh, struct mem2mem_ctx, fh)
+
+enum {
+   V4L2_M2M_SRC = 0,
+   V4L2_M2M_DST = 1,
+};
+
+struct mem2mem_priv {
+   struct imx_media_video_dev vdev;
+
+   struct v4l2_m2m_dev   *m2m_dev;
+   struct device *dev;
+
+   struct imx_media_dev  *md;
+
+   struct mutex  mutex;   /* mem2mem device mutex */
+
+   atomic_t  num_inst;
+};
+
+#define to_mem2mem_priv(v) container_of(v, struct mem2mem_priv, vdev)
+
+/* Per-queue, driver-specific private data */
+struct mem2mem_q_data {
+   struct v4l2_pix_format  cur_fmt;
+   struct v4l2_rectrect;
+};
+
+struct mem2mem_ctx {
+   struct mem2mem_priv *priv;
+
+   struct v4l2_fh  fh;
+   struct mem2mem_q_data   q_data[2];
+   int error;
+   struct ipu_image_convert_ctx *icc;
+
+   struct v4l2_ctrl_handler ctrl_hdlr;
+   in

[PATCH v3 08/16] gpu: ipu-v3: image-convert: calculate tile dimensions and offsets outside fill_image

2018-09-18 Thread Philipp Zabel
This will allow calculating seam positions after initializing the
ipu_image base structure but before calculating tile dimensions.

Signed-off-by: Philipp Zabel 
---
No changes since v2.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index e4b198777d0f..830622277588 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1453,9 +1453,6 @@ static int fill_image(struct ipu_image_convert_ctx *ctx,
else
ic_image->stride  = ic_image->base.pix.bytesperline;
 
-   calc_tile_dimensions(ctx, ic_image);
-   calc_tile_offsets(ctx, ic_image);
-
return 0;
 }
 
@@ -1660,10 +1657,6 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum 
ipu_ic_task ic_task,
ctx->num_tiles = d_image->num_cols * d_image->num_rows;
ctx->rot_mode = rot_mode;
 
-   ret = calc_image_resize_coefficients(ctx, in, out);
-   if (ret)
-   goto out_free;
-
ret = fill_image(ctx, s_image, in, IMAGE_CONVERT_IN);
if (ret)
goto out_free;
@@ -1671,6 +1664,16 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum 
ipu_ic_task ic_task,
if (ret)
goto out_free;
 
+   ret = calc_image_resize_coefficients(ctx, in, out);
+   if (ret)
+   goto out_free;
+
+   calc_tile_dimensions(ctx, s_image);
+   calc_tile_offsets(ctx, s_image);
+
+   calc_tile_dimensions(ctx, d_image);
+   calc_tile_offsets(ctx, d_image);
+
calc_out_tile_map(ctx);
calc_tile_resize_coefficients(ctx);
 
-- 
2.19.0



[PATCH v3 04/16] gpu: ipu-v3: image-convert: prepare for per-tile configuration

2018-09-18 Thread Philipp Zabel
Let convert_start start from a given tile index, and allocate the
intermediate tile with the maximum tile size.

Signed-off-by: Philipp Zabel 
---
No changes since v2.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 60 +++---
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index f4081962784c..f4db1553d23a 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -611,7 +611,8 @@ static void init_idmac_channel(struct ipu_image_convert_ctx 
*ctx,
   struct ipuv3_channel *channel,
   struct ipu_image_convert_image *image,
   enum ipu_rotate_mode rot_mode,
-  bool rot_swap_width_height)
+  bool rot_swap_width_height,
+  unsigned int tile)
 {
struct ipu_image_convert_chan *chan = ctx->chan;
unsigned int burst_size;
@@ -621,23 +622,23 @@ static void init_idmac_channel(struct 
ipu_image_convert_ctx *ctx,
unsigned int tile_idx[2];
 
if (image->type == IMAGE_CONVERT_OUT) {
-   tile_idx[0] = ctx->out_tile_map[0];
+   tile_idx[0] = ctx->out_tile_map[tile];
tile_idx[1] = ctx->out_tile_map[1];
} else {
-   tile_idx[0] = 0;
+   tile_idx[0] = tile;
tile_idx[1] = 1;
}
 
if (rot_swap_width_height) {
-   width = image->tile[0].height;
-   height = image->tile[0].width;
-   stride = image->tile[0].rot_stride;
+   width = image->tile[tile_idx[0]].height;
+   height = image->tile[tile_idx[0]].width;
+   stride = image->tile[tile_idx[0]].rot_stride;
addr0 = ctx->rot_intermediate[0].phys;
if (ctx->double_buffering)
addr1 = ctx->rot_intermediate[1].phys;
} else {
-   width = image->tile[0].width;
-   height = image->tile[0].height;
+   width = image->tile[tile_idx[0]].width;
+   height = image->tile[tile_idx[0]].height;
stride = image->stride;
addr0 = image->base.phys0 +
image->tile[tile_idx[0]].offset;
@@ -687,7 +688,7 @@ static void init_idmac_channel(struct ipu_image_convert_ctx 
*ctx,
ipu_idmac_set_double_buffer(channel, ctx->double_buffering);
 }
 
-static int convert_start(struct ipu_image_convert_run *run)
+static int convert_start(struct ipu_image_convert_run *run, unsigned int tile)
 {
struct ipu_image_convert_ctx *ctx = run->ctx;
struct ipu_image_convert_chan *chan = ctx->chan;
@@ -695,28 +696,29 @@ static int convert_start(struct ipu_image_convert_run 
*run)
struct ipu_image_convert_image *s_image = >in;
struct ipu_image_convert_image *d_image = >out;
enum ipu_color_space src_cs, dest_cs;
+   unsigned int dst_tile = ctx->out_tile_map[tile];
unsigned int dest_width, dest_height;
int ret;
 
-   dev_dbg(priv->ipu->dev, "%s: task %u: starting ctx %p run %p\n",
-   __func__, chan->ic_task, ctx, run);
+   dev_dbg(priv->ipu->dev, "%s: task %u: starting ctx %p run %p tile %u -> 
%u\n",
+   __func__, chan->ic_task, ctx, run, tile, dst_tile);
 
src_cs = ipu_pixelformat_to_colorspace(s_image->fmt->fourcc);
dest_cs = ipu_pixelformat_to_colorspace(d_image->fmt->fourcc);
 
if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
/* swap width/height for resizer */
-   dest_width = d_image->tile[0].height;
-   dest_height = d_image->tile[0].width;
+   dest_width = d_image->tile[dst_tile].height;
+   dest_height = d_image->tile[dst_tile].width;
} else {
-   dest_width = d_image->tile[0].width;
-   dest_height = d_image->tile[0].height;
+   dest_width = d_image->tile[dst_tile].width;
+   dest_height = d_image->tile[dst_tile].height;
}
 
/* setup the IC resizer and CSC */
ret = ipu_ic_task_init(chan->ic,
-  s_image->tile[0].width,
-  s_image->tile[0].height,
+  s_image->tile[tile].width,
+  s_image->tile[tile].height,
   dest_width,
   dest_height,
   src_cs, dest_cs);
@@ -727,27 +729,27 @@ static int convert_start(struct ipu_image_convert_run 
*run)
 
/* init the source MEM-->IC PP IDMAC channel */
init_idmac_channel(ctx, ch

[PATCH v3 06/16] gpu: ipu-v3: image-convert: reconfigure IC per tile

2018-09-18 Thread Philipp Zabel
For differently sized tiles or if the resizing coefficients change,
we have to stop, reconfigure, and restart the IC between tiles.

Signed-off-by: Philipp Zabel 
---
No changes since v2.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 65 +-
 1 file changed, 44 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 01a63eb8ccaf..65f0321a7971 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1137,6 +1137,24 @@ static irqreturn_t do_bh(int irq, void *dev_id)
return IRQ_HANDLED;
 }
 
+static bool ic_settings_changed(struct ipu_image_convert_ctx *ctx)
+{
+   unsigned int cur_tile = ctx->next_tile - 1;
+   unsigned int next_tile = ctx->next_tile;
+
+   if (ctx->resize_coeffs_h[cur_tile % ctx->in.num_cols] !=
+   ctx->resize_coeffs_h[next_tile % ctx->in.num_cols] ||
+   ctx->resize_coeffs_v[cur_tile / ctx->in.num_cols] !=
+   ctx->resize_coeffs_v[next_tile / ctx->in.num_cols] ||
+   ctx->in.tile[cur_tile].width != ctx->in.tile[next_tile].width ||
+   ctx->in.tile[cur_tile].height != ctx->in.tile[next_tile].height ||
+   ctx->out.tile[cur_tile].width != ctx->out.tile[next_tile].width ||
+   ctx->out.tile[cur_tile].height != ctx->out.tile[next_tile].height)
+   return true;
+
+   return false;
+}
+
 /* hold irqlock when calling */
 static irqreturn_t do_irq(struct ipu_image_convert_run *run)
 {
@@ -1180,27 +1198,32 @@ static irqreturn_t do_irq(struct ipu_image_convert_run 
*run)
 * not done, place the next tile buffers.
 */
if (!ctx->double_buffering) {
-
-   src_tile = _image->tile[ctx->next_tile];
-   dst_idx = ctx->out_tile_map[ctx->next_tile];
-   dst_tile = _image->tile[dst_idx];
-
-   ipu_cpmem_set_buffer(chan->in_chan, 0,
-s_image->base.phys0 + src_tile->offset);
-   ipu_cpmem_set_buffer(outch, 0,
-d_image->base.phys0 + dst_tile->offset);
-   if (s_image->fmt->planar)
-   ipu_cpmem_set_uv_offset(chan->in_chan,
-   src_tile->u_off,
-   src_tile->v_off);
-   if (d_image->fmt->planar)
-   ipu_cpmem_set_uv_offset(outch,
-   dst_tile->u_off,
-   dst_tile->v_off);
-
-   ipu_idmac_select_buffer(chan->in_chan, 0);
-   ipu_idmac_select_buffer(outch, 0);
-
+   if (ic_settings_changed(ctx)) {
+   convert_stop(run);
+   convert_start(run, ctx->next_tile);
+   } else {
+   src_tile = _image->tile[ctx->next_tile];
+   dst_idx = ctx->out_tile_map[ctx->next_tile];
+   dst_tile = _image->tile[dst_idx];
+
+   ipu_cpmem_set_buffer(chan->in_chan, 0,
+s_image->base.phys0 +
+src_tile->offset);
+   ipu_cpmem_set_buffer(outch, 0,
+d_image->base.phys0 +
+dst_tile->offset);
+   if (s_image->fmt->planar)
+   ipu_cpmem_set_uv_offset(chan->in_chan,
+   src_tile->u_off,
+   src_tile->v_off);
+   if (d_image->fmt->planar)
+   ipu_cpmem_set_uv_offset(outch,
+   dst_tile->u_off,
+   dst_tile->v_off);
+
+   ipu_idmac_select_buffer(chan->in_chan, 0);
+   ipu_idmac_select_buffer(outch, 0);
+   }
} else if (ctx->next_tile < ctx->num_tiles - 1) {
 
src_tile = _image->tile[ctx->next_tile + 1];
-- 
2.19.0



[PATCH v3 10/16] gpu: ipu-v3: image-convert: select optimal seam positions

2018-09-18 Thread Philipp Zabel
Select seam positions that minimize distortions during seam hiding while
satisfying input and output IDMAC, rotator, and image format constraints.

This code looks for aligned output seam positions that minimize the
difference between the fractional corresponding ideal input positions
and the input positions rounded to alignment requirements.

Since now tiles can be sized differently, alignment restrictions of the
complete image can be relaxed in the next step.

Signed-off-by: Philipp Zabel 
---
Changes since v2:
 - Switch width/height properly and align tile top left positions to 8x8
   IRT block size when rotating.
 - Align input width to input burst length in case the scaling step
   flips horizontally.
 - Fix bottom edge calculation.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 340 -
 1 file changed, 334 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 97061049a9d2..4a513dea7913 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -432,6 +432,123 @@ static int calc_image_resize_coefficients(struct 
ipu_image_convert_ctx *ctx,
return 0;
 }
 
+#define round_closest(x, y) round_down((x) + (y)/2, (y))
+
+/*
+ * Find the best aligned seam position in the interval [out_start, out_end].
+ * Rotation and image offsets are out of scope.
+ *
+ * @out_start: start of interval, must be within 1024 pixels / lines
+ * of out_end
+ * @out_end: end of interval, smaller than or equal to out_edge
+ * @in_edge: input right / bottom edge
+ * @out_edge: output right / bottom edge
+ * @in_align: input alignment, either horizontal 8-byte line start address
+ *alignment, or pixel alignment due to image format
+ * @out_align: output alignment, either horizontal 8-byte line start address
+ * alignment, or pixel alignment due to image format or rotator
+ * block size
+ * @in_burst: horizontal input burst size in case of horizontal flip
+ * @out_burst: horizontal output burst size or rotator block size
+ * @downsize_coeff: downsizing section coefficient
+ * @resize_coeff: main processing section resizing coefficient
+ * @_in_seam: aligned input seam position return value
+ * @_out_seam: aligned output seam position return value
+ */
+static void find_best_seam(struct ipu_image_convert_ctx *ctx,
+  unsigned int out_start,
+  unsigned int out_end,
+  unsigned int in_edge,
+  unsigned int out_edge,
+  unsigned int in_align,
+  unsigned int out_align,
+  unsigned int in_burst,
+  unsigned int out_burst,
+  unsigned int downsize_coeff,
+  unsigned int resize_coeff,
+  u32 *_in_seam,
+  u32 *_out_seam)
+{
+   struct device *dev = ctx->chan->priv->ipu->dev;
+   unsigned int out_pos;
+   /* Input / output seam position candidates */
+   unsigned int out_seam = 0;
+   unsigned int in_seam = 0;
+   unsigned int min_diff = UINT_MAX;
+
+   /*
+* Output tiles must start at a multiple of 8 bytes horizontally and
+* possibly at an even line vertically depending on the pixel format.
+* Only consider output aligned positions for the seam.
+*/
+   out_start = round_up(out_start, out_align);
+   for (out_pos = out_start; out_pos < out_end; out_pos += out_align) {
+   unsigned int in_pos;
+   unsigned int in_pos_aligned;
+   unsigned int abs_diff;
+
+   /*
+* Tiles in the right column / bottom row may not be allowed to
+* overshoot horizontally / vertically. out_burst may be the
+* actual DMA burst size, or the rotator block size.
+*/
+   if ((out_burst > 1) && (out_edge - out_pos) % out_burst)
+   continue;
+
+   /*
+* Input sample position, corresponding to out_pos, 19.13 fixed
+* point.
+*/
+   in_pos = (out_pos * resize_coeff) << downsize_coeff;
+   /*
+* The closest input sample position that we could actually
+* start the input tile at, 19.13 fixed point.
+*/
+   in_pos_aligned = round_closest(in_pos, 8192U * in_align);
+
+   if ((in_burst > 1) &&
+   (in_edge - in_pos_aligned / 8192U) % in_burst)
+   continue;
+
+   if (in_pos < in_pos_aligned)
+   abs_diff = in_pos_aligned - in_pos;
+   else
+   abs_diff = in_pos - in_pos_alig

[PATCH v3 02/16] gpu: ipu-cpmem: add WARN_ON_ONCE() for unaligned dma buffers

2018-09-18 Thread Philipp Zabel
From: Steve Longerbeam 

Add a WARN_ON_ONCE() if either the Y/packed buffer, or the U/V offsets,
are not aligned on 8-byte boundaries. This will catch alignment
bugs in DRM and V4L2.

Signed-off-by: Steve Longerbeam 
---
New since v2.
---
 drivers/gpu/ipu-v3/ipu-cpmem.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c
index a9d2501500a1..7e65954f13c2 100644
--- a/drivers/gpu/ipu-v3/ipu-cpmem.c
+++ b/drivers/gpu/ipu-v3/ipu-cpmem.c
@@ -259,6 +259,8 @@ EXPORT_SYMBOL_GPL(ipu_cpmem_set_high_priority);
 
 void ipu_cpmem_set_buffer(struct ipuv3_channel *ch, int bufnum, dma_addr_t buf)
 {
+   WARN_ON_ONCE(buf & 0x7);
+
if (bufnum)
ipu_ch_param_write_field(ch, IPU_FIELD_EBA1, buf >> 3);
else
@@ -268,6 +270,8 @@ EXPORT_SYMBOL_GPL(ipu_cpmem_set_buffer);
 
 void ipu_cpmem_set_uv_offset(struct ipuv3_channel *ch, u32 u_off, u32 v_off)
 {
+   WARN_ON_ONCE((u_off & 0x7) || (v_off & 0x7));
+
ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_off / 8);
ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_off / 8);
 }
@@ -435,6 +439,8 @@ void ipu_cpmem_set_yuv_planar_full(struct ipuv3_channel *ch,
   unsigned int uv_stride,
   unsigned int u_offset, unsigned int v_offset)
 {
+   WARN_ON_ONCE((u_offset & 0x7) || (v_offset & 0x7));
+
ipu_ch_param_write_field(ch, IPU_FIELD_SLUV, uv_stride - 1);
ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_offset / 8);
ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_offset / 8);
-- 
2.19.0



[PATCH v3 03/16] gpu: ipu-v3: ipu-ic: allow to manually set resize coefficients

2018-09-18 Thread Philipp Zabel
For tiled scaling, we want to compute the scaling coefficients
externally in such a way that the interpolation overshoots tile
boundaries and samples up to the first pixel of the next tile.
Prepare to override the resizing coefficients from the image
conversion code.

Signed-off-by: Philipp Zabel 
---
No changes since v2.
---
 drivers/gpu/ipu-v3/ipu-ic.c | 52 +++--
 include/video/imx-ipu-v3.h  |  6 +
 2 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c
index 67cc820253a9..594c3cbc8291 100644
--- a/drivers/gpu/ipu-v3/ipu-ic.c
+++ b/drivers/gpu/ipu-v3/ipu-ic.c
@@ -442,36 +442,40 @@ int ipu_ic_task_graphics_init(struct ipu_ic *ic,
 }
 EXPORT_SYMBOL_GPL(ipu_ic_task_graphics_init);
 
-int ipu_ic_task_init(struct ipu_ic *ic,
-int in_width, int in_height,
-int out_width, int out_height,
-enum ipu_color_space in_cs,
-enum ipu_color_space out_cs)
+int ipu_ic_task_init_rsc(struct ipu_ic *ic,
+int in_width, int in_height,
+int out_width, int out_height,
+enum ipu_color_space in_cs,
+enum ipu_color_space out_cs,
+u32 rsc)
 {
struct ipu_ic_priv *priv = ic->priv;
-   u32 reg, downsize_coeff, resize_coeff;
+   u32 downsize_coeff, resize_coeff;
unsigned long flags;
int ret = 0;
 
-   /* Setup vertical resizing */
-   ret = calc_resize_coeffs(ic, in_height, out_height,
-_coeff, _coeff);
-   if (ret)
-   return ret;
+   if (!rsc) {
+   /* Setup vertical resizing */
 
-   reg = (downsize_coeff << 30) | (resize_coeff << 16);
+   ret = calc_resize_coeffs(ic, in_height, out_height,
+_coeff, _coeff);
+   if (ret)
+   return ret;
+
+   rsc = (downsize_coeff << 30) | (resize_coeff << 16);
 
-   /* Setup horizontal resizing */
-   ret = calc_resize_coeffs(ic, in_width, out_width,
-_coeff, _coeff);
-   if (ret)
-   return ret;
+   /* Setup horizontal resizing */
+   ret = calc_resize_coeffs(ic, in_width, out_width,
+_coeff, _coeff);
+   if (ret)
+   return ret;
 
-   reg |= (downsize_coeff << 14) | resize_coeff;
+   rsc |= (downsize_coeff << 14) | resize_coeff;
+   }
 
spin_lock_irqsave(>lock, flags);
 
-   ipu_ic_write(ic, reg, ic->reg->rsc);
+   ipu_ic_write(ic, rsc, ic->reg->rsc);
 
/* Setup color space conversion */
ic->in_cs = in_cs;
@@ -487,6 +491,16 @@ int ipu_ic_task_init(struct ipu_ic *ic,
spin_unlock_irqrestore(>lock, flags);
return ret;
 }
+
+int ipu_ic_task_init(struct ipu_ic *ic,
+int in_width, int in_height,
+int out_width, int out_height,
+enum ipu_color_space in_cs,
+enum ipu_color_space out_cs)
+{
+   return ipu_ic_task_init_rsc(ic, in_width, in_height, out_width,
+   out_height, in_cs, out_cs, 0);
+}
 EXPORT_SYMBOL_GPL(ipu_ic_task_init);
 
 int ipu_ic_task_idma_init(struct ipu_ic *ic, struct ipuv3_channel *channel,
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index abbad94e14a1..94f0eec821c8 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -387,6 +387,12 @@ int ipu_ic_task_init(struct ipu_ic *ic,
 int out_width, int out_height,
 enum ipu_color_space in_cs,
 enum ipu_color_space out_cs);
+int ipu_ic_task_init_rsc(struct ipu_ic *ic,
+int in_width, int in_height,
+int out_width, int out_height,
+enum ipu_color_space in_cs,
+enum ipu_color_space out_cs,
+u32 rsc);
 int ipu_ic_task_graphics_init(struct ipu_ic *ic,
  enum ipu_color_space in_g_cs,
  bool galpha_en, u32 galpha,
-- 
2.19.0



[PATCH v3 05/16] gpu: ipu-v3: image-convert: calculate per-tile resize coefficients

2018-09-18 Thread Philipp Zabel
Slightly modifying resize coefficients per-tile allows completely
hiding the seams between tiles and sampling the correct input pixels at
the bottom and right edges of the image.

Tiling requires a bilinear interpolator reset at each tile start, which
causes the image to be slightly shifted if the starting pixel should not
have been sampled from an integer pixel position in the source image
according to the full image resizing ratio. To work around this
hardware limitation, calculate per-tile resizing coefficients that make
sure that the correct input pixels are sampled at the tile end.

Signed-off-by: Philipp Zabel 
---
No changes since v2.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 236 -
 1 file changed, 234 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index f4db1553d23a..01a63eb8ccaf 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -135,6 +135,12 @@ struct ipu_image_convert_ctx {
struct ipu_image_convert_image in;
struct ipu_image_convert_image out;
enum ipu_rotate_mode rot_mode;
+   u32 downsize_coeff_h;
+   u32 downsize_coeff_v;
+   u32 image_resize_coeff_h;
+   u32 image_resize_coeff_v;
+   u32 resize_coeffs_h[MAX_STRIPES_W];
+   u32 resize_coeffs_v[MAX_STRIPES_H];
 
/* intermediate buffer for rotation */
struct ipu_image_convert_dma_buf rot_intermediate[2];
@@ -361,6 +367,69 @@ static inline int num_stripes(int dim)
return 4;
 }
 
+/*
+ * Calculate downsizing coefficients, which are the same for all tiles,
+ * and bilinear resizing coefficients, which are used to find the best
+ * seam positions.
+ */
+static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
+ struct ipu_image *in,
+ struct ipu_image *out)
+{
+   u32 downsized_width = in->rect.width;
+   u32 downsized_height = in->rect.height;
+   u32 downsize_coeff_v = 0;
+   u32 downsize_coeff_h = 0;
+   u32 resized_width = out->rect.width;
+   u32 resized_height = out->rect.height;
+   u32 resize_coeff_h;
+   u32 resize_coeff_v;
+
+   if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
+   resized_width = out->rect.height;
+   resized_height = out->rect.width;
+   }
+
+   /* Do not let invalid input lead to an endless loop below */
+   if (WARN_ON(resized_width == 0 || resized_height == 0))
+   return -EINVAL;
+
+   while (downsized_width >= resized_width * 2) {
+   downsized_width >>= 1;
+   downsize_coeff_h++;
+   }
+
+   while (downsized_height >= resized_height * 2) {
+   downsized_height >>= 1;
+   downsize_coeff_v++;
+   }
+
+   /*
+* Calculate the bilinear resizing coefficients that could be used if
+* we were converting with a single tile. The bottom right output pixel
+* should sample as close as possible to the bottom right input pixel
+* out of the decimator, but not overshoot it:
+*/
+   resize_coeff_h = 8192 * (downsized_width - 1) / (resized_width - 1);
+   resize_coeff_v = 8192 * (downsized_height - 1) / (resized_height - 1);
+
+   dev_dbg(ctx->chan->priv->ipu->dev,
+   "%s: hscale: >>%u, *8192/%u vscale: >>%u, *8192/%u, %ux%u 
tiles\n",
+   __func__, downsize_coeff_h, resize_coeff_h, downsize_coeff_v,
+   resize_coeff_v, ctx->in.num_cols, ctx->in.num_rows);
+
+   if (downsize_coeff_h > 2 || downsize_coeff_v  > 2 ||
+   resize_coeff_h > 0x3fff || resize_coeff_v > 0x3fff)
+   return -EINVAL;
+
+   ctx->downsize_coeff_h = downsize_coeff_h;
+   ctx->downsize_coeff_v = downsize_coeff_v;
+   ctx->image_resize_coeff_h = resize_coeff_h;
+   ctx->image_resize_coeff_v = resize_coeff_v;
+
+   return 0;
+}
+
 static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
 struct ipu_image_convert_image *image)
 {
@@ -564,6 +633,149 @@ static void calc_tile_offsets(struct 
ipu_image_convert_ctx *ctx,
calc_tile_offsets_packed(ctx, image);
 }
 
+/*
+ * Calculate the resizing ratio for the IC main processing section given input
+ * size, fixed downsizing coefficient, and output size.
+ * Either round to closest for the next tile's first pixel to minimize seams
+ * and distortion (for all but right column / bottom row), or round down to
+ * avoid sampling beyond the edges of the input image for this tile's last
+ * pixel.
+ * Returns the resizing coefficient, resizing ratio is 8192.0 / resize_coeff.
+ */
+static u32 calc_resize_coeff(u32 input_size, u32 downsize_coeff,
+   

[PATCH v3 07/16] gpu: ipu-v3: image-convert: store tile top/left position

2018-09-18 Thread Philipp Zabel
Store tile top/left position in pixels in the tile structure.
This will allow overlapping tiles with different sizes later.

Signed-off-by: Philipp Zabel 
---
No changes since v2.
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 27 ++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 65f0321a7971..e4b198777d0f 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -84,6 +84,8 @@ struct ipu_image_convert_dma_chan {
 struct ipu_image_tile {
u32 width;
u32 height;
+   u32 left;
+   u32 top;
/* size and strides are in bytes */
u32 size;
u32 stride;
@@ -433,13 +435,17 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
 static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
 struct ipu_image_convert_image *image)
 {
-   int i;
+   unsigned int i;
 
for (i = 0; i < ctx->num_tiles; i++) {
struct ipu_image_tile *tile = &image->tile[i];
+   const unsigned int row = i / image->num_cols;
+   const unsigned int col = i % image->num_cols;
 
tile->height = image->base.pix.height / image->num_rows;
tile->width = image->base.pix.width / image->num_cols;
+   tile->left = col * tile->width;
+   tile->top = row * tile->height;
tile->size = ((tile->height * image->fmt->bpp) >> 3) *
tile->width;
 
@@ -535,7 +541,7 @@ static void calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx,
struct ipu_image_convert_priv *priv = chan->priv;
const struct ipu_image_pixfmt *fmt = image->fmt;
unsigned int row, col, tile = 0;
-   u32 H, w, h, y_stride, uv_stride;
+   u32 H, top, y_stride, uv_stride;
u32 uv_row_off, uv_col_off, uv_off, u_off, v_off, tmp;
u32 y_row_off, y_col_off, y_off;
u32 y_size, uv_size;
@@ -552,13 +558,12 @@ static void calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx,
uv_size = y_size / (fmt->uv_width_dec * fmt->uv_height_dec);
 
for (row = 0; row < image->num_rows; row++) {
-   w = image->tile[tile].width;
-   h = image->tile[tile].height;
-   y_row_off = row * h * y_stride;
-   uv_row_off = (row * h * uv_stride) / fmt->uv_height_dec;
+   top = image->tile[tile].top;
+   y_row_off = top * y_stride;
+   uv_row_off = (top * uv_stride) / fmt->uv_height_dec;
 
for (col = 0; col < image->num_cols; col++) {
-   y_col_off = col * w;
+   y_col_off = image->tile[tile].left;
uv_col_off = y_col_off / fmt->uv_width_dec;
if (fmt->uv_packed)
uv_col_off *= 2;
@@ -595,7 +600,7 @@ static void calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx,
struct ipu_image_convert_priv *priv = chan->priv;
const struct ipu_image_pixfmt *fmt = image->fmt;
unsigned int row, col, tile = 0;
-   u32 w, h, bpp, stride;
+   u32 bpp, stride;
u32 row_off, col_off;
 
/* setup some convenience vars */
@@ -603,12 +608,10 @@ static void calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx,
bpp = fmt->bpp;
 
for (row = 0; row < image->num_rows; row++) {
-   w = image->tile[tile].width;
-   h = image->tile[tile].height;
-   row_off = row * h * stride;
+   row_off = image->tile[tile].top * stride;
 
for (col = 0; col < image->num_cols; col++) {
-   col_off = (col * w * bpp) >> 3;
+   col_off = (image->tile[tile].left * bpp) >> 3;
 
image->tile[tile].offset = row_off + col_off;
image->tile[tile].u_off = 0;
-- 
2.19.0



  1   2   3   4   5   6   7   8   9   10   >