Re: [RFC PATCH v2 6/8] media: v4l2: introduce two IOCTLs for object detection

2012-01-18 Thread Ming Lei
Remove other unrelated lists.

Hi Sylwester,

Thanks for your comment.

On Sat, Jan 14, 2012 at 5:16 AM, Sylwester Nawrocki  wrote:
> Hi Ming,
>
> sorry for the late response. It's all looking better now, however there
> is still a few things that could be improved.
>
> On 12/14/2011 03:00 PM, Ming Lei wrote:
>> This patch introduces two new IOCTLs and related data
>> structure which will be used by the coming video device
>> with object detect capability.
>>
>> The two IOCTLs and related data structure will be used by
>> user space application to retrieve the results of object
>> detection.
>>
>> The utility fdif[1] is useing the two IOCTLs to find
>> objects(faces) deteced in raw images or video streams.
>>
>> [1],http://kernel.ubuntu.com/git?p=ming/fdif.git;a=shortlog;h=refs/heads/v4l2-fdif
>>
>> Signed-off-by: Ming Lei
>> ---
>> v2:
>>       - extend face detection API to object detection API
>>       - introduce capability of V4L2_CAP_OBJ_DETECTION for object detection
>>       - 32/64 safe array parameter
>> ---
>>   drivers/media/video/v4l2-ioctl.c |   41 -
>>   include/linux/videodev2.h        |  124 
>> ++
>>   include/media/v4l2-ioctl.h       |    6 ++
>>   3 files changed, 170 insertions(+), 1 deletions(-)
>>
>> diff --git a/drivers/media/video/v4l2-ioctl.c 
>> b/drivers/media/video/v4l2-ioctl.c
>> index ded8b72..575d445 100644
>> --- a/drivers/media/video/v4l2-ioctl.c
>> +++ b/drivers/media/video/v4l2-ioctl.c
>> @@ -2140,6 +2140,30 @@ static long __video_do_ioctl(struct file *file,
>>               dbgarg(cmd, "index=%d", b->index);
>>               break;
>>       }
>> +     case VIDIOC_G_OD_RESULT:
>> +     {
>> +             struct v4l2_od_result *or = arg;
>> +
>> +             if (!ops->vidioc_g_od_result)
>> +                     break;
>> +
>> +             ret = ops->vidioc_g_od_result(file, fh, or);
>> +
>> +             dbgarg(cmd, "index=%d", or->frm_seq);
>> +             break;
>> +     }
>
>> +     case VIDIOC_G_OD_COUNT:
>> +     {
>> +             struct v4l2_od_count *oc = arg;
>> +
>> +             if (!ops->vidioc_g_od_count)
>> +                     break;
>> +
>> +             ret = ops->vidioc_g_od_count(file, fh, oc);
>> +
>> +             dbgarg(cmd, "index=%d", oc->frm_seq);
>> +             break;
>> +     }
>
> I'm uncertain if we need this ioctl at all. Now struct v4l2_od_result is:

IMO, it can simplify user application very much if the ioctl of
VIDIOC_G_OD_COUNT
is kept, see below.

>
> struct v4l2_od_result {
>        __u32                   frame_sequence;
>        __u32                   object_count;
>        __u32                   reserved[6];
>        struct v4l2_od_object   objects[0];
> };
>
> and
>
> struct v4l2_od_object {
>        __u16                   type;
>        __u16                   confidence;
>        union {
>                struct v4l2_od_face_desc  face;
>                struct v4l2_od_eye_desc   eye;
>                struct v4l2_od_mouth_desc mouth;
>                __u8    rawdata[60];
>        } o;
> };
>
> If we had added a 'size' field to struct v4l2_od_result, i.e.
>
> struct v4l2_od_result {
>        __u32                   size;
>        __u32                   frame_sequence;
>        __u32                   objects_count;
>        __u32                   reserved[5];
>        struct v4l2_od_object   objects[0];
> };
>
> the application could have allocated memory for the objects array and
> have the 'size' member set to the size of that allocation. Then it
> would have called VIDIOC_G_OD_RESULT and the driver would have filled
> the 'objects'  array, if it was big enough for the requested result
> data. The driver would also update the 'objects_count'. If the size
> would be too small to fit the result data, i.e.
>
> size < number_of_detected_objects * sizeof(struct v4l2_od_object)
>
> the driver could return -ENOSPC error while also setting 'size' to
> the required value. Something similar is done with

Without VIDIOC_G_OD_COUNT ioctl, user applications has no way to
know how many objects are detected in the specified frame, so it has
to allocate much more space to send to VIDIOC_G_OD_RESULT. Sometimes
it is enough, and sometimes it is not enough, looks a bit extra complicated
logic is introduced to space application.

> VIDIOC_G_EXT_CTRLS ioctl [3].
>
> There is one more OD API requirement, for camera sensors with embedded
> SoC ISP that support face detection, i.e. VIDIOC_G_OD_RESULT should
> allow to retrieve face detection result for the very last image frame,
> i.e. current frame.

IMO, it is better to always retrieve detection result via frame sequence number
if the seq can be known beforehand. But if it is difficult to get the
seq number in
user application for camera sensor case,  maybe we can introduce the flags to
handle it.

>
> One solution to support this could be adding a 'flags' field, i.e.
>
> struct v4l2_od_result {
>        __u32                   size;
>        

Re: [RFC PATCH v2 6/8] media: v4l2: introduce two IOCTLs for object detection

2012-01-13 Thread Sylwester Nawrocki
Hi Ming,

sorry for the late response. It's all looking better now, however there
is still a few things that could be improved.

On 12/14/2011 03:00 PM, Ming Lei wrote:
> This patch introduces two new IOCTLs and related data
> structure which will be used by the coming video device
> with object detect capability.
>
> The two IOCTLs and related data structure will be used by
> user space application to retrieve the results of object
> detection.
>
> The utility fdif[1] is useing the two IOCTLs to find
> objects(faces) deteced in raw images or video streams.
>
> [1],http://kernel.ubuntu.com/git?p=ming/fdif.git;a=shortlog;h=refs/heads/v4l2-fdif
>
> Signed-off-by: Ming Lei
> ---
> v2:
>   - extend face detection API to object detection API
>   - introduce capability of V4L2_CAP_OBJ_DETECTION for object detection
>   - 32/64 safe array parameter
> ---
>   drivers/media/video/v4l2-ioctl.c |   41 -
>   include/linux/videodev2.h|  124 
> ++
>   include/media/v4l2-ioctl.h   |6 ++
>   3 files changed, 170 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/media/video/v4l2-ioctl.c 
> b/drivers/media/video/v4l2-ioctl.c
> index ded8b72..575d445 100644
> --- a/drivers/media/video/v4l2-ioctl.c
> +++ b/drivers/media/video/v4l2-ioctl.c
> @@ -2140,6 +2140,30 @@ static long __video_do_ioctl(struct file *file,
>   dbgarg(cmd, "index=%d", b->index);
>   break;
>   }
> + case VIDIOC_G_OD_RESULT:
> + {
> + struct v4l2_od_result *or = arg;
> +
> + if (!ops->vidioc_g_od_result)
> + break;
> +
> + ret = ops->vidioc_g_od_result(file, fh, or);
> +
> + dbgarg(cmd, "index=%d", or->frm_seq);
> + break;
> + }

> + case VIDIOC_G_OD_COUNT:
> + {
> + struct v4l2_od_count *oc = arg;
> +
> + if (!ops->vidioc_g_od_count)
> + break;
> +
> + ret = ops->vidioc_g_od_count(file, fh, oc);
> +
> + dbgarg(cmd, "index=%d", oc->frm_seq);
> + break;
> + }

I'm uncertain if we need this ioctl at all. Now struct v4l2_od_result is:

struct v4l2_od_result {
__u32   frame_sequence;
__u32   object_count;
__u32   reserved[6];
struct v4l2_od_object   objects[0];
};

and

struct v4l2_od_object {
__u16   type;
__u16   confidence;
union {
struct v4l2_od_face_desc  face;
struct v4l2_od_eye_desc   eye;
struct v4l2_od_mouth_desc mouth;
__u8rawdata[60];
} o;
};

If we had added a 'size' field to struct v4l2_od_result, i.e.

struct v4l2_od_result {
__u32   size;
__u32   frame_sequence;
__u32   objects_count;
__u32   reserved[5];
struct v4l2_od_object   objects[0];
};

the application could have allocated memory for the objects array and
have the 'size' member set to the size of that allocation. Then it
would have called VIDIOC_G_OD_RESULT and the driver would have filled
the 'objects'  array, if it was big enough for the requested result
data. The driver would also update the 'objects_count'. If the size 
would be too small to fit the result data, i.e.

size < number_of_detected_objects * sizeof(struct v4l2_od_object)

the driver could return -ENOSPC error while also setting 'size' to 
the required value. Something similar is done with 
VIDIOC_G_EXT_CTRLS ioctl [3].

There is one more OD API requirement, for camera sensors with embedded
SoC ISP that support face detection, i.e. VIDIOC_G_OD_RESULT should 
allow to retrieve face detection result for the very last image frame,
i.e. current frame.

One solution to support this could be adding a 'flags' field, i.e.

struct v4l2_od_result {
__u32   size;
__u32   flags;
__u32   frame_sequence;
__u32   objects_count;
__u16   group_index;
__u16   group_count;
__u16   reserved[7];
struct v4l2_od_object   objects[0];
};

and additionally group_index to specify which face object the user is
interested in. I'm not saying we have to implement this now but it's
good to consider beforehand. The group_count would be used to return 
the number of detected faces. What do you think ?

/* flags */
#define V4L2_OD_FL_SEL_FRAME_SEQ(0 << 0)
#define V4L2_OD_FL_SEL_FRAME_LAST   (1 << 0)
#define V4L2_OD_FL_SEL_GROUP(1 << 1)

Or maybe we should just use "face_" instead of "group_" ?

>   default:
>   if (!ops->vidioc_default)
>   break;
> @@ -2241,7 +2265,22 @@ static int check_array_args(unsigned int cmd, void 
> *par

[RFC PATCH v2 6/8] media: v4l2: introduce two IOCTLs for object detection

2011-12-14 Thread Ming Lei
This patch introduces two new IOCTLs and related data
structure which will be used by the coming video device
with object detect capability.

The two IOCTLs and related data structure will be used by
user space application to retrieve the results of object
detection.

The utility fdif[1] is useing the two IOCTLs to find
objects(faces) deteced in raw images or video streams.

[1],http://kernel.ubuntu.com/git?p=ming/fdif.git;a=shortlog;h=refs/heads/v4l2-fdif

Signed-off-by: Ming Lei 
---
v2:
- extend face detection API to object detection API
- introduce capability of V4L2_CAP_OBJ_DETECTION for object detection
- 32/64 safe array parameter
---
 drivers/media/video/v4l2-ioctl.c |   41 -
 include/linux/videodev2.h|  124 ++
 include/media/v4l2-ioctl.h   |6 ++
 3 files changed, 170 insertions(+), 1 deletions(-)

diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index ded8b72..575d445 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -2140,6 +2140,30 @@ static long __video_do_ioctl(struct file *file,
dbgarg(cmd, "index=%d", b->index);
break;
}
+   case VIDIOC_G_OD_RESULT:
+   {
+   struct v4l2_od_result *or = arg;
+
+   if (!ops->vidioc_g_od_result)
+   break;
+
+   ret = ops->vidioc_g_od_result(file, fh, or);
+
+   dbgarg(cmd, "index=%d", or->frm_seq);
+   break;
+   }
+   case VIDIOC_G_OD_COUNT:
+   {
+   struct v4l2_od_count *oc = arg;
+
+   if (!ops->vidioc_g_od_count)
+   break;
+
+   ret = ops->vidioc_g_od_count(file, fh, oc);
+
+   dbgarg(cmd, "index=%d", oc->frm_seq);
+   break;
+   }
default:
if (!ops->vidioc_default)
break;
@@ -2241,7 +2265,22 @@ static int check_array_args(unsigned int cmd, void 
*parg, size_t *array_size,
 
 static int is_64_32_array_args(unsigned int cmd, void *parg, int *extra_len)
 {
-   return 0;
+   int ret = 0;
+
+   switch (cmd) {
+   case VIDIOC_G_OD_RESULT: {
+   struct v4l2_od_result *or = parg;
+
+   *extra_len = or->obj_cnt *
+   sizeof(struct v4l2_od_object);
+   ret = 1;
+   break;
+   }
+   default:
+   break;
+   }
+
+   return ret;
 }
 
 long
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 4b752d5..c08ceaf 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -270,6 +270,9 @@ struct v4l2_capability {
 #define V4L2_CAP_RADIO 0x0004  /* is a radio device */
 #define V4L2_CAP_MODULATOR 0x0008  /* has a modulator */
 
+/* The device has capability of object detection */
+#define V4L2_CAP_OBJ_DETECTION 0x0010
+
 #define V4L2_CAP_READWRITE  0x0100  /* read/write systemcalls 
*/
 #define V4L2_CAP_ASYNCIO0x0200  /* async I/O */
 #define V4L2_CAP_STREAMING  0x0400  /* streaming I/O ioctls */
@@ -2160,6 +2163,125 @@ struct v4l2_create_buffers {
__u32   reserved[8];
 };
 
+/**
+ * struct v4l2_od_obj_desc
+ * @centerx:   return, position in x direction of detected object
+ * @centery:   return, position in y direction of detected object
+ * @sizex: return, size in x direction of detected object
+ * @sizey: return, size in y direction of detected object
+ * @angle: return, angle of detected object
+ * 0 deg ~ 359 deg, vertical is 0 deg, clockwise
+ * @reserved:  future extensions
+ */
+struct v4l2_od_obj_desc {
+   __u16   centerx;
+   __u16   centery;
+   __u16   sizex;
+   __u16   sizey;
+   __u16   angle;
+   __u16   reserved[5];
+};
+
+/**
+ * struct v4l2_od_face_desc
+ * @id:return, used to be associated with detected eyes, mouth,
+ * and other objects inside this face, and each face in one
+ * frame has a unique id, start from 1
+ * @smile_level:return, smile level of the face
+ * @f: return, face description
+ */
+struct v4l2_od_face_desc {
+   __u16   id;
+   __u8smile_level;
+   __u8reserved[15];
+
+   struct v4l2_od_obj_desc f;
+};
+
+/**
+ * struct v4l2_od_eye_desc
+ * @face_id:   return, used to associate with which face, 0 means
+ * no face associated with the eye
+ * @blink_level:return, blink level of the eye
+ * @e: return, eye description
+ */
+struct v4l2_od_eye_desc {
+   __u16   face_id;
+   __u8blink_level;
+   __u8reserved[15];
+
+   struct v4l2_od_obj_desc e;
+};
+
+/**
+ * struct v4l2_od_mouth_desc
+ * @face_id:   return, used to associate with which fa