Re: [FFmpeg-devel] [RFC PATCH] avformat/rtpdec: Audio level RTP extension RFC6464

2024-03-17 Thread Jonathan Baudanza
On Sat, Feb 10, 2024, at 9:30 PM, j...@jonb.org wrote:
> From: Jonathan Baudanza 
> 
> libwebrtc will add audio level (in decibels) and VAD status to each RTP 
> packet.
> 
> This patch will add both values to the packet sidedata.
> 
> I've been using this patch in production for about a year on live audio RTP
> streams to detect when users are speaking without needing to decode the audio
> data.
> 

Does anyone have any feedback on this patch?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [RFC PATCH] avformat/rtpdec: Audio level RTP extension RFC6464

2024-02-10 Thread jon
From: Jonathan Baudanza 

libwebrtc will add audio level (in decibels) and VAD status to each RTP packet.

This patch will add both values to the packet sidedata.

I've been using this patch in production for about a year on live audio RTP
streams to detect when users are speaking without needing to decode the audio
data.
---
 libavcodec/avpacket.c |  1 +
 libavcodec/defs.h | 15 
 libavcodec/packet.h   |  5 +++
 libavformat/rtpdec.c  | 87 +++
 libavformat/rtpdec.h  |  5 +++
 libavformat/rtsp.c| 16 
 libavformat/rtsp.h|  2 +
 7 files changed, 131 insertions(+)

diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
index e118bbaad1..73e0341bf7 100644
--- a/libavcodec/avpacket.c
+++ b/libavcodec/avpacket.c
@@ -305,6 +305,7 @@ const char *av_packet_side_data_name(enum 
AVPacketSideDataType type)
 case AV_PKT_DATA_IAMF_MIX_GAIN_PARAM:return "IAMF Mix Gain 
Parameter Data";
 case AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM:   return "IAMF Demixing Info 
Parameter Data";
 case AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM: return "IAMF Recon Gain Info 
Parameter Data";
+case AV_PKT_DATA_SSRC_AUDIO_LEVEL:   return "RTP SSRC Audio Level";
 }
 return NULL;
 }
diff --git a/libavcodec/defs.h b/libavcodec/defs.h
index 00d840ec19..87e8814760 100644
--- a/libavcodec/defs.h
+++ b/libavcodec/defs.h
@@ -323,6 +323,21 @@ typedef struct AVProducerReferenceTime {
 int flags;
 } AVProducerReferenceTime;
 
+/**
+ * Audio level information from the ssrc-audio-level RTP header extension (RFC 6464).
+ */
+typedef struct AVAudioLevel {
+/**
+ * Audio level of this packet in dBov, from -127 (quietest) to 0 (loudest).
+ */
+int8_t level;
+
+/**
+ * Voice activity flag: set to 1 if the encoder believes this packet contains voice.
+ */
+int voice;
+} AVAudioLevel;
+
 /**
  * Encode extradata length to a buffer. Used by xiph codecs.
  *
diff --git a/libavcodec/packet.h b/libavcodec/packet.h
index 8558ae849e..f7f1deb6e0 100644
--- a/libavcodec/packet.h
+++ b/libavcodec/packet.h
@@ -330,6 +330,11 @@ enum AVPacketSideDataType {
 */
 AV_PKT_DATA_AMBIENT_VIEWING_ENVIRONMENT,
 
+/**
+ * Audio Level and VAD data from the RTP header extension as defined by 
RFC 6464.
+ */
+AV_PKT_DATA_SSRC_AUDIO_LEVEL,
+
 /**
  * The number of side data types.
  * This is not part of the public API/ABI in the sense that it may
diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c
index fa7544cc07..479ea2e245 100644
--- a/libavformat/rtpdec.c
+++ b/libavformat/rtpdec.c
@@ -694,6 +694,79 @@ static void finalize_packet(RTPDemuxContext *s, AVPacket 
*pkt, uint32_t timestam
s->base_timestamp;
 }
 
+
+/**
+ * Locate one element inside an RTP header extension block (RFC 5285).
+ * Both the one-byte and the two-byte element header formats are understood.
+ *
+ * @param id  local identifier of the extension element to look for
+ * @param buf start of the extension block: 16-bit profile marker, 16-bit
+ *            length in 32-bit words, then the elements. The caller must make
+ *            sure the whole block lies within the packet.
+ * @param len if not NULL, receives the element's data length in bytes, or 0
+ *            when the element is not found
+ * @return pointer to the element's data inside buf, or NULL if not found
+ */
+static const uint8_t *find_header_ext_data(int id, const uint8_t *buf,
+                                           uint8_t *len)
+{
+    const int buflen = AV_RB16(buf + 2) * 4;
+    const uint8_t *p = buf + 4;
+    int hdr_size;
+    int idx = 0;
+
+    if (len)
+        *len = 0;
+
+    if (buf[0] == 0xbe && buf[1] == 0xde) {
+        hdr_size = 1; // one-byte header format
+    } else if (buf[0] == 0x10 && (buf[1] & 0xf0) == 0) {
+        // Two-byte header format: the marker is 0x100 in the upper 12 bits,
+        // the low 4 bits are application-defined "appbits" and may be non-zero.
+        hdr_size = 2;
+    } else {
+        return NULL; // unknown extension profile
+    }
+
+    // Every element needs at least two bytes: header plus one data byte in the
+    // one-byte format, ID plus length byte in the two-byte format.
+    while (idx + 1 < buflen) {
+        int this_id;
+        int this_len;
+
+        if (p[idx] == 0) {
+            idx++; // padding byte
+            continue;
+        }
+
+        if (hdr_size == 1) {
+            this_id  = p[idx] >> 4;
+            this_len = (p[idx] & 0xf) + 1;
+            // ID 15 is reserved in the one-byte format; stop parsing there.
+            if (this_id == 15)
+                break;
+        } else {
+            // All IDs from 1 to 255 are valid here, and the length may be 0.
+            this_id  = p[idx];
+            this_len = p[idx + 1];
+        }
+
+        if (this_len > buflen - idx - hdr_size)
+            break; // element runs past the end of the extension block
+
+        if (this_id == id) {
+            if (len)
+                *len = this_len;
+            return p + idx + hdr_size;
+        }
+
+        idx += hdr_size + this_len;
+    }
+
+    return NULL;
+}
+
+
 static int rtp_parse_packet_internal(RTPDemuxContext *s, AVPacket *pkt,
  const uint8_t *buf, int len)
 {
@@ -703,6 +776,7 @@ static int rtp_parse_packet_internal(RTPDemuxContext *s, 
AVPacket *pkt,
 AVStream *st;
 uint32_t timestamp;
 int rv = 0;
+