From: Jonathan Baudanza
libwebrtc adds the audio level (in dBov) and VAD status to each RTP packet
using the ssrc-audio-level header extension (RFC 6464).
This patch exposes both values as packet side data.
I've been using this patch in production for about a year on live audio RTP
streams to detect when users are speaking without needing to decode the audio
data.
---
libavcodec/avpacket.c | 1 +
libavcodec/defs.h | 15
libavcodec/packet.h | 5 +++
libavformat/rtpdec.c | 87 +++
libavformat/rtpdec.h | 5 +++
libavformat/rtsp.c| 16
libavformat/rtsp.h| 2 +
7 files changed, 131 insertions(+)
diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
index e118bbaad1..73e0341bf7 100644
--- a/libavcodec/avpacket.c
+++ b/libavcodec/avpacket.c
@@ -305,6 +305,7 @@ const char *av_packet_side_data_name(enum
AVPacketSideDataType type)
case AV_PKT_DATA_IAMF_MIX_GAIN_PARAM:return "IAMF Mix Gain
Parameter Data";
case AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM: return "IAMF Demixing Info
Parameter Data";
case AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM: return "IAMF Recon Gain Info
Parameter Data";
+case AV_PKT_DATA_SSRC_AUDIO_LEVEL: return "RTP SSRC Audio Level";
}
return NULL;
}
diff --git a/libavcodec/defs.h b/libavcodec/defs.h
index 00d840ec19..87e8814760 100644
--- a/libavcodec/defs.h
+++ b/libavcodec/defs.h
@@ -323,6 +323,21 @@ typedef struct AVProducerReferenceTime {
int flags;
} AVProducerReferenceTime;
+/**
+ * Per-packet audio level carried by the ssrc-audio-level RTP header extension.
+ */
+typedef struct AVAudioLevel {
+/**
+ * Audio level for this packet, measured in dBov, in the range -127 to 0.
+ */
+int8_t level;
+
+/**
+ * Voice activity flag: set to 1 if the encoder believes this packet contains voice.
+ */
+int voice;
+} AVAudioLevel;
+
/**
* Encode extradata length to a buffer. Used by xiph codecs.
*
diff --git a/libavcodec/packet.h b/libavcodec/packet.h
index 8558ae849e..f7f1deb6e0 100644
--- a/libavcodec/packet.h
+++ b/libavcodec/packet.h
@@ -330,6 +330,11 @@ enum AVPacketSideDataType {
*/
AV_PKT_DATA_AMBIENT_VIEWING_ENVIRONMENT,
+/**
+ * Audio Level and VAD data from the RTP header extension as defined by RFC 6464.
+ */
+AV_PKT_DATA_SSRC_AUDIO_LEVEL,
+
/**
* The number of side data types.
* This is not part of the public API/ABI in the sense that it may
diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c
index fa7544cc07..479ea2e245 100644
--- a/libavformat/rtpdec.c
+++ b/libavformat/rtpdec.c
@@ -694,6 +694,79 @@ static void finalize_packet(RTPDemuxContext *s, AVPacket
*pkt, uint32_t timestam
s->base_timestamp;
}
+
+/**
+ * Locate one element inside an RTP header extension block (RFC 8285,
+ * which obsoletes RFC 5285).
+ *
+ * @param id   local identifier of the wanted extension element
+ * @param buf  start of the header extension: a 16-bit profile word, a 16-bit
+ *             length counted in 32-bit words, then the element data. Only
+ *             the embedded length field bounds the scan; buf is not checked
+ *             against the packet size here, so the caller is expected to
+ *             have validated that the whole extension lies inside the packet.
+ * @param len  if non-NULL, receives the element's data length in bytes,
+ *             or 0 when no element is returned
+ * @return pointer to the element's data inside buf, or NULL if no element
+ *         with this id was found or the matching element overruns the block
+ */
+static const uint8_t *find_header_ext_data(int id, const uint8_t *buf,
+                                           uint8_t *len)
+{
+    const int profile = AV_RB16(buf);
+    const int buflen  = AV_RB16(buf + 2) * 4;
+    const uint8_t *p  = buf + 4;
+    int idx = 0;
+
+    if (profile == 0xbede) {
+        // One-byte header format: 4-bit ID followed by 4-bit (length - 1).
+        while (idx + 1 < buflen) {
+            int this_id, this_len;
+
+            if (p[idx] == 0) {
+                idx++; // skip padding
+                continue;
+            }
+
+            this_id  = p[idx] >> 4;
+            this_len = (p[idx] & 0xf) + 1;
+
+            // ID 15 is reserved in the one-byte format: stop parsing.
+            if (this_id == 15)
+                break;
+
+            if (this_id == id) {
+                if (this_len > buflen - idx - 1)
+                    break; // element overruns the extension block
+
+                if (len)
+                    *len = this_len;
+                return p + idx + 1;
+            }
+
+            idx += 1 + this_len;
+        }
+    } else if ((profile & 0xfff0) == 0x1000) {
+        // Two-byte header format: 8-bit ID followed by an 8-bit length.
+        // The low four bits of the profile word are application bits
+        // ("appbits") and may be non-zero, hence the 0xfff0 mask.
+        while (idx + 1 < buflen) {
+            int this_id, this_len;
+
+            if (p[idx] == 0) {
+                idx++; // skip padding
+                continue;
+            }
+
+            this_id  = p[idx];
+            this_len = p[idx + 1];
+
+            // Unlike the one-byte format, ID 15 is an ordinary identifier
+            // here, so it must not terminate parsing.
+            if (this_id == id) {
+                if (this_len > buflen - idx - 2)
+                    break; // element overruns the extension block
+
+                if (len)
+                    *len = this_len;
+                return p + idx + 2;
+            }
+
+            idx += 2 + this_len;
+        }
+    }
+
+    if (len)
+        *len = 0;
+
+    return NULL;
+}
+
+
static int rtp_parse_packet_internal(RTPDemuxContext *s, AVPacket *pkt,
const uint8_t *buf, int len)
{
@@ -703,6 +776,7 @@ static int rtp_parse_packet_internal(RTPDemuxContext *s,
AVPacket *pkt,
AVStream *st;
uint32_t timestamp;
int rv = 0;
+