Hi!

I enhanced QuickTimeFileSink to detect h264 IDR Frames.

If -y is set in openRTSP it now waits
*) until Audio is in sync first
*) then until h264 is in sync and an IDR Frame is received

This ensures that the recorded stream starts with an IDR Frame.

I've limited this detection to feeds with 2 sources (mostly audio+h264 I
guess).

I've also introduced a class, SyncFrame, to store the frame numbers of sync
frames for the stss atom. For H.264 I store the IDR frame numbers and write
those instead of the fixed every-12th-sample entries (the old "i += 12" loop).

Last but not least, I changed the hinf.payt atom's payload format field from
8 bits to 32 bits, since IMO the spec says so. See Table 3-12 at
http://developer.apple.com/mac/library/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html#//apple_ref/doc/uid/TP40000939-CH205-85342

Makes mp4dump happy so far;-)

Greetings, Wolfgang

PS: the patch works with --fuzz=0 for 2010.03.15
-- 
Wolfgang Breyha <[email protected]> | http://www.blafasel.at/
Vienna University Computer Center | Austria

--- live.orig/liveMedia/QuickTimeFileSink.cpp   2010-03-14 14:02:15.000000000 
+0100
+++ live/liveMedia/QuickTimeFileSink.cpp        2010-03-14 13:58:10.000000000 
+0100
@@ -32,6 +32,8 @@
 
 #define fourChar(x,y,z,w) ( ((x)<<24)|((y)<<16)|((z)<<8)|(w) )
 
+#define H264_IDR_FRAME 0x65  //bit 8 == 0, bits 7-6 (ref) == 3, bits 5-1 
(type) == 5
+
 ////////// SubsessionIOState, ChunkDescriptor ///////////
 // A structure used to represent the I/O state of each input 'subsession':
 
@@ -84,6 +86,16 @@
   unsigned fBytesInUse;
 };
 
+class SyncFrame {
+public:
+  SyncFrame(unsigned frameNum);
+  virtual ~SyncFrame();
+
+public:
+  class SyncFrame *nextSyncFrame;
+  unsigned sfFrameNum;  
+};
+
 // A 64-bit counter, used below:
 
 class Count64 {
@@ -155,6 +167,7 @@
       // if there's a pause at the beginning
 
   ChunkDescriptor *fHeadChunk, *fTailChunk;
+  SyncFrame *fHeadSyncFrame, *fTailSyncFrame;
   unsigned fNumChunks;
 
   // Counters to be used in the hint track's 'udta'/'hinf' atom;
@@ -511,7 +524,8 @@
                                     MediaSubsession& subsession)
   : fHintTrackForUs(NULL), fTrackHintedByUs(NULL),
     fOurSink(sink), fOurSubsession(subsession),
-    fLastPacketRTPSeqNum(0), fHaveBeenSynced(False), fQTTotNumSamples(0),
+    fLastPacketRTPSeqNum(0), fHaveBeenSynced(False), fQTTotNumSamples(0), 
+    fHeadSyncFrame(NULL), fTailSyncFrame(NULL),
     fHeadChunk(NULL), fTailChunk(NULL), fNumChunks(0) {
   fTrackID = ++fCurrentTrackNumber;
 
@@ -529,7 +543,7 @@
 
 SubsessionIOState::~SubsessionIOState() {
   delete fBuffer; delete fPrevBuffer;
-  delete fHeadChunk;
+  delete fHeadChunk, delete fHeadSyncFrame;
 }
 
 Boolean SubsessionIOState::setQTstate() {
@@ -787,6 +801,20 @@
       sampleNumberOfFrameStart = fQTTotNumSamples + 1;
     }
 
+    if (avcHack && (*frameSource == H264_IDR_FRAME))
+    {
+      SyncFrame* newSyncFrame = new SyncFrame(fQTTotNumSamples + 1);
+      if (fTailSyncFrame == NULL)
+      {
+        fHeadSyncFrame = newSyncFrame;
+      }
+      else
+      {
+        fTailSyncFrame->nextSyncFrame = newSyncFrame;
+      }
+      fTailSyncFrame = newSyncFrame;
+    }
+
     // Remember the current frame for next time:
     fPrevFrameState.frameSize = frameSize;
     fPrevFrameState.presentationTime = presentationTime;
@@ -1057,6 +1085,25 @@
     if (!fHaveBeenSynced) {
       // We weren't synchronized before
       if (fOurSubsession.rtpSource()->hasBeenSynchronizedUsingRTCP()) {
+       // H264 ?
+       if (fQTMediaDataAtomCreator == &QuickTimeFileSink::addAtom_avc1)
+       {
+               // special case: audio + H264 video: wait until audio is in sync
+               if ((s.fNumSubsessions == 2) && (s.fNumSyncedSubsessions < 
(s.fNumSubsessions - 1)))
+               {
+                       return false;
+               }
+               // if audio is in sync, wait for the next IDR frame to start
+               unsigned char* const frameSource = fBuffer->dataStart();
+               if (*frameSource != H264_IDR_FRAME)
+               {
+                       return false;
+               }
+               else
+               {
+                       fprintf(stderr, "IDR Frame found. Go!\n");
+               }
+       }
        // But now we are
        fHaveBeenSynced = True;
        fSyncTime = presentationTime;
@@ -1082,6 +1129,15 @@
   if (hintTrack != NULL) hintTrack->fTrackHintedByUs = hintedTrack;
 }
 
+SyncFrame
+::SyncFrame(unsigned frameNum)
+  : nextSyncFrame(NULL), sfFrameNum(frameNum) {
+}  
+
+SyncFrame::~SyncFrame() {
+  delete nextSyncFrame;
+}
+
 void Count64::operator+=(unsigned arg) {
   unsigned newLo = lo + arg;
   if (newLo < lo) { // lo has overflowed
@@ -1953,29 +2009,42 @@
   int64_t numEntriesPosition = TellFile64(fOutFid);
   size += addWord(0); // dummy for "Number of entries"
 
-  // Then, run through the chunk descriptors, counting up the total nuber of 
samples:
   unsigned numEntries = 0, numSamplesSoFar = 0;
-  unsigned const samplesPerFrame = fCurrentIOState->fQTSamplesPerFrame;
-  ChunkDescriptor* chunk = fCurrentIOState->fHeadChunk;
-  while (chunk != NULL) {
-    unsigned const numSamples = chunk->fNumFrames*samplesPerFrame;
-    numSamplesSoFar += numSamples;
-    chunk = chunk->fNextChunk;
-  }
-
-  // Then, write out the sample numbers that we deem correspond to 'sync 
samples':
-  unsigned i;
-  for (i = 0; i < numSamplesSoFar; i += 12) {
-    // For an explanation of the constant "12", see 
http://lists.live555.com/pipermail/live-devel/2009-July/010969.html
-    // (Perhaps we should really try to keep track of which 'samples' 
('frames' for video) really are 'key frames'?)
-    size += addWord(i+1);
-    ++numEntries;
+  if (fCurrentIOState->fHeadSyncFrame != NULL)
+  {
+    SyncFrame* currentSyncFrame = fCurrentIOState->fHeadSyncFrame;
+    while(currentSyncFrame != NULL)
+    {
+      ++numEntries;
+      size += addWord(currentSyncFrame->sfFrameNum);
+      currentSyncFrame = currentSyncFrame->nextSyncFrame;
+    }
   }
-
-  // Then, write out the last entry (if we haven't already done so):
-  if (i != (numSamplesSoFar - 1)) {
-    size += addWord(numSamplesSoFar);
-    ++numEntries;
+  else
+  {
+    // Then, run through the chunk descriptors, counting up the total number of 
samples:
+    unsigned const samplesPerFrame = fCurrentIOState->fQTSamplesPerFrame;
+    ChunkDescriptor* chunk = fCurrentIOState->fHeadChunk;
+    while (chunk != NULL) {
+      unsigned const numSamples = chunk->fNumFrames*samplesPerFrame;
+      numSamplesSoFar += numSamples;
+      chunk = chunk->fNextChunk;
+    }
+  
+    // Then, write out the sample numbers that we deem correspond to 'sync 
samples':
+    unsigned i;
+    for (i = 0; i < numSamplesSoFar; i += 12) {
+      // For an explanation of the constant "12", see 
http://lists.live555.com/pipermail/live-devel/2009-July/010969.html
+      // (Perhaps we should really try to keep track of which 'samples' 
('frames' for video) really are 'key frames'?)
+      size += addWord(i+1);
+      ++numEntries;
+    }
+  
+    // Then, write out the last entry (if we haven't already done so):
+    if (i != (numSamplesSoFar - 1)) {
+      size += addWord(numSamplesSoFar);
+      ++numEntries;
+    }
   }
 
   // Now go back and fill in the "Number of entries" field:
@@ -2236,7 +2305,7 @@
 addAtom(payt);
   MediaSubsession& ourSubsession = fCurrentIOState->fOurSubsession;
   RTPSource* rtpSource = ourSubsession.rtpSource();
-  size += addByte(rtpSource->rtpPayloadFormat());
+  size += addWord(rtpSource->rtpPayloadFormat());
 
   // Also, add a 'rtpmap' string: <mime-subtype>/<rtp-frequency>
   unsigned rtpmapStringLength = strlen(ourSubsession.codecName()) + 20;

_______________________________________________
live-devel mailing list
[email protected]
http://lists.live555.com/mailman/listinfo/live-devel

Reply via email to