2014-01-27 77 views
3

我正在解碼OGG視頻(Theora & vorbis作爲編解碼器),並希望在播放聲音時在屏幕上顯示它(使用Ogre 3D)。我可以將圖像流解碼得很好,並且視頻以正確的幀速率完美播放等。FFmpeg + OpenAL - 從視頻播放流式傳輸聲音將不起作用

但是,我無法使用OpenAL播放聲音。

編輯:我設法讓播放聲音至少有點像視頻中的實際音頻。更新了示例代碼。

編輯2:我現在能夠得到「幾乎」正確的聲音。我必須將OpenAL設置爲使用AL_FORMAT_STEREO_FLOAT32(初始化擴展後)而不是STEREO16。現在聲音「只」極高,並且以正確的速度播放。

這是我如何解碼音頻數據包(在後臺線程,等效工作蠻好的視頻文件的圖像流):

//------------------------------------------------------------------------------
/**
 * Decode one audio packet and queue the resulting raw samples on the player.
 *
 * @param p_packet            Packet read from the audio stream (consumed by the decoder).
 * @param p_audioCodecContext Codec context of the audio stream.
 * @param p_frame             Scratch frame the decoder writes into.
 * @param p_player            Receives the new AudioFrame (takes ownership).
 * @param p_videoInfo         Error message is stored here on failure.
 * @return Number of bytes consumed from the packet, or a negative libav error code.
 */
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
         FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
    // Decode audio frame
    int got_frame = 0;
    int decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
    if (decoded < 0)
    {
        p_videoInfo.error = "Error decoding audio frame.";
        return decoded;
    }

    // Frame is complete, store it in audio frame queue
    if (got_frame)
    {
        // Total byte size of all samples in this frame (all channels/planes).
        int bufferSize = av_samples_get_buffer_size(NULL, p_audioCodecContext->channels, p_frame->nb_samples,
                                                    p_audioCodecContext->sample_fmt, 0);

        int64_t duration = p_frame->pkt_duration;
        int64_t dts = p_frame->pkt_dts;

        if (staticOgreLog)
        {
            staticOgreLog->logMessage("Audio frame bufferSize/duration/dts: "
                + boost::lexical_cast<std::string>(bufferSize) + "/"
                + boost::lexical_cast<std::string>(duration) + "/"
                + boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
        }

        // Create the audio frame.
        // NOTE(review): for a planar sample format this copies the two channel
        // planes back to back instead of interleaving them — OpenAL expects
        // interleaved samples, so planar input should go through swr_convert
        // first (see the corrected version of this function).
        AudioFrame* frame = new AudioFrame();
        frame->dataSize = bufferSize;
        frame->data = new uint8_t[bufferSize];
        if (p_frame->channels == 2)
        {
            memcpy(frame->data, p_frame->data[0], bufferSize >> 1);
            memcpy(frame->data + (bufferSize >> 1), p_frame->data[1], bufferSize >> 1);
        }
        else
        {
            // Bug fix: copy from the first data plane, not from the array of
            // plane pointers itself (p_frame->data is uint8_t*[AV_NUM_DATA_POINTERS]).
            memcpy(frame->data, p_frame->data[0], bufferSize);
        }
        // NOTE(review): pkt_duration is expressed in the *stream* time base;
        // scaling by the codec-context time base here may mis-compute
        // lifeTime — confirm against the demuxer's stream->time_base.
        double timeBase = ((double)p_audioCodecContext->time_base.num)/(double)p_audioCodecContext->time_base.den;
        frame->lifeTime = duration * timeBase;

        p_player->addAudioFrame(frame);
    }

    return decoded;
}

所以,你可以看到,我解碼幀,memcpy它到我自己的結構,AudioFrame。現在,當播放聲音,我用這些音頻幀像這樣:

// Initial fill of the OpenAL streaming buffers: generate a fixed set of
// buffers, load each with one decoded audio frame, queue them on the source
// and start playback. (Snippet from a larger function; the closing brace near
// the end belongs to an enclosing scope that is not shown here.)
int numBuffers = 4; 
    ALuint buffers[4]; 
    alGenBuffers(numBuffers, buffers); 
    ALenum success = alGetError(); 
    if(success != AL_NO_ERROR) 
    { 
     CONSOLE_LOG("Error on alGenBuffers : " + Ogre::StringConverter::toString(success) + alGetString(success)); 
     return; 
    } 

    // Fill a number of data buffers with audio from the stream.
    // The player may return fewer frames than requested if the decoder
    // has not produced enough yet (hence numReturned below).
    std::vector<AudioFrame*> audioBuffers; 
    std::vector<unsigned int> audioBufferSizes; 
    unsigned int numReturned = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffers, audioBuffers, audioBufferSizes); 

    // Assign the data buffers to the OpenAL buffers.
    // _streamingFormat / _streamingFrequency are set elsewhere from the
    // stream's sample format and sample rate.
    for (unsigned int i = 0; i < numReturned; ++i) 
    { 
     alBufferData(buffers[i], _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency); 

     success = alGetError(); 
     if(success != AL_NO_ERROR) 
     { 
      CONSOLE_LOG("Error on alBufferData : " + Ogre::StringConverter::toString(success) + alGetString(success) 
          + " size: " + Ogre::StringConverter::toString(audioBufferSizes[i])); 
      return; 
     } 
    } 

    // Queue the buffers into OpenAL 
    alSourceQueueBuffers(_source, numReturned, buffers); 
    success = alGetError(); 
    if(success != AL_NO_ERROR) 
    { 
     CONSOLE_LOG("Error queuing streaming buffers: " + Ogre::StringConverter::toString(success) + alGetString(success)); 
     return; 
    } 
} 

// Start playback; the refill loop below keeps the queue supplied.
alSourcePlay(_source); 

的格式和頻率我給OpenAL的是AL_FORMAT_STEREO_FLOAT32(這是一個立體聲流,和我做初始化FLOAT32擴展名), 48000(這是音頻流的AVCodecContext的採樣率)。

在回放過程中,我這樣重新填充OpenAL的緩衝區:

ALint numBuffersProcessed; 

// Check if OpenAL is done with any of the queued buffers 
alGetSourcei(_source, AL_BUFFERS_PROCESSED, &numBuffersProcessed); 
if(numBuffersProcessed <= 0) 
    return; 

// Fill a number of data buffers with audio from the stream 
std::vector<AudiFrame*> audioBuffers; 
std::vector<unsigned int> audioBufferSizes; 
unsigned int numFilled = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffersProcessed, audioBuffers, audioBufferSizes); 

// Assign the data buffers to the OpenAL buffers 
ALuint buffer; 
for (unsigned int i = 0; i < numFilled; ++i) 
{ 
    // Pop the oldest queued buffer from the source, 
    // fill it with the new data, then re-queue it 
    alSourceUnqueueBuffers(_source, 1, &buffer); 

    ALenum success = alGetError(); 
    if(success != AL_NO_ERROR) 
    { 
     CONSOLE_LOG("Error Unqueuing streaming buffers: " + Ogre::StringConverter::toString(success)); 
     return; 
    } 

    alBufferData(buffer, _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency); 

    success = alGetError(); 
    if(success != AL_NO_ERROR) 
    { 
     CONSOLE_LOG("Error on re- alBufferData: " + Ogre::StringConverter::toString(success)); 
     return; 
    } 

    alSourceQueueBuffers(_source, 1, &buffer); 

    success = alGetError(); 
    if(success != AL_NO_ERROR) 
    { 
     CONSOLE_LOG("Error re-queuing streaming buffers: " + Ogre::StringConverter::toString(success) + " " 
        + alGetString(success)); 
     return; 
    } 
} 

// Make sure the source is still playing, 
// and restart it if needed. 
ALint playStatus; 
alGetSourcei(_source, AL_SOURCE_STATE, &playStatus); 
if(playStatus != AL_PLAYING) 
    alSourcePlay(_source); 

正如你所看到的,我做的相當重的錯誤檢查。但是我從OpenAL和FFmpeg都沒有發現任何錯誤。 編輯:我聽到的聲音有點類似於視頻中的實際音頻,但非常高調和口吃非常多。另外,它似乎在電視噪音之上播放。很奇怪。另外,它的播放速度比正確的音頻慢得多。 編輯:2使用AL_FORMAT_STEREO_FLOAT32後,聲音以正確的速度播放,但仍然非常高調和口吃(儘管比以前少)。

視頻本身沒有損壞,它可以在任何播放器中正常播放。OpenAL也可以在同一個應用程序中播放*.wav文件,所以它本身也是可以工作的。

任何想法可能是錯誤的地方或如何正確地做到這一點?

我唯一的猜測是,FFmpeg的解碼函數不會產生OpenAL可以讀取的數據。但是,就我所知,我的做法與FFmpeg的解碼示例一致,所以我不知道缺少了什麼。據我所知,decode_audio4函數將幀解碼爲原始數據。OpenAL應該能夠處理RAW數據(或者說,它不能處理其他任何格式)。

+0

不知何故錯過了一個點? – rogerdpack

+0

解碼時,我看到PTS都是AV_NOPTS_VALUE。所以我使用dts。這些都是爲了。這就是爲什麼我自己不做任何訂單。 – TheSHEEEP

+0

或者你的意思是使用PTS跳過/複製幀進行播放?你在OpenAL中唯一能做的就是重新填充源緩衝區(它有點像後緩衝區,只是有更多的緩衝區)。我不知道如何在那裏跳過/複製音頻幀,因爲我不知道OpenAL在「未來」中需要什麼。您重新填充的緩衝區是那些將在X幀之後播放的緩衝區,並且您無法知道X。在播放*.wav文件時,OpenAL也不需要被告知跳過/重複幀,所以我確信它本身就會處理這些。 – TheSHEEEP

回答

1

所以,我終於想出瞭如何去做。哎,真是一團糟。這是來自libav-users郵件列表中的一位用戶的hint,它使我走上了正確的道路。

這裏是我的錯誤:

  1. 在alBufferData功能使用的格式不正確。我使用了AL_FORMAT_STEREO16(因爲這是OpenAL使用的每個單個流示例)。我應該使用AL_FORMAT_STEREO_FLOAT32,因爲視頻流是Ogg,vorbis是以浮點形式存儲的。而使用swr_convert將AV_SAMPLE_FMT_FLTP轉換爲AV_SAMPLE_FMT_S16只是崩潰。不知道爲什麼。

  2. 不使用swr_convert到解碼的音頻幀轉換爲目標格式。當我試圖使用swr_convert從FLTP轉換爲S16時,它會在沒有給出原因的情況下崩潰,我認爲它已損壞。但在弄清楚我的第一個錯誤之後,我再次嘗試,從FLTP轉換爲FLT(非平面),然後運行!所以OpenAL使用交錯格式,而不是平面。很高興知道。

所以這裏是爲我工作用的Ogg視頻decodeAudioPacket功能,Vorbis音頻流:

/**
 * Decode one audio packet, convert the samples to interleaved float
 * (AV_SAMPLE_FMT_FLT) via swresample, and queue the result on the player.
 *
 * @param p_packet            Packet read from the audio stream; consumed bytes are
 *                            shifted out of it so the caller can re-feed the rest.
 * @param p_audioCodecContext Codec context of the audio stream.
 * @param p_frame             Scratch frame the decoder writes into.
 * @param p_swrContext        Resampler configured planar->interleaved float.
 * @param p_destBuffer        Pre-allocated destination sample buffer (per-plane pointers).
 * @param p_destLinesize      Capacity value passed through to swr_convert.
 *                            NOTE(review): swr_convert's third argument is the output
 *                            capacity in *samples*, while av_samples_alloc_array_and_samples
 *                            reports a linesize in *bytes* — confirm the caller passes
 *                            the intended value.
 * @param p_player            Receives the new AudioFrame (takes ownership).
 * @param p_videoInfo         Error message is stored here on failure.
 * @return Bytes consumed from the packet, or a negative error code.
 */
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
         SwrContext* p_swrContext, uint8_t** p_destBuffer, int p_destLinesize,
         FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
    // Decode audio frame
    int decoded = 0;
    int got_frame = 0;
    decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
    if (decoded < 0)
    {
        p_videoInfo.error = "Error decoding audio frame.";
        return decoded;
    }

    if(decoded <= p_packet.size)
    {
        /* Move the unread data to the front and clear the end bits,
         * so a partially-consumed packet can be decoded again. */
        int remaining = p_packet.size - decoded;
        memmove(p_packet.data, &p_packet.data[decoded], remaining);
        av_shrink_packet(&p_packet, remaining);
    }

    // Frame is complete, store it in audio frame queue
    if (got_frame)
    {
        // Convert planar decoder output to interleaved float for OpenAL.
        int outputSamples = swr_convert(p_swrContext,
                p_destBuffer, p_destLinesize,
                (const uint8_t**)p_frame->extended_data, p_frame->nb_samples);
        // Bug fix: swr_convert can fail; a negative return previously flowed
        // into bufferSize and the memcpy below unchecked.
        if (outputSamples < 0)
        {
            p_videoInfo.error = "Error converting audio frame.";
            return outputSamples;
        }

        // Byte size of the converted, interleaved samples.
        int bufferSize = av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT) * p_videoInfo.audioNumChannels
                 * outputSamples;

        int64_t duration = p_frame->pkt_duration;
        int64_t dts = p_frame->pkt_dts;

        if (staticOgreLog)
        {
            staticOgreLog->logMessage("Audio frame bufferSize/duration/dts: "
                + boost::lexical_cast<std::string>(bufferSize) + "/"
                + boost::lexical_cast<std::string>(duration) + "/"
                + boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
        }

        // Create the audio frame; interleaved data lives in the first plane.
        AudioFrame* frame = new AudioFrame();
        frame->dataSize = bufferSize;
        frame->data = new uint8_t[bufferSize];
        memcpy(frame->data, p_destBuffer[0], bufferSize);
        // NOTE(review): pkt_duration is in the *stream* time base; scaling by
        // the codec-context time base may mis-compute lifeTime — verify.
        double timeBase = ((double)p_audioCodecContext->time_base.num)/(double)p_audioCodecContext->time_base.den;
        frame->lifeTime = duration * timeBase;

        p_player->addAudioFrame(frame);
    }

    return decoded;
}

這裏是我如何初始化的背景和目的緩衝區:

// Initialize SWR context: convert from the decoder's native sample format
// (planar float for Vorbis) to interleaved AV_SAMPLE_FMT_FLT, keeping the
// channel layout and sample rate unchanged.
SwrContext* swrContext = swr_alloc_set_opts(NULL, 
      audioCodecContext->channel_layout, AV_SAMPLE_FMT_FLT, audioCodecContext->sample_rate, 
      audioCodecContext->channel_layout, audioCodecContext->sample_fmt, audioCodecContext->sample_rate, 
      0, NULL); 
int result = swr_init(swrContext); 

// Create destination sample buffer with room for 2048 samples per channel.
// NOTE(review): destBufferLinesize is returned in *bytes*; check how it is
// used at the swr_convert call site, which expects a sample count.
uint8_t** destBuffer = NULL; 
int destBufferLinesize; 
av_samples_alloc_array_and_samples(&destBuffer, 
            &destBufferLinesize, 
            videoInfo.audioNumChannels, 
            2048, 
            AV_SAMPLE_FMT_FLT, 
            0);