2014-04-16 48 views
1

使用 libav* 庫將 H.264 轉碼爲 VP8 時,我至今未能獲得正確的輸出幀速率。我修改了 FFmpeg 源代碼中的 transcoding.c 示例,已經能夠把 Sintel.2010.720p.mkv 正常編碼爲 WebM(VP8/Vorbis)。不幸的是,結果文件是 48 fps,而原始文件以及我試圖模仿的 ffmpeg 命令的輸出都是 24 fps。(標題:使用 libav* 將 H.264 轉碼爲 VP8 時幀速率不正確)

我注意到,對於這個視頻以及其他 H.264 視頻,ffprobe 報告的 tbc 都是 fps 的兩倍,而由 ffmpeg 命令產生的 VP8 流的 tbc 是默認的 1000。原版的 transcoding.c 示例把解碼器的時基(time base)直接複製給編碼器的 AVCodecContext,也就是 1/48。用 gdb 跟蹤 ffmpeg 命令時,看起來編碼器 AVCodecContext 的時基被設置爲 1/24;但是如果只做這一處修改,生成的視頻雖然是 24 fps,播放速度卻會減半,持續時間變成原來的兩倍。

我可以創建一個可用的視頻,但幀速率加倍。當輸出幀頻爲正確的24 fps時,視頻平滑但速度減半。

這是我對示例的修改。

/* 
    * Copyright (c) 2010 Nicolas George 
    * Copyright (c) 2011 Stefano Sabatini 
    * Copyright (c) 2014 Andrey Utkin 
    * 
    * Permission is hereby granted, free of charge, to any person obtaining a copy 
    * of this software and associated documentation files (the "Software"), to deal 
    * in the Software without restriction, including without limitation the rights 
    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
    * copies of the Software, and to permit persons to whom the Software is 
    * furnished to do so, subject to the following conditions: 
    * 
    * The above copyright notice and this permission notice shall be included in 
    * all copies or substantial portions of the Software. 
    * 
    * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
    * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
    * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
    * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
    * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
    * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
    * THE SOFTWARE. 
    */ 

/** 
    * @file 
    * API example for demuxing, decoding, filtering, encoding and muxing 
    * @example doc/examples/transcoding.c 
    */ 

#include <libavcodec/avcodec.h> 
#include <libavformat/avformat.h> 
#include <libavfilter/avfiltergraph.h> 
#include <libavfilter/avcodec.h> 
#include <libavfilter/buffersink.h> 
#include <libavfilter/buffersrc.h> 
#include <libavutil/opt.h> 
#include <libavutil/pixdesc.h> 

#define STATS_LOG "stats.log" 

/* Demuxer context of the input file; assigned by open_input_file(). */
static AVFormatContext *ifmt_ctx; 
/* Muxer context of the output file; assigned by init_output_context(). */
static AVFormatContext *ofmt_ctx; 
/* Filter graph of one stream together with its two endpoints. */
typedef struct FilteringContext { 
    AVFilterContext *buffersink_ctx; /* sink that filtered frames are pulled from */
    AVFilterContext *buffersrc_ctx; /* source that decoded frames are pushed into */
    AVFilterGraph *filter_graph; /* NULL for streams that have no filter graph */
} FilteringContext; 
/* Array with one entry per input stream; allocated by init_filters(). */
static FilteringContext *filter_ctx; 

/**
 * Open the input container and a decoder for every audio/video stream.
 *
 * The demuxer context is stored in the file-scope ifmt_ctx. Streams that are
 * neither audio nor video are left without an opened decoder.
 *
 * @param filename path or URL handed to avformat_open_input()
 * @return 0 on success, a negative AVERROR code on failure
 */
static int open_input_file(const char *filename) {
    unsigned int idx;
    int err;

    ifmt_ctx = NULL;
    err = avformat_open_input(&ifmt_ctx, filename, NULL, NULL);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot open input file\n");
        return err;
    }

    err = avformat_find_stream_info(ifmt_ctx, NULL);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
        return err;
    }

    for (idx = 0; idx < ifmt_ctx->nb_streams; idx++) {
        AVCodecContext *dec = ifmt_ctx->streams[idx]->codec;
        int needs_decoder = dec->codec_type == AVMEDIA_TYPE_VIDEO ||
                            dec->codec_type == AVMEDIA_TYPE_AUDIO;

        /* Only audio and video are re-encoded, so only they need a decoder. */
        if (!needs_decoder)
            continue;

        err = avcodec_open2(dec, avcodec_find_decoder(dec->codec_id), NULL);
        if (err < 0) {
            av_log(NULL, AV_LOG_ERROR, "Failed to open decoder for stream #%u\n", idx);
            return err;
        }
    }

    av_dump_format(ifmt_ctx, 0, filename, 0);
    return 0;
}

/**
 * Allocate the output muxer context into the file-scope ofmt_ctx.
 *
 * The container format is guessed from the file name.
 *
 * @param filename output path used for format guessing
 * @return 0 on success, AVERROR_UNKNOWN if no context could be created
 */
static int init_output_context(char* filename) {
    ofmt_ctx = NULL;
    avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, filename);

    if (ofmt_ctx)
        return 0;

    av_log(NULL, AV_LOG_ERROR, "Could not create output context\n");
    return AVERROR_UNKNOWN;
}

static int init_webm_encoders(int audioBitRate, int crf, int videoMaxBitRate, int threads, 
       char* quality, int speed, int pass, char* stats) { 
    AVStream *out_stream; 
    AVStream *in_stream; 
    AVCodecContext *dec_ctx, *enc_ctx; 
    AVCodec *encoder; 
    int ret; 
    unsigned int i; 

    for (i = 0; i < ifmt_ctx->nb_streams; i++) { 
in_stream = ifmt_ctx->streams[i]; 
dec_ctx = in_stream->codec; 
if (dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO || dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) { 

    AVDictionary *opts = NULL; 
    if (dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) { 
    encoder = avcodec_find_encoder(AV_CODEC_ID_VP8); 
    out_stream = avformat_new_stream(ofmt_ctx, encoder); 
    if (!out_stream) { 
     av_log(NULL, AV_LOG_ERROR, "Failed allocating output stream\n"); 
     return AVERROR_UNKNOWN; 
    } 

    enc_ctx = out_stream->codec; 
    enc_ctx->height = dec_ctx->height; 
    enc_ctx->width = dec_ctx->width; 
    enc_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio; 
    /* take first format from list of supported formats */ 
    enc_ctx->pix_fmt = encoder->pix_fmts[0]; 
    /* video time_base can be set to whatever is handy and supported by encoder */ 
    enc_ctx->time_base = dec_ctx->time_base; 
    /* enc_ctx->time_base.num = 1; */ 
    /* enc_ctx->time_base.den = 24; */ 

    enc_ctx->bit_rate = videoMaxBitRate; 
    enc_ctx->thread_count = threads; 
    switch (pass) { 
    case 1: 
     enc_ctx->flags |= CODEC_FLAG_PASS1; 
     break; 
    case 2: 
     enc_ctx->flags |= CODEC_FLAG_PASS2; 
     if (stats) { 
    enc_ctx->stats_in = stats; 
     } 
     break; 
    } 

    char crfString[3]; 
    snprintf(crfString, 3, "%d", crf); 
    av_dict_set(&opts, "crf", crfString, 0); 
    av_dict_set(&opts, "quality", quality, 0); 
    char speedString[3]; 
    snprintf(speedString, 3, "%d", speed); 
    av_dict_set(&opts, "speed", speedString, 0); 
    } else { 
    encoder = avcodec_find_encoder(AV_CODEC_ID_VORBIS); 
    out_stream = avformat_new_stream(ofmt_ctx, encoder); 
    if (!out_stream) { 
     av_log(NULL, AV_LOG_ERROR, "Failed allocating output stream\n"); 
     return AVERROR_UNKNOWN; 
    } 

    /* in_stream = ifmt_ctx->streams[i]; */ 
    /* dec_ctx = in_stream->codec; */ 
    enc_ctx = out_stream->codec; 
    /* encoder = out_stream->codec->codec; */ 

    enc_ctx->sample_rate = dec_ctx->sample_rate; 
    enc_ctx->channel_layout = dec_ctx->channel_layout; 
    enc_ctx->channels = av_get_channel_layout_nb_channels(enc_ctx->channel_layout); 
    /* take first format from list of supported formats */ 
    enc_ctx->sample_fmt = encoder->sample_fmts[0]; 
    enc_ctx->time_base = (AVRational){1, enc_ctx->sample_rate}; 
    enc_ctx->bit_rate = audioBitRate; 
    } 

    /* Open codec with the set options */ 
    ret = avcodec_open2(enc_ctx, encoder, &opts); 
    if (ret < 0) { 
    av_log(NULL, AV_LOG_ERROR, "Cannot open video encoder for stream #%u\n", i); 
    return ret; 
    } 
    int unused = av_dict_count(opts); 
    if (unused > 0) { 
    av_log(NULL, AV_LOG_WARNING, "%d unused options\n", unused); 
    } 
    /* } else if (dec_ctx->codec_type == AVMEDIA_TYPE_UNKNOWN) { */ 
} else { 
    av_log(NULL, AV_LOG_FATAL, "Elementary stream #%d is of unknown type, cannot proceed\n", i); 
    return AVERROR_INVALIDDATA; 
} /* else { */ 
    /* /\* if this stream must be remuxed *\/ */ 
    /* ret = avcodec_copy_context(ofmt_ctx->streams[i]->codec, */ 
    /*    ifmt_ctx->streams[i]->codec); */ 
    /* if (ret < 0) { */ 
    /* av_log(NULL, AV_LOG_ERROR, "Copying stream context failed\n"); */ 
    /* return ret; */ 
    /* } */ 
    /* } */ 

if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) 
    enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER; 
    } 

    return 0; 
} 

/**
 * Open the output file (when the muxer needs one) and write the container
 * header.
 *
 * @param filename output path; also used for the format dump
 * @return 0 on success, a negative AVERROR code on failure
 */
static int open_output_file(const char *filename) {
    int needs_file = !(ofmt_ctx->oformat->flags & AVFMT_NOFILE);
    int ret;

    av_dump_format(ofmt_ctx, 0, filename, 1);

    if (needs_file) {
        ret = avio_open(&ofmt_ctx->pb, filename, AVIO_FLAG_WRITE);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Could not open output file '%s'\n", filename);
            return ret;
        }
    }

    /* init muxer, write output file header */
    ret = avformat_write_header(ofmt_ctx, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error occurred when opening output file\n");
        /* Do not leave the file handle opened above dangling on failure. */
        if (needs_file) {
            avio_close(ofmt_ctx->pb);
            ofmt_ctx->pb = NULL;
        }
        return ret;
    }

    return 0;
}

/**
 * Build and configure the filter graph of a single stream.
 *
 * The graph is "buffer source -> filter_spec -> buffer sink". The source is
 * described with the decoder's parameters and the sink is constrained to the
 * formats the encoder was set up with.
 *
 * @param fctx        receives the graph and its endpoints on success; left
 *                    untouched on failure
 * @param dec_ctx     decoder context describing the frames fed to the graph;
 *                    for audio a missing channel layout is filled in with the
 *                    default layout for its channel count
 * @param enc_ctx     encoder context whose formats the sink must deliver
 * @param filter_spec textual description of the filter chain
 * @return 0 on success, a negative AVERROR code on failure
 */
static int init_filter(FilteringContext* fctx, AVCodecContext *dec_ctx,
                       AVCodecContext *enc_ctx, const char *filter_spec) {
    char args[512];
    int ret = 0;
    AVFilter *buffersrc = NULL;
    AVFilter *buffersink = NULL;
    AVFilterContext *buffersrc_ctx = NULL;
    AVFilterContext *buffersink_ctx = NULL;
    AVFilterInOut *outputs = avfilter_inout_alloc();
    AVFilterInOut *inputs = avfilter_inout_alloc();
    AVFilterGraph *filter_graph = avfilter_graph_alloc();

    if (!outputs || !inputs || !filter_graph) {
        ret = AVERROR(ENOMEM);
        goto end;
    }

    if (dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
        buffersrc = avfilter_get_by_name("buffer");
        buffersink = avfilter_get_by_name("buffersink");
        if (!buffersrc || !buffersink) {
            av_log(NULL, AV_LOG_ERROR, "filtering source or sink element not found\n");
            ret = AVERROR_UNKNOWN;
            goto end;
        }

        snprintf(args, sizeof(args),
                 "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
                 dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt,
                 dec_ctx->time_base.num, dec_ctx->time_base.den,
                 dec_ctx->sample_aspect_ratio.num,
                 dec_ctx->sample_aspect_ratio.den);

        ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in",
                                           args, NULL, filter_graph);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Cannot create buffer source\n");
            goto end;
        }

        ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out",
                                           NULL, NULL, filter_graph);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Cannot create buffer sink\n");
            goto end;
        }

        ret = av_opt_set_bin(buffersink_ctx, "pix_fmts",
                             (uint8_t*)&enc_ctx->pix_fmt, sizeof(enc_ctx->pix_fmt),
                             AV_OPT_SEARCH_CHILDREN);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Cannot set output pixel format\n");
            goto end;
        }
    } else if (dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
        buffersrc = avfilter_get_by_name("abuffer");
        buffersink = avfilter_get_by_name("abuffersink");
        if (!buffersrc || !buffersink) {
            av_log(NULL, AV_LOG_ERROR, "filtering source or sink element not found\n");
            ret = AVERROR_UNKNOWN;
            goto end;
        }

        /* The source description needs a layout; fall back to the default
         * layout for the decoder's channel count when none is set. */
        if (!dec_ctx->channel_layout)
            dec_ctx->channel_layout =
                av_get_default_channel_layout(dec_ctx->channels);
        snprintf(args, sizeof(args),
                 "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%"PRIx64,
                 dec_ctx->time_base.num, dec_ctx->time_base.den, dec_ctx->sample_rate,
                 av_get_sample_fmt_name(dec_ctx->sample_fmt),
                 dec_ctx->channel_layout);
        ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in",
                                           args, NULL, filter_graph);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n");
            goto end;
        }

        ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out",
                                           NULL, NULL, filter_graph);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n");
            goto end;
        }

        ret = av_opt_set_bin(buffersink_ctx, "sample_fmts",
                             (uint8_t*)&enc_ctx->sample_fmt, sizeof(enc_ctx->sample_fmt),
                             AV_OPT_SEARCH_CHILDREN);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
            goto end;
        }

        ret = av_opt_set_bin(buffersink_ctx, "channel_layouts",
                             (uint8_t*)&enc_ctx->channel_layout,
                             sizeof(enc_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
            goto end;
        }

        ret = av_opt_set_bin(buffersink_ctx, "sample_rates",
                             (uint8_t*)&enc_ctx->sample_rate, sizeof(enc_ctx->sample_rate),
                             AV_OPT_SEARCH_CHILDREN);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
            goto end;
        }
    } else {
        ret = AVERROR_UNKNOWN;
        goto end;
    }

    /* Endpoints for the filter graph: the parsed chain's input is fed by the
     * buffer source ("in") and its output is consumed by the sink ("out"). */
    outputs->name       = av_strdup("in");
    outputs->filter_ctx = buffersrc_ctx;
    outputs->pad_idx    = 0;
    outputs->next       = NULL;

    inputs->name       = av_strdup("out");
    inputs->filter_ctx = buffersink_ctx;
    inputs->pad_idx    = 0;
    inputs->next       = NULL;

    if (!outputs->name || !inputs->name) {
        ret = AVERROR(ENOMEM);
        goto end;
    }

    if ((ret = avfilter_graph_parse_ptr(filter_graph, filter_spec,
                                        &inputs, &outputs, NULL)) < 0)
        goto end;

    if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0)
        goto end;

    /* Fill FilteringContext */
    fctx->buffersrc_ctx = buffersrc_ctx;
    fctx->buffersink_ctx = buffersink_ctx;
    fctx->filter_graph = filter_graph;
    /* Success is reported as exactly 0 so callers may test for non-zero. */
    ret = 0;

end:
    avfilter_inout_free(&inputs);
    avfilter_inout_free(&outputs);
    /* On failure nobody else holds the graph; free it together with the
     * filter contexts created inside it. */
    if (ret < 0)
        avfilter_graph_free(&filter_graph);

    return ret;
}

static int init_filters(enum AVCodecID audioCodec) { 
    const char *filter_spec; 
    unsigned int i; 
    int ret; 
    filter_ctx = av_malloc_array(ifmt_ctx->nb_streams, sizeof(*filter_ctx)); 
    if (!filter_ctx) 
return AVERROR(ENOMEM); 

    for (i = 0; i < ifmt_ctx->nb_streams; i++) { 
filter_ctx[i].buffersrc_ctx = NULL; 
filter_ctx[i].buffersink_ctx = NULL; 
filter_ctx[i].filter_graph = NULL; 
/* Skip streams that are neither audio nor video */ 
if (!(ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO 
     || ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)) 
    continue; 


if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) 
    filter_spec = "null"; /* passthrough (dummy) filter for video */ 
else 
    /* TODO: make this more general */ 
    if (audioCodec == AV_CODEC_ID_VORBIS) { 
    filter_spec = "asetnsamples=n=64"; 
    } else { 
    /* filter_spec = "null"; /\* passthrough (dummy) filter for audio *\/ */ 
    filter_spec = "fps=24"; 
    /* filter_spec = "settb=expr=1/24"; */ 
    } 
ret = init_filter(&filter_ctx[i], ifmt_ctx->streams[i]->codec, 
      ofmt_ctx->streams[i]->codec, filter_spec); 
if (ret) 
    return ret; 
    } 
    return 0; 
} 

/**
 * Encode one frame and mux the resulting packet, if any.
 *
 * The frame is always freed here, whether or not encoding succeeds.
 *
 * @param filt_frame   frame to encode, or NULL to drain the encoder
 * @param stream_index index of the stream in both input and output contexts
 * @param got_frame    optional; receives non-zero when a packet was produced
 * @return 0 or the muxer's result on success, a negative AVERROR on failure
 */
static int encode_write_frame(AVFrame *filt_frame, unsigned int stream_index, int *got_frame) {
    AVStream *ost = ofmt_ctx->streams[stream_index];
    int is_video = ifmt_ctx->streams[stream_index]->codec->codec_type ==
                   AVMEDIA_TYPE_VIDEO;
    int produced_local;
    int err;
    AVPacket pkt;

    /* Callers that do not care still need somewhere for the flag to land. */
    if (!got_frame)
        got_frame = &produced_local;

    /* encode filtered frame; with no data buffer set the encoder allocates
     * the packet payload itself */
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;
    if (is_video)
        err = avcodec_encode_video2(ost->codec, &pkt, filt_frame, got_frame);
    else
        err = avcodec_encode_audio2(ost->codec, &pkt, filt_frame, got_frame);
    av_frame_free(&filt_frame);
    if (err < 0)
        return err;
    if (!*got_frame)
        return 0;

    /* prepare packet for muxing: timestamps move from the encoder time base
     * to the output stream time base */
    pkt.stream_index = stream_index;
    pkt.dts = av_rescale_q_rnd(pkt.dts, ost->codec->time_base, ost->time_base,
                               AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
    pkt.pts = av_rescale_q_rnd(pkt.pts, ost->codec->time_base, ost->time_base,
                               AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
    pkt.duration = av_rescale_q(pkt.duration, ost->codec->time_base, ost->time_base);

    /* mux encoded frame */
    return av_interleaved_write_frame(ofmt_ctx, &pkt);
}

/**
 * Push one decoded frame through the stream's filter graph, then encode and
 * mux every frame the graph hands back.
 *
 * @param frame        decoded frame, or NULL to flush the graph
 * @param stream_index index of the stream the frame belongs to
 * @return 0 on success, a negative AVERROR code on failure
 */
static int filter_encode_write_frame(AVFrame *frame, unsigned int stream_index) {
    FilteringContext *fc = &filter_ctx[stream_index];
    int err;

    /* push the decoded frame into the filtergraph */
    err = av_buffersrc_add_frame_flags(fc->buffersrc_ctx, frame, 0);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
        return err;
    }

    /* pull filtered frames from the filtergraph until it runs dry */
    for (;;) {
        AVFrame *filtered = av_frame_alloc();

        if (!filtered)
            return AVERROR(ENOMEM);

        err = av_buffersink_get_frame(fc->buffersink_ctx, filtered);
        if (err < 0) {
            av_frame_free(&filtered);
            /* EAGAIN: nothing more for now; EOF: flushed and drained.
             * Both are a normal end of this call, not a failure. */
            if (err == AVERROR(EAGAIN) || err == AVERROR_EOF)
                return 0;
            return err;
        }

        filtered->pict_type = AV_PICTURE_TYPE_NONE;
        /* encode_write_frame() takes care of freeing the frame */
        err = encode_write_frame(filtered, stream_index, NULL);
        if (err < 0)
            return err;
    }
}

/**
 * Drain the packets still buffered inside a stream's encoder.
 *
 * Encoders without CODEC_CAP_DELAY are skipped.
 *
 * @param stream_index index of the output stream whose encoder is drained
 * @return 0 on success, a negative AVERROR code on failure
 */
static int flush_encoder(unsigned int stream_index) {
    int caps = ofmt_ctx->streams[stream_index]->codec->codec->capabilities;
    int produced;
    int err;

    if (!(caps & CODEC_CAP_DELAY))
        return 0;

    /* Keep feeding NULL frames until the encoder stops producing packets. */
    do {
        av_log(NULL, AV_LOG_INFO, "Flushing stream #%u encoder\n", stream_index);
        err = encode_write_frame(NULL, stream_index, &produced);
        if (err < 0)
            return err;
    } while (produced);

    return 0;
}

/**
 * Run one complete pass: read every packet, decode/filter/encode the streams
 * that have a filter graph, remux the rest, flush everything, write the
 * trailer and, for a first pass, store the encoder statistics in STATS_LOG.
 *
 * All contexts opened for this pass (ifmt_ctx, ofmt_ctx, filter_ctx) are
 * released before returning, on success and on failure alike.
 *
 * A failure to read or decode a packet ends the read loop; what has been
 * encoded so far is still flushed and finalized.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int transcode(void) {
    int ret;
    AVPacket packet = { .data = NULL, .size = 0 };
    AVFrame *frame = NULL;
    enum AVMediaType type;
    unsigned int stream_index;
    unsigned int i;
    int got_frame;
    int (*dec_func)(AVCodecContext *, AVFrame *, int *, const AVPacket *);

    /* read all packets */
    while (1) {
        if ((ret = av_read_frame(ifmt_ctx, &packet)) < 0)
            break;
        stream_index = packet.stream_index;
        type = ifmt_ctx->streams[packet.stream_index]->codec->codec_type;
        av_log(NULL, AV_LOG_DEBUG, "Demuxer gave frame of stream_index %u\n",
               stream_index);

        if (filter_ctx[stream_index].filter_graph) {
            av_log(NULL, AV_LOG_DEBUG, "Going to reencode&filter the frame\n");
            frame = av_frame_alloc();
            if (!frame) {
                ret = AVERROR(ENOMEM);
                break;
            }
            /* Timestamps go from the stream time base to the decoder time
             * base, which is also the time base the filter graph was given. */
            packet.dts = av_rescale_q_rnd(packet.dts,
                                          ifmt_ctx->streams[stream_index]->time_base,
                                          ifmt_ctx->streams[stream_index]->codec->time_base,
                                          AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
            packet.pts = av_rescale_q_rnd(packet.pts,
                                          ifmt_ctx->streams[stream_index]->time_base,
                                          ifmt_ctx->streams[stream_index]->codec->time_base,
                                          AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
            dec_func = (type == AVMEDIA_TYPE_VIDEO) ? avcodec_decode_video2 :
                                                      avcodec_decode_audio4;
            ret = dec_func(ifmt_ctx->streams[stream_index]->codec, frame,
                           &got_frame, &packet);
            if (ret < 0) {
                av_frame_free(&frame);
                av_log(NULL, AV_LOG_ERROR, "Decoding failed\n");
                break;
            }

            if (got_frame) {
                frame->pts = av_frame_get_best_effort_timestamp(frame);
                ret = filter_encode_write_frame(frame, stream_index);
                av_frame_free(&frame);
                if (ret < 0)
                    goto end;
            } else {
                av_frame_free(&frame);
            }
        } else {
            /* remux this frame without reencoding */
            packet.dts = av_rescale_q_rnd(packet.dts,
                                          ifmt_ctx->streams[stream_index]->time_base,
                                          ofmt_ctx->streams[stream_index]->time_base,
                                          AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
            packet.pts = av_rescale_q_rnd(packet.pts,
                                          ifmt_ctx->streams[stream_index]->time_base,
                                          ofmt_ctx->streams[stream_index]->time_base,
                                          AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);

            ret = av_interleaved_write_frame(ofmt_ctx, &packet);
            if (ret < 0)
                goto end;
        }
        av_free_packet(&packet);
    }

    /* Running out of input is the normal way to leave the loop above and
     * must not be reported as a failure. */
    if (ret == AVERROR_EOF)
        ret = 0;

    /* flush filters and encoders */
    for (i = 0; i < ifmt_ctx->nb_streams; i++) {
        /* flush filter */
        if (!filter_ctx[i].filter_graph)
            continue;
        ret = filter_encode_write_frame(NULL, i);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Flushing filter failed\n");
            goto end;
        }

        /* flush encoder */
        ret = flush_encoder(i);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Flushing encoder failed\n");
            goto end;
        }
    }

    {
        int trailer_ret = av_write_trailer(ofmt_ctx);
        if (trailer_ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Error writing trailer\n");
            ret = trailer_ret;
            goto end;
        }
    }

    // Retrieve and store the first instance of codec statistics
    // TODO: less naive, deal with multiple instances of statistics
    for (i = 0; i < ofmt_ctx->nb_streams; i++) {
        AVCodecContext* codec = ofmt_ctx->streams[i]->codec;
        if ((codec->flags & CODEC_FLAG_PASS1) && (codec->stats_out)) {
            FILE* logfile = fopen(STATS_LOG, "wb");
            if (!logfile) {
                /* Read errno before any other call can overwrite it. */
                ret = AVERROR(errno);
                av_log(NULL, AV_LOG_ERROR, "Could not open %s for writing\n", STATS_LOG);
                goto end;
            }
            fprintf(logfile, "%s", codec->stats_out);
            if (fclose(logfile) < 0) {
                av_log(NULL, AV_LOG_ERROR, "Error closing log file.\n");
            }
            break;
        }
    }

    av_log(NULL, AV_LOG_INFO, "output duration = %" PRId64 "\n", ofmt_ctx->duration);

end:
    av_free_packet(&packet);
    av_frame_free(&frame);
    for (i = 0; i < ifmt_ctx->nb_streams; i++) {
        avcodec_close(ifmt_ctx->streams[i]->codec);
        if (ofmt_ctx && ofmt_ctx->nb_streams > i && ofmt_ctx->streams[i] && ofmt_ctx->streams[i]->codec)
            avcodec_close(ofmt_ctx->streams[i]->codec);
        if (filter_ctx && filter_ctx[i].filter_graph)
            avfilter_graph_free(&filter_ctx[i].filter_graph);
    }
    av_free(filter_ctx);
    filter_ctx = NULL;
    avformat_close_input(&ifmt_ctx);
    if (ofmt_ctx && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE))
        avio_close(ofmt_ctx->pb);
    avformat_free_context(ofmt_ctx);
    /* The context is gone; do not leave the global pointing at it. */
    ofmt_ctx = NULL;

    if (ret < 0)
        av_log(NULL, AV_LOG_ERROR, "Error occurred: %s\n", av_err2str(ret));

    /* Callers test for a negative value, so hand the AVERROR code back
     * instead of collapsing it to 1. */
    return ret < 0 ? ret : 0;
}

int TranscodeToWebM(char* inputPath, char* outputPath, int audioBitRate, int crf, int videoMaxBitRate, int threads, 
     char* quality, int speed) { 
    int ret; 
    unsigned int pass; 
    char* stats = NULL; 

    av_register_all(); 
    avfilter_register_all(); 

    for (pass = 1; pass <= 2; pass++) { 
if ((ret = open_input_file(inputPath)) < 0) 
    goto end; 

if ((ret = init_output_context(outputPath)) < 0) 
    goto end; 

if (pass == 2) { 
    size_t stats_length; 
    if (cmdutils_read_file(STATS_LOG, &stats, &stats_length) < 0) { 
    av_log(NULL, AV_LOG_ERROR, "Error reading stats file.\n"); 
    break; 
    } 
} 

if ((ret = init_webm_encoders(audioBitRate, crf, videoMaxBitRate, threads, quality, speed, pass, stats)) < 0) 
    goto end; 

if ((ret = open_output_file(outputPath)) < 0) 
    goto end; 

if ((ret = init_filters(AV_CODEC_ID_VORBIS)) < 0) 
    goto end; 

if ((ret = transcode()) < 0) 
    goto end; 
    } 

    if (remove(STATS_LOG) != 0) { 
av_log(NULL, AV_LOG_ERROR, "Failed to remove %s\n", STATS_LOG); 
    } 

    end: 
    if (ret < 0) { 
av_log(NULL, AV_LOG_ERROR, "Error occurred: %s\n", av_err2str(ret)); 
return ret; 
    } 

    return 0; 
} 

這是我試圖模仿的ffmpeg命令的輸出。

ffmpeg version N-62301-g59a5384 Copyright (c) 2000-2014 the FFmpeg developers 
    built on Apr 9 2014 09:58:44 with gcc 4.8.2 (GCC) 20140206 (prerelease) 
    configuration: --prefix=/opt/ffmpeg --extra-cflags=-I/opt/x264/include --extra-ldflags=-L/opt/x264/lib --extra-libs=-ldl --enable-gpl --enable-nonfree --enable-libfdk-aac --enable-libopus --enable-libvorbis --enable-libvpx --enable-libx264 
    libavutil  52. 75.100/52. 75.100 
    libavcodec  55. 58.103/55. 58.103 
    libavformat 55. 36.102/55. 36.102 
    libavdevice 55. 11.100/55. 11.100 
    libavfilter  4. 3.100/4. 3.100 
    libswscale  2. 6.100/2. 6.100 
    libswresample 0. 18.100/0. 18.100 
    libpostproc 52. 3.100/52. 3.100 
Input #0, matroska,webm, from '/mnt/scratch/test_source/Sintel.2010.720p.mkv': 
    Metadata: 
encoder   : libebml v1.0.0 + libmatroska v1.0.0 
creation_time : 2011-04-24 17:20:33 
    Duration: 00:14:48.03, start: 0.000000, bitrate: 6071 kb/s 
Chapter #0.0: start 0.000000, end 103.125000 
Metadata: 
    title   : Chapter 01 
Chapter #0.1: start 103.125000, end 148.667000 
Metadata: 
    title   : Chapter 02 
Chapter #0.2: start 148.667000, end 349.792000 
Metadata: 
    title   : Chapter 03 
Chapter #0.3: start 349.792000, end 437.208000 
Metadata: 
    title   : Chapter 04 
Chapter #0.4: start 437.208000, end 472.075000 
Metadata: 
    title   : Chapter 05 
Chapter #0.5: start 472.075000, end 678.833000 
Metadata: 
    title   : Chapter 06 
Chapter #0.6: start 678.833000, end 744.083000 
Metadata: 
    title   : Chapter 07 
Chapter #0.7: start 744.083000, end 888.032000 
Metadata: 
    title   : Chapter 08 
Stream #0:0(eng): Video: h264 (High), yuv420p(tv, bt709), 1280x544, SAR 1:1 DAR 40:17, 24 fps, 24 tbr, 1k tbn, 48 tbc 
Stream #0:1(eng): Audio: ac3, 48000 Hz, 5.1(side), fltp, 640 kb/s 
Metadata: 
    title   : AC3 5.1 @ 640 Kbps 
Stream #0:2(ger): Subtitle: subrip 
Stream #0:3(eng): Subtitle: subrip 
Stream #0:4(spa): Subtitle: subrip 
Stream #0:5(fre): Subtitle: subrip 
Stream #0:6(ita): Subtitle: subrip 
Stream #0:7(dut): Subtitle: subrip 
Stream #0:8(pol): Subtitle: subrip 
Stream #0:9(por): Subtitle: subrip 
Stream #0:10(rus): Subtitle: subrip 
Stream #0:11(vie): Subtitle: subrip 
[libvpx @ 0x24b74c0] v1.3.0 
Output #0, webm, to '/mnt/scratch/test_out/Sintel.2010.720p.script.webm': 
    Metadata: 
encoder   : Lavf55.36.102 
Chapter #0.0: start 0.000000, end 103.125000 
Metadata: 
    title   : Chapter 01 
Chapter #0.1: start 103.125000, end 148.667000 
Metadata: 
    title   : Chapter 02 
Chapter #0.2: start 148.667000, end 349.792000 
Metadata: 
    title   : Chapter 03 
Chapter #0.3: start 349.792000, end 437.208000 
Metadata: 
    title   : Chapter 04 
Chapter #0.4: start 437.208000, end 472.075000 
Metadata: 
    title   : Chapter 05 
Chapter #0.5: start 472.075000, end 678.833000 
Metadata: 
    title   : Chapter 06 
Chapter #0.6: start 678.833000, end 744.083000 
Metadata: 
    title   : Chapter 07 
Chapter #0.7: start 744.083000, end 888.032000 
Metadata: 
    title   : Chapter 08 
Stream #0:0(eng): Video: vp8 (libvpx), yuv420p, 1280x544 [SAR 1:1 DAR 40:17], q=-1--1, pass 2, 60000 kb/s, 1k tbn, 24 tbc 
Stream #0:1(eng): Audio: vorbis (libvorbis), 48000 Hz, 5.1(side), fltp, 384 kb/s 
Metadata: 
    title   : AC3 5.1 @ 640 Kbps 
Stream mapping: 
    Stream #0:0 -> #0:0 (h264 -> libvpx) 
    Stream #0:1 -> #0:1 (ac3 -> libvorbis) 
Press [q] to stop, [?] for help 
frame=21312 fps= 11 q=0.0 Lsize= 567191kB time=00:14:48.01 bitrate=5232.4kbits/s  
video:537377kB audio:29266kB subtitle:0kB other streams:0kB global headers:7kB muxing overhead: 0.096885% 

回答

1

看來,你的問題在於:需要用 av_rescale_q 在輸入格式的 PTS/DTS 與輸出格式的 PTS/DTS 之間進行換算。

不要隨意填入任意的數值;請繼續像現在這樣,從格式上下文和編解碼器上下文中取得這些數值。

我寫過一些關於 PTS/DTS 的說明(bit about PTS/DTS),可能對您有用。在你的情況下,儘管你已經有了「正確」的 PTS/DTS,仍然需要把它們換算成輸出端的 PTS/DTS。

+0

編碼後數據包的 PTS 和 DTS 每幀遞增 2。這一現象以及播放速度減半,都是因爲解碼端 AVCodecContext 的 ticks_per_frame 字段爲 2。該字段的文檔很有啓發性。把 PTS 和 DTS 除以 ticks_per_frame 可以修正持續時間;在 time_base 中把它計算進去則可以修正幀速率。 –