C# - ffmpeg(FFmpeg.AutoGen)를 이용한 muxing.c 예제 포팅

지난 예제에 이어,

C# - ffmpeg(FFmpeg.AutoGen)를 이용한 avio_reading.c 예제 포팅

이번에는 ffmpeg 예제 중 "muxing.c" 파일을 FFmpeg.AutoGen으로 포팅하겠습니다.

using FFmpeg.AutoGen;
using FFmpeg.AutoGen.Example;
using System;
using System.Diagnostics;

namespace ffmpeg_pgm_solve
    internal unsafe class Program
        // Length of the generated streams. It is compared against next_pts using a 1/1 time base
        // (i.e. in seconds) to decide when frame generation stops.
        const float STREAM_DURATION = 10.0f;
        // Denominator of the video stream time base (1/25), i.e. frames per second.
        const int STREAM_FRAME_RATE = 25;
        // Pixel format assigned to the video encoder context.
        const AVPixelFormat STREAM_PIX_FMT = AVPixelFormat.AV_PIX_FMT_YUV420P;
        // Scaler flags passed to sws_getContext when a pixel-format conversion is required.
        const int SCALE_FLAGS = ffmpeg.SWS_BICUBIC;

        /// <summary>
        /// Generates a synthetic video and audio stream, encodes both with the default codecs of the
        /// output container and muxes them into a single media file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused; the output path is hard-coded).</param>
        /// <returns>0 on success; 1 when the output context, the output file or the header could not be set up.</returns>
        static int Main(string[] args)
        {
            // NOTE(review): if the native FFmpeg binaries are not on the default search path they must be
            // registered before the first ffmpeg.* call - confirm for your setup.
            Console.WriteLine("Current directory: " + Environment.CurrentDirectory);
            Console.WriteLine("Running in {0}-bit mode.", Environment.Is64BitProcess ? "64" : "32");
            Console.WriteLine($"FFmpeg version info: {ffmpeg.av_version_info()}");
            Console.WriteLine($"LIBAVFORMAT Version: {ffmpeg.LIBAVFORMAT_VERSION_MAJOR}.{ffmpeg.LIBAVFORMAT_VERSION_MINOR}");

            OutputStream video_st = new OutputStream();
            OutputStream audio_st = new OutputStream();

            AVOutputFormat* fmt;
            AVFormatContext* oc;
            AVCodec* audio_codec = null;
            AVCodec* video_codec = null;
            int ret;
            int have_video = 0;
            int have_audio = 0;
            bool encode_video = false, encode_audio = false;
            AVDictionary* opt = null;

            string filename = @"C:\temp\output\test.mp4";

            // Let libavformat pick the container from the file extension; fall back to MPEG.
            ffmpeg.avformat_alloc_output_context2(&oc, null, null, filename);
            if (oc == null)
            {
                Console.WriteLine("Could not deduce output format from file extension: using MPEG.");
                ffmpeg.avformat_alloc_output_context2(&oc, null, "mpeg", filename);
            }

            if (oc == null)
            {
                return 1;
            }

            fmt = oc->oformat;

            // Add the audio and video streams using the container's default codecs.
            if (fmt->video_codec != AVCodecID.AV_CODEC_ID_NONE)
            {
                add_stream(&video_st, oc, &video_codec, fmt->video_codec);
                have_video = 1;
                encode_video = true;
            }

            if (fmt->audio_codec != AVCodecID.AV_CODEC_ID_NONE)
            {
                add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
                have_audio = 1;
                encode_audio = true;
            }

            // All parameters are set: open the codecs and allocate the encode buffers.
            if (have_video == 1)
            {
                open_video(oc, video_codec, &video_st, opt);
            }

            if (have_audio == 1)
            {
                open_audio(oc, audio_codec, &audio_st, opt);
            }

            ffmpeg.av_dump_format(oc, 0, filename, 1);

            // Open the output file unless the muxer does its own I/O.
            if ((fmt->flags & ffmpeg.AVFMT_NOFILE) == 0)
            {
                ret = ffmpeg.avio_open(&oc->pb, filename, ffmpeg.AVIO_FLAG_WRITE);
                if (ret < 0)
                {
                    Console.WriteLine($"Could not open '{filename}': {FFmpegHelper.av_strerror(ret)}");
                    return 1;
                }
            }

            ret = ffmpeg.avformat_write_header(oc, &opt);
            if (ret < 0)
            {
                Console.WriteLine($"Error occurred when opening output file: {FFmpegHelper.av_strerror(ret)}");
                return 1;
            }

            // Always encode the stream that is furthest behind so that packets stay interleaved.
            while (encode_video || encode_audio)
            {
                if (encode_video && (!encode_audio || ffmpeg.av_compare_ts(video_st.next_pts, video_st.enc->time_base, audio_st.next_pts, audio_st.enc->time_base) <= 0))
                {
                    encode_video = write_video_frame(oc, &video_st) == 0;
                }
                else
                {
                    encode_audio = write_audio_frame(oc, &audio_st) == 0;
                }
            }

            // The trailer must be written before the codec contexts are closed; without it
            // the container is left unfinalized.
            ffmpeg.av_write_trailer(oc);

            if (have_video == 1)
            {
                close_stream(oc, &video_st);
            }

            if (have_audio == 1)
            {
                close_stream(oc, &audio_st);
            }

            if ((fmt->flags & ffmpeg.AVFMT_NOFILE) == 0)
            {
                // Flush and close the output file.
                ffmpeg.avio_closep(&oc->pb);
            }

            // Release the muxer context together with its streams.
            ffmpeg.avformat_free_context(oc);

            return 0;
        }

        /// <summary>
        /// Prints the timing fields of a packet (pts, dts, duration) both as raw tick values and
        /// scaled by the time base of the stream the packet belongs to.
        /// </summary>
        /// <param name="fmt_ctx">Format context whose stream table is indexed by the packet's stream_index.</param>
        /// <param name="pkt">Packet to describe.</param>
        public unsafe static void log_packet(AVFormatContext* fmt_ctx, AVPacket* pkt)
        {
            AVStream* stream = fmt_ctx->streams[pkt->stream_index];
            AVRational* time_base = &stream->time_base;

            string line = $"pts:{pkt->pts,9:0,0} pts_time: {av_q2d(pkt->pts, time_base),10:#0.000000} dts: {pkt->dts,9:0,0} dts_time: {av_q2d(pkt->dts, time_base),10:#0.000000} duration: {pkt->duration,5:0,0} duration_time: {av_q2d(pkt->duration, time_base):0.000000} stream_index: {pkt->stream_index}";
            Console.WriteLine(line);
        }

        /// <summary>
        /// Scales a tick count by a rational time base.
        /// </summary>
        /// <param name="ts">Tick count to scale.</param>
        /// <param name="ar">Rational whose num/den quotient is used as the scale factor.</param>
        /// <returns>(num / den) * ts as a double.</returns>
        public static double av_q2d(long ts, AVRational* ar)
        {
            double unit = ar->num / (double)ar->den;
            return unit * ts;
        }

        /// <summary>
        /// Sends one frame to the encoder, then drains every packet the encoder has ready and
        /// writes each of them to the output with interleaving.
        /// </summary>
        /// <param name="fmt_ctx">Output format context the packets are written to.</param>
        /// <param name="c">Encoder context.</param>
        /// <param name="st">Stream the packets belong to; supplies the target time base and stream index.</param>
        /// <param name="frame">Frame to encode, passed straight to avcodec_send_frame (may be null).</param>
        /// <param name="pkt">Reusable packet that receives the encoder output.</param>
        /// <returns>1 when the encoder reported AVERROR_EOF, otherwise 0.</returns>
        /// <exception cref="ApplicationException">Sending, encoding or writing failed.</exception>
        public static int write_frame(AVFormatContext* fmt_ctx, AVCodecContext* c, AVStream* st, AVFrame* frame, AVPacket* pkt)
        {
            int ret = ffmpeg.avcodec_send_frame(c, frame);
            if (ret < 0)
            {
                // Merely logging here would return 0 and keep the caller's encode loop spinning.
                throw new ApplicationException($"Error sending a frame to the encoder: {ret}");
            }

            while (ret >= 0)
            {
                ret = ffmpeg.avcodec_receive_packet(c, pkt);
                if (ret == ffmpeg.AVERROR(ffmpeg.EAGAIN) || ret == ffmpeg.AVERROR_EOF)
                {
                    // No packet available right now (EAGAIN) or the encoder is fully drained (EOF).
                    break;
                }

                if (ret < 0)
                {
                    throw new ApplicationException($"Error encoding a frame: {ret}");
                }

                // Rescale the packet timestamps from the codec time base to the stream time base.
                ffmpeg.av_packet_rescale_ts(pkt, c->time_base, st->time_base);
                pkt->stream_index = st->index;

                log_packet(fmt_ctx, pkt);
                ret = ffmpeg.av_interleaved_write_frame(fmt_ctx, pkt);

                if (ret < 0)
                {
                    throw new ApplicationException($"Error while writing output packet: {ret}");
                }
            }

            return ret == ffmpeg.AVERROR_EOF ? 1 : 0;
        }

        /// <summary>
        /// Looks up the encoder for <paramref name="codec_id"/>, adds a new stream to the output
        /// context and fills an encoder context with default audio or video parameters.
        /// </summary>
        /// <param name="ost">Receives the new stream, the encoder context and the reusable packet.</param>
        /// <param name="oc">Output format context the stream is added to.</param>
        /// <param name="codec">Receives the encoder that was found.</param>
        /// <param name="codec_id">Codec to encode the stream with.</param>
        /// <exception cref="ApplicationException">
        /// The encoder was not found, an allocation failed, or the encoder is neither audio nor video.
        /// </exception>
        public unsafe static void add_stream(OutputStream* ost, AVFormatContext* oc, AVCodec** codec, AVCodecID codec_id)
        {
            AVCodecContext* c;
            int i;

            *codec = ffmpeg.avcodec_find_encoder(codec_id);
            if (*codec == null)
            {
                throw new ApplicationException($"Could not find encoder for '{ffmpeg.avcodec_get_name(codec_id)}'");
            }

            ost->tmp_pkt = ffmpeg.av_packet_alloc();
            if (ost->tmp_pkt == null)
            {
                throw new ApplicationException("Could not allocate AVPacket");
            }

            ost->st = ffmpeg.avformat_new_stream(oc, null);
            if (ost->st == null)
            {
                throw new ApplicationException("Could not allocate stream");
            }

            ost->st->id = (int)(oc->nb_streams - 1);
            c = ffmpeg.avcodec_alloc_context3(*codec);
            if (c == null)
            {
                throw new ApplicationException("Could not alloc an encoding context");
            }

            ost->enc = c;

            switch ((*codec)->type)
            {
                case AVMediaType.AVMEDIA_TYPE_AUDIO:
                    c->sample_fmt = (*codec)->sample_fmts != null ? (*codec)->sample_fmts[0] : AVSampleFormat.AV_SAMPLE_FMT_FLTP;
                    c->bit_rate = 64000;

                    // Prefer 44100 Hz when the encoder supports it, otherwise take its first rate.
                    c->sample_rate = 44100;
                    if ((*codec)->supported_samplerates != null)
                    {
                        c->sample_rate = (*codec)->supported_samplerates[0];
                        for (i = 0; (*codec)->supported_samplerates[i] != 0; i++)
                        {
                            if ((*codec)->supported_samplerates[i] == 44100)
                            {
                                c->sample_rate = 44100;
                            }
                        }
                    }

                    // Prefer stereo when the encoder supports it, otherwise take its first layout.
                    c->channel_layout = ffmpeg.AV_CH_LAYOUT_STEREO;
                    if ((*codec)->channel_layouts != null)
                    {
                        c->channel_layout = (*codec)->channel_layouts[0];
                        for (i = 0; (*codec)->channel_layouts[i] != 0; i++)
                        {
                            if ((*codec)->channel_layouts[i] == ffmpeg.AV_CH_LAYOUT_STEREO)
                            {
                                c->channel_layout = ffmpeg.AV_CH_LAYOUT_STEREO;
                            }
                        }
                    }

                    // The channel count is derived only after the final layout has been chosen.
                    c->channels = ffmpeg.av_get_channel_layout_nb_channels(c->channel_layout);
                    ost->st->time_base = new AVRational { num = 1, den = c->sample_rate };
                    break;

                case AVMediaType.AVMEDIA_TYPE_VIDEO:
                    c->codec_id = codec_id;
                    c->bit_rate = 400000;
                    c->width = 352;
                    c->height = 288;

                    // One tick of the time base equals one frame at STREAM_FRAME_RATE.
                    ost->st->time_base = new AVRational { num = 1, den = STREAM_FRAME_RATE };
                    c->time_base = ost->st->time_base;

                    c->gop_size = 12;
                    c->pix_fmt = STREAM_PIX_FMT;
                    if (c->codec_id == AVCodecID.AV_CODEC_ID_MPEG2VIDEO)
                    {
                        c->max_b_frames = 2;
                    }

                    if (c->codec_id == AVCodecID.AV_CODEC_ID_MPEG1VIDEO)
                    {
                        c->mb_decision = 2;
                    }

                    break;

                default:
                    // Only audio and video encoders can be configured here; fail loudly rather than
                    // hand back an encoder context with no parameters set.
                    throw new ApplicationException($"Unsupported media type for encoder '{ffmpeg.avcodec_get_name(codec_id)}'");
            }

            // Some containers want the stream headers to be stored separately from the packets.
            if ((oc->oformat->flags & ffmpeg.AVFMT_GLOBALHEADER) == ffmpeg.AVFMT_GLOBALHEADER)
            {
                c->flags |= ffmpeg.AV_CODEC_FLAG_GLOBAL_HEADER;
            }
        }

        /// <summary>
        /// Allocates an audio frame with the given sample format, channel layout and sample rate,
        /// and attaches sample buffers when <paramref name="nb_samples"/> is non-zero.
        /// </summary>
        /// <param name="sample_fmt">Sample format stored in the frame.</param>
        /// <param name="channel_layout">Channel layout stored in the frame.</param>
        /// <param name="sample_rate">Sample rate stored in the frame.</param>
        /// <param name="nb_samples">Samples per channel; 0 skips the buffer allocation.</param>
        /// <returns>The allocated frame.</returns>
        /// <exception cref="ApplicationException">The frame or its buffers could not be allocated.</exception>
        public unsafe static AVFrame* alloc_audio_frame(AVSampleFormat sample_fmt, ulong channel_layout, int sample_rate, int nb_samples)
        {
            AVFrame* audio = ffmpeg.av_frame_alloc();
            if (audio == null)
            {
                throw new ApplicationException("Error allocating an audio frame");
            }

            audio->nb_samples = nb_samples;
            audio->sample_rate = sample_rate;
            audio->channel_layout = channel_layout;
            audio->format = (int)sample_fmt;

            // Nothing to allocate for an empty frame.
            if (nb_samples == 0)
            {
                return audio;
            }

            if (ffmpeg.av_frame_get_buffer(audio, 0) < 0)
            {
                throw new ApplicationException("Error allocating an audio buffer");
            }

            return audio;
        }

        /// <summary>
        /// Opens the audio encoder, initialises the sine-wave generator state, allocates the
        /// encoder frame and the S16 staging frame, copies the codec parameters to the stream
        /// and sets up the resampler that converts S16 to the encoder's sample format.
        /// </summary>
        /// <param name="oc">Output format context (not used by this method).</param>
        /// <param name="codec">Encoder to open.</param>
        /// <param name="ost">Audio output stream state to initialise.</param>
        /// <param name="opt_arg">Options to pass to the encoder; copied, never modified.</param>
        /// <exception cref="ApplicationException">
        /// The codec could not be opened, or a frame/resampler allocation or initialisation failed.
        /// </exception>
        public unsafe static void open_audio(AVFormatContext* oc, AVCodec* codec, OutputStream* ost, AVDictionary* opt_arg)
        {
            AVCodecContext* c;
            int nb_samples;
            int ret;
            AVDictionary* opt = null;

            c = ost->enc;

            // Work on a private copy of the options and release it once the codec has consumed it.
            ffmpeg.av_dict_copy(&opt, opt_arg, 0);
            ret = ffmpeg.avcodec_open2(c, codec, &opt);
            ffmpeg.av_dict_free(&opt);
            if (ret < 0)
            {
                throw new ApplicationException($"Could not open audio codec: {ret}");
            }

            // Signal generator state: phase, phase increment (110 Hz) and the per-sample
            // growth of that increment.
            ost->t = 0;
            ost->tincr = (float)(2 * Math.PI * 110.0 / c->sample_rate);
            ost->tincr2 = ost->tincr / c->sample_rate;

            // Encoders with a variable frame size accept any sample count; otherwise the
            // encoder dictates the frame size.
            if ((c->codec->capabilities & ffmpeg.AV_CODEC_CAP_VARIABLE_FRAME_SIZE) == ffmpeg.AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
            {
                nb_samples = 10000;
            }
            else
            {
                nb_samples = c->frame_size;
            }

            ost->frame = alloc_audio_frame(c->sample_fmt, c->channel_layout, c->sample_rate, nb_samples);
            ost->tmp_frame = alloc_audio_frame(AVSampleFormat.AV_SAMPLE_FMT_S16, c->channel_layout, c->sample_rate, nb_samples);

            // Copy the stream parameters to the muxer.
            ret = ffmpeg.avcodec_parameters_from_context(ost->st->codecpar, c);
            if (ret < 0)
            {
                throw new ApplicationException("Could not copy the stream parameters");
            }

            ost->swr_ctx = ffmpeg.swr_alloc();
            if (ost->swr_ctx == null)
            {
                throw new ApplicationException("Could not allocate resampler context");
            }

            // Same channel count and rate on both sides; only the sample format changes.
            ffmpeg.av_opt_set_int(ost->swr_ctx, "in_channel_count", c->channels, 0);
            ffmpeg.av_opt_set_int(ost->swr_ctx, "in_sample_rate", c->sample_rate, 0);
            ffmpeg.av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt", AVSampleFormat.AV_SAMPLE_FMT_S16, 0);
            ffmpeg.av_opt_set_int(ost->swr_ctx, "out_channel_count", c->channels, 0);
            ffmpeg.av_opt_set_int(ost->swr_ctx, "out_sample_rate", c->sample_rate, 0);
            ffmpeg.av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt", c->sample_fmt, 0);

            if ((ret = ffmpeg.swr_init(ost->swr_ctx)) < 0)
            {
                throw new ApplicationException("Failed to initialize the resampling context");
            }
        }

        /// <summary>
        /// Fills the S16 staging frame with the next chunk of the generated sine signal and
        /// stamps it with the current pts.
        /// </summary>
        /// <param name="ost">Audio output stream state (generator phase, pts counter, staging frame).</param>
        /// <returns>The filled staging frame, or null once next_pts lies past STREAM_DURATION.</returns>
        public unsafe static AVFrame* get_audio_frame(OutputStream* ost)
        {
            AVFrame* pcm = ost->tmp_frame;
            AVRational seconds = new AVRational { num = 1, den = 1 };

            // Stop generating once enough audio has been produced.
            bool finished = ffmpeg.av_compare_ts(ost->next_pts, ost->enc->time_base, (long)STREAM_DURATION, seconds) > 0;
            if (finished)
            {
                return null;
            }

            short* cursor = (short*)pcm->data[0];
            int sampleIndex = 0;
            while (sampleIndex < pcm->nb_samples)
            {
                int amplitude = (int)(Math.Sin(ost->t) * 10000);

                // The same value goes to every channel, written back to back.
                int channel = 0;
                while (channel < ost->enc->channels)
                {
                    *cursor = (short)amplitude;
                    cursor++;
                    channel++;
                }

                ost->t += ost->tincr;
                ost->tincr += ost->tincr2;
                sampleIndex++;
            }

            pcm->pts = ost->next_pts;
            ost->next_pts += pcm->nb_samples;

            return pcm;
        }

        /// <summary>
        /// Generates one audio frame, converts it to the encoder's sample format and passes it
        /// to <c>write_frame</c>. When the generator is exhausted a null frame is passed instead.
        /// </summary>
        /// <param name="oc">Output format context.</param>
        /// <param name="ost">Audio output stream state.</param>
        /// <returns>The result of <c>write_frame</c>: 1 when the encoder reported end of stream, otherwise 0.</returns>
        /// <exception cref="ApplicationException">The encoder frame is not writable or the conversion failed.</exception>
        public unsafe static int write_audio_frame(AVFormatContext* oc, OutputStream* ost)
        {
            AVCodecContext* c;
            AVFrame* frame;
            int ret;
            long dst_nb_samples = 0;

            c = ost->enc;
            frame = get_audio_frame(ost);

            if (frame != null)
            {
                // Input and output rates are identical, so the converted sample count must
                // equal the source count.
                dst_nb_samples = ffmpeg.av_rescale_rnd(ffmpeg.swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples,
                    c->sample_rate, c->sample_rate, AVRounding.AV_ROUND_UP);

                Debug.Assert(dst_nb_samples == frame->nb_samples);

                // The encoder may still hold a reference to the frame; make sure we are allowed
                // to overwrite it before converting into it.
                ret = ffmpeg.av_frame_make_writable(ost->frame);
                if (ret < 0)
                {
                    throw new ApplicationException($"Could not make the audio frame writable: {ret}");
                }

                ret = ffmpeg.swr_convert(ost->swr_ctx, (byte**)&ost->frame->data, (int)dst_nb_samples, (byte**)&frame->data, frame->nb_samples);
                if (ret < 0)
                {
                    throw new ApplicationException($"Error while converting: {ret}");
                }

                frame = ost->frame;
                // Derive the pts from the running sample count, expressed in the codec time base.
                frame->pts = ffmpeg.av_rescale_q(ost->samples_count, new AVRational { num = 1, den = c->sample_rate }, c->time_base);
                ost->samples_count += (int)dst_nb_samples;
            }

            return write_frame(oc, c, ost->st, frame, ost->tmp_pkt);
        }

        /// <summary>
        /// Allocates a video frame of the given pixel format and dimensions together with its
        /// data buffers.
        /// </summary>
        /// <param name="pix_fmt">Pixel format stored in the frame.</param>
        /// <param name="width">Frame width stored in the frame.</param>
        /// <param name="height">Frame height stored in the frame.</param>
        /// <returns>The allocated frame, or null when the frame structure itself could not be allocated.</returns>
        /// <exception cref="ApplicationException">The frame buffers could not be allocated.</exception>
        public unsafe static AVFrame* alloc_picture(AVPixelFormat pix_fmt, int width, int height)
        {
            AVFrame* image = ffmpeg.av_frame_alloc();
            if (image == null)
            {
                return null;
            }

            image->height = height;
            image->width = width;
            image->format = (int)pix_fmt;

            // Allocate the buffers for the frame data.
            int ret = ffmpeg.av_frame_get_buffer(image, 0);
            if (ret >= 0)
            {
                return image;
            }

            throw new ApplicationException($"Could not allocate frame data: {ret}");
        }

        /// <summary>
        /// Opens the video encoder, allocates the reusable encoder frame (plus a YUV420P staging
        /// frame when the encoder uses a different pixel format) and copies the codec parameters
        /// to the stream.
        /// </summary>
        /// <param name="oc">Output format context (not used by this method).</param>
        /// <param name="codec">Encoder to open.</param>
        /// <param name="ost">Video output stream state to initialise.</param>
        /// <param name="opt_arg">Options to pass to the encoder; copied, never modified.</param>
        /// <exception cref="ApplicationException">
        /// The codec could not be opened, a frame could not be allocated or the parameters could not be copied.
        /// </exception>
        public unsafe static void open_video(AVFormatContext* oc, AVCodec* codec, OutputStream* ost, AVDictionary* opt_arg)
        {
            int ret;
            AVCodecContext* c = ost->enc;
            AVDictionary* opt = null;

            // Work on a private copy of the options and release it once the codec has consumed it.
            ffmpeg.av_dict_copy(&opt, opt_arg, 0);

            ret = ffmpeg.avcodec_open2(c, codec, &opt);
            ffmpeg.av_dict_free(&opt);
            if (ret < 0)
            {
                // Continuing with an unopened encoder would only fail later in a less obvious place.
                throw new ApplicationException($"Could not open video codec: {ret}");
            }

            // Allocate and init a reusable frame.
            ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
            if (ost->frame == null)
            {
                throw new ApplicationException("Could not allocate video frame");
            }

            // The test image is generated as YUV420P; a staging picture is only needed when
            // the encoder expects another pixel format.
            ost->tmp_frame = null;
            if (c->pix_fmt != AVPixelFormat.AV_PIX_FMT_YUV420P)
            {
                ost->tmp_frame = alloc_picture(AVPixelFormat.AV_PIX_FMT_YUV420P, c->width, c->height);
                if (ost->tmp_frame == null)
                {
                    throw new ApplicationException("Could not allocate temporary picture");
                }
            }

            // Copy the stream parameters to the muxer.
            ret = ffmpeg.avcodec_parameters_from_context(ost->st->codecpar, c);
            if (ret < 0)
            {
                throw new ApplicationException("Could not copy the stream parameters");
            }
        }

        /// <summary>
        /// Paints a synthetic test pattern into a planar YUV frame; the pattern shifts with
        /// <paramref name="frame_index"/> so that consecutive frames differ.
        /// </summary>
        /// <param name="pict">Destination frame; planes 0, 1 and 2 are written.</param>
        /// <param name="frame_index">Index of the frame being generated.</param>
        /// <param name="width">Width of plane 0 in pixels; planes 1 and 2 are written at half that width.</param>
        /// <param name="height">Height of plane 0 in pixels; planes 1 and 2 are written at half that height.</param>
        public unsafe static void fill_yuv_image(AVFrame* pict, int frame_index, int width, int height)
        {
            int x, y, i;
            i = frame_index;

            // Plane 0 (Y): diagonal gradient.
            for (y = 0; y < height; y++)
            {
                for (x = 0; x < width; x++)
                {
                    pict->data[0][y * pict->linesize[0] + x] = (byte)(x + y + i * 3);
                }
            }

            // Planes 1 and 2 (Cb and Cr).
            for (y = 0; y < height / 2; y++)
            {
                for (x = 0; x < width / 2; x++)
                {
                    pict->data[1][y * pict->linesize[1] + x] = (byte)(128 + y + i * 2);
                    // Cr varies along x, as in the upstream muxing.c example this code is ported from.
                    pict->data[2][y * pict->linesize[2] + x] = (byte)(64 + x + i * 5);
                }
            }
        }

        /// <summary>
        /// Produces the next video frame: draws the test pattern (converting it to the encoder's
        /// pixel format when that is not YUV420P) and stamps it with the current pts.
        /// </summary>
        /// <param name="ost">Video output stream state.</param>
        /// <returns>The encoder frame, or null once next_pts lies past STREAM_DURATION.</returns>
        /// <exception cref="ApplicationException">
        /// The frame could not be made writable or the conversion context could not be created.
        /// </exception>
        public unsafe static AVFrame* get_video_frame(OutputStream* ost)
        {
            AVCodecContext* enc = ost->enc;
            AVRational seconds = new AVRational { num = 1, den = 1 };

            // Stop generating once enough video has been produced.
            bool finished = ffmpeg.av_compare_ts(ost->next_pts, enc->time_base, (long)STREAM_DURATION, seconds) > 0;
            if (finished)
            {
                return null;
            }

            // The encoder may still reference the frame internally, so make sure it can be overwritten.
            int writable = ffmpeg.av_frame_make_writable(ost->frame);
            if (writable < 0)
            {
                throw new ApplicationException("av_frame_make_writable failed");
            }

            bool alreadyYuv420p = enc->pix_fmt == AVPixelFormat.AV_PIX_FMT_YUV420P;
            if (alreadyYuv420p)
            {
                fill_yuv_image(ost->frame, (int)ost->next_pts, enc->width, enc->height);
            }
            else
            {
                // The pattern is drawn as YUV420P and then converted; the scaler is created on first use.
                if (ost->sws_ctx == null)
                {
                    ost->sws_ctx = ffmpeg.sws_getContext(enc->width, enc->height, AVPixelFormat.AV_PIX_FMT_YUV420P,
                        enc->width, enc->height, enc->pix_fmt, SCALE_FLAGS, null, null, null);

                    if (ost->sws_ctx == null)
                    {
                        throw new ApplicationException("Could not initialize the conversion context");
                    }
                }

                fill_yuv_image(ost->tmp_frame, (int)ost->next_pts, enc->width, enc->height);
                ffmpeg.sws_scale(ost->sws_ctx, ost->tmp_frame->data,
                    ost->tmp_frame->linesize, 0, enc->height, ost->frame->data, ost->frame->linesize);
            }

            ost->frame->pts = ost->next_pts;
            ost->next_pts += 1;

            return ost->frame;
        }

        /// <summary>
        /// Generates the next video frame and hands it to <c>write_frame</c> for encoding and muxing.
        /// </summary>
        /// <param name="oc">Output format context.</param>
        /// <param name="ost">Video output stream state.</param>
        /// <returns>The result of <c>write_frame</c>.</returns>
        public unsafe static int write_video_frame(AVFormatContext* oc, OutputStream* ost)
        {
            AVFrame* picture = get_video_frame(ost);
            return write_frame(oc, ost->enc, ost->st, picture, ost->tmp_pkt);
        }

        /// <summary>
        /// Releases everything an output stream owns: the encoder context, both frames, the
        /// reusable packet and the scaler/resampler contexts.
        /// </summary>
        /// <param name="oc">Output format context (not used by this method).</param>
        /// <param name="ost">Output stream state whose resources are released.</param>
        public unsafe static void close_stream(AVFormatContext* oc, OutputStream* ost)
        {
            // Each call is given whatever the stream holds, including members that were never allocated.
            ffmpeg.avcodec_free_context(&ost->enc);
            ffmpeg.av_frame_free(&ost->frame);
            ffmpeg.av_frame_free(&ost->tmp_frame);
            ffmpeg.av_packet_free(&ost->tmp_pkt);
            ffmpeg.sws_freeContext(ost->sws_ctx);
            ost->sws_ctx = null;
            ffmpeg.swr_free(&ost->swr_ctx);
        }

    // Per-stream state shared by the add/open/write/close helpers: one instance is used for the
    // video stream and one for the audio stream.
    public unsafe struct OutputStream
        // Stream created in the output context by add_stream.
        public AVStream* st;
        // Encoder context allocated by add_stream.
        public AVCodecContext* enc;

        // pts that will be assigned to the next generated frame.
        public long next_pts;
        // Running total of converted audio samples; write_audio_frame derives the audio pts from it.
        public int samples_count;

        // Frame that is handed to the encoder.
        public AVFrame* frame;
        // Staging frame holding generated data before conversion (S16 audio, or a YUV420P picture
        // when the encoder uses another pixel format); null for video when no conversion is needed.
        public AVFrame* tmp_frame;

        // Packet allocated by add_stream and passed to write_frame to receive encoder output.
        public AVPacket* tmp_pkt;

        // Audio signal generator state: current phase, phase increment, and the amount the
        // increment itself grows per sample.
        public float t;
        public float tincr;
        public float tincr2;

        // Pixel-format conversion context, created in get_video_frame the first time it is needed.
        public SwsContext* sws_ctx;
        // Resampler context set up in open_audio.
        public SwrContext* swr_ctx;

실행해 보면, 10초 분량의 c:\temp\output\test.mp4 동영상 파일이 생성되고, ffprobe로 확인하면 다음과 같은 정보가 나옵니다.

C:\temp> ffprobe c:\temp\output\test.mp4
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'c:\temp\output\test.mp4':
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2mp41
    encoder         : Lavf58.76.100
  Duration: 00:00:10.01, start: 0.000000, bitrate: 612 kb/s
  Stream #0:0(und): Video: mpeg4 (Simple Profile) (mp4v / 0x7634706D), yuv420p, 352x288 [SAR 1:1 DAR 11:9], 541 kb/s, 25.10 fps, 25 tbr, 12800 tbn, 25 tbc (default)
      handler_name    : VideoHandler
      vendor_id       : [0][0][0][0]
  Stream #0:1(und): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 65 kb/s (default)
      handler_name    : SoundHandler
      vendor_id       : [0][0][0][0]

"C# - ffmpeg(FFmpeg.AutoGen): Bitmap으로부터 h264 형식의 파일로 쓰기" 글에서 생성한 동영상 파일과는 달리, muxing.c로 만들어진 mp4 파일은 컨테이너와 함께 스트림을 추가한 유형이기 때문에 일반적인 동영상 플레이어로 재생이 가능합니다.

참고로, youtube-dl로 다운로드한 경우 동영상 정보가 이렇게 나옵니다.

D:\media_sample> ffprobe theade-i-was-young.mp4
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'theade-i-was-young.mp4':
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    encoder         : Lavf58.45.100
  Duration: 00:03:08.01, start: 0.000000, bitrate: 575 kb/s
  Stream #0:0(und): Video: h264 (Main) (avc1 / 0x31637661), yuv420p(tv, bt709), 640x360 [SAR 1:1 DAR 16:9], 439 kb/s, 29.97 fps, 29.97 tbr, 30k tbn, 59.94 tbc (default)
      handler_name    : ISO Media file produced by Google Inc.
      vendor_id       : [0][0][0][0]
  Stream #0:1(eng): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 127 kb/s (default)
      handler_name    : ISO Media file produced by Google Inc.
      vendor_id       : [0][0][0][0]

가장 큰 차이점은 muxing.c로 만든 파일의 비디오 스트림이 h264가 아닌 mpeg4 코덱으로 인코딩되었다는 점인데, 혹시 이걸 h264로 바꿀 수 있을까요? 가만 보니, avformat_alloc_output_context2의 3번째 인자가 format 이름입니다.

ffmpeg.avformat_alloc_output_context2(&oc, null, "...format_name....", filename);

저 값으로 들어갈 수 있는 값은 ffmpeg를 이용해 확인할 수 있는데요,

D:\media_sample> ffmpeg -formats
File formats:
 D. = Demuxing supported
 .E = Muxing supported
 D  3dostr          3DO STR
  E 3g2             3GP2 (3GPP2 file format)
  E 3gp             3GP (3GPP file format)
 D  4xm             4X Technologies
  E a64             a64 - video for Commodore 64
 D  aa              Audible AA format files
 D  aac             raw ADTS AAC (Advanced Audio Coding)
 DE ac3             raw AC-3
 D  acm             Interplay ACM
 D  act             ACT Voice file format
 D  adf             Artworx Data Format
 D  adp             ADP
 D  ads             Sony PS2 ADS
  E adts            ADTS AAC (Advanced Audio Coding)
 DE adx             CRI ADX
 D  aea             MD STUDIO audio
 D  afc             AFC
 DE aiff            Audio IFF
 D  aix             CRI AIX
 DE alaw            PCM A-law
 D  alias_pix       Alias/Wavefront PIX image
 D  alp             LEGO Racers ALP
 DE amr             3GPP AMR
 D  amrnb           raw AMR-NB
 D  amrwb           raw AMR-WB
 D  anm             Deluxe Paint Animation
 D  apc             CRYO APC
 D  ape             Monkey's Audio
 D  apm             Ubisoft Rayman 2 APM
 DE apng            Animated Portable Network Graphics
 DE aptx            raw aptX (Audio Processing Technology for Bluetooth)
 DE aptx_hd         raw aptX HD (Audio Processing Technology for Bluetooth)
 D  aqtitle         AQTitle subtitles
 D  argo_asf        Argonaut Games ASF
 DE asf             ASF (Advanced / Active Streaming Format)
 D  asf_o           ASF (Advanced / Active Streaming Format)
  E asf_stream      ASF (Advanced / Active Streaming Format)
 DE ass             SSA (SubStation Alpha) subtitle
 DE ast             AST (Audio Stream)
 DE au              Sun AU
 D  av1             AV1 Annex B
 DE avi             AVI (Audio Video Interleaved)
 D  avisynth        AviSynth script
  E avm2            SWF (ShockWave Flash) (AVM2)
 D  avr             AVR (Audio Visual Research)
 D  avs             Argonaut Games Creature Shock
 DE avs2            raw AVS2-P2/IEEE1857.4 video
 D  bethsoftvid     Bethesda Softworks VID
 D  bfi             Brute Force & Ignorance
 D  bfstm           BFSTM (Binary Cafe Stream)
 D  bin             Binary text
 D  bink            Bink
 DE bit             G.729 BIT file format
 D  bmp_pipe        piped bmp sequence
 D  bmv             Discworld II BMV
 D  boa             Black Ops Audio
 D  brender_pix     BRender PIX image
 D  brstm           BRSTM (Binary Revolution Stream)
 D  c93             Interplay C93
  E caca            caca (color ASCII art) output device
 DE caf             Apple CAF (Core Audio Format)
 DE cavsvideo       raw Chinese AVS (Audio Video Standard) video
 D  cdg             CD Graphics
 D  cdxl            Commodore CDXL video
  E chromaprint     Chromaprint
 D  cine            Phantom Cine
 DE codec2          codec2 .c2 muxer
 DE codec2raw       raw codec2 muxer
 D  concat          Virtual concatenation script
  E crc             CRC testing
 DE dash            DASH Muxer
 DE data            raw data
 DE daud            D-Cinema audio
 D  dcstr           Sega DC STR
 D  dds_pipe        piped dds sequence
 D  derf            Xilam DERF
 D  dfa             Chronomaster DFA
 D  dhav            Video DAV
 DE dirac           raw Dirac
 DE dnxhd           raw DNxHD (SMPTE VC-3)
 D  dpx_pipe        piped dpx sequence
 D  dsf             DSD Stream File (DSF)
 D  dshow           DirectShow capture
 D  dsicin          Delphine Software International CIN
 D  dss             Digital Speech Standard (DSS)
 DE dts             raw DTS
 D  dtshd           raw DTS-HD
 DE dv              DV (Digital Video)
 D  dvbsub          raw dvbsub
 D  dvbtxt          dvbtxt
  E dvd             MPEG-2 PS (DVD VOB)
 D  dxa             DXA
 D  ea              Electronic Arts Multimedia
 D  ea_cdata        Electronic Arts cdata
 DE eac3            raw E-AC-3
 D  epaf            Ensoniq Paris Audio File
 D  exr_pipe        piped exr sequence
 DE f32be           PCM 32-bit floating-point big-endian
 DE f32le           PCM 32-bit floating-point little-endian
  E f4v             F4V Adobe Flash Video
 DE f64be           PCM 64-bit floating-point big-endian
 DE f64le           PCM 64-bit floating-point little-endian
 DE ffmetadata      FFmpeg metadata in text
  E fifo            FIFO queue pseudo-muxer
  E fifo_test       Fifo test muxer
 DE film_cpk        Sega FILM / CPK
 DE filmstrip       Adobe Filmstrip
 DE fits            Flexible Image Transport System
 DE flac            raw FLAC
 D  flic            FLI/FLC/FLX animation
 DE flv             FLV (Flash Video)
  E framecrc        framecrc testing
  E framehash       Per-frame hash testing
  E framemd5        Per-frame MD5 testing
 D  frm             Megalux Frame
 D  fsb             FMOD Sample Bank
 D  fwse            Capcom's MT Framework sound
 DE g722            raw G.722
 DE g723_1          raw G.723.1
 DE g726            raw big-endian G.726 ("left-justified")
 DE g726le          raw little-endian G.726 ("right-justified")
 D  g729            G.729 raw format demuxer
 D  gdigrab         GDI API Windows frame grabber
 D  gdv             Gremlin Digital Video
 D  genh            GENeric Header
 DE gif             CompuServe Graphics Interchange Format (GIF)
 D  gif_pipe        piped gif sequence
 DE gsm             raw GSM
 DE gxf             GXF (General eXchange Format)
 DE h261            raw H.261
 DE h263            raw H.263
 DE h264            raw H.264 video
  E hash            Hash testing
 D  hca             CRI HCA
 D  hcom            Macintosh HCOM
  E hds             HDS Muxer
 DE hevc            raw HEVC video
 DE hls             Apple HTTP Live Streaming
 D  hnm             Cryo HNM v4
 DE ico             Microsoft Windows ICO
 D  idcin           id Cinematic
 D  idf             iCE Draw File
 D  iff             IFF (Interchange File Format)
 D  ifv             IFV CCTV DVR
 DE ilbc            iLBC storage
 DE image2          image2 sequence
 DE image2pipe      piped image2 sequence
 D  ingenient       raw Ingenient MJPEG
 D  ipmovie         Interplay MVE
  E ipod            iPod H.264 MP4 (MPEG-4 Part 14)
 DE ircam           Berkeley/IRCAM/CARL Sound Format
  E ismv            ISMV/ISMA (Smooth Streaming)
 D  iss             Funcom ISS
 D  iv8             IndigoVision 8000 video
 DE ivf             On2 IVF
 D  ivr             IVR (Internet Video Recording)
 D  j2k_pipe        piped j2k sequence
 DE jacosub         JACOsub subtitle format
 D  jpeg_pipe       piped jpeg sequence
 D  jpegls_pipe     piped jpegls sequence
 D  jv              Bitmap Brothers JV
 D  kux             KUX (YouKu)
 DE kvag            Simon & Schuster Interactive VAG
  E latm            LOAS/LATM
 D  lavfi           Libavfilter virtual input device
 D  libcdio          
 D  libgme          Game Music Emu demuxer
 D  libmodplug      ModPlug demuxer
 D  libopenmpt      Tracker formats (libopenmpt)
 D  live_flv        live RTMP FLV (Flash Video)
 D  lmlm4           raw lmlm4
 D  loas            LOAS AudioSyncStream
 DE lrc             LRC lyrics
 D  lvf             LVF
 D  lxf             VR native stream (LXF)
 DE m4v             raw MPEG-4 video
  E matroska        Matroska
 D  matroska,webm   Matroska / WebM
  E md5             MD5 testing
 D  mgsts           Metal Gear Solid: The Twin Snakes
 DE microdvd        MicroDVD subtitle format
 DE mjpeg           raw MJPEG video
 D  mjpeg_2000      raw MJPEG 2000 video
  E mkvtimestamp_v2 extract pts as timecode v2 format, as defined by mkvtoolnix
 DE mlp             raw MLP
 D  mlv             Magic Lantern Video (MLV)
 D  mm              American Laser Games MM
 DE mmf             Yamaha SMAF
  E mov             QuickTime / MOV
 D  mov,mp4,m4a,3gp,3g2,mj2 QuickTime / MOV
  E mp2             MP2 (MPEG audio layer 2)
 DE mp3             MP3 (MPEG audio layer 3)
  E mp4             MP4 (MPEG-4 Part 14)
 D  mpc             Musepack
 D  mpc8            Musepack SV8
 DE mpeg            MPEG-1 Systems / MPEG program stream
  E mpeg1video      raw MPEG-1 video
  E mpeg2video      raw MPEG-2 video
 DE mpegts          MPEG-TS (MPEG-2 Transport Stream)
 D  mpegtsraw       raw MPEG-TS (MPEG-2 Transport Stream)
 D  mpegvideo       raw MPEG video
 DE mpjpeg          MIME multipart JPEG
 D  mpl2            MPL2 subtitles
 D  mpsub           MPlayer subtitles
 D  msf             Sony PS3 MSF
 D  msnwctcp        MSN TCP Webcam stream
 D  mtaf            Konami PS2 MTAF
 D  mtv             MTV
 DE mulaw           PCM mu-law
 D  musx            Eurocom MUSX
 D  mv              Silicon Graphics Movie
 D  mvi             Motion Pixels MVI
 DE mxf             MXF (Material eXchange Format)
  E mxf_d10         MXF (Material eXchange Format) D-10 Mapping
  E mxf_opatom      MXF (Material eXchange Format) Operational Pattern Atom
 D  mxg             MxPEG clip
 D  nc              NC camera feed
 D  nistsphere      NIST SPeech HEader REsources
 D  nsp             Computerized Speech Lab NSP
 D  nsv             Nullsoft Streaming Video
  E null            raw null video
 DE nut             NUT
 D  nuv             NuppelVideo
  E oga             Ogg Audio
 DE ogg             Ogg
  E ogv             Ogg Video
 DE oma             Sony OpenMG audio
  E opus            Ogg Opus
 D  paf             Amazing Studio Packed Animation File
 D  pam_pipe        piped pam sequence
 D  pbm_pipe        piped pbm sequence
 D  pcx_pipe        piped pcx sequence
 D  pgm_pipe        piped pgm sequence
 D  pgmyuv_pipe     piped pgmyuv sequence
 D  pictor_pipe     piped pictor sequence
 D  pjs             PJS (Phoenix Japanimation Society) subtitles
 D  pmp             Playstation Portable PMP
 D  png_pipe        piped png sequence
 D  pp_bnk          Pro Pinball Series Soundbank
 D  ppm_pipe        piped ppm sequence
 D  psd_pipe        piped psd sequence
  E psp             PSP MP4 (MPEG-4 Part 14)
 D  psxstr          Sony Playstation STR
 D  pva             TechnoTrend PVA
 D  pvf             PVF (Portable Voice Format)
 D  qcp             QCP
 D  qdraw_pipe      piped qdraw sequence
 D  r3d             REDCODE R3D
 DE rawvideo        raw video
 D  realtext        RealText subtitle format
 D  redspark        RedSpark
 D  rl2             RL2
 DE rm              RealMedia
 DE roq             raw id RoQ
 D  rpl             RPL / ARMovie
 D  rsd             GameCube RSD
 DE rso             Lego Mindstorms RSO
 DE rtp             RTP output
  E rtp_mpegts      RTP/mpegts output format
 DE rtsp            RTSP output
 DE s16be           PCM signed 16-bit big-endian
 DE s16le           PCM signed 16-bit little-endian
 DE s24be           PCM signed 24-bit big-endian
 DE s24le           PCM signed 24-bit little-endian
 DE s32be           PCM signed 32-bit big-endian
 DE s32le           PCM signed 32-bit little-endian
 D  s337m           SMPTE 337M
 DE s8              PCM signed 8-bit
 D  sami            SAMI subtitle format
 DE sap             SAP output
 DE sbc             raw SBC
 D  sbg             SBaGen binaural beats script
 DE scc             Scenarist Closed Captions
  E sdl,sdl2        SDL2 output device
 D  sdp             SDP
 D  sdr2            SDR2
 D  sds             MIDI Sample Dump Standard
 D  sdx             Sample Dump eXchange
  E segment         segment
 D  ser             SER (Simple uncompressed video format for astronomical capturing)
 D  sgi_pipe        piped sgi sequence
 D  shn             raw Shorten
 D  siff            Beam Software SIFF
  E singlejpeg      JPEG single image
 D  sln             Asterisk raw pcm
 DE smjpeg          Loki SDL MJPEG
 D  smk             Smacker
  E smoothstreaming Smooth Streaming Muxer
 D  smush           LucasArts Smush
 D  sol             Sierra SOL
 DE sox             SoX native
 DE spdif           IEC 61937 (used on S/PDIF - IEC958)
  E spx             Ogg Speex
 DE srt             SubRip subtitle
 D  stl             Spruce subtitle format
  E stream_segment,ssegment streaming segment muxer
  E streamhash      Per-stream hash testing
 D  subviewer       SubViewer subtitle format
 D  subviewer1      SubViewer v1 subtitle format
 D  sunrast_pipe    piped sunrast sequence
 DE sup             raw HDMV Presentation Graphic Stream subtitles
 D  svag            Konami PS2 SVAG
  E svcd            MPEG-2 PS (SVCD)
 D  svg_pipe        piped svg sequence
 DE swf             SWF (ShockWave Flash)
 D  tak             raw TAK
 D  tedcaptions     TED Talks captions
  E tee             Multiple muxer tee
 D  thp             THP
 D  tiertexseq      Tiertex Limited SEQ
 D  tiff_pipe       piped tiff sequence
 D  tmv             8088flex TMV
 DE truehd          raw TrueHD
 DE tta             TTA (True Audio)
 D  tty             Tele-typewriter
 D  txd             Renderware TeXture Dictionary
 D  ty              TiVo TY Stream
 DE u16be           PCM unsigned 16-bit big-endian
 DE u16le           PCM unsigned 16-bit little-endian
 DE u24be           PCM unsigned 24-bit big-endian
 DE u24le           PCM unsigned 24-bit little-endian
 DE u32be           PCM unsigned 32-bit big-endian
 DE u32le           PCM unsigned 32-bit little-endian
 DE u8              PCM unsigned 8-bit
  E uncodedframecrc uncoded framecrc testing
 D  v210            Uncompressed 4:2:2 10-bit
 D  v210x           Uncompressed 4:2:2 10-bit
 D  vag             Sony PS2 VAG
 DE vc1             raw VC-1 video
 DE vc1test         VC-1 test bitstream
  E vcd             MPEG-1 Systems / MPEG program stream (VCD)
 D  vfwcap          VfW video capture
 DE vidc            PCM Archimedes VIDC
 D  vividas         Vividas VIV
 D  vivo            Vivo
 D  vmd             Sierra VMD
  E vob             MPEG-2 PS (VOB)
 D  vobsub          VobSub subtitle format
 DE voc             Creative Voice
 D  vpk             Sony PS2 VPK
 D  vplayer         VPlayer subtitles
 D  vqf             Nippon Telegraph and Telephone Corporation (NTT) TwinVQ
 DE w64             Sony Wave64
 DE wav             WAV / WAVE (Waveform Audio)
 D  wc3movie        Wing Commander III movie
  E webm            WebM
  E webm_chunk      WebM Chunk Muxer
 DE webm_dash_manifest WebM DASH Manifest
  E webp            WebP
 D  webp_pipe       piped webp sequence
 DE webvtt          WebVTT subtitle
 D  wsaud           Westwood Studios audio
 D  wsd             Wideband Single-bit Data (WSD)
 D  wsvqa           Westwood Studios VQA
 DE wtv             Windows Television (WTV)
 DE wv              raw WavPack
 D  wve             Psion 3 audio
 D  xa              Maxis XA
 D  xbin            eXtended BINary text (XBIN)
 D  xmv             Microsoft XMV
 D  xpm_pipe        piped xpm sequence
 D  xvag            Sony PS3 XVAG
 D  xwd_pipe        piped xwd sequence
 D  xwma            Microsoft xWMA
 D  yop             Psygnosis YOP
 DE yuv4mpegpipe    YUV4MPEG pipe

그런데 h264 + aac로 먹싱하기 위해 딱히 뭘 넣어야 할지 ^^; 막막하군요. 이 방법보다는 오히려 그냥 예제 코드에서 video_codec만 바꾸는 것이 더 좋을 것 같습니다.

if (fmt->video_codec != AVCodecID.AV_CODEC_ID_NONE)
    // AccessViolationException at FFmpeg.AutoGen 5.0.0
    // fmt->video_codec = AVCodecID.AV_CODEC_ID_H264;
    add_stream(&video_st, oc, &video_codec, AVCodecID.AV_CODEC_ID_H264);
    have_video = 1;
    encode_video = true;

이렇게 바꾸고 실행하면 10초 길이의 테스트 동영상이 mpeg4인 경우 749KB인데, h264로 바꾸면 565KB로 바뀝니다. 실제로 ffprobe로 확인해도 우리가 원하는 대로 h264가 나옵니다.

D:\media_sample> ffprobe c:\temp\output\test.mp4
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'c:\temp\output\test.mp4':
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    encoder         : Lavf58.76.100
  Duration: 00:00:10.01, start: 0.000000, bitrate: 462 kb/s
  Stream #0:0(und): Video: h264 (Constrained Baseline) (avc1 / 0x31637661), yuv420p, 352x288, 390 kb/s, 25.10 fps, 25 tbr, 12800 tbn, 50 tbc (default)
      handler_name    : VideoHandler
      vendor_id       : [0][0][0][0]
  Stream #0:1(und): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 65 kb/s (default)
      handler_name    : SoundHandler
      vendor_id       : [0][0][0][0]

슬슬 좀 뭔가 되는 것 같습니다. ^^

(첨부 파일은 이 글의 예제 코드를 포함합니다.)
(이 글의 소스 코드는 github에 올려져 있습니다.)

댓글 작성자

2022-02-16 10시00분
[이승준] 내용을 보니 mpeg4에서 h.264로 바꾼 소스를 올리신 것 같네요.
이 소스로 테스트하시는 분들의 경우는 아마도 mpeg4로 저장되지 않을 겁니다.

컨테이너 설정과 비디오/오디오 코덱은 기본적으로는 관련이 없습니다.
컨테이너가 지원하는 코덱이라면 어떤것을 사용해도 무방합니다.
다만 컨테이너를 mp4로 설정을 할 경우 비디오 코덱과 오디오 코덱을 따로 설정하지 않으면
지원 코덱 중 가장 기본적인 코덱이 선택 및 설정이 됩니다. 물론 ffmpeg이 알아서 처리해 주는거죠.

추가로 소스를 보면 pts값이 단순하게 1,2,3,4 이런식으로 증가하면서 입력되는것을 보실 수 있습니다.
실제로 저리 저장이 되는건 아니고 ffmpeg에서 알아서 fps에 맞게 변환을 하는 겁니다.
그래서 저장을 시작 할 때의 오디오 pts가 중요 합니다. 오디오의 지연에 따라 조절을 해야 할 경우도 있고
또 캡쳐보드의 입력일 경우 프레임 드롭이 있을 수 있는데, 이때 그냥 값을 증가시키면 싱크가 틀어지기 시작하죠.
사실 입력에 문제만 없다면 인코딩이 플레이보다 훨씬 쉽습니다.
플레이어의 경우 비디오/오디오 pts에 따라 처리가 복잡합니다. 특히 싱크 맞춰 주는게 아주 쥐약이 됩니다.
2022-02-16 11시48분
[후드] @이승준 어떤 용도이고... pts 이건 무엇에 쓰는 건가요?
2022-02-16 02시19분
[이승준] pts는 Presentation Time Stamp 의 약어로
이름 자체에 내용이 들어있죠. 보여주기위한 시간 값 정도로 보면 되겠고요.
비디오의 경우는 화면간 지연시간입니다. 오디오는 데이타크기가 pts라고 보면 됩니다. (pts = data_size / sample_bytes / channels)
일단 녹화 시에 정상적인 pts값을 입력하지 않으면 대부분의 플레이어에서 제대로 재생이 되지 않습니다. 플레이어들이 fps를 가지고 재생하는게 아니거든요.
아래는 참고 링크 입니다.
비교적 실제 사용하기 좋게 나와 있네요.
2022-02-16 09시55분
ffmpeg가 나와서 동영상 처리가 쉬워졌다고는 하지만, 여전히 어려운 개념들이 많아서... ^^
2022-02-17 09시27분
[후드] 정말 ffmpeg는 쉽게 곁을 안 주는 거 같아요... 사용 사례나 이런 거며 대부분 바이너리로 다루는 게 허다하고.. 서비스 로직 부분은 정성태님이나 이승준님 아니었으면 그 자리에 맴돌았을 거 같아요.. 모르는 용어에 대해서 친절히 설명도 해주시고. 감사합니다 정말로 ㅠㅠ
2022-02-17 09시32분
[guest] (pts = data_size / sample_bytes / channels)에서 sample_bytes가 혹시 영상에서 들어온 프레임 수인거죠 ?
2022-02-21 02시21분
[이승준] 오디오 포맷 정보에 관련된 내용 입니다.
오디오 포맷이 보통 PCM, 48000hz, 16bit, stereo(2channels) 요정도로 표기하는데요.
PCM은 오디오 포맷 타입
48000hz 는 sample rate
16bit는 sample bits인데요. 이걸 sample bytes로 하면 2가 됩니다.
channels 는 오디오 채널 수 입니다. 스테레오는 2이고 5.1 이런거는 6이죠.

