FFmpeg音视频播放器系列(开篇)

简介

学习FFmpeg已经有一段时间了,但都是断断续续、零零散散的,没有系统地总结过。为了保证学习效果,决定自己写一个音视频播放器,借着这个项目对FFmpeg进行系统的学习与总结,争取达到熟练应用FFmpeg的程度。
FFmpeg音视频播放器系列,本着循序渐进的过程,先从最简单的开始,逐步深入到使用QT写出一个带有GUI界面的应用软件。这个项目估计要耗时很长,但愿自己能坚持下来。
先对这个项目确定一个小目标吧:
1、模仿市面上的播放器界面使用QT完成GUI
2、可以选择单独播放视频或者音频
3、可以播放网络视频流
4、可以录制音频与视频
5、可以播放摄像头视频
好了,就这么多,多了怕自己完成不了。
自己的开发平台如下:
Ubuntu16.04
FFmpeg:4.0.2
SDL2:2.0.8
QT:5.12.1

FFmpeg的学习我基本上是基于雷神(雷霄骅)的博客,在此向他致敬!他的FFmpeg博客地址在此先行贴出:
https://blog.csdn.net/leixiaohua1020/column/info/ffmpeg-devel

FFMPEG+SDL的视频播放器

雷神也有这篇博客,不过他是将视频与音频分开的,带有音频的视频在播放时,只能播放视频而没有声音,因此本篇就先进行一下综合,写出一个基于FFMPEG+SDL的简单视频播放器,可以同时播放音频与视频,可能会有音视频播放同步的问题,待到下一篇进行总结与说明。

代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define __STDC_CONSTANT_MACROS

//Linux...
#ifdef __cplusplus
extern "C"
{
#endif
#include <libavutil/time.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavdevice/avdevice.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#include <SDL2/SDL.h>
#ifdef __cplusplus
};
#endif


/* Sizing unit, in bytes, for the converted-audio buffer; main() allocates
 * twice this amount for its output buffer. */
#define MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio

/* Playback cursor shared between main(), which publishes each converted PCM
 * chunk, and fill_audio(), which consumes it. */
static Uint8 *audio_chunk; // start of the PCM chunk most recently queued by main()
static Sint32 audio_len;   // bytes of that chunk not yet consumed by fill_audio()
static Uint8 *audio_pos;   // next byte fill_audio() will read


//Refresh Event
/* Custom SDL event types, offset from SDL_USEREVENT. sfp_refresh_thread()
 * pushes the VIDEO event periodically and the BREAK event when it ends;
 * the AUDIO event is defined but not used in this file. */
#define SFM_REFRESH_VIDEO_EVENT (SDL_USEREVENT + 1)
#define SFM_REFRESH_AUDIO_EVENT (SDL_USEREVENT + 2)
#define SFM_BREAK_EVENT (SDL_USEREVENT + 3)

/* Control flags polled by sfp_refresh_thread(): a non-zero thread_exit ends
 * its loop, a non-zero thread_pause suppresses the refresh events. */
int thread_exit=0;
int thread_pause=0;

/*
 * Timer thread: pushes SFM_REFRESH_VIDEO_EVENT every 40 ms until
 * thread_exit becomes non-zero, then pushes SFM_BREAK_EVENT and returns.
 *
 * opaque: unused.
 * Returns 0.
 *
 * While thread_pause is non-zero no refresh events are pushed, but the
 * thread keeps polling so it can resume or exit.
 */
int sfp_refresh_thread(void *opaque){
    SDL_Event event;

    (void)opaque;

    /*
     * thread_exit/thread_pause are zero-initialised at file scope. They are
     * deliberately NOT reset here: doing so could overwrite a stop request
     * issued before this thread got scheduled, leaving it running forever.
     */
    while (!thread_exit)
    {
        if (!thread_pause)
        {
            /* Zero the whole union so no indeterminate bytes are queued. */
            SDL_memset(&event, 0, sizeof(event));
            event.type = SFM_REFRESH_VIDEO_EVENT;
            SDL_PushEvent(&event);
        }
        SDL_Delay(40);
    }

    /* Re-arm the flags so the thread function can be started again. */
    thread_exit = 0;
    thread_pause = 0;

    /* Tell the event loop that the refresh thread has finished. */
    SDL_memset(&event, 0, sizeof(event));
    event.type = SFM_BREAK_EVENT;
    SDL_PushEvent(&event);

    return 0;
}

/*
 * SDL audio callback.
 *
 * udata:  user pointer registered in the SDL_AudioSpec (not used here)
 * stream: device buffer to fill
 * len:    size of that buffer in bytes
 *
 * Mixes up to len bytes from the shared audio_pos/audio_len cursor into
 * stream and advances the cursor by the amount consumed.
 */
void fill_audio(void *udata,Uint8 *stream,int len){
    int chunk = len;

    /* SDL 2.0: clear the buffer first, so any part we do not mix into
     * plays back as silence. */
    SDL_memset(stream, 0, len);

    if (audio_len != 0) {
        /* Never consume more than what is still queued. */
        if (chunk > audio_len) {
            chunk = audio_len;
        }

        SDL_MixAudio(stream, audio_pos, chunk, SDL_MIX_MAXVOLUME);
        audio_pos += chunk;
        audio_len -= chunk;
    }
}



int main(int argc, char* argv[])
{
AVFormatContext*pFormatCtx;
AVCodecContext*pVideoCodecCtx, *pAudioCodecCtx;
AVCodec*pVideoCodec, *pAudioCodec;
AVFrame*pVideoFrame,*pAudioFrame,*pFrameYUV;
unsigned char *pVideoOutBuffer, *pAudioOutBuffer;
//AVPacket *pVideoPacket, *pAudioPacket;
AVPacket *Packet;
int i, ret, GotVideoPicture, GotAudioPicture;
int audioCnt, videoCnt;

//------------SDL----------------
int screen_w,screen_h;
SDL_Window *screen;
SDL_Renderer* sdlRenderer;
SDL_Texture* sdlTexture;
SDL_Rect sdlRect;
SDL_Thread *video_tid;
SDL_Event event;

struct SwsContext *VideoConvertCtx;
struct SwrContext *AudioConvertCtx;
int VideoIndex;
int AudioIndex;

char *filepath = argv[1];

av_register_all();
avformat_network_init();
pFormatCtx = avformat_alloc_context();

if(avformat_open_input(&pFormatCtx, filepath, NULL, NULL) !=0 )
{
printf("Couldn't open input stream.\n");
return -1;
}
if(avformat_find_stream_info(pFormatCtx,NULL) < 0)
{
printf("Couldn't find stream information.\n");
return -1;
}

VideoIndex = -1;
AudioIndex = -1;
for(i=0; i<pFormatCtx->nb_streams; i++)
{
if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO)
{
VideoIndex = i;
printf("video AVRational.num:%d, AVRational.den:%d,\n",
pFormatCtx->streams[VideoIndex]->time_base.num,
pFormatCtx->streams[VideoIndex]->time_base.den);
}

if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO)
{
AudioIndex = i;
printf("audio AVRational.num:%d, AVRational.den:%d,\n",
pFormatCtx->streams[AudioIndex]->time_base.num,
pFormatCtx->streams[AudioIndex]->time_base.den);
}
}

if(VideoIndex == -1)
{
printf("Didn't find a video stream.\n");
return -1;
}

if(AudioIndex == -1)
{
printf("Didn't find a audio stream.\n");
return -1;
}

pVideoCodecCtx = pFormatCtx->streams[VideoIndex]->codec;
pVideoCodec = avcodec_find_decoder(pVideoCodecCtx->codec_id);
if(pVideoCodec == NULL)
{
printf("Video Codec not found.\n");
return -1;
}
if(avcodec_open2(pVideoCodecCtx, pVideoCodec,NULL) < 0)
{
printf("Could not open video codec.\n");
return -1;
}

pAudioCodecCtx = pFormatCtx->streams[AudioIndex]->codec;
pAudioCodec = avcodec_find_decoder(pAudioCodecCtx->codec_id);
if(pAudioCodec == NULL)
{
printf("Audio Codec not found.\n");
return -1;
}
if(avcodec_open2(pAudioCodecCtx, pAudioCodec,NULL) < 0)
{
printf("Could not open audio codec.\n");
return -1;
}

// prepare video
pVideoFrame = av_frame_alloc();
pFrameYUV = av_frame_alloc();

i = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, pVideoCodecCtx->width, pVideoCodecCtx->height, 1);
pVideoOutBuffer = (unsigned char *)av_malloc(i);
av_image_fill_arrays(pFrameYUV->data, pFrameYUV->linesize, pVideoOutBuffer,
AV_PIX_FMT_YUV420P, pVideoCodecCtx->width, pVideoCodecCtx->height, 1);

VideoConvertCtx = sws_getContext(pVideoCodecCtx->width, pVideoCodecCtx->height, pVideoCodecCtx->pix_fmt,
 pVideoCodecCtx->width, pVideoCodecCtx->height,
 AV_PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);

// prepare audio

//Out Audio Param
uint64_t AudioOutChannelLayout = AV_CH_LAYOUT_STEREO;
//nb_samples: AAC-1024 MP3-1152
int out_nb_samples = pAudioCodecCtx->frame_size;
AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;
int out_sample_rate= pAudioCodecCtx->sample_rate;
int out_channels= av_get_channel_layout_nb_channels(AudioOutChannelLayout);
//Out Buffer Size
int out_buffer_size= av_samples_get_buffer_size(NULL,out_channels ,out_nb_samples,out_sample_fmt, 1);

//out_nb_samples:1152, out_channels:2, out_buffer_size:4608, pCodecCtx->channels:2
printf("out_nb_samples:%d, out_channels:%d, out_buffer_size:%d, pCodecCtx->channels:%d\n",
out_nb_samples, out_channels, out_buffer_size, pAudioCodecCtx->channels);
pAudioOutBuffer = (uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE*2);
pAudioFrame= av_frame_alloc();

//FIX:Some Codec's Context Information is missing
int64_t in_channel_layout= av_get_default_channel_layout(pAudioCodecCtx->channels);
//Swr
AudioConvertCtx = swr_alloc();
AudioConvertCtx= swr_alloc_set_opts(AudioConvertCtx, AudioOutChannelLayout,
 out_sample_fmt, out_sample_rate,
 in_channel_layout, pAudioCodecCtx->sample_fmt ,
 pAudioCodecCtx->sample_rate, 0, NULL);
swr_init(AudioConvertCtx);

//Output Info-----------------------------
printf("---------------- File Information ---------------\n");
av_dump_format(pFormatCtx, 0, filepath, 0);
printf("-------------------------------------------------\n");

if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER))
{
printf( "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}

//SDL 2.0 Support for multiple windows
//SDL_VideoSpec
screen_w = pVideoCodecCtx->width;
screen_h = pVideoCodecCtx->height;
screen = SDL_CreateWindow("Simplest ffmpeg player's Window", SDL_WINDOWPOS_UNDEFINED,
 SDL_WINDOWPOS_UNDEFINED, screen_w, screen_h, SDL_WINDOW_OPENGL);

if(!screen)
{
printf("SDL: could not create window - exiting:%s\n",SDL_GetError());
return -1;
}
sdlRenderer = SDL_CreateRenderer(screen, -1, 0);
//IYUV: Y + U + V (3 planes)
//YV12: Y + V + U (3 planes)
sdlTexture = SDL_CreateTexture(sdlRenderer, SDL_PIXELFORMAT_IYUV, SDL_TEXTUREACCESS_STREAMING,
  pVideoCodecCtx->width, pVideoCodecCtx->height);

sdlRect.x=0;
sdlRect.y=0;
sdlRect.w=screen_w;
sdlRect.h=screen_h;

//SDL_AudioSpec
SDL_AudioSpec AudioSpec;
AudioSpec.freq = out_sample_rate;
AudioSpec.format = AUDIO_S16SYS;
AudioSpec.channels = out_channels;
AudioSpec.silence = 0;
AudioSpec.samples = out_nb_samples;
AudioSpec.callback = fill_audio;
AudioSpec.userdata = pAudioCodecCtx;

if (SDL_OpenAudio(&AudioSpec, NULL) < 0)
{
printf("can't open audio.\n");
return -1;
}

//pAudioPacket = (AVPacket *)av_malloc(sizeof(AVPacket));
//pVideoPacket = (AVPacket *)av_malloc(sizeof(AVPacket));
Packet = (AVPacket *)av_malloc(sizeof(AVPacket));
av_init_packet(Packet);

index = 0;
audioCnt = 0;
videoCnt = 0;
video_tid = SDL_CreateThread(sfp_refresh_thread, NULL, NULL);
SDL_PauseAudio(0);

while(1)
{
if(av_read_frame(pFormatCtx, Packet) < 0)
{
thread_exit = 1;
break;
}
if(Packet->stream_index == VideoIndex)
{
//printf("get video -------------> count:%d\n", videoCnt);
ret = avcodec_decode_video2(pVideoCodecCtx, pVideoFrame, &GotVideoPicture, Packet);
if(ret < 0)
{
printf("Video Decode Error.\n");
return -1;
}
if(GotVideoPicture)
{
sws_scale(VideoConvertCtx, (const unsigned char* const*)pVideoFrame->data,
 pVideoFrame->linesize, 0, pVideoCodecCtx->height, pFrameYUV->data, pFrameYUV->linesize);
printf("Video cnt:%5d\t pts:%ld\t packet size:%d, pFrame->nb_samples:%d\n",
videocnt, Packet->pts, Packet->size, pVideoFrame->nb_samples);
//SDL---------------------------
SDL_UpdateTexture( sdlTexture, NULL, pFrameYUV->data[0], pFrameYUV->linesize[0] );
SDL_RenderClear( sdlRenderer );
//SDL_RenderCopy( sdlRenderer, sdlTexture, &sdlRect, &sdlRect );
SDL_RenderCopy( sdlRenderer, sdlTexture, NULL, NULL);
SDL_RenderPresent( sdlRenderer );
SDL_Delay(40);
videocnt++;
//SDL End-----------------------
}
}

if(Packet->stream_index == AudioIndex)
{
//printf("get audio count:%d\n", audioCnt);
ret = avcodec_decode_audio4( pAudioCodecCtx, pAudioFrame,&GotAudioPicture, Packet);
if ( ret < 0 )
{
printf("Error in decoding audio frame.\n");
return -1;
}
if ( GotAudioPicture > 0 )
{
swr_convert(AudioConvertCtx,&pAudioOutBuffer, MAX_AUDIO_FRAME_SIZE,
(const uint8_t **)pAudioFrame->data , pAudioFrame->nb_samples);
printf("Auduo cnt:%5d\t pts:%ld\t packet size:%d, pFrame->nb_samples:%d\n",
audiocnt, Packet->pts, Packet->size, pAudioFrame->nb_samples);

audiocnt++;
}

while(audio_len > 0)//Wait until finish
SDL_Delay(1);

//Set audio buffer (PCM data)
audio_chunk = (Uint8 *) pAudioOutBuffer;
//Audio buffer length
audio_len = out_buffer_size;
audio_pos = audio_chunk;
}
av_free_packet(Packet);
//SDL_WaitEvent(&event);
//if(event.type == SFM_REFRESH_VIDEO_EVENT)
//{
//
//}
//else if(event.type == SDL_KEYDOWN)
//{
////Pause
//if(event.key.keysym.sym == SDLK_SPACE)
//{
//thread_pause = !thread_pause;
//printf("video got pause event!\n");
//}
//}
//else if(event.type == SDL_QUIT)
//{
//thread_exit = 1;
//}
//else if(event.type==SFM_BREAK_EVENT)
//{
//break;
//}
}

swr_free(&AudioConvertCtx);
sws_freeContext(VideoConvertCtx);

av_free(pVideoOutBuffer);
av_frame_free(&pFrameYUV);
av_frame_free(&pVideoFrame);
avcodec_close(pVideoCodecCtx);

SDL_CloseAudio();//Close SDL
SDL_Quit();

av_free(pAudioOutBuffer);
av_frame_free(&pAudioFrame);
avcodec_close(pAudioCodecCtx);
avformat_close_input(&pFormatCtx);
}

makefile

# Builds the FFmpeg + SDL2 player from a single C++ source file.
TARGET = ffmpeg_sdl2_av_play
LIB_PATH = /usr/local/lib/

# Link order matters for static libraries: a library must come before the
# libraries it depends on, so avutil goes last among the FFmpeg libs and the
# system libraries go after everything that uses them.
FFMPEG_LIBS = -lavdevice -lavfilter -lavformat -lavcodec -lswresample -lswscale -lavutil
SDL_LIBS = -lSDL2
EXTRA_LIBS = -lz -lm -lpthread -lstdc++ -lrt -lpcre
ALL_LIBS = $(FFMPEG_LIBS) $(SDL_LIBS) $(EXTRA_LIBS)

# -Wno-deprecated-declarations: the source uses FFmpeg APIs that are
# deprecated in 4.x.
COMPILE_OPTS = -v -g -Wall -Wno-deprecated-declarations

C_COMPILER = g++
C_FLAGS = $(CFLAGS) $(COMPILE_OPTS)
LD_FLAGS = -L$(LIB_PATH) $(LDFLAGS)

SRC = ffmpeg_sdl2_av_play.cpp

.PHONY: ALL clean

# Default target.
ALL: $(TARGET)

# Rebuild only when the source changes. Recipe lines must start with a TAB.
$(TARGET): $(SRC)
	$(C_COMPILER) $(C_FLAGS) $(LD_FLAGS) $(SRC) -o $(TARGET) $(ALL_LIBS)

# NOTE: also deletes media files (*.mp4, *.wav, ...) in this directory.
clean:
	rm -rf $(TARGET) *.o *.mp4 *.wav *.h264 *.avi *.flv

问题

以上代码不可避免地出现了音视频播放不同步的问题。经过短暂的研究,发现音视频同步涉及的内容也不少,因此放到下一篇来解决这个问题,并总结同步的原理与解决方法。
下一篇将研究如何根据文件信息,获取视频帧率,音频信息,时间戳pts信息等,然后说明如何进行音视频播放同步。