925 lines
		
	
	
		
			35 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			925 lines
		
	
	
		
			35 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python3
 | |
| 
 | |
| import argparse
 | |
| import locale
 | |
| import re
 | |
| from sys import exit
 | |
| from datetime import datetime,timedelta,time
 | |
| import coloredlogs, logging
 | |
| from functools import cmp_to_key
 | |
| from subprocess import Popen, PIPE
 | |
| from os import read, write, lseek, pipe, set_inheritable, memfd_create, SEEK_SET, close, unlink
 | |
| import os.path
 | |
| from io import BytesIO, TextIOWrapper
 | |
| import json
 | |
| from enum import Enum, IntEnum, unique, auto
 | |
| import shutil
 | |
| from tqdm import tqdm, trange
 | |
| from select import select
 | |
| from math import floor, ceil, log
 | |
| from shutil import copyfile, which
 | |
| 
 | |
| # Useful SPS/PPS discussion
 | |
| # TODO: improve handling of SPS and PPS header mismatches when merging MKV files with mkvmerge, in order to remove warnings.
 | |
| # https://copyprogramming.com/howto/including-sps-and-pps-in-a-raw-h264-track
 | |
| # https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2390
 | |
| 
 | |
| 
 | |
def checkRequiredTools():
    """Check that the external programs driven by this script are on PATH.

    The first missing required tool is reported as an error and the process
    exits with status -1. Missing optional tools are only reported at INFO
    level and do not stop the program.
    """
    logger = logging.getLogger(__name__)
    required = ['ffmpeg', 'ffprobe', 'mkvmerge']
    optional = ['mkvextract', 'vobsubocr']
    for tool in required:
        if which(tool) is None:
            logger.error('Required tool: %s is missing.', tool)
            exit(-1)
    for tool in optional:
        if which(tool) is None:
            logger.info('Optional tool: %s is missing.', tool)
 | |
| 
 | |
| 
 | |
@unique
class SupportedFormat(IntEnum):
    """Container formats handled by this script.

    ``str(member)`` yields a comma-separated format name string
    (presumably the ``format_name`` reported by ffprobe -- confirm against
    the callers).
    """
    TS = 1
    MP4 = 2
    Matroska = 3

    def __str__(self):
        # Member value -> textual format name.
        labels = {
            1: 'mpegts',
            2: 'mov,mp4,m4a,3gp,3g2,mj2',
            3: 'matroska,webm',
        }
        return labels.get(self.value, 'Unsupported format')
 | |
| 
 | |
| # Extract SPS/PPS 
 | |
| # https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2390
 | |
| # ffmpeg -i <InputFile (before concatenation)> -c:v copy -an -sn -bsf:v trace_headers -t 0.01 -report -loglevel 0 -f null -
 | |
| 
 | |
| 
 | |
def getFormat(inputFile):
    """Return the container-level description ffprobe gives for *inputFile*.

    inputFile: an open file object. It is rewound and its descriptor is made
        inheritable so that ffprobe can open it through /proc/self/fd/<fd>.

    Returns the 'format' entry of ffprobe's JSON report, or None (after
    logging an error) when the report cannot be parsed or has no such entry.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    inputFile.seek(0, 0)
    set_inheritable(infd, True)
    command = ['ffprobe', '-loglevel', 'quiet', '-show_format', '-of', 'json', '-i', '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()

    try:
        report = json.loads(out)
    except ValueError:
        # Empty or truncated output: treat it like a report without 'format'.
        report = {}

    if 'format' in report:
        return report['format']

    logger.error('Impossible to retrieve format of file')
    return None
 | |
| 
 | |
def getStreams(inputFile):
    """Return the list of streams ffprobe finds in *inputFile*.

    inputFile: an open file object. It is rewound and its descriptor is made
        inheritable so that ffprobe can open it through /proc/self/fd/<fd>.

    Returns the 'streams' entry of ffprobe's JSON report, or None (after
    logging an error) when the report cannot be parsed or has no such entry.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    inputFile.seek(0, 0)
    set_inheritable(infd, True)
    command = ['ffprobe', '-loglevel', 'quiet', '-show_streams', '-of', 'json', '-i', '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()

    try:
        report = json.loads(out)
    except ValueError:
        # Empty or truncated output: treat it like a report without 'streams'.
        report = {}

    if 'streams' in report:
        return report['streams']

    logger.error('Impossible to retrieve streams inside file')
    return None
 | |
| 
 | |
def parseTimestamp(ts):
    """Parse an ``hh:mm:ss[.ffffff]`` string into a ``timedelta``.

    The optional fraction holds 1 to 6 digits and is read as a decimal
    fraction of a second, so '.5' means half a second and '.120000' means
    120 milliseconds.

    Returns None, after logging an error, when the text does not match the
    pattern or when a field is out of range (hour > 23, minute > 59,
    second > 59).
    """
    logger = logging.getLogger(__name__)

    tsRegExp = r'^(?P<hour>[0-9]{1,2}):(?P<minute>[0-9]{1,2}):(?P<second>[0-9]{1,2})(\.(?P<us>[0-9]{1,6}))?$'
    m = re.match(tsRegExp, ts)
    if m is None:
        logger.error("Impossible to parse timestamp: %s", ts)
        return None

    hour = int(m['hour'])
    minute = int(m['minute'])
    second = int(m['second'])
    fraction = m['us']
    # Right-pad to 6 digits so the fraction is scaled to microseconds.
    us = int(fraction.ljust(6, '0')) if fraction is not None else 0

    # The pattern only admits digits, so negative values cannot occur and the
    # fraction is at most 999999 microseconds.
    if hour > 23:
        logger.error("hour must be in [0,24[")
        return None
    if minute > 59:
        logger.error("minute must be in [0,60[")
        return None
    if second > 59:
        logger.error("second must be in [0,60[")
        return None

    return timedelta(hours=hour, minutes=minute, seconds=second, microseconds=us)
 | |
| 
 | |
def _intervalBound(values, suffix, logger):
    """Build the timedelta of one interval bound, or None if a field is out of range."""
    hour = int(values['hour' + suffix])
    minute = int(values['minute' + suffix])
    second = int(values['second' + suffix])
    fraction = values['ms' + suffix]
    # Right-pad to 3 digits so that '.5' means 500 ms, not 5 ms.
    ms = int(fraction.ljust(3, '0')) if fraction is not None else 0

    if hour > 23:
        logger.error("hour must be in [0,24[")
        return None
    if minute > 59:
        logger.error("minute must be in [0,60[")
        return None
    if second > 59:
        logger.error("second must be in [0,60[")
        return None

    return timedelta(hours=hour, minutes=minute, seconds=second, milliseconds=ms)


def parseTimeInterval(interval):
    """Parse ``hh:mm:ss[.mmm]-hh:mm:ss[.mmm]`` into a pair of timedeltas.

    The optional fractions hold 1 to 3 digits and are read as decimal
    fractions of a second.

    Returns ``(begin, end)`` on success. Every failure (text not matching the
    pattern, field out of range, end earlier than begin) is logged and
    reported as ``(None, None)`` so that callers can always unpack two values.
    """
    logger = logging.getLogger(__name__)

    intervalRegExp = r'^(?P<hour1>[0-9]{1,2}):(?P<minute1>[0-9]{1,2}):(?P<second1>[0-9]{1,2})(\.(?P<ms1>[0-9]{1,3}))?-(?P<hour2>[0-9]{1,2}):(?P<minute2>[0-9]{1,2}):(?P<second2>[0-9]{1,2})(\.(?P<ms2>[0-9]{1,3}))?$'
    m = re.match(intervalRegExp, interval)
    if m is None:
        logger.error("Impossible to parse time interval")
        return None, None

    values = m.groupdict()
    ts1 = _intervalBound(values, '1', logger)
    if ts1 is None:
        return None, None
    ts2 = _intervalBound(values, '2', logger)
    if ts2 is None:
        return None, None

    if ts2 < ts1:
        logger.error("Non monotonic interval")
        return None, None

    return (ts1, ts2)
 | |
| 
 | |
def compareTimeInterval(interval1, interval2):
    """Three-way comparison of two (begin, end) intervals.

    Returns -1 when interval1 ends strictly before interval2 begins, 1 when
    interval2 ends strictly before interval1 begins, and 0 otherwise (the
    intervals touch or overlap).
    """
    firstBegin, firstEnd = interval1
    secondBegin, secondEnd = interval2

    if firstEnd < secondBegin:
        return -1
    return 1 if secondEnd < firstBegin else 0
 | |
| 
 | |
| 
 | |
| 
 | |
def ffmpegConvert(inputFile, inputFormat, outputFile, outputFormat, duration):
    """Remux *inputFile* into *outputFile* with ffmpeg, showing a progress bar.

    inputFile, outputFile: open file objects; ffmpeg reaches them through
        /proc/self/fd/<fd>, so both descriptors are made inheritable.
    inputFormat, outputFormat: values passed to ffmpeg's '-f' option.
    duration: timedelta used as the total of the progress bar.

    The command maps the video, audio and subtitle streams of the input,
    copies video and audio, and encodes subtitles as 'dvdsub'. A non-zero
    ffmpeg exit status is logged as an error; nothing is returned.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    # TODO: canvas size to be fixed !
    command = ['ffmpeg', '-y', '-loglevel', 'quiet', '-progress', '/dev/stdout', '-canvas_size', '720x560', '-f', inputFormat, '-i', '/proc/self/fd/%d' % infd,
               '-map', '0:v', '-map', '0:a', '-map', '0:s', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-vcodec', 'copy', '-acodec', 'copy', '-scodec', 'dvdsub',
               '-f', outputFormat, '/proc/self/fd/%d' % outfd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffmpeg:
        pb = tqdm(TextIOWrapper(ffmpeg.stdout, encoding="utf-8"), total=int(duration/timedelta(seconds=1)), unit='s', desc='Conversion')
        for line in pb:
            if line.startswith('out_time='):
                ts = parseTimestamp(line.split('=')[1].strip())
                if ts is None:
                    # The value was not a parsable hh:mm:ss timestamp:
                    # keep the previous progress instead of crashing.
                    continue
                pb.n = int(ts/timedelta(seconds=1))
                pb.update()
        status = ffmpeg.wait()
        if status != 0:
            logger.error('Conversion failed with status code: %d' % status)
 | |
|         
 | |
| 
 | |
def getFramesInStream(inputFile, begin, end, streamKind, subStreamId=0):
    """List the frames of one stream whose timestamp lies in [begin, end].

    inputFile: open file object, handed to ffprobe via /proc/self/fd/<fd>.
    begin, end: timedeltas bounding the interval (both inclusive).
    streamKind, subStreamId: combined into ffprobe's '-select_streams'
        specifier, e.g. 'v' and 0 give 'v:0'.

    Returns the list of frame dictionaries reported by ffprobe and falling
    inside the interval, or None (after logging an error) when ffprobe
    fails, its output is unusable, or a frame carries no timestamp.
    """
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()
    set_inheritable(infd, True)

    command = ['ffprobe', '-loglevel', 'quiet', '-read_intervals', ('%s%%%s' %(begin, end)), '-show_entries', 'frame', '-select_streams', '%s:%d' % (streamKind, subStreamId), '-of', 'json', '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()
        status = ffprobe.returncode

    # Check the exit status before touching the output: a failed ffprobe may
    # not have produced a JSON document at all.
    if status != 0:
        logger.error('ffprobe failed with status code: %d' % status)
        return None

    try:
        report = json.loads(out)
    except ValueError:
        report = {}

    if 'frames' not in report:
        logger.error('Impossible to retrieve frames inside file around [%s,%s]' % (begin, end))
        return None

    res = []
    for frame in report['frames']:
        if 'pts_time' in frame:
            pts_time = float(frame['pts_time'])
        elif 'pkt_pts_time' in frame:
            pts_time = float(frame['pkt_pts_time'])
        else:
            logger.error('Impossible to find timestamp of frame %s' % frame)
            return None

        ts = timedelta(seconds=pts_time)
        if begin <= ts <= end:
            res.append(frame)
    return res
 | |
| 
 | |
def getNearestIFrame(inputFile, timestamp, before=True, delta=timedelta(seconds=2)):
    """Find the I-frame closest to *timestamp* on the requested side.

    inputFile: open file object passed on to getFramesInStream.
    timestamp: timedelta around which video frames are inspected.
    before: when True pick the last I-frame at or before *timestamp*,
        otherwise the first I-frame at or after it.
    delta: half-width of the inspected window; the window start is clamped
        to zero.

    Returns ``(nbFrames, iframe)`` where *iframe* is the frame dictionary and
    *nbFrames* counts the frames lying between the I-frame and *timestamp*
    (both ends included). Returns None when the frames cannot be listed or
    when the window contains no suitable I-frame.
    """
    logger = logging.getLogger(__name__)

    def ptsOf(frame):
        # getFramesInStream accepts frames carrying either key, so do the same here.
        raw = frame['pts_time'] if 'pts_time' in frame else frame['pkt_pts_time']
        return timedelta(seconds=float(raw))

    tbegin = max(timestamp - delta, timedelta())
    tend = timestamp + delta

    logger.debug('Looking for iframe in [%s, %s]' % (tbegin, tend))

    frames = getFramesInStream(inputFile=inputFile, begin=tbegin, end=tend, streamKind='v')
    if frames is None:
        return None

    iframes = [frame for frame in frames if frame['pict_type'] == 'I']

    iframe = None
    for frame in iframes:
        ts = ptsOf(frame)
        if before and ts <= timestamp:
            # Keep scanning: a later I-frame may still precede the timestamp.
            iframe = frame
        if not before and ts >= timestamp:
            iframe = frame
            break

    if iframe is None:
        logger.error("Impossible to find I-frame around: %s" % timestamp)
        return None

    logger.info("Found i-frame at: %s" % iframe)

    its = ptsOf(iframe)
    if before:
        lower, upper = its, timestamp
    else:
        lower, upper = timestamp, its

    nbFrames = 0
    for frame in frames:
        ts = ptsOf(frame)
        if lower <= ts <= upper:
            logger.info("Retrieve a frame between %s and %s at %s" % (lower, upper, ts))
            nbFrames = nbFrames+1

    return (nbFrames, iframe)
 | |
| 
 | |
def extractMKVPart(inputFile, outputFile, begin, end):
    """Copy the part of *inputFile* between *begin* and *end* into *outputFile*.

    Both arguments are open file objects; they are rewound and handed to
    mkvmerge ('--split parts:begin-end') through /proc/self/fd/<fd>.

    Progress is read from mkvmerge's standard output. The line prefixes
    matched here ('Progression :', 'Avertissement') are the French ones;
    main() sets the fr_FR locale. Exit status 1 is logged as a warning
    together with the collected warning lines, and status 2 as an error.
    Nothing is returned.
    """
    logger = logging.getLogger(__name__)

    logger.info('Extract video between I-frames at %s and %s' % (begin,end))
    inputFile.seek(0,0)
    outputFile.seek(0,0)
    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    warnings = []
    progressPattern = re.compile('^Progression : (?P<progress>[0-9]{1,3})%$')
    command = ['mkvmerge', '-o', '/proc/self/fd/%d' % outfd, '--split', 'parts:%s-%s' % (begin, end), '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as mkvmerge:
        pb = tqdm(TextIOWrapper(mkvmerge.stdout, encoding="utf-8"), total=100, unit='%', desc='Extraction')
        for line in pb:
            if line.startswith('Progression :'):
                m = progressPattern.match(line)
                if m is None:
                    logger.error('Impossible to parse progress')
                    # Skip the update: there is no match to read from.
                    continue
                pb.n = int(m['progress'])
                pb.update()
            elif line.startswith('Avertissement'):
                warnings.append(line)

        status = mkvmerge.wait()
        if status == 1:
            logger.warning('Extraction returns warning')
            for w in warnings:
                logger.warning(w)
        elif status == 2:
            logger.error('Extraction returns errors')
 | |
|         
 | |
|         
 | |
def extractPictures(inputFile, begin, nbFrames, width=640, height=480):
    """Decode *nbFrames* video frames starting at *begin* into PPM pictures.

    inputFile: open file object, rewound and handed to ffmpeg through
        /proc/self/fd/<fd>.
    begin: seek position, formatted with '%s' for ffmpeg's '-ss' option.
    width, height: size given to ffmpeg's '-s' option.

    ffmpeg writes the pictures one after the other into an anonymous
    in-memory file (memfd).

    Returns ``(images, outfd)``: the raw bytes of all pictures and the memfd,
    rewound to its start; the caller owns the descriptor and must close it.
    Returns ``(None, None)`` when ffmpeg fails or when the amount of data
    differs from what *nbFrames* pictures should occupy; the memfd is closed
    in that case.
    """
    logger = logging.getLogger(__name__)

    inputFile.seek(0,0)
    infd = inputFile.fileno()
    set_inheritable(infd, True)
    outfd = memfd_create('pictures', flags=0)
    set_inheritable(outfd, True)
    # PPM header: "P6\nWIDTH HEIGHT\n255\n". Measure the real text, since a
    # logarithm miscounts the digits of exact powers of ten.
    headerLen = len('P6\n%d %d\n255\n' % (width, height))
    logger.debug('Header length: %d' % headerLen)
    imageLength = width*height*3+headerLen
    length = imageLength*nbFrames
    logger.debug("Estimated length: %d" % length)

    command = ['ffmpeg', '-loglevel', 'quiet' ,'-y', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-s', '%dx%d'%(width, height), '-vframes', '%d'%nbFrames, '-c:v', 'ppm', '-f', 'image2pipe', '/proc/self/fd/%d' % outfd ]
    with Popen(command, stdout=PIPE, close_fds=False) as ffmpeg:
        status = ffmpeg.wait()

    if status != 0:
        logger.error('Conversion failed with status code: %d' % status)
        close(outfd)
        return None, None

    lseek(outfd, 0, SEEK_SET)
    images = read(outfd,length)
    if len(images) != length:
        logger.info("Received %d bytes but %d were expected." % (len(images), length))
        close(outfd)
        return None, None

    lseek(outfd, 0, SEEK_SET)
    return images, outfd
 | |
| 
 | |
def extractSound(inputFile, begin, outputFileName, packetDuration, subChannel=0, nbPackets=0, sampleRate=48000, nbChannels=2):
    """Decode audio packets starting at *begin* into raw 32-bit PCM.

    inputFile: open file object, rewound and handed to ffmpeg through
        /proc/self/fd/<fd>.
    begin: seek position, formatted with '%s' for ffmpeg's '-ss' option.
    outputFileName: name given to the anonymous in-memory file (memfd).
    packetDuration: duration of one packet in milliseconds.
    subChannel: index used in the '-frames:a:<n>' option.
    nbPackets: number of packets expected; ffmpeg is asked for one more.
    sampleRate, nbChannels: PCM layout requested from ffmpeg.

    Returns ``(sound, outfd)``: the first
    ``nbChannels*sampleRate*4*nbPackets*packetDuration/1000`` bytes and the
    memfd, which the caller must close. Returns ``(None, None)`` when ffmpeg
    fails or fewer bytes are available; the memfd is closed in that case.
    """
    logger = logging.getLogger(__name__)

    inputFile.seek(0,0)
    outfd = memfd_create(outputFileName, flags=0)
    infd = inputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    # 4 bytes per sample (pcm_s32le) for every channel.
    length = int(nbChannels*sampleRate*4*nbPackets*packetDuration/1000)

    command = ['ffmpeg', '-y', '-loglevel', 'quiet', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-frames:a:%d' % subChannel, '%d' % (nbPackets+1),
               '-c:a', 'pcm_s32le', '-sample_rate', '%d' % sampleRate, '-channels', '%d' % nbChannels, '-f', 's32le', '/proc/self/fd/%d' % outfd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffmpeg:
        status = ffmpeg.wait()

    if status != 0:
        logger.error('Sound extraction returns error code: %d' % status)
        close(outfd)
        return None, None

    lseek(outfd, 0, SEEK_SET)
    sound = read(outfd, length)

    if len(sound) != length:
        logger.info("Received %d bytes but %d were expected (channels=%d, freq=%d, packets=%d, duration=%d ms)." % (len(sound), length, nbChannels, sampleRate, nbPackets, packetDuration))
        close(outfd)
        return None, None

    return sound, outfd
 | |
| 
 | |
def dumpPPM(pictures, prefix, temporaries):
    """Split a buffer of concatenated PPM pictures into one file per picture.

    pictures: bytes holding back-to-back binary PPM images, each starting
        with the header "P6\\nWIDTH HEIGHT\\nMAXVALUE\\n" followed by
        3*WIDTH*HEIGHT bytes of pixel data (one byte per sample is assumed).
    prefix: pictures are written to '<prefix>-NNN.ppm', NNN counting from 0.
    temporaries: list receiving the file object of every file created; the
        files are left open for the caller to dispose of.

    Processing stops, after logging an error, at the first picture that is
    not a PPM, has unreadable dimensions, is truncated, or whose file cannot
    be created. Nothing is returned.
    """
    logger = logging.getLogger(__name__)

    dimensionsPattern = re.compile(rb'^(?P<width>[0-9]+) (?P<height>[0-9]+)\n$')
    stream = BytesIO(pictures)
    total = len(pictures)
    picture = 0
    while stream.tell() < total:
        start = stream.tell()
        filename = '%s-%03d.ppm' % (prefix, picture)
        magic = stream.readline()
        dimensions = stream.readline()
        maxvalue = stream.readline()
        if magic != b'P6\n':
            logger.error('Not a PPM picture')
            return

        m = dimensionsPattern.match(dimensions)
        if m is None:
            logger.error('Impossible to parse dimensions of picture')
            return
        width = int(m['width'])
        height = int(m['height'])

        # Use the header actually read rather than a formula on the digits.
        headerLen = len(magic) + len(dimensions) + len(maxvalue)
        length = headerLen + 3*width*height
        data = pictures[start:start+length]
        if len(data) != length:
            logger.error('Truncated PPM picture: %s' % filename)
            return

        try:
            out = open(filename, 'wb')
        except IOError:
            logger.error('Impossible to create file: %s' % filename)
            return
        temporaries.append(out)

        out.write(data)
        out.flush()

        stream.seek(start + length)
        picture += 1
 | |
|         
 | |
def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpMemFD=False):
    """Re-encode the [begin, end[ slice of every stream into '<filesPrefix>.mkv'.

    inputFile: open file object of the source movie.
    begin, end: bounds of the slice; nothing is done unless begin < end.
    streams: stream dictionaries as returned by getStreams.
    filesPrefix: prefix of every file created here.
    nbFrames, width, height: passed to extractPictures for video streams.
    temporaries: list receiving every file object created on disk.
    dumpMemFD: when True, the decoded pictures and sound are also written
        to disk (debugging aid).

    Video streams are decoded to pictures and audio streams to PCM, both in
    memory files; subtitle streams are replaced by './empty.idx'. A single
    ffmpeg run then encodes all of them with the codec of the original
    stream.

    Returns the file object of the resulting MKV, or None when begin >= end,
    when a file cannot be created, or when the encoding fails. The process
    exits with status -1 when pictures, packets or sound cannot be extracted.
    """
    logger = logging.getLogger(__name__)

    if not begin < end:
        # Nothing to be done. We are already at a i-frame boundary.
        return None

    encoderParams = [ 'ffmpeg', '-y', '-loglevel', 'quiet' ]
    inputParams = []
    codecsParams = []
    videoID=0
    audioID=0
    subTitleID=0
    memfds = []
    try:
        for stream in streams:
            if stream['codec_type'] == 'video':
                logger.info("Extracting video stream v:%d" % videoID)
                frameRate = stream['r_frame_rate']
                pattern = re.compile('^(?P<numerator>[0-9]+)/(?P<denominator>[0-9]+)$')
                m = pattern.match(frameRate)
                if m is not None:
                    frameRate = float(m['numerator']) / float(m['denominator'])
                pixelFormat = stream['pix_fmt']
                # TODO: do something with sample/display aspect ratio,
                # chroma location and field order.
                # When interlaced TOP or BOTTOM
                #  -top 1 -flags:v +ilme+ildct
                #  -top 0 -flags:v +ilme+ildct
                # ======================================= #
                # TODO: adjust SAR and DAR
                # https://superuser.com/questions/907933/correct-aspect-ratio-without-re-encoding-video-file
                codec = stream['codec_name']
                imagesBytes, memfd = extractPictures(inputFile=inputFile, begin=begin, nbFrames=nbFrames, width=width, height=height)
                if imagesBytes is None:
                    exit(-1)

                memfds.append(memfd)

                if dumpMemFD:
                    dumpPPM(imagesBytes, '%s-%d' % (filesPrefix,videoID), temporaries)

                # We rewind to zero the memory file descriptor
                lseek(memfd, 0, SEEK_SET)
                set_inheritable(memfd, True)

                inputParams.extend(['-framerate', '%f'%frameRate, '-f', 'image2pipe', '-i', '/proc/self/fd/%d' % memfd])
                codecsParams.extend(['-c:v:%d' % videoID, codec, '-pix_fmt', pixelFormat])
                # Only forward the colour properties the stream declares,
                # instead of failing on a missing key.
                colorOptions = (('-colorspace', 'color_space'), ('-color_primaries', 'color_primaries'),
                                ('-color_trc', 'color_transfer'), ('-color_range', 'color_range'))
                for option, key in colorOptions:
                    if key in stream:
                        codecsParams.extend(['%s:v:%d' % (option, videoID), stream[key]])
                videoID=videoID+1
            elif stream['codec_type'] == 'audio':
                logger.info("Extracting audio stream: a:%d" % audioID)
                sampleRate = int(stream['sample_rate'])
                nbChannels = int(stream['channels'])
                bitRate = int(stream['bit_rate'])
                codec = stream['codec_name']
                if 'tags' in stream:
                    if 'language' in stream['tags']:
                        codecsParams.extend(['-metadata:s:a:%d' % audioID, 'language=%s' % stream['tags']['language']])
                packets = getFramesInStream(inputFile=inputFile, begin=begin, end=end, streamKind='a', subStreamId=audioID)
                if packets is None:
                    # getFramesInStream has already logged the reason.
                    exit(-1)
                nbPackets = len(packets)
                logger.debug("Found %d packets to be extracted from audio track." % nbPackets)
                # Without any packet the expected amount of sound is zero.
                packetDuration = packets[0]['duration'] if nbPackets > 0 else 0

                tmpname = '%s-%d.pcm' % (filesPrefix,audioID)

                soundBytes , memfd = extractSound(inputFile=inputFile, begin=begin, nbPackets=nbPackets, packetDuration=packetDuration, outputFileName=tmpname, sampleRate=sampleRate, nbChannels=nbChannels)

                if soundBytes is None:
                    exit(-1)

                memfds.append(memfd)

                if dumpMemFD:
                    try:
                        output = open(tmpname,'w')
                    except IOError:
                        logger.error('Impossible to create file: %s' % tmpname)
                        return None

                    outfd = output.fileno()
                    pos = 0
                    while pos < len(soundBytes):
                        pos+=write(outfd, soundBytes[pos:])
                    temporaries.append(output)

                # We rewind to zero the memory file descriptor
                lseek(memfd, 0, SEEK_SET)
                set_inheritable(memfd, True)

                inputParams.extend(['-f', 's32le', '-ar', '%d'%sampleRate, '-ac', '%d'%nbChannels, '-i', '/proc/self/fd/%d' % memfd])
                codecsParams.extend(['-c:a:%d' % audioID, codec, '-b:a:%d' % audioID, '%d' % bitRate])
                audioID=audioID+1
            elif stream['codec_type'] == 'subtitle':
                logger.info("Extracting a subtitle stream: s:%d" % subTitleID)
                codec = stream['codec_name']
                inputParams.extend(['-i', './empty.idx'])
                if 'tags' in stream:
                    if 'language' in stream['tags']:
                        codecsParams.extend(['-metadata:s:s:%d' % subTitleID, 'language=%s' % stream['tags']['language']])
                codecsParams.extend(['-c:s:%d' % subTitleID, 'copy'])
                subTitleID=subTitleID+1
            else:
                logger.error("Unknown stream type: %s" % stream['codec_type'])

        # Create a new MKV movie with all streams that have been extracted.
        encoderParams.extend(inputParams)
        for index in range(0,videoID+audioID+subTitleID):
            encoderParams.extend(['-map', '%d' % index])
        encoderParams.extend(codecsParams)
        fileName = '%s.mkv' % filesPrefix
        try:
            output = open(fileName,'w')
        except IOError:
            logger.error('Impossible to create file: %s' % fileName)
            return None

        outfd = output.fileno()
        set_inheritable(outfd, True)
        # TODO: manage interlaced to previous parameters.
        encoderParams.extend(['-top', '1', '-flags:v', '+ilme+ildct', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-f', 'matroska', '/proc/self/fd/%d' % outfd])

        logger.info('Encoding video: %s' % fileName)
        with Popen(encoderParams, stdout=PIPE, close_fds=False) as ffmpeg:
            status = ffmpeg.wait()

        if status != 0:
            logger.error('Encoding failed with status code: %d' % status)
            # The file is not handed to anybody: do not leave it open.
            output.close()
            return None

        temporaries.append(output)
        return output
    finally:
        # The memory files are only needed while ffmpeg runs; release them on
        # every exit path.
        for memfd in memfds:
            close(memfd)
 | |
| 
 | |
| # Merge a list of mkv files passed as input, and produce a new MKV as output
 | |
def mergeMKVs(inputs, outputName):
    """Concatenate MKV files with mkvmerge into a new file named *outputName*.

    inputs: iterable of open file objects; None entries are skipped. The
        first file is passed as is and every following one with a '+'
        prefix (mkvmerge's syntax for appending to the previous file).
    outputName: path of the file to create.

    All files are handed to mkvmerge through /proc/self/fd/<fd>; the
    descriptors are made inheritable for the duration of the call and reset
    afterwards.

    Returns the file object of the output, or None when it cannot be
    created. mkvmerge exit status 1 is logged as a warning together with the
    collected warning lines, and status 2 as an error.
    """
    logger = logging.getLogger(__name__)

    try:
        out = open(outputName, 'w')
    except IOError:
        logger.error('Impossible to create file: %s' % outputName)
        return None

    outfd = out.fileno()
    fds = [outfd]
    set_inheritable(outfd, True)

    mergeParams = ['mkvmerge']
    first = True
    for mkv in inputs:
        if mkv is None:
            continue
        fd = mkv.fileno()
        fds.append(fd)
        set_inheritable(fd, True)
        if first:
            mergeParams.append('/proc/self/fd/%d' % fd)
            first = False
        else:
            mergeParams.append('+/proc/self/fd/%d' % fd)

    mergeParams.extend(['-o', '/proc/self/fd/%d' % outfd])

    # We merge all files.
    warnings = []
    progressPattern = re.compile('^Progression : (?P<progress>[0-9]{1,3})%$')
    with Popen(mergeParams, stdout=PIPE, close_fds=False) as mkvmerge:
        pb = tqdm(TextIOWrapper(mkvmerge.stdout, encoding="utf-8"), total=100, unit='%', desc='Merging')
        for line in pb:
            if line.startswith('Progression :'):
                m = progressPattern.match(line)
                if m is None:
                    logger.error('Impossible to parse progress')
                    # Skip the update: there is no match to read from.
                    continue
                pb.n = int(m['progress'])
                pb.update()
            elif line.startswith('Avertissement'):
                warnings.append(line)

        status = mkvmerge.wait()
        if status == 1:
            logger.warning('Merging returns warning')
            for w in warnings:
                logger.warning(w)
        elif status == 2:
            logger.error('Merging returns errors')

    for fd in fds:
        set_inheritable(fd, False)

    return out
 | |
| 
 | |
def findSubtitlesTracks(filename):
    """List the subtitle streams of the file named *filename*.

    Equivalent command line:
    ffprobe -i FILE -select_streams s -show_entries stream=index:stream_tags=language -of json

    Returns the 'streams' entry of ffprobe's JSON report, or None (after
    logging an error) when the report cannot be parsed or has no such entry.
    """
    logger = logging.getLogger(__name__)

    command = ['ffprobe', '-i', filename, '-select_streams', 's', '-show_entries', 'stream=index:stream_tags=language', '-of', 'json']
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()

    try:
        report = json.loads(out)
    except ValueError:
        # Empty or truncated output: treat it like a report without 'streams'.
        report = {}

    if 'streams' in report:
        return report['streams']

    logger.error('Impossible to retrieve subtitle streams of file')
    return None
 | |
| 
 | |
def extractSubTitleTrack(inputFileName, index, lang):
    """Run mkvextract on one track of *inputFileName* and log its output.

    Command line form: mkvextract video.mkv tracks position:name [position:name]
    Here *index* is the position and *lang* is used as the name.

    Every line mkvextract prints on its standard output is logged at INFO
    level. Nothing is returned and the exit status is not inspected.
    """
    logger = logging.getLogger(__name__)

    command = ['mkvextract', inputFileName, 'tracks', '%d:%s' % (index,lang)]
    with Popen(command, stdout=PIPE, close_fds=False) as mkvextract:
        out, _ = mkvextract.communicate()

    # 'out' is a bytes object: decode it and log it line by line.
    for line in out.decode('utf-8', errors='replace').splitlines():
        logger.info(line)
 | |
| 
 | |
|     
 | |
|     
 | |
def main():
    """Command-line entry point: cut parts out of a recording into an MKV.

    Steps performed, in order:
      1. Parse the command line and check that external tools are available.
      2. Parse, sort and validate the requested time intervals (-p/--part).
      3. Probe the input file; convert TS -> MP4 -> MKV or MP4 -> MKV when
         needed, otherwise use the input as is.
      4. For every interval, extract the frames before the first I-frame,
         the I-frame-aligned middle section and the frames after the last
         I-frame, then merge them into 'part-N.mkv'.
      5. Merge (or copy) the parts into the requested output file.
      6. Optionally inspect subtitle tracks (-s/--srt).
      7. Remove temporary files unless -k/--keep is given, and log the
         positions where the cuts should be checked.

    The process exits with status -1 on any unrecoverable error.
    """
    logger = logging.getLogger(__name__)
    coloredlogs.install()
    locale.setlocale(locale.LC_ALL, 'fr_FR.UTF8')
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", dest='inputFile', type=str, required=True, help="Input file to process (can be .ts, .mp4 or .mkv).")
    parser.add_argument("-o", "--output", dest='outputFile', type=str, required=True, help="Output MKV file to produce.")
    parser.add_argument("-p", "--part", dest='parts', nargs='+', required=False, action='append', metavar="hh:mm:ss[.mmm]-hh:mm:ss[.mmm]", help="Extract this exact part of the original file.")
    parser.add_argument("-k", "--keep", action='store_true', help="Do not cleanup temporary files after processing.")
    parser.add_argument("--dump-memory", action='store_true', dest='dump', help="For debug purpose, dump all memory mapping of headers (and trailers) before (after) each part. They are kept in memory only otherwise.")
    parser.add_argument("-s","--srt", action='store_true', dest='srt', help="Dump subtitles ")

    args = parser.parse_args()
    logger.debug("Arguments: %s" % args)

    checkRequiredTools()

    # Flatten args.parts: with nargs='+' and action='append' it is a list of
    # lists (one inner list per occurrence of -p), or None when -p is absent.
    intervals = []
    if args.parts is not None:
        intervals = [subpart for part in args.parts for subpart in part]

    # Parse each interval
    parts = []
    for interval in intervals:
        ts1, ts2 = parseTimeInterval(interval)
        if ts1 is None or ts2 is None:
            logger.error("Illegal time interval: %s" % interval)
            exit(-1)
        parts.append((ts1, ts2))

    # Sort intervals
    parts.sort(key=cmp_to_key(compareTimeInterval))

    # Check that no intervals are overlapping
    prevts = timedelta(0)
    for ts1, ts2 in parts:
        if prevts > ts1:
            logger.error('Intervals are overlapping')
            exit(-1)
        prevts = ts2

    temporaries = []

    # Intermediate files are created in the current directory, named after
    # the input file.
    basename = os.path.splitext(os.path.basename(args.inputFile))[0]
    mp4filename = basename+'.mp4'
    mkvfilename = basename+'.mkv'

    try:
        inputFile = open(args.inputFile, mode='r')
    except IOError:
        logger.error("Impossible to open %s" % args.inputFile)
        exit(-1)

    formatOfFile = getFormat(inputFile)

    if formatOfFile is None:
        exit(-1)

    duration = timedelta(seconds=float(formatOfFile['duration']))
    logger.info("Durée de l'enregistrement: %s" % duration)

    # Map the probed format name onto one of the supported formats.
    formatName = formatOfFile.get('format_name')
    formatOfFile = next((f for f in SupportedFormat if str(f) == formatName), None)

    if formatOfFile is None:
        # Do not fall through to the "Already in MKV" branch with a file of
        # unknown format.
        logger.error('Unsupported format of file')
        exit(-1)

    if formatOfFile == SupportedFormat.TS:
        logger.info("Converting TS to MP4 (to fix timestamps).")
        try:
            with open(mp4filename, 'w') as mp4:
                ffmpegConvert(inputFile, 'mpegts', mp4, 'mp4', duration)
                temporaries.append(mp4)
                logger.info("Converting MP4 to MKV.")
                try:
                    mkv = open(mkvfilename, 'w')
                except IOError:
                    logger.error('Impossible to create file: %s' % mkvfilename)
                    exit(-1)

                ffmpegConvert(mp4, 'mp4', mkv, 'matroska', duration)
                temporaries.append(mkv)
        except IOError as e:
            logger.error('I/O error while converting %s: %s' % (args.inputFile, e))
            exit(-1)

    elif formatOfFile == SupportedFormat.MP4:
        logger.info("Converting MP4 to MKV")
        try:
            mkv = open(mkvfilename, 'w')
        except IOError:
            logger.error('Impossible to create file: %s' % mkvfilename)
            exit(-1)
        # The MP4 source is the input file itself (no intermediate MP4
        # exists in this branch).
        ffmpegConvert(inputFile, 'mp4', mkv, 'matroska', duration)
        temporaries.append(mkv)
    else:
        logger.info("Already in MKV")
        mkv = inputFile

    streams = getStreams(mkv)

    # Look for the default video stream and remember its dimensions.
    mainVideo = None
    for stream in streams:
        if stream['codec_type'] == 'video' and stream['disposition']['default'] == 1:
            mainVideo = stream
            width = stream['width']
            height = stream['height']

    if mainVideo is None:
        logger.error('Impossible to find main video stream.')
        exit(-1)

    # For each part
    partnum = 0
    mkvparts = []
    checks = []
    pos = timedelta()

    for ts1, ts2 in parts:
        # Find the timestamp of the 'I' frame nearest to (but after) the beginning of the part.
        # Find the timestamp of the 'I' frame nearest to (but before) the end of the part.
        # We then have
        # begin  -----    frame  --------- frame  ---------  end.
        #  'B/P'  'B/P'*   'I'               'I'   'B/P'*   'B/P'
        # If the first frame is already an 'I' frame, there is nothing to do (same for the end).
        # Otherwise we extract the 'B' or 'P' frames from the beginning up to the 'I' frame (excluded).

        partnum = partnum + 1

        headFrames = getNearestIFrame(mkv, ts1, before=False)
        if headFrames is None:
            exit(-1)

        tailFrames = getNearestIFrame(mkv, ts2, before=True)
        if tailFrames is None:
            exit(-1)

        nbHeadFrames, headIFrame = headFrames
        nbTailFrames, tailIFrame = tailFrames

        logger.info("Found %d frames between beginning of current part and first I-frame" % nbHeadFrames)
        logger.info("Found %d frames between last I-frame and end of current part" % nbTailFrames)

        headIFrameTS = timedelta(seconds=float(headIFrame['pts_time']))
        tailIFrameTS = timedelta(seconds=float(tailIFrame['pts_time']))

        checks.append(pos+headIFrameTS-ts1)

        subparts = []

        if nbHeadFrames > 0:
            # We extract all frames from the beginning up to the frame that immediately precedes the I-frame.
            head = extractAllStreams(inputFile=mkv, begin=ts1, end=headIFrameTS, nbFrames=nbHeadFrames-1, filesPrefix='part-%d-head' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
            subparts.append(head)

        # Creating MKV file that corresponds to current part between I-frames
        try:
            internal = open('part-%d-internal.mkv' % partnum, 'w')
        except IOError:
            logger.error('Impossible to create file: part-%d-internal.mkv' % partnum)
            exit(-1)
        temporaries.append(internal)
        extractMKVPart(inputFile=mkv, outputFile=internal, begin=headIFrameTS, end=tailIFrameTS)
        subparts.append(internal)

        if nbTailFrames > 0:
            # We extract all frames from the I-frame (including it) up to the end.
            tail = extractAllStreams(inputFile=mkv, begin=tailIFrameTS, end=ts2, nbFrames=nbTailFrames, filesPrefix='part-%d-tail' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
            subparts.append(tail)

        logger.info('Merging: %s' % subparts)
        part = mergeMKVs(inputs=subparts, outputName="part-%d.mkv" % partnum)
        mkvparts.append(part)
        temporaries.append(part)

        pos = pos+tailIFrameTS-ts1

    # We need to check the end also
    checks.append(pos)

    nbParts = len(mkvparts)
    if nbParts > 1:
        logger.info('Merging: %s' % mkvparts)
        mergeMKVs(inputs=mkvparts, outputName=args.outputFile)
    elif nbParts == 1:
        copyfile('part-1.mkv', args.outputFile)
    else:
        logger.info("Nothing else to do.")

    if args.srt:
        logger.info("Find subtitles tracks and language.")
        subtitles = findSubtitlesTracks(args.outputFile)
        if subtitles is None:
            # The probe failed (e.g. no output file was produced); nothing
            # to iterate over.
            subtitles = []

        # Group subtitle stream indexes by language.
        sts = {}
        for subtitle in subtitles:
            index = subtitle['index']
            if 'tags' in subtitle and 'language' in subtitle['tags']:
                lang = subtitle['tags']['language']
                sts.setdefault(lang, []).append(index)
            else:
                logger.error("Dropping subtitle: %s because it is missing language indication" % index)

        # NOTE(review): the file names below are computed but no extraction
        # is performed with them in this function yet.
        for lang in sts:
            indexes = sts[lang]
            if len(indexes) == 0:
                # Nothing to do. This should not happen.
                continue
            if len(indexes) == 1:
                index = indexes[0]
                filename = 'essai-%s.srt' % lang
            elif len(indexes) > 1:
                nbsrt = 1
                for index in indexes:
                    filename = 'essai-%s-%d.srt' % (lang, nbsrt)
                    nbsrt+=1

    if not args.keep:
        logger.info("Cleaning temporary files")
        for f in temporaries:
            # Resolve the path before closing the file object.
            path = os.path.realpath(f.name)
            logger.info("Removing: %s" % path)
            f.close()
            unlink(path)

    for c in checks:
        logger.info("Please check cut smoothness at: %s" % c)
 | |
|     
 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
 |