924 lines
35 KiB
Python
Executable File
924 lines
35 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import argparse
|
|
import locale
|
|
import re
|
|
from sys import exit
|
|
from datetime import datetime,timedelta,time
|
|
import coloredlogs, logging
|
|
from functools import cmp_to_key
|
|
from subprocess import Popen, PIPE
|
|
from os import read, write, lseek, pipe, set_inheritable, memfd_create, SEEK_SET, close, unlink
|
|
import os.path
|
|
from io import BytesIO, TextIOWrapper
|
|
import json
|
|
from enum import Enum, IntEnum, unique, auto
|
|
import shutil
|
|
from tqdm import tqdm, trange
|
|
from select import select
|
|
from math import floor, ceil, log
|
|
from shutil import copyfile, which
|
|
|
|
# Useful SPS/PPS discussion
|
|
# TODO: improve the handling of SPS and PPS header mismatches when merging MKV files with mkvmerge, to remove warnings.
|
|
# https://copyprogramming.com/howto/including-sps-and-pps-in-a-raw-h264-track
|
|
# https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2390
|
|
|
|
|
|
def checkRequiredTools():
    """Verify that the external programs driven by this script are on the PATH.

    As soon as a required tool cannot be located an error is logged and the
    process terminates with exit code -1. Missing optional tools are only
    reported at info level.
    """
    log = logging.getLogger(__name__)

    for name in ('ffmpeg', 'ffprobe', 'mkvmerge'):
        if which(name) is not None:
            continue
        log.error('Required tool: %s is missing.' % name)
        exit(-1)

    absent = [name for name in ('mkvextract', 'vobsubocr') if which(name) is None]
    for name in absent:
        log.info('Optional tool: %s is missing.' % name)
|
|
|
|
|
|
@unique
class SupportedFormat(IntEnum):
    """Container formats accepted as input.

    ``str()`` of a member yields a format name string; the values are the
    ones this file compares against the ``format_name`` field of the probed
    file.
    """
    TS = 1
    MP4 = 2
    Matroska = 3

    def __str__(self):
        # Looked up by member name, with the same fallback text as before.
        names = {
            'TS': 'mpegts',
            'MP4': 'mov,mp4,m4a,3gp,3g2,mj2',
            'Matroska': 'matroska,webm',
        }
        return names.get(self.name, 'Unsupported format')
|
|
|
|
# Extract SPS/PPS
|
|
# https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2390
|
|
# ffmpeg -i <InputFile (before concatenation)> -c:v copy -an -sn -bsf:v trace_headers -t 0.01 -report -loglevel 0 -f null -
|
|
|
|
|
|
def getFormat(inputFile):
    """Return the ``format`` section that ffprobe reports for an open file.

    inputFile: open file object. It is rewound and its descriptor is marked
        inheritable so that the ffprobe child process can reach it through
        ``/proc/self/fd/<fd>``.

    Returns the ``format`` dictionary decoded from ffprobe's JSON output, or
    None (after logging an error) when that output is not valid JSON or has
    no ``format`` entry.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    inputFile.seek(0, 0)
    set_inheritable(infd, True)
    command = ['ffprobe', '-loglevel', 'quiet', '-show_format', '-of', 'json', '-i', '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()

    try:
        out = json.loads(out)
    except ValueError:
        # Unparsable output is handled like a missing 'format' entry instead
        # of letting the decoding error escape to the caller.
        out = {}

    if isinstance(out, dict) and 'format' in out:
        return out['format']

    logger.error('Impossible to retrieve format of file')
    return None
|
|
|
|
def getStreams(inputFile):
    """Return the list of streams that ffprobe reports for an open file.

    inputFile: open file object. It is rewound and its descriptor is marked
        inheritable so that the ffprobe child process can reach it through
        ``/proc/self/fd/<fd>``.

    Returns the ``streams`` list decoded from ffprobe's JSON output, or None
    (after logging an error) when that output is not valid JSON or has no
    ``streams`` entry.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    inputFile.seek(0, 0)
    set_inheritable(infd, True)
    command = ['ffprobe', '-loglevel', 'quiet', '-show_streams', '-of', 'json', '-i', '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()

    try:
        out = json.loads(out)
    except ValueError:
        # Unparsable output is handled like a missing 'streams' entry instead
        # of letting the decoding error escape to the caller.
        out = {}

    if isinstance(out, dict) and 'streams' in out:
        return out['streams']

    logger.error('Impossible to retrieve streams inside file')
    return None
|
|
|
|
def parseTimestamp(ts):
    """Convert a ``hh:mm:ss[.ffffff]`` string into a timedelta.

    ts: text such as ``'01:02:03'`` or ``'01:02:03.5'``. The optional part
        after the dot is a decimal fraction of a second (1 to 6 digits).

    Returns the corresponding timedelta, or None (after logging an error)
    when the text does not match the expected layout or a field is out of
    range.
    """
    logger = logging.getLogger(__name__)

    m = re.match(r'^(?P<hour>[0-9]{1,2}):(?P<minute>[0-9]{1,2}):(?P<second>[0-9]{1,2})(\.(?P<us>[0-9]{1,6}))?$', ts)
    if m is None:
        logger.error("Impossible to parse timestamp: %s" % ts)
        return None

    hour = int(m['hour'])
    minute = int(m['minute'])
    second = int(m['second'])
    # The digits after the dot are a decimal fraction: '.5' means 500000
    # microseconds, so pad on the right to six digits before converting.
    fraction = m['us']
    us = int(fraction.ljust(6, '0')) if fraction is not None else 0

    # The pattern only admits digits, so only the upper bounds need checking
    # (and the fraction can never exceed 999999).
    if hour > 23:
        logger.error("hour must be in [0,24[")
        return None
    if minute > 59:
        logger.error("minute must be in [0,60[")
        return None
    if second > 59:
        logger.error("second must be in [0,60[")
        return None

    return timedelta(hours=hour, minutes=minute, seconds=second, microseconds=us)
|
|
|
|
def parseTimeInterval(interval):
    """Parse ``hh:mm:ss[.mmm]-hh:mm:ss[.mmm]`` into a pair of timedeltas.

    interval: text describing a start and an end time separated by ``-``.
        The optional part after each dot is a decimal fraction of a second
        (1 to 3 digits).

    Returns ``(start, end)`` as timedeltas. On any failure (malformed text,
    field out of range, end before start) an error is logged and
    ``(None, None)`` is returned, so the result can always be unpacked.
    """
    logger = logging.getLogger(__name__)

    intervalRegExp = r'^(?P<hour1>[0-9]{1,2}):(?P<minute1>[0-9]{1,2}):(?P<second1>[0-9]{1,2})(\.(?P<ms1>[0-9]{1,3}))?-(?P<hour2>[0-9]{1,2}):(?P<minute2>[0-9]{1,2}):(?P<second2>[0-9]{1,2})(\.(?P<ms2>[0-9]{1,3}))?$'
    m = re.match(intervalRegExp, interval)
    if m is None:
        logger.error("Impossible to parse time interval")
        # Callers unpack the result, so failure is reported as a pair too.
        return None, None

    bounds = []
    for suffix in ('1', '2'):
        hour = int(m['hour' + suffix])
        minute = int(m['minute' + suffix])
        second = int(m['second' + suffix])
        # The digits after the dot are a decimal fraction: '.5' means 500 ms,
        # so pad on the right to three digits before converting.
        fraction = m['ms' + suffix]
        ms = int(fraction.ljust(3, '0')) if fraction is not None else 0

        # The pattern only admits digits, so only upper bounds need checking.
        if hour > 23:
            logger.error("hour must be in [0,24[")
            return None, None
        if minute > 59:
            logger.error("minute must be in [0,60[")
            return None, None
        if second > 59:
            logger.error("second must be in [0,60[")
            return None, None

        bounds.append(timedelta(hours=hour, minutes=minute, seconds=second, milliseconds=ms))

    ts1, ts2 = bounds
    if ts2 < ts1:
        logger.error("Non monotonic interval")
        return None, None

    return (ts1, ts2)
|
|
|
|
def compareTimeInterval(interval1, interval2):
    """Three-way comparison of two ``(start, end)`` intervals.

    Returns -1 when interval1 ends strictly before interval2 starts, 1 when
    interval2 ends strictly before interval1 starts, and 0 in every other
    case.
    """
    start1, end1 = interval1
    start2, end2 = interval2

    if end1 < start2:
        return -1
    return 1 if end2 < start1 else 0
|
|
|
|
|
|
|
|
def ffmpegConvert(inputFile, inputFormat, outputFile, outputFormat, duration):
    """Remux inputFile into outputFile with ffmpeg while showing progress.

    inputFile, outputFile: open file objects; their descriptors are marked
        inheritable and handed to ffmpeg as ``/proc/self/fd/<fd>`` paths.
    inputFormat, outputFormat: ffmpeg format names passed to ``-f``.
    duration: timedelta used as the total of the progress bar (in seconds).

    Video and audio are copied, subtitles are converted to ``dvdsub``. A non
    zero ffmpeg exit status is logged as an error; nothing is returned.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    oneSecond = timedelta(seconds=1)
    # TODO: canvas size to be fixed !
    command = ['ffmpeg', '-y', '-loglevel', 'quiet', '-progress', '/dev/stdout', '-canvas_size', '720x560', '-f', inputFormat, '-i', '/proc/self/fd/%d' % infd,
               '-map', '0:v', '-map', '0:a', '-map', '0:s', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-vcodec', 'copy', '-acodec', 'copy', '-scodec', 'dvdsub',
               '-f', outputFormat, '/proc/self/fd/%d' % outfd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffmpeg:
        pb = tqdm(TextIOWrapper(ffmpeg.stdout, encoding="utf-8"), total=int(duration/oneSecond), unit='s', desc='Conversion')
        for line in pb:
            if not line.startswith('out_time='):
                continue
            ts = parseTimestamp(line.split('=')[1].strip())
            if ts is None:
                # The reported value was not a plain hh:mm:ss timestamp;
                # skip this progress update rather than dividing None.
                continue
            pb.n = int(ts/oneSecond)
            pb.update()
        status = ffmpeg.wait()
        if status != 0:
            logger.error('Conversion failed with status code: %d' % status)
|
|
|
|
|
|
def getFramesInStream(inputFile, begin, end, streamKind, subStreamId=0):
    """List the frames of one stream whose timestamp lies within [begin, end].

    inputFile: open file object; its descriptor is marked inheritable and
        handed to ffprobe as a ``/proc/self/fd/<fd>`` path.
    begin, end: timedelta bounds, also used to build ``-read_intervals``.
    streamKind: stream type letter for ``-select_streams`` (this file uses
        'v' and 'a').
    subStreamId: index of the stream among those of that type.

    Returns the list of frame dictionaries decoded from ffprobe's JSON
    output, restricted to begin <= timestamp <= end. Returns None (after
    logging an error) when ffprobe fails, when its output has no ``frames``
    entry, or when a frame carries no timestamp.
    """
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()
    set_inheritable(infd, True)

    command = ['ffprobe', '-loglevel', 'quiet', '-read_intervals', ('%s%%%s' % (begin, end)), '-show_entries', 'frame',
               '-select_streams', '%s:%d' % (streamKind, subStreamId), '-of', 'json', '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()
        status = ffprobe.wait()

    # Check the exit status first: the output of a failed run is not worth
    # decoding and may not be JSON at all.
    if status != 0:
        logger.error('ffprobe failed with status code: %d' % status)
        return None

    try:
        frames = json.loads(out)
    except ValueError:
        frames = {}

    if not isinstance(frames, dict) or 'frames' not in frames:
        logger.error('Impossible to retrieve frames inside file around [%s,%s]' % (begin, end))
        return None

    res = []
    for frame in frames['frames']:
        if 'pts_time' in frame:
            pts_time = frame['pts_time']
        elif 'pkt_pts_time' in frame:
            pts_time = frame['pkt_pts_time']
        else:
            logger.error('Impossible to find timestamp of frame %s' % frame)
            return None
        # Convert explicitly, as the rest of this file does: timedelta()
        # rejects a textual number of seconds.
        ts = timedelta(seconds=float(pts_time))
        if begin <= ts <= end:
            res.append(frame)
    return res
|
|
|
|
def getNearestIFrame(inputFile, timestamp, before=True, delta=timedelta(seconds=2)):
    """Locate the I-frame nearest to a timestamp, on one chosen side of it.

    inputFile: open file object, forwarded to getFramesInStream.
    timestamp: timedelta around which video frames are inspected.
    before: when True, pick the last I-frame at or before `timestamp`;
        otherwise pick the first I-frame at or after it.
    delta: half-width of the inspected window [timestamp-delta,
        timestamp+delta]; the lower bound is clamped to zero.

    Returns ``(nbFrames, iframe)`` where `iframe` is the frame dictionary of
    the selected I-frame and `nbFrames` counts the frames of the window whose
    timestamp lies between the I-frame and `timestamp` (both inclusive).
    Returns None (after logging an error) when the frames cannot be listed or
    when no suitable I-frame exists in the window.
    """
    logger = logging.getLogger(__name__)

    tbegin = max(timestamp - delta, timedelta())
    tend = timestamp + delta

    logger.debug('Looking for iframe in [%s, %s]' % (tbegin, tend))

    frames = getFramesInStream(inputFile=inputFile, begin=tbegin, end=tend, streamKind='v')
    if frames is None:
        return None

    iframe = None
    for frame in frames:
        if frame.get('pict_type') != 'I':
            continue
        ts = timedelta(seconds=float(frame['pts_time']))
        if before:
            # Keep overwriting: the last match is the closest one before.
            if ts <= timestamp:
                iframe = frame
        elif ts >= timestamp:
            # The first match is the closest one after.
            iframe = frame
            break

    if iframe is None:
        logger.error("Impossible to find I-frame around: %s" % timestamp)
        # Report the failure instead of returning names that were never set.
        return None

    logger.info("Found i-frame at: %s" % iframe)

    its = timedelta(seconds=float(iframe['pts_time']))
    nbFrames = 0
    for frame in frames:
        ts = timedelta(seconds=float(frame['pts_time']))
        if before:
            if its <= ts <= timestamp:
                logger.info("Retrieve a frame between %s and %s at %s" % (its, timestamp, ts))
                nbFrames = nbFrames+1
        else:
            if timestamp <= ts <= its:
                logger.info("Retrieve a frame between %s and %s at %s" % (ts, timestamp, its))
                nbFrames = nbFrames+1

    return (nbFrames, iframe)
|
|
|
|
def extractMKVPart(inputFile, outputFile, begin, end):
    """Copy the span [begin, end] of an MKV file using ``mkvmerge --split``.

    inputFile, outputFile: open file objects; both are rewound, their
        descriptors are marked inheritable and handed to mkvmerge as
        ``/proc/self/fd/<fd>`` paths.
    begin, end: bounds inserted into the ``parts:<begin>-<end>`` argument.

    Progress is shown with a tqdm bar. Exit status 1 is logged as a warning
    together with the collected warning lines, exit status 2 as an error.
    Nothing is returned.
    """
    logger = logging.getLogger(__name__)

    logger.info('Extract video between I-frames at %s and %s' % (begin, end))
    inputFile.seek(0, 0)
    outputFile.seek(0, 0)
    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)

    # NOTE(review): the prefixes matched below are French; presumably they
    # rely on mkvmerge running under a French locale - confirm.
    progressPattern = re.compile('^Progression : (?P<progress>[0-9]{1,3})%$')
    warnings = []
    command = ['mkvmerge', '-o', '/proc/self/fd/%d' % outfd, '--split', 'parts:%s-%s' % (begin, end), '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as mkvmerge:
        pb = tqdm(TextIOWrapper(mkvmerge.stdout, encoding="utf-8"), total=100, unit='%', desc='Extraction')
        for line in pb:
            if line.startswith('Progression :'):
                m = progressPattern.match(line)
                if m is None:
                    logger.error('Impossible to parse progress')
                    # Nothing to read the percentage from: skip this line.
                    continue
                pb.n = int(m['progress'])
                pb.update()
            elif line.startswith('Avertissement'):
                warnings.append(line)

        status = mkvmerge.wait()
        if status == 1:
            logger.warning('Extraction returns warning')
            for w in warnings:
                logger.warning(w)
        elif status == 2:
            logger.error('Extraction returns errors')
|
|
|
|
|
|
def extractPictures(inputFile, begin, nbFrames, width=640, height=480):
    """Decode frames into PPM images stored in an anonymous memory file.

    inputFile: open file object; it is rewound and handed to ffmpeg as a
        ``/proc/self/fd/<fd>`` path.
    begin: start position passed to ffmpeg's ``-ss``.
    nbFrames: number of video frames to decode (``-vframes``).
    width, height: size of the produced pictures (``-s``).

    Returns ``(images, outfd)``: the concatenated PPM bytes and the memfd
    descriptor holding them, rewound to offset 0 (the caller closes it).
    Returns ``(None, None)`` when ffmpeg fails or when the amount of data
    differs from the expected size; the memfd is closed in that case.
    """
    logger = logging.getLogger(__name__)

    inputFile.seek(0, 0)
    infd = inputFile.fileno()
    set_inheritable(infd, True)
    outfd = memfd_create('pictures', flags=0)
    set_inheritable(outfd, True)

    # Each picture starts with the PPM header "P6\nWIDTH HEIGHT\n255\n".
    # Measure the rendered header: a logarithm miscounts the digits of exact
    # powers of ten (10, 100, 1000, ...).
    headerLen = len('P6\n%d %d\n255\n' % (width, height))
    logger.debug('Header length: %d' % headerLen)
    imageLength = width*height*3+headerLen
    length = imageLength*nbFrames
    logger.debug("Estimated length: %d" % length)

    command = ['ffmpeg', '-loglevel', 'quiet', '-y', '-ss', '%s' % begin, '-i', '/proc/self/fd/%d' % infd, '-s', '%dx%d' % (width, height),
               '-vframes', '%d' % nbFrames, '-c:v', 'ppm', '-f', 'image2pipe', '/proc/self/fd/%d' % outfd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffmpeg:
        status = ffmpeg.wait()

    if status != 0:
        logger.error('Conversion failed with status code: %d' % status)
        close(outfd)
        return None, None

    lseek(outfd, 0, SEEK_SET)
    images = read(outfd, length)
    if len(images) != length:
        logger.info("Received %d bytes but %d were expected." % (len(images), length))
        close(outfd)
        return None, None

    lseek(outfd, 0, SEEK_SET)
    return images, outfd
|
|
|
|
def extractSound(inputFile, begin, outputFileName, packetDuration, subChannel=0, nbPackets=0, sampleRate=48000, nbChannels=2):
    """Decode audio into raw 32-bit PCM stored in an anonymous memory file.

    inputFile: open file object; it is rewound and handed to ffmpeg as a
        ``/proc/self/fd/<fd>`` path.
    begin: start position passed to ffmpeg's ``-ss``.
    outputFileName: name given to the memfd.
    packetDuration: duration of one packet; the size formula divides by 1000,
        i.e. it treats this value as milliseconds.
    subChannel: audio stream index used in the ``-frames:a:<n>`` option.
    nbPackets: number of packets expected; ffmpeg is asked for one more.
    sampleRate, nbChannels: PCM layout requested from ffmpeg.

    Returns ``(sound, outfd)``: the PCM bytes (4 bytes per sample and per
    channel) and the memfd descriptor holding them (the caller closes it).
    Returns ``(None, None)`` when ffmpeg fails or when fewer bytes than
    expected are available; the memfd is closed in that case.
    """
    logger = logging.getLogger(__name__)

    inputFile.seek(0, 0)
    outfd = memfd_create(outputFileName, flags=0)
    infd = inputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    length = int(nbChannels*sampleRate*4*nbPackets*packetDuration/1000)

    command = ['ffmpeg', '-y', '-loglevel', 'quiet', '-ss', '%s' % begin, '-i', '/proc/self/fd/%d' % infd, '-frames:a:%d' % subChannel, '%d' % (nbPackets+1),
               '-c:a', 'pcm_s32le', '-sample_rate', '%d' % sampleRate, '-channels', '%d' % nbChannels, '-f', 's32le', '/proc/self/fd/%d' % outfd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffmpeg:
        status = ffmpeg.wait()

    if status != 0:
        logger.error('Sound extraction returns error code: %d' % status)
        close(outfd)
        return None, None

    lseek(outfd, 0, SEEK_SET)
    sound = read(outfd, length)

    if len(sound) != length:
        logger.info("Received %d bytes but %d were expected (channels=%d, freq=%d, packets=%d, duration=%d ms)." % (len(sound), length, nbChannels, sampleRate, nbPackets, packetDuration))
        close(outfd)
        return None, None

    return sound, outfd
|
|
|
|
def dumpPPM(pictures, prefix, temporaries):
    """Split back-to-back binary PPM (P6) images into one file per image.

    pictures: bytes holding the concatenated images, each starting with the
        header "P6\\nWIDTH HEIGHT\\nMAXVALUE\\n" followed by 3 bytes per pixel.
    prefix: picture number k is written to ``<prefix>-kkk.ppm``.
    temporaries: list to which every created file object (left open, already
        flushed) is appended.

    Processing stops, after logging an error, at the first picture that is
    not P6, whose dimensions cannot be parsed, that is truncated, or whose
    file cannot be created. Nothing is returned.
    """
    logger = logging.getLogger(__name__)
    dimensionsPattern = re.compile(rb'^(?P<width>[0-9]+) (?P<height>[0-9]+)\n$')

    pos = 0
    picture = 0
    while pos < len(pictures):
        filename = '%s-%03d.ppm' % (prefix, picture)

        # Read the three header lines in place; this also measures the real
        # header size instead of estimating it from the dimensions.
        headerEnd = pos
        headerLines = []
        for _ in range(3):
            newline = pictures.find(b'\n', headerEnd)
            if newline < 0:
                break
            headerLines.append(pictures[headerEnd:newline + 1])
            headerEnd = newline + 1

        if len(headerLines) != 3 or headerLines[0] != b'P6\n':
            logger.error('Not a PPM picture')
            return

        m = dimensionsPattern.match(headerLines[1])
        if m is None:
            logger.error('Impossible to parse dimensions of picture')
            return
        width = int(m['width'])
        height = int(m['height'])

        length = (headerEnd - pos) + 3*width*height
        if pos + length > len(pictures):
            logger.error('Truncated PPM picture')
            return

        try:
            out = open(filename, 'wb')
        except IOError:
            logger.error('Impossible to create file: %s' % filename)
            return
        temporaries.append(out)

        out.write(pictures[pos:pos+length])
        # The file stays open in `temporaries`; make its content visible now.
        out.flush()
        pos += length
        picture += 1
|
|
|
|
def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpMemFD=False):
    """Re-encode the span [begin, end] of every stream into a small MKV file.

    inputFile: open file object holding the source movie.
    begin, end: bounds of the span; nothing is done unless begin < end.
    streams: stream descriptions (dictionaries with at least 'codec_type').
    filesPrefix: the result is written to ``<filesPrefix>.mkv``; the prefix
        is also used for the optional dump files.
    nbFrames: number of video frames to decode.
    width, height: size of the decoded pictures.
    temporaries: list to which every created file object is appended.
    dumpMemFD: when True, the decoded pictures and sound are also written to
        files.

    Returns the open file object of the produced MKV, or None when there is
    nothing to do or when the output could not be produced. The process is
    terminated with exit code -1 when pictures, audio frames or sound cannot
    be extracted.
    """
    if not begin < end:
        # Nothing to be done. We are already at a i-frame boundary.
        return None

    memfds = []
    try:
        return _encodeExtractedStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpMemFD, memfds)
    finally:
        # Release the in-memory files on every exit path, not only success.
        for memfd in memfds:
            close(memfd)


def _encodeExtractedStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpMemFD, memfds):
    """Decode each stream into a memfd (recorded in `memfds`) and encode them together."""
    logger = logging.getLogger(__name__)
    encoderParams = ['ffmpeg', '-y', '-loglevel', 'quiet']
    inputParams = []
    codecsParams = []
    frameRatePattern = re.compile('^(?P<numerator>[0-9]+)/(?P<denominator>[0-9]+)$')

    videoID = 0
    audioID = 0
    subTitleID = 0
    for stream in streams:
        if stream['codec_type'] == 'video':
            logger.info("Extracting video stream v:%d" % videoID)
            m = frameRatePattern.match(stream['r_frame_rate'])
            if m is None or int(m['denominator']) == 0:
                logger.error('Impossible to parse frame rate: %s' % stream['r_frame_rate'])
                return None
            frameRate = float(m['numerator']) / float(m['denominator'])
            pixelFormat = stream['pix_fmt']
            # TODO: do something with sample/display aspect ratio, chroma
            # location and field order.
            # When interlaced TOP or BOTTOM
            # -top 1 -flags:v +ilme+ildct
            # -top 0 -flags:v +ilme+ildct
            # ======================================= #
            # TODO: adjust SAR and DAR
            # https://superuser.com/questions/907933/correct-aspect-ratio-without-re-encoding-video-file
            codec = stream['codec_name']
            imagesBytes, memfd = extractPictures(inputFile=inputFile, begin=begin, nbFrames=nbFrames, width=width, height=height)
            if imagesBytes is None:
                exit(-1)

            memfds.append(memfd)

            if dumpMemFD:
                dumpPPM(imagesBytes, '%s-%d' % (filesPrefix, videoID), temporaries)

            # We rewind to zero the memory file descriptor
            lseek(memfd, 0, SEEK_SET)
            set_inheritable(memfd, True)

            inputParams.extend(['-framerate', '%f' % frameRate, '-f', 'image2pipe', '-i', '/proc/self/fd/%d' % memfd])
            codecsParams.extend(['-c:v:%d' % videoID, codec, '-pix_fmt', pixelFormat])
            # Forward a colour property only when the stream description has
            # it, instead of failing on a missing key.
            for option, key in (('-colorspace', 'color_space'), ('-color_primaries', 'color_primaries'),
                                ('-color_trc', 'color_transfer'), ('-color_range', 'color_range')):
                if key in stream:
                    codecsParams.extend(['%s:v:%d' % (option, videoID), stream[key]])
            videoID = videoID+1
        elif stream['codec_type'] == 'audio':
            logger.info("Extracting audio stream: a:%d" % audioID)
            sampleRate = int(stream['sample_rate'])
            nbChannels = int(stream['channels'])
            codec = stream['codec_name']
            if 'tags' in stream:
                if 'language' in stream['tags']:
                    codecsParams.extend(['-metadata:s:a:%d' % audioID, 'language=%s' % stream['tags']['language']])
            packets = getFramesInStream(inputFile=inputFile, begin=begin, end=end, streamKind='a', subStreamId=audioID)
            if packets is None:
                # The failure has already been logged by getFramesInStream.
                exit(-1)
            nbPackets = len(packets)
            logger.debug("Found %d packets to be extracted from audio track." % nbPackets)
            # With no packet there is no duration to read; 0 makes the
            # expected sound length 0 as well.
            packetDuration = packets[0]['duration'] if nbPackets > 0 else 0

            tmpname = '%s-%d.pcm' % (filesPrefix, audioID)

            # NOTE(review): subChannel is left at its default (0) for every
            # audio stream - confirm this is intended when audioID > 0.
            soundBytes, memfd = extractSound(inputFile=inputFile, begin=begin, nbPackets=nbPackets, packetDuration=packetDuration, outputFileName=tmpname, sampleRate=sampleRate, nbChannels=nbChannels)

            if soundBytes is None:
                exit(-1)

            memfds.append(memfd)

            if dumpMemFD:
                try:
                    output = open(tmpname, 'wb')
                except IOError:
                    logger.error('Impossible to create file: %s' % tmpname)
                    return None

                output.write(soundBytes)
                output.flush()
                temporaries.append(output)

            # We rewind to zero the memory file descriptor
            lseek(memfd, 0, SEEK_SET)
            set_inheritable(memfd, True)

            inputParams.extend(['-f', 's32le', '-ar', '%d' % sampleRate, '-ac', '%d' % nbChannels, '-i', '/proc/self/fd/%d' % memfd])
            codecsParams.extend(['-c:a:%d' % audioID, codec])
            # Request a bit rate only when the stream description has one.
            if 'bit_rate' in stream:
                codecsParams.extend(['-b:a:%d' % audioID, '%d' % int(stream['bit_rate'])])
            audioID = audioID+1
        elif stream['codec_type'] == 'subtitle':
            logger.info("Extracting a subtitle stream: s:%d" % subTitleID)
            inputParams.extend(['-i', './empty.idx'])
            if 'tags' in stream:
                if 'language' in stream['tags']:
                    codecsParams.extend(['-metadata:s:s:%d' % subTitleID, 'language=%s' % stream['tags']['language']])
            codecsParams.extend(['-c:s:%d' % subTitleID, 'copy'])
            subTitleID = subTitleID+1
        else:
            logger.error("Unknown stream type: %s" % stream['codec_type'])

    # Create a new MKV movie with all streams that have been extracted.
    encoderParams.extend(inputParams)
    for index in range(0, videoID+audioID+subTitleID):
        encoderParams.extend(['-map', '%d' % index])
    encoderParams.extend(codecsParams)
    fileName = '%s.mkv' % filesPrefix
    try:
        output = open(fileName, 'w')
    except IOError:
        logger.error('Impossible to create file: %s' % fileName)
        return None

    # Registered before encoding so that a failed run does not leave an
    # untracked file behind.
    temporaries.append(output)

    outfd = output.fileno()
    set_inheritable(outfd, True)
    # TODO: manage interlaced to previous parameters.
    encoderParams.extend(['-top', '1', '-flags:v', '+ilme+ildct', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-f', 'matroska', '/proc/self/fd/%d' % outfd])

    logger.info('Encoding video: %s' % fileName)
    with Popen(encoderParams, stdout=PIPE, close_fds=False) as ffmpeg:
        status = ffmpeg.wait()

    if status != 0:
        logger.error('Encoding failed with status code: %d' % status)
        return None

    return output
|
|
|
|
# Merge a list of mkv files passed as input, and produce a new MKV as output
|
|
def mergeMKVs(inputs, outputName):
    """Concatenate several MKV files into a new one with mkvmerge.

    inputs: iterable of open file objects; None entries are skipped. Every
        file after the first is passed to mkvmerge with a leading '+'.
    outputName: path of the file to create.

    Progress is shown with a tqdm bar. Exit status 1 is logged as a warning
    together with the collected warning lines, exit status 2 as an error.

    Returns the open file object of the output, or None (after logging an
    error) when the output file cannot be created.
    """
    logger = logging.getLogger(__name__)

    try:
        out = open(outputName, 'w')
    except IOError:
        logger.error('Impossible to create file: %s' % outputName)
        return None

    outfd = out.fileno()
    fds = [outfd]
    set_inheritable(outfd, True)

    mergeParams = ['mkvmerge']
    for mkv in inputs:
        if mkv is None:
            continue
        fd = mkv.fileno()
        fds.append(fd)
        set_inheritable(fd, True)
        # Only the first input goes without the '+' prefix.
        prefix = '+' if len(mergeParams) > 1 else ''
        mergeParams.append('%s/proc/self/fd/%d' % (prefix, fd))

    mergeParams.extend(['-o', '/proc/self/fd/%d' % outfd])

    # NOTE(review): the prefixes matched below are French; presumably they
    # rely on mkvmerge running under a French locale - confirm.
    progressPattern = re.compile('^Progression : (?P<progress>[0-9]{1,3})%$')
    warnings = []
    try:
        # We merge all files.
        with Popen(mergeParams, stdout=PIPE, close_fds=False) as mkvmerge:
            pb = tqdm(TextIOWrapper(mkvmerge.stdout, encoding="utf-8"), total=100, unit='%', desc='Merging')
            for line in pb:
                if line.startswith('Progression :'):
                    m = progressPattern.match(line)
                    if m is None:
                        logger.error('Impossible to parse progress')
                        # Nothing to read the percentage from: skip the line.
                        continue
                    pb.n = int(m['progress'])
                    pb.update()
                elif line.startswith('Avertissement'):
                    warnings.append(line)

            status = mkvmerge.wait()
            if status == 1:
                logger.warning('Merging returns warning')
                for w in warnings:
                    logger.warning(w)
            elif status == 2:
                logger.error('Merging returns errors')
    finally:
        # Restore the descriptors' flags even if the merge raised.
        for fd in fds:
            set_inheritable(fd, False)

    return out
|
|
|
|
def findSubtitlesTracks(filename):
    """Return index and language information of the subtitle streams of a file.

    Equivalent command line:
    ffprobe -loglevel quiet -select_streams s -show_entries stream=index:stream_tags=language -of json corgi.ts

    filename: path of the file handed to ffprobe.

    Returns the ``streams`` list decoded from ffprobe's JSON output, or None
    (after logging an error) when that output is not valid JSON or has no
    ``streams`` entry.
    """
    logger = logging.getLogger(__name__)

    command = ['ffprobe', '-i', filename, '-select_streams', 's', '-show_entries', 'stream=index:stream_tags=language', '-of', 'json']
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()

    try:
        out = json.loads(out)
    except ValueError:
        # Unparsable output is handled like a missing 'streams' entry.
        out = {}

    if isinstance(out, dict) and 'streams' in out:
        return out['streams']

    logger.error('Impossible to retrieve subtitle streams of file')
    return None
|
|
|
|
def extractSubTitleTrack(inputFileName, index, lang):
    """Extract one track of an MKV file with mkvextract and log its output.

    Equivalent command line:
    mkvextract video.mkv tracks position:name [position:name]

    inputFileName: path of the MKV file.
    index: track position, used on the left of the ``index:lang`` argument.
    lang: used on the right of that argument, where the command line above
        expects the output name.

    Every line printed by mkvextract on its standard output is logged at
    info level. Nothing is returned.
    """
    logger = logging.getLogger(__name__)

    with Popen(['mkvextract', inputFileName, 'tracks', '%d:%s' % (index, lang)], stdout=PIPE, close_fds=False) as mkvextract:
        out, _ = mkvextract.communicate()

    # `out` is bytes: decode it and log line by line, rather than logging the
    # whole output once per byte.
    for line in out.decode('utf-8', errors='replace').splitlines():
        logger.info(line)
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point: keep the requested parts of a recording.

    The input (TS, MP4 or MKV) is converted to MKV when needed. For every
    requested interval, the span between the first and the last I-frame is
    copied with mkvmerge, while the frames before the first I-frame and after
    the last one are re-encoded; the pieces are then merged into the output
    file. Temporary files are removed unless --keep is given. The process is
    terminated with exit code -1 on any unrecoverable error.
    """
    logger = logging.getLogger(__name__)
    coloredlogs.install()
    # NOTE(review): presumably required because the mkvmerge output parsed in
    # this file is matched against French messages - confirm.
    locale.setlocale(locale.LC_ALL, 'fr_FR.UTF8')
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", dest='inputFile', type=str, required=True, help="Input file to process (can be .ts, .mp4 or .mkv).")
    parser.add_argument("-o", "--output", dest='outputFile', type=str, required=True, help="Output MKV file to produce.")
    parser.add_argument("-p", "--part", dest='parts', nargs='+', required=False, action='append', metavar="hh:mm:ss[.mmm]-hh:mm:ss[.mmm]", help="Extract this exact part of the original file.")
    parser.add_argument("-k", "--keep", action='store_true', help="Do not cleanup temporary files after processing.")
    parser.add_argument("--dump-memory", action='store_true', dest='dump', help="For debug purpose, dump all memory mapping of headers (and trailers) before (after) each part. They are kept in memory only otherwise.")
    parser.add_argument("-s","--srt", action='store_true', dest='srt', help="Dump subtitles ")

    args = parser.parse_args()
    logger.debug("Arguments: %s" % args)

    checkRequiredTools()

    # Flatten args.parts
    intervals = []
    if args.parts is not None:
        for part in args.parts:
            intervals.extend(part)

    # Parse each interval
    parts = []
    for interval in intervals:
        parsed = parseTimeInterval(interval)
        # Accept both a bare None and a (None, None) pair as failure markers.
        if parsed is None or parsed[0] is None or parsed[1] is None:
            logger.error("Illegal time interval: %s" % interval)
            exit(-1)
        parts.append(parsed)

    # Sort intervals
    parts.sort(key=cmp_to_key(compareTimeInterval))

    # Check that no intervals are overlapping
    prevts = timedelta(0)
    for ts1, ts2 in parts:
        if prevts > ts1:
            logger.error('Intervals are overlapping')
            exit(-1)
        prevts = ts2

    temporaries = []

    basename = os.path.splitext(os.path.basename(args.inputFile))[0]
    mp4filename = basename+'.mp4'
    mkvfilename = basename+'.mkv'

    try:
        inputFile = open(args.inputFile, mode='r')
    except IOError:
        logger.error("Impossible to open %s" % args.inputFile)
        exit(-1)

    formatOfFile = getFormat(inputFile)
    if formatOfFile is None:
        exit(-1)

    duration = timedelta(seconds=float(formatOfFile['duration']))
    logger.info("Durée de l'enregistrement: %s" % duration)

    formatName = formatOfFile.get('format_name')
    formatOfFile = None
    for f in SupportedFormat:
        if formatName == str(f):
            formatOfFile = f
            break

    if formatOfFile is None:
        logger.error('Unsupported format of file')
        # Do not carry on as if the file were Matroska.
        exit(-1)

    if formatOfFile == SupportedFormat.TS:
        logger.info("Converting TS to MP4 (to fix timestamps).")
        try:
            mp4 = open(mp4filename, 'w')
        except IOError:
            logger.error('Impossible to create file: %s' % mp4filename)
            exit(-1)
        with mp4:
            ffmpegConvert(inputFile, 'mpegts', mp4, 'mp4', duration)
            temporaries.append(mp4)
            logger.info("Converting MP4 to MKV.")
            try:
                mkv = open(mkvfilename, 'w')
            except IOError:
                logger.error('Impossible to create file: %s' % mkvfilename)
                exit(-1)

            ffmpegConvert(mp4, 'mp4', mkv, 'matroska', duration)
            temporaries.append(mkv)
    elif formatOfFile == SupportedFormat.MP4:
        logger.info("Converting MP4 to MKV")
        try:
            mkv = open(mkvfilename, 'w')
        except IOError:
            logger.error('Impossible to create file: %s' % mkvfilename)
            exit(-1)
        # The MP4 to convert is the input file itself.
        ffmpegConvert(inputFile, 'mp4', mkv, 'matroska', duration)
        temporaries.append(mkv)
    else:
        logger.info("Already in MKV")
        mkv = inputFile

    streams = getStreams(mkv)
    if streams is None:
        exit(-1)

    mainVideo = None
    for stream in streams:
        if stream['codec_type'] == 'video' and stream['disposition']['default'] == 1:
            mainVideo = stream
            width = stream['width']
            height = stream['height']

    if mainVideo is None:
        logger.error('Impossible to find main video stream.')
        exit(-1)

    # For each part
    partnum = 0
    mkvparts = []
    checks = []
    pos = timedelta()

    for ts1, ts2 in parts:
        # Find the timestamp of the I-frame closest to (but not before) the beginning of the part.
        # Find the timestamp of the I-frame closest to (but not after) the end of the part.
        # We then have
        # begin ----- frame --------- frame --------- end.
        #       'B/P' 'B/P'* 'I'             'I' 'B/P'* 'B/P'
        # If the first frame is already an 'I' frame, there is nothing to do (same for the end).
        # Otherwise we extract the 'B' or 'P' frames from the beginning up to the 'I' frame (excluded).

        partnum = partnum + 1

        headFrames = getNearestIFrame(mkv, ts1, before=False)
        if headFrames is None:
            exit(-1)

        tailFrames = getNearestIFrame(mkv, ts2, before=True)
        if tailFrames is None:
            exit(-1)

        nbHeadFrames, headIFrame = headFrames
        nbTailFrames, tailIFrame = tailFrames

        logger.info("Found %d frames between beginning of current part and first I-frame" % nbHeadFrames)
        logger.info("Found %d frames between last I-frame and end of current part" % nbTailFrames)

        headIFrameTS = timedelta(seconds=float(headIFrame['pts_time']))
        tailIFrameTS = timedelta(seconds=float(tailIFrame['pts_time']))

        checks.append(pos+headIFrameTS-ts1)

        subparts = []

        if nbHeadFrames > 0:
            # We extract all frames between the beginning upto the frame that immediately preceeds the I-frame.
            head = extractAllStreams(inputFile=mkv, begin=ts1, end=headIFrameTS, nbFrames=nbHeadFrames-1, filesPrefix='part-%d-head' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
            subparts.append(head)

        # Creating MKV file that corresponds to current part between I-frames
        try:
            internal = open('part-%d-internal.mkv' % partnum, 'w')
        except IOError:
            logger.error('Impossible to create file: part-%d-internal.mkv' % partnum)
            exit(-1)
        temporaries.append(internal)
        extractMKVPart(inputFile=mkv, outputFile=internal, begin=headIFrameTS, end=tailIFrameTS)
        subparts.append(internal)

        if nbTailFrames > 0:
            # We extract all frames between the I-frame (including it) upto the end.
            tail = extractAllStreams(inputFile=mkv, begin=tailIFrameTS, end=ts2, nbFrames=nbTailFrames, filesPrefix='part-%d-tail' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
            subparts.append(tail)

        logger.info('Merging: %s' % subparts)
        part = mergeMKVs(inputs=subparts, outputName="part-%d.mkv" % partnum)
        if part is None:
            # mergeMKVs has already logged why the file could not be created.
            exit(-1)
        mkvparts.append(part)
        temporaries.append(part)

        pos = pos+tailIFrameTS-ts1

        # We need to check the end also
        checks.append(pos)

    nbParts = len(mkvparts)
    if nbParts > 1:
        logger.info('Merging: %s' % mkvparts)
        mergeMKVs(inputs=mkvparts, outputName=args.outputFile)
    elif nbParts == 1:
        copyfile('part-1.mkv', args.outputFile)
    else:
        logger.info("Nothing else to do.")

    if args.srt:
        logger.info("Find subtitles tracks and language.")
        # A failed probe yields None; treat it as "no subtitle track".
        subtitles = findSubtitlesTracks(args.outputFile) or []
        sts = {}
        for subtitle in subtitles:
            index = subtitle['index']
            lang = subtitle.get('tags', {}).get('language')
            if lang is None:
                logger.error("Dropping subtitle: %s because it is missing language indication" % index)
                continue
            sts.setdefault(lang, []).append(index)

        # NOTE(review): the file names below are computed but no extraction
        # is started yet - this feature looks unfinished.
        for lang, indexes in sts.items():
            if len(indexes) == 1:
                index = indexes[0]
                filename = 'essai-%s.srt' % lang
            else:
                for nbsrt, index in enumerate(indexes, start=1):
                    filename = 'essai-%s-%d.srt' % (lang, nbsrt)

    if not args.keep:
        logger.info("Cleaning temporary files")
        for f in temporaries:
            path = os.path.realpath(f.name)
            logger.info("Removing: %s" % path)
            f.close()
            unlink(path)

    for c in checks:
        logger.info("Please check cut smoothness at: %s" % c)
|
|
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|