1415 lines
53 KiB
Python
Executable File
1415 lines
53 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import argparse
|
|
import re
|
|
from sys import exit
|
|
from datetime import datetime,timedelta,time
|
|
import coloredlogs, logging
|
|
from functools import cmp_to_key
|
|
from subprocess import Popen, PIPE
|
|
from os import read, write, lseek, pipe, set_inheritable, memfd_create, SEEK_SET, close, unlink, fstat, ftruncate
|
|
import os.path
|
|
from io import BytesIO, TextIOWrapper
|
|
import json
|
|
from enum import Enum, IntEnum, unique, auto
|
|
import shutil
|
|
from tqdm import tqdm, trange
|
|
from select import select
|
|
from math import floor, ceil, log
|
|
from shutil import copyfile, which
|
|
import hexdump
|
|
|
|
|
|
# Useful SPS/PPS discussion
|
|
# TODO: improve the handling of SPS/PPS header mismatches when merging MKV files with mkvmerge, to remove the warnings.
|
|
# https://copyprogramming.com/howto/including-sps-and-pps-in-a-raw-h264-track
|
|
# https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2390
|
|
|
|
|
|
def checkRequiredTools():
    """Look up on PATH the external programs this script relies on.

    Returns:
        A tuple ``(allOptionalTools, paths)``. ``allOptionalTools`` is False
        as soon as one optional tool cannot be found; ``paths`` maps the name
        of every tool that was found to the location given by ``which``.

    The process exits with status -1 when a required tool is missing.
    """
    logger = logging.getLogger(__name__)
    mandatory = ['ffmpeg', 'ffprobe', 'mkvmerge', 'mkvinfo']
    extras = ['mkvextract', 'vobsubocr','tesseract']
    paths = {}

    for name in mandatory:
        location = which(name)
        if location is None:
            logger.error('Required tool: %s is missing.' % name)
            exit(-1)
        paths[name] = location

    allOptionalTools = True
    for name in extras:
        location = which(name)
        if location is None:
            # A missing optional tool is only reported, never fatal.
            logger.info('Optional tool: %s is missing.' % name)
            allOptionalTools = False
            continue
        paths[name] = location

    return allOptionalTools, paths
|
|
|
|
def getTesseractSupportedLang(tesseract):
    """List the languages known to the given tesseract executable.

    Runs ``tesseract --list-langs`` and keeps every output line made of
    exactly three lowercase letters that ``Lang`` accepts.

    Args:
        tesseract: path of the tesseract executable.

    Returns:
        A dict mapping ``Lang`` objects to the three-letter code printed by
        tesseract, or None when tesseract exits with a non-zero status.
    """
    logger = logging.getLogger(__name__)
    langLine = re.compile('(?P<lang>[a-z]{3})\n')
    supported = {}

    with Popen([tesseract, '--list-langs'], stdout=PIPE) as process:
        for raw in process.stdout:
            found = langLine.match(raw.decode('utf8'))
            if found is None:
                continue
            try:
                code = found.group('lang')
                supported[Lang(code)] = code
            except InvalidLanguageValue:
                # Three-letter words that are not language codes are ignored.
                pass

        process.wait()

        if process.returncode != 0:
            logger.error("Tesseract returns an error code: %d" % process.returncode)
            return None

    return supported
|
|
|
|
|
|
def getSubTitlesTracks(ffprobe, mkvPath):
    """Group the DVD subtitle streams of a file by language.

    ffprobe is asked for the index, codec name and language tag of every
    subtitle stream; only streams whose codec is ``dvd_subtitle`` are kept.

    Args:
        ffprobe: path of the ffprobe executable.
        mkvPath: path of the file to inspect.

    Returns:
        A dict mapping a language tag to the list of stream indexes carrying
        that language, in ffprobe order. Streams without a language tag are
        filed under ``'und'``. None is returned when ffprobe fails, prints
        something that is not JSON, or reports no ``streams`` entry.
    """
    logger = logging.getLogger(__name__)
    command = [ffprobe, '-loglevel', 'quiet', '-select_streams', 's', '-show_entries', 'stream=index,codec_name:stream_tags=language', '-of', 'json', mkvPath]

    with Popen(command, stdout=PIPE) as process:
        out, _ = process.communicate()

    if process.returncode != 0:
        logger.error("ffprobe returns an error code: %d" % process.returncode)
        return None

    try:
        report = json.loads(out)
    except ValueError:
        logger.error("Impossible to parse ffprobe output")
        return None

    if 'streams' not in report:
        return None

    tracks = {}
    for stream in report['streams']:
        # The requested entry is 'codec_name'; there is no 'codec' key.
        if stream.get('codec_name') != 'dvd_subtitle':
            continue
        lang = stream.get('tags', {}).get('language', 'und')
        tracks.setdefault(lang, []).append(stream['index'])

    return tracks
|
|
|
|
def extractSRT(mkvextract, mkvPath, destPath, tracks, langs):
    """Extract subtitle tracks with mkvextract and describe the produced files.

    Args:
        mkvextract: path of the mkvextract executable.
        mkvPath: file to extract the tracks from.
        destPath: directory receiving the extracted files.
        tracks: mapping of a language code to the list of track ids.
        langs: mapping of ``Lang`` objects to the OCR language name to use.

    Returns:
        A list of ``(idxPath, lang, ocrlang)`` tuples, one per requested
        track. ``ocrlang`` is ``'osd'`` when the language is not in *langs*.
        The list is returned whatever the exit status of mkvextract.
    """
    logger = logging.getLogger(__name__)
    command = [mkvextract, mkvPath, 'tracks']
    extracted = []

    for lang in tracks:
        ids = tracks[lang]
        iso = Lang(lang)

        if iso in langs:
            ocrlang = langs[iso]
        else:
            logger.warning("Language not supported by Tesseract: %s" % iso.name)
            ocrlang ='osd'

        if len(ids) == 1:
            # A single track for the language: the file name has no rank suffix.
            command.append('%d:%s/%s' % (ids[0], destPath ,lang))
            extracted.append(('%s/%s.idx' % (destPath, lang), lang, ocrlang))
            continue

        for rank, track in enumerate(ids, start=1):
            command.append('%d:%s/%s-%d' % (track, destPath, lang, rank))
            extracted.append(('%s/%s-%d.idx' % (destPath, lang, rank), lang, ocrlang))

    with Popen(command) as extract:
        extract.wait()
        status = extract.returncode

        if status != 0:
            print("Erreur de mkvextract: %d" % status)
        else:
            print("Extracted")

    return extracted
|
|
|
|
def doOCR(vobsubocr, idxs):
    """Turn VobSub index files into SRT files with vobsubocr.

    For every ``(filename, lang, iso)`` entry the SRT file is named after
    *filename* with its extension replaced by ``.srt``. vobsubocr is only run
    when that file does not exist yet.

    Args:
        vobsubocr: path of the vobsubocr executable.
        idxs: iterable of ``(idxPath, lang, ocrLanguage)`` tuples.

    Returns:
        A list of ``(srtPath, lang)`` tuples, one per entry of *idxs*.
    """
    # Tesseract recognizes the string "..." as the text 'su'.
    misreadEllipsis = re.compile('^su\n$')
    res = []

    for filename, lang, iso in idxs:
        print(filename)
        srtname = '%s.srt' % os.path.splitext(filename)[0]
        print(srtname)

        if not os.path.isfile(srtname):
            with open(srtname, 'w+') as srt:
                with Popen([vobsubocr, '--lang', iso, filename], stdout=PIPE) as ocr:
                    for line in ocr.stdout:
                        line = line.decode('utf8')
                        if misreadEllipsis.match(line) is not None:
                            # Keep the line terminator: without it the
                            # ellipsis is glued to the next line of the SRT.
                            srt.write('...\n')
                        else:
                            srt.write(line)

        # NOTE(review): an already existing SRT file is reported as well;
        # confirm this is what callers expect.
        res.append((srtname, lang))

    return res
|
|
|
|
|
|
|
|
@unique
class SupportedFormat(IntEnum):
    """Container formats known to the script.

    ``str()`` of a member yields a comma-separated format name list; these
    look like ffprobe ``format_name`` values (assumption, verify against the
    callers).
    """
    TS = 1
    MP4 = 2
    Matroska = 3

    def __str__(self):
        # Built at call time because the members do not exist yet while the
        # class body is being executed.
        labels = {
            SupportedFormat.TS: 'mpegts',
            SupportedFormat.MP4: 'mov,mp4,m4a,3gp,3g2,mj2',
            SupportedFormat.Matroska: 'matroska,webm',
        }
        return labels.get(self, 'Unsupported format')
|
|
|
|
# Extract SPS/PPS
|
|
# https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2390
|
|
# ffmpeg -i <InputFile (before concatenation)> -c:v copy -an -sn -bsf:v trace_headers -t 0.01 -report -loglevel 0 -f null -
|
|
|
|
# Found codec private data using mkvinfo
|
|
def getCodecPrivateData(mkvinfo, inputFile):
    """Locate the H.264 codec private data of a Matroska file and read it.

    mkvinfo is run on the file and its output is searched for the first
    "Codec's private data" line, for example::

        Codec's private data: size 48 (H.264 profile: High @L4.0) hexdump 01 64 ... at 406 size 51 data size 48

    The position and size are taken from the trailing ``at P size S`` part.
    In the sample above S is 51 whereas the data size is 48, so the bytes
    returned presumably include the element header (TODO confirm).

    Args:
        mkvinfo: path of the mkvinfo executable.
        inputFile: open file object of the Matroska file.

    Returns:
        ``(position, data)`` where *data* is the bytes read at *position*,
        or ``(None, None)`` when no matching line is found.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    lseek(infd, 0, SEEK_SET)
    # mkvinfo opens the file through /proc/self/fd, hence the descriptor has
    # to survive the exec.
    set_inheritable(infd, True)
    # Presumably forces untranslated labels so that the pattern below matches.
    env = {**os.environ, 'LANG': 'C'}
    # Raw string: '\(' and '\)' are regex escapes, not string escapes.
    pattern = re.compile(r"^.*Codec's private data: size ([0-9]+) \(H.264.*\) hexdump (?P<hexdump>([0-9a-f]{2} )+)at (?P<position>[0-9]+) size (?P<size>[0-9]+).*$")

    with Popen([mkvinfo, '-z', '-X', '-P', '/proc/self/fd/%d' % infd ], stdout=PIPE, close_fds=False, env=env) as process:
        out, _ = process.communicate()

    for line in out.decode('utf8').splitlines():
        m = pattern.match(line)
        if m is None:
            continue
        size = int(m.group('size'))
        position = int(m.group('position'))
        logger.debug("Found codec private data at position: %s, size: %d" % (position, size))
        lseek(infd, position, SEEK_SET)
        return position, read(infd, size)

    return None, None
|
|
|
|
|
|
def parseMKVTree(mkvinfo, inputFile):
    """Index the elements printed by mkvinfo by their path in the tree.

    Every mkvinfo output line ending with ``at P size S`` becomes an entry.
    Its key is a dotted path such as ``2.1.3``: one number per nesting level,
    each number being the 1-based rank of the element among its siblings.
    The nesting level is 0 for a line starting with ``+``, otherwise the
    length of the run of spaces plus the ``+`` that follows the leading ``|``.

    Args:
        mkvinfo: path of the mkvinfo executable.
        inputFile: open file object of the Matroska file.

    Returns:
        A dict mapping each path key to a ``(position, size)`` tuple. Lines
        that do not match are logged as errors and skipped.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    lseek(infd, 0, SEEK_SET)
    set_inheritable(infd, True)
    env = {**os.environ, 'LANG': 'C'}
    # Raw string: '\+' and '\|' are regex escapes, not string escapes.
    pattern = re.compile(r"(^(?P<root>\+)|(\|(?P<depth>[ ]*\+))).*at (?P<position>[0-9]+) size (?P<size>[0-9]+).*$")

    with Popen([mkvinfo, '-z', '-X', '-P', '/proc/self/fd/%d' % infd ], stdout=PIPE, close_fds=False, env=env) as process:
        out, _ = process.communicate()

    elements = {}
    # path[i] is the rank of the current element at depth i; its length is
    # always prevDepth + 1.
    path = []
    prevDepth = -1
    for line in out.decode('utf8').splitlines():
        m = pattern.match(line)
        if m is None:
            logger.error("Impossible to match line: %s" % line)
            continue

        depth = 0 if m.group('root') is not None else len(m.group('depth'))

        if depth > prevDepth:
            # Going down: every new level starts at rank 1.
            path.extend([1] * (depth - prevDepth))
        else:
            # Same level or back up: drop the deeper levels and move on to
            # the next sibling.
            del path[depth + 1:]
            path[-1] += 1

        prevDepth = depth
        key = ".".join(map(str, path))
        elements[key] = (int(m.group('position')), int(m.group('size')))

    return elements
|
|
|
|
# MKV is formatted as an EBML file (Extensible Binary Meta Language).
|
|
# cf http://matroska-org.github.io/libebml/specs.html
|
|
# It is a Type, Length, Value (TLV) kind of binary file.
|
|
# Types are encoded as follows:
|
|
# 1xxx xxxx - Class A IDs (2^7 -1 possible values)
|
|
# 01xx xxxx xxxx xxxx - Class B IDs (2^14-1 possible values)
|
|
# 001x xxxx xxxx xxxx xxxx xxxx - Class C IDs (2^21-1 possible values)
|
|
# 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx - Class D IDs (2^28-1 possible values)
|
|
# Lengths are encoded as follows:
|
|
# 1xxx xxxx - value 0 to 2^7-2
|
|
# 01xx xxxx xxxx xxxx - value 0 to 2^14-2
|
|
# 001x xxxx xxxx xxxx xxxx xxxx - value 0 to 2^21-2
|
|
# 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^28-2
|
|
# 0000 1xxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^35-2
|
|
# 0000 01xx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^42-2
|
|
# 0000 001x xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^49-2
|
|
# 0000 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^56-2
|
|
|
|
|
|
def getEBMLLength(length):
    """Encode *length* as an EBML data-size field.

    The shortest width is chosen: a field of ``n`` bytes (1 to 8) holds the
    values 0 to ``2**(7*n) - 2``. The encoded value is *length* with a marker
    bit set at position ``7*n`` (the single 1 bit that follows ``n - 1``
    leading zero bits), stored big-endian.

    Args:
        length: the non-negative integer to encode.

    Returns:
        The encoded bytes, or None (after logging an error) when *length* is
        negative or larger than ``2**56 - 2``.
    """
    logger = logging.getLogger(__name__)

    if length < 0:
        logger.error('Impossible to encode a negative length with EBML.')
        return None

    for size in range(1, 9):
        # '**' is the power operator; '^' would be a bitwise XOR.
        if length <= 2**(7*size) - 2:
            marker = (128 >> (size-1)) << ((size-1)*8)
            return (length + marker).to_bytes(size, byteorder='big')

    logger.error('Impossible to encode a length larger than 2^56-2 with EBML.')
    return None
|
|
|
|
def _vintWidth(firstByte, maxWidth):
    """Return the width announced by the first byte of an EBML id or size, or None.

    The width is the 1-based position of the first set bit, looked for among
    the *maxWidth* most significant bits of *firstByte*.
    """
    for width in range(1, maxWidth + 1):
        if firstByte & (128 >> (width - 1)):
            return width
    return None


def changeEBMLElementSize(inputFile, position, addendum):
    """Add *addendum* to the data size stored in the header of an EBML element.

    The element id (1 to 4 bytes) found at *position* is skipped, then the
    size field (1 to 8 bytes) that follows is decoded, increased and written
    back. When the new value does not fit in the existing field, a wider
    field is written and the rest of the file is shifted accordingly.

    Args:
        inputFile: file object open for reading and writing.
        position: offset of the first byte of the element id.
        addendum: number of bytes to add to the data size (may be negative).

    Returns:
        The number of bytes by which the size field grew, 0 when it was
        rewritten in place.

    The process exits with status -1 when the header cannot be decoded or
    the new size cannot be encoded.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    lseek(infd, position, SEEK_SET)
    elementType = int.from_bytes(read(infd, 1), byteorder='big')
    typeSize = _vintWidth(elementType, 4)
    if typeSize is None:
        # '%b' is not a valid conversion for str formatting.
        logger.error('Size of element type cannot be determined: %s' % format(elementType, '08b'))
        exit(-1)

    # We seek to size
    sizePosition = position + typeSize
    lseek(infd, sizePosition, SEEK_SET)
    sizeHead = int.from_bytes(read(infd, 1), byteorder='big')
    logger.info('First byte of size: %x' % sizeHead)
    sizeOfDataSize = _vintWidth(sizeHead, 8)
    if sizeOfDataSize is None:
        logger.error('Size of data size cannot be determined: %s' % format(sizeHead, '08b'))
        exit(-1)
    logger.info('Size of data size: %d.' % sizeOfDataSize)

    lseek(infd, sizePosition, SEEK_SET)
    oldSizeBuf = read(infd, sizeOfDataSize)
    maxSize = 2**(sizeOfDataSize*7)-2
    mask = 128 >> (sizeOfDataSize-1)
    # The marker bit tells the width of the field; it is not part of the value.
    marker = mask << ((sizeOfDataSize-1)*8)
    sizeOfData = int.from_bytes(oldSizeBuf, byteorder='big')
    logger.info('Size of data with mask: %x mask: %d.' % (sizeOfData, mask))
    sizeOfData -= marker
    logger.info('Found element at position: %d, size of type: %d size of data: %d maximal size: %d.' % (position, typeSize, sizeOfData, maxSize))

    newSize = sizeOfData+addendum
    if newSize < 0:
        logger.error('New size of element is negative: %d' % newSize)
        exit(-1)

    if newSize <= maxSize:
        newSizeBuf = (newSize + marker).to_bytes(sizeOfDataSize, byteorder='big')
        logger.info('Old encoded size: %s New encoded size: %s' % (oldSizeBuf.hex(':').upper(), newSizeBuf.hex(':').upper()))
        lseek(infd, sizePosition, SEEK_SET)
        write(infd, newSizeBuf)
        return 0

    # TODO: Test this code ...
    newEncodedSize = getEBMLLength(newSize)
    if newEncodedSize is None:
        logger.error('Impossible to encode new size: %d' % newSize)
        exit(-1)
    sizeOfNewEncodedSize = len(newEncodedSize)
    if sizeOfNewEncodedSize <= sizeOfDataSize:
        logger.error('New encoded size is smaller (%d) or equal than previous size (%d). This should not happen.' % (sizeOfNewEncodedSize, sizeOfDataSize))
        exit(-1)

    # The difference of length between old size field and new one.
    delta = sizeOfNewEncodedSize - sizeOfDataSize
    fileLength = fstat(infd).st_size
    # We read everything located after the current size field. A single
    # read() may return less than requested, hence the loop.
    tailStart = sizePosition + sizeOfDataSize
    tailLength = fileLength - tailStart
    lseek(infd, tailStart, SEEK_SET)
    tail = bytearray()
    while len(tail) < tailLength:
        chunk = read(infd, min(tailLength - len(tail), 1 << 26))
        if not chunk:
            break
        tail += chunk

    # We increase file length
    ftruncate(infd, fileLength+delta)
    # We write the new size field, then the previous content right after it.
    lseek(infd, sizePosition, SEEK_SET)
    for payload in (newEncodedSize, tail):
        pending = memoryview(payload)
        while len(pending) > 0:
            pending = pending[write(infd, pending):]

    # We return the increase in size of the file caused by the wider field.
    return delta
|
|
|
|
def _readExactly(fd, length):
    """Read *length* bytes from *fd* at its current offset, looping over short reads."""
    data = bytearray()
    while len(data) < length:
        chunk = read(fd, min(length - len(data), 1 << 26))
        if not chunk:
            break
        data += chunk
    return data


def _writeFully(fd, data):
    """Write all of *data* to *fd* at its current offset, looping over short writes."""
    pending = memoryview(data)
    while len(pending) > 0:
        pending = pending[write(fd, pending):]


def changeCodecPrivateData(mkvinfo, inputFile, codecData):
    """Replace in place the codec private data of a Matroska file.

    The bytes returned by ``getCodecPrivateData`` are replaced by
    *codecData*; the rest of the file is shifted when the lengths differ and
    the file is resized. The size of every element enclosing the codec
    private data (found through ``parseMKVTree``) is then adjusted with
    ``changeEBMLElementSize``.

    Args:
        mkvinfo: path of the mkvinfo executable.
        inputFile: file object open for reading and writing.
        codecData: the replacement bytes.

    The process exits with status -1 when the codec private data cannot be
    found, either by ``getCodecPrivateData`` or in the element tree.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    lseek(infd, 0, SEEK_SET)

    currentLength = fstat(infd).st_size
    logger.info('Current size of file: %d' % currentLength)
    position, currentData = getCodecPrivateData(mkvinfo, inputFile)
    if currentData is None:
        logger.error('Impossible to find codec private data')
        exit(-1)

    currentDataLength = len(currentData)
    futureLength = currentLength - currentDataLength + len(codecData)
    logger.info('Expected size of file: %d' % futureLength)

    logger.info('Current data at position %d: %s' % (position, currentData.hex(':').upper()))
    logger.info('Future data: %s' % codecData.hex(':').upper())

    elements = parseMKVTree(mkvinfo, inputFile)

    found = False
    for key in elements:
        pos, size = elements[key]
        if pos == position:
            logger.info('Codec private data key: %s' % key)
            found = True
            break

    if not found:
        logger.error('Impossible to retrieve the key of codec private data')
        exit(-1)

    if currentLength == futureLength:
        # Almost nothing to do except overwriting old private codec data with new ones.
        lseek(infd, position, SEEK_SET)
        _writeFully(infd, codecData)
    else:
        # Everything located after the old data has to move. A single
        # read()/write() may transfer less than requested on large files,
        # hence the looping helpers.
        tailStart = position+currentDataLength
        lseek(infd, tailStart, SEEK_SET)
        tail = _readExactly(infd, currentLength-tailStart)
        if currentLength < futureLength:
            # We extend the file at the end with zeroes
            ftruncate(infd, futureLength)
        lseek(infd, position+len(codecData), SEEK_SET)
        _writeFully(infd, tail)
        lseek(infd, position, SEEK_SET)
        _writeFully(infd, codecData)
        if currentLength > futureLength:
            # We reduce the length of file.
            ftruncate(infd, futureLength)

    # We have to modify the tree elements up to the root that contains the codec private data.
    keys = key.split('.')
    logger.info(keys)

    delta = futureLength-currentLength
    # if there is no modification of the private codec data, no need to change anything.
    if delta != 0:
        while len(keys) > 1:
            keys.pop()
            key = ".".join(keys)
            pos, size = elements[key]
            logger.info('Trying to fix element with key: %s at position: %d with actual size: %d.' % (key, pos, size))
            # Changing an element can increase its size (in very rare case).
            # In that case the delta applied to the enclosing elements grows
            # by the number of bytes added to the size field.
            delta+=changeEBMLElementSize(inputFile, pos, delta)
|
|
|
|
|
|
def getFormat(ffprobe, inputFile):
    """Return the ``format`` section of the ffprobe JSON report of a file.

    Args:
        ffprobe: path of the ffprobe executable.
        inputFile: open file object; it is rewound and handed over to
            ffprobe through ``/proc/self/fd``.

    Returns:
        The value of the ``format`` key, or None (after logging an error)
        when the report has no such key.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    lseek(infd, 0, SEEK_SET)
    set_inheritable(infd, True)
    command = [ffprobe, '-loglevel', 'quiet', '-show_format', '-of', 'json', '-i', '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as process:
        raw, _ = process.communicate()
        report = json.loads(raw)

    if 'format' not in report:
        logger.error('Impossible to retrieve format of file')
        return None

    return report['format']
|
|
|
|
def getStreams(ffprobe, inputFile):
    """Return the ``streams`` section of the ffprobe JSON report of a file.

    Args:
        ffprobe: path of the ffprobe executable.
        inputFile: open file object; it is rewound and handed over to
            ffprobe through ``/proc/self/fd``.

    Returns:
        The value of the ``streams`` key, or None (after logging an error)
        when the report has no such key.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    lseek(infd, 0, SEEK_SET)
    set_inheritable(infd, True)
    command = [ffprobe, '-loglevel', 'quiet', '-show_streams', '-of', 'json', '-i', '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as process:
        raw, _ = process.communicate()
        report = json.loads(raw)

    if 'streams' not in report:
        logger.error('Impossible to retrieve streams inside file')
        return None

    return report['streams']
|
|
|
|
def parseTimestamp(ts):
    """Parse a ``HH:MM:SS[.ffffff]`` timestamp.

    Args:
        ts: the text to parse. Hours, minutes and seconds take one or two
            digits; the optional fraction takes one to six digits.

    Returns:
        The matching ``timedelta``, or None (after logging an error) when
        the text has another shape or a field is out of range.
    """
    logger = logging.getLogger(__name__)

    pattern = re.compile(r'^(?P<hour>[0-9]{1,2}):(?P<minute>[0-9]{1,2}):(?P<second>[0-9]{1,2})(\.(?P<us>[0-9]{1,6}))?$')
    m = pattern.match(ts)
    if m is None:
        logger.error("Impossible to parse timestamp: %s" % ts)
        return None

    hour = int(m.group('hour'))
    minute = int(m.group('minute'))
    second = int(m.group('second'))
    fraction = m.group('us')
    # The digits are a decimal fraction of a second: ".5" is 500000
    # microseconds, hence the padding on the right.
    us = int(fraction.ljust(6, '0')) if fraction is not None else 0

    # The pattern only accepts digits, so only the upper bounds need a check.
    if hour > 23:
        logger.error("hour must be in [0,24[")
        return None
    if minute > 59:
        logger.error("minute must be in [0,60[")
        return None
    if second > 59:
        logger.error("second must be in [0,60[")
        return None

    return timedelta(hours=hour, minutes=minute, seconds=second, microseconds=us)
|
|
|
|
def _intervalBound(values, suffix, logger):
    """Build the timedelta of one bound of an interval, or None after logging the first out-of-range field."""
    hour = int(values['hour' + suffix])
    minute = int(values['minute' + suffix])
    second = int(values['second' + suffix])
    fraction = values['ms' + suffix]
    # The digits are a decimal fraction of a second: ".5" is 500 ms, hence
    # the padding on the right.
    ms = int(fraction.ljust(3, '0')) if fraction is not None else 0

    # The pattern only accepts digits, so only the upper bounds need a check.
    if hour > 23:
        logger.error("hour must be in [0,24[")
        return None
    if minute > 59:
        logger.error("minute must be in [0,60[")
        return None
    if second > 59:
        logger.error("second must be in [0,60[")
        return None

    return timedelta(hours=hour, minutes=minute, seconds=second, milliseconds=ms)


def parseTimeInterval(interval):
    """Parse a ``HH:MM:SS[.mmm]-HH:MM:SS[.mmm]`` time interval.

    Args:
        interval: the text to parse.

    Returns:
        ``(begin, end)`` as two ``timedelta`` objects. ``(None, None)`` is
        returned when a field is out of range or when the end precedes the
        beginning. A text that does not have the expected shape yields a
        single None.
    """
    logger = logging.getLogger(__name__)

    pattern = re.compile(r'^(?P<hour1>[0-9]{1,2}):(?P<minute1>[0-9]{1,2}):(?P<second1>[0-9]{1,2})(\.(?P<ms1>[0-9]{1,3}))?-(?P<hour2>[0-9]{1,2}):(?P<minute2>[0-9]{1,2}):(?P<second2>[0-9]{1,2})(\.(?P<ms2>[0-9]{1,3}))?$')
    m = pattern.match(interval)
    if m is None:
        logger.error("Impossible to parse time interval")
        # NOTE(review): every other failure returns (None, None); kept as a
        # single None because callers are not visible from here.
        return None

    values = m.groupdict()
    ts1 = _intervalBound(values, '1', logger)
    if ts1 is None:
        return None, None
    ts2 = _intervalBound(values, '2', logger)
    if ts2 is None:
        return None, None

    if ts2 < ts1:
        logger.error("Non monotonic interval")
        return None,None

    return (ts1, ts2)
|
|
|
|
def compareTimeInterval(interval1, interval2):
    """Three-way comparison of two ``(begin, end)`` intervals.

    Returns:
        -1 when *interval1* ends before *interval2* begins, 1 when
        *interval2* ends before *interval1* begins, and 0 otherwise (the
        intervals touch or overlap).
    """
    begin1, end1 = interval1
    begin2, end2 = interval2

    if end1 < begin2:
        return -1
    if end2 < begin1:
        return 1
    return 0
|
|
|
|
|
|
|
|
def ffmpegConvert(ffmpeg, inputFile, inputFormat, outputFile, outputFormat, duration):
    """Remux *inputFile* into *outputFile* with ffmpeg while showing progress.

    Video and audio are copied, subtitles are encoded as ``dvdsub``. The
    progress bar is driven by the ``out_time=`` lines that ffmpeg prints on
    its standard output.

    Args:
        ffmpeg: path of the ffmpeg executable.
        inputFile: open file object of the source.
        inputFormat: ffmpeg name of the source format.
        outputFile: open file object of the destination.
        outputFormat: ffmpeg name of the destination format.
        duration: ``timedelta`` used as the total of the progress bar.

    A non-zero exit status of ffmpeg is logged as an error.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    # TODO: canvas size to be fixed !
    command = [ffmpeg, '-y', '-loglevel', 'quiet', '-progress', '/dev/stdout', '-canvas_size', '720x560', '-f', inputFormat, '-i', '/proc/self/fd/%d' % infd,
               '-map', '0:v', '-map', '0:a', '-map', '0:s', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-vcodec', 'copy', '-acodec', 'copy', '-scodec', 'dvdsub',
               '-f', outputFormat, '/proc/self/fd/%d' % outfd]
    with Popen(command, stdout=PIPE, close_fds=False) as process:
        pb = tqdm(TextIOWrapper(process.stdout, encoding="utf-8"), total=int(duration/timedelta(seconds=1)), unit='s', desc='Conversion')
        for line in pb:
            if not line.startswith('out_time='):
                continue
            ts = parseTimestamp(line.split('=')[1].strip())
            if ts is None:
                # The value is not always HH:MM:SS[.ffffff] (it may be
                # unavailable or negative); such a line is skipped instead
                # of crashing on None / timedelta.
                continue
            pb.n = int(ts/timedelta(seconds=1))
            pb.update()
        status = process.wait()
        if status != 0:
            logger.error('Conversion failed with status code: %d' % status)
|
|
|
|
def getTSFrame(frame):
    """Return the presentation timestamp of an ffprobe frame as a ``timedelta``.

    The timestamp is read from ``pts_time`` or, failing that, from
    ``pkt_pts_time``, both expressed in seconds.

    Returns:
        The timestamp, or None (after logging an error) when the frame has
        neither key.
    """
    logger = logging.getLogger(__name__)

    for key in ('pts_time', 'pkt_pts_time'):
        if key in frame:
            return timedelta(seconds=float(frame[key]))

    logger.error('Impossible to find timestamp of frame %s' % frame)
    return None
|
|
|
|
def getPacketDuration(packet):
    """Return the duration of an ffprobe packet as an integer.

    The duration is read from ``duration`` or, failing that, from
    ``pkt_duration``. The unit is whatever ffprobe reported; it is not
    converted here.

    Returns:
        The duration, or None (after logging an error) when the packet has
        neither key.
    """
    logger = logging.getLogger(__name__)

    for key in ('duration', 'pkt_duration'):
        if key in packet:
            return int(packet[key])

    logger.error('Impossible to find duration of packet %s' % packet)
    return None
|
|
|
|
|
|
def getFramesInStream(ffprobe, inputFile, begin, end, streamKind, subStreamId=0):
    """List the frames of one stream whose timestamp lies in ``[begin, end]``.

    Args:
        ffprobe: path of the ffprobe executable.
        inputFile: open file object handed over through ``/proc/self/fd``.
        begin: lower bound, compared with the frame timestamps.
        end: upper bound, compared with the frame timestamps.
        streamKind: ffprobe stream type letter (for instance ``'v'``).
        subStreamId: rank of the stream among the streams of that type.

    Returns:
        The list of frame dicts reported by ffprobe inside the interval.
        None when ffprobe fails, when its report has no ``frames`` entry or
        when the timestamp of a frame cannot be determined.
    """
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()
    set_inheritable(infd, True)

    command = [ffprobe, '-loglevel', 'quiet', '-read_intervals', ('%s%%%s' %(begin, end)), '-show_entries', 'frame', '-select_streams', '%s:%d' % (streamKind, subStreamId), '-of', 'json', '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as process:
        raw, _ = process.communicate()
        report = json.loads(raw)
        status = process.wait()

    if status != 0:
        logger.error('ffprobe failed with status code: %d' % status)
        return None

    if 'frames' not in report:
        logger.error('Impossible to retrieve frames inside file around [%s,%s]' % (begin, end))
        return None

    selected = []
    for frame in report['frames']:
        ts = getTSFrame(frame)
        if ts is None:
            return None
        # ffprobe may report frames outside the requested interval.
        if begin <= ts <= end:
            selected.append(frame)
    return selected
|
|
|
|
def getNearestIFrame(ffprobe, inputFile, timestamp, before=True, delta=timedelta(seconds=2)):
    """Find the I-frame closest to *timestamp* on one side of it.

    The video frames located in ``[timestamp - delta, timestamp + delta]``
    (the lower bound being clamped to zero) are examined. With *before* set,
    the last I-frame at or before *timestamp* is selected; otherwise the
    first I-frame at or after it.

    Args:
        ffprobe: path of the ffprobe executable.
        inputFile: open file object of the video.
        timestamp: ``timedelta`` of the point of interest.
        before: side of *timestamp* on which the I-frame is looked for.
        delta: half-width of the search window.

    Returns:
        ``(nbFrames, iframe)`` where *nbFrames* counts the frames between
        the I-frame and *timestamp* (both bounds included) and *iframe* is
        the ffprobe frame dict. None when the frames cannot be retrieved, a
        timestamp is missing, or no I-frame lies on the requested side.
    """
    logger = logging.getLogger(__name__)

    zero = timedelta()
    tbegin = max(timestamp-delta, zero)
    tend = timestamp+delta

    infd = inputFile.fileno()
    set_inheritable(infd, True)

    logger.debug('Looking for iframe in [%s, %s]' % (tbegin, tend))

    frames = getFramesInStream(ffprobe, inputFile=inputFile, begin=tbegin, end=tend, streamKind='v')
    if frames is None:
        return None

    iframe = None
    for frame in frames:
        if frame['pict_type'] != 'I':
            continue
        ts = getTSFrame(frame)
        if ts is None:
            return None

        if before:
            # Keep going: a later I-frame may still precede the timestamp.
            if ts <= timestamp:
                iframe = frame
        elif ts >= timestamp:
            iframe = frame
            break

    if iframe is None:
        logger.error("Impossible to find I-frame around: %s" % timestamp)
        # Without this return the final statement would use unbound names.
        return None

    logger.info("Found i-frame at: %s" % iframe)
    logger.debug("Found i-frame at %s" % iframe)

    its = getTSFrame(iframe)
    if its is None:
        return None

    nbFrames = 0
    for frame in frames:
        ts = getTSFrame(frame)
        if ts is None:
            return None

        if before:
            if its <= ts <= timestamp:
                logger.info("Retrieve a frame between %s and %s at %s" % (its, timestamp, ts))
                nbFrames = nbFrames+1
        else:
            if timestamp <= ts <= its:
                logger.info("Retrieve a frame between %s and %s at %s" % (ts, timestamp, its))
                nbFrames = nbFrames+1

    return(nbFrames, iframe)
|
|
|
|
def extractMKVPart(mkvmerge, inputFile, outputFile, begin, end):
    """Copy the part of *inputFile* between *begin* and *end* with mkvmerge.

    mkvmerge is run with ``--split parts:begin-end``; both files are handed
    over through ``/proc/self/fd`` after being rewound. The progress bar
    follows the ``Progress :`` lines of mkvmerge, and lines starting with
    ``Warning`` are collected.

    Args:
        mkvmerge: path of the mkvmerge executable.
        inputFile: open file object of the source.
        outputFile: open file object of the destination.
        begin: start of the part, formatted with ``%s`` into the split spec.
        end: end of the part, formatted with ``%s`` into the split spec.

    An exit status of 1 logs the collected warnings, an exit status of 2 logs
    an error; nothing is returned.
    """
    logger = logging.getLogger(__name__)

    logger.info('Extract video between I-frames at %s and %s' % (begin,end))
    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    lseek(infd, 0, SEEK_SET)
    lseek(outfd, 0, SEEK_SET)
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    env = {**os.environ, 'LANG': 'C'}
    progressLine = re.compile('^Progress : (?P<progress>[0-9]{1,3})%$')
    warnings = []
    command = [mkvmerge, '-o', '/proc/self/fd/%d' % outfd, '--split', 'parts:%s-%s' % (begin, end), '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False, env=env) as process:
        pb = tqdm(TextIOWrapper(process.stdout, encoding="utf-8"), total=100, unit='%', desc='Extraction')
        for line in pb:
            if line.startswith('Progress :'):
                m = progressLine.match(line)
                if m is None:
                    logger.error('Impossible to parse progress')
                    # Nothing to report for this line; using m would raise.
                    continue
                pb.update(int(m['progress'])-pb.n)
            elif line.startswith('Warning'):
                warnings.append(line)
        # The bar is completed whatever the last progress reported.
        pb.update(100-pb.n)
        pb.refresh()
        pb.close()

        status = process.wait()
        if status == 1:
            logger.warning('Extraction returns warning')
            for w in warnings:
                logger.warning(w)
        elif status == 2:
            logger.error('Extraction returns errors')
|
|
|
|
|
|
def extractPictures(ffmpeg, inputFile, begin, nbFrames, width=640, height=480):
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
infd = inputFile.fileno()
|
|
lseek(infd, 0, SEEK_SET)
|
|
outfd = memfd_create('pictures', flags=0)
|
|
set_inheritable(outfd, True)
|
|
# PPM header
|
|
# "P6\nWIDTH HEIGHT\n255\n"
|
|
headerLen=2+1+ceil(log(width, 10))+1+ceil(log(height, 10))+1+3+1
|
|
logger.debug('Header length: %d' % headerLen)
|
|
imageLength = width*height*3+headerLen
|
|
length = imageLength*nbFrames
|
|
logger.debug("Estimated length: %d" % length)
|
|
|
|
images = bytes()
|
|
with Popen([ffmpeg, '-loglevel', 'quiet' ,'-y', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-s', '%dx%d'%(width, height), '-vframes', '%d'%nbFrames, '-c:v', 'ppm', '-f', 'image2pipe', '/proc/self/fd/%d' % outfd ], stdout=PIPE, close_fds=False) as ffmpeg:
|
|
status = ffmpeg.wait()
|
|
if status != 0:
|
|
logger.error('Conversion failed with status code: %d' % status)
|
|
return None, None
|
|
|
|
lseek(outfd, 0, SEEK_SET)
|
|
images = read(outfd,length)
|
|
if len(images) != length:
|
|
logger.info("Received %d bytes but %d were expected." % (len(images), length))
|
|
return None, None
|
|
|
|
lseek(outfd, 0, SEEK_SET)
|
|
return images, outfd
|
|
|
|
def extractSound(ffmpeg, inputFile, begin, outputFileName, packetDuration, subChannel=0, nbPackets=0, sampleRate=48000, nbChannels=2):
|
|
logger = logging.getLogger(__name__)
|
|
|
|
outfd = memfd_create(outputFileName, flags=0)
|
|
infd = inputFile.fileno()
|
|
lseek(infd, 0, SEEK_SET)
|
|
set_inheritable(infd, True)
|
|
set_inheritable(outfd, True)
|
|
sound = bytes()
|
|
length = int(nbChannels*sampleRate*4*nbPackets*packetDuration/1000)
|
|
|
|
with Popen([ffmpeg, '-y', '-loglevel', 'quiet', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-frames:a:%d' % subChannel, '%d' % (nbPackets+1),
|
|
'-c:a', 'pcm_s32le', '-sample_rate', '%d' % sampleRate, '-channels', '%d' % nbChannels, '-f', 's32le', '/proc/self/fd/%d' % outfd], stdout=PIPE, close_fds=False) as ffmpeg:
|
|
status = ffmpeg.wait()
|
|
if status != 0:
|
|
logger.error('Sound extraction returns error code: %d' % status)
|
|
return None, None
|
|
|
|
lseek(outfd, 0, SEEK_SET)
|
|
sound = read(outfd, length)
|
|
|
|
if (len(sound) != length):
|
|
logger.info("Received %d bytes but %d were expected (channels=%d, freq=%d, packets=%d, duration=%d ms)." % (len(sound), length, nbChannels, sampleRate, nbPackets, packetDuration))
|
|
return None, None
|
|
|
|
return sound, outfd
|
|
|
|
def dumpPPM(pictures, prefix, temporaries):
    """Write every PPM picture of a buffer into its own file.

    The buffer is a concatenation of binary PPM pictures, each one made of
    the header ``"P6\\nWIDTH HEIGHT\\n255\\n"`` followed by
    ``3 * WIDTH * HEIGHT`` bytes. Picture number ``n`` (from 0) is written to
    ``<prefix>-<n on 3 digits>.ppm``.

    Args:
        pictures: bytes holding the pictures.
        prefix: beginning of the name of the files to create.
        temporaries: list to which the file object of every created file is
            appended; the files are left open.

    The function stops, after logging an error, at the first picture that is
    malformed, truncated, or whose file cannot be created.
    """
    logger = logging.getLogger(__name__)

    dimensionsLine = re.compile(rb'^(?P<width>[0-9]+) (?P<height>[0-9]+)\n$')
    data = memoryview(pictures)
    total = len(data)
    pos = 0
    picture = 0
    while pos < total:
        filename = '%s-%03d.ppm' % (prefix, picture)
        # Only the three header lines are parsed: a small window avoids
        # copying the rest of the buffer for every picture.
        header = BytesIO(bytes(data[pos:pos+64]))
        magic = header.readline()
        dimensions = header.readline()
        maxvalue = header.readline()
        if magic != b'P6\n':
            logger.error('Not a PPM picture')
            return

        m = dimensionsLine.match(dimensions)
        if m is None:
            logger.error('Impossible to parse dimensions of picture')
            return
        width = int(m['width'])
        height = int(m['height'])

        # Measured rather than estimated with logarithms, which give the
        # wrong number of digits for 10, 100, 1000...
        headerLen = len(magic)+len(dimensions)+len(maxvalue)
        length = headerLen+3*width*height
        if pos+length > total:
            # Writing an empty slice forever must be avoided.
            logger.error('Truncated PPM picture: %s' % filename)
            return

        try:
            out = open(filename, 'w')
        except IOError:
            logger.error('Impossible to create file: %s' % filename)
            return
        temporaries.append(out)
        outfd = out.fileno()

        nbBytes = 0
        while nbBytes < length:
            nbBytes+=write(outfd, data[pos+nbBytes:pos+length])
        pos+=length
        picture+=1
|
|
|
|
def extractAllStreams(ffmpeg, ffprobe, inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpMemFD=False):
    """Re-encode the span between begin and end of every stream into a new MKV file.

    Video streams are extracted with extractPictures and audio streams with
    extractSound; both hand back a file descriptor that is then given to
    ffmpeg as an input to produce '<filesPrefix>.mkv'.

    Args:
        ffmpeg: path of the ffmpeg executable.
        ffprobe: path of the ffprobe executable.
        inputFile: source movie, forwarded to the extraction helpers.
        begin: start of the span to extract.
        end: end of the span to extract; nothing is done unless begin < end.
        streams: list of stream descriptions (dictionaries).
        filesPrefix: prefix of the name of every file created.
        nbFrames: number of video frames to extract.
        width: picture width forwarded to extractPictures.
        height: picture height forwarded to extractPictures.
        temporaries: list collecting the file objects created, for later cleanup.
        dumpMemFD: when True, extracted pictures and sound are also dumped to files.

    Returns:
        The open file object of the MKV produced, or None when there is
        nothing to extract or when a step failed (an error is logged).
        The process exits when pictures or sound cannot be extracted.
    """
    logger = logging.getLogger(__name__)

    if not begin < end:
        # Nothing to be done. We are already at a i-frame boundary.
        return None

    inputParams = []
    codecsParams = []
    videoID = 0
    audioID = 0
    subTitleID = 0
    memfds = []
    frameRatePattern = re.compile('^(?P<numerator>[0-9]+)/(?P<denominator>[0-9]+)$')

    try:
        for stream in streams:
            if stream['codec_type'] == 'video':
                logger.info("Extracting %d frames of video stream v:%d" % (nbFrames,videoID))
                m = frameRatePattern.match(stream['r_frame_rate'])
                if m is None or int(m['denominator']) == 0:
                    logger.error('Impossible to parse frame rate: %s' % stream['r_frame_rate'])
                    return None
                frameRate = float(m['numerator']) / float(m['denominator'])
                pixelFormat = stream['pix_fmt']
                codec = stream['codec_name']
                # TODO: use field_order and chroma_location.
                # When interlaced TOP or BOTTOM
                # -top 1 -flags:v +ilme+ildct
                # -top 0 -flags:v +ilme+ildct
                # ======================================= #
                # TODO: adjust SAR and DAR
                # https://superuser.com/questions/907933/correct-aspect-ratio-without-re-encoding-video-file
                imagesBytes, memfd = extractPictures(ffmpeg, inputFile=inputFile, begin=begin, nbFrames=nbFrames, width=width, height=height)
                if imagesBytes is None:
                    exit(-1)

                memfds.append(memfd)

                if dumpMemFD:
                    dumpPPM(imagesBytes, '%s-%d' % (filesPrefix,videoID), temporaries)

                # We rewind to zero the memory file descriptor
                lseek(memfd, 0, SEEK_SET)
                set_inheritable(memfd, True)

                inputParams.extend(['-framerate', '%f'%frameRate, '-f', 'image2pipe', '-i', '/proc/self/fd/%d' % memfd])
                codecsParams.extend(['-c:v:%d' % videoID, codec, '-pix_fmt', pixelFormat])
                # Color properties may be missing from the stream description:
                # only forward to the encoder the ones that are known.
                for option, key in (('-colorspace', 'color_space'), ('-color_primaries', 'color_primaries'),
                                    ('-color_trc', 'color_transfer'), ('-color_range', 'color_range')):
                    if key in stream:
                        codecsParams.extend(['%s:v:%d' % (option, videoID), stream[key]])
                videoID += 1
            elif stream['codec_type'] == 'audio':
                sampleRate = int(stream['sample_rate'])
                nbChannels = int(stream['channels'])
                codec = stream['codec_name']
                if 'tags' in stream and 'language' in stream['tags']:
                    codecsParams.extend(['-metadata:s:a:%d' % audioID, 'language=%s' % stream['tags']['language']])
                packets = getFramesInStream(ffprobe, inputFile=inputFile, begin=begin, end=end, streamKind='a', subStreamId=audioID)
                nbPackets = len(packets)
                logger.debug("Found %d packets to be extracted from audio track." % nbPackets)
                if nbPackets > 0:
                    packetDuration = getPacketDuration(packets[0])
                    if packetDuration is None:
                        return None
                else:
                    packetDuration = 0

                logger.info("Extracting %d packets of audio stream: a:%d" % (nbPackets, audioID))
                tmpname = '%s-%d.pcm' % (filesPrefix,audioID)

                soundBytes, memfd = extractSound(ffmpeg=ffmpeg, inputFile=inputFile, begin=begin, nbPackets=nbPackets, packetDuration=packetDuration, outputFileName=tmpname, sampleRate=sampleRate, nbChannels=nbChannels)

                if soundBytes is None:
                    exit(-1)

                memfds.append(memfd)

                if dumpMemFD:
                    try:
                        dump = open(tmpname, 'wb')
                    except IOError:
                        logger.error('Impossible to create file: %s' % tmpname)
                        return None
                    temporaries.append(dump)
                    dump.write(soundBytes)
                    # The file stays open until cleanup: push its content to disk.
                    dump.flush()

                # We rewind to zero the memory file descriptor
                lseek(memfd, 0, SEEK_SET)
                set_inheritable(memfd, True)

                inputParams.extend(['-f', 's32le', '-ar', '%d'%sampleRate, '-ac', '%d'%nbChannels, '-i', '/proc/self/fd/%d' % memfd])
                codecsParams.extend(['-c:a:%d' % audioID, codec])
                # The bit rate may be missing from the stream description.
                if 'bit_rate' in stream:
                    codecsParams.extend(['-b:a:%d' % audioID, '%d' % int(stream['bit_rate'])])
                audioID += 1
            elif stream['codec_type'] == 'subtitle':
                logger.info("Extracting a subtitle stream: s:%d" % subTitleID)
                inputParams.extend(['-i', './empty.idx'])
                if 'tags' in stream and 'language' in stream['tags']:
                    codecsParams.extend(['-metadata:s:s:%d' % subTitleID, 'language=%s' % stream['tags']['language']])
                codecsParams.extend(['-c:s:%d' % subTitleID, 'copy'])
                subTitleID += 1
            else:
                logger.error("Unknown stream type: %s" % stream['codec_type'])

        # Create a new MKV movie with all streams that have been extracted.
        # Each input carries exactly one stream, hence one '-map' per input.
        encoderParams = [ffmpeg, '-y', '-loglevel', 'quiet']
        encoderParams.extend(inputParams)
        for index in range(videoID+audioID+subTitleID):
            encoderParams.extend(['-map', '%d' % index])
        encoderParams.extend(codecsParams)

        fileName = '%s.mkv' % filesPrefix
        try:
            output = open(fileName,'w+')
        except IOError:
            logger.error('Impossible to create file: %s' % fileName)
            return None

        # The encoder writes through the descriptor it inherits from us.
        outfd = output.fileno()
        set_inheritable(outfd, True)
        # TODO: manage interlaced to previous parameters.
        encoderParams.extend(['-top', '1', '-flags:v', '+ilme+ildct', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-f', 'matroska', '/proc/self/fd/%d' % outfd])

        logger.info('Encoding video: %s' % fileName)
        with Popen(encoderParams, stdout=PIPE, close_fds=False) as encoder:
            status = encoder.wait()

        if status != 0:
            logger.error('Encoding failed with status code: %d' % status)
            output.close()
            return None

        temporaries.append(output)
        return output
    finally:
        # Whatever the outcome, the extracted raw data is no longer needed.
        for memfd in memfds:
            close(memfd)
|
|
|
|
# Merge a list of mkv files passed as input, and produce a new MKV as output
|
|
def mergeMKVs(inputs, outputName):
    """Concatenate MKV files into a new MKV file by running mkvmerge.

    Args:
        inputs: open file objects of the MKV files to concatenate, in order;
            None entries are skipped.
        outputName: path of the MKV file to create.

    Returns:
        The open file object of the file created, or None when it cannot be
        created. Warnings and errors reported by mkvmerge are only logged:
        the file object is returned in these cases too.
    """
    logger = logging.getLogger(__name__)

    try:
        out = open(outputName, 'w')
    except IOError:
        logger.error('Impossible to create file: %s' % outputName)
        return None

    outfd = out.fileno()
    fds = [outfd]
    set_inheritable(outfd, True)

    # mkvmerge reaches every file through a descriptor inherited from us.
    # The first file is given as is, the following ones with a '+' prefix.
    mergeParams = ['mkvmerge']
    first = True
    for mkv in inputs:
        if mkv is None:
            continue
        fd = mkv.fileno()
        fds.append(fd)
        set_inheritable(fd, True)
        mergeParams.append('%s/proc/self/fd/%d' % ('' if first else '+', fd))
        first = False

    mergeParams.extend(['-o', '/proc/self/fd/%d' % outfd])

    # We merge all files.
    warnings = []
    # Accept the progress line with or without a space before the colon.
    progressPattern = re.compile('^Progress ?: (?P<progress>[0-9]{1,3})%$')
    # The output is parsed below, hence the untranslated locale.
    env = {**os.environ, 'LANG': 'C'}
    try:
        with Popen(mergeParams, stdout=PIPE, close_fds=False, env=env) as mkvmerge:
            # The bar is driven by the percentage printed by mkvmerge, not by
            # the number of lines read.
            with tqdm(total=100, unit='%', desc='Merging') as pb:
                for line in TextIOWrapper(mkvmerge.stdout, encoding="utf-8"):
                    if line.startswith('Progress'):
                        m = progressPattern.match(line)
                        if m is None:
                            logger.error('Impossible to parse progress')
                        else:
                            pb.n = int(m['progress'])
                            pb.refresh()
                    elif line.startswith('Warning'):
                        warnings.append(line)

            status = mkvmerge.wait()
            if status == 1:
                logger.warning('Extraction returns warning')
                for w in warnings:
                    logger.warning(w)
            elif status == 2:
                logger.error('Extraction returns errors')
    finally:
        # Do not leak the descriptors to the processes started afterwards.
        for fd in fds:
            set_inheritable(fd, False)

    return out
|
|
|
|
def findSubtitlesTracks(ffprobe, filename):
    """List the subtitle streams of a movie by running ffprobe.

    Command line used:
    ffprobe -i FILE -select_streams s -show_entries stream=index:stream_tags=language -of json

    Args:
        ffprobe: path of the ffprobe executable.
        filename: path of the movie to inspect.

    Returns:
        The value found under the 'streams' key of the JSON printed by
        ffprobe, or None (after logging an error) when the output is not
        valid JSON or has no such key.
    """
    logger = logging.getLogger(__name__)

    command = [ffprobe, '-i', filename, '-select_streams', 's', '-show_entries', 'stream=index:stream_tags=language', '-of', 'json']
    with Popen(command, stdout=PIPE, close_fds=False) as probe:
        out, _ = probe.communicate()

    try:
        description = json.loads(out)
    except ValueError:
        # Nothing usable has been printed (e.g. the tool failed).
        description = None

    if isinstance(description, dict) and 'streams' in description:
        return description['streams']

    logger.error('Impossible to retrieve format of file')
    return None
|
|
|
|
def extractSubTitleTrack(mkvmerge, inputFileName, index, lang):
    """Run '<tool> FILE tracks INDEX:NAME' and log what the tool prints.

    Command line: mkvextract video.mkv tracks position:name [position:name]

    Args:
        mkvmerge: path of the executable to run.
            NOTE(review): the command line above is the one of mkvextract, so
            callers presumably have to pass the path of mkvextract despite
            the name of this parameter - confirm.
        inputFileName: path of the MKV file to read.
        index: number of the track to extract.
        lang: second half of the 'INDEX:NAME' track specification.

    Returns:
        None.
    """
    logger = logging.getLogger(__name__)

    with Popen([mkvmerge, inputFileName, 'tracks', '%d:%s' % (index,lang)], stdout=PIPE, close_fds=False) as mkvextract:
        out, _ = mkvextract.communicate()

    # Log the output line by line (iterating over the bytes object itself
    # would yield one integer per byte).
    for line in out.decode('utf8', errors='replace').splitlines():
        logger.info(line)
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Parses the arguments, converts the input movie to MKV when it is a TS or
    MP4 file, then builds every requested part: the frames before the first
    I-frame and after the last I-frame of the part are re-encoded with
    extractAllStreams (when more numerous than the threshold), the span
    between these two I-frames is extracted with extractMKVPart, and the
    pieces are merged with mergeMKVs. All parts are finally merged into the
    output file and, unless --keep is given, temporary files are removed.

    The process exits with a non-zero status as soon as a step fails.
    """
    logger = logging.getLogger(__name__)
    coloredlogs.install()
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", dest='inputFile', type=str, required=True, help="Input file to process (can be .ts, .mp4 or .mkv).")
    parser.add_argument("-o", "--output", dest='outputFile', type=str, required=True, help="Output MKV file to produce.")
    parser.add_argument("-p", "--part", dest='parts', nargs='+', required=False, action='append', metavar="hh:mm:ss[.mmm]-hh:mm:ss[.mmm]", help="Extract this exact part of the original file.")
    parser.add_argument("-k", "--keep", action='store_true', help="Do not cleanup temporary files after processing.")
    parser.add_argument("--safe", action='store_true', help="Suppress headers and trailers to only keep safe parts of the movie.")
    parser.add_argument("-t", "--threshold", action='store', type=int, help="Suppress headers and trailers that are smaller than the threshold.")
    parser.add_argument("-f", "--fusion", action='store_true', help="Collapse the trailer of a part with the header of the next (when it makes sense).")
    parser.add_argument("--dump-memory", action='store_true', dest='dump', help="For debug purpose, dump all memory mapping of headers (and trailers) before (after) each part. They are kept in memory only otherwise.")
    parser.add_argument("-s","--srt", action='store_true', dest='srt', help="Dump subtitles ")

    args = parser.parse_args()
    logger.debug("Arguments: %s" % args)

    if args.threshold is None:
        args.threshold = 0

    allOptionalTools, paths = checkRequiredTools()

    # Flatten args.parts (one list per occurrence of the option)
    intervals = []
    if args.parts is not None:
        for part in args.parts:
            intervals.extend(part)

    # Parse each interval
    parts = []
    for interval in intervals:
        ts1, ts2 = parseTimeInterval(interval)
        if ts1 is None or ts2 is None:
            logger.error("Illegal time interval: %s" % interval)
            exit(-1)
        parts.append((ts1,ts2))

    # Sort intervals
    parts.sort(key=cmp_to_key(compareTimeInterval))

    # Check that no intervals are overlapping
    prevts = timedelta(0)
    for ts1, ts2 in parts:
        if prevts > ts1:
            logger.error('Intervals are overlapping')
            exit(-1)
        prevts = ts2

    nbParts = len(parts)
    temporaries = []

    # Intermediate files are created in the current directory.
    basename = os.path.splitext(os.path.basename(args.inputFile))[0]
    mp4filename = basename+'.mp4'
    mkvfilename = basename+'.mkv'

    try:
        inputFile = open(args.inputFile, mode='r')
    except IOError:
        logger.error("Impossible to open %s" % args.inputFile)
        exit(-1)

    formatOfFile = getFormat(paths['ffprobe'], inputFile)

    if formatOfFile is None:
        exit(-1)

    duration = timedelta(seconds=float(formatOfFile['duration']))
    logger.info("Durée de l'enregistrement: %s" % duration)

    found = False
    for f in SupportedFormat:
        if 'format_name' in formatOfFile:
            if formatOfFile['format_name'] == str(f):
                found = True
                formatOfFile = f
                break

    if not found:
        # NOTE(review): execution goes on and the file then seems to be
        # handled like an MKV one by the code below - confirm this is intended.
        logger.error('Unsupported format of file')

    if formatOfFile == SupportedFormat.TS:
        logger.info("Converting TS to MP4 (to fix timestamps).")
        try:
            with open(mp4filename, 'w') as mp4:
                ffmpegConvert(paths['ffmpeg'], inputFile, 'mpegts', mp4, 'mp4', duration)
                temporaries.append(mp4)
                logger.info("Converting MP4 to MKV.")
                mkv = open(mkvfilename, 'w')
                ffmpegConvert(paths['ffmpeg'], mp4, 'mp4', mkv, 'matroska', duration)
                # Without any part to extract, the MKV file is the final result.
                if nbParts > 0:
                    temporaries.append(mkv)
        except IOError as e:
            logger.error('Impossible to convert %s to MKV: %s' % (args.inputFile, e))
            exit(-1)
    elif formatOfFile == SupportedFormat.MP4:
        logger.info("Converting MP4 to MKV")
        try:
            mkv = open(mkvfilename, 'w')
        except IOError:
            logger.error('Impossible to create file: %s' % mkvfilename)
            exit(-1)
        # The input file itself is the MP4 movie to convert.
        ffmpegConvert(paths['ffmpeg'], inputFile, 'mp4', mkv, 'matroska', duration)
        if nbParts > 0:
            temporaries.append(mkv)
    else:
        logger.info("Already in MKV")
        mkv = inputFile

    _, codecData = getCodecPrivateData(paths['mkvinfo'], mkv)

    if codecData is None:
        logger.error('Impossible to retrieve codec private data')
        exit(-1)

    streams = getStreams(paths['ffprobe'], mkv)

    mainVideo = None
    for stream in streams:
        if stream['codec_type'] == 'video' and stream['disposition']['default'] == 1:
            mainVideo = stream
            width = stream['width']
            height = stream['height']

    if mainVideo is None:
        logger.error('Impossible to find main video stream.')
        exit(-1)

    # For each part
    partnum = 0
    mkvparts = []
    checks = []
    pos = timedelta()

    for ts1, ts2 in parts:
        # Find the timestamp of the 'I' frame that is the nearest to (but not before) the beginning of the part.
        # Find the timestamp of the 'I' frame that is the nearest to (but not after) the end of the part.
        # We then have
        # begin ----- frame --------- frame --------- end.
        # 'B/P' 'B/P'* 'I' 'I' 'B/P'* 'B/P'
        # If the first frame is already an 'I' frame, there is nothing to do (same for the end).
        # Otherwise we extract the 'B' or 'P' frames from the beginning upto the 'I' frame (excluded)

        partnum = partnum + 1

        headFrames = getNearestIFrame(paths['ffprobe'], mkv, ts1, before=False)
        if headFrames is None:
            exit(-1)

        tailFrames = getNearestIFrame(paths['ffprobe'], mkv, ts2, before=True)
        if tailFrames is None:
            exit(-1)

        nbHeadFrames, headIFrame = headFrames
        nbTailFrames, tailIFrame = tailFrames

        logger.info("Found %d frames between beginning of current part and first I-frame" % nbHeadFrames)
        logger.info("Found %d frames between last I-frame and end of current part" % nbTailFrames)

        headIFrameTS = getTSFrame(headIFrame)
        if headIFrameTS is None:
            exit(-1)
        tailIFrameTS = getTSFrame(tailIFrame)
        if tailIFrameTS is None:
            exit(-1)

        checks.append(pos+headIFrameTS-ts1)

        subparts = []

        if nbHeadFrames > args.threshold:
            # We extract all frames between the beginning upto the frame that immediately preceeds the I-frame.
            head = extractAllStreams(ffmpeg=paths['ffmpeg'], ffprobe=paths['ffprobe'], inputFile=mkv, begin=ts1, end=headIFrameTS, nbFrames=nbHeadFrames-1, filesPrefix='part-%d-head' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
            # No file is produced when there is nothing to extract.
            if head is not None:
                # Change private codec data of the new file so that it is the same as the one of the original movie
                changeCodecPrivateData(paths['mkvinfo'], head, codecData)
                subparts.append(head)

        # Creating MKV file that corresponds to current part between I-frames
        try:
            internal = open('part-%d-internal.mkv' % partnum, 'w')
        except IOError:
            logger.error('Impossible to create file: part-%d-internal.mkv' % partnum)
            exit(-1)
        temporaries.append(internal)
        extractMKVPart(mkvmerge=paths['mkvmerge'], inputFile=mkv, outputFile=internal, begin=headIFrameTS, end=tailIFrameTS)
        subparts.append(internal)

        if nbTailFrames > args.threshold:
            # We extract all frames between the I-frame (including it) upto the end.
            tail = extractAllStreams(ffmpeg=paths['ffmpeg'], ffprobe=paths['ffprobe'], inputFile=mkv, begin=tailIFrameTS, end=ts2, nbFrames=nbTailFrames, filesPrefix='part-%d-tail' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
            # No file is produced when there is nothing to extract.
            if tail is not None:
                # Change private codec data of the new file so that it is the same as the one of the original movie
                changeCodecPrivateData(paths['mkvinfo'], tail, codecData)
                subparts.append(tail)

        if not args.fusion:
            logger.info('Merging: %s' % subparts)
            part = mergeMKVs(inputs=subparts, outputName="part-%d.mkv" % partnum)
            if part is None:
                # The error has already been logged by mergeMKVs.
                exit(-1)
            mkvparts.append(part)
            temporaries.append(part)
        else:
            logger.error("Not yet implemented")
            exit(-1)

        pos = pos+tailIFrameTS-ts1

        # We need to check the end also
        checks.append(pos)

    nbMKVParts = len(mkvparts)
    if nbMKVParts > 1:
        logger.info('Merging: %s' % mkvparts)
        mergeMKVs(inputs=mkvparts, outputName=args.outputFile)
    elif nbMKVParts == 1:
        copyfile('part-1.mkv', args.outputFile)
    else:
        logger.info("Nothing else to do.")

    if args.srt:
        if not allOptionalTools:
            logger.warning("Missing tools for extracting subtitles.")
        else:
            logger.info("Find subtitles tracks and language.")
            subtitles = findSubtitlesTracks(paths['ffprobe'], args.outputFile)
            # Indexes of the subtitle tracks, grouped by language.
            sts = {}
            for subtitle in subtitles or []:
                index = subtitle['index']
                if 'tags' in subtitle and 'language' in subtitle['tags']:
                    sts.setdefault(subtitle['tags']['language'], []).append(index)
                else:
                    logger.error("Dropping subtitle: %s because it is missing language indication" % index)

            # TODO: the names of the SRT files are computed but no track is
            # extracted yet.
            for lang, indexes in sts.items():
                if len(indexes) == 1:
                    index = indexes[0]
                    filename = 'essai-%s.srt' % lang
                else:
                    for nbsrt, index in enumerate(indexes, start=1):
                        filename = 'essai-%s-%d.srt' % (lang, nbsrt)

    if not args.keep:
        logger.info("Cleaning temporary files")
        for f in temporaries:
            path = os.path.realpath(f.name)
            logger.info("Removing: %s" % path)
            f.close()
            unlink(path)

    for c in checks:
        logger.info("Please check cut smoothness at: %s" % c)
|
|
|
|
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|