Compare commits

...

2 Commits

View File

@@ -17,6 +17,8 @@ from tqdm import tqdm, trange
from select import select
from math import floor, ceil, log
from shutil import copyfile, which
import hexdump
# Useful SPS/PPS discussion
# TODO: improve the handling of SPS and PPS header mismatches when merging MKV files with mkvmerge, to remove warnings.
@@ -185,6 +187,245 @@ class SupportedFormat(IntEnum):
# https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2390
# ffmpeg -i <InputFile (before concatenation)> -c:v copy -an -sn -bsf:v trace_headers -t 0.01 -report -loglevel 0 -f null -
# Find the codec private data using mkvinfo
def getCodecPrivateData(mkvinfo, inputFile):
    """Locate the H.264 "Codec's private data" element of an MKV file and read it.

    Runs ``mkvinfo -z -X -P`` on the already opened file (the child reaches it
    through ``/proc/self/fd/<fd>``) and scans the output for the first line
    that describes H.264 codec private data.

    Args:
        mkvinfo: path of the mkvinfo executable.
        inputFile: open file object of the MKV file. Its descriptor is made
            inheritable and its file offset is moved.

    Returns:
        A ``(position, data)`` tuple. ``position`` and the number of bytes
        read come from the ``at <position> size <size>`` part of the mkvinfo
        line (the size that follows ``at``, not the trailing ``data size``).
        ``(None, None)`` is returned when no line matches.
    """
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()
    lseek(infd, 0, SEEK_SET)
    # The child process opens the file through /proc/self/fd, so the
    # descriptor has to survive the exec.
    set_inheritable(infd, True)
    # The pattern below matches English text; LANG=C is presumably there to
    # keep mkvinfo from localizing its output.
    env = {**os.environ, 'LANG': 'C'}

    # Output example
    # Codec's private data: size 48 (H.264 profile: High @L4.0) hexdump 01 64 00 28 ff e1 00 1b 67 64 00 28 ac d9 40 78 04 4f dc d4 04 04 05 00 00 92 ef 00 1d ad a6 1f 16 2d 96 01 00 06 68 fb a3 cb 22 c0 fd f8 f8 00 at 406 size 51 data size 48
    pattern = re.compile(r"^.*Codec's private data: size ([0-9]+) \(H.264.*\) hexdump (?P<hexdump>([0-9a-f]{2} )+)at (?P<position>[0-9]+) size (?P<size>[0-9]+).*$")

    with Popen([mkvinfo, '-z', '-X', '-P', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False, env=env) as process:
        # communicate() already waits for the child to terminate.
        out, _ = process.communicate()

    for line in out.decode('utf8').splitlines():
        m = pattern.match(line)
        if m is None:
            continue
        size = int(m.group('size'))
        position = int(m.group('position'))
        logger.debug("Found codec private data at position: %s, size: %d", position, size)
        lseek(infd, position, SEEK_SET)
        return position, read(infd, size)

    return None, None
def parseMKVTree(mkvinfo, inputFile):
    """Index every element printed by mkvinfo under a dotted hierarchical key.

    Runs ``mkvinfo -z -X -P`` on the already opened file and parses each
    output line. A line starting with ``+`` is a root element (depth 0); a
    line starting with ``|`` followed by spaces and ``+`` is nested, its depth
    being the length of the "spaces and plus" run. Elements are numbered from
    1 among their siblings, and the key of an element is the dot-joined list
    of numbers from the root down to it (e.g. ``"2.1.3"``).

    Args:
        mkvinfo: path of the mkvinfo executable.
        inputFile: open file object of the MKV file. Its descriptor is made
            inheritable and its file offset is reset to 0.

    Returns:
        A dict mapping each key to the ``(position, size)`` pair taken from
        the ``at <position> size <size>`` part of the line. Lines that do not
        match are logged as errors and skipped.
    """
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()
    lseek(infd, 0, SEEK_SET)
    # The child process opens the file through /proc/self/fd, so the
    # descriptor has to survive the exec.
    set_inheritable(infd, True)
    env = {**os.environ, 'LANG': 'C'}

    pattern = re.compile(r"(^(?P<root>\+)|(\|(?P<depth>[ ]*\+))).*at (?P<position>[0-9]+) size (?P<size>[0-9]+).*$")

    with Popen([mkvinfo, '-z', '-X', '-P', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False, env=env) as process:
        # communicate() already waits for the child to terminate.
        out, _ = process.communicate()

    elements = {}
    # prefix[d] is the sibling number of the current element at depth d.
    # Invariant: len(prefix) == prevDepth + 1.
    prefix = []
    prevDepth = -1
    for line in out.decode('utf8').splitlines():
        m = pattern.match(line)
        if m is None:
            logger.error("Impossible to match line: %s", line)
            continue

        position = int(m.group('position'))
        size = int(m.group('size'))
        depth = 0 if m.group('root') is not None else len(m.group('depth'))

        if depth > prevDepth:
            # Going down: every newly entered level starts at its first child.
            prefix.extend([1] * (depth - prevDepth))
        else:
            # Same level or going up: drop the deeper levels, then move on to
            # the next sibling at the current level.
            del prefix[depth + 1:]
            prefix[-1] += 1
        prevDepth = depth

        elements[".".join(map(str, prefix))] = (position, size)

    return elements
# MKV is formatted as an EBML file (Extensible Binary Meta Language).
# cf http://matroska-org.github.io/libebml/specs.html
# It is a Type, Length, Value (TLV) kind of binary file.
# Types are encoded as follows:
# 1xxx xxxx - Class A IDs (2^7 -1 possible values)
# 01xx xxxx xxxx xxxx - Class B IDs (2^14-1 possible values)
# 001x xxxx xxxx xxxx xxxx xxxx - Class C IDs (2^21-1 possible values)
# 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx - Class D IDs (2^28-1 possible values)
# Lengths are encoded as follows:
# 1xxx xxxx - value 0 to 2^7-2
# 01xx xxxx xxxx xxxx - value 0 to 2^14-2
# 001x xxxx xxxx xxxx xxxx xxxx - value 0 to 2^21-2
# 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^28-2
# 0000 1xxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^35-2
# 0000 01xx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^42-2
# 0000 001x xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^49-2
# 0000 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^56-2
def changeEBMLElementSize(inputFile, position, addendum):
    """Add ``addendum`` to the data size stored in an EBML element header, in place.

    The element starts at ``position`` with its ID (1 to 4 bytes, the length
    being given by the first set bit of the first byte), followed by the data
    size (1 to 8 bytes, encoded the same way). Only the size field is
    rewritten, and it keeps its original width.

    Args:
        inputFile: file object opened for reading and writing; it is accessed
            through its descriptor and its file offset is moved.
        position: offset of the first byte of the element (its ID).
        addendum: number of bytes to add to the current data size.

    Raises:
        SystemExit: with status -1 when the ID or size length cannot be
            determined, when the size field is truncated, or when the new size
            is negative or does not fit in the existing size field. Nothing is
            written in those cases.
    """
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()

    # First byte of the element ID: the rank of its first set bit is the
    # number of bytes of the ID.
    lseek(infd, position, SEEK_SET)
    elementType = int.from_bytes(read(infd, 1), byteorder='big')
    typeSize = _ebmlLengthFromFirstByte(elementType, 4)
    if typeSize is None:
        logger.error('Size of element type cannot be determined: %s', format(elementType, '08b'))
        raise SystemExit(-1)

    # The size field immediately follows the ID.
    sizePosition = position + typeSize
    lseek(infd, sizePosition, SEEK_SET)
    sizeHead = int.from_bytes(read(infd, 1), byteorder='big')
    logger.info('First byte of size: %x', sizeHead)
    sizeOfDataSize = _ebmlLengthFromFirstByte(sizeHead, 8)
    if sizeOfDataSize is None:
        logger.error('Size of data size cannot be determined: %s', format(sizeHead, '08b'))
        raise SystemExit(-1)
    logger.info('Size of data size: %d.', sizeOfDataSize)

    lseek(infd, sizePosition, SEEK_SET)
    oldSizeBuf = read(infd, sizeOfDataSize)
    if len(oldSizeBuf) != sizeOfDataSize:
        logger.error('Size field is truncated: expected %d bytes, got %d.', sizeOfDataSize, len(oldSizeBuf))
        raise SystemExit(-1)

    maxSize = 2**(sizeOfDataSize*7)-2
    # Length marker bit, first within the leading byte, then at its place in
    # the whole size field.
    mask = 128 >> (sizeOfDataSize-1)
    marker = mask << ((sizeOfDataSize-1)*8)

    encodedSize = int.from_bytes(oldSizeBuf, byteorder='big')
    logger.info('Size of data with mask: %x mask: %d.', encodedSize, mask)
    sizeOfData = encodedSize - marker
    logger.info('Found element at position: %d, size of type: %d size of data: %d maximal size: %d.', position, typeSize, sizeOfData, maxSize)

    newSize = sizeOfData+addendum
    if newSize > maxSize:
        logger.error('New size is too big to be encoded in actual size field.')
        raise SystemExit(-1)
    if newSize < 0:
        # A negative value added to the marker would silently encode garbage.
        logger.error('New size is negative: %d.', newSize)
        raise SystemExit(-1)

    newSizeBuf = (newSize + marker).to_bytes(sizeOfDataSize, byteorder='big')
    logger.info('Old encoded size: %s New encoded size: %s', _colonHex(oldSizeBuf), _colonHex(newSizeBuf))
    lseek(infd, sizePosition, SEEK_SET)
    write(infd, newSizeBuf)


def _ebmlLengthFromFirstByte(firstByte, maxLength):
    """Return the 1-based rank of the first set bit among the ``maxLength`` most significant bits of ``firstByte``, or None."""
    for length in range(1, maxLength+1):
        if firstByte & (128 >> (length-1)):
            return length
    return None


def _colonHex(buf):
    """Format bytes as uppercase, colon-separated hexadecimal pairs."""
    return ':'.join('%02X' % byte for byte in buf)
def changeCodecPrivateData(mkvinfo, inputFile, codecData):
    """Replace, in place, the codec private data of an MKV file by ``codecData``.

    The bytes returned by getCodecPrivateData() for ``inputFile`` are replaced
    by ``codecData``. Only the case where the new data is longer is
    implemented: the rest of the file is shifted towards the end, and the size
    field of every ancestor element (as listed by parseMKVTree()) is increased
    by the difference.

    Args:
        mkvinfo: path of the mkvinfo executable.
        inputFile: file object opened for reading and writing.
        codecData: bytes to store in place of the current codec private data.

    Raises:
        SystemExit: with status -1 when the codec private data cannot be
            found, when its key cannot be retrieved from the element tree, or
            when the new data is not longer than the current one (not yet
            implemented).
    """
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()

    # NOTE(review): this copy of the first 10000000 bytes into 'save.mkv'
    # looks like a debugging aid - confirm whether it is still wanted.
    lseek(infd, 0, SEEK_SET)
    save = read(infd, 10000000)
    # The file object owns its descriptor: let it do the closing (closing the
    # raw fd behind its back makes it close an unrelated fd later on).
    with open('save.mkv', 'wb') as backup:
        backup.write(save)
    lseek(infd, 0, SEEK_SET)

    currentLength = fstat(infd).st_size
    logger.info('Current size of file: %d' % currentLength)
    position, currentData = getCodecPrivateData(mkvinfo, inputFile)
    if currentData is None:
        logger.error('Impossible to retrieve codec private data')
        raise SystemExit(-1)

    currentDataLength = len(currentData)
    futureLength = currentLength - currentDataLength + len(codecData)
    logger.info('Expected size of file: %d' % futureLength)
    logger.info('Current data at position %d: %s' % (position, hexdump.dump(currentData, sep=":")))
    logger.info('Future data: %s' % hexdump.dump(codecData, sep=":"))

    # Find the element located at the same position in the element tree.
    elements = parseMKVTree(mkvinfo, inputFile)
    key = next((k for k, (pos, _) in elements.items() if pos == position), None)
    if key is None:
        logger.error('Impossible to retrieve the key of codec private data')
        raise SystemExit(-1)
    logger.info('Codec private data key: %s' % key)

    if futureLength <= currentLength:
        # Same-size and shrinking replacements are not supported.
        logger.error("Not yet implemented")
        raise SystemExit(-1)

    # Save everything located after the current data ...
    lseek(infd, position+currentDataLength, SEEK_SET)
    tail = read(infd, currentLength-(position+currentDataLength))
    # ... extend the file at the end with zeroes ...
    ftruncate(infd, futureLength)
    # ... and write the tail back after the room needed by the new data.
    lseek(infd, position+len(codecData), SEEK_SET)
    write(infd, tail)
    lseek(infd, position, SEEK_SET)
    write(infd, codecData)

    # Every ancestor of the replaced element has grown by the same amount:
    # walk up the dotted key and fix their size fields one by one.
    keys = key.split('.')
    logger.info(keys)
    while len(keys) > 1:
        keys.pop()
        parentKey = ".".join(keys)
        pos, size = elements[parentKey]
        logger.info('Trying to fix element with key: %s at position: %d with actual size: %d.' % (parentKey, pos, size))
        changeEBMLElementSize(inputFile, pos, futureLength-currentLength)
def getFormat(ffprobe, inputFile):
logger = logging.getLogger(__name__)
@@ -735,7 +976,7 @@ def extractAllStreams(ffmpeg, ffprobe, inputFile, begin, end, streams, filesPref
encoderParams.extend(codecsParams)
fileName = '%s.mkv' % filesPrefix
try:
output = open(fileName,'w')
output = open(fileName,'w+')
except IOError:
logger.error('Impossible to create file: %s' % fileName)
return None
@@ -853,12 +1094,18 @@ def main():
parser.add_argument("-o", "--output", dest='outputFile', type=str, required=True, help="Output MKV file to produce.")
parser.add_argument("-p", "--part", dest='parts', nargs='+', required=False, action='append', metavar="hh:mm:ss[.mmm]-hh:mm:ss[.mmm]", help="Extract this exact part of the original file.")
parser.add_argument("-k", "--keep", action='store_true', help="Do not cleanup temporary files after processing.")
parser.add_argument("--safe", action='store_true', help="Suppress headers and trailers to only keep safe parts of the movie.")
parser.add_argument("-t", "--threshold", action='store', type=int, help="Suppress headers and trailers that are smaller than the threshold.")
parser.add_argument("-f", "--fusion", action='store_true', help="Collapse the trailer of a part with the header of the next (when it makes sense).")
parser.add_argument("--dump-memory", action='store_true', dest='dump', help="For debug purpose, dump all memory mapping of headers (and trailers) before (after) each part. They are kept in memory only otherwise.")
parser.add_argument("-s","--srt", action='store_true', dest='srt', help="Dump subtitles ")
args = parser.parse_args()
logger.debug("Arguments: %s" % args)
if args.threshold == None:
args.threshold = 0
allOptionalTools, paths = checkRequiredTools()
# Flatten args.parts
@@ -951,7 +1198,13 @@ def main():
else:
logger.info("Already in MKV")
mkv = inputFile
_, codecData = getCodecPrivateData(paths['mkvinfo'], mkv)
if codecData == None:
logger.error('Impossible to retrieve codec private data')
exit(-1)
streams = getStreams(paths['ffprobe'], mkv)
mainVideo = None
@@ -1007,9 +1260,11 @@ def main():
subparts = []
if nbHeadFrames > 0:
if nbHeadFrames > args.threshold:
# We extract all frames from the beginning up to the frame that immediately precedes the I-frame.
head = extractAllStreams(ffmpeg=paths['ffmpeg'], ffprobe=paths['ffprobe'], inputFile=mkv, begin=ts1, end=headIFrameTS, nbFrames=nbHeadFrames-1, filesPrefix='part-%d-head' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
# Change private codec data of the new file so that it is the same as the one of the original movie
changeCodecPrivateData(paths['mkvinfo'], head, codecData)
subparts.append(head)
# Creating MKV file that corresponds to current part between I-frames
@@ -1022,15 +1277,21 @@ def main():
extractMKVPart(mkvmerge=paths['mkvmerge'], inputFile=mkv, outputFile=internal, begin=headIFrameTS, end=tailIFrameTS)
subparts.append(internal)
if nbTailFrames > 0:
if nbTailFrames > args.threshold:
# We extract all frames from the I-frame (including it) up to the end.
tail = extractAllStreams(ffmpeg=paths['ffmpeg'], ffprobe=paths['ffprobe'], inputFile=mkv, begin=tailIFrameTS, end=ts2, nbFrames=nbTailFrames, filesPrefix='part-%d-tail' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
# Change private codec data of the new file so that it is the same as the one of the original movie
changeCodecPrivateData(paths['mkvinfo'], tail, codecData)
subparts.append(tail)
logger.info('Merging: %s' % subparts)
part = mergeMKVs(inputs=subparts, outputName="part-%d.mkv" % partnum)
mkvparts.append(part)
temporaries.append(part)
if not args.fusion:
logger.info('Merging: %s' % subparts)
part = mergeMKVs(inputs=subparts, outputName="part-%d.mkv" % partnum)
mkvparts.append(part)
temporaries.append(part)
else:
logging.error("Not yet implemented")
exit(-1)
pos = pos+tailIFrameTS-ts1