Adding a bunch of functions to modify codec private data inside video tracks, correct mkv binary representation after such changes.
This commit is contained in:
253
removeads.py
253
removeads.py
@@ -185,6 +185,245 @@ class SupportedFormat(IntEnum):
|
||||
# https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2390
|
||||
# ffmpeg -i <InputFile (before concatenation)> -c:v copy -an -sn -bsf:v trace_headers -t 0.01 -report -loglevel 0 -f null -
|
||||
|
||||
# Found codec private data using mkvinfo
|
||||
def getCodecPrivateData(mkvinfo, inputFile):
    """Locate and read the H.264 codec private data element of an MKV file.

    Runs ``mkvinfo -z -X -P`` on the already opened file (handed to the child
    process through ``/proc/self/fd/<fd>``) and looks for the first line that
    describes "Codec's private data" of an H.264 track.

    Parameters:
        mkvinfo: path of the mkvinfo executable.
        inputFile: opened file object of the MKV file; only its file
            descriptor is used and its offset is modified.

    Returns:
        A ``(position, data)`` tuple where ``position`` is the offset reported
        by mkvinfo after ``at`` and ``data`` the bytes read from that offset,
        or ``(None, None)`` when no matching line is found.

    NOTE(review): the length that is read is the number following
    ``at <position> size`` (51 in the sample line below), not the
    ``data size`` (48), so the returned bytes presumably include the
    element's ID and length header -- confirm against mkvinfo documentation.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    lseek(infd, 0, SEEK_SET)
    # The child process reopens the file through /proc/self/fd/<fd>, hence the
    # descriptor must survive the exec.
    set_inheritable(infd, True)
    # Force untranslated mkvinfo messages so that the regular expression matches.
    env = {**os.environ, 'LANG': 'C'}

    # Output example
    # Codec's private data: size 48 (H.264 profile: High @L4.0) hexdump 01 64 00 28 ff e1 00 1b 67 64 00 28 ac d9 40 78 04 4f dc d4 04 04 05 00 00 92 ef 00 1d ad a6 1f 16 2d 96 01 00 06 68 fb a3 cb 22 c0 fd f8 f8 00 at 406 size 51 data size 48
    pattern = re.compile(r"^.*Codec's private data: size ([0-9]+) \(H.264.*\) hexdump (?P<hexdump>([0-9a-f]{2} )+)at (?P<position>[0-9]+) size (?P<size>[0-9]+).*$")

    # communicate() already waits for the child to terminate.
    with Popen([mkvinfo, '-z', '-X', '-P', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False, env=env) as process:
        out, _ = process.communicate()

    for line in out.decode('utf8').splitlines():
        m = pattern.match(line)
        if m is None:
            continue

        size = int(m.group('size'))
        position = int(m.group('position'))
        logger.debug("Found codec private data at position: %s, size: %d", position, size)

        # Only the first matching track is considered.
        lseek(infd, position, SEEK_SET)
        data = read(infd, size)
        return position, data

    return None, None
|
||||
|
||||
|
||||
def parseMKVTree(mkvinfo, inputFile):
    """Build a map of the elements of an MKV file from the output of mkvinfo.

    Runs ``mkvinfo -z -X -P`` on the already opened file (handed to the child
    process through ``/proc/self/fd/<fd>``) and parses each output line that
    carries an ``at <position> size <size>`` suffix.

    Every element gets a dotted key that describes its place in the tree: each
    component is the 1-based rank of an element among its siblings, e.g.
    ``"2.3.1"`` is the first child of the third child of the second root
    element. The parent of an element is obtained by dropping the last
    component of its key.

    Parameters:
        mkvinfo: path of the mkvinfo executable.
        inputFile: opened file object of the MKV file; only its file
            descriptor is used and its offset is reset to 0.

    Returns:
        A dict mapping each key to a ``(position, size)`` tuple of integers.
        Lines that cannot be parsed are reported with ``logger.error`` and
        skipped.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    lseek(infd, 0, SEEK_SET)
    # The child process reopens the file through /proc/self/fd/<fd>, hence the
    # descriptor must survive the exec.
    set_inheritable(infd, True)
    # Force untranslated mkvinfo messages so that the regular expression matches.
    env = {**os.environ, 'LANG': 'C'}

    # A root element starts with '+'; a nested one starts with '|' followed by
    # spaces and '+'. The length of "<spaces>+" gives the nesting depth.
    pattern = re.compile(r"(^(?P<root>\+)|(\|(?P<depth>[ ]*\+))).*at (?P<position>[0-9]+) size (?P<size>[0-9]+).*$")

    # communicate() already waits for the child to terminate.
    with Popen([mkvinfo, '-z', '-X', '-P', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False, env=env) as process:
        out, _ = process.communicate()

    elements = {}
    # prefix[d] is the rank of the current element of depth d among its
    # siblings; len(prefix) is always prevDepth + 1.
    prefix = []
    prevDepth = -1

    for line in out.decode('utf8').splitlines():
        m = pattern.match(line)
        if m is None:
            logger.error("Impossible to match line: %s", line)
            continue

        position = int(m.group('position'))
        size = int(m.group('size'))
        depth = 0 if m.group('root') is not None else len(m.group('depth'))

        if depth > prevDepth:
            # Going down: every new level starts with its first child.
            prefix.extend([1] * (depth - prevDepth))
        else:
            # Same level or going up: forget the deeper levels, then move to
            # the next sibling at the current depth.
            del prefix[depth + 1:]
            prefix[-1] += 1

        prevDepth = depth
        elements[".".join(map(str, prefix))] = (position, size)

    return elements
|
||||
|
||||
# MKV is formatted as an EBML file (Extensible Binary Meta Language).
|
||||
# cf http://matroska-org.github.io/libebml/specs.html
|
||||
# It is a Type, Length, Value (TLV) kind of binary file.
|
||||
# Types are encoded as follows:
|
||||
# 1xxx xxxx - Class A IDs (2^7 -1 possible values)
|
||||
# 01xx xxxx xxxx xxxx - Class B IDs (2^14-1 possible values)
|
||||
# 001x xxxx xxxx xxxx xxxx xxxx - Class C IDs (2^21-1 possible values)
|
||||
# 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx - Class D IDs (2^28-1 possible values)
|
||||
# Lengths are encoded as follows:
|
||||
# 1xxx xxxx - value 0 to 2^7-2
|
||||
# 01xx xxxx xxxx xxxx - value 0 to 2^14-2
|
||||
# 001x xxxx xxxx xxxx xxxx xxxx - value 0 to 2^21-2
|
||||
# 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^28-2
|
||||
# 0000 1xxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^35-2
|
||||
# 0000 01xx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^42-2
|
||||
# 0000 001x xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^49-2
|
||||
# 0000 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - value 0 to 2^56-2
|
||||
|
||||
|
||||
def _ebmlVintLength(firstByte, maxLength):
    """Return the length in bytes announced by the first byte of an EBML
    variable-length integer (rank of its first set bit, starting from the most
    significant one), or None when no bit is set within maxLength bits."""
    for length in range(1, maxLength + 1):
        if firstByte & (0x80 >> (length - 1)):
            return length
    return None


def changeEBMLElementSize(inputFile, position, addendum):
    """Add ``addendum`` to the data size stored in the header of an EBML element.

    The element header found at ``position`` is made of an ID (1 to 4 bytes)
    followed by the size of the data (1 to 8 bytes). The size is rewritten in
    place, using the same number of bytes as the existing encoding.

    Parameters:
        inputFile: opened file object, readable and writable; only its file
            descriptor is used and its offset is modified.
        position: offset of the first byte of the element (its ID).
        addendum: number of bytes to add to the current size (may be negative).

    Raises:
        SystemExit: when the header cannot be decoded, when the element has
            the reserved "unknown size" value, or when the new size is
            negative or does not fit in the existing size field. The file is
            left untouched in all these cases.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()

    # Element ID: 1 to 4 bytes, the length is given by the first set bit.
    lseek(infd, position, SEEK_SET)
    elementType = int.from_bytes(read(infd, 1), byteorder='big')
    typeSize = _ebmlVintLength(elementType, 4)
    if typeSize is None:
        logger.error('Size of element type cannot be determined: %s', bin(elementType))
        raise SystemExit(-1)

    # The size field immediately follows the ID.
    sizePosition = position + typeSize
    lseek(infd, sizePosition, SEEK_SET)
    sizeHead = int.from_bytes(read(infd, 1), byteorder='big')
    logger.info('First byte of size: %x', sizeHead)
    sizeOfDataSize = _ebmlVintLength(sizeHead, 8)
    if sizeOfDataSize is None:
        logger.error('Size of data size cannot be determined: %s', bin(sizeHead))
        raise SystemExit(-1)
    logger.info('Size of data size: %d.', sizeOfDataSize)

    lseek(infd, sizePosition, SEEK_SET)
    oldSizeBuf = read(infd, sizeOfDataSize)
    if len(oldSizeBuf) != sizeOfDataSize:
        logger.error('Size field of element at position %d is truncated.', position)
        raise SystemExit(-1)

    # Length marker bit of the first byte, and its value once the whole size
    # field is read as one big-endian integer.
    mask = 0x80 >> (sizeOfDataSize - 1)
    marker = mask << ((sizeOfDataSize - 1) * 8)
    # All value bits set to 1 is reserved, hence "- 2".
    maxSize = 2**(sizeOfDataSize * 7) - 2

    encodedSize = int.from_bytes(oldSizeBuf, byteorder='big')
    logger.info('Size of data with mask: %x mask: %d.', encodedSize, mask)
    sizeOfData = encodedSize - marker
    logger.info('Found element at position: %d, size of type: %d size of data: %d maximal size: %d.', position, typeSize, sizeOfData, maxSize)

    if sizeOfData > maxSize:
        logger.error('Element at position %d has an unknown size that cannot be adjusted.', position)
        raise SystemExit(-1)

    newSize = sizeOfData + addendum
    if newSize < 0:
        logger.error('New size is negative.')
        raise SystemExit(-1)
    if newSize > maxSize:
        logger.error('New size is too big to be encoded in actual size field.')
        raise SystemExit(-1)

    newSizeBuf = (newSize + marker).to_bytes(sizeOfDataSize, byteorder='big')

    # Dumps are only computed when they are actually going to be logged.
    if logger.isEnabledFor(logging.INFO):
        logger.info('Old encoded size: %s New encoded size: %s', hexdump.dump(oldSizeBuf, sep=':'), hexdump.dump(newSizeBuf, sep=':'))

    lseek(infd, sizePosition, SEEK_SET)
    write(infd, newSizeBuf)
|
||||
|
||||
|
||||
|
||||
def _writeFully(fd, data):
    """Write all the bytes of data to fd, retrying after partial writes."""
    view = memoryview(data)
    while len(view) > 0:
        written = write(fd, view)
        view = view[written:]


def changeCodecPrivateData(mkvinfo, inputFile, codecData):
    """Replace the codec private data of an MKV file, in place.

    The current data are located with getCodecPrivateData(). They are
    overwritten with ``codecData``; when the length differs, the remainder of
    the file is moved accordingly and the size of every enclosing element
    (found with parseMKVTree()) is adjusted with changeEBMLElementSize().

    Parameters:
        mkvinfo: path of the mkvinfo executable.
        inputFile: opened file object of the MKV file, readable and writable.
        codecData: bytes to store in place of the current codec private data,
            in the same form as the data returned by getCodecPrivateData().

    Raises:
        SystemExit: when the codec private data cannot be located in the file
            or in the parsed tree, or when the file cannot be fully read.

    Side effects:
        A copy of (at most) the first 10000000 bytes of the file is written to
        'save.mkv' in the current directory before any modification.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()

    # NOTE(review): this partial copy looks like a debugging aid -- confirm it
    # is still wanted. The file object owns the descriptor and closes it
    # exactly once.
    lseek(infd, 0, SEEK_SET)
    with open('save.mkv', 'wb') as backup:
        backup.write(read(infd, 10000000))
    lseek(infd, 0, SEEK_SET)

    currentLength = fstat(infd).st_size
    logger.info('Current size of file: %d', currentLength)

    position, currentData = getCodecPrivateData(mkvinfo, inputFile)
    if currentData is None:
        logger.error('Impossible to retrieve codec private data')
        raise SystemExit(-1)

    currentDataLength = len(currentData)
    delta = len(codecData) - currentDataLength
    futureLength = currentLength + delta
    logger.info('Expected size of file: %d', futureLength)

    logger.info('Current data at position %d: %s', position, hexdump.dump(currentData, sep=":"))
    logger.info('Future data: %s', hexdump.dump(codecData, sep=":"))

    if delta == 0:
        # Same length: nothing moves and no size has to be fixed.
        lseek(infd, position, SEEK_SET)
        _writeFully(infd, codecData)
        return

    elements = parseMKVTree(mkvinfo, inputFile)
    key = next((k for k, (pos, _) in elements.items() if pos == position), None)
    if key is None:
        logger.error('Impossible to retrieve the key of codec private data')
        raise SystemExit(-1)
    logger.info('Codec private data key: %s', key)

    # Everything that follows the current data has to be moved by delta bytes.
    tailPosition = position + currentDataLength
    tailLength = currentLength - tailPosition
    lseek(infd, tailPosition, SEEK_SET)
    tail = read(infd, tailLength)
    if len(tail) != tailLength:
        logger.error('Impossible to read the %d bytes that follow codec private data', tailLength)
        raise SystemExit(-1)

    if delta > 0:
        # We extend the file at the end with zeroes
        ftruncate(infd, futureLength)
    lseek(infd, position + len(codecData), SEEK_SET)
    _writeFully(infd, tail)
    if delta < 0:
        # We drop the bytes that are left over at the end
        ftruncate(infd, futureLength)

    lseek(infd, position, SEEK_SET)
    _writeFully(infd, codecData)

    keys = key.split('.')
    logger.info(keys)

    # Every ancestor starts before the modified data, so the positions found
    # in the tree are still valid. We fix them from the direct parent up to
    # the root element.
    for depth in range(len(keys) - 1, 0, -1):
        parentKey = ".".join(keys[:depth])
        pos, size = elements[parentKey]
        logger.info('Trying to fix element with key: %s at position: %d with actual size: %d.', parentKey, pos, size)
        changeEBMLElementSize(inputFile, pos, delta)
|
||||
|
||||
|
||||
def getFormat(ffprobe, inputFile):
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -735,7 +974,7 @@ def extractAllStreams(ffmpeg, ffprobe, inputFile, begin, end, streams, filesPref
|
||||
encoderParams.extend(codecsParams)
|
||||
fileName = '%s.mkv' % filesPrefix
|
||||
try:
|
||||
output = open(fileName,'w')
|
||||
output = open(fileName,'w+')
|
||||
except IOError:
|
||||
logger.error('Impossible to create file: %s' % fileName)
|
||||
return None
|
||||
@@ -951,7 +1190,13 @@ def main():
|
||||
else:
|
||||
logger.info("Already in MKV")
|
||||
mkv = inputFile
|
||||
|
||||
|
||||
_, codecData = getCodecPrivateData(paths['mkvinfo'], mkv)
|
||||
|
||||
if codecData == None:
|
||||
logger.error('Impossible to retrieve codec private data')
|
||||
exit(-1)
|
||||
|
||||
streams = getStreams(paths['ffprobe'], mkv)
|
||||
|
||||
mainVideo = None
|
||||
@@ -1010,6 +1255,8 @@ def main():
|
||||
if nbHeadFrames > 0:
|
||||
# We extract all frames from the beginning up to the frame that immediately precedes the I-frame.
|
||||
head = extractAllStreams(ffmpeg=paths['ffmpeg'], ffprobe=paths['ffprobe'], inputFile=mkv, begin=ts1, end=headIFrameTS, nbFrames=nbHeadFrames-1, filesPrefix='part-%d-head' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
|
||||
# Change private codec data of the new file so that it is the same as the one of the original movie
|
||||
changeCodecPrivateData(paths['mkvinfo'], head, codecData)
|
||||
subparts.append(head)
|
||||
|
||||
# Creating MKV file that corresponds to current part between I-frames
|
||||
@@ -1025,6 +1272,8 @@ def main():
|
||||
if nbTailFrames > 0:
|
||||
# We extract all frames from the I-frame (inclusive) up to the end.
|
||||
tail = extractAllStreams(ffmpeg=paths['ffmpeg'], ffprobe=paths['ffprobe'], inputFile=mkv, begin=tailIFrameTS, end=ts2, nbFrames=nbTailFrames, filesPrefix='part-%d-tail' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
|
||||
# Change private codec data of the new file so that it is the same as the one of the original movie
|
||||
changeCodecPrivateData(paths['mkvinfo'], tail, codecData)
|
||||
subparts.append(tail)
|
||||
|
||||
logger.info('Merging: %s' % subparts)
|
||||
|
||||
Reference in New Issue
Block a user