Subtitles extracted through OCR can now be remuxed into the final-cut movie (in addition to the image-based ones).
This commit is contained in:
105
removeads.py
105
removeads.py
@@ -16,7 +16,7 @@ import shutil
|
||||
from tqdm import tqdm, trange
|
||||
from select import select
|
||||
from math import floor, ceil, log
|
||||
from shutil import copyfile, which
|
||||
from shutil import copyfile, which, move
|
||||
import hexdump
|
||||
from iso639 import Lang
|
||||
from iso639.exceptions import InvalidLanguageValue
|
||||
@@ -107,9 +107,10 @@ def getSubTitlesTracks(ffprobe, mkvPath):
|
||||
|
||||
return tracks
|
||||
|
||||
def extractSRT(mkvextract, mkvPath, subtitles, langs):
|
||||
def extractSRT(mkvextract, fileName, subtitles, langs):
|
||||
logger = logging.getLogger(__name__)
|
||||
params = [mkvextract, mkvPath, 'tracks']
|
||||
|
||||
params = [mkvextract, fileName, 'tracks']
|
||||
|
||||
res = []
|
||||
|
||||
@@ -133,8 +134,20 @@ def extractSRT(mkvextract, mkvPath, subtitles, langs):
|
||||
count = count+1
|
||||
|
||||
env = {**os.environ, 'LANG': 'C'}
|
||||
with Popen(params, env=env) as extract:
|
||||
extract.wait()
|
||||
with Popen(params, stdout=PIPE, close_fds=False, env=env) as extract:
|
||||
pb = tqdm(TextIOWrapper(extract.stdout, encoding="utf-8"), total=100, unit='%', desc='Extraction:')
|
||||
for line in pb:
|
||||
if line.startswith('Progress :'):
|
||||
p = re.compile('^Progress : (?P<progress>[0-9]{1,3})%$')
|
||||
m = p.match(line)
|
||||
if m == None:
|
||||
logger.error('Impossible to parse progress')
|
||||
pb.update(int(m['progress'])-pb.n)
|
||||
pb.update(100-pb.n)
|
||||
pb.refresh()
|
||||
pb.close()
|
||||
|
||||
extract.wait()
|
||||
|
||||
if extract.returncode != 0:
|
||||
logger.error('Mkvextract returns an error code: %d' % extract.returncode)
|
||||
@@ -982,12 +995,15 @@ def dumpPPM(pictures, prefix, temporaries):
|
||||
pos+=length
|
||||
picture+=1
|
||||
|
||||
|
||||
|
||||
def extractAllStreams(ffmpeg, ffprobe, inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpMemFD=False):
|
||||
logger = logging.getLogger(__name__)
|
||||
encoderParams = [ 'ffmpeg', '-y', '-loglevel', 'quiet' ]
|
||||
inputParams = []
|
||||
codecsParams = []
|
||||
|
||||
|
||||
if begin < end:
|
||||
videoID=0
|
||||
audioID=0
|
||||
@@ -1001,6 +1017,8 @@ def extractAllStreams(ffmpeg, ffprobe, inputFile, begin, end, streams, filesPref
|
||||
m = pattern.match(frameRate)
|
||||
if m != None:
|
||||
frameRate = float(m['numerator']) / float(m['denominator'])
|
||||
# TODO: Framerate estimation seems broken ...
|
||||
frameRate = 25.
|
||||
sar = stream['sample_aspect_ratio']
|
||||
dar = stream['display_aspect_ratio']
|
||||
pixelFormat = stream['pix_fmt']
|
||||
@@ -1217,6 +1235,52 @@ def extractSubTitleTrack(mkvmerge, inputFileName, index, lang):
|
||||
for lines in out:
|
||||
logger.info(out)
|
||||
|
||||
def remuxSRTSubtitles(mkvmerge, inputFile, outputFileName, subtitles):
    """Remux a movie together with additional subtitle tracks using mkvmerge.

    The input movie, every subtitle and the output file are handed to
    mkvmerge as '/proc/self/fd/<n>' paths, so the corresponding descriptors
    are rewound and made inheritable before the child process is spawned.

    Args:
        mkvmerge: Path of the mkvmerge executable.
        inputFile: Open file object of the movie (only fileno() is used).
        outputFileName: Name of the file to create for the remuxed result.
        subtitles: Iterable of (fd, lang) pairs; fd is an open file
            descriptor of a subtitle and lang the language passed to
            mkvmerge's '--language' option.

    Returns:
        None in every case. Failures are only reported through the logger.
    """
    logger = logging.getLogger(__name__)

    try:
        out = open(outputFileName, 'w')
    except IOError:
        logger.error('Impossible to create file: %s' % outputFileName)
        return None

    # Compiled once: the pattern does not change between progress lines.
    progressPattern = re.compile(r'^Progress : (?P<progress>[0-9]{1,3})%$')

    # 'with out' guarantees the output descriptor is released once mkvmerge
    # is done (it was previously left open).
    with out:
        outfd = out.fileno()
        infd = inputFile.fileno()
        lseek(infd, 0, SEEK_SET)
        # The child accesses the files through inherited descriptors.
        set_inheritable(infd, True)
        set_inheritable(outfd, True)

        mkvmergeParams = [mkvmerge, '/proc/self/fd/%d' % infd]
        for fd, lang in subtitles:
            lseek(fd, 0, SEEK_SET)
            set_inheritable(fd, True)
            mkvmergeParams.extend(['--language', '0:%s' % lang, '/proc/self/fd/%d' % fd])

        mkvmergeParams.extend(['-o', '/proc/self/fd/%d' % outfd])

        warnings = []
        # Force the C locale so that the 'Progress' / 'Warning' prefixes
        # parsed below are not translated.
        env = {**os.environ, 'LANG': 'C'}
        logger.info('Remux subtitles: %s' % mkvmergeParams)

        # close_fds=False keeps the inheritable descriptors open in the child.
        with Popen(mkvmergeParams, stdout=PIPE, close_fds=False, env=env) as proc:
            pb = tqdm(TextIOWrapper(proc.stdout, encoding="utf-8"), total=100, unit='%', desc='Remux subtitles:')
            for line in pb:
                if line.startswith('Progress :'):
                    m = progressPattern.match(line)
                    if m is None:
                        # Skip the line instead of dereferencing a failed match.
                        logger.error('Impossible to parse progress')
                        continue
                    # Set the absolute position; refresh() redraws without
                    # adding an extra increment as update() would.
                    pb.n = int(m['progress'])
                    pb.refresh()
                elif line.startswith('Warning'):
                    warnings.append(line)
            pb.close()

            status = proc.wait()
            if status == 1:
                logger.warning('Remux returns warning')
                for w in warnings:
                    logger.warning(w)
            elif status == 2:
                logger.error('Remux returns errors')
|
||||
|
||||
|
||||
def main():
|
||||
@@ -1431,30 +1495,35 @@ def main():
|
||||
# We need to check the end also
|
||||
checks.append(pos)
|
||||
|
||||
finalCutName = '%s-cut.mkv' % basename
|
||||
|
||||
nbMKVParts = len(mkvparts)
|
||||
if nbMKVParts > 1:
|
||||
logger.info('Merging: %s' % mkvparts)
|
||||
mergeMKVs(inputs=mkvparts, outputName=args.outputFile)
|
||||
mergeMKVs(inputs=mkvparts, outputName=finalCutName)
|
||||
elif nbMKVParts == 1:
|
||||
copyfile('part-1.mkv', args.outputFile)
|
||||
copyfile('part-1.mkv', finalCutName)
|
||||
else:
|
||||
logger.info("Nothing else to do.")
|
||||
|
||||
try:
|
||||
finalCut = open(finalCutName, mode='r')
|
||||
except IOError:
|
||||
logger.error("Impossible to open %s to finalize processing." % finalCutName)
|
||||
exit(-1)
|
||||
|
||||
if args.srt:
|
||||
if not allOptionalTools:
|
||||
logger.warning("Missing tools for extracting subtitles.")
|
||||
move(finalCutName, args.outputFile)
|
||||
else:
|
||||
try:
|
||||
final = open(args.outputFile, mode='r')
|
||||
except IOError:
|
||||
logger.error("Impossible to open %s to finalize processing." % args.outputFile)
|
||||
exit(-1)
|
||||
|
||||
duration = getMovieDuration(paths['ffprobe'], final)
|
||||
# Final cut is not any more the final step.
|
||||
temporaries.append(finalCut)
|
||||
duration = getMovieDuration(paths['ffprobe'], finalCut)
|
||||
supportedLangs = getTesseractSupportedLang(paths['tesseract'])
|
||||
logger.info('Supported lang: %s' % supportedLangs)
|
||||
logger.info('Find subtitles tracks and language.')
|
||||
subtitles = findSubtitlesTracks(paths['ffprobe'], final)
|
||||
subtitles = findSubtitlesTracks(paths['ffprobe'], finalCut)
|
||||
logger.info(subtitles)
|
||||
sts = {}
|
||||
for subtitle in subtitles:
|
||||
@@ -1472,7 +1541,7 @@ def main():
|
||||
logger.error("Dropping subtitle: %s because it is missing language indication")
|
||||
|
||||
logger.debug(sts)
|
||||
listOfSubtitles = extractSRT(paths['mkvextract'], args.outputFile, sts, supportedLangs)
|
||||
listOfSubtitles = extractSRT(paths['mkvextract'], finalCutName, sts, supportedLangs)
|
||||
logger.info(listOfSubtitles)
|
||||
for idxName, subName, _, _ in listOfSubtitles:
|
||||
try:
|
||||
@@ -1492,6 +1561,10 @@ def main():
|
||||
ocr = doOCR(paths['vobsubocr'], listOfSubtitles, duration, temporaries, args.dump)
|
||||
logger.info(ocr)
|
||||
|
||||
# Remux SRT subtitles
|
||||
remuxSRTSubtitles(paths['mkvmerge'], finalCut, args.outputFile, ocr)
|
||||
else:
|
||||
move(finalCutName, args.outputFile)
|
||||
|
||||
if not args.keep:
|
||||
logger.info("Cleaning temporary files")
|
||||
|
||||
Reference in New Issue
Block a user