Subtitles extracted through OCR can now be remuxed into the final cut movie (in addition to the image-based ones).
This commit is contained in:
107
removeads.py
107
removeads.py
@@ -16,7 +16,7 @@ import shutil
|
|||||||
from tqdm import tqdm, trange
|
from tqdm import tqdm, trange
|
||||||
from select import select
|
from select import select
|
||||||
from math import floor, ceil, log
|
from math import floor, ceil, log
|
||||||
from shutil import copyfile, which
|
from shutil import copyfile, which, move
|
||||||
import hexdump
|
import hexdump
|
||||||
from iso639 import Lang
|
from iso639 import Lang
|
||||||
from iso639.exceptions import InvalidLanguageValue
|
from iso639.exceptions import InvalidLanguageValue
|
||||||
@@ -107,9 +107,10 @@ def getSubTitlesTracks(ffprobe, mkvPath):
|
|||||||
|
|
||||||
return tracks
|
return tracks
|
||||||
|
|
||||||
def extractSRT(mkvextract, mkvPath, subtitles, langs):
|
def extractSRT(mkvextract, fileName, subtitles, langs):
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
params = [mkvextract, mkvPath, 'tracks']
|
|
||||||
|
params = [mkvextract, fileName, 'tracks']
|
||||||
|
|
||||||
res = []
|
res = []
|
||||||
|
|
||||||
@@ -133,8 +134,20 @@ def extractSRT(mkvextract, mkvPath, subtitles, langs):
|
|||||||
count = count+1
|
count = count+1
|
||||||
|
|
||||||
env = {**os.environ, 'LANG': 'C'}
|
env = {**os.environ, 'LANG': 'C'}
|
||||||
with Popen(params, env=env) as extract:
|
with Popen(params, stdout=PIPE, close_fds=False, env=env) as extract:
|
||||||
extract.wait()
|
pb = tqdm(TextIOWrapper(extract.stdout, encoding="utf-8"), total=100, unit='%', desc='Extraction:')
|
||||||
|
for line in pb:
|
||||||
|
if line.startswith('Progress :'):
|
||||||
|
p = re.compile('^Progress : (?P<progress>[0-9]{1,3})%$')
|
||||||
|
m = p.match(line)
|
||||||
|
if m == None:
|
||||||
|
logger.error('Impossible to parse progress')
|
||||||
|
pb.update(int(m['progress'])-pb.n)
|
||||||
|
pb.update(100-pb.n)
|
||||||
|
pb.refresh()
|
||||||
|
pb.close()
|
||||||
|
|
||||||
|
extract.wait()
|
||||||
|
|
||||||
if extract.returncode != 0:
|
if extract.returncode != 0:
|
||||||
logger.error('Mkvextract returns an error code: %d' % extract.returncode)
|
logger.error('Mkvextract returns an error code: %d' % extract.returncode)
|
||||||
@@ -981,6 +994,8 @@ def dumpPPM(pictures, prefix, temporaries):
|
|||||||
nbBytes+=write(outfd, pictures[pos+nbBytes:pos+length])
|
nbBytes+=write(outfd, pictures[pos+nbBytes:pos+length])
|
||||||
pos+=length
|
pos+=length
|
||||||
picture+=1
|
picture+=1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def extractAllStreams(ffmpeg, ffprobe, inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpMemFD=False):
|
def extractAllStreams(ffmpeg, ffprobe, inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpMemFD=False):
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -988,6 +1003,7 @@ def extractAllStreams(ffmpeg, ffprobe, inputFile, begin, end, streams, filesPref
|
|||||||
inputParams = []
|
inputParams = []
|
||||||
codecsParams = []
|
codecsParams = []
|
||||||
|
|
||||||
|
|
||||||
if begin < end:
|
if begin < end:
|
||||||
videoID=0
|
videoID=0
|
||||||
audioID=0
|
audioID=0
|
||||||
@@ -1001,6 +1017,8 @@ def extractAllStreams(ffmpeg, ffprobe, inputFile, begin, end, streams, filesPref
|
|||||||
m = pattern.match(frameRate)
|
m = pattern.match(frameRate)
|
||||||
if m != None:
|
if m != None:
|
||||||
frameRate = float(m['numerator']) / float(m['denominator'])
|
frameRate = float(m['numerator']) / float(m['denominator'])
|
||||||
|
# TODO: Framerate estimation seems broken ...
|
||||||
|
frameRate = 25.
|
||||||
sar = stream['sample_aspect_ratio']
|
sar = stream['sample_aspect_ratio']
|
||||||
dar = stream['display_aspect_ratio']
|
dar = stream['display_aspect_ratio']
|
||||||
pixelFormat = stream['pix_fmt']
|
pixelFormat = stream['pix_fmt']
|
||||||
@@ -1217,6 +1235,52 @@ def extractSubTitleTrack(mkvmerge, inputFileName, index, lang):
|
|||||||
for lines in out:
|
for lines in out:
|
||||||
logger.info(out)
|
logger.info(out)
|
||||||
|
|
||||||
|
def remuxSRTSubtitles(mkvmerge, inputFile, outputFileName, subtitles):
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
try:
|
||||||
|
out = open(outputFileName, 'w')
|
||||||
|
except IOError:
|
||||||
|
logger.error('Impossible to create file: %s' % outputFileName)
|
||||||
|
return None
|
||||||
|
|
||||||
|
outfd = out.fileno()
|
||||||
|
infd = inputFile.fileno()
|
||||||
|
lseek(infd, 0, SEEK_SET)
|
||||||
|
set_inheritable(infd, True)
|
||||||
|
set_inheritable(outfd, True)
|
||||||
|
|
||||||
|
mkvmergeParams = [mkvmerge, '/proc/self/fd/%d' % infd]
|
||||||
|
for fd, lang in subtitles:
|
||||||
|
lseek(fd, 0, SEEK_SET)
|
||||||
|
set_inheritable(fd, True)
|
||||||
|
mkvmergeParams.extend(['--language', '0:%s' % lang, '/proc/self/fd/%d' % fd])
|
||||||
|
|
||||||
|
mkvmergeParams.extend(['-o', '/proc/self/fd/%d' % outfd])
|
||||||
|
|
||||||
|
warnings = []
|
||||||
|
env = {**os.environ, 'LANG': 'C'}
|
||||||
|
logger.info('Remux subtitles: %s' % mkvmergeParams)
|
||||||
|
with Popen(mkvmergeParams, stdout=PIPE, close_fds=False, env=env) as mkvmerge:
|
||||||
|
pb = tqdm(TextIOWrapper(mkvmerge.stdout, encoding="utf-8"), total=100, unit='%', desc='Remux subtitles:')
|
||||||
|
for line in pb:
|
||||||
|
if line.startswith('Progress :'):
|
||||||
|
p = re.compile('^Progress : (?P<progress>[0-9]{1,3})%$')
|
||||||
|
m = p.match(line)
|
||||||
|
if m == None:
|
||||||
|
logger.error('Impossible to parse progress')
|
||||||
|
pb.n = int(m['progress'])
|
||||||
|
pb.update()
|
||||||
|
elif line.startswith('Warning'):
|
||||||
|
warnings.append(line)
|
||||||
|
|
||||||
|
status = mkvmerge.wait()
|
||||||
|
if status == 1:
|
||||||
|
logger.warning('Extraction returns warning')
|
||||||
|
for w in warnings:
|
||||||
|
logger.warning(w)
|
||||||
|
elif status == 2:
|
||||||
|
logger.error('Extraction returns errors')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -1431,30 +1495,35 @@ def main():
|
|||||||
# We need to check the end also
|
# We need to check the end also
|
||||||
checks.append(pos)
|
checks.append(pos)
|
||||||
|
|
||||||
|
finalCutName = '%s-cut.mkv' % basename
|
||||||
|
|
||||||
nbMKVParts = len(mkvparts)
|
nbMKVParts = len(mkvparts)
|
||||||
if nbMKVParts > 1:
|
if nbMKVParts > 1:
|
||||||
logger.info('Merging: %s' % mkvparts)
|
logger.info('Merging: %s' % mkvparts)
|
||||||
mergeMKVs(inputs=mkvparts, outputName=args.outputFile)
|
mergeMKVs(inputs=mkvparts, outputName=finalCutName)
|
||||||
elif nbMKVParts == 1:
|
elif nbMKVParts == 1:
|
||||||
copyfile('part-1.mkv', args.outputFile)
|
copyfile('part-1.mkv', finalCutName)
|
||||||
else:
|
else:
|
||||||
logger.info("Nothing else to do.")
|
logger.info("Nothing else to do.")
|
||||||
|
|
||||||
|
try:
|
||||||
|
finalCut = open(finalCutName, mode='r')
|
||||||
|
except IOError:
|
||||||
|
logger.error("Impossible to open %s to finalize processing." % finalCutName)
|
||||||
|
exit(-1)
|
||||||
|
|
||||||
if args.srt:
|
if args.srt:
|
||||||
if not allOptionalTools:
|
if not allOptionalTools:
|
||||||
logger.warning("Missing tools for extracting subtitles.")
|
logger.warning("Missing tools for extracting subtitles.")
|
||||||
|
move(finalCutName, args.outputFile)
|
||||||
else:
|
else:
|
||||||
try:
|
# Final cut is not any more the final step.
|
||||||
final = open(args.outputFile, mode='r')
|
temporaries.append(finalCut)
|
||||||
except IOError:
|
duration = getMovieDuration(paths['ffprobe'], finalCut)
|
||||||
logger.error("Impossible to open %s to finalize processing." % args.outputFile)
|
|
||||||
exit(-1)
|
|
||||||
|
|
||||||
duration = getMovieDuration(paths['ffprobe'], final)
|
|
||||||
supportedLangs = getTesseractSupportedLang(paths['tesseract'])
|
supportedLangs = getTesseractSupportedLang(paths['tesseract'])
|
||||||
logger.info('Supported lang: %s' % supportedLangs)
|
logger.info('Supported lang: %s' % supportedLangs)
|
||||||
logger.info('Find subtitles tracks and language.')
|
logger.info('Find subtitles tracks and language.')
|
||||||
subtitles = findSubtitlesTracks(paths['ffprobe'], final)
|
subtitles = findSubtitlesTracks(paths['ffprobe'], finalCut)
|
||||||
logger.info(subtitles)
|
logger.info(subtitles)
|
||||||
sts = {}
|
sts = {}
|
||||||
for subtitle in subtitles:
|
for subtitle in subtitles:
|
||||||
@@ -1472,7 +1541,7 @@ def main():
|
|||||||
logger.error("Dropping subtitle: %s because it is missing language indication")
|
logger.error("Dropping subtitle: %s because it is missing language indication")
|
||||||
|
|
||||||
logger.debug(sts)
|
logger.debug(sts)
|
||||||
listOfSubtitles = extractSRT(paths['mkvextract'], args.outputFile, sts, supportedLangs)
|
listOfSubtitles = extractSRT(paths['mkvextract'], finalCutName, sts, supportedLangs)
|
||||||
logger.info(listOfSubtitles)
|
logger.info(listOfSubtitles)
|
||||||
for idxName, subName, _, _ in listOfSubtitles:
|
for idxName, subName, _, _ in listOfSubtitles:
|
||||||
try:
|
try:
|
||||||
@@ -1491,7 +1560,11 @@ def main():
|
|||||||
|
|
||||||
ocr = doOCR(paths['vobsubocr'], listOfSubtitles, duration, temporaries, args.dump)
|
ocr = doOCR(paths['vobsubocr'], listOfSubtitles, duration, temporaries, args.dump)
|
||||||
logger.info(ocr)
|
logger.info(ocr)
|
||||||
|
|
||||||
|
# Remux SRT subtitles
|
||||||
|
remuxSRTSubtitles(paths['mkvmerge'], finalCut, args.outputFile, ocr)
|
||||||
|
else:
|
||||||
|
move(finalCutName, args.outputFile)
|
||||||
|
|
||||||
if not args.keep:
|
if not args.keep:
|
||||||
logger.info("Cleaning temporary files")
|
logger.info("Cleaning temporary files")
|
||||||
|
|||||||
Reference in New Issue
Block a user