diff --git a/removeads.py b/removeads.py index 32c1872..79f6a93 100755 --- a/removeads.py +++ b/removeads.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import argparse import locale import re @@ -6,11 +8,30 @@ from datetime import datetime,timedelta,time import coloredlogs, logging from functools import cmp_to_key from subprocess import Popen, PIPE -from os import mkdir, set_inheritable +import os from io import BytesIO, TextIOWrapper import json from enum import Enum, IntEnum, unique, auto import shutil +from tqdm import tqdm, trange +from select import select +from math import floor, ceil, log +from shutil import copyfile, which + +# Useful SPS/PPS discussion +# TODO: improve situation of SPS and PPS header mismatch when merging MVK with mkvmerge to remove warnings. +# https://copyprogramming.com/howto/including-sps-and-pps-in-a-raw-h264-track +# https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2390 + + +def checkRequiredTools(): + logger = logging.getLogger(__name__) + tools = ['ffmpeg', 'ffprobe', 'mkvmerge'] + for tool in tools: + if which(tool) == None: + logger.error('Required tool: %s is missing.' % tool) + exit(-1) + @unique class SupportedFormat(IntEnum): @@ -33,7 +54,7 @@ def getFormat(inputFile): infd = inputFile.fileno() inputFile.seek(0,0) - set_inheritable(infd, True) + os.set_inheritable(infd, True) with Popen(['ffprobe', '-loglevel', 'quiet', '-show_format', '-of', 'json', '-i', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as ffprobe: out, _ = ffprobe.communicate() out = json.load(BytesIO(out)) @@ -49,7 +70,7 @@ def getStreams(inputFile): infd = inputFile.fileno() inputFile.seek(0,0) - set_inheritable(infd, True) + os.set_inheritable(infd, True) with Popen(['ffprobe', '-loglevel', 'quiet', '-show_streams', '-of', 'json', '-i', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as ffprobe: out, _ = ffprobe.communicate() out = json.load(BytesIO(out)) @@ -60,247 +81,46 @@ def getStreams(inputFile): return None -def ffmpegConvert(inputFile, inputFormat, outputFile, outputFormat): +def parseTimestamp(ts): logger = logging.getLogger(__name__) - infd = inputFile.fileno() - outfd = outputFile.fileno() - set_inheritable(infd, True) - set_inheritable(outfd, True) - # TODO: canvas size to be fixed ! - with Popen(['ffmpeg', '-loglevel', 'quiet', '-progress', '/dev/stdout', '-canvas_size', '720x560', '-f', inputFormat, '-i', '/proc/self/fd/%d' % infd, - '-map', '0:v', '-map', '0:a', '-map', '0:s', '-vcodec', 'copy', '-acodec', 'copy', '-scodec', 'dvdsub', - '-f', outputFormat, '/proc/self/fd/%d' % outfd], stdout=PIPE, close_fds=False) as ffmpeg: - for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"): - if line.startswith('out_time='): - print(line, end='') - -def getFramesInStream(inputFile, begin, end, streamKind, subStreamId=0): - logger = logging.getLogger(__name__) - infd = inputFile.fileno() - set_inheritable(infd, True) - - with Popen(['ffprobe', '-loglevel', 'quiet', '-read_intervals', ('%s%%%s' %(begin, end)), '-show_entries', 'frame', '-select_streams', '%s:%d' % (streamKind, subStreamId), '-of', 'json', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as ffprobe: - out, _ = ffprobe.communicate() - frames = json.load(BytesIO(out)) - res = [] - if 'frames' in frames: - frames = frames['frames'] - for frame in frames: - ts = timedelta(seconds=float(frame['pts_time'])) - if begin <= ts and ts <= end: - res.append(frame) - return res - else: - logger.error('Impossible to retrieve frames inside file around [%s,%s]' % (begin, end)) - return None - -def getNearestIFrame(inputFile, timestamp, before=True, delta=timedelta(seconds=2)): - logger = logging.getLogger(__name__) - - zero = timedelta() - tbegin = timestamp-delta - tend = timestamp+delta - if tbegin < zero: - tbegin = zero + tsRegExp = '^(?P[0-9]{1,2}):(?P[0-9]{1,2}):(?P[0-9]{1,2})(\.(?P[0-9]{1,6}))?$' + p = re.compile(tsRegExp) + m = p.match(ts) + if m == None: + logger.error("Impossible to parse timestamp: %s" % ts) + return None - infd = inputFile.fileno() - set_inheritable(infd, True) - - logger.debug('Looking for iframe in [%s, %s]' % (tbegin, tend)) - - frames = getFramesInStream(inputFile=inputFile, begin=tbegin, end=tend, streamKind='v') - if frames == None: + values = m.groupdict() + hour = 0 + minute = 0 + second = 0 + us = 0 + if values['hour'] != None: + hour = int(values['hour']) + if values['minute'] != None: + minute = int(values['minute']) + if values['second'] != None: + second = int(values['second']) + if values['us'] != None: + us = int(values['us']) + + if hour < 0 or hour > 23: + logger.error("hour must be in [0,24[") + return None + if minute < 0 or minute > 59: + logger.error("minute must be in [0,60[") + return None + if second < 0 or second > 59: + logger.error("second must be in [0,60[") + return None + if us < 0 or us > 1000000: + logger.error("milliseconds must be in [0,1000000[") return None - iframes = [] - for frame in frames: - if frame['pict_type'] == 'I': - iframes.append(frame) - - found = False - res = None - for frame in iframes: - if before and timedelta(seconds=float(frame['pts_time'])) <= timestamp: - found = True - iframe = frame - if not before and timedelta(seconds=float(frame['pts_time'])) >= timestamp: - found = True - iframe = frame - break + ts = timedelta(hours=hour, minutes=minute, seconds=second, microseconds=us) - if found: - logger.debug("Found: %s" % res) - - its = timedelta(seconds=float(iframe['pts_time'])) - nbFrames = 0 - for frame in frames: - ts = timedelta(seconds=float(frame['pts_time'])) - if before: - if its <= ts and ts <= timestamp: - nbFrames = nbFrames+1 - else: - if timestamp <= ts and ts <= its: - nbFrames = nbFrames+1 - else: - logger.error("Impossible to find I-frame around: %s" % timestamp) - - return(nbFrames-1, iframe) - -def extractMKVPart(inputFile, outputFile, begin, end): - inputFile.seek(0,0) - outputFile.seek(0,0) - infd = inputFile.fileno() - outfd = outputFile.fileno() - set_inheritable(infd, True) - set_inheritable(outfd, True) - with Popen(['mkvmerge', '-o', '/proc/self/fd/%d' % outfd, '--split', 'parts:%s-%s' % (begin, end), '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as mkvmerge: - for line in TextIOWrapper(mkvmerge.stdout, encoding="utf-8"): - print(line, end='') - -def extractPictures(inputFile, begin, nbFrames, prefix, width=640, height=480): - inputFile.seek(0,0) - infd = inputFile.fileno() - set_inheritable(infd, True) - with Popen(['ffmpeg', '-y', '-loglevel', 'quiet', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-s', '%dx%d'%(width, height), '-vframes', '%d'%nbFrames, '-c:v', 'ppm', '-f', 'image2', '%s-%%03d.ppm' % prefix], stdout=PIPE, close_fds=False) as ffmpeg: - for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"): - print(line, end='') - -def extractSound(inputFile, begin, outputFile, subChannel=0, nbPackets=0, sampleRate=48000, nbChannels=2): - inputFile.seek(0,0) - outputFile.seek(0,0) - infd = inputFile.fileno() - outfd = outputFile.fileno() - set_inheritable(infd, True) - set_inheritable(outfd, True) - with Popen(['ffmpeg', '-y', '-loglevel', 'quiet', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-frames:a:%d' % subChannel, '%d' % nbPackets, - '-c:a', 'pcm_s32le', '-sample_rate', '%d' % sampleRate, '-channels', '%d' % nbChannels, '-f', 's32le', '/proc/self/fd/%d' % outfd], stdout=PIPE, close_fds=False) as ffmpeg: - for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"): - print(line, end='') - -def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, width, height): - logger = logging.getLogger(__name__) - # encoderParams = [ 'ffmpeg', '-y', '-loglevel', 'quiet' ] - encoderParams = [ 'ffmpeg', '-y' ] - inputParams = [] - codecsParams = [] - - if begin < end: - videoID=0 - audioID=0 - subTitleID=0 - audioFiles = {} - for stream in streams: - if stream['codec_type'] == 'video': - print("Extracting video stream: %s" % stream) - frameRate = stream['r_frame_rate'] - pattern = re.compile('^(?P[0-9]+)/(?P[0-9]+)$') - m = pattern.match(frameRate) - print(m) - if m != None: - frameRate = float(m['numerator']) / float(m['denominator']) - print(frameRate) - sar = stream['sample_aspect_ratio'] - dar = stream['display_aspect_ratio'] - pixelFormat = stream['pix_fmt'] - colorRange = stream['color_range'] - colorSpace =stream['color_space'] - colorTransfer = stream['color_transfer'] - colorPrimaries = stream['color_primaries'] - codec = stream['codec_name'] - extractPictures(inputFile=inputFile, begin=begin, nbFrames=nbFrames, prefix="%s-%d" % (filesPrefix, videoID), width=width, height=height) - inputParams.extend(['-framerate', '%f'%frameRate, '-i', '%s-%d-%%03d.ppm' % (filesPrefix, videoID)]) - codecsParams.extend(['-c:v:%d' % videoID, codec, '-pix_fmt', pixelFormat, '-colorspace:v:%d' % videoID, colorSpace, '-color_primaries:v:%d' % videoID, colorPrimaries, - '-color_trc:v:%d' % videoID, colorTransfer, '-color_range:v:%d' % videoID, colorRange]) - videoID=videoID+1 - elif stream['codec_type'] == 'audio': - print("Extracting audio stream: %s" % stream) - sampleRate = int(stream['sample_rate']) - nbChannels = int(stream['channels']) - bitRate = int(stream['bit_rate']) - codec = stream['codec_name'] - if 'tags' in stream: - if 'language' in stream['tags']: - codecsParams.extend(['-metadata:s:a:%d' % audioID, 'language=%s' % stream['tags']['language']]) - packets = getFramesInStream(inputFile=inputFile, begin=begin, end=end, streamKind='a', subStreamId=audioID) - nbPackets = len(packets) - print("Found %d packets to be extracted from audio track." % nbPackets) - audioFiles[audioID] = open('%s-%d.pcm' % (filesPrefix,audioID), 'w') - # TODO: test if successfully openened - extractSound(inputFile=inputFile, begin=begin, nbPackets=nbPackets, outputFile=audioFiles[audioID], sampleRate=sampleRate, nbChannels=nbChannels) - inputParams.extend(['-f', 's32le', '-ar', '%d'%sampleRate, '-ac', '%d'%nbChannels, '-i', '/proc/self/fd/%d' % audioFiles[audioID].fileno()]) - codecsParams.extend(['-c:a:%d' % audioID, codec, '-b:a:%d' % audioID, '%d' % bitRate]) - audioID=audioID+1 - elif stream['codec_type'] == 'subtitle': - print("Extracting a subtitle stream: %s" % stream) - codec = stream['codec_name'] - inputParams.extend(['-i', './empty.idx']) - if 'tags' in stream: - if 'language' in stream['tags']: - codecsParams.extend(['-metadata:s:s:%d' % subTitleID, 'language=%s' % stream['tags']['language']]) - codecsParams.extend(['-c:s:%d' % subTitleID, 'copy']) - subTitleID=subTitleID+1 - else: - logger.info("Unknown stream type: %s" % stream['codec_type']) - - # Example: - # ffmpeg -framerate 25.85 -i image-%02d.ppm -f s32le -ar 48000 -ac 2 -i ./audio-1.pcm -c:a eac3 -b:a 128k -c:v libx264 -crf 25.85 -vf "scale=1920:1080,format=yuv420p" -colorspace:v "bt709" -color_primaries:v "bt709" -color_trc:v "bt709" -color_range:v "tv" -top 1 -flags:v +ilme+ildct -bsf:v h264_mp4toannexb,dump_extra=keyframe -metadata MAJOR_BRAND=isom -metadata MINOR_VERSION=512 -movflags +faststart cut-1.mkv - - # Create a new MKV movie with all streams that have been extracted. - encoderParams.extend(inputParams) - for index in range(0,videoID+audioID+subTitleID): - encoderParams.extend(['-map', '%d' % index]) - encoderParams.extend(codecsParams) - output = open('%s.mkv' % filesPrefix,'w') - outfd = output.fileno() - set_inheritable(outfd, True) - encoderParams.extend(['-top', '1', '-bsf:v', 'h264_mp4toannexb,dump_extra=keyframe', '-f', 'matroska', '/proc/self/fd/%d' % outfd]) - - print(encoderParams) - - with Popen(encoderParams, stdout=PIPE, close_fds=False) as ffmpeg: - for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"): - print(line, end='') - - return output - - else: - # Nothing to be done. We are already at a i-frame boundary. - return None - -# Merge a list of mkv files passed as input, and produce a new MKV as output -def mergeMKVs(inputs, outputName): - fds = [] - out = open(outputName, 'w') - # TODO: Check success or failure - outfd = out.fileno() - fds.append(outfd) - set_inheritable(outfd, True) - - mergeParams = ['mkvmerge'] - first = True - for mkv in inputs: - if mkv !=None: - fd = mkv.fileno() - fds.append(fd) - set_inheritable(fd, True) - if first: - mergeParams.append('/proc/self/fd/%d' % fd) - first = False - else: - mergeParams.append('+/proc/self/fd/%d' % fd) - - mergeParams.extend(['-o', '/proc/self/fd/%d' % outfd]) - - # We merge all files. - with Popen(mergeParams, stdout=PIPE, close_fds=False) as mkvmerge: - for line in TextIOWrapper(mkvmerge.stdout, encoding="utf-8"): - print(line, end='') - - for fd in fds: - set_inheritable(fd, False) - - return out + return ts def parseTimeInterval(interval): logger = logging.getLogger(__name__) @@ -385,6 +205,426 @@ def compareTimeInterval(interval1, interval2): return 0 + +def ffmpegConvert(inputFile, inputFormat, outputFile, outputFormat, duration): + logger = logging.getLogger(__name__) + + infd = inputFile.fileno() + outfd = outputFile.fileno() + os.set_inheritable(infd, True) + os.set_inheritable(outfd, True) + # TODO: canvas size to be fixed ! + with Popen(['ffmpeg', '-y', '-loglevel', 'quiet', '-progress', '/dev/stdout', '-canvas_size', '720x560', '-f', inputFormat, '-i', '/proc/self/fd/%d' % infd, + '-map', '0:v', '-map', '0:a', '-map', '0:s', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-vcodec', 'copy', '-acodec', 'copy', '-scodec', 'dvdsub', + '-f', outputFormat, '/proc/self/fd/%d' % outfd], stdout=PIPE, close_fds=False) as ffmpeg: + pb = tqdm(TextIOWrapper(ffmpeg.stdout, encoding="utf-8"), total=int(duration/timedelta(seconds=1)), unit='s', desc='Conversion') + for line in pb: + if line.startswith('out_time='): + ts = line.split('=')[1].strip() + ts = parseTimestamp(ts) + pb.n = int(ts/timedelta(seconds=1)) + pb.update() + status = ffmpeg.wait() + if status != 0: + logger.error('Conversion failed with status code: %d' % status) + + +def getFramesInStream(inputFile, begin, end, streamKind, subStreamId=0): + logger = logging.getLogger(__name__) + infd = inputFile.fileno() + os.set_inheritable(infd, True) + + with Popen(['ffprobe', '-loglevel', 'quiet', '-read_intervals', ('%s%%%s' %(begin, end)), '-show_entries', 'frame', '-select_streams', '%s:%d' % (streamKind, subStreamId), '-of', 'json', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as ffprobe: + out, _ = ffprobe.communicate() + frames = json.load(BytesIO(out)) + status = ffprobe.wait() + if status != 0: + logger.error('ffprobe failed with status code: %d' % status) + return None + res = [] + if 'frames' in frames: + frames = frames['frames'] + for frame in frames: + ts = timedelta(seconds=float(frame['pts_time'])) + if begin <= ts and ts <= end: + res.append(frame) + return res + else: + logger.error('Impossible to retrieve frames inside file around [%s,%s]' % (begin, end)) + return None + +def getNearestIFrame(inputFile, timestamp, before=True, delta=timedelta(seconds=2)): + logger = logging.getLogger(__name__) + + zero = timedelta() + tbegin = timestamp-delta + tend = timestamp+delta + if tbegin < zero: + tbegin = zero + + infd = inputFile.fileno() + os.set_inheritable(infd, True) + + logger.debug('Looking for iframe in [%s, %s]' % (tbegin, tend)) + + frames = getFramesInStream(inputFile=inputFile, begin=tbegin, end=tend, streamKind='v') + if frames == None: + return None + + iframes = [] + for frame in frames: + if frame['pict_type'] == 'I': + iframes.append(frame) + + found = False + for frame in iframes: + if before and timedelta(seconds=float(frame['pts_time'])) <= timestamp: + found = True + iframe = frame + if not before and timedelta(seconds=float(frame['pts_time'])) >= timestamp: + found = True + iframe = frame + break + + if found: + logger.info("Found i-frame at: %s" % iframe) + logger.debug("Found i-frame at %s" % iframe) + + its = timedelta(seconds=float(iframe['pts_time'])) + nbFrames = 0 + for frame in frames: + ts = timedelta(seconds=float(frame['pts_time'])) + if before: + if its <= ts and ts <= timestamp: + logger.info("Retrieve a frame between %s and %s at %s" % (its, timestamp, ts)) + nbFrames = nbFrames+1 + else: + if timestamp <= ts and ts <= its: + logger.info("Retrieve a frame between %s and %s at %s" % (ts, timestamp, its)) + nbFrames = nbFrames+1 + else: + logger.error("Impossible to find I-frame around: %s" % timestamp) + + return(nbFrames, iframe) + +def extractMKVPart(inputFile, outputFile, begin, end): + logger = logging.getLogger(__name__) + + logger.info('Extract video between I-frames at %s and %s' % (begin,end)) + inputFile.seek(0,0) + outputFile.seek(0,0) + infd = inputFile.fileno() + outfd = outputFile.fileno() + os.set_inheritable(infd, True) + os.set_inheritable(outfd, True) + warnings = [] + with Popen(['mkvmerge', '-o', '/proc/self/fd/%d' % outfd, '--split', 'parts:%s-%s' % (begin, end), '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as mkvmerge: + pb = tqdm(TextIOWrapper(mkvmerge.stdout, encoding="utf-8"), total=100, unit='%', desc='Extraction') + for line in pb: + if line.startswith('Progression :'): + p = re.compile('^Progression : (?P[0-9]{1,3})%$') + m = p.match(line) + if m == None: + logger.error('Impossible to parse progress') + pb.n = int(m['progress']) + pb.update() + elif line.startswith('Avertissement'): + warnings.append(line) + + status = mkvmerge.wait() + if status == 1: + logger.warning('Extraction returns warning') + for w in warnings: + logger.warning(w) + elif status == 2: + logger.error('Extraction returns errors') + + +def extractPictures(inputFile, begin, nbFrames, width=640, height=480): + logger = logging.getLogger(__name__) + + inputFile.seek(0,0) + infd = inputFile.fileno() + fdr, fdw = os.pipe() + os.set_inheritable(infd, True) + os.set_inheritable(fdr, False) + os.set_inheritable(fdw, True) + # "P6\nWIDTH HEIGHT\n255\n" + headerLen=2+1+ceil(log(width, 10))+1+ceil(log(height, 10))+1+3+1 + logger.debug('Header length: %d' % headerLen) + length = (width*height*3+headerLen)*nbFrames + logger.debug("Estimated length: %d" % length) + + pg = trange(length) + images = bytes() + with Popen(['ffmpeg', '-loglevel', 'quiet' ,'-y', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-s', '%dx%d'%(width, height), '-vframes', '%d'%nbFrames, '-c:v', 'ppm', '-f', 'image2pipe', '/proc/self/fd/%d' % fdw ], stdout=PIPE, close_fds=False) as ffmpeg: + while ffmpeg.poll() == None: + fds, _, _ = select([fdr, ffmpeg.stdout], [], [], .1) + if fdr in fds: + buf = os.read(fdr, 1000000) + # print("Read %d bytes of image. ffmpeg finished: %s" % (len(buf), ffmpeg.poll())) + if len(buf) == 0: + break + pg.update(len(buf)) + images=images+buf + if ffmpeg.stdout in fds: + for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"): + logger.debug(line) + + status = ffmpeg.wait() + + # Finishing to read residual bytes from pipe + while True: + fd, _, _ = select([fdr], [], [], .1) + if fd != []: + buf = os.read(fdr, 1000000) + # print("Read %d bytes of image" % len(buf)) + if len(buf) == 0: + break + pg.update(len(buf)) + images=images+buf + else: + # Nothing more to read + break + + logger.debug("%d bytes received." % len(images)) + + os.close(fdr) + os.close(fdw) + + if status != 0: + logger.error('Image extraction returns error code: %d' % status) + + return images + +def extractSound(inputFile, begin, outputFile, subChannel=0, nbPackets=0, sampleRate=48000, nbChannels=2): + logger = logging.getLogger(__name__) + + inputFile.seek(0,0) + outputFile.seek(0,0) + infd = inputFile.fileno() + outfd = outputFile.fileno() + os.set_inheritable(infd, True) + os.set_inheritable(outfd, True) + with Popen(['ffmpeg', '-y', '-loglevel', 'quiet', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-frames:a:%d' % subChannel, '%d' % nbPackets, + '-c:a', 'pcm_s32le', '-sample_rate', '%d' % sampleRate, '-channels', '%d' % nbChannels, '-f', 's32le', '/proc/self/fd/%d' % outfd], stdout=PIPE, close_fds=False) as ffmpeg: + for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"): + logger.debug(line) + + status = ffmpeg.wait() + if status != 0: + logger.error('Sound extraction returns error code: %d' % status) + +def dumpPPM(pictures, prefix): + # "P6\nWIDTH HEIGHT\n255\n" + # headerLen=2+1+ceil(log(width, 10))+1+ceil(log(height, 10))+1+3+1 + pass + +def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpPictures=False): + logger = logging.getLogger(__name__) + # encoderParams = [ 'ffmpeg', '-y', '-loglevel', 'quiet' ] + encoderParams = [ 'ffmpeg', '-y', '-loglevel', 'quiet' ] + inputParams = [] + codecsParams = [] + + if begin < end: + videoID=0 + audioID=0 + subTitleID=0 + audioFiles = {} + imagesPipes = {} + for stream in streams: + if stream['codec_type'] == 'video': + logger.info("Extracting video stream v:%d" % videoID) + frameRate = stream['r_frame_rate'] + pattern = re.compile('^(?P[0-9]+)/(?P[0-9]+)$') + m = pattern.match(frameRate) + if m != None: + frameRate = float(m['numerator']) / float(m['denominator']) + sar = stream['sample_aspect_ratio'] + dar = stream['display_aspect_ratio'] + pixelFormat = stream['pix_fmt'] + colorRange = stream['color_range'] + colorSpace =stream['color_space'] + colorTransfer = stream['color_transfer'] + colorPrimaries = stream['color_primaries'] + # TODO: do something with these informations + # When interlaced TOP or BOTTOM + # -top 1 -flags:v +ilme+ildct + # -top 0 -flags:v +ilme+ildct + chromaLocation = stream['chroma_location'] + fieldOrder = stream['field_order'] + # ======================================= # + # TODO: adjust SAR and DAR + # https://superuser.com/questions/907933/correct-aspect-ratio-without-re-encoding-video-file + codec = stream['codec_name'] + imagesBytes = extractPictures(inputFile=inputFile, begin=begin, nbFrames=nbFrames, width=width, height=height) + if dumpPictures: + dumpPPM(imagesBytes, '%s-%d' % (filesPrefix,videoID)) + + # imagesBytes contains now a buffer of bytes that represents the pictures that have been dumped by ffmpeg. + fdr, fdw = os.pipe() + os.set_inheritable(fdr, True) + # The writalbe end of the pipe (fdw) must not be stayed opened in ffmpeg child, otherwise ffmpeg will not be able + # to detect the end of pictures data sent by the other end of the pipe it is reading from (fdr). + # We manually force non inheritance to be sure (although this should be the case since Python 3.4). + os.set_inheritable(fdw, False) + logger.debug("Creating pipes for images: r:%d w:%d" % (fdr,fdw)) + imagesPipes[videoID] = (imagesBytes, fdr, fdw) + inputParams.extend(['-framerate', '%f'%frameRate, '-f', 'image2pipe', '-i', '/proc/self/fd/%d' % fdr]) + codecsParams.extend(['-c:v:%d' % videoID, codec, '-pix_fmt', pixelFormat, '-colorspace:v:%d' % videoID, colorSpace, '-color_primaries:v:%d' % videoID, colorPrimaries, + '-color_trc:v:%d' % videoID, colorTransfer, '-color_range:v:%d' % videoID, colorRange]) + videoID=videoID+1 + elif stream['codec_type'] == 'audio': + logger.info("Extracting audio stream: a:%d" % audioID) + sampleRate = int(stream['sample_rate']) + nbChannels = int(stream['channels']) + bitRate = int(stream['bit_rate']) + codec = stream['codec_name'] + if 'tags' in stream: + if 'language' in stream['tags']: + codecsParams.extend(['-metadata:s:a:%d' % audioID, 'language=%s' % stream['tags']['language']]) + packets = getFramesInStream(inputFile=inputFile, begin=begin, end=end, streamKind='a', subStreamId=audioID) + nbPackets = len(packets) + logger.debug("Found %d packets to be extracted from audio track." % nbPackets) + try: + audioFiles[audioID] = open('%s-%d.pcm' % (filesPrefix,audioID), 'w') + except IOError: + logger.error('Impossible to create file: %s-%d.pcm' % (filesPrefix,audioID)) + return None + temporaries.append(audioFiles[audioID]) + extractSound(inputFile=inputFile, begin=begin, nbPackets=nbPackets, outputFile=audioFiles[audioID], sampleRate=sampleRate, nbChannels=nbChannels) + inputParams.extend(['-f', 's32le', '-ar', '%d'%sampleRate, '-ac', '%d'%nbChannels, '-i', '/proc/self/fd/%d' % audioFiles[audioID].fileno()]) + codecsParams.extend(['-c:a:%d' % audioID, codec, '-b:a:%d' % audioID, '%d' % bitRate]) + audioID=audioID+1 + elif stream['codec_type'] == 'subtitle': + logger.info("Extracting a subtitle stream: s:%d" % subTitleID) + codec = stream['codec_name'] + inputParams.extend(['-i', './empty.idx']) + if 'tags' in stream: + if 'language' in stream['tags']: + codecsParams.extend(['-metadata:s:s:%d' % subTitleID, 'language=%s' % stream['tags']['language']]) + codecsParams.extend(['-c:s:%d' % subTitleID, 'copy']) + subTitleID=subTitleID+1 + else: + logger.error("Unknown stream type: %s" % stream['codec_type']) + + # Create a new MKV movie with all streams that have been extracted. + encoderParams.extend(inputParams) + for index in range(0,videoID+audioID+subTitleID): + encoderParams.extend(['-map', '%d' % index]) + encoderParams.extend(codecsParams) + fileName = '%s.mkv' % filesPrefix + try: + output = open(fileName,'w') + except IOError: + logger.error('Impossible to create file: %s' % fileName) + return None + + outfd = output.fileno() + os.set_inheritable(outfd, True) + # TODO: manage interlaced to previous parameters. + encoderParams.extend(['-top', '1', '-flags:v', '+ilme+ildct', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-f', 'matroska', '/proc/self/fd/%d' % outfd]) + + logger.info('Encoding video: %s' % fileName) + with Popen(encoderParams, stdout=PIPE, close_fds=False) as ffmpeg: + pos = {} + totalLength = 0 + for vid in range(videoID): + pos[vid]=0 + img, fdr, _ = imagesPipes[vid] + # We close the end of the pipe used by ffmepg to read data. + os.close(fdr) + totalLength+=len(img) + length = 0 + pg = trange(totalLength) + while length[0-9]{1,3})%$') + m = p.match(line) + if m == None: + logger.error('Impossible to parse progress') + pb.n = int(m['progress']) + pb.update() + elif line.startswith('Avertissement'): + warnings.append(line) + + status = mkvmerge.wait() + if status == 1: + logger.warning('Extraction returns warning') + for w in warnings: + logger.warning(w) + elif status == 2: + logger.error('Extraction returns errors') + + for fd in fds: + os.set_inheritable(fd, False) + + return out + + + def main(): logger = logging.getLogger(__name__) coloredlogs.install() @@ -392,15 +632,21 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--input", dest='inputFile', type=str, required=True, help="Input file to process (can be .ts, .mp4 or .mkv).") parser.add_argument("-o", "--output", dest='outputFile', type=str, required=True, help="Output MKV file to produce.") - parser.add_argument("-p", "--part", dest='parts', nargs='+', required=True, action='append', metavar="hh:mm:ss[.mmm]-hh:mm:ss[.mmm]", help="Extract this exact part of the original file.") + parser.add_argument("-p", "--part", dest='parts', nargs='+', required=False, action='append', metavar="hh:mm:ss[.mmm]-hh:mm:ss[.mmm]", help="Extract this exact part of the original file.") + parser.add_argument("-k", "--keep", action='store_true', help="Do not cleanup temporary files after processing.") + parser.add_argument("--dump-pictures", action='store_true', help="For debug purpose, dump pictures of headers (and trailers) before (after) each part. They are kept in memory only otherwise.") args = parser.parse_args() + logger.debug("Arguments: %s" % args) + + checkRequiredTools() # Flatten args.parts intervals = [] - for part in args.parts: - for subpart in part: - intervals.append(subpart) + if args.parts != None: + for part in args.parts: + for subpart in part: + intervals.append(subpart) parts=[] # Parse each interval @@ -422,9 +668,16 @@ def main(): logger.error('Intervals are overlapping') exit(-1) prevts = ts2 - - inputFile = open(args.inputFile, mode='r') - if inputFile == None: + + temporaries = [] + + basename = os.path.splitext(os.path.basename(args.inputFile))[0] + mp4filename = basename+'.mp4' + mkvfilename = basename+'.mkv' + + try: + inputFile = open(args.inputFile, mode='r') + except IOError: logger.error("Impossible to open %s" % args.inputFile) exit(-1) @@ -449,15 +702,29 @@ def main(): if formatOfFile == SupportedFormat.TS: logger.info("Converting TS to MP4 (to fix timestamps).") - with open('essai.mp4', 'w') as mp4: - ffmpegConvert(inputFile, 'mpegts', mp4, 'mp4') - logger.info("Converting MP4 to MKV.") - with open('essai.mkv', 'w') as mkv: - ffmpegConvert(mp4, 'mp4', mkv, 'matroska') + try: + with open(mp4filename, 'w') as mp4: + ffmpegConvert(inputFile, 'mpegts', mp4, 'mp4', duration) + temporaries.append(mp4) + logger.info("Converting MP4 to MKV.") + try: + mkv = open(mkvfilename, 'w') + except IOError: + logger.error('') + + ffmpegConvert(mp4, 'mp4', mkv, 'matroska', duration) + temporaries.append(mkv) + except IOError: + logger.error('') + elif formatOfFile == SupportedFormat.MP4: logger.info("Converting MP4 to MKV") - with open('essai.mkv', 'w') as mkv: - ffmpegConvert(mp4, 'mp4', mkv, 'matroska') + try: + mkv = open(mkvfilename, 'w') + except IOError: + logger.error('') + ffmpegConvert(mp4, 'mp4', mkv, 'matroska', duration) + temporaries.append(mkv) else: logger.info("Already in MKV") mkv = inputFile @@ -503,8 +770,8 @@ def main(): nbHeadFrames, headIFrame = headFrames nbTailFrames, tailIFrame = tailFrames - print("Found head I-frame and %d frames between: %s" % (nbHeadFrames, headIFrame)) - print("Found I-frame and %d frames between: %s" % (nbTailFrames, tailIFrame)) + logger.info("Found %d frames between beginning of current part and first I-frame" % nbHeadFrames) + logger.info("Found %d frames between last I-frame and end of current part" % nbTailFrames) headIFrameTS = timedelta(seconds=float(headIFrame['pts_time'])) tailIFrameTS = timedelta(seconds=float(tailIFrame['pts_time'])) @@ -514,23 +781,29 @@ def main(): subparts = [] if nbHeadFrames > 0: - head = extractAllStreams(inputFile=mkv, begin=ts1, end=headIFrameTS, nbFrames=nbHeadFrames, filesPrefix='part-%d-head' % (partnum), streams=streams, width=width, height=height) + # We extract all frames between the beginning upto the frame that immediately preceeds the I-frame. + head = extractAllStreams(inputFile=mkv, begin=ts1, end=headIFrameTS, nbFrames=nbHeadFrames-1, filesPrefix='part-%d-head' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries) subparts.append(head) - if nbTailFrames > 0: - tail = extractAllStreams(inputFile=mkv, begin=tailIFrameTS, end=ts2, nbFrames=nbTailFrames, filesPrefix='part-%d-tail' % (partnum), streams=streams, width=width, height=height) - - # Creating MKV file that corresponds to current part between I-frames - internal = open('part-%d-internal.mkv' % partnum, 'w') - # TODO: test if failure + # Creating MKV file that corresponds to current part between I-frames + try: + internal = open('part-%d-internal.mkv' % partnum, 'w') + except IOError: + logger.error('Impossible to create file: part-%d-internal.mkv' % partnum) + exit(-1) + temporaries.append(internal) extractMKVPart(inputFile=mkv, outputFile=internal, begin=headIFrameTS, end=tailIFrameTS) subparts.append(internal) if nbTailFrames > 0: + # We extract all frames between the I-frame (including it) upto the end. + tail = extractAllStreams(inputFile=mkv, begin=tailIFrameTS, end=ts2, nbFrames=nbTailFrames, filesPrefix='part-%d-tail' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries) subparts.append(tail) + logger.info('Merging: %s' % subparts) part = mergeMKVs(inputs=subparts, outputName="part-%d.mkv" % partnum) mkvparts.append(part) + temporaries.append(part) pos = pos+tailIFrameTS-ts1 @@ -539,11 +812,20 @@ def main(): nbParts = len(mkvparts) if nbParts > 1: + logger.info('Merging: %s' % mkvparts) mergeMKVs(inputs=mkvparts, outputName=args.outputFile) elif nbParts == 1: - print("A single part") + copyfile('part-1.mkv', args.outputFile) else: - print("Nothing produced !") + logger.info("Nothing else to do.") + + if not args.keep: + logger.info("Cleaning temporary files") + for f in temporaries: + path = os.path.realpath(f.name) + logger.info("Removing: %s" % path) + f.close() + os.unlink(path) for c in checks: logger.info("Please check cut smoothness at: %s" % c)