#!/usr/bin/env python3
"""Cut selected time intervals out of a recording (work in progress).

The script accepts a .ts, .mp4 or .mkv input, converts it to Matroska with
ffmpeg when needed, and then locates the I-frames surrounding every requested
interval with ffprobe.  The actual cutting/merging step (mkvmerge) is not
implemented yet, so the ``--output`` argument is parsed but not used.

File descriptors are handed to ffmpeg/ffprobe through ``/proc/self/fd/<n>``,
which means the script only works on systems exposing that interface.

The change set that introduced this script also added a ``requirements.txt``
listing: xmltodict, requests, pygame, coloredlogs (only coloredlogs is used
here, and it is optional).
"""
import argparse
import json
import locale
import logging
import re
from contextlib import ExitStack
from datetime import datetime, timedelta, time
from enum import Enum, IntEnum, unique, auto
from functools import cmp_to_key
from io import BytesIO, TextIOWrapper
from os import mkdir, set_inheritable
from subprocess import Popen, PIPE
from sys import exit

try:
    import coloredlogs
except ImportError:
    # coloredlogs only makes the console output prettier; the script must
    # still be usable (and importable) without it.
    coloredlogs = None


@unique
class SupportedFormat(IntEnum):
    """Container formats the script knows how to handle.

    ``str(member)`` yields the value ffprobe reports in ``format_name`` for
    that container, which is what ``main`` compares against.
    """
    TS = 1
    MP4 = 2
    Matroska = 3

    def __str__(self):
        return _FFPROBE_FORMAT_NAMES.get(self, "Unsupported format")


# Kept outside the class body: a dict defined inside an Enum body would be
# turned into an enum member.
_FFPROBE_FORMAT_NAMES = {
    SupportedFormat.TS: 'mpegts',
    SupportedFormat.MP4: 'mov,mp4,m4a,3gp,3g2,mj2',
    SupportedFormat.Matroska: 'matroska,webm',
}


def _ffprobeJson(inputFile, probeArgs):
    """Run ffprobe on an open file and return its JSON report as a dict.

    inputFile -- open file object; only its descriptor is used.
    probeArgs -- list of extra ffprobe arguments (what to show/select).

    Returns an empty dict (after logging an error) when ffprobe cannot be
    started or does not print a JSON object.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    # The child process reaches the file through its inherited descriptor.
    set_inheritable(infd, True)
    command = (['ffprobe', '-loglevel', 'quiet'] + list(probeArgs)
               + ['-of', 'json', '-i', '/proc/self/fd/%d' % infd])
    try:
        with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
            out, _ = ffprobe.communicate()
    except OSError as err:
        logger.error('Impossible to run ffprobe: %s', err)
        return {}

    try:
        report = json.loads(out)
    except ValueError:
        logger.error('ffprobe did not return valid JSON')
        return {}

    return report if isinstance(report, dict) else {}


def getFormat(inputFile):
    """Return ffprobe's ``format`` section for inputFile, or None on failure."""
    logger = logging.getLogger(__name__)

    inputFile.seek(0, 0)
    report = _ffprobeJson(inputFile, ['-show_format'])
    if 'format' in report:
        return report['format']

    logger.error('Impossible to retrieve format of file')
    return None


def getStreams(inputFile):
    """Return ffprobe's list of streams for inputFile, or None on failure."""
    logger = logging.getLogger(__name__)

    inputFile.seek(0, 0)
    report = _ffprobeJson(inputFile, ['-show_streams'])
    if 'streams' in report:
        return report['streams']

    logger.error('Impossible to retrieve streams inside file')
    return None


def ffmpegConvert(inputFile, inputFormat, outputFile, outputFormat):
    """Remux inputFile into outputFile with ffmpeg, printing its progress.

    inputFile, outputFile -- open file objects; only descriptors are used.
    inputFormat, outputFormat -- ffmpeg demuxer / muxer names.

    Video and audio are copied, subtitles are re-encoded as dvdsub.
    Returns True when ffmpeg exits with status 0, False otherwise.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    # TODO: canvas size to be fixed !
    # '-y': the output path always exists (we opened it ourselves), so ffmpeg
    # would otherwise stop and ask whether it may overwrite it.
    command = ['ffmpeg', '-y', '-loglevel', 'quiet', '-progress', '/dev/stdout',
               '-canvas_size', '720x560', '-f', inputFormat,
               '-i', '/proc/self/fd/%d' % infd,
               '-map', '0:v', '-map', '0:a', '-map', '0:s',
               '-vcodec', 'copy', '-acodec', 'copy', '-scodec', 'dvdsub',
               '-f', outputFormat, '/proc/self/fd/%d' % outfd]
    try:
        with Popen(command, stdout=PIPE, close_fds=False) as ffmpeg:
            for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"):
                if line.startswith('out_time='):
                    print(line, end='')
            status = ffmpeg.wait()
    except OSError as err:
        logger.error('Impossible to run ffmpeg: %s', err)
        return False

    if status != 0:
        # ffmpeg runs with '-loglevel quiet', so this is the only trace.
        logger.error('ffmpeg exited with status %d', status)
        return False
    return True


def getNearestIFrame(inputFile, timestamp, before=True, delta=timedelta(seconds=2)):
    """Find the I-frame closest to timestamp, searching +/- delta around it.

    inputFile -- open file object of the video to inspect.
    timestamp -- timedelta from the start of the video.
    before    -- True: latest I-frame at or before timestamp;
                 False: earliest I-frame at or after timestamp.
    delta     -- half-width of the search window (timedelta).

    Returns the ffprobe frame dict, or None when no such frame is found.
    """
    logger = logging.getLogger(__name__)

    tbegin = max(timestamp - delta, timedelta())
    tend = timestamp + delta

    logger.debug('Looking for iframe in [%s, %s]' % (tbegin, tend))

    report = _ffprobeJson(inputFile, ['-read_intervals', '%s%%%s' % (tbegin, tend),
                                      '-show_entries', 'frame',
                                      '-select_streams', 'v'])
    if 'frames' not in report:
        logger.error('Impossible to retrieve video frames inside file around [%s,%s]' % (tbegin, tend))
        return None

    candidates = []
    for frame in report['frames']:
        # Skip entries lacking the fields we need instead of crashing.
        if frame.get('pict_type') != 'I' or 'pts_time' not in frame:
            continue
        pts = timedelta(seconds=float(frame['pts_time']))
        if (pts <= timestamp) if before else (pts >= timestamp):
            candidates.append((pts, frame))

    if not candidates:
        logger.error("Impossible to find I-frame around: %s" % timestamp)
        return None

    # Choose by timestamp rather than relying on the order of the report.
    choose = max if before else min
    _, res = choose(candidates, key=lambda candidate: candidate[0])
    logger.debug("Found: %s" % res)
    return res


# One "hh:mm:ss[.mmm]" timestamp; %(n)d numbers the named groups.
_TIMESTAMP_PATTERN = (r'(?P<hour%(n)d>[0-9]{1,2}):(?P<minute%(n)d>[0-9]{1,2}):'
                      r'(?P<second%(n)d>[0-9]{1,2})(\.(?P<ms%(n)d>[0-9]{1,3}))?')
_INTERVAL_RE = re.compile('^' + _TIMESTAMP_PATTERN % {'n': 1}
                          + '-' + _TIMESTAMP_PATTERN % {'n': 2} + '$')


def _timestampFromMatch(match, index):
    """Build a timedelta from groups hourN/minuteN/secondN/msN of match.

    Returns None (after logging) when a field is out of range.
    """
    logger = logging.getLogger(__name__)

    hour = int(match.group('hour%d' % index))
    minute = int(match.group('minute%d' % index))
    second = int(match.group('second%d' % index))
    fraction = match.group('ms%d' % index)
    # The digits are a decimal fraction of a second: ".5" means 500 ms, so
    # pad on the right.  Three digits at most, hence always within [0,1000[.
    ms = int(fraction.ljust(3, '0')) if fraction is not None else 0

    if hour > 23:
        logger.error("hour must be in [0,24[")
        return None
    if minute > 59:
        logger.error("minute must be in [0,60[")
        return None
    if second > 59:
        logger.error("second must be in [0,60[")
        return None

    return timedelta(hours=hour, minutes=minute, seconds=second, milliseconds=ms)


def parseTimeInterval(interval):
    """Parse "hh:mm:ss[.mmm]-hh:mm:ss[.mmm]" into a pair of timedeltas.

    Returns (start, end), or (None, None) when the text is malformed, a
    field is out of range, or end is earlier than start.
    """
    logger = logging.getLogger(__name__)

    m = _INTERVAL_RE.match(interval)
    if m is None:
        logger.error("Impossible to parse time interval")
        # Always a pair: callers unpack the result.
        return None, None

    ts1 = _timestampFromMatch(m, 1)
    if ts1 is None:
        return None, None
    ts2 = _timestampFromMatch(m, 2)
    if ts2 is None:
        return None, None

    if ts2 < ts1:
        logger.error("Non monotonic interval")
        return None, None

    return (ts1, ts2)


def compareTimeInterval(interval1, interval2):
    """Three-way comparison of two (start, end) intervals for sorting.

    Returns -1 when interval1 ends at or before the start of interval2,
    1 when interval2 ends at or before the start of interval1, and 0 when
    the two intervals overlap.
    """
    ts11, ts12 = interval1
    ts21, ts22 = interval2

    # '<=' so that intervals that merely touch are still ordered; with '<'
    # they compared equal and could stay in the wrong order.
    if ts12 <= ts21:
        return -1
    elif ts22 <= ts11:
        return 1
    else:
        return 0


def main():
    """Command-line entry point; exits with status -1 on any error."""
    logger = logging.getLogger(__name__)
    if coloredlogs is not None:
        coloredlogs.install()
    else:
        logging.basicConfig(level=logging.INFO)

    try:
        locale.setlocale(locale.LC_ALL, 'fr_FR.UTF8')
    except locale.Error:
        logger.warning('Locale fr_FR.UTF8 is not available, keeping the default one.')

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", dest='inputFile', type=str, required=True, help="Input file to process (can be .ts, .mp4 or .mkv).")
    parser.add_argument("-o", "--output", dest='outputFile', type=str, required=True, help="Output MKV file to produce.")
    parser.add_argument("-p", "--part", dest='parts', nargs='+', required=True, action='append', metavar="hh:mm:ss[.mmm]-hh:mm:ss[.mmm]", help="Extract this exact part of the original file.")

    args = parser.parse_args()

    # Flatten args.parts ('append' + nargs='+' gives a list of lists).
    intervals = [subpart for part in args.parts for subpart in part]

    # Parse each interval
    parts = []
    for interval in intervals:
        ts1, ts2 = parseTimeInterval(interval)
        if ts1 is None or ts2 is None:
            logger.error("Illegal time interval: %s" % interval)
            exit(-1)
        parts.append((ts1, ts2))

    # Sort intervals
    parts.sort(key=cmp_to_key(compareTimeInterval))

    # Check that no intervals are overlapping
    prevts = timedelta(0)
    for ts1, ts2 in parts:
        if prevts > ts1:
            logger.error('Intervals are overlapping')
            exit(-1)
        prevts = ts2

    # Every file must stay open until the end: ffmpeg/ffprobe reach them
    # through their descriptors.  ExitStack closes them on any exit path.
    with ExitStack() as stack:
        try:
            inputFile = stack.enter_context(open(args.inputFile, mode='rb'))
        except OSError:
            logger.error("Impossible to open %s" % args.inputFile)
            exit(-1)

        formatOfFile = getFormat(inputFile)
        if formatOfFile is None:
            exit(-1)

        if 'duration' in formatOfFile:
            duration = timedelta(seconds=float(formatOfFile['duration']))
            logger.info("Durée de l'enregistrement: %s" % duration)

        formatName = formatOfFile.get('format_name')
        detected = next((f for f in SupportedFormat if str(f) == formatName), None)
        if detected is None:
            logger.error('Unsupported format of file')
            exit(-1)

        if detected == SupportedFormat.TS:
            logger.info("Converting TS to MP4 (to fix timestamps).")
            mp4 = stack.enter_context(open('essai.mp4', 'w+b'))
            if not ffmpegConvert(inputFile, 'mpegts', mp4, 'mp4'):
                exit(-1)
            logger.info("Converting MP4 to MKV.")
            mkv = stack.enter_context(open('essai.mkv', 'w+b'))
            if not ffmpegConvert(mp4, 'mp4', mkv, 'matroska'):
                exit(-1)
        elif detected == SupportedFormat.MP4:
            logger.info("Converting MP4 to MKV")
            mkv = stack.enter_context(open('essai.mkv', 'w+b'))
            if not ffmpegConvert(inputFile, 'mp4', mkv, 'matroska'):
                exit(-1)
        else:
            logger.info("Already in MKV")
            mkv = inputFile

        streams = getStreams(mkv)
        if streams is None:
            exit(-1)

        mainVideo = None
        for stream in streams:
            if stream.get('codec_type') == 'video' and stream.get('disposition', {}).get('default') == 1:
                mainVideo = stream

        if mainVideo is None:
            logger.error('Impossible to find main video stream.')
            exit(-1)

        # For each requested part
        for partnum, (ts1, ts2) in enumerate(parts, start=1):
            logger.debug('Processing part %d: [%s, %s]', partnum, ts1, ts2)

            preFrame = getNearestIFrame(mkv, ts1, before=False)
            if preFrame is None:
                exit(-1)

            postFrame = getNearestIFrame(mkv, ts2, before=True)
            if postFrame is None:
                exit(-1)

            print(preFrame)
            print(postFrame)

            # TODO: not implemented yet.  Plan:
            # Find the timestamp of the nearest I-frame at or after the start
            # of the part, and of the nearest I-frame at or before its end.
            # This gives
            #   start ----- frame --------- frame --------- end.
            #   'B/P' 'B/P'* 'I'             'I' 'B/P'* 'B/P'
            # If the first frame of the part is already an I-frame there is
            # nothing to do (same for the end).  Otherwise extract the 'B'/'P'
            # frames from the start up to, but excluding, the I-frame and
            # build a short MKV video from them using the same codecs as the
            # original video.

        # TODO: call mkvmerge


if __name__ == "__main__":
    main()