Initial version of the Python script to remove ads from TV recordings.
This commit is contained in:
332
removeads.py
Executable file
332
removeads.py
Executable file
@@ -0,0 +1,332 @@
|
||||
import argparse
|
||||
import locale
|
||||
import re
|
||||
from sys import exit
|
||||
from datetime import datetime,timedelta,time
|
||||
import coloredlogs, logging
|
||||
from functools import cmp_to_key
|
||||
from subprocess import Popen, PIPE
|
||||
from os import mkdir, set_inheritable
|
||||
from io import BytesIO, TextIOWrapper
|
||||
import json
|
||||
from enum import Enum, IntEnum, unique, auto
|
||||
|
||||
@unique
class SupportedFormat(IntEnum):
    """Container formats handled by the script.

    Converting a member with ``str()`` yields the format-name string
    associated with that container.
    """
    TS = 1
    MP4 = 2
    Matroska = 3

    def __str__(self):
        """Return the format-name string associated with this member."""
        # Built inside the method: a dict defined in the class body would be
        # turned into an enum member.
        names = {
            SupportedFormat.TS: 'mpegts',
            SupportedFormat.MP4: 'mov,mp4,m4a,3gp,3g2,mj2',
            SupportedFormat.Matroska: 'matroska,webm',
        }
        return names.get(self, "Unsupported format")
|
||||
|
||||
|
||||
def getFormat(inputFile):
    """Return the ``format`` section ffprobe reports for ``inputFile``.

    ``inputFile`` is an open file object. It is rewound and its descriptor is
    handed to an ``ffprobe -show_format`` child process through
    ``/proc/self/fd/<fd>``.

    Returns the value stored under ``'format'`` in ffprobe's JSON output, or
    ``None`` (after logging an error) when that key is absent.
    """
    logger = logging.getLogger(__name__)

    fd = inputFile.fileno()
    inputFile.seek(0, 0)
    # The child process must inherit the descriptor to reach it via /proc.
    set_inheritable(fd, True)

    command = ['ffprobe', '-loglevel', 'quiet', '-show_format', '-of', 'json', '-i', '/proc/self/fd/%d' % fd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        stdout, _ = ffprobe.communicate()
        report = json.loads(stdout)
        if 'format' in report:
            return report['format']
        logger.error('Impossible to retrieve format of file')

    return None
|
||||
|
||||
def getStreams(inputFile):
    """Return the ``streams`` section ffprobe reports for ``inputFile``.

    ``inputFile`` is an open file object. It is rewound and its descriptor is
    handed to an ``ffprobe -show_streams`` child process through
    ``/proc/self/fd/<fd>``.

    Returns the value stored under ``'streams'`` in ffprobe's JSON output, or
    ``None`` (after logging an error) when that key is absent.
    """
    logger = logging.getLogger(__name__)

    fd = inputFile.fileno()
    inputFile.seek(0, 0)
    # The child process must inherit the descriptor to reach it via /proc.
    set_inheritable(fd, True)

    command = ['ffprobe', '-loglevel', 'quiet', '-show_streams', '-of', 'json', '-i', '/proc/self/fd/%d' % fd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        stdout, _ = ffprobe.communicate()
        report = json.loads(stdout)
        if 'streams' in report:
            return report['streams']
        logger.error('Impossible to retrieve streams inside file')

    return None
|
||||
|
||||
def ffmpegConvert(inputFile, inputFormat, outputFile, outputFormat):
    """Remux ``inputFile`` into ``outputFile`` by running ffmpeg.

    Both arguments are open file objects; ffmpeg reaches them through
    ``/proc/self/fd/<fd>``. Video and audio streams are copied, subtitle
    streams are re-encoded as ``dvdsub``. Every ``out_time=`` progress line
    emitted by ffmpeg is echoed on standard output.

    Args:
        inputFile: open file object to read from.
        inputFormat: value passed to ffmpeg's input ``-f`` option.
        outputFile: open file object to write to.
        outputFormat: value passed to ffmpeg's output ``-f`` option.

    Returns:
        None. A non-zero ffmpeg exit status is reported through the logger.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    # The child process must inherit both descriptors to reach them via /proc.
    set_inheritable(infd, True)
    set_inheritable(outfd, True)

    # '-y' is required: the output path /proc/self/fd/<fd> always exists, so
    # without it ffmpeg asks for confirmation (or aborts) before overwriting.
    # TODO: canvas size to be fixed !
    command = ['ffmpeg', '-y', '-loglevel', 'quiet', '-progress', '/dev/stdout', '-canvas_size', '720x560', '-f', inputFormat, '-i', '/proc/self/fd/%d' % infd,
               '-map', '0:v', '-map', '0:a', '-map', '0:s', '-vcodec', 'copy', '-acodec', 'copy', '-scodec', 'dvdsub',
               '-f', outputFormat, '/proc/self/fd/%d' % outfd]

    with Popen(command, stdout=PIPE, close_fds=False) as ffmpeg:
        for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"):
            if line.startswith('out_time='):
                print(line, end='')
        status = ffmpeg.wait()

    # '-loglevel quiet' hides ffmpeg's own diagnostics, so the exit status is
    # the only sign that the conversion failed.
    if status != 0:
        logger.error('ffmpeg exited with status %d' % status)
|
||||
|
||||
def getNearestIFrame(inputFile, timestamp, before=True, delta=timedelta(seconds=2)):
    """Look for an I-frame next to ``timestamp`` in the video of ``inputFile``.

    ffprobe is asked for the video frames in the window
    ``[timestamp - delta, timestamp + delta]`` (the lower bound is clamped to
    zero). Among the frames whose ``pict_type`` is ``'I'``:

    * with ``before`` true, the last one whose ``pts_time`` is not after
      ``timestamp`` is selected;
    * otherwise the first one whose ``pts_time`` is not before ``timestamp``
      is selected.

    Returns the selected frame entry as reported by ffprobe, or ``None`` when
    no frame qualifies or ffprobe returned no ``frames`` section (an error is
    logged in both cases).
    """
    logger = logging.getLogger(__name__)

    # Search window; it must not start before the beginning of the file.
    tbegin = max(timestamp - delta, timedelta())
    tend = timestamp + delta

    infd = inputFile.fileno()
    # The child process must inherit the descriptor to reach it via /proc.
    set_inheritable(infd, True)

    logger.debug('Looking for iframe in [%s, %s]' % (tbegin, tend))

    command = ['ffprobe', '-loglevel', 'quiet', '-read_intervals', ('%s%%%s' % (tbegin, tend)), '-show_entries', 'frame', '-select_streams', 'v', '-of', 'json', '/proc/self/fd/%d' % infd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        raw, _ = ffprobe.communicate()
        report = json.loads(raw)
        if 'frames' not in report:
            logger.error('Impossible to retrieve video frames inside file around [%s,%s]' % (tbegin, tend))
        else:
            iframes = [entry for entry in report['frames'] if entry['pict_type'] == 'I']

            if before:
                # Keep the last I-frame located at or before the timestamp.
                earlier = [entry for entry in iframes
                           if timedelta(seconds=float(entry['pts_time'])) <= timestamp]
                res = earlier[-1] if earlier else None
            else:
                # Stop at the first I-frame located at or after the timestamp.
                res = next((entry for entry in iframes
                            if timedelta(seconds=float(entry['pts_time'])) >= timestamp), None)

            if res is not None:
                logger.debug("Found: %s" % res)
            else:
                logger.error("Impossible to find I-frame around: %s" % timestamp)
            return res

    return None
|
||||
|
||||
|
||||
def parseTimeInterval(interval):
    """Parse an ``hh:mm:ss[.mmm]-hh:mm:ss[.mmm]`` interval.

    Args:
        interval: string holding two timestamps separated by ``-``. Each
            field takes one or two digits; the optional fraction takes one to
            three digits and is read as a decimal fraction of a second
            (``.5`` is 500 ms).

    Returns:
        A ``(start, end)`` pair of ``timedelta`` objects, or ``(None, None)``
        when the string is malformed, a field is out of range, or the end
        precedes the start. An error is logged in each failure case.
    """
    logger = logging.getLogger(__name__)

    # Raw string so that '\.' reaches the regex engine untouched.
    intervalRegExp = r'^(?P<hour1>[0-9]{1,2}):(?P<minute1>[0-9]{1,2}):(?P<second1>[0-9]{1,2})(\.(?P<ms1>[0-9]{1,3}))?-(?P<hour2>[0-9]{1,2}):(?P<minute2>[0-9]{1,2}):(?P<second2>[0-9]{1,2})(\.(?P<ms2>[0-9]{1,3}))?$'
    m = re.match(intervalRegExp, interval)
    if m is None:
        logger.error("Impossible to parse time interval")
        # Always return a pair: callers unpack the result into two names.
        return None, None

    values = m.groupdict()
    bounds = []
    for suffix in ('1', '2'):
        # Hour, minute and second groups are mandatory in the pattern, so
        # they are always present once the match succeeded.
        hour = int(values['hour' + suffix])
        minute = int(values['minute' + suffix])
        second = int(values['second' + suffix])

        # The fraction is a decimal part of a second: right-pad it to three
        # digits so that '.5' means 500 ms rather than 5 ms. The pattern
        # limits it to three digits, hence it cannot exceed 999.
        fraction = values['ms' + suffix]
        ms = int(fraction.ljust(3, '0')) if fraction is not None else 0

        if hour > 23:
            logger.error("hour must be in [0,24[")
            return None, None
        if minute > 59:
            logger.error("minute must be in [0,60[")
            return None, None
        if second > 59:
            logger.error("second must be in [0,60[")
            return None, None

        bounds.append(timedelta(hours=hour, minutes=minute, seconds=second, microseconds=ms*1000))

    ts1, ts2 = bounds
    if ts2 < ts1:
        logger.error("Non monotonic interval")
        return None, None

    return (ts1, ts2)
|
||||
|
||||
def compareTimeInterval(interval1, interval2):
    """Three-way comparison of two ``(start, end)`` intervals.

    Returns -1 when ``interval1`` ends strictly before ``interval2`` starts,
    1 when ``interval2`` ends strictly before ``interval1`` starts, and 0
    otherwise (the intervals touch or overlap).
    """
    start1, end1 = interval1
    start2, end2 = interval2

    if end1 < start2:
        return -1
    if end2 < start1:
        return 1
    return 0
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the arguments, validates the requested parts (well-formed and not
    overlapping), converts the input to Matroska when it is MPEG-TS or MP4,
    then looks up the I-frames bounding each part. Cutting and merging are
    not implemented yet; see the notes at the end of the function.

    The process exits with status -1 on any error.
    """
    # Imported locally because only this function needs it.
    from contextlib import ExitStack

    logger = logging.getLogger(__name__)
    coloredlogs.install()
    locale.setlocale(locale.LC_ALL, 'fr_FR.UTF8')
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", dest='inputFile', type=str, required=True, help="Input file to process (can be .ts, .mp4 or .mkv).")
    parser.add_argument("-o", "--output", dest='outputFile', type=str, required=True, help="Output MKV file to produce.")
    parser.add_argument("-p", "--part", dest='parts', nargs='+', required=True, action='append', metavar="hh:mm:ss[.mmm]-hh:mm:ss[.mmm]", help="Extract this exact part of the original file.")

    args = parser.parse_args()

    # Flatten args.parts ('append' combined with nargs='+' gives a list of lists).
    intervals = [subpart for part in args.parts for subpart in part]

    # Parse each interval
    parts = []
    for interval in intervals:
        parsed = parseTimeInterval(interval)
        # Tolerate a bare None as well as a (None, None) pair.
        ts1, ts2 = parsed if parsed is not None else (None, None)
        if ts1 is None or ts2 is None:
            logger.error("Illegal time interval: %s" % interval)
            exit(-1)
        parts.append((ts1, ts2))

    # Sort intervals
    parts.sort(key=cmp_to_key(compareTimeInterval))

    # Check that no intervals are overlapping
    prevts = timedelta(0)
    for ts1, ts2 in parts:
        if prevts > ts1:
            logger.error('Intervals are overlapping')
            exit(-1)
        prevts = ts2

    # Every file opened below stays open until the end of the function:
    # their descriptors are handed to ffmpeg/ffprobe at several points.
    with ExitStack() as stack:
        try:
            # Binary mode: only the descriptor is used, the content is never decoded.
            inputFile = stack.enter_context(open(args.inputFile, mode='rb'))
        except OSError:
            logger.error("Impossible to open %s" % args.inputFile)
            exit(-1)

        formatOfFile = getFormat(inputFile)
        if formatOfFile is None:
            exit(-1)

        duration = timedelta(seconds=float(formatOfFile['duration']))
        logger.info("Durée de l'enregistrement: %s" % duration)

        # Map the format name reported for the file to a supported format.
        formatName = formatOfFile.get('format_name')
        detected = next((f for f in SupportedFormat if str(f) == formatName), None)
        if detected is None:
            logger.error('Unsupported format of file')
            exit(-1)
        formatOfFile = detected

        if formatOfFile == SupportedFormat.TS:
            logger.info("Converting TS to MP4 (to fix timestamps).")
            mp4 = stack.enter_context(open('essai.mp4', 'wb'))
            ffmpegConvert(inputFile, 'mpegts', mp4, 'mp4')
            logger.info("Converting MP4 to MKV.")
            mkv = stack.enter_context(open('essai.mkv', 'wb'))
            ffmpegConvert(mp4, 'mp4', mkv, 'matroska')
        elif formatOfFile == SupportedFormat.MP4:
            logger.info("Converting MP4 to MKV")
            mkv = stack.enter_context(open('essai.mkv', 'wb'))
            # The input file itself is the MP4 source in this branch.
            ffmpegConvert(inputFile, 'mp4', mkv, 'matroska')
        else:
            logger.info("Already in MKV")
            mkv = inputFile

        streams = getStreams(mkv)
        if streams is None:
            # getStreams has already logged the reason.
            exit(-1)

        mainVideo = None
        for stream in streams:
            if stream['codec_type'] == 'video' and stream['disposition']['default'] == 1:
                mainVideo = stream

        if mainVideo is None:
            logger.error('Impossible to find main video stream.')
            exit(-1)

        # For each part
        for partnum, (ts1, ts2) in enumerate(parts, start=1):
            preFrame = getNearestIFrame(mkv, ts1, before=False)
            if preFrame is None:
                exit(-1)

            postFrame = getNearestIFrame(mkv, ts2, before=True)
            if postFrame is None:
                exit(-1)

            print(preFrame)
            print(postFrame)

            if timedelta(seconds=float(preFrame['pts_time'])) == ts1:
                # Nothing to do !
                pass

            if timedelta(seconds=float(postFrame['pts_time'])) == ts2:
                # Nothing to do !
                pass

            # Find the timestamp of the 'I' frame closest to (but after) the start of the part.
            # Find the timestamp of the 'I' frame closest to (but before) the end of the part.
            # We then have
            # start ----- frame --------- frame --------- end.
            # 'B/P' 'B/P'* 'I' 'I' 'B/P'* 'B/P'
            # If the start frame is already an 'I' frame, there is nothing to do (same for the end).
            # Otherwise extract the 'B' or 'P' frames from the start up to, but not including, the 'I' frame.
            # Build a short MKV video using the same codecs as the original video from the previously extracted files.

        # Call mkvmerge
|
||||
|
||||
# Run the command-line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
4
requirements.txt
Normal file
4
requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
xmltodict
|
||||
requests
|
||||
pygame
|
||||
coloredlogs
|
||||
Reference in New Issue
Block a user