removeads/removeads.py

#!/usr/bin/env python3

import argparse
import locale
import re
from sys import exit
from datetime import datetime,timedelta,time
import coloredlogs, logging
from functools import cmp_to_key
from subprocess import Popen, PIPE
from os import read, write, lseek, pipe, set_inheritable, memfd_create, SEEK_SET, close, unlink
import os.path
from io import BytesIO, TextIOWrapper
import json
from enum import Enum, IntEnum, unique, auto
import shutil
from tqdm import tqdm, trange
from select import select
from math import floor, ceil, log
from shutil import copyfile, which

# Useful SPS/PPS discussion
# TODO: improve the handling of SPS and PPS header mismatches when merging MKV files with mkvmerge, to remove warnings.
# https://copyprogramming.com/howto/including-sps-and-pps-in-a-raw-h264-track
# https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2390


def checkRequiredTools():
    """Check that the external programs driven by this script are on the PATH.

    A missing mandatory tool is logged as an error and terminates the
    process with exit status -1. A missing optional tool is only reported
    at info level.
    """
    log = logging.getLogger(__name__)
    mandatory = ('ffmpeg', 'ffprobe', 'mkvmerge')
    niceToHave = ('mkvextract', 'vobsubocr')

    for name in mandatory:
        if which(name) is not None:
            continue
        log.error('Required tool: %s is missing.' % name)
        exit(-1)

    for name in niceToHave:
        if which(name) is None:
            log.info('Optional tool: %s is missing.' % name)


@unique
class SupportedFormat(IntEnum):
    """Container formats handled by this script.

    ``str()`` of a member yields the string that is compared against the
    ``format_name`` reported by ffprobe.
    """
    TS = 1
    MP4 = 2
    Matroska = 3

    def __str__(self):
        # The table lives inside the method: a class-level dict would be
        # turned into an enum member.
        formatNames = {
            1: 'mpegts',
            2: 'mov,mp4,m4a,3gp,3g2,mj2',
            3: 'matroska,webm',
        }
        return formatNames.get(int(self), 'Unsupported format')

# Extract SPS/PPS
# https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2390
# ffmpeg -i <InputFile (before concatenation)> -c:v copy -an -sn -bsf:v trace_headers -t 0.01 -report -loglevel 0 -f null -


def getFormat(inputFile):
    """Return the ``format`` section of ffprobe's JSON report for *inputFile*.

    The open file is handed to ffprobe through ``/proc/self/fd/<fd>``, so
    its descriptor is made inheritable first. Returns ``None`` (after
    logging an error) when the report has no ``format`` entry.
    """
    log = logging.getLogger(__name__)

    fd = inputFile.fileno()
    inputFile.seek(0, 0)
    set_inheritable(fd, True)

    command = ['ffprobe', '-loglevel', 'quiet', '-show_format', '-of', 'json',
               '-i', '/proc/self/fd/%d' % fd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        stdout, _ = ffprobe.communicate()
        report = json.loads(stdout)
        if 'format' not in report:
            log.error('Impossible to retrieve format of file')
        else:
            return report['format']

    return None

def getStreams(inputFile):
    """Return the list of streams ffprobe reports for *inputFile*.

    The open file is handed to ffprobe through ``/proc/self/fd/<fd>``, so
    its descriptor is made inheritable first. Returns ``None`` (after
    logging an error) when the report has no ``streams`` entry.
    """
    log = logging.getLogger(__name__)

    fd = inputFile.fileno()
    inputFile.seek(0, 0)
    set_inheritable(fd, True)

    command = ['ffprobe', '-loglevel', 'quiet', '-show_streams', '-of', 'json',
               '-i', '/proc/self/fd/%d' % fd]
    with Popen(command, stdout=PIPE, close_fds=False) as ffprobe:
        stdout, _ = ffprobe.communicate()
        report = json.loads(stdout)
        if 'streams' not in report:
            log.error('Impossible to retrieve streams inside file')
        else:
            return report['streams']

    return None

def parseTimestamp(ts):
    """Parse an ``hh:mm:ss[.ffffff]`` string into a ``timedelta``.

    The optional fractional part holds 1 to 6 digits and is interpreted as
    a decimal fraction of a second (``.5`` is half a second, not 5
    microseconds).

    Returns ``None`` (after logging an error) when the string does not match
    the expected layout or when a field is out of range.
    """
    logger = logging.getLogger(__name__)

    # Raw string: '\.' is not a valid escape in a normal string literal.
    tsRegExp = r'^(?P<hour>[0-9]{1,2}):(?P<minute>[0-9]{1,2}):(?P<second>[0-9]{1,2})(\.(?P<us>[0-9]{1,6}))?$'
    m = re.match(tsRegExp, ts)
    if m is None:
        logger.error("Impossible to parse timestamp: %s" % ts)
        return None

    hour = int(m['hour'])
    minute = int(m['minute'])
    second = int(m['second'])
    # Right-pad the fraction to 6 digits so that it really is a number of
    # microseconds; it can then never reach 1000000.
    fraction = m['us']
    us = int(fraction.ljust(6, '0')) if fraction else 0

    if hour > 23:
        logger.error("hour must be in [0,24[")
        return None
    if minute > 59:
        logger.error("minute must be in [0,60[")
        return None
    if second > 59:
        logger.error("second must be in [0,60[")
        return None

    return timedelta(hours=hour, minutes=minute, seconds=second, microseconds=us)

def _intervalBoundToTimedelta(values, index, logger):
    """Turn the regex groups of bound number *index* (1 or 2) into a timedelta.

    Returns ``None`` (after logging an error) when a field is out of range.
    """
    hour = int(values['hour%d' % index])
    minute = int(values['minute%d' % index])
    second = int(values['second%d' % index])
    # Right-pad the fraction to 3 digits: '.5' means 500 ms, not 5 ms.
    fraction = values['ms%d' % index]
    ms = int(fraction.ljust(3, '0')) if fraction else 0

    if hour > 23:
        logger.error("hour must be in [0,24[")
        return None
    if minute > 59:
        logger.error("minute must be in [0,60[")
        return None
    if second > 59:
        logger.error("second must be in [0,60[")
        return None

    return timedelta(hours=hour, minutes=minute, seconds=second, milliseconds=ms)


def parseTimeInterval(interval):
    """Parse ``hh:mm:ss[.mmm]-hh:mm:ss[.mmm]`` into a pair of timedeltas.

    Returns ``(begin, end)`` on success. On any failure (layout mismatch,
    field out of range, end before begin) an error is logged and
    ``(None, None)`` is returned, so that callers can always unpack the
    result.
    """
    logger = logging.getLogger(__name__)

    # One bound of the interval; instantiated twice with group suffix 1 and 2.
    bound = r'(?P<hour%(n)d>[0-9]{1,2}):(?P<minute%(n)d>[0-9]{1,2}):(?P<second%(n)d>[0-9]{1,2})(\.(?P<ms%(n)d>[0-9]{1,3}))?'
    intervalRegExp = '^' + bound % {'n': 1} + '-' + bound % {'n': 2} + '$'
    m = re.match(intervalRegExp, interval)
    if m is None:
        logger.error("Impossible to parse time interval")
        return None, None

    values = m.groupdict()
    ts1 = _intervalBoundToTimedelta(values, 1, logger)
    if ts1 is None:
        return None, None
    ts2 = _intervalBoundToTimedelta(values, 2, logger)
    if ts2 is None:
        return None, None

    if ts2 < ts1:
        logger.error("Non monotonic interval")
        return None, None

    return (ts1, ts2)

def compareTimeInterval(interval1, interval2):
    """Three-way comparison of two ``(begin, end)`` intervals.

    Returns -1 when *interval1* ends strictly before *interval2* begins,
    1 when *interval2* ends strictly before *interval1* begins, and 0
    otherwise (the intervals touch or overlap).
    """
    begin1, end1 = interval1
    begin2, end2 = interval2

    if end1 < begin2:
        return -1
    return 1 if end2 < begin1 else 0


def ffmpegConvert(inputFile, inputFormat, outputFile, outputFormat, duration):
    """Remux *inputFile* into *outputFile* with ffmpeg, showing a progress bar.

    Both files are passed to ffmpeg through ``/proc/self/fd/<fd>``, so their
    descriptors are made inheritable. Video and audio are copied, subtitles
    are converted to ``dvdsub``. *duration* (a ``timedelta``) is only used as
    the total of the progress bar. A non-zero ffmpeg exit status is logged
    as an error; nothing is returned.
    """
    logger = logging.getLogger(__name__)

    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    oneSecond = timedelta(seconds=1)
    # TODO: canvas size to be fixed !
    with Popen(['ffmpeg', '-y', '-loglevel', 'quiet', '-progress', '/dev/stdout', '-canvas_size', '720x560', '-f', inputFormat, '-i', '/proc/self/fd/%d' % infd,
                '-map', '0:v', '-map', '0:a', '-map', '0:s', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-vcodec', 'copy', '-acodec', 'copy', '-scodec', 'dvdsub',
                '-f', outputFormat, '/proc/self/fd/%d' % outfd], stdout=PIPE, close_fds=False) as ffmpeg:
        pb = tqdm(TextIOWrapper(ffmpeg.stdout, encoding="utf-8"), total=int(duration/oneSecond), unit='s', desc='Conversion')
        for line in pb:
            if line.startswith('out_time='):
                ts = parseTimestamp(line.split('=')[1].strip())
                if ts is None:
                    # parseTimestamp returns None for any value it cannot
                    # parse; skip this progress sample instead of crashing
                    # on None/timedelta.
                    continue
                pb.n = int(ts/oneSecond)
                pb.update()
        status = ffmpeg.wait()
        if status != 0:
            logger.error('Conversion failed with status code: %d' % status)


def getFramesInStream(inputFile, begin, end, streamKind, subStreamId=0):
    """Return the frames of one stream whose timestamp lies in [begin, end].

    ffprobe is asked for the frames of stream ``<streamKind>:<subStreamId>``
    over the read interval ``begin%end``. Each returned element is the frame
    dictionary produced by ffprobe. The timestamp is taken from ``pts_time``
    or, failing that, ``pkt_pts_time``.

    Returns ``None`` (after logging an error) when ffprobe fails, when its
    output cannot be decoded, when the report has no ``frames`` entry or when
    a frame carries no timestamp.
    """
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()
    set_inheritable(infd, True)

    with Popen(['ffprobe', '-loglevel', 'quiet', '-read_intervals', ('%s%%%s' %(begin, end)), '-show_entries', 'frame', '-select_streams', '%s:%d' % (streamKind, subStreamId), '-of', 'json', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()
        # communicate() has waited for the process: check the status before
        # trying to decode an output that may be empty or truncated.
        status = ffprobe.returncode
        if status != 0:
            logger.error('ffprobe failed with status code: %d' % status)
            return None

    try:
        report = json.loads(out)
    except json.JSONDecodeError:
        logger.error('Impossible to retrieve frames inside file around [%s,%s]' % (begin, end))
        return None

    if 'frames' not in report:
        logger.error('Impossible to retrieve frames inside file around [%s,%s]' % (begin, end))
        return None

    res = []
    for frame in report['frames']:
        if 'pts_time' in frame:
            pts_time = float(frame['pts_time'])
        elif 'pkt_pts_time' in frame:
            pts_time = float(frame['pkt_pts_time'])
        else:
            logger.error('Impossible to find timestamp of frame %s' % frame)
            return None

        # Keep only the frames that really are inside the requested window.
        if begin <= timedelta(seconds=pts_time) <= end:
            res.append(frame)
    return res

def getNearestIFrame(inputFile, timestamp, before=True, delta=timedelta(seconds=2)):
    """Find the I-frame closest to *timestamp* in the first video stream.

    Frames are searched in ``[timestamp-delta, timestamp+delta]`` (the lower
    bound is clamped to zero). With ``before=True`` the last I-frame at or
    before *timestamp* is selected, otherwise the first I-frame at or after
    it.

    Returns ``(nbFrames, iframe)`` where *iframe* is the ffprobe frame
    dictionary and *nbFrames* the number of frames lying between the I-frame
    and *timestamp* (both bounds included). Returns ``None`` when the frames
    cannot be retrieved or when no suitable I-frame exists in the window.
    """
    logger = logging.getLogger(__name__)

    tbegin = max(timestamp-delta, timedelta())
    tend = timestamp+delta

    logger.debug('Looking for iframe in [%s, %s]' % (tbegin, tend))

    frames = getFramesInStream(inputFile=inputFile, begin=tbegin, end=tend, streamKind='v')
    if frames is None:
        return None

    iframe = None
    for frame in frames:
        if frame.get('pict_type') != 'I':
            continue
        ts = timedelta(seconds=float(frame['pts_time']))
        if before:
            # Keep scanning: the last match is the one nearest to timestamp.
            if ts <= timestamp:
                iframe = frame
        elif ts >= timestamp:
            iframe = frame
            break

    if iframe is None:
        logger.error("Impossible to find I-frame around: %s" % timestamp)
        return None

    logger.info("Found i-frame at: %s" % iframe)

    its = timedelta(seconds=float(iframe['pts_time']))
    if before:
        lower, upper = its, timestamp
    else:
        lower, upper = timestamp, its

    nbFrames = 0
    for frame in frames:
        ts = timedelta(seconds=float(frame['pts_time']))
        if lower <= ts <= upper:
            logger.info("Retrieve a frame between %s and %s at %s" % (lower, upper, ts))
            nbFrames = nbFrames+1

    return (nbFrames, iframe)

def extractMKVPart(inputFile, outputFile, begin, end):
    """Copy the ``begin``-``end`` part of an MKV into *outputFile* with mkvmerge.

    Both files are passed through ``/proc/self/fd/<fd>`` and rewound first.
    mkvmerge's standard output is parsed to drive a progress bar and to
    collect warnings. The parser matches the French strings
    ``Progression :`` and ``Avertissement``. Exit status 1 is reported as a
    warning (followed by the collected lines), exit status 2 as an error.
    """
    logger = logging.getLogger(__name__)

    logger.info('Extract video between I-frames at %s and %s' % (begin,end))
    inputFile.seek(0,0)
    outputFile.seek(0,0)
    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    warnings = []
    progressPattern = re.compile('^Progression : (?P<progress>[0-9]{1,3})%$')
    with Popen(['mkvmerge', '-o', '/proc/self/fd/%d' % outfd, '--split', 'parts:%s-%s' % (begin, end), '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as mkvmerge:
        pb = tqdm(TextIOWrapper(mkvmerge.stdout, encoding="utf-8"), total=100, unit='%', desc='Extraction')
        for line in pb:
            if line.startswith('Progression :'):
                m = progressPattern.match(line)
                if m is None:
                    # Do not index a failed match: just skip this sample.
                    logger.error('Impossible to parse progress')
                    continue
                pb.n = int(m['progress'])
                pb.update()
            elif line.startswith('Avertissement'):
                warnings.append(line)

        status = mkvmerge.wait()
        if status == 1:
            logger.warning('Extraction returns warning')
            for w in warnings:
                logger.warning(w)
        elif status == 2:
            logger.error('Extraction returns errors')


def extractPictures(inputFile, begin, nbFrames, width=640, height=480):
    """Decode *nbFrames* video frames starting at *begin* into PPM images.

    ffmpeg writes the concatenated PPM pictures, scaled to *width* x *height*,
    into an anonymous in-memory file created with ``memfd_create``.

    Returns ``(images, memfd)``: the raw bytes and the descriptor of the
    memory file, rewound to its beginning (the caller owns and must close
    it). Returns ``(None, None)`` when ffmpeg fails or when the amount of
    data differs from the expected size; the memory file is closed in that
    case.
    """
    logger = logging.getLogger(__name__)

    inputFile.seek(0,0)
    infd = inputFile.fileno()
    outfd = memfd_create('pictures', flags=0)
    set_inheritable(outfd, True)
    # PPM header
    # "P6\nWIDTH HEIGHT\n255\n"
    # Digits are counted on the decimal text itself: ceil(log10(n)) is one
    # short for exact powers of ten (10, 100, 1000...).
    headerLen=2+1+len('%d' % width)+1+len('%d' % height)+1+3+1
    logger.debug('Header length: %d' % headerLen)
    imageLength = width*height*3+headerLen
    length = imageLength*nbFrames
    logger.debug("Estimated length: %d" % length)

    with Popen(['ffmpeg', '-loglevel', 'quiet' ,'-y', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-s', '%dx%d'%(width, height), '-vframes', '%d'%nbFrames, '-c:v', 'ppm', '-f', 'image2pipe', '/proc/self/fd/%d' % outfd ], stdout=PIPE, close_fds=False) as ffmpeg:
        status = ffmpeg.wait()

    if status != 0:
        logger.error('Conversion failed with status code: %d' % status)
        close(outfd)
        return None, None

    lseek(outfd, 0, SEEK_SET)
    images = read(outfd,length)
    if len(images) != length:
        logger.info("Received %d bytes but %d were expected." % (len(images), length))
        close(outfd)
        return None, None

    # Hand the descriptor back rewound, ready to be read again.
    lseek(outfd, 0, SEEK_SET)
    return images, outfd

def extractSound(inputFile, begin, outputFileName, packetDuration, subChannel=0, nbPackets=0, sampleRate=48000, nbChannels=2):
    """Decode audio starting at *begin* into raw 32-bit little-endian PCM.

    ffmpeg writes the samples into an anonymous in-memory file created with
    ``memfd_create`` and named *outputFileName*. The expected size is
    ``nbChannels * sampleRate * 4 bytes * nbPackets * packetDuration / 1000``
    (*packetDuration* is reported in milliseconds by the log message below).

    Returns ``(sound, memfd)``: the raw bytes and the descriptor of the
    memory file (the caller owns and must close it). Returns
    ``(None, None)`` when ffmpeg fails or when the amount of data differs
    from the expected size; the memory file is closed in that case.
    """
    logger = logging.getLogger(__name__)

    inputFile.seek(0,0)
    infd = inputFile.fileno()
    outfd = memfd_create(outputFileName, flags=0)
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    length = int(nbChannels*sampleRate*4*nbPackets*packetDuration/1000)

    with Popen(['ffmpeg', '-y', '-loglevel', 'quiet', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-frames:a:%d' % subChannel, '%d' % (nbPackets+1),
                '-c:a', 'pcm_s32le', '-sample_rate', '%d' % sampleRate, '-channels', '%d' % nbChannels, '-f', 's32le', '/proc/self/fd/%d' % outfd], stdout=PIPE, close_fds=False) as ffmpeg:
        status = ffmpeg.wait()

    if status != 0:
        logger.error('Sound extraction returns error code: %d' % status)
        close(outfd)
        return None, None

    lseek(outfd, 0, SEEK_SET)
    sound = read(outfd, length)

    if len(sound) != length:
        logger.info("Received %d bytes but %d were expected (channels=%d, freq=%d, packets=%d, duration=%d ms)." % (len(sound), length, nbChannels, sampleRate, nbPackets, packetDuration))
        close(outfd)
        return None, None

    return sound, outfd

def dumpPPM(pictures, prefix, temporaries):
    """Split concatenated binary PPM pictures into ``<prefix>-NNN.ppm`` files.

    *pictures* is a bytes object holding one or more ``P6`` images back to
    back. Every file created is appended (still open) to *temporaries* so
    that the caller can close and remove it later.

    Processing stops, after logging an error, at the first picture whose
    header cannot be parsed, whose data is truncated or whose file cannot be
    created.
    """
    logger = logging.getLogger(__name__)

    # "P6\nWIDTH HEIGHT\n255\n"
    dimensionsPattern = re.compile(rb'^(?P<width>[0-9]+) (?P<height>[0-9]+)\n$')
    total = len(pictures)
    pos = 0
    picture = 0
    while pos < total:
        filename = '%s-%03d.ppm' % (prefix, picture)

        # Read the three header lines in place; their real length is used
        # instead of a recomputed one.
        headerLines = []
        cursor = pos
        for _ in range(3):
            newline = pictures.find(b'\n', cursor)
            if newline < 0:
                break
            headerLines.append(pictures[cursor:newline+1])
            cursor = newline+1

        if len(headerLines) != 3 or headerLines[0] != b'P6\n':
            logger.error('Not a PPM picture')
            return

        m = dimensionsPattern.match(headerLines[1])
        if m is None:
            logger.error('Impossible to parse dimensions of picture')
            return
        width = int(m['width'])
        height = int(m['height'])

        headerLen = cursor-pos
        # Three bytes per pixel, as in the original size computation.
        length = headerLen+3*width*height
        if pos+length > total:
            logger.error('Not a PPM picture')
            return

        try:
            out = open(filename, 'wb')
        except IOError:
            logger.error('Impossible to create file: %s' % filename)
            return
        temporaries.append(out)

        out.write(pictures[pos:pos+length])
        # The file stays open in temporaries: make its content visible now.
        out.flush()

        pos += length
        picture += 1

def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpMemFD=False):
    """Re-encode the ``[begin, end]`` slice of every stream into a new MKV.

    Video frames and audio samples are first decoded into in-memory files
    (see ``extractPictures`` and ``extractSound``), then fed back to ffmpeg
    which encodes them with the codec of the original stream and writes
    ``<filesPrefix>.mkv``. Subtitle streams are replaced by ``./empty.idx``.
    With ``dumpMemFD=True`` the decoded pictures and sound are also written
    to disk for debugging.

    Returns the open output file (also appended to *temporaries*), or
    ``None`` when ``begin`` is not before ``end`` (nothing to extract) or
    when a file cannot be created or the encoding fails. The process exits
    with status -1 when pictures, sound or audio packets cannot be extracted.
    """
    logger = logging.getLogger(__name__)

    if not begin < end:
        # Nothing to be done. We are already at a i-frame boundary.
        return None

    encoderParams = [ 'ffmpeg', '-y', '-loglevel', 'quiet' ]
    inputParams = []
    codecsParams = []
    memfds = []
    videoID=0
    audioID=0
    subTitleID=0
    frameRatePattern = re.compile('^(?P<numerator>[0-9]+)/(?P<denominator>[0-9]+)$')
    # ffmpeg option -> ffprobe stream key, for the optional color metadata.
    colorOptions = (('-colorspace', 'color_space'), ('-color_primaries', 'color_primaries'),
                    ('-color_trc', 'color_transfer'), ('-color_range', 'color_range'))

    def releaseMemfds():
        """Close every in-memory file created so far."""
        for fd in memfds:
            close(fd)

    for stream in streams:
        if stream['codec_type'] == 'video':
            logger.info("Extracting video stream v:%d" % videoID)
            frameRate = stream['r_frame_rate']
            m = frameRatePattern.match(frameRate)
            if m is not None:
                frameRate = float(m['numerator']) / float(m['denominator'])
            else:
                # Not a fraction: it must still be a number for '%f' below.
                frameRate = float(frameRate)
            pixelFormat = stream['pix_fmt']
            # TODO: do something with field order and chroma location.
            # When interlaced TOP or BOTTOM
            #  -top 1 -flags:v +ilme+ildct
            #  -top 0 -flags:v +ilme+ildct
            # ======================================= #
            # TODO: adjust SAR and DAR
            # https://superuser.com/questions/907933/correct-aspect-ratio-without-re-encoding-video-file
            codec = stream['codec_name']
            imagesBytes, memfd = extractPictures(inputFile=inputFile, begin=begin, nbFrames=nbFrames, width=width, height=height)
            if imagesBytes is None:
                exit(-1)

            memfds.append(memfd)

            if dumpMemFD:
                dumpPPM(imagesBytes, '%s-%d' % (filesPrefix,videoID), temporaries)

            # We rewind to zero the memory file descriptor
            lseek(memfd, 0, SEEK_SET)
            set_inheritable(memfd, True)

            inputParams.extend(['-framerate', '%f'%frameRate, '-f', 'image2pipe', '-i', '/proc/self/fd/%d' % memfd])
            codecsParams.extend(['-c:v:%d' % videoID, codec, '-pix_fmt', pixelFormat])
            # Only forward the color metadata the source stream declares.
            for option, key in colorOptions:
                if key in stream:
                    codecsParams.extend(['%s:v:%d' % (option, videoID), stream[key]])
            videoID=videoID+1
        elif stream['codec_type'] == 'audio':
            logger.info("Extracting audio stream: a:%d" % audioID)
            sampleRate = int(stream['sample_rate'])
            nbChannels = int(stream['channels'])
            codec = stream['codec_name']
            if 'tags' in stream:
                if 'language' in stream['tags']:
                    codecsParams.extend(['-metadata:s:a:%d' % audioID, 'language=%s' % stream['tags']['language']])
            packets = getFramesInStream(inputFile=inputFile, begin=begin, end=end, streamKind='a', subStreamId=audioID)
            if packets is None:
                exit(-1)
            nbPackets = len(packets)
            logger.debug("Found %d packets to be extracted from audio track." % nbPackets)
            # Without packets the expected amount of sound is zero anyway.
            packetDuration = packets[0]['duration'] if nbPackets > 0 else 0

            tmpname = '%s-%d.pcm' % (filesPrefix,audioID)

            soundBytes , memfd = extractSound(inputFile=inputFile, begin=begin, nbPackets=nbPackets, packetDuration=packetDuration, outputFileName=tmpname, sampleRate=sampleRate, nbChannels=nbChannels)

            if soundBytes is None:
                exit(-1)

            memfds.append(memfd)

            if dumpMemFD:
                try:
                    output = open(tmpname,'w')
                except IOError:
                    logger.error('Impossible to create file: %s' % tmpname)
                    releaseMemfds()
                    return None

                outfd = output.fileno()
                pos = 0
                while pos < len(soundBytes):
                    pos+=write(outfd, soundBytes[pos:])
                temporaries.append(output)

            # We rewind to zero the memory file descriptor
            lseek(memfd, 0, SEEK_SET)
            set_inheritable(memfd, True)

            inputParams.extend(['-f', 's32le', '-ar', '%d'%sampleRate, '-ac', '%d'%nbChannels, '-i', '/proc/self/fd/%d' % memfd])
            codecsParams.extend(['-c:a:%d' % audioID, codec])
            # The bit rate is only forwarded when the stream declares one.
            if 'bit_rate' in stream:
                codecsParams.extend(['-b:a:%d' % audioID, '%d' % int(stream['bit_rate'])])
            audioID=audioID+1
        elif stream['codec_type'] == 'subtitle':
            logger.info("Extracting a subtitle stream: s:%d" % subTitleID)
            inputParams.extend(['-i', './empty.idx'])
            if 'tags' in stream:
                if 'language' in stream['tags']:
                    codecsParams.extend(['-metadata:s:s:%d' % subTitleID, 'language=%s' % stream['tags']['language']])
            codecsParams.extend(['-c:s:%d' % subTitleID, 'copy'])
            subTitleID=subTitleID+1
        else:
            logger.error("Unknown stream type: %s" % stream['codec_type'])

    # Create a new MKV movie with all streams that have been extracted.
    encoderParams.extend(inputParams)
    for index in range(0,videoID+audioID+subTitleID):
        encoderParams.extend(['-map', '%d' % index])
    encoderParams.extend(codecsParams)
    fileName = '%s.mkv' % filesPrefix
    try:
        output = open(fileName,'w')
    except IOError:
        logger.error('Impossible to create file: %s' % fileName)
        releaseMemfds()
        return None

    outfd = output.fileno()
    set_inheritable(outfd, True)
    # TODO: manage interlaced to previous parameters.
    encoderParams.extend(['-top', '1', '-flags:v', '+ilme+ildct', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-f', 'matroska', '/proc/self/fd/%d' % outfd])

    logger.info('Encoding video: %s' % fileName)
    try:
        with Popen(encoderParams, stdout=PIPE, close_fds=False) as ffmpeg:
            status = ffmpeg.wait()
    finally:
        # The in-memory inputs are no longer needed, whatever the outcome.
        releaseMemfds()

    if status != 0:
        logger.error('Encoding failed with status code: %d' % status)
        output.close()
        return None

    temporaries.append(output)
    return output

# Merge a list of mkv files passed as input, and produce a new MKV as output
def mergeMKVs(inputs, outputName):
    """Concatenate MKV files with mkvmerge into a new file named *outputName*.

    *inputs* is a list of open files; ``None`` entries are skipped. The
    first file is passed as is, each following one is prefixed with ``+``
    on the mkvmerge command line. Files are passed through
    ``/proc/self/fd/<fd>``: their descriptors are made inheritable for the
    duration of the call and reset afterwards. The progress parser matches
    the French strings ``Progression :`` and ``Avertissement``.

    Returns the open output file, or ``None`` when it cannot be created.
    """
    logger = logging.getLogger(__name__)

    try:
        out = open(outputName, 'w')
    except IOError:
        logger.error('Impossible to create file: %s' % outputName)
        return None

    outfd = out.fileno()
    fds = [outfd]
    set_inheritable(outfd, True)

    mergeParams = ['mkvmerge']
    first = True
    for mkv in inputs:
        if mkv is None:
            continue
        fd = mkv.fileno()
        fds.append(fd)
        set_inheritable(fd, True)
        if first:
            mergeParams.append('/proc/self/fd/%d' % fd)
            first = False
        else:
            mergeParams.append('+/proc/self/fd/%d' % fd)

    mergeParams.extend(['-o', '/proc/self/fd/%d' % outfd])

    # We merge all files.
    warnings = []
    progressPattern = re.compile('^Progression : (?P<progress>[0-9]{1,3})%$')
    with Popen(mergeParams, stdout=PIPE, close_fds=False) as mkvmerge:
        pb = tqdm(TextIOWrapper(mkvmerge.stdout, encoding="utf-8"), total=100, unit='%', desc='Merging')
        for line in pb:
            if line.startswith('Progression :'):
                m = progressPattern.match(line)
                if m is None:
                    # Do not index a failed match: just skip this sample.
                    logger.error('Impossible to parse progress')
                    continue
                pb.n = int(m['progress'])
                pb.update()
            elif line.startswith('Avertissement'):
                warnings.append(line)

        status = mkvmerge.wait()
        if status == 1:
            logger.warning('Merge returns warning')
            for w in warnings:
                logger.warning(w)
        elif status == 2:
            logger.error('Merge returns errors')

    for fd in fds:
        set_inheritable(fd, False)

    return out

def findSubtitlesTracks(filename):
    """Return the subtitle streams (index and language tag) of *filename*.

    Equivalent command line:
    ``ffprobe -select_streams s -show_entries stream=index:stream_tags=language -of json <file>``

    Returns the list of stream dictionaries reported by ffprobe, or an empty
    list (after logging an error) when the report cannot be decoded or has
    no ``streams`` entry.
    """
    logger = logging.getLogger(__name__)

    with Popen(['ffprobe', '-i', filename, '-select_streams', 's', '-show_entries', 'stream=index:stream_tags=language', '-of', 'json'], stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()

    try:
        report = json.loads(out)
    except json.JSONDecodeError:
        report = {}

    if 'streams' in report:
        return report['streams']

    logger.error('Impossible to retrieve subtitle streams of file: %s' % filename)
    # An empty list keeps callers that iterate over the result working.
    return []

def extractSubTitleTrack(inputFileName, index, lang):
    """Extract track *index* of *inputFileName* with mkvextract.

    Equivalent command line: ``mkvextract <file> tracks <index>:<lang>``.
    *lang* is used as the name of the file written by mkvextract. Every line
    printed by mkvextract is logged at info level.
    """
    logger = logging.getLogger(__name__)

    with Popen(['mkvextract', inputFileName, 'tracks', '%d:%s' % (index,lang)], stdout=PIPE, close_fds=False) as mkvextract:
        out, _ = mkvextract.communicate()

    # 'out' is a bytes object: decode it and log it line by line (iterating
    # over the bytes directly would yield one integer per byte).
    for line in out.decode('utf-8', errors='replace').splitlines():
        logger.info(line)


def main():
    """Command-line entry point: keep only the requested parts of a recording.

    Steps:
      1. parse the arguments and the ``-p`` intervals, sort them and reject
         overlapping ones;
      2. convert the input to MKV when it is a TS (through MP4) or an MP4;
      3. for every part, locate the I-frames just inside its bounds, copy
         the span between them with mkvmerge and re-encode the few frames
         before the first / after the last I-frame;
      4. merge everything into the output file;
      5. optionally look up subtitle tracks, then remove temporary files
         unless ``--keep`` is given.

    Any fatal error is logged and terminates the process with status -1.
    """
    logger = logging.getLogger(__name__)
    coloredlogs.install()
    # NOTE(review): the mkvmerge output parsers of this file match French
    # strings; presumably the process environment is expected to use a
    # French locale as well - confirm.
    locale.setlocale(locale.LC_ALL, 'fr_FR.UTF8')
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", dest='inputFile', type=str, required=True, help="Input file to process (can be .ts, .mp4 or .mkv).")
    parser.add_argument("-o", "--output", dest='outputFile', type=str, required=True, help="Output MKV file to produce.")
    parser.add_argument("-p", "--part", dest='parts', nargs='+', required=False, action='append', metavar="hh:mm:ss[.mmm]-hh:mm:ss[.mmm]", help="Extract this exact part of the original file.")
    parser.add_argument("-k", "--keep", action='store_true', help="Do not cleanup temporary files after processing.")
    parser.add_argument("--dump-memory", action='store_true', dest='dump', help="For debug purpose, dump all memory mapping of headers (and trailers) before (after) each part. They are kept in memory only otherwise.")
    parser.add_argument("-s","--srt", action='store_true', dest='srt', help="Dump subtitles ")

    args = parser.parse_args()
    logger.debug("Arguments: %s" % args)

    checkRequiredTools()

    # Flatten args.parts
    intervals = []
    if args.parts is not None:
        for part in args.parts:
            intervals.extend(part)

    parts=[]
    # Parse each interval
    for interval in intervals:
        parsed = parseTimeInterval(interval)
        # Accept both failure shapes: a bare None or a pair containing None.
        if parsed is None or parsed[0] is None or parsed[1] is None:
            logger.error("Illegal time interval: %s" % interval)
            exit(-1)
        parts.append((parsed[0], parsed[1]))

    # Sort intervals
    parts.sort(key=cmp_to_key(compareTimeInterval))

    # Check that no intervals are overlapping
    prevts = timedelta(0)
    for part in parts:
        ts1, ts2 = part
        if prevts > ts1:
            logger.error('Intervals are overlapping')
            exit(-1)
        prevts = ts2

    temporaries = []

    basename = os.path.splitext(os.path.basename(args.inputFile))[0]
    mp4filename = basename+'.mp4'
    mkvfilename = basename+'.mkv'

    try:
        inputFile = open(args.inputFile, mode='r')
    except IOError:
        logger.error("Impossible to open %s" % args.inputFile)
        exit(-1)

    formatOfFile = getFormat(inputFile)

    if formatOfFile is None:
        exit(-1)

    duration = timedelta(seconds=float(formatOfFile['duration']))
    logger.info("Durée de l'enregistrement: %s" % duration)

    found = False
    for f in SupportedFormat:
        if 'format_name' in formatOfFile:
            if formatOfFile['format_name'] == str(f):
                found = True
                formatOfFile = f
                break

    if not found:
        # Going on would treat the unknown file as if it were an MKV.
        logger.error('Unsupported format of file')
        exit(-1)

    if formatOfFile == SupportedFormat.TS:
        logger.info("Converting TS to MP4 (to fix timestamps).")
        try:
            mp4 = open(mp4filename, 'w')
        except IOError:
            logger.error('Impossible to create file: %s' % mp4filename)
            exit(-1)
        temporaries.append(mp4)
        ffmpegConvert(inputFile, 'mpegts', mp4, 'mp4', duration)

        logger.info("Converting MP4 to MKV.")
        try:
            mkv = open(mkvfilename, 'w')
        except IOError:
            logger.error('Impossible to create file: %s' % mkvfilename)
            exit(-1)
        temporaries.append(mkv)
        ffmpegConvert(mp4, 'mp4', mkv, 'matroska', duration)

    elif formatOfFile == SupportedFormat.MP4:
        logger.info("Converting MP4 to MKV")
        try:
            mkv = open(mkvfilename, 'w')
        except IOError:
            logger.error('Impossible to create file: %s' % mkvfilename)
            exit(-1)
        temporaries.append(mkv)
        # The input itself is the MP4 to convert.
        ffmpegConvert(inputFile, 'mp4', mkv, 'matroska', duration)
    else:
        logger.info("Already in MKV")
        mkv = inputFile

    streams = getStreams(mkv)
    if streams is None:
        exit(-1)

    mainVideo = None
    for stream in streams:
        if stream['codec_type'] == 'video' and stream['disposition']['default'] == 1:
            mainVideo = stream
            width = stream['width']
            height = stream['height']

    if mainVideo is None:
        logger.error('Impossible to find main video stream.')
        exit(-1)

    # For each part
    partnum = 0
    mkvparts = []
    checks = []
    pos = timedelta()

    for ts1, ts2 in parts:
        # Find the timestamp of the 'I' frame nearest to (but after) the beginning of the part.
        # Find the timestamp of the 'I' frame nearest to (but before) the end of the part.
        # We then have
        # begin  -----    frame  --------- frame  ---------  end.
        #  'B/P'  'B/P'*   'I'               'I'   'B/P'*   'B/P'
        # If the first frame is already an 'I' frame there is nothing to do (same for the end).
        # Otherwise the 'B' or 'P' frames from the beginning up to the 'I' frame (excluded) are extracted.

        partnum = partnum + 1

        headFrames = getNearestIFrame(mkv, ts1, before=False)
        if headFrames is None:
            exit(-1)

        tailFrames = getNearestIFrame(mkv, ts2, before=True)
        if tailFrames is None:
            exit(-1)

        nbHeadFrames, headIFrame = headFrames
        nbTailFrames, tailIFrame = tailFrames

        logger.info("Found %d frames between beginning of current part and first I-frame" % nbHeadFrames)
        logger.info("Found %d frames between last I-frame and end of current part" % nbTailFrames)

        headIFrameTS = timedelta(seconds=float(headIFrame['pts_time']))
        tailIFrameTS = timedelta(seconds=float(tailIFrame['pts_time']))

        checks.append(pos+headIFrameTS-ts1)

        subparts = []

        if nbHeadFrames > 0:
            # We extract all frames between the beginning upto the frame that immediately preceeds the I-frame.
            head = extractAllStreams(inputFile=mkv, begin=ts1, end=headIFrameTS, nbFrames=nbHeadFrames-1, filesPrefix='part-%d-head' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
            subparts.append(head)

        # Creating MKV file that corresponds to current part between I-frames
        try:
            internal = open('part-%d-internal.mkv' % partnum, 'w')
        except IOError:
            logger.error('Impossible to create file: part-%d-internal.mkv' % partnum)
            exit(-1)
        temporaries.append(internal)
        extractMKVPart(inputFile=mkv, outputFile=internal, begin=headIFrameTS, end=tailIFrameTS)
        subparts.append(internal)

        if nbTailFrames > 0:
            # We extract all frames between the I-frame (including it) upto the end.
            tail = extractAllStreams(inputFile=mkv, begin=tailIFrameTS, end=ts2, nbFrames=nbTailFrames, filesPrefix='part-%d-tail' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump)
            subparts.append(tail)

        logger.info('Merging: %s' % subparts)
        part = mergeMKVs(inputs=subparts, outputName="part-%d.mkv" % partnum)
        if part is None:
            # mergeMKVs already logged why the file could not be created.
            exit(-1)
        mkvparts.append(part)
        temporaries.append(part)

        pos = pos+tailIFrameTS-ts1

    # We need to check the end also
    checks.append(pos)

    nbParts = len(mkvparts)
    if nbParts > 1:
        logger.info('Merging: %s' % mkvparts)
        merged = mergeMKVs(inputs=mkvparts, outputName=args.outputFile)
        if merged is None:
            exit(-1)
        # The final output is not a temporary: just release its handle.
        merged.close()
    elif nbParts == 1:
        copyfile('part-1.mkv', args.outputFile)
    else:
        logger.info("Nothing else to do.")

    if args.srt:
        logger.info("Find subtitles tracks and language.")
        subtitles = findSubtitlesTracks(args.outputFile)
        if subtitles is None:
            subtitles = []
        sts = {}
        for subtitle in subtitles:
            index = subtitle['index']
            if 'tags' in subtitle:
                if 'language' in subtitle['tags']:
                    lang = subtitle['tags']['language']
                    if lang in sts:
                        sts[lang].append(index)
                    else:
                        sts[lang] = [index]
                else:
                    logger.error("Dropping subtitle: %s because it is missing language indication" % subtitle)
            else:
                logger.error("Dropping subtitle: %s because it is missing language indication" % subtitle)

        # NOTE(review): the file names computed below are not used yet; the
        # actual extraction of the tracks looks unfinished.
        for lang in sts:
            indexes = sts[lang]
            if len(indexes) == 0:
                # Nothing to do. This should not happen.
                continue
            if len(indexes) == 1:
                index = indexes[0]
                filename = 'essai-%s.srt' % lang
            elif len(indexes) > 1:
                nbsrt = 1
                for index in indexes:
                    filename = 'essai-%s-%d.srt' % (lang, nbsrt)
                    nbsrt+=1

    if not args.keep:
        logger.info("Cleaning temporary files")
        for f in temporaries:
            path = os.path.realpath(f.name)
            logger.info("Removing: %s" % path)
            f.close()
            unlink(path)

    for c in checks:
        logger.info("Please check cut smoothness at: %s" % c)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()