import argparse
import locale
import re
from sys import exit
from datetime import timedelta
import coloredlogs
import logging
from functools import cmp_to_key
from subprocess import Popen, PIPE
from os import set_inheritable
from io import BytesIO, TextIOWrapper
import json
from enum import IntEnum, unique


@unique
class SupportedFormat(IntEnum):
    TS = 1
    MP4 = 2
    Matroska = 3

    def __str__(self):
        if self is SupportedFormat.TS:
            return 'mpegts'
        elif self is SupportedFormat.MP4:
            return 'mov,mp4,m4a,3gp,3g2,mj2'
        elif self is SupportedFormat.Matroska:
            return 'matroska,webm'
        else:
            return 'Unsupported format'


@unique
class ColorSpace(IntEnum):
    BT709 = 0
    FCC = 1
    BT601 = 2
    BT470 = 3
    BT470BG = 4
    SMPTE170M = 5
    SMPTE240M = 6
    BT2020 = 7

    def __str__(self):
        if self is ColorSpace.BT709:
            return 'bt709'
        elif self is ColorSpace.FCC:
            return 'fcc'
        elif self is ColorSpace.BT601:
            return 'bt601'
        elif self is ColorSpace.BT470:
            return 'bt470'
        elif self is ColorSpace.BT470BG:
            return 'bt470bg'
        elif self is ColorSpace.SMPTE170M:
            return 'smpte170m'
        elif self is ColorSpace.SMPTE240M:
            return 'smpte240m'
        elif self is ColorSpace.BT2020:
            return 'bt2020'
        else:
            return 'Unsupported color space'


def getFormat(inputFile):
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()
    inputFile.seek(0, 0)
    set_inheritable(infd, True)
    with Popen(['ffprobe', '-loglevel', 'quiet', '-show_format', '-of', 'json',
                '-i', '/proc/self/fd/%d' % infd],
               stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()
        out = json.load(BytesIO(out))
        if 'format' in out:
            return out['format']
        else:
            logger.error('Impossible to retrieve format of file')
            return None


def getStreams(inputFile):
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()
    inputFile.seek(0, 0)
    set_inheritable(infd, True)
    with Popen(['ffprobe', '-loglevel', 'quiet', '-show_streams', '-of', 'json',
                '-i', '/proc/self/fd/%d' % infd],
               stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()
        out = json.load(BytesIO(out))
        if 'streams' in out:
            return out['streams']
        else:
            logger.error('Impossible to retrieve streams inside file')
            return None
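

# All ffmpeg/ffprobe/mkvmerge invocations below receive their inputs and outputs
# through '/proc/self/fd/<fd>' paths, so already-open Python file objects can be
# handed to the child processes (their descriptors are made inheritable first).
# This only works on Linux or any system exposing /proc/self/fd.
#
# ffmpegConvert() remuxes without re-encoding video and audio ('-vcodec copy',
# '-acodec copy'); only subtitles are converted to dvdsub.  '-progress /dev/stdout'
# makes ffmpeg emit 'key=value' progress records on stdout, and the loop below
# echoes only the 'out_time=' lines.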
def ffmpegConvert(inputFile, inputFormat, outputFile, outputFormat):
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    # TODO: canvas size to be fixed!
    with Popen(['ffmpeg', '-loglevel', 'quiet', '-progress', '/dev/stdout',
                '-canvas_size', '720x560',
                '-f', inputFormat, '-i', '/proc/self/fd/%d' % infd,
                '-map', '0:v', '-map', '0:a', '-map', '0:s',
                '-vcodec', 'copy', '-acodec', 'copy', '-scodec', 'dvdsub',
                '-f', outputFormat, '/proc/self/fd/%d' % outfd],
               stdout=PIPE, close_fds=False) as ffmpeg:
        for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"):
            if line.startswith('out_time='):
                print(line, end='')


def getFramesInStream(inputFile, begin, end, streamKind, subStreamId=0):
    logger = logging.getLogger(__name__)
    infd = inputFile.fileno()
    set_inheritable(infd, True)
    with Popen(['ffprobe', '-loglevel', 'quiet',
                '-read_intervals', '%s%%%s' % (begin, end),
                '-show_entries', 'frame',
                '-select_streams', '%s:%d' % (streamKind, subStreamId),
                '-of', 'json', '/proc/self/fd/%d' % infd],
               stdout=PIPE, close_fds=False) as ffprobe:
        out, _ = ffprobe.communicate()
        frames = json.load(BytesIO(out))
        res = []
        if 'frames' in frames:
            frames = frames['frames']
            for frame in frames:
                ts = timedelta(seconds=float(frame['pts_time']))
                if begin <= ts and ts <= end:
                    res.append(frame)
            return res
        else:
            logger.error('Impossible to retrieve frames inside file around [%s,%s]' % (begin, end))
            return None


def getNearestIFrame(inputFile, timestamp, before=True, delta=timedelta(seconds=2)):
    logger = logging.getLogger(__name__)
    zero = timedelta()
    tbegin = timestamp - delta
    tend = timestamp + delta
    if tbegin < zero:
        tbegin = zero
    infd = inputFile.fileno()
    set_inheritable(infd, True)
    logger.debug('Looking for iframe in [%s, %s]' % (tbegin, tend))
    frames = getFramesInStream(inputFile=inputFile, begin=tbegin, end=tend, streamKind='v')
    if frames is None:
        return None
    iframes = []
    for frame in frames:
        if frame['pict_type'] == 'I':
            iframes.append(frame)
    found = False
    iframe = None
    for frame in iframes:
        if before and timedelta(seconds=float(frame['pts_time'])) <= timestamp:
            found = True
            iframe = frame
        if not before and timedelta(seconds=float(frame['pts_time'])) >= timestamp:
            found = True
            iframe = frame
            break
    if found:
        logger.debug("Found: %s" % iframe)
        its = timedelta(seconds=float(iframe['pts_time']))
        nbFrames = 0
        for frame in frames:
            ts = timedelta(seconds=float(frame['pts_time']))
            if before:
                if its <= ts and ts <= timestamp:
                    nbFrames = nbFrames + 1
            else:
                if timestamp <= ts and ts <= its:
                    nbFrames = nbFrames + 1
    else:
        logger.error("Impossible to find I-frame around: %s" % timestamp)
        return None
    return (nbFrames - 1, iframe)
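

# mkvmerge can only start and stop a '--split parts:A-B' extraction on keyframe
# (I-frame) boundaries, so it cannot cut exactly at arbitrary timestamps.
# extractMKVPart() is therefore used only for the portion of each requested
# interval that lies between two I-frames; the few frames between the requested
# timestamps and those I-frames are re-encoded by extractAllStreams() and glued
# back on with mergeMKVs().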
def extractMKVPart(inputFile, outputFile, begin, end):
    inputFile.seek(0, 0)
    outputFile.seek(0, 0)
    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    with Popen(['mkvmerge', '-o', '/proc/self/fd/%d' % outfd,
                '--split', 'parts:%s-%s' % (begin, end),
                '/proc/self/fd/%d' % infd],
               stdout=PIPE, close_fds=False) as mkvmerge:
        for line in TextIOWrapper(mkvmerge.stdout, encoding="utf-8"):
            print(line, end='')


def extractPictures(inputFile, begin, nbFrames, prefix, width=640, height=480):
    inputFile.seek(0, 0)
    infd = inputFile.fileno()
    set_inheritable(infd, True)
    with Popen(['ffmpeg', '-y', '-loglevel', 'quiet', '-ss', '%s' % begin,
                '-i', '/proc/self/fd/%d' % infd,
                '-s', '%dx%d' % (width, height), '-vframes', '%d' % nbFrames,
                '-c:v', 'ppm', '-f', 'image2', '%s-%%03d.ppm' % prefix],
               stdout=PIPE, close_fds=False) as ffmpeg:
        for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"):
            print(line, end='')


def extractSound(inputFile, begin, outputFile, subChannel=0, nbPackets=0, sampleRate=48000, nbChannels=2):
    inputFile.seek(0, 0)
    outputFile.seek(0, 0)
    infd = inputFile.fileno()
    outfd = outputFile.fileno()
    set_inheritable(infd, True)
    set_inheritable(outfd, True)
    with Popen(['ffmpeg', '-y', '-loglevel', 'quiet', '-ss', '%s' % begin,
                '-i', '/proc/self/fd/%d' % infd,
                '-frames:a:%d' % subChannel, '%d' % nbPackets,
                '-c:a', 'pcm_s32le', '-sample_rate', '%d' % sampleRate,
                '-channels', '%d' % nbChannels, '-f', 's32le',
                '/proc/self/fd/%d' % outfd],
               stdout=PIPE, close_fds=False) as ffmpeg:
        for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"):
            print(line, end='')
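

# extractAllStreams() re-encodes the short span between a requested timestamp and
# the nearest I-frame: video frames are dumped as PPM images and audio as raw
# PCM, then everything is reassembled into a small MKV whose codecs, color
# parameters, sample rate and bit rate are copied from the original streams, so
# that the result can be concatenated with the stream-copied middle part.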
def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, width, height):
    logger = logging.getLogger(__name__)
    # encoderParams = [ 'ffmpeg', '-y', '-loglevel', 'quiet' ]
    encoderParams = ['ffmpeg', '-y']
    inputParams = []
    codecsParams = []
    if begin < end:
        videoID = 0
        audioID = 0
        subTitleID = 0
        audioFiles = {}
        for stream in streams:
            if stream['codec_type'] == 'video':
                print("Extracting video stream: %s" % stream)
                frameRate = stream['r_frame_rate']
                sar = stream['sample_aspect_ratio']
                dar = stream['display_aspect_ratio']
                pixelFormat = stream['pix_fmt']
                colorRange = stream['color_range']
                colorSpace = stream['color_space']
                colorTransfer = stream['color_transfer']
                colorPrimaries = stream['color_primaries']
                codec = stream['codec_name']
                extractPictures(inputFile=inputFile, begin=begin, nbFrames=nbFrames,
                                prefix="%s-%d" % (filesPrefix, videoID),
                                width=width, height=height)
                inputParams.extend(['-i', '%s-%d-%%03d.ppm' % (filesPrefix, videoID)])
                codecsParams.extend(['-c:v:%d' % videoID, codec,
                                     '-pix_fmt', pixelFormat,
                                     '-colorspace:v:%d' % videoID, colorSpace,
                                     '-color_primaries:v:%d' % videoID, colorPrimaries,
                                     '-color_trc:v:%d' % videoID, colorTransfer,
                                     '-color_range:v:%d' % videoID, colorRange])
                videoID = videoID + 1
            elif stream['codec_type'] == 'audio':
                print("Extracting audio stream: %s" % stream)
                sampleRate = int(stream['sample_rate'])
                nbChannels = int(stream['channels'])
                bitRate = int(stream['bit_rate'])
                codec = stream['codec_name']
                if 'tags' in stream:
                    if 'language' in stream['tags']:
                        codecsParams.extend(['-metadata:s:a:%d' % audioID,
                                             'language=%s' % stream['tags']['language']])
                packets = getFramesInStream(inputFile=inputFile, begin=begin, end=end,
                                            streamKind='a', subStreamId=audioID)
                nbPackets = len(packets)
                print("Found %d packets to be extracted from audio track." % nbPackets)
                audioFiles[audioID] = open('%s-%d.pcm' % (filesPrefix, audioID), 'w')
                # TODO: test if successfully opened
                extractSound(inputFile=inputFile, begin=begin, nbPackets=nbPackets,
                             outputFile=audioFiles[audioID],
                             sampleRate=sampleRate, nbChannels=nbChannels)
                inputParams.extend(['-f', 's32le', '-ar', '%d' % sampleRate,
                                    '-ac', '%d' % nbChannels,
                                    '-i', '/proc/self/fd/%d' % audioFiles[audioID].fileno()])
                codecsParams.extend(['-c:a:%d' % audioID, codec,
                                     '-b:a:%d' % audioID, '%d' % bitRate])
                audioID = audioID + 1
            elif stream['codec_type'] == 'subtitle':
                print("Extracting a subtitle stream: %s" % stream)
                codec = stream['codec_name']
                inputParams.extend(['-i', './empty.idx'])
                if 'tags' in stream:
                    if 'language' in stream['tags']:
                        codecsParams.extend(['-metadata:s:s:%d' % subTitleID,
                                             'language=%s' % stream['tags']['language']])
                codecsParams.extend(['-c:s:%d' % subTitleID, 'copy'])
                subTitleID = subTitleID + 1
            else:
                logger.info("Unknown stream type: %s" % stream['codec_type'])
        # Example:
        #   ffmpeg -framerate 25.85 -i image-%02d.ppm -f s32le -ar 48000 -ac 2 -i ./audio-1.pcm \
        #     -c:a eac3 -b:a 128k -c:v libx264 -crf 25.85 -vf "scale=1920:1080,format=yuv420p" \
        #     -colorspace:v "bt709" -color_primaries:v "bt709" -color_trc:v "bt709" -color_range:v "tv" \
        #     -top 1 -flags:v +ilme+ildct -bsf:v h264_mp4toannexb,dump_extra=keyframe \
        #     -metadata MAJOR_BRAND=isom -metadata MINOR_VERSION=512 -movflags +faststart cut-1.mkv
        # Create a new MKV movie with all streams that have been extracted.
        encoderParams.extend(inputParams)
        for index in range(0, videoID + audioID + subTitleID):
            encoderParams.extend(['-map', '%d' % index])
        encoderParams.extend(codecsParams)
        output = open('%s.mkv' % filesPrefix, 'w')
        outfd = output.fileno()
        set_inheritable(outfd, True)
        encoderParams.extend(['-top', '1',
                              '-bsf:v', 'h264_mp4toannexb,dump_extra=keyframe',
                              '-f', 'matroska', '/proc/self/fd/%d' % outfd])
        print(encoderParams)
        with Popen(encoderParams, stdout=PIPE, close_fds=False) as ffmpeg:
            for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"):
                print(line, end='')
        return output
    else:
        # Nothing to be done. We are already at an I-frame boundary.
        return None


# Merge a list of MKV files passed as input, and produce a new MKV as output.
def mergeMKVs(inputs, outputName):
    fds = []
    out = open(outputName, 'w')
    # TODO: Check success or failure
    outfd = out.fileno()
    fds.append(outfd)
    set_inheritable(outfd, True)
    mergeParams = ['mkvmerge']
    first = True
    for mkv in inputs:
        if mkv is not None:
            fd = mkv.fileno()
            fds.append(fd)
            set_inheritable(fd, True)
            if first:
                mergeParams.append('/proc/self/fd/%d' % fd)
                first = False
            else:
                mergeParams.append('+/proc/self/fd/%d' % fd)
    mergeParams.extend(['-o', '/proc/self/fd/%d' % outfd])
    # We merge all files.
    with Popen(mergeParams, stdout=PIPE, close_fds=False) as mkvmerge:
        for line in TextIOWrapper(mkvmerge.stdout, encoding="utf-8"):
            print(line, end='')
    for fd in fds:
        set_inheritable(fd, False)
    return out
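

# parseTimeInterval() accepts intervals written as 'hh:mm:ss[.mmm]-hh:mm:ss[.mmm]'
# and returns a pair of timedelta objects, or (None, None) on error.
# Illustrative example:
#   parseTimeInterval('00:01:30.500-00:02:00')
#   -> (timedelta(minutes=1, seconds=30, milliseconds=500), timedelta(minutes=2))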
def parseTimeInterval(interval):
    logger = logging.getLogger(__name__)
    intervalRegExp = (r'^(?P<hour1>[0-9]{1,2}):(?P<minute1>[0-9]{1,2}):(?P<second1>[0-9]{1,2})'
                      r'(\.(?P<ms1>[0-9]{1,3}))?'
                      r'-(?P<hour2>[0-9]{1,2}):(?P<minute2>[0-9]{1,2}):(?P<second2>[0-9]{1,2})'
                      r'(\.(?P<ms2>[0-9]{1,3}))?$')
    p = re.compile(intervalRegExp)
    m = p.match(interval)
    if m is None:
        logger.error("Impossible to parse time interval")
        return None, None
    values = m.groupdict()
    hour1 = 0
    minute1 = 0
    second1 = 0
    ms1 = 0
    hour2 = 0
    minute2 = 0
    second2 = 0
    ms2 = 0
    if values['hour1'] is not None:
        hour1 = int(values['hour1'])
    if values['minute1'] is not None:
        minute1 = int(values['minute1'])
    if values['second1'] is not None:
        second1 = int(values['second1'])
    if values['ms1'] is not None:
        ms1 = int(values['ms1'])
    if values['hour2'] is not None:
        hour2 = int(values['hour2'])
    if values['minute2'] is not None:
        minute2 = int(values['minute2'])
    if values['second2'] is not None:
        second2 = int(values['second2'])
    if values['ms2'] is not None:
        ms2 = int(values['ms2'])
    if hour1 < 0 or hour1 > 23:
        logger.error("hour must be in [0,24[")
        return None, None
    if minute1 < 0 or minute1 > 59:
        logger.error("minute must be in [0,60[")
        return None, None
    if second1 < 0 or second1 > 59:
        logger.error("second must be in [0,60[")
        return None, None
    if ms1 < 0 or ms1 > 999:
        logger.error("milliseconds must be in [0,1000[")
        return None, None
    if hour2 < 0 or hour2 > 23:
        logger.error("hour must be in [0,24[")
        return None, None
    if minute2 < 0 or minute2 > 59:
        logger.error("minute must be in [0,60[")
        return None, None
    if second2 < 0 or second2 > 59:
        logger.error("second must be in [0,60[")
        return None, None
    if ms2 < 0 or ms2 > 999:
        logger.error("milliseconds must be in [0,1000[")
        return None, None
    ts1 = timedelta(hours=hour1, minutes=minute1, seconds=second1, microseconds=ms1 * 1000)
    ts2 = timedelta(hours=hour2, minutes=minute2, seconds=second2, microseconds=ms2 * 1000)
    if ts2 < ts1:
        logger.error("Non monotonic interval")
        return None, None
    return (ts1, ts2)


def compareTimeInterval(interval1, interval2):
    ts11, ts12 = interval1
    ts21, ts22 = interval2
    if ts12 < ts21:
        return -1
    elif ts22 < ts11:
        return 1
    else:
        return 0


def main():
    logger = logging.getLogger(__name__)
    coloredlogs.install()
    locale.setlocale(locale.LC_ALL, 'fr_FR.UTF8')
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", dest='inputFile', type=str, required=True,
                        help="Input file to process (can be .ts, .mp4 or .mkv).")
    parser.add_argument("-o", "--output", dest='outputFile', type=str, required=True,
                        help="Output MKV file to produce.")
    parser.add_argument("-p", "--part", dest='parts', nargs='+', required=True, action='append',
                        metavar="hh:mm:ss[.mmm]-hh:mm:ss[.mmm]",
                        help="Extract this exact part of the original file.")
    args = parser.parse_args()
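    # Typical invocation (illustrative script and file names):
    #   python cut.py -i recording.ts -o result.mkv -p 00:10:00-00:25:30 -p 01:02:00.500-01:17:45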
    # Flatten args.parts
    intervals = []
    for part in args.parts:
        for subpart in part:
            intervals.append(subpart)
    parts = []
    # Parse each interval
    for interval in intervals:
        ts1, ts2 = parseTimeInterval(interval)
        if ts1 is None or ts2 is None:
            logger.error("Illegal time interval: %s" % interval)
            exit(-1)
        parts.append((ts1, ts2))
    # Sort intervals
    parts.sort(key=cmp_to_key(compareTimeInterval))
    # Check that no intervals are overlapping
    prevts = timedelta(0)
    for part in parts:
        ts1, ts2 = part
        if prevts > ts1:
            logger.error('Intervals are overlapping')
            exit(-1)
        prevts = ts2
    inputFile = open(args.inputFile, mode='r')
    if inputFile is None:
        logger.error("Impossible to open %s" % args.inputFile)
        exit(-1)
    formatOfFile = getFormat(inputFile)
    if formatOfFile is None:
        exit(-1)
    duration = timedelta(seconds=float(formatOfFile['duration']))
    logger.info("Recording duration: %s" % duration)
    found = False
    for f in SupportedFormat:
        if 'format_name' in formatOfFile:
            if formatOfFile['format_name'] == str(f):
                found = True
                formatOfFile = f
                break
    if not found:
        logger.error('Unsupported format of file')
        exit(-1)
    if formatOfFile == SupportedFormat.TS:
        logger.info("Converting TS to MP4 (to fix timestamps).")
        mp4 = open('essai.mp4', 'w')
        ffmpegConvert(inputFile, 'mpegts', mp4, 'mp4')
        logger.info("Converting MP4 to MKV.")
        mkv = open('essai.mkv', 'w')
        ffmpegConvert(mp4, 'mp4', mkv, 'matroska')
    elif formatOfFile == SupportedFormat.MP4:
        logger.info("Converting MP4 to MKV")
        mkv = open('essai.mkv', 'w')
        ffmpegConvert(inputFile, 'mp4', mkv, 'matroska')
    else:
        logger.info("Already in MKV")
        mkv = inputFile
    streams = getStreams(mkv)
    mainVideo = None
    for stream in streams:
        if stream['codec_type'] == 'video' and stream['disposition']['default'] == 1:
            mainVideo = stream
            width = stream['width']
            height = stream['height']
    if mainVideo is None:
        logger.error('Impossible to find main video stream.')
        exit(-1)
    # For each part:
    partnum = 0
    mkvparts = []
    for ts1, ts2 in parts:
        # Find the timestamp of the nearest 'I' frame at or after the beginning of the part.
        # Find the timestamp of the nearest 'I' frame at or before the end of the part.
        # We then have:
        #   begin ----- frame --------- frame --------- end
        #   'B/P' 'B/P'*  'I'            'I'  'B/P'* 'B/P'
        # If the first frame is already an 'I' frame, there is nothing to do (same for the end).
        # Otherwise, the 'B' or 'P' frames from the beginning up to (but not including) the 'I'
        # frame are extracted and re-encoded.
        partnum = partnum + 1
        headFrames = getNearestIFrame(mkv, ts1, before=False)
        if headFrames is None:
            exit(-1)
        tailFrames = getNearestIFrame(mkv, ts2, before=True)
        if tailFrames is None:
            exit(-1)
        nbHeadFrames, headIFrame = headFrames
        nbTailFrames, tailIFrame = tailFrames
        print("Found head I-frame, with %d frames between the requested start and it: %s" % (nbHeadFrames, headIFrame))
        print("Found tail I-frame, with %d frames between it and the requested end: %s" % (nbTailFrames, tailIFrame))
        headIFrameTS = timedelta(seconds=float(headIFrame['pts_time']))
        tailIFrameTS = timedelta(seconds=float(tailIFrame['pts_time']))
        subparts = []
        head = extractAllStreams(inputFile=mkv, begin=ts1, end=headIFrameTS,
                                 nbFrames=nbHeadFrames,
                                 filesPrefix='part-%d-head' % partnum,
                                 streams=streams, width=width, height=height)
        subparts.append(head)
        tail = extractAllStreams(inputFile=mkv, begin=tailIFrameTS, end=ts2,
                                 nbFrames=nbTailFrames,
                                 filesPrefix='part-%d-tail' % partnum,
                                 streams=streams, width=width, height=height)
        # Creating the MKV file that corresponds to the current part between I-frames.
        internal = open('part-%d-internal.mkv' % partnum, 'w')
        # TODO: test if failure
        extractMKVPart(inputFile=mkv, outputFile=internal, begin=headIFrameTS, end=tailIFrameTS)
        subparts.append(internal)
        subparts.append(tail)
        part = mergeMKVs(inputs=subparts, outputName="part-%d.mkv" % partnum)
        mkvparts.append(part)
    nbParts = len(mkvparts)
    if nbParts > 1:
        mergeMKVs(inputs=mkvparts, outputName=args.outputFile)
    elif nbParts == 1:
        # Note: the single part is left in 'part-1.mkv' and is not copied to the requested output file.
        print("A single part")
    else:
        print("Nothing produced!")


if __name__ == "__main__":
    main()