From 076e3c990bf7e5c5cf63ad9841650fbadecbc077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Tronel?= Date: Sat, 2 Dec 2023 17:25:55 +0100 Subject: [PATCH] Better performances and simplification by removing pipes and using memory file descriptors. --- removeads.py | 252 ++++++++++++++++++++++----------------------------- 1 file changed, 110 insertions(+), 142 deletions(-) diff --git a/removeads.py b/removeads.py index 2d93a22..cff3e6f 100755 --- a/removeads.py +++ b/removeads.py @@ -8,7 +8,8 @@ from datetime import datetime,timedelta,time import coloredlogs, logging from functools import cmp_to_key from subprocess import Popen, PIPE -import os +from os import read, write, lseek, pipe, set_inheritable, memfd_create, SEEK_SET, close, unlink +import os.path from io import BytesIO, TextIOWrapper import json from enum import Enum, IntEnum, unique, auto @@ -63,7 +64,7 @@ def getFormat(inputFile): infd = inputFile.fileno() inputFile.seek(0,0) - os.set_inheritable(infd, True) + set_inheritable(infd, True) with Popen(['ffprobe', '-loglevel', 'quiet', '-show_format', '-of', 'json', '-i', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as ffprobe: out, _ = ffprobe.communicate() out = json.load(BytesIO(out)) @@ -79,7 +80,7 @@ def getStreams(inputFile): infd = inputFile.fileno() inputFile.seek(0,0) - os.set_inheritable(infd, True) + set_inheritable(infd, True) with Popen(['ffprobe', '-loglevel', 'quiet', '-show_streams', '-of', 'json', '-i', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as ffprobe: out, _ = ffprobe.communicate() out = json.load(BytesIO(out)) @@ -220,8 +221,8 @@ def ffmpegConvert(inputFile, inputFormat, outputFile, outputFormat, duration): infd = inputFile.fileno() outfd = outputFile.fileno() - os.set_inheritable(infd, True) - os.set_inheritable(outfd, True) + set_inheritable(infd, True) + set_inheritable(outfd, True) # TODO: canvas size to be fixed ! with Popen(['ffmpeg', '-y', '-loglevel', 'quiet', '-progress', '/dev/stdout', '-canvas_size', '720x560', '-f', inputFormat, '-i', '/proc/self/fd/%d' % infd, '-map', '0:v', '-map', '0:a', '-map', '0:s', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-vcodec', 'copy', '-acodec', 'copy', '-scodec', 'dvdsub', @@ -241,7 +242,7 @@ def ffmpegConvert(inputFile, inputFormat, outputFile, outputFormat, duration): def getFramesInStream(inputFile, begin, end, streamKind, subStreamId=0): logger = logging.getLogger(__name__) infd = inputFile.fileno() - os.set_inheritable(infd, True) + set_inheritable(infd, True) with Popen(['ffprobe', '-loglevel', 'quiet', '-read_intervals', ('%s%%%s' %(begin, end)), '-show_entries', 'frame', '-select_streams', '%s:%d' % (streamKind, subStreamId), '-of', 'json', '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as ffprobe: out, _ = ffprobe.communicate() @@ -272,7 +273,7 @@ def getNearestIFrame(inputFile, timestamp, before=True, delta=timedelta(seconds= tbegin = zero infd = inputFile.fileno() - os.set_inheritable(infd, True) + set_inheritable(infd, True) logger.debug('Looking for iframe in [%s, %s]' % (tbegin, tend)) @@ -324,8 +325,8 @@ def extractMKVPart(inputFile, outputFile, begin, end): outputFile.seek(0,0) infd = inputFile.fileno() outfd = outputFile.fileno() - os.set_inheritable(infd, True) - os.set_inheritable(outfd, True) + set_inheritable(infd, True) + set_inheritable(outfd, True) warnings = [] with Popen(['mkvmerge', '-o', '/proc/self/fd/%d' % outfd, '--split', 'parts:%s-%s' % (begin, end), '/proc/self/fd/%d' % infd], stdout=PIPE, close_fds=False) as mkvmerge: pb = tqdm(TextIOWrapper(mkvmerge.stdout, encoding="utf-8"), total=100, unit='%', desc='Extraction') @@ -354,10 +355,9 @@ def extractPictures(inputFile, begin, nbFrames, width=640, height=480): inputFile.seek(0,0) infd = inputFile.fileno() - fdr, fdw = os.pipe() - os.set_inheritable(infd, True) - os.set_inheritable(fdr, False) - os.set_inheritable(fdw, True) + outfd = memfd_create('pictures', flags=0) + set_inheritable(outfd, True) + # PPM header # "P6\nWIDTH HEIGHT\n255\n" headerLen=2+1+ceil(log(width, 10))+1+ceil(log(height, 10))+1+3+1 logger.debug('Header length: %d' % headerLen) @@ -365,67 +365,48 @@ def extractPictures(inputFile, begin, nbFrames, width=640, height=480): length = imageLength*nbFrames logger.debug("Estimated length: %d" % length) - pg = trange(nbFrames) images = bytes() - with Popen(['ffmpeg', '-loglevel', 'quiet' ,'-y', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-s', '%dx%d'%(width, height), '-vframes', '%d'%nbFrames, '-c:v', 'ppm', '-f', 'image2pipe', '/proc/self/fd/%d' % fdw ], stdout=PIPE, close_fds=False) as ffmpeg: - while ffmpeg.poll() == None: - # TODO: understand why this line ends up in reading on an already closed file descriptor - # fds, _, _ = select([fdr, ffmpeg.stdout], [], [], .1) - fds, _, _ = select([fdr], [], [], .1) - if fdr in fds: - buf = os.read(fdr, imageLength) - # print("Read %d bytes of image. ffmpeg finished: %s" % (len(buf), ffmpeg.poll())) - if len(buf) == 0: - break - pg.update(len(buf)/imageLength) - images=images+buf - if ffmpeg.stdout in fds: - for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"): - logger.debug(line) - - status = ffmpeg.wait() - - # Finishing to read residual bytes from pipe - while True: - fd, _, _ = select([fdr], [], [], .1) - if fd != []: - buf = os.read(fdr, imageLength) - # print("Read %d bytes of image" % len(buf)) - if len(buf) == 0: - break - pg.update(len(buf)/imageLength) - images=images+buf - else: - # Nothing more to read - break - - logger.debug("%d bytes received." % len(images)) - - os.close(fdr) - os.close(fdw) - - if status != 0: - logger.error('Image extraction returns error code: %d' % status) - - return images + with Popen(['ffmpeg', '-loglevel', 'quiet' ,'-y', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-s', '%dx%d'%(width, height), '-vframes', '%d'%nbFrames, '-c:v', 'ppm', '-f', 'image2pipe', '/proc/self/fd/%d' % outfd ], stdout=PIPE, close_fds=False) as ffmpeg: + status = ffmpeg.wait() + if status != 0: + logger.error('Conversion failed with status code: %d' % status) + return None, None -def extractSound(inputFile, begin, outputFile, subChannel=0, nbPackets=0, sampleRate=48000, nbChannels=2): + lseek(outfd, 0, SEEK_SET) + images = read(outfd,length) + if len(images) != length: + logger.info("Received %d bytes but %d were expected." % (len(images), length)) + return None, None + + lseek(outfd, 0, SEEK_SET) + return images, outfd + +def extractSound(inputFile, begin, outputFileName, packetDuration, subChannel=0, nbPackets=0, sampleRate=48000, nbChannels=2): logger = logging.getLogger(__name__) inputFile.seek(0,0) - outputFile.seek(0,0) + outfd = memfd_create(outputFileName, flags=0) infd = inputFile.fileno() - outfd = outputFile.fileno() - os.set_inheritable(infd, True) - os.set_inheritable(outfd, True) - with Popen(['ffmpeg', '-y', '-loglevel', 'quiet', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-frames:a:%d' % subChannel, '%d' % nbPackets, - '-c:a', 'pcm_s32le', '-sample_rate', '%d' % sampleRate, '-channels', '%d' % nbChannels, '-f', 's32le', '/proc/self/fd/%d' % outfd], stdout=PIPE, close_fds=False) as ffmpeg: - for line in TextIOWrapper(ffmpeg.stdout, encoding="utf-8"): - logger.debug(line) + set_inheritable(infd, True) + set_inheritable(outfd, True) + sound = bytes() + length = int(nbChannels*sampleRate*4*nbPackets*packetDuration/1000) - status = ffmpeg.wait() - if status != 0: + with Popen(['ffmpeg', '-y', '-loglevel', 'quiet', '-ss', '%s'%begin, '-i', '/proc/self/fd/%d' % infd, '-frames:a:%d' % subChannel, '%d' % (nbPackets+1), + '-c:a', 'pcm_s32le', '-sample_rate', '%d' % sampleRate, '-channels', '%d' % nbChannels, '-f', 's32le', '/proc/self/fd/%d' % outfd], stdout=PIPE, close_fds=False) as ffmpeg: + status = ffmpeg.wait() + if status != 0: logger.error('Sound extraction returns error code: %d' % status) + return None, None + + lseek(outfd, 0, SEEK_SET) + sound = read(outfd, length) + + if (len(sound) != length): + logger.info("Received %d bytes but %d were expected (channels=%d, freq=%d, packets=%d, duration=%d ms)." % (len(sound), length, nbChannels, sampleRate, nbPackets, packetDuration)) + return None, None + + return sound, outfd def dumpPPM(pictures, prefix, temporaries): logger = logging.getLogger(__name__) @@ -463,14 +444,12 @@ def dumpPPM(pictures, prefix, temporaries): length=headerLen+3*width*height nbBytes = 0 while nbBytes < length: - nbBytes+=os.write(outfd, pictures[pos+nbBytes:pos+length]) + nbBytes+=write(outfd, pictures[pos+nbBytes:pos+length]) pos+=length picture+=1 - -def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpPictures=False): +def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, width, height, temporaries, dumpMemFD=False): logger = logging.getLogger(__name__) - # encoderParams = [ 'ffmpeg', '-y', '-loglevel', 'quiet' ] encoderParams = [ 'ffmpeg', '-y', '-loglevel', 'quiet' ] inputParams = [] codecsParams = [] @@ -479,8 +458,6 @@ def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, wid videoID=0 audioID=0 subTitleID=0 - audioFiles = {} - imagesPipes = {} for stream in streams: if stream['codec_type'] == 'video': logger.info("Extracting video stream v:%d" % videoID) @@ -506,20 +483,18 @@ def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, wid # TODO: adjust SAR and DAR # https://superuser.com/questions/907933/correct-aspect-ratio-without-re-encoding-video-file codec = stream['codec_name'] - imagesBytes = extractPictures(inputFile=inputFile, begin=begin, nbFrames=nbFrames, width=width, height=height) - if dumpPictures: + imagesBytes, memfd = extractPictures(inputFile=inputFile, begin=begin, nbFrames=nbFrames, width=width, height=height) + if imagesBytes == None: + sys.exit(-1) + + if dumpMemFD: dumpPPM(imagesBytes, '%s-%d' % (filesPrefix,videoID), temporaries) - # imagesBytes contains now a buffer of bytes that represents the pictures that have been dumped by ffmpeg. - fdr, fdw = os.pipe() - os.set_inheritable(fdr, True) - # The writalbe end of the pipe (fdw) must not be stayed opened in ffmpeg child, otherwise ffmpeg will not be able - # to detect the end of pictures data sent by the other end of the pipe it is reading from (fdr). - # We manually force non inheritance to be sure (although this should be the case since Python 3.4). - os.set_inheritable(fdw, False) - logger.debug("Creating pipes for images: r:%d w:%d" % (fdr,fdw)) - imagesPipes[videoID] = (imagesBytes, fdr, fdw) - inputParams.extend(['-framerate', '%f'%frameRate, '-f', 'image2pipe', '-i', '/proc/self/fd/%d' % fdr]) + # We rewind to zero the memory file descriptor + lseek(memfd, 0, SEEK_SET) + set_inheritable(memfd, True) + + inputParams.extend(['-framerate', '%f'%frameRate, '-f', 'image2pipe', '-i', '/proc/self/fd/%d' % memfd]) codecsParams.extend(['-c:v:%d' % videoID, codec, '-pix_fmt', pixelFormat, '-colorspace:v:%d' % videoID, colorSpace, '-color_primaries:v:%d' % videoID, colorPrimaries, '-color_trc:v:%d' % videoID, colorTransfer, '-color_range:v:%d' % videoID, colorRange]) videoID=videoID+1 @@ -535,14 +510,31 @@ def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, wid packets = getFramesInStream(inputFile=inputFile, begin=begin, end=end, streamKind='a', subStreamId=audioID) nbPackets = len(packets) logger.debug("Found %d packets to be extracted from audio track." % nbPackets) - try: - audioFiles[audioID] = open('%s-%d.pcm' % (filesPrefix,audioID), 'w') - except IOError: - logger.error('Impossible to create file: %s-%d.pcm' % (filesPrefix,audioID)) - return None - temporaries.append(audioFiles[audioID]) - extractSound(inputFile=inputFile, begin=begin, nbPackets=nbPackets, outputFile=audioFiles[audioID], sampleRate=sampleRate, nbChannels=nbChannels) - inputParams.extend(['-f', 's32le', '-ar', '%d'%sampleRate, '-ac', '%d'%nbChannels, '-i', '/proc/self/fd/%d' % audioFiles[audioID].fileno()]) + if(nbPackets > 0): + packetDuration = packets[0]['duration'] + + tmpname = '%s-%d.pcm' % (filesPrefix,audioID) + + soundBytes , memfd = extractSound(inputFile=inputFile, begin=begin, nbPackets=nbPackets, packetDuration=packetDuration, outputFileName=tmpname, sampleRate=sampleRate, nbChannels=nbChannels) + + if dumpMemFD: + try: + output = open(tmpname,'w') + except IOError: + logger.error('Impossible to create file: %s' % tmpname) + return None + + outfd = output.fileno() + pos = 0 + while pos < len(soundBytes): + pos+=write(outfd, soundBytes[pos:]) + temporaries.append(output) + + # We rewind to zero the memory file descriptor + lseek(memfd, 0, SEEK_SET) + set_inheritable(memfd, True) + + inputParams.extend(['-f', 's32le', '-ar', '%d'%sampleRate, '-ac', '%d'%nbChannels, '-i', '/proc/self/fd/%d' % memfd]) codecsParams.extend(['-c:a:%d' % audioID, codec, '-b:a:%d' % audioID, '%d' % bitRate]) audioID=audioID+1 elif stream['codec_type'] == 'subtitle': @@ -570,42 +562,18 @@ def extractAllStreams(inputFile, begin, end, streams, filesPrefix, nbFrames, wid return None outfd = output.fileno() - os.set_inheritable(outfd, True) + set_inheritable(outfd, True) # TODO: manage interlaced to previous parameters. encoderParams.extend(['-top', '1', '-flags:v', '+ilme+ildct', '-bsf:v', 'h264_mp4toannexb,dump_extra=freq=keyframe', '-f', 'matroska', '/proc/self/fd/%d' % outfd]) logger.info('Encoding video: %s' % fileName) with Popen(encoderParams, stdout=PIPE, close_fds=False) as ffmpeg: - pos = {} - totalLength = 0 - for vid in range(videoID): - pos[vid]=0 - img, fdr, _ = imagesPipes[vid] - # We close the end of the pipe used by ffmepg to read data. - os.close(fdr) - totalLength+=len(img) - length = 0 - pg = trange(totalLength) - while length 0: # We extract all frames between the beginning upto the frame that immediately preceeds the I-frame. - head = extractAllStreams(inputFile=mkv, begin=ts1, end=headIFrameTS, nbFrames=nbHeadFrames-1, filesPrefix='part-%d-head' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpPictures=args.dump) + head = extractAllStreams(inputFile=mkv, begin=ts1, end=headIFrameTS, nbFrames=nbHeadFrames-1, filesPrefix='part-%d-head' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump) subparts.append(head) # Creating MKV file that corresponds to current part between I-frames @@ -868,7 +836,7 @@ def main(): if nbTailFrames > 0: # We extract all frames between the I-frame (including it) upto the end. - tail = extractAllStreams(inputFile=mkv, begin=tailIFrameTS, end=ts2, nbFrames=nbTailFrames, filesPrefix='part-%d-tail' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpPictures=args.dump) + tail = extractAllStreams(inputFile=mkv, begin=tailIFrameTS, end=ts2, nbFrames=nbTailFrames, filesPrefix='part-%d-tail' % (partnum), streams=streams, width=width, height=height, temporaries=temporaries, dumpMemFD=args.dump) subparts.append(tail) logger.info('Merging: %s' % subparts) @@ -908,19 +876,19 @@ def main(): else: logger.error("Dropping subtitle: %s because it is missing language indication") - for lang in sts: - indexes = sts[lang] - if len(indexes) == 0: - # Nothing to do. This should not happen. - continue - if len(indexes) == 1: - index = indexes[0] - filename = 'essai-%s.srt' % lang - elif len(indexes) > 1: - nbsrt = 1 - for index in indexes: - filename = 'essai-%s-%d.srt' % (lang, nbsrt) - nbsrt+=1 + for lang in sts: + indexes = sts[lang] + if len(indexes) == 0: + # Nothing to do. This should not happen. + continue + if len(indexes) == 1: + index = indexes[0] + filename = 'essai-%s.srt' % lang + elif len(indexes) > 1: + nbsrt = 1 + for index in indexes: + filename = 'essai-%s-%d.srt' % (lang, nbsrt) + nbsrt+=1 if not args.keep: logger.info("Cleaning temporary files") @@ -928,7 +896,7 @@ def main(): path = os.path.realpath(f.name) logger.info("Removing: %s" % path) f.close() - os.unlink(path) + unlink(path) for c in checks: logger.info("Please check cut smoothness at: %s" % c)