Adding doc string for more functions.
This commit is contained in:
190
removeads.py
190
removeads.py
@@ -57,12 +57,20 @@ from iso639.exceptions import InvalidLanguageValue
|
|||||||
|
|
||||||
@typechecked
|
@typechecked
|
||||||
def check_required_tools() -> tuple[bool,dict[str,str]]:
|
def check_required_tools() -> tuple[bool,dict[str,str]]:
|
||||||
"""Check if required external tools are installed.
|
"""
|
||||||
|
Checks if all required external tools are installed.
|
||||||
|
|
||||||
|
This function verifies the presence of required and optional external tools on the system.
|
||||||
|
It returns a tuple containing a boolean indicating whether all optional tools are installed,
|
||||||
|
along with a dictionary containing the paths to all tools.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
None
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple[bool, list[str]] : does all optional tools are installed and the paths of all tools.
|
tuple[bool, dict[str, str]]:
|
||||||
|
- bool: True if all optional tools are installed, False otherwise
|
||||||
|
- dict[str, str]: dictionary containing the paths to all tools
|
||||||
"""
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
all_optional_tools = True
|
all_optional_tools = True
|
||||||
@@ -88,13 +96,20 @@ def check_required_tools() -> tuple[bool,dict[str,str]]:
|
|||||||
|
|
||||||
@typechecked
|
@typechecked
|
||||||
def get_tesseract_supported_lang(tesseract_path:str) -> dict[Lang, str]|None:
|
def get_tesseract_supported_lang(tesseract_path:str) -> dict[Lang, str]|None:
|
||||||
"""Returns the set of natural languages supported by Tesseract OCR tool.
|
"""
|
||||||
|
Retrieves the set of natural languages supported by the Tesseract OCR tool.
|
||||||
|
|
||||||
|
This function runs the Tesseract binary with the --list-langs option and parses the output
|
||||||
|
to extract the supported languages.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
tesseract_path: str: path to tesseract binary.
|
tesseract_path (str): The path to the Tesseract binary.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict[str, str] : a mapping ....
|
dict[Lang, str] | None:
|
||||||
|
- A dictionary mapping Lang objects to their corresponding language codes
|
||||||
|
(e.g., "eng" for English)
|
||||||
|
- None if an error occurs while running the Tesseract binary
|
||||||
"""
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
res = {}
|
res = {}
|
||||||
@@ -123,6 +138,25 @@ def get_tesseract_supported_lang(tesseract_path:str) -> dict[Lang, str]|None:
|
|||||||
|
|
||||||
@typechecked
|
@typechecked
|
||||||
def get_frame_rate(ffprobe_path:str, input_file: IO[bytes]) -> float|None:
|
def get_frame_rate(ffprobe_path:str, input_file: IO[bytes]) -> float|None:
|
||||||
|
"""
|
||||||
|
Retrieves the frame rate of a video file using the ffprobe tool.
|
||||||
|
|
||||||
|
This function runs the ffprobe binary with the specified input file and parses the output
|
||||||
|
to extract the frame rate.
|
||||||
|
It uses two methods to calculate the frame rate: one based on the timestamp of the frames
|
||||||
|
and another based on the duration of the frames.
|
||||||
|
If the two calculated frame rates are significantly different, the function returns None.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ffprobe_path (str): The path to the ffprobe binary.
|
||||||
|
input_file (IO[bytes]): The input video file.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
float | None:
|
||||||
|
- The frame rate of the video file as a floating-point number
|
||||||
|
- None if an error occurs while running the ffprobe binary or if the calculated
|
||||||
|
frame rates are inconsistent
|
||||||
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
infd = input_file.fileno()
|
infd = input_file.fileno()
|
||||||
@@ -278,7 +312,8 @@ def extract_srt(mkvextract:str, filename:str, subtitles:dict[str, list[int]],
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
@typechecked
|
@typechecked
|
||||||
def do_ocr(vobsubocr:str, idxs: list[tuple[str,str,str,str]], duration:timedelta, temporaries:list[IO[bytes]], dump_mem_fd:bool=False):
|
def do_ocr(vobsubocr:str, idxs: list[tuple[str,str,str,str]], duration:timedelta,
|
||||||
|
temporaries:list[IO[bytes]], dump_mem_fd:bool=False):
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
res = []
|
res = []
|
||||||
|
|
||||||
@@ -407,6 +442,22 @@ def get_codec_private_data_from_mkv(mkvinfo_path:str,
|
|||||||
# ISO/IEC 14496-15
|
# ISO/IEC 14496-15
|
||||||
@typechecked
|
@typechecked
|
||||||
def read_bit(buf:bytes, bit_position: int) -> tuple[int, int]:
|
def read_bit(buf:bytes, bit_position: int) -> tuple[int, int]:
|
||||||
|
"""
|
||||||
|
Read a single bit from a byte buffer.
|
||||||
|
|
||||||
|
This function is part of the implementation of the H.264/AVC video compression standard,
|
||||||
|
as specified in ITU-T Rec. H.264 (02/2016) and ISO/IEC 14496-15.
|
||||||
|
It takes a byte buffer and a bit position as input, and returns a tuple containing
|
||||||
|
the updated bit position and the value of the bit at the specified position.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
buf (bytes): The byte buffer to read from.
|
||||||
|
bit_position (int): The position of the bit to read, starting from 0.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tuple[int, int]: A tuple containing the updated bit position (bit_position + 1) and the
|
||||||
|
value of the bit (0 or 1).
|
||||||
|
"""
|
||||||
byte_position = floor(floor(bit_position/8))
|
byte_position = floor(floor(bit_position/8))
|
||||||
byte = buf[byte_position]
|
byte = buf[byte_position]
|
||||||
bit = (byte >> (7-(bit_position % 8))) & 1
|
bit = (byte >> (7-(bit_position % 8))) & 1
|
||||||
@@ -414,6 +465,21 @@ def read_bit(buf:bytes, bit_position: int) -> tuple[int, int]:
|
|||||||
|
|
||||||
@typechecked
|
@typechecked
|
||||||
def read_boolean(buf:bytes, bit_position: int) -> tuple[int, bool]:
|
def read_boolean(buf:bytes, bit_position: int) -> tuple[int, bool]:
|
||||||
|
"""
|
||||||
|
Read a boolean value from a byte buffer.
|
||||||
|
|
||||||
|
This function reads a single bit from the byte buffer at the specified position and interprets
|
||||||
|
it as a boolean value.
|
||||||
|
It returns a tuple containing the updated bit position and the boolean value.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
buf (bytes): The byte buffer to read from.
|
||||||
|
bit_position (int): The position of the bit to read, starting from 0.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tuple[int, bool]: A tuple containing the updated bit position and the boolean value
|
||||||
|
(True if the bit is 1, False if the bit is 0).
|
||||||
|
"""
|
||||||
bit_position, b = read_bit(buf, bit_position)
|
bit_position, b = read_bit(buf, bit_position)
|
||||||
return bit_position, b==1
|
return bit_position, b==1
|
||||||
|
|
||||||
@@ -2006,6 +2072,25 @@ def get_streams(ffprobe_path:str, input_file: IO[bytes]) -> list|None:
|
|||||||
|
|
||||||
@typechecked
|
@typechecked
|
||||||
def with_subtitles(ffprobe_path:str, input_file: IO[bytes]) -> bool:
|
def with_subtitles(ffprobe_path:str, input_file: IO[bytes]) -> bool:
|
||||||
|
"""
|
||||||
|
Checks if a media file contains subtitles using the ffprobe tool.
|
||||||
|
|
||||||
|
This function runs the ffprobe binary with the specified input file and parses the output
|
||||||
|
to determine if the file contains subtitles.
|
||||||
|
It returns True if at least one subtitle stream is found, False otherwise.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ffprobe_path (str): The path to the ffprobe binary.
|
||||||
|
input_file (IO[bytes]): The input media file.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool:
|
||||||
|
- True if the media file contains at least one subtitle stream
|
||||||
|
- False if:
|
||||||
|
- the media file does not contain any subtitle streams
|
||||||
|
- an error occurs while running the ffprobe binary
|
||||||
|
- the streams information cannot be retrieved from the media file
|
||||||
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
infd = input_file.fileno()
|
infd = input_file.fileno()
|
||||||
@@ -2027,6 +2112,23 @@ def with_subtitles(ffprobe_path:str, input_file: IO[bytes]) -> bool:
|
|||||||
|
|
||||||
@typechecked
|
@typechecked
|
||||||
def parse_timestamp(ts:str) -> timedelta|None:
|
def parse_timestamp(ts:str) -> timedelta|None:
|
||||||
|
"""
|
||||||
|
Parse a timestamp string into a timedelta object.
|
||||||
|
|
||||||
|
This function takes a string representing a timestamp in the format HH:MM:SS[.us] and returns
|
||||||
|
a timedelta object representing the corresponding time interval.
|
||||||
|
The timestamp string can have an optional microsecond component.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ts (str): The timestamp string to parse.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
timedelta | None:
|
||||||
|
- A timedelta object representing the parsed timestamp
|
||||||
|
- None if:
|
||||||
|
- the timestamp string is not in the correct format
|
||||||
|
- the timestamp values are out of range (e.g. hour > 23, minute > 59, etc.)
|
||||||
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
ts_reg_exp = (r'^(?P<hour>[0-9]{1,2}):(?P<minute>[0-9]{1,2})'
|
ts_reg_exp = (r'^(?P<hour>[0-9]{1,2}):(?P<minute>[0-9]{1,2})'
|
||||||
@@ -2068,7 +2170,24 @@ def parse_timestamp(ts:str) -> timedelta|None:
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
@typechecked
|
@typechecked
|
||||||
def parse_time_interval(interval:str) -> tuple[timedelta,timedelta]:
|
def parse_time_interval(interval: str) -> tuple[timedelta, timedelta] | None:
|
||||||
|
"""
|
||||||
|
Parse a time interval string into a tuple of two timedelta objects.
|
||||||
|
|
||||||
|
This function takes a string representing a time interval in the format HH:MM:SS[.ms]-HH:MM:SS[.ms] and returns a tuple of two timedelta objects representing the start and end times of the interval.
|
||||||
|
The time interval string can have an optional millisecond component.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
interval (str): The time interval string to parse.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tuple[timedelta, timedelta] | None:
|
||||||
|
- A tuple of two timedelta objects representing the start and end times of the interval
|
||||||
|
- None if:
|
||||||
|
- the time interval string is not in the correct format
|
||||||
|
- the time values are out of range (e.g. hour > 23, minute > 59, etc.)
|
||||||
|
- the end time is before the start time (non-monotonic interval)
|
||||||
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
interval_reg_exp = (r'^(?P<hour1>[0-9]{1,2}):(?P<minute1>[0-9]{1,2}):(?P<second1>[0-9]{1,2})'
|
interval_reg_exp = (r'^(?P<hour1>[0-9]{1,2}):(?P<minute1>[0-9]{1,2}):(?P<second1>[0-9]{1,2})'
|
||||||
@@ -2144,6 +2263,22 @@ def parse_time_interval(interval:str) -> tuple[timedelta,timedelta]:
|
|||||||
@typechecked
|
@typechecked
|
||||||
def compare_time_interval(interval1: tuple[timedelta, timedelta],
|
def compare_time_interval(interval1: tuple[timedelta, timedelta],
|
||||||
interval2: tuple[timedelta, timedelta]) -> int:
|
interval2: tuple[timedelta, timedelta]) -> int:
|
||||||
|
"""
|
||||||
|
Compare two time intervals.
|
||||||
|
|
||||||
|
This function compares two time intervals represented by tuples of two timedelta objects.
|
||||||
|
It returns an integer indicating the relationship between the two intervals:
|
||||||
|
- -1 if interval 1 is before interval 2
|
||||||
|
- 1 if interval 1 is after interval 2
|
||||||
|
- 0 if the two intervals overlap or are equal
|
||||||
|
|
||||||
|
Args:
|
||||||
|
interval1 (tuple[timedelta, timedelta]): The first time interval
|
||||||
|
interval2 (tuple[timedelta, timedelta]): The second time interval
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
int: The relationship between the two time intervals
|
||||||
|
"""
|
||||||
ts11,ts12 = interval1
|
ts11,ts12 = interval1
|
||||||
ts21,ts22 = interval2
|
ts21,ts22 = interval2
|
||||||
|
|
||||||
@@ -2441,6 +2576,27 @@ def extract_mkv_part(mkvmerge_path:str, input_file:IO[bytes], output_file:IO[byt
|
|||||||
@typechecked
|
@typechecked
|
||||||
def extract_pictures(ffmpeg_path:str, input_file:IO[bytes], begin:timedelta, nb_frames:int,
|
def extract_pictures(ffmpeg_path:str, input_file:IO[bytes], begin:timedelta, nb_frames:int,
|
||||||
width:int=640, height:int=480) -> tuple[bytes,int]|tuple[None,None]:
|
width:int=640, height:int=480) -> tuple[bytes,int]|tuple[None,None]:
|
||||||
|
"""
|
||||||
|
Extract pictures from a video file using FFmpeg.
|
||||||
|
|
||||||
|
This function runs the FFmpeg binary to extract a specified number of frames from a video file,
|
||||||
|
starting at a given time.
|
||||||
|
The extracted frames are stored in memory as PPM images and returned as a tuple containing
|
||||||
|
the image data and a file descriptor to the memory created by memfd_create.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ffmpeg_path (str): The path to the FFmpeg binary.
|
||||||
|
input_file (IO[bytes]): The input video file.
|
||||||
|
begin (timedelta): The start time of the extraction.
|
||||||
|
nb_frames (int): The number of frames to extract.
|
||||||
|
width (int, optional): The width of the extracted images. Defaults to 640.
|
||||||
|
height (int, optional): The height of the extracted images. Defaults to 480.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tuple[bytes, int] | tuple[None, None]:
|
||||||
|
- A tuple containing the extracted image data as bytes and a file descriptor
|
||||||
|
- A tuple containing None, None if the extraction fails
|
||||||
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
infd = input_file.fileno()
|
infd = input_file.fileno()
|
||||||
@@ -2516,6 +2672,26 @@ def extract_sound(ffmpeg_path:str, input_file: IO[bytes], begin:timedelta, outpu
|
|||||||
|
|
||||||
@typechecked
|
@typechecked
|
||||||
def dump_ppm(pictures: bytes, prefix: str, temporaries: list[IO[bytes]]) -> None:
|
def dump_ppm(pictures: bytes, prefix: str, temporaries: list[IO[bytes]]) -> None:
|
||||||
|
"""
|
||||||
|
Dump PPM pictures from a bytes buffer to files.
|
||||||
|
|
||||||
|
This function takes a bytes buffer containing PPM pictures, a prefix for the output file names, and a list of temporary files.
|
||||||
|
It extracts each PPM picture from the buffer, checks its validity, and writes it to a file.
|
||||||
|
The output files are named according to the prefix and a zero-padded three-digit number.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pictures (bytes): The bytes buffer containing the PPM pictures.
|
||||||
|
prefix (str): The prefix for the output file names.
|
||||||
|
temporaries (list[IO[bytes]]): A list of temporary files that will be used to store the output files.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
None, but logs errors if:
|
||||||
|
- the PPM picture is not valid (e.g. wrong magic number, dimensions, or color encoding)
|
||||||
|
- an I/O error occurs while creating or writing to an output file
|
||||||
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# "P6\nWIDTH HEIGHT\n255\n"
|
# "P6\nWIDTH HEIGHT\n255\n"
|
||||||
|
|||||||
Reference in New Issue
Block a user