#!/usr/bin/python3
"""Extract .mp3 audio from a list of video files.

This command will spawn off multiple ffmpeg worker processes in
parallel.  It won’t overwrite existing .mp3 files unless they are out
of date with respect to the input files, so you can run it repeatedly
with the same wildcard filenames as input.

A file that fails to convert (for example one with no audio stream) is
reported with a traceback and skipped; the remaining files are still
processed.  ffmpeg is started with its standard input redirected from
the null device, which is intended to stop concurrent ffmpeg processes
from leaving the terminal in a bad state.

BUGS: output from concurrent ffmpeg processes is interleaved.  Input
files in different directories that share a base name map to the same
output file.

"""

import argparse
import threading  # I tried using multiprocessing but kept getting empty queues
import os
import queue
import re
import subprocess
import traceback
from contextlib import suppress


def parse_args():
    """Parse the command line and return the argparse namespace.

    The namespace carries ``jobs``, ``input_files``, ``dest_dir`` and
    ``verbose``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-j', '--jobs', type=int, default=guess_cpus()+2,
                        help='number of simultaneous jobs to run'
                        + ' (default %(default)s)')
    parser.add_argument('input_files', nargs='+',
                        help='files to extract audio from')
    parser.add_argument('-d', '--dest-dir', default='rip',
                        help='directory to put the audio files into'
                        + ' (default %(default)s)')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='explain the disposition of each file')
    return parser.parse_args()


# Matches the per-CPU entries (cpu0, cpu1, ...) in /sys/devices/system/cpu.
cpu_pat = re.compile(r'cpu\d+$')


def guess_cpus():
    """Return an estimate of the number of CPUs, always at least 1.

    Prefers ``os.cpu_count()``; if that cannot tell, counts the ``cpuN``
    entries under /sys/devices/system/cpu; if that fails too, returns 3.
    """
    count = os.cpu_count()
    if count:
        return count

    try:
        found = sum(1 for fname in os.listdir('/sys/devices/system/cpu')
                    if cpu_pat.match(fname))
    except OSError:
        found = 0
    return found or 3  # 3 should be a pretty safe value


def _convert(args, infile, me):
    """Extract the audio of one input file into ``args.dest_dir``.

    Does nothing if the output .mp3 exists and is newer than the input.
    Otherwise ffmpeg writes to a temporary name that is renamed over the
    final name only on success, so a partial file never looks finished.

    Raises OSError if the input cannot be examined or ffmpeg cannot be
    started, and Exception if ffmpeg exits with a nonzero status.
    """
    basename = os.path.splitext(os.path.basename(infile))[0]
    outfile_tmp = os.path.join(args.dest_dir, '.tmp.' + basename + '.mp3')
    outfile = os.path.join(args.dest_dir, basename + '.mp3')
    if args.verbose:
        print('start', me, infile, '→', outfile)

    in_stat = os.stat(infile)
    out_mtime = 0  # safe if the files aren’t from before 1971
    with suppress(OSError):
        out_mtime = os.stat(outfile).st_mtime

    if in_stat.st_mtime < out_mtime:
        if args.verbose:
            print('up-to-date', outfile)
        return

    try:
        # Remove any leftover from an interrupted earlier run.
        with suppress(OSError):
            os.unlink(outfile_tmp)

        cmdline = ['ffmpeg', '-i', infile, outfile_tmp]
        if args.verbose:
            print('run', cmdline)

        # Keep ffmpeg away from the terminal's stdin; several children
        # sharing it is the suspected cause of a garbled terminal.
        status = subprocess.call(cmdline, stdin=subprocess.DEVNULL)
        if status != 0:
            raise Exception("ffmpeg exited with", status)
        if args.verbose:
            print('converted', infile)

        os.rename(outfile_tmp, outfile)
        if args.verbose:
            print('done', infile)
    finally:
        # After a successful rename the temporary name is already gone.
        with suppress(OSError):
            os.unlink(outfile_tmp)


def worker(args, q):
    """Convert input files taken from queue ``q`` until it is empty.

    A failure on one file is reported with a traceback and the worker
    carries on with the next file instead of exiting.
    """
    me = id(threading.current_thread()) % 256  # short, non-unique log tag

    while True:
        try:
            infile = q.get(block=False)
        except queue.Empty:
            if args.verbose:
                print("queue empty, exiting", me)
            return

        try:
            _convert(args, infile, me)
        except Exception:
            # One bad input must not stop the rest of the queue.
            traceback.print_exc()


def main():
    """Queue every input file and run the worker threads to completion."""
    args = parse_args()
    q = queue.Queue()

    os.makedirs(args.dest_dir, exist_ok=True)

    for filename in args.input_files:
        q.put(filename)

    # More threads than files would only find an empty queue and exit.
    n_workers = min(args.jobs, len(args.input_files))
    workers = [threading.Thread(target=worker, args=(args, q))
               for _ in range(n_workers)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()


if __name__ == '__main__':
    main()