def mktorrent(dbname, date, fname):
    """
    Create a .torrent file for one dump file, unless it already exists.

    The torrent is written to OUR_DIR/<dbname>/<date>/<fname>.torrent and is
    built from DUMP_DIR/<dbname>/<date>/<fname>. Every entry of TRACKERS is
    passed to mktorrent with -a, and for every entry of MIRRORS the URL
    <mirror>/<dbname>/<date>/<fname> is passed with -w. Whatever mktorrent
    prints on stdout is decoded and printed.

    :param dbname: name of the wiki's directory under DUMP_DIR
    :param date: name of the dump's directory under DUMP_DIR/<dbname>
    :param fname: name of the dump file inside that directory
    :raises OSError: if the destination directory does not exist and
        cannot be created
    :raises subprocess.CalledProcessError: if mktorrent exits non-zero
    """
    dest_dir = os.path.join(OUR_DIR, dbname, date)
    dest = os.path.join(dest_dir, fname + '.torrent')
    if os.path.exists(dest):
        print('Skipping creation of ' + fname)
        return

    args = [MKTORRENT]
    for tracker in TRACKERS:
        args.extend(['-a', tracker])
    for mirror in MIRRORS:
        args.extend(['-w', '{mirror}/{dbname}/{date}/{fname}'.format(
            mirror=mirror, dbname=dbname, date=date, fname=fname)])
    args.extend([
        '-o', dest,
        '-l', '20',  # piece length: 2^20 bytes
        os.path.join(DUMP_DIR, dbname, date, fname),
    ])

    try:
        os.makedirs(dest_dir, exist_ok=True)
    except OSError:
        # Before Python 3.4.1, if exist_ok was True and the directory existed,
        # makedirs() would still raise an error if mode did not match the mode
        # of the existing directory. Tolerate only that situation: if the
        # directory is really missing, surface the original error instead of
        # letting mktorrent fail later with a less helpful message.
        if not os.path.isdir(dest_dir):
            raise

    print(subprocess.check_output(args).decode())