"""Split the svwiki pages-meta-history dump into files by page-id range.

The script asks ``getlastidinbz2xml`` for the last page id in the dump,
builds one ``name:start:end`` file spec per block of ``STEP`` ids, and hands
the comma-joined specs to ``writeuptopageid``, writing into the current
directory.
"""
import subprocess
import sys

INPUT_FILE = '/mnt/data/xmldatadumps/public/svwiki/20190301/svwiki-20190301-pages-meta-history.xml.bz2'
STEP = 1000000


def parse_last_id(output):
    """Return the integer in the second colon-separated field of *output*.

    *output* may be ``bytes`` (what ``subprocess.check_output`` returns on
    Python 3) or text; bytes are decoded as UTF-8 before parsing.

    Raises:
        ValueError: if *output* has no ``:`` separator or the field after
            it is not an integer.
    """
    if isinstance(output, bytes):
        output = output.decode('utf-8')
    fields = output.split(':')
    if len(fields) < 2:
        raise ValueError('unexpected getlastidinbz2xml output: %r' % (output,))
    # int() tolerates surrounding whitespace such as a trailing newline.
    return int(fields[1])


def build_fspec(last_id, step=STEP):
    """Return the comma-joined ``svwiki-NN.bz2:start:end`` specs up to *last_id*.

    Ranges are *step* ids wide; the first one starts at 1 instead of 0 and
    the last one is clamped to *last_id*. An empty string is returned when
    *last_id* is less than 1.

    NOTE(review): consecutive ranges share their boundary id (``...:1000000``
    is followed by ``1000000:...``) and the final range ends at *last_id*
    itself. Whether that is gap-free and includes the last page depends on
    how writeuptopageid treats the end id -- confirm against the tool.
    """
    specs = []
    for file_id, start in enumerate(range(0, last_id, step), 1):
        end = min(start + step, last_id)
        specs.append('svwiki-%02d.bz2:%d:%d' % (file_id, max(start, 1), end))
    return ','.join(specs)


def main():
    """Run the split and return the exit status of ``writeuptopageid``."""
    output = subprocess.check_output(
        ['getlastidinbz2xml', '-f', INPUT_FILE, '--type', 'page'])
    fspec = build_fspec(parse_last_id(output))
    if not fspec:
        sys.stderr.write('no page ids found in %s\n' % INPUT_FILE)
        return 1
    command = ['writeuptopageid', '-fspec', fspec, '-i', INPUT_FILE, '-odir', '.']
    print(' '.join(command))
    # Propagate the tool's exit status so a failed split is visible to callers.
    return subprocess.call(command)


if __name__ == '__main__':
    sys.exit(main())