'''
retrieve and display the max page id for a given wiki
'''
import sys
import getopt
from dumps.utils import PageAndEditStats
from dumps.WikiDump import Wiki, Config


class DbStatsRetriever(object):
    '''
    retrieve various stats from the db for the specific wiki
    '''
    def __init__(self, configfile, wikiname):
        '''
        args:
            configfile -- path to the configuration file handed to Config
            wikiname   -- name of the wiki; used for the per-project config
                          settings and passed on to Wiki and PageAndEditStats
        '''
        config = Config(configfile)
        # load the settings specific to this wiki before building the
        # Wiki object from the config
        config.parse_conffile_per_project(wikiname)
        self.wiki = Wiki(config, wikiname)
        self.stats = PageAndEditStats(self.wiki, wikiname)

    def get_max_pageid(self):
        '''
        return whatever PageAndEditStats.get_total_pages() returns.

        the caller in this script treats a None result as failure.

        NOTE(review): the original docstring described the result as that
        of "select MAX(page_id) from page"; this is not verifiable from
        this file -- confirm against PageAndEditStats.get_total_pages.
        '''
        return self.stats.get_total_pages()


def usage(message):
    '''
    display a message about how to use this script on stderr,
    preceded by the (optional) message passed in, and exit with 1

    args:
        message -- leading sentence or two to display first; skipped
                   if empty or None
    '''
    if message:
        sys.stderr.write(message + "\n")
    usage_message = """Usage: python get_max_pageid.py [options]

This script writes MAX(pageid) to stdout, for the specified wiki.

--configfile (-c): path to configuration file
--wiki (-w): name of wiki for which to retrieve the max page id;
    this should be the name of the wiki database, for example, elwiki
--help (-h): display this message
"""
    sys.stderr.write(usage_message)
    sys.exit(1)


def check_args(configfile, wikiname, remainder):
    '''
    whine about any missing/extra args and exit with 1

    args:
        configfile -- value of the configfile option or None if not given
        wikiname   -- value of the wiki option or None if not given
        remainder  -- list of leftover non-option args from getopt

    usage() never returns (it exits with 1), so only the first
    problem found is reported.
    '''
    if remainder:
        usage("Unknown option(s) specified: <%s>" % remainder[0])
    if configfile is None:
        usage("mandatory arg 'configfile' is missing")
    if wikiname is None:
        usage("mandatory arg 'wiki' is missing")


def do_main():
    '''
    parse the command line args, retrieve the max page id for the
    specified wiki and display it on stdout; exit with 1 if the args
    are bad or if no value (None) was retrieved.

    all complaints about args go to stderr so that stdout only
    ever contains the page id.
    '''
    configfile = None
    wikiname = None

    try:
        # 'v' is accepted as a short option for backwards compatibility
        # but nothing below acts on it
        (options, remainder) = getopt.gnu_getopt(
            sys.argv[1:], "c:w:vh", ['configfile=', 'wiki=', 'help'])
    except getopt.GetoptError as err:
        # keep stdout clean for callers capturing the page id
        sys.stderr.write(str(err) + "\n")
        usage("Unknown option specified")

    for (opt, val) in options:
        if opt in ["-c", "--configfile"]:
            configfile = val
        elif opt in ["-w", "--wiki"]:
            wikiname = val
        elif opt in ["-h", "--help"]:
            usage("Help for this script:")

    check_args(configfile, wikiname, remainder)

    retriever = DbStatsRetriever(configfile, wikiname)
    maxid = retriever.get_max_pageid()
    if maxid is None:
        sys.exit(1)
    # single-argument form: same output under python 2 and python 3
    print(maxid)


if __name__ == '__main__':
    do_main()