Page MenuHomePhabricator
Paste P6974

get_max_pageid.py
ActivePublic

Authored by ArielGlenn on Apr 10 2018, 10:41 AM.
'''
Retrieve and display the max page id for a given wiki.
'''
import sys
import getopt
from dumps.utils import PageAndEditStats
from dumps.WikiDump import Wiki, Config
class DbStatsRetriever(object):
    '''
    look up page statistics in the database of one specific wiki
    '''

    def __init__(self, configfile, wikiname):
        '''
        load the configuration, apply the per-project settings for
        the wiki, and set up the stats object used for the lookups

        configfile: path to the configuration file
        wikiname:   name of the wiki whose stats are wanted
        '''
        conf = Config(configfile)
        conf.parse_conffile_per_project(wikiname)
        self.wiki = Wiki(conf, wikiname)
        self.stats = PageAndEditStats(self.wiki, wikiname)

    def get_max_pageid(self):
        '''
        hand back whatever the stats object reports via
        get_total_pages(), unchanged

        NOTE(review): this is expected to be the result of
        select MAX(page_id) from page, with None meaning the query
        did not succeed -- confirm against PageAndEditStats
        '''
        max_pageid = self.stats.get_total_pages()
        return max_pageid
def usage(message=None):
    '''
    display a message about how to use this script on stderr and
    exit with 1; this function never returns

    message: optional leading sentence or two, written on its own
             line before the usage text; skipped when empty or None
    '''
    if message:
        sys.stderr.write(message + "\n")
    usage_message = """Usage: python get_max_pageid.py [options]
This script writes MAX(pageid) to stdout, for the specified
wiki.
--configfile (-c): path to configuration file
--wiki (-w): name of wiki for which to retrieve the max page id;
this should be the name of the wiki database, for
example, elwiki
--help (-h): display this message
"""
    sys.stderr.write(usage_message)
    sys.exit(1)
def check_args(configfile, wikiname, remainder):
    '''
    whine about any missing/extra args and exit with 1

    configfile: value of the configfile option, or None if not given
    wikiname:   value of the wiki option, or None if not given
    remainder:  list of leftover command line args that were not
                consumed as options; must be empty

    returns None when everything is in order; otherwise usage() is
    called, which shows the complaint and exits the script with 1
    '''
    if remainder:
        # show every leftover arg, not just the first one
        usage("Unknown option(s) specified: <%s>" % " ".join(remainder))
    # usage() always exits, so nothing more is needed after each call
    if configfile is None:
        usage("mandatory arg 'configfile' is missing")
    if wikiname is None:
        usage("mandatory arg 'wiki' is missing")
def do_main():
    '''
    parse the command line, look up the max page id for the
    requested wiki and display it on stdout

    exits with 1 if the options are bad or incomplete, if help was
    requested, or if no max page id was returned; in that last case
    nothing further is reported
    '''
    configfile = None
    wikiname = None
    try:
        # "v" is accepted as a flag here but nothing below acts on it
        (options, remainder) = getopt.gnu_getopt(
            sys.argv[1:], "c:w:vh",
            ['configfile=', 'wiki=', 'help'])
    except getopt.GetoptError as err:
        # stdout is reserved for the page id, so the parser's
        # complaint goes to stderr along with the usage text
        sys.stderr.write(str(err) + "\n")
        usage("Unknown option specified")
    for (opt, val) in options:
        if opt in ["-c", "--configfile"]:
            configfile = val
        elif opt in ["-w", "--wiki"]:
            wikiname = val
        elif opt in ["-h", "--help"]:
            usage("Help for this script:")
    check_args(configfile, wikiname, remainder)
    retriever = DbStatsRetriever(configfile, wikiname)
    maxid = retriever.get_max_pageid()
    if maxid is None:
        sys.exit(1)
    # single-argument parenthesized print behaves the same under
    # python 2 and python 3
    print(maxid)
# run the lookup only when this file is executed as a script,
# not when it is imported as a module
if __name__ == "__main__":
    do_main()