Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Paste
P6974
get_max_pageid.py
Active
Public
Actions
Authored by
ArielGlenn
on Apr 10 2018, 10:41 AM.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Award Token
Flag For Later
Tags
None
Referenced Files
F16896069: get_max_pageid.py
Apr 10 2018, 10:41 AM
2018-04-10 10:41:46 (UTC+0)
Subscribers
hoo
'''
retrieve and display the max page id for a given wiki
'''
import
sys
import
getopt
from
dumps.utils
import
PageAndEditStats
from
dumps.WikiDump
import
Wiki
,
Config
class DbStatsRetriever(object):
    '''
    Look up database statistics for one specific wiki.

    Construction reads the configuration, applies the per-project
    settings for the wiki, and builds the Wiki and PageAndEditStats
    objects that the lookup methods use.
    '''

    def __init__(self, configfile, wikiname):
        '''
        args:
            configfile: handed to Config() to load the configuration
            wikiname:   name of the wiki; used for the per-project
                        config parsing and passed on to both Wiki and
                        PageAndEditStats
        '''
        conf = Config(configfile)
        conf.parse_conffile_per_project(wikiname)
        wiki = Wiki(conf, wikiname)
        self.wiki = wiki
        self.stats = PageAndEditStats(wiki, wikiname)

    def get_max_pageid(self):
        '''
        Return whatever self.stats.get_total_pages() reports.

        Per the original author's note this is the result of
        "select MAX(page_id) from page", and None when the command was
        not successful. The query itself lives in PageAndEditStats,
        which is not visible from this file -- verify there if in doubt.
        '''
        max_pageid = self.stats.get_total_pages()
        return max_pageid
def usage(message):
    '''
    Write the help text for this script to stderr and exit with status 1.

    If message is non-empty it is written first, on a line of its own,
    ahead of the help text. This function never returns to the caller.
    '''
    help_text = """Usage: python get_max_pageid.py [options]
This script writes MAX(pageid) to stdout, for the specified
wiki.
--configfile (-c): path to configuration file
--wiki (-w): name of wiki for which to retrieve the max page id;
this should be the name of the wiki database, for
example, elwiki
--help (-h): display this message
"""
    err_stream = sys.stderr
    if message:
        err_stream.write(message + "\n")
    err_stream.write(help_text)
    sys.exit(1)
def check_args(configfile, wikiname, remainder):
    '''
    Complain about any missing or extra command-line args.

    args:
        configfile: value of the configfile option, or None if not given
        wikiname:   value of the wiki option, or None if not given
        remainder:  list of leftover non-option arguments from getopt

    Returns None when everything is in order. Otherwise usage() is
    called with an explanatory message; usage() writes to stderr and
    exits with status 1, so it never returns here and no separate
    sys.exit() is needed after it.
    '''
    if remainder:
        # report every leftover argument, not just the first one
        usage("Unknown option(s) specified: <%s>" % " ".join(remainder))
    if configfile is None:
        usage("mandatory arg 'configfile' is missing")
    if wikiname is None:
        usage("mandatory arg 'wiki' is missing")
def do_main():
    '''
    Command-line entry point: parse the options, look up the max page
    id for the requested wiki and print it on stdout.

    Exits with status 1 (via usage(), check_args() or sys.exit()) when
    option parsing fails, help is requested, a mandatory argument is
    missing or extra arguments are given, or when the lookup returns
    None.
    '''
    configfile = None
    wikiname = None

    try:
        # "-v" is accepted for backward compatibility but has no effect
        (options, remainder) = getopt.gnu_getopt(
            sys.argv[1:], "c:w:vh", ['configfile=', 'wiki=', 'help'])
    except getopt.GetoptError as err:
        # stdout is reserved for the max page id, so diagnostics go to
        # stderr along with the usage text
        sys.stderr.write(str(err) + "\n")
        usage("Unknown option specified")

    for (opt, val) in options:
        if opt in ["-c", "--configfile"]:
            configfile = val
        elif opt in ["-w", "--wiki"]:
            wikiname = val
        elif opt in ["-h", "--help"]:
            usage("Help for this script:")

    check_args(configfile, wikiname, remainder)

    retriever = DbStatsRetriever(configfile, wikiname)
    maxid = retriever.get_max_pageid()
    if maxid is None:
        # the lookup did not produce a value; signal failure to the caller
        sys.exit(1)
    # parenthesised single-argument form behaves the same under the
    # Python 2 print statement and the Python 3 print function
    print(maxid)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    do_main()
Event Timeline
ArielGlenn
created this paste.
Apr 10 2018, 10:41 AM
2018-04-10 10:41:46 (UTC+0)
ArielGlenn
mentioned this in
T190513: Make sure Wikidata entity dump scripts run for only about 1-2hours
.
Log In to Comment