Page MenuHomePhabricator

pagegenerators.py.diff

Authored By
bzimport
Nov 22 2014, 2:09 AM
Size
6 KB
Referenced Files
None
Subscribers
None

pagegenerators.py.diff

Index: pagegenerators.py
===================================================================
--- pagegenerators.py (revision 11781)
+++ pagegenerators.py (working copy)
@@ -193,6 +193,19 @@
-page Work on a single page. Argument can also be given as
"-page:pagetitle".
+
+-followredirects Used with other arguments that specify a set of pages.
+ If a specified page is a redirect page, work on its
+ target page.
+
+-intersecting Argument to be used between two other arguments.
+ Work only on pages normally specified by both the
+ previous and the next argument.
+
+-excluding Argument to be used between two other arguments.
+ Work only on pages normally specified by the
+ previous argument but not by the next argument.
+
"""
docuReplacements = {'&params;': parameterHelp}
@@ -218,6 +231,9 @@
self.gens = []
self.namespaces = []
self.limit = None
+ self.followredirects = False
+ self.intersecting = False
+ self.excluding = False
def getNamespaces(self):
return map(int, self.namespaces)
@@ -240,6 +256,8 @@
genToReturn = DuplicateFilterPageGenerator(gensList, total=self.limit)
if (self.getNamespaces()):
genToReturn = NamespaceFilterPageGenerator(genToReturn, self.getNamespaces())
+ if self.followredirects:
+ genToReturn = FollowRedirectsGenerator(genToReturn)
return genToReturn
def getCategoryGen(self, arg, length, recurse=False):
@@ -501,6 +519,16 @@
gen = RegexFilterPageGenerator(site.allpages(), [regex])
elif arg.startswith('-yahoo'):
gen = YahooSearchPageGenerator(arg[7:])
+ elif arg == "-followredirects":
+ self.followredirects = True
+ return True
+ elif arg == "-intersecting":
+ self.intersecting = True
+ return True
+ elif arg == "-excluding":
+ self.intersecting = True
+ self.excluding = True
+ return True
elif arg.startswith('-'):
mode, log, user = arg.partition('log')
if log == 'log' and mode not in ['-', '-no']: #exclude -log, -nolog
@@ -522,7 +550,16 @@
pass
gen = LogpagesPageGenerator(number, mode[1:], user)
if gen:
- self.gens.append(gen)
+ if self.intersecting:
+ try: self.gens[-1] = IntersectionGenerator([self.gens[-1], gen], self.excluding)
+ except IndexError:
+ if pywikibot.verbose:
+ pywikibot.output('-intersecting or -excluding must be preceded by a page specifying argument.')
+ return False
+ self.intersecting = False
+ self.excluding = False
+ else:
+ self.gens.append(gen)
return self.getCombinedGenerator()
else:
return False
@@ -674,7 +711,8 @@
if site is None:
site = pywikibot.getSite()
f = codecs.open(filename, 'r', config.textfile_encoding)
- R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)') # title ends either before | or before ]]
+ R = re.compile(ur'\[\[(.+?)(?:\]\]|\||#)') # title ends either before | or before ]]
+ # or before # (ignore section)
pageTitle = None
for pageTitle in R.findall(f.read()):
# If the link is in interwiki format, the Page object may reside
@@ -689,6 +727,8 @@
title = title.strip()
if '|' in title:
title = title[:title.index('|')]
+ if '#' in title:
+ title = title[:title.index('#')] # ignore section
if title:
yield pywikibot.Page(site, title)
f.close()
@@ -1095,12 +1135,11 @@
Wraps around another generator. Yields all pages, but prevents
duplicates.
"""
- seenPages = dict()
+ seenPages = set()
count = 0
for page in generator:
- _page = u"%s:%s:%s" % (page._site.family.name, page._site.lang, page._title)
- if _page not in seenPages:
- seenPages[_page] = True
+ if page not in seenPages:
+ seenPages.add(page)
if total:
count += 1
if count > total:
@@ -1171,6 +1210,25 @@
for page in generator:
yield page
+def IntersectionGenerator(generators, excluding=False):
+    """
+    Wraps around a list of other generators. Yields, in the order of the first
+    generator, only the pages generated by all generators (a single generator
+    is passed through). If excluding is True, yields the pages of the first
+    generator that none of the other generators produced.
+    """
+    if not isinstance(generators, list):
+        generators = [generators]
+    # The other generators are consumed entirely before anything is yielded.
+    others = [set(gen) for gen in generators[1:]]
+    if excluding or not others:
+        seenPages = set().union(*others)
+    else:
+        seenPages = others[0].intersection(*others[1:])
+    for page in generators[0]:
+        if len(others) == 0 or (page in seenPages) != bool(excluding):
+            yield page
+
def CategoryGenerator(generator):
"""
Wraps around another generator. Yields the same pages, but as Category
@@ -1200,7 +1258,20 @@
if not page.isTalkPage():
yield page.toggleTalkPage()
+def FollowRedirectsGenerator(generator):
+    """
+    Wraps around another generator. Yields every page, replacing redirect
+    pages by their target; on a pywikibot.Error the page itself is yielded.
+    """
+    for page in generator:
+        try:
+            target = page.getRedirectTarget() if page.isRedirectPage() else page
+        except pywikibot.Error:
+            target = page
+        yield target
+
+
class PreloadingGenerator(object):
"""
Yields the same pages as generator generator. Retrieves 60 pages (or
@@ -1267,6 +1338,7 @@
def main(*args):
try:
+ _site = pywikibot.getSite()
genFactory = GeneratorFactory()
for arg in pywikibot.handleArgs(*args):
if not genFactory.handleArg(arg):
@@ -1278,7 +1350,9 @@
i = 0
for page in gen:
i += 1
- pywikibot.output("%4d: %s" % (i, page.title()), toStdout = True)
+ _title = page.title()
+ if page.site() != _site: _title = ":%s:%s" % (page.site(), _title) # _site is a Site object, not a callable; prefix titles from other sites
+ pywikibot.output("%4d: %s" % (i, _title), toStdout = True)
else:
pywikibot.showHelp()
finally:

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
11323
Default Alt Text
pagegenerators.py.diff (6 KB)

Event Timeline