diff --git a/integraality/pages_processor.py b/integraality/pages_processor.py index c74be8c..d5a66c0 100644 --- a/integraality/pages_processor.py +++ b/integraality/pages_processor.py @@ -1,210 +1,210 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ Bot to generate statistics """ import os import re from redis import StrictRedis from ww import f import pywikibot from pywikibot import pagegenerators from cache import RedisCache from property_statistics import ( ColumnConfigMaker, ColumnSyntaxException, PropertyStatistics, QueryException ) REQUIRED_CONFIG_FIELDS = ['selector_sparql', 'grouping_property', 'properties'] class ProcessingException(Exception): pass class ConfigException(ProcessingException): pass class NoEndTemplateException(ProcessingException): pass class NoStartTemplateException(ProcessingException): pass class PagesProcessor: def __init__(self, url="https://www.wikidata.org/wiki/", cache_client=None): self.site = pywikibot.Site(url=url) self.template_name = 'Property dashboard' self.end_template_name = 'Property dashboard end' self.summary = u'Update property usage stats' self.outputs = [] if not cache_client: host = os.getenv("REDIS_HOST", 'tools-redis.svc.eqiad.wmflabs') cache_client = StrictRedis(host=host, decode_responses=False) self.cache = RedisCache(cache_client=cache_client) def make_cache_key(self, page_title): return ":".join([self.site.code, page_title]).replace(" ", "_") def get_all_pages(self): template = pywikibot.Page(self.site, self.template_name, ns=10) - return pagegenerators.ReferringPageGenerator(template, onlyTemplateInclusion=True) + return template.getReferences(only_template_inclusion=True) @staticmethod def extract_elements_from_template_param(template_param): """Extract and sanitize the contents of a parsed template param.""" (field, _, value) = template_param.partition(u'=') return (field.strip(), value.replace('{{!}}', '|')) def parse_config_from_params(self, params): return { key: value for (key, value) in 
def make_stats_object_arguments_for_page(self, page):
    """Build, cache and return the statistics configuration of a page.

    The templates on ``page`` are read through ``page.templatesWithParams()``.
    The parameters of the first start template (``self.template_name``) are
    parsed with ``parse_config_from_params`` and ``parse_config``; the
    resulting config is stored in ``self.cache`` under the key produced by
    ``make_cache_key`` for the page title, and then returned.

    Raises:
        NoStartTemplateException: no template named ``self.template_name``
            is present on the page.
        NoEndTemplateException: no template named
            ``self.end_template_name`` is present on the page.

    Exceptions raised by ``parse_config`` are not caught here.
    """
    # Resolve each template title once; it is needed for both presence
    # checks and for selecting the start template.
    titled_templates = [
        (template.title(with_ns=False), params)
        for (template, params) in page.templatesWithParams()
    ]
    titles = [title for (title, _) in titled_templates]

    if self.template_name not in titles:
        msg = (
            "No start template '%s' found. "
            "The likely explanation is that inteGraality was invoked from a page that transcludes the page with the template. "
            "Please invoke inteGraality directly from the page with the template."
            % self.template_name
        )
        raise NoStartTemplateException(msg)

    if self.end_template_name not in titles:
        raise NoEndTemplateException("No end template '%s' provided" % self.end_template_name)

    start_params = [
        params for (title, params) in titled_templates
        if title == self.template_name
    ]
    if len(start_params) > 1:
        # Only the first start template is used; the others are ignored.
        pywikibot.warn("More than one template on the page %s" % page.title())

    parsed_config = self.parse_config_from_params(start_params[0])
    config = self.parse_config(parsed_config)
    self.cache.set_cache_value(self.make_cache_key(page.title()), config)
    return config
config['columns'] = self.parse_config_properties(config['properties']) del config['properties'] config['stats_for_no_group'] = bool(config.get('stats_for_no_group', False)) return config @staticmethod def parse_config_properties(properties_string): properties = [x.strip() for x in properties_string.split(',')] properties_data = [] for prop in properties: try: (key, title) = prop.split(':') except ValueError: (key, title) = (prop, None) if key: try: properties_data.append(ColumnConfigMaker.make(key, title)) except ColumnSyntaxException as e: raise ConfigException(e) return properties_data def replace_in_page(self, output, page_text): regex_text = f('({{{{{self.template_name}.*?(?