diff --git a/integraality/column.py b/integraality/column.py index 0dd7c62..33eb01a 100644 --- a/integraality/column.py +++ b/integraality/column.py @@ -1,282 +1,273 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ Column configuration classes """ import json import os from enum import Enum class GroupingType(Enum): YEAR = "year" class ColumnSyntaxException(Exception): pass class ColumnMaker: @staticmethod def make(key, title): current_dir = os.path.dirname(__file__) wikiprojects_path = os.path.join(current_dir, "wikiprojects.json") wikiprojects = json.load(open(wikiprojects_path, "r")) if key.startswith("P"): splitted = key.split("/") if len(splitted) == 3: (property_name, value, qualifier) = splitted elif len(splitted) == 2: (property_name, value, qualifier) = (splitted[0], None, splitted[1]) else: (property_name, value, qualifier) = (key, None, None) return PropertyColumn( property=property_name, title=title, qualifier=qualifier, value=value ) elif key.startswith("L"): return LabelColumn(language=key[1:]) elif key.startswith("D"): return DescriptionColumn(language=key[1:]) elif key in wikiprojects: wikiproject = wikiprojects.get(key) return SitelinkColumn(project=key, title=title) else: raise ColumnSyntaxException("Unknown column syntax %s" % key) class AbstractColumn: def get_info_query(self, property_statistics): """ Get the usage counts for a column for the groupings :return: (str) SPARQL query """ grouping_selector = "\n".join(property_statistics.grouping_configuration.get_grouping_selector()) query = f""" SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {{ ?entity {property_statistics.selector_sparql} . {grouping_selector} FILTER(EXISTS {{{self.get_filter_for_info()} }}) }} GROUP BY ?grouping HAVING (?count >= {property_statistics.property_threshold}) ORDER BY DESC(?count) LIMIT 1000 """ return query def get_totals_query(self, property_statistics): """ Get the totals of entities with the column set. :return: (str) SPARQL query """ query = f""" SELECT (COUNT(*) as ?count) WHERE {{ ?entity {property_statistics.selector_sparql} FILTER(EXISTS {{{self.get_filter_for_info()} }}) }} """ return query def get_info_no_grouping_query(self, property_statistics): """ Get the usage counts for a column without a grouping :return: (str) SPARQL query """ query = f""" SELECT (COUNT(*) AS ?count) WHERE {{ ?entity {property_statistics.selector_sparql} . MINUS {{ ?entity wdt:{property_statistics.grouping_configuration.property} _:b28. }} FILTER(EXISTS {{{self.get_filter_for_info()} }}) }} """ return query class PropertyColumn(AbstractColumn): def __init__(self, property, title=None, value=None, qualifier=None): self.property = property self.title = title self.value = value self.qualifier = qualifier def __eq__(self, other): return ( self.property == other.property and self.title == other.title and self.value == other.value and self.qualifier == other.qualifier ) - def get_title(self): - return "/".join([x for x in [self.property, self.value, self.qualifier] if x]) - def get_key(self): - return "".join([x for x in [self.property, self.value, self.qualifier] if x]) + return "/".join([x for x in [self.property, self.value, self.qualifier] if x]) def get_type_name(self): return "property" def format_html_snippet(self): return f'{self.property}' def make_column_header(self): if self.qualifier: property_link = self.qualifier else: property_link = self.property if self.title: label = f"[[Property:{property_link}|{self.title}]]" else: label = f"{{{{Property|{property_link}}}}}" return f'! data-sort-type="number"|{label}\n' def get_filter_for_info(self): if self.qualifier: property_value = f"wd:{self.value}" if self.value else "[]" return f""" ?entity p:{self.property} [ ps:{self.property} {property_value} ; pq:{self.qualifier} [] ]""" else: return f""" ?entity p:{self.property}[]""" def get_filter_for_positive_query(self): return f""" ?entity p:{self.property} ?prop . OPTIONAL {{ ?prop ps:{self.property} ?value }} SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} """ def get_filter_for_negative_query(self): return f""" {{?entity a wdno:{self.property} .}} UNION {{?entity wdt:{self.property} ?prop .}} }} SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} """ class TextColumn(AbstractColumn): def __init__(self, language, title=None): self.language = language self.title = title def __eq__(self, other): return self.language == other.language and self.title == other.title - def get_title(self): - return self.get_key() - def format_html_snippet(self): return f"{self.language} {self.get_type_name()}" def make_column_header(self): if self.title: text = f"{self.title}" else: text = f"{{{{#language:{self.language}}}}}" return f'! data-sort-type="number"|{text}\n' def get_filter_for_info(self): return f""" ?entity {self.get_selector()} ?lang_label. FILTER((LANG(?lang_label)) = '{self.language}').""" def get_filter_for_positive_query(self): return f""" FILTER(EXISTS {{ ?entity {self.get_selector()} ?lang_label. FILTER((LANG(?lang_label)) = "{self.language}"). }}) SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{self.language}". }} """ def get_filter_for_negative_query(self): return f""" {{ ?entity {self.get_selector()} ?lang_label. FILTER((LANG(?lang_label)) = "{self.language}") }} }} SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} """ class LabelColumn(TextColumn): def get_key(self): return "L%s" % self.language def get_selector(self): return "rdfs:label" def get_type_name(self): return "label" class DescriptionColumn(TextColumn): def get_key(self): return "D%s" % self.language def get_selector(self): return "schema:description" def get_type_name(self): return "description" class SitelinkColumn(AbstractColumn): def __init__(self, project, title=None): current_dir = os.path.dirname(__file__) wikiprojects_path = os.path.join(current_dir, "wikiprojects.json") wikiprojects = json.load(open(wikiprojects_path, "r")) self.project = project self.url = wikiprojects[project]["url"] self.item = wikiprojects[project]["item"] self.title = title def __eq__(self, other): return self.url == other.url and self.title == other.title def get_key(self): return self.project - def get_title(self): - return self.get_key() - def get_type_name(self): return "sitelink" def format_html_snippet(self): - return f'{self.get_title()} {self.get_type_name()}' + return f'{self.get_key()} {self.get_type_name()}' def make_column_header(self): if self.title: label = f"[[{self.item}|{self.title}]]" else: label = f"{{{{Q|{self.item}}}}}" return f'! data-sort-type="number"|{label}\n' def get_filter_for_info(self): return f""" ?sitelink schema:about ?entity; schema:isPartOf <{self.url}>.""" def get_filter_for_positive_query(self): return f""" ?sitelink schema:about ?entity; schema:isPartOf <{self.url}>; schema:name ?value. SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} """ def get_filter_for_negative_query(self): return f""" ?sitelink schema:about ?entity; schema:isPartOf <{self.url}>. }} SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} """ diff --git a/integraality/line.py b/integraality/line.py index b6afdf9..4f47533 100644 --- a/integraality/line.py +++ b/integraality/line.py @@ -1,232 +1,232 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ Line configuration classes """ import collections import logging import re import pywikibot class AbstractLine: def __init__(self, count, cells=None): self.count = count if not cells: cells = collections.OrderedDict() self.cells = cells def get_percentage(self, value): if not value: return 0 return round(1.0 * value / max(self.count, 1) * 100, 2) class Grouping(AbstractLine): is_linkable = True def __init__(self, count, cells=None, title=None, higher_grouping=None): super().__init__(count, cells) self.title = title self.higher_grouping = higher_grouping def __eq__(self, other): return ( self.count == other.count and self.title == other.title and self.higher_grouping == other.higher_grouping and self.cells == other.cells ) def __repr__(self): cell = ",".join(["%s:%s" % (key, value) for (key, value) in self.cells.items()]) return f"{self.title}:{self.count} - {cell}" def get_key(self): return self.title def format_header_cell(self, grouping_configuration, grouping_type): text = "" if self.higher_grouping is None: pass elif not grouping_configuration.higher_grouping: pass else: text += self.format_higher_grouping_text(grouping_type) text += f"| {self.heading()}\n" return text def format_cell(self, column_entry, cell_template): column_count = self.cells.get(column_entry.get_key(), 0) percentage = self.get_percentage(column_count) fields = [ cell_template, str(percentage), str(column_count), - f"column={column_entry.get_title()}", + f"column={column_entry.get_key()}", f"grouping={self.title}", ] return f'| {{{{{"|".join(fields)}}}}}\n' def row_opener(self): return "|-\n" def format_count_cell(self, grouping_link, repo): if grouping_link and self.is_linkable: return self.format_grouping_link(grouping_link, repo) else: return f"| {self.count} \n" def format_grouping_link(self, grouping_link, repo=None): return f"| [[{grouping_link}/{self.title}|{self.count}]] \n" def postive_query(self, selector_sparql, grouping_predicate=None, grouping=None): query = [] query.extend( [ "SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE {", f" ?entity {selector_sparql} .", ] ) query.extend( self.postive_query_filter_out_fragment(grouping_predicate, grouping) ) return "\n".join(query) def postive_query_filter_out_fragment(self, grouping_predicate=None, grouping=None): return [] def negative_query(self, selector_sparql, grouping_predicate=None, grouping=None): query = [] query.extend( [ "SELECT DISTINCT ?entity ?entityLabel WHERE {", f" ?entity {selector_sparql} .", ] ) query.extend( self.negative_query_filter_out_fragment(grouping_predicate, grouping) ) return "\n".join(query) def negative_query_filter_out_fragment(self, grouping_predicate=None, grouping=None): return self.postive_query_filter_out_fragment(grouping_predicate, grouping) class NoGroupGrouping(Grouping): """Group for items that do not belong to any group.""" is_linkable = False def heading(self): return "No grouping" def format_higher_grouping_text(self, grouping_type=None): return "|\n" def postive_query_filter_out_fragment(self, grouping_predicate, grouping=None): return [" MINUS {", f" ?entity {grouping_predicate} [] .", " }"] def negative_query_filter_out_fragment(self, grouping_predicate, grouping=None): return [ " MINUS {", f" {{?entity {grouping_predicate} [] .}} UNION", ] class ItemGrouping(Grouping): def format_grouping_link(self, grouping_link, repo): try: group_item = pywikibot.ItemPage(repo, self.title) group_item.get(get_redirect=True) label = group_item.labels["en"] except ( pywikibot.exceptions.InvalidTitleError, pywikibot.exceptions.NoPageError, KeyError, ): logging.info(f"Could not retrieve label for {self.title}") label = self.title return f"| [[{grouping_link}/{label}|{self.count}]] \n" def format_higher_grouping_text(self, grouping_type): higher_grouping_value = self.higher_grouping type_mapping = { "country": "{{Flag|%s}}" % higher_grouping_value, } if re.match(r"Q\d+", higher_grouping_value): higher_grouping_text = f"{{{{Q|{higher_grouping_value}}}}}" elif re.match( r"http://commons.wikimedia.org/wiki/Special:FilePath/(.*?)$", higher_grouping_value, ): match = re.match( r"http://commons.wikimedia.org/wiki/Special:FilePath/(.*?)$", higher_grouping_value, ) image_name = match.groups()[0] higher_grouping_text = f"[[File:{image_name}|center|100px]]" higher_grouping_value = image_name elif grouping_type in type_mapping: higher_grouping_text = type_mapping.get(grouping_type) else: higher_grouping_text = higher_grouping_value return f'| data-sort-value="{higher_grouping_value}"| {higher_grouping_text}\n' def heading(self): return f"{{{{Q|{self.title}}}}}" def postive_query_filter_out_fragment(self, grouping_predicate, grouping): return [f" ?entity {grouping_predicate} wd:{grouping} ."] def negative_query_filter_out_fragment(self, grouping_predicate, grouping): return self.postive_query_filter_out_fragment(grouping_predicate, grouping) class YearGrouping(Grouping): def heading(self): return f"{self.title}" def postive_query_filter_out_fragment(self, grouping_predicate, grouping): return [ f" ?entity {grouping_predicate} ?date.", " BIND(YEAR(?date) as ?year).", f" FILTER(?year = {grouping}).", ] def negative_query_filter_out_fragment(self, grouping_predicate, grouping): return self.postive_query_filter_out_fragment(grouping_predicate, grouping) class UnknownValueGrouping(Grouping): def get_key(self): return "UNKNOWN_VALUE" def heading(self): return "{{int:wikibase-snakview-variations-somevalue-label}}" def postive_query_filter_out_fragment(self, grouping_predicate, grouping=None): return [ f" ?entity {grouping_predicate} ?grouping.", " FILTER wikibase:isSomeValue(?grouping).", ] def negative_query_filter_out_fragment(self, grouping_predicate, grouping): return self.postive_query_filter_out_fragment(grouping_predicate, grouping) class TotalsGrouping(Grouping): is_linkable = False def heading(self): return "'''Totals''' (all items)" def format_higher_grouping_text(self, grouping_type=None): return "||\n" def row_opener(self): return '|- class="sortbottom"\n' diff --git a/integraality/tests/test_property_statistics.py b/integraality/tests/test_property_statistics.py index a16f63b..4466f13 100644 --- a/integraality/tests/test_property_statistics.py +++ b/integraality/tests/test_property_statistics.py @@ -1,1750 +1,1750 @@ # -*- coding: utf-8 -*- """Unit tests for functions.py.""" import unittest from collections import OrderedDict from unittest.mock import patch import pywikibot from column import (DescriptionColumn, LabelColumn, PropertyColumn, SitelinkColumn) from grouping import ItemGroupingConfiguration, YearGroupingConfiguration from line import ItemGrouping, UnknownValueGrouping, YearGrouping from property_statistics import PropertyStatistics from sparql_utils import QueryException class PropertyStatisticsTest(unittest.TestCase): def setUp(self): self.columns = [ PropertyColumn(property="P21"), PropertyColumn(property="P19"), PropertyColumn(property="P1", qualifier="P2"), PropertyColumn(property="P3", value="Q4", qualifier="P5"), LabelColumn(language="br"), DescriptionColumn(language="xy"), SitelinkColumn(project="brwiki"), ] self.grouping_configuration = ItemGroupingConfiguration("P551") self.stats = PropertyStatistics( columns=self.columns, grouping_configuration=self.grouping_configuration, selector_sparql="wdt:P31 wd:Q41960", property_threshold=10, ) class SparqlQueryTest(unittest.TestCase): def setUp(self): super().setUp() patcher = patch("pywikibot.data.sparql.SparqlQuery", autospec=True) self.mock_sparql_query = patcher.start() self.addCleanup(patcher.stop) def assert_query_called(self, query): self.mock_sparql_query.return_value.select.assert_called_once_with(query) class TestLabelColumn(PropertyStatisticsTest): def setUp(self): super().setUp() self.column = LabelColumn("br") def test_simple(self): result = self.column.make_column_header() expected = '! data-sort-type="number"|{{#language:br}}\n' self.assertEqual(result, expected) def test_get_key(self): result = self.column.get_key() self.assertEqual(result, "Lbr") def test_get_totals_query(self): result = self.column.get_totals_query(self.stats) query = ( "\n" "SELECT (COUNT(*) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960\n" " FILTER(EXISTS {\n" " ?entity rdfs:label ?lang_label.\n" " FILTER((LANG(?lang_label)) = 'br').\n" " })\n" "}\n" ) self.assertEqual(result, query) def test_get_info_query(self): result = self.column.get_info_query(self.stats) query = ( "\n" "SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " ?entity wdt:P551 ?grouping .\n" " FILTER(EXISTS {\n" " ?entity rdfs:label ?lang_label.\n" " FILTER((LANG(?lang_label)) = 'br').\n" " })\n" "}\n" "GROUP BY ?grouping\n" "HAVING (?count >= 10)\n" "ORDER BY DESC(?count)\n" "LIMIT 1000\n" ) self.assertEqual(result, query) def test_get_info_no_grouping_query(self): result = self.column.get_info_no_grouping_query(self.stats) query = ( "\n" "SELECT (COUNT(*) AS ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " MINUS { ?entity wdt:P551 _:b28. }\n" " FILTER(EXISTS {\n" " ?entity rdfs:label ?lang_label.\n" " FILTER((LANG(?lang_label)) = 'br').\n" " })\n" "}\n" ) self.assertEqual(result, query) class TestDescriptionColumn(PropertyStatisticsTest): def setUp(self): super().setUp() self.column = DescriptionColumn("br") def test_simple(self): result = self.column.make_column_header() expected = '! data-sort-type="number"|{{#language:br}}\n' self.assertEqual(result, expected) def test_get_key(self): result = self.column.get_key() self.assertEqual(result, "Dbr") def test_get_totals_query(self): result = self.column.get_totals_query(self.stats) query = ( "\n" "SELECT (COUNT(*) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960\n" " FILTER(EXISTS {\n" " ?entity schema:description ?lang_label.\n" " FILTER((LANG(?lang_label)) = 'br').\n" " })\n" "}\n" ) self.assertEqual(result, query) def test_get_info_query(self): result = self.column.get_info_query(self.stats) query = ( "\n" "SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " ?entity wdt:P551 ?grouping .\n" " FILTER(EXISTS {\n" " ?entity schema:description ?lang_label.\n" " FILTER((LANG(?lang_label)) = 'br').\n" " })\n" "}\n" "GROUP BY ?grouping\n" "HAVING (?count >= 10)\n" "ORDER BY DESC(?count)\n" "LIMIT 1000\n" ) self.assertEqual(result, query) def test_get_info_no_grouping_query(self): result = self.column.get_info_no_grouping_query(self.stats) query = ( "\n" "SELECT (COUNT(*) AS ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " MINUS { ?entity wdt:P551 _:b28. }\n" " FILTER(EXISTS {\n" " ?entity schema:description ?lang_label.\n" " FILTER((LANG(?lang_label)) = 'br').\n" " })\n" "}\n" ) self.assertEqual(result, query) class MakeStatsForNoGroupTest(SparqlQueryTest, PropertyStatisticsTest): def setUp(self): super().setUp() patcher1 = patch( "property_statistics.PropertyStatistics.get_totals_no_grouping", autospec=True, ) self.mock_get_totals_no_grouping = patcher1.start() self.addCleanup(patcher1.stop) self.mock_get_totals_no_grouping.return_value = 20 self.mock_sparql_query.return_value.select.side_effect = [ [{"count": "2"}], [{"count": "10"}], [{"count": "15"}], [{"count": "5"}], [{"count": "4"}], [{"count": "8"}], [{"count": "4"}], ] def test_make_stats_for_no_group(self): result = self.stats.make_stats_for_no_group() expected = ( "|-\n" "| No grouping\n" "| 20 \n" "| {{Integraality cell|10.0|2|column=P21|grouping=None}}\n" "| {{Integraality cell|50.0|10|column=P19|grouping=None}}\n" "| {{Integraality cell|75.0|15|column=P1/P2|grouping=None}}\n" "| {{Integraality cell|25.0|5|column=P3/Q4/P5|grouping=None}}\n" "| {{Integraality cell|20.0|4|column=Lbr|grouping=None}}\n" "| {{Integraality cell|40.0|8|column=Dxy|grouping=None}}\n" "| {{Integraality cell|20.0|4|column=brwiki|grouping=None}}\n" ) self.assertEqual(result, expected) self.mock_get_totals_no_grouping.assert_called_once_with(self.stats) self.assertEqual(self.mock_sparql_query.call_count, 7) def test_make_stats_for_no_group_with_higher_grouping(self): self.stats.grouping_configuration.higher_grouping = "wdt:P17/wdt:P298" result = self.stats.make_stats_for_no_group() expected = ( "|-\n" "|\n" "| No grouping\n" "| 20 \n" "| {{Integraality cell|10.0|2|column=P21|grouping=None}}\n" "| {{Integraality cell|50.0|10|column=P19|grouping=None}}\n" "| {{Integraality cell|75.0|15|column=P1/P2|grouping=None}}\n" "| {{Integraality cell|25.0|5|column=P3/Q4/P5|grouping=None}}\n" "| {{Integraality cell|20.0|4|column=Lbr|grouping=None}}\n" "| {{Integraality cell|40.0|8|column=Dxy|grouping=None}}\n" "| {{Integraality cell|20.0|4|column=brwiki|grouping=None}}\n" ) self.assertEqual(result, expected) self.mock_get_totals_no_grouping.assert_called_once_with(self.stats) self.assertEqual(self.mock_sparql_query.call_count, 7) def test_make_stats_for_no_group_with_grouping_link(self): self.stats.grouping_link = "Foo" result = self.stats.make_stats_for_no_group() expected = ( "|-\n" "| No grouping\n" "| 20 \n" "| {{Integraality cell|10.0|2|column=P21|grouping=None}}\n" "| {{Integraality cell|50.0|10|column=P19|grouping=None}}\n" "| {{Integraality cell|75.0|15|column=P1/P2|grouping=None}}\n" "| {{Integraality cell|25.0|5|column=P3/Q4/P5|grouping=None}}\n" "| {{Integraality cell|20.0|4|column=Lbr|grouping=None}}\n" "| {{Integraality cell|40.0|8|column=Dxy|grouping=None}}\n" "| {{Integraality cell|20.0|4|column=brwiki|grouping=None}}\n" ) self.assertEqual(result, expected) self.mock_get_totals_no_grouping.assert_called_once_with(self.stats) self.assertEqual(self.mock_sparql_query.call_count, 7) class MakeStatsForOneGroupingTest(PropertyStatisticsTest): def setUp(self): super().setUp() # self.stats.column_data = { # 'P21': OrderedDict([ # ('Q3115846', 10), ('Q5087901', 6), # ('UNKNOWN_VALUE', 4) # ]), # 'P19': OrderedDict([('Q3115846', 8), ('Q2166574', 5)]), - # 'P1P2': OrderedDict([('Q3115846', 2), ('Q2166574', 9)]), - # 'P3Q4P5': OrderedDict([('Q3115846', 7), ('Q2166574', 1)]), + # 'P1/P2': OrderedDict([('Q3115846', 2), ('Q2166574', 9)]), + # 'P3/Q4/P5': OrderedDict([('Q3115846', 7), ('Q2166574', 1)]), # 'Lbr': OrderedDict([('Q3115846', 1), ('Q2166574', 2)]), # 'Dxy': OrderedDict([('Q3115846', 2), ('Q2166574', 1)]), # } def test_format_stats_for_one_grouping(self): grouping = ItemGrouping(title="Q3115846", count=10) grouping.cells = OrderedDict( [ ("P21", 10), ("P19", 8), - ("P1P2", 2), - ("P3Q4P5", 7), + ("P1/P2", 2), + ("P3/Q4/P5", 7), ("Lbr", 1), ("Dxy", 2), ("brwiki", 1), ] ) result = self.stats.format_stats_for_one_grouping(grouping) expected = ( "|-\n" "| {{Q|Q3115846}}\n" "| 10 \n" "| {{Integraality cell|100.0|10|column=P21|grouping=Q3115846}}\n" "| {{Integraality cell|80.0|8|column=P19|grouping=Q3115846}}\n" "| {{Integraality cell|20.0|2|column=P1/P2|grouping=Q3115846}}\n" "| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=Q3115846}}\n" "| {{Integraality cell|10.0|1|column=Lbr|grouping=Q3115846}}\n" "| {{Integraality cell|20.0|2|column=Dxy|grouping=Q3115846}}\n" "| {{Integraality cell|10.0|1|column=brwiki|grouping=Q3115846}}\n" ) self.assertEqual(result, expected) def test_make_stats_for_unknown_grouping(self): grouping = UnknownValueGrouping(title="UNKNOWN_VALUE", count=10) grouping.cells = OrderedDict( [ ("P21", 4), ] ) result = self.stats.format_stats_for_one_grouping(grouping) expected = ( "|-\n" "| {{int:wikibase-snakview-variations-somevalue-label}}\n" "| 10 \n" "| {{Integraality cell|40.0|4|column=P21|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=P19|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=P1/P2|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=Lbr|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=Dxy|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=brwiki|grouping=UNKNOWN_VALUE}}\n" ) self.assertEqual(result, expected) def test_make_stats_for_unknown_grouping_with_grouping_link(self): self.stats.grouping_link = "Foo" grouping = UnknownValueGrouping(title="UNKNOWN_VALUE", count=10) grouping.cells = OrderedDict( [ ("P21", 4), ] ) result = self.stats.format_stats_for_one_grouping(grouping) expected = ( "|-\n" "| {{int:wikibase-snakview-variations-somevalue-label}}\n" "| [[Foo/UNKNOWN_VALUE|10]] \n" "| {{Integraality cell|40.0|4|column=P21|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=P19|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=P1/P2|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=Lbr|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=Dxy|grouping=UNKNOWN_VALUE}}\n" "| {{Integraality cell|0|0|column=brwiki|grouping=UNKNOWN_VALUE}}\n" ) self.assertEqual(result, expected) def test_format_stats_for_one_grouping_with_higher_grouping(self): self.stats.higher_grouping = "wdt:P17/wdt:P298" self.stats.grouping_configuration = ItemGroupingConfiguration("P551", "XYZ") grouping = ItemGrouping(title="Q3115846", count=10, higher_grouping="Q1") grouping.cells = OrderedDict( [ ("P21", 10), ("P19", 8), - ("P1P2", 2), - ("P3Q4P5", 7), + ("P1/P2", 2), + ("P3/Q4/P5", 7), ("Lbr", 1), ("Dxy", 2), ("brwiki", 1), ] ) result = self.stats.format_stats_for_one_grouping(grouping) expected = ( "|-\n" '| data-sort-value="Q1"| {{Q|Q1}}\n' "| {{Q|Q3115846}}\n" "| 10 \n" "| {{Integraality cell|100.0|10|column=P21|grouping=Q3115846}}\n" "| {{Integraality cell|80.0|8|column=P19|grouping=Q3115846}}\n" "| {{Integraality cell|20.0|2|column=P1/P2|grouping=Q3115846}}\n" "| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=Q3115846}}\n" "| {{Integraality cell|10.0|1|column=Lbr|grouping=Q3115846}}\n" "| {{Integraality cell|20.0|2|column=Dxy|grouping=Q3115846}}\n" "| {{Integraality cell|10.0|1|column=brwiki|grouping=Q3115846}}\n" ) self.assertEqual(result, expected) @patch("pywikibot.ItemPage", autospec=True) def test_format_stats_for_one_grouping_with_grouping_link(self, mock_item_page): mock_item_page.return_value.labels = {"en": "Bar"} self.stats.grouping_link = "Foo" grouping = ItemGrouping(title="Q3115846", count=10) grouping.cells = OrderedDict( [ ("P21", 10), ("P19", 8), - ("P1P2", 2), - ("P3Q4P5", 7), + ("P1/P2", 2), + ("P3/Q4/P5", 7), ("Lbr", 1), ("Dxy", 2), ("brwiki", 1), ] ) result = self.stats.format_stats_for_one_grouping(grouping) expected = ( "|-\n" "| {{Q|Q3115846}}\n" "| [[Foo/Bar|10]] \n" "| {{Integraality cell|100.0|10|column=P21|grouping=Q3115846}}\n" "| {{Integraality cell|80.0|8|column=P19|grouping=Q3115846}}\n" "| {{Integraality cell|20.0|2|column=P1/P2|grouping=Q3115846}}\n" "| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=Q3115846}}\n" "| {{Integraality cell|10.0|1|column=Lbr|grouping=Q3115846}}\n" "| {{Integraality cell|20.0|2|column=Dxy|grouping=Q3115846}}\n" "| {{Integraality cell|10.0|1|column=brwiki|grouping=Q3115846}}\n" ) self.assertEqual(result, expected) @patch("pywikibot.ItemPage", autospec=True) def test_format_stats_for_one_grouping_with_grouping_link_failure( self, mock_item_page ): mock_item_page.side_effect = pywikibot.exceptions.InvalidTitleError("Error") self.stats.grouping_link = "Foo" grouping = ItemGrouping(title="Q3115846", count=10) grouping.cells = OrderedDict( [ ("P21", 10), ("P19", 8), - ("P1P2", 2), - ("P3Q4P5", 7), + ("P1/P2", 2), + ("P3/Q4/P5", 7), ("Lbr", 1), ("Dxy", 2), ("brwiki", 1), ] ) with self.assertLogs(level="INFO") as cm: result = self.stats.format_stats_for_one_grouping(grouping) expected = ( "|-\n" "| {{Q|Q3115846}}\n" "| [[Foo/Q3115846|10]] \n" "| {{Integraality cell|100.0|10|column=P21|grouping=Q3115846}}\n" "| {{Integraality cell|80.0|8|column=P19|grouping=Q3115846}}\n" "| {{Integraality cell|20.0|2|column=P1/P2|grouping=Q3115846}}\n" "| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=Q3115846}}\n" "| {{Integraality cell|10.0|1|column=Lbr|grouping=Q3115846}}\n" "| {{Integraality cell|20.0|2|column=Dxy|grouping=Q3115846}}\n" "| {{Integraality cell|10.0|1|column=brwiki|grouping=Q3115846}}\n" ) self.assertEqual(result, expected) self.assertEqual(cm.output, ["INFO:root:Could not retrieve label for Q3115846"]) def test_make_stats_for_year_grouping(self): grouping = YearGrouping(title="2001", count=10) grouping.cells = OrderedDict( [ ("P21", 4), ] ) result = self.stats.format_stats_for_one_grouping(grouping) expected = ( "|-\n" "| 2001\n" "| 10 \n" "| {{Integraality cell|40.0|4|column=P21|grouping=2001}}\n" "| {{Integraality cell|0|0|column=P19|grouping=2001}}\n" "| {{Integraality cell|0|0|column=P1/P2|grouping=2001}}\n" "| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=2001}}\n" "| {{Integraality cell|0|0|column=Lbr|grouping=2001}}\n" "| {{Integraality cell|0|0|column=Dxy|grouping=2001}}\n" "| {{Integraality cell|0|0|column=brwiki|grouping=2001}}\n" ) self.assertEqual(result, expected) def test_make_stats_for_year_grouping_with_grouping_link(self): self.stats.grouping_link = "Foo" grouping = YearGrouping(title="2001", count=10) grouping.cells = OrderedDict( [ ("P21", 4), ] ) result = self.stats.format_stats_for_one_grouping(grouping) expected = ( "|-\n" "| 2001\n" "| [[Foo/2001|10]] \n" "| {{Integraality cell|40.0|4|column=P21|grouping=2001}}\n" "| {{Integraality cell|0|0|column=P19|grouping=2001}}\n" "| {{Integraality cell|0|0|column=P1/P2|grouping=2001}}\n" "| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=2001}}\n" "| {{Integraality cell|0|0|column=Lbr|grouping=2001}}\n" "| {{Integraality cell|0|0|column=Dxy|grouping=2001}}\n" "| {{Integraality cell|0|0|column=brwiki|grouping=2001}}\n" ) self.assertEqual(result, expected) class GetQueryForItemsForPropertyPositive(PropertyStatisticsTest): def test_get_query_for_items_for_property_positive(self): result = self.stats.get_query_for_items_for_property_positive( self.stats.columns.get("P21"), "Q3115846" ) expected = """ SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE { ?entity wdt:P31 wd:Q41960 . ?entity wdt:P551 wd:Q3115846 . ?entity p:P21 ?prop . OPTIONAL { ?prop ps:P21 ?value } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_positive_no_grouping(self): result = self.stats.get_query_for_items_for_property_positive( self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.NO_GROUPING ) expected = """ SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE { ?entity wdt:P31 wd:Q41960 . MINUS { ?entity wdt:P551 [] . } ?entity p:P21 ?prop . OPTIONAL { ?prop ps:P21 ?value } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_positive_totals(self): result = self.stats.get_query_for_items_for_property_positive( self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.TOTALS ) expected = """ SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE { ?entity wdt:P31 wd:Q41960 . ?entity p:P21 ?prop . OPTIONAL { ?prop ps:P21 ?value } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_positive_label(self): result = self.stats.get_query_for_items_for_property_positive( self.stats.columns.get("Lbr"), "Q3115846" ) expected = """ SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE { ?entity wdt:P31 wd:Q41960 . ?entity wdt:P551 wd:Q3115846 . FILTER(EXISTS { ?entity rdfs:label ?lang_label. FILTER((LANG(?lang_label)) = "br"). }) SERVICE wikibase:label { bd:serviceParam wikibase:language "br". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_positive_unknown_value_grouping(self): result = self.stats.get_query_for_items_for_property_positive( self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.UNKNOWN_VALUE ) expected = """ SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE { ?entity wdt:P31 wd:Q41960 . ?entity wdt:P551 ?grouping. FILTER wikibase:isSomeValue(?grouping). ?entity p:P21 ?prop . OPTIONAL { ?prop ps:P21 ?value } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_positive_year_grouping(self): stats = PropertyStatistics( columns=self.columns, grouping_configuration=YearGroupingConfiguration("P577"), selector_sparql="wdt:P31 wd:Q41960", grouping_type="year", property_threshold=10, ) result = stats.get_query_for_items_for_property_positive( self.stats.columns.get("P21"), 2006 ) expected = """ SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE { ?entity wdt:P31 wd:Q41960 . ?entity wdt:P577 ?date. BIND(YEAR(?date) as ?year). FILTER(?year = 2006). ?entity p:P21 ?prop . OPTIONAL { ?prop ps:P21 ?value } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_positive_sitelink(self): result = self.stats.get_query_for_items_for_property_positive( self.stats.columns.get("brwiki"), "Q3115846" ) expected = """ SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE { ?entity wdt:P31 wd:Q41960 . ?entity wdt:P551 wd:Q3115846 . ?sitelink schema:about ?entity; schema:isPartOf ; schema:name ?value. SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) class GetQueryForItemsForPropertyNegative(PropertyStatisticsTest): def test_get_query_for_items_for_property_negative(self): result = self.stats.get_query_for_items_for_property_negative( self.stats.columns.get("P21"), "Q3115846" ) expected = """ SELECT DISTINCT ?entity ?entityLabel WHERE { ?entity wdt:P31 wd:Q41960 . ?entity wdt:P551 wd:Q3115846 . MINUS { {?entity a wdno:P21 .} UNION {?entity wdt:P21 ?prop .} } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_negative_no_grouping(self): result = self.stats.get_query_for_items_for_property_negative( self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.NO_GROUPING ) expected = """ SELECT DISTINCT ?entity ?entityLabel WHERE { ?entity wdt:P31 wd:Q41960 . MINUS { {?entity wdt:P551 [] .} UNION {?entity a wdno:P21 .} UNION {?entity wdt:P21 ?prop .} } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_negative_totals(self): result = self.stats.get_query_for_items_for_property_negative( self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.TOTALS ) expected = """ SELECT DISTINCT ?entity ?entityLabel WHERE { ?entity wdt:P31 wd:Q41960 . MINUS { {?entity a wdno:P21 .} UNION {?entity wdt:P21 ?prop .} } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_negative_label(self): result = self.stats.get_query_for_items_for_property_negative( self.stats.columns.get("Lbr"), "Q3115846" ) expected = """ SELECT DISTINCT ?entity ?entityLabel WHERE { ?entity wdt:P31 wd:Q41960 . ?entity wdt:P551 wd:Q3115846 . MINUS { { ?entity rdfs:label ?lang_label. FILTER((LANG(?lang_label)) = "br") } } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_negative_unknown_value_grouping(self): result = self.stats.get_query_for_items_for_property_negative( self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.UNKNOWN_VALUE ) expected = """ SELECT DISTINCT ?entity ?entityLabel WHERE { ?entity wdt:P31 wd:Q41960 . ?entity wdt:P551 ?grouping. FILTER wikibase:isSomeValue(?grouping). MINUS { {?entity a wdno:P21 .} UNION {?entity wdt:P21 ?prop .} } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_negative_year_grouping(self): stats = PropertyStatistics( columns=self.columns, grouping_configuration=YearGroupingConfiguration("P577"), selector_sparql="wdt:P31 wd:Q41960", grouping_type="year", property_threshold=10, ) result = stats.get_query_for_items_for_property_negative( self.stats.columns.get("P21"), 2006 ) expected = """ SELECT DISTINCT ?entity ?entityLabel WHERE { ?entity wdt:P31 wd:Q41960 . ?entity wdt:P577 ?date. BIND(YEAR(?date) as ?year). FILTER(?year = 2006). MINUS { {?entity a wdno:P21 .} UNION {?entity wdt:P21 ?prop .} } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) def test_get_query_for_items_for_property_negative_sitelink(self): result = self.stats.get_query_for_items_for_property_negative( self.stats.columns.get("brwiki"), "Q3115846" ) expected = """ SELECT DISTINCT ?entity ?entityLabel WHERE { ?entity wdt:P31 wd:Q41960 . ?entity wdt:P551 wd:Q3115846 . MINUS { ?sitelink schema:about ?entity; schema:isPartOf . } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """ self.assertEqual(result, expected) class GetCountFromSparqlTest(SparqlQueryTest, PropertyStatisticsTest): def test_return_count(self): self.mock_sparql_query.return_value.select.return_value = [{"count": "18"}] result = self.stats._get_count_from_sparql("SELECT X") self.assert_query_called("SELECT X") self.assertEqual(result, 18) def test_return_None(self): self.mock_sparql_query.return_value.select.return_value = None with self.assertRaises(QueryException): self.stats._get_count_from_sparql("SELECT X") self.assert_query_called("SELECT X") class GetGroupingCountsFromSparqlTest(SparqlQueryTest, PropertyStatisticsTest): def test_return_count(self): self.mock_sparql_query.return_value.select.return_value = [ {"grouping": "http://www.wikidata.org/entity/Q1", "count": 10}, {"grouping": "http://www.wikidata.org/entity/Q2", "count": 5}, ] result = self.stats._get_grouping_counts_from_sparql("SELECT X") self.assert_query_called("SELECT X") expected = OrderedDict([("Q1", 10), ("Q2", 5)]) self.assertEqual(result, expected) def test_return_None(self): self.mock_sparql_query.return_value.select.return_value = None result = self.stats._get_grouping_counts_from_sparql("SELECT X") self.assert_query_called("SELECT X") self.assertEqual(result, None) def test_return_count_with_unknown(self): self.mock_sparql_query.return_value.select.return_value = [ {"grouping": "http://www.wikidata.org/entity/Q1", "count": 10}, {"grouping": "http://www.wikidata.org/entity/Q2", "count": 5}, { "grouping": "http://www.wikidata.org/.well-known/genid/6ab4c2d7cb4ac72721335af5b8ba09c7", "count": 2, }, { "grouping": "http://www.wikidata.org/.well-known/genid/1469448a291c6fbe5df8306cb52ef18b", "count": 1, }, ] result = self.stats._get_grouping_counts_from_sparql("SELECT X") self.assert_query_called("SELECT X") expected = OrderedDict([("Q1", 10), ("Q2", 5), ("UNKNOWN_VALUE", 3)]) self.assertEqual(result, expected) class SparqlCountTest(SparqlQueryTest, PropertyStatisticsTest): def setUp(self): super().setUp() self.mock_sparql_query.return_value.select.return_value = [{"count": "18"}] def test_get_totals_no_grouping(self): result = self.stats.get_totals_no_grouping() query = ( "\n" "SELECT (COUNT(*) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960\n" " MINUS { ?entity wdt:P551 _:b28. }\n" "}\n" ) self.assert_query_called(query) self.assertEqual(result, 18) def test_get_totals(self): result = self.stats.get_totals() query = ( "\n" "SELECT (COUNT(*) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960\n" "}\n" ) self.assert_query_called(query) self.assertEqual(result, 18) class GetGroupingInformationTest(SparqlQueryTest, PropertyStatisticsTest): def test_get_grouping_information(self): self.mock_sparql_query.return_value.select.return_value = [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "10"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "6"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "6"}, ] expected = { "Q3115846": ItemGrouping(title="Q3115846", count=10), "Q5087901": ItemGrouping(title="Q5087901", count=6), "Q623333": ItemGrouping(title="Q623333", count=6), } query = ( "\n" "SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " ?entity wdt:P551 ?grouping .\n" "} GROUP BY ?grouping\n" "HAVING (?count >= 20)\n" "ORDER BY DESC(?count)\n" "LIMIT 1000\n" ) result = self.stats.get_grouping_information() self.assert_query_called(query) self.assertEqual(result, expected) def test_get_grouping_information_with_grouping_threshold(self): self.mock_sparql_query.return_value.select.return_value = [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "10"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "6"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "6"}, ] expected = { "Q3115846": ItemGrouping(title="Q3115846", count=10), "Q5087901": ItemGrouping(title="Q5087901", count=6), "Q623333": ItemGrouping(title="Q623333", count=6), } self.stats.grouping_configuration.grouping_threshold = 5 query = ( "\n" "SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " ?entity wdt:P551 ?grouping .\n" "} GROUP BY ?grouping\n" "HAVING (?count >= 5)\n" "ORDER BY DESC(?count)\n" "LIMIT 1000\n" ) result = self.stats.get_grouping_information() self.assert_query_called(query) self.assertEqual(result, expected) def test_get_grouping_information_with_higher_grouping(self): self.mock_sparql_query.return_value.select.return_value = [ { "grouping": "http://www.wikidata.org/entity/Q3115846", "higher_grouping": "NZL", "count": "10", }, { "grouping": "http://www.wikidata.org/entity/Q5087901", "higher_grouping": "USA", "count": "6", }, { "grouping": "http://www.wikidata.org/entity/Q623333", "higher_grouping": "USA", "count": "6", }, ] expected = { "Q3115846": ItemGrouping(title="Q3115846", count=10, higher_grouping="NZL"), "Q5087901": ItemGrouping(title="Q5087901", count=6, higher_grouping="USA"), "Q623333": ItemGrouping(title="Q623333", count=6, higher_grouping="USA"), } self.stats.grouping_configuration.higher_grouping = "wdt:P17/wdt:P298" query = ( "\n" "SELECT ?grouping (SAMPLE(?_higher_grouping) as ?higher_grouping) " "(COUNT(DISTINCT ?entity) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " ?entity wdt:P551 ?grouping .\n" " OPTIONAL { ?grouping wdt:P17/wdt:P298 ?_higher_grouping }.\n" "} GROUP BY ?grouping ?higher_grouping\n" "HAVING (?count >= 20)\n" "ORDER BY DESC(?count)\n" "LIMIT 1000\n" ) result = self.stats.get_grouping_information() self.assert_query_called(query) self.assertEqual(result, expected) def test_get_grouping_information_empty_result(self): self.mock_sparql_query.return_value.select.return_value = None query = ( "\n" "SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " ?entity wdt:P551 ?grouping .\n" "} GROUP BY ?grouping\n" "HAVING (?count >= 20)\n" "ORDER BY DESC(?count)\n" "LIMIT 1000\n" ) with self.assertRaises(QueryException): self.stats.get_grouping_information() self.assert_query_called(query) def test_get_grouping_information_timeout(self): self.mock_sparql_query.return_value.select.side_effect = ( pywikibot.exceptions.TimeoutError("Error") ) query = ( "\n" "SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " ?entity wdt:P551 ?grouping .\n" "} GROUP BY ?grouping\n" "HAVING (?count >= 20)\n" "ORDER BY DESC(?count)\n" "LIMIT 1000\n" ) with self.assertRaises(QueryException): self.stats.get_grouping_information() self.assert_query_called(query) def test_get_grouping_information_unknown_value(self): self.mock_sparql_query.return_value.select.return_value = [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "10"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "6"}, { "grouping": "http://www.wikidata.org/.well-known/genid/6ab4c2d7cb4ac72721335af5b8ba09c7", "count": "2", }, { "grouping": "http://www.wikidata.org/.well-known/genid/1469448a291c6fbe5df8306cb52ef18b", "count": "1", }, ] expected = { "Q3115846": ItemGrouping(title="Q3115846", count=10), "Q5087901": ItemGrouping(title="Q5087901", count=6), "UNKNOWN_VALUE": UnknownValueGrouping(count=3), } query = ( "\n" "SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " ?entity wdt:P551 ?grouping .\n" "} GROUP BY ?grouping\n" "HAVING (?count >= 20)\n" "ORDER BY DESC(?count)\n" "LIMIT 1000\n" ) result = self.stats.get_grouping_information() self.assert_query_called(query) self.assertEqual(result, expected) def test_get_grouping_information_year(self): stats = PropertyStatistics( columns=self.columns, grouping_configuration=YearGroupingConfiguration("P577"), selector_sparql="wdt:P31 wd:Q41960", grouping_type="year", property_threshold=10, ) self.mock_sparql_query.return_value.select.return_value = [ {"grouping": "2001", "count": "10"}, {"grouping": "2002", "count": "6"}, ] expected = { "2001": YearGrouping(title="2001", count=10), "2002": YearGrouping(title="2002", count=6), } query = ( "\n" "SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " ?entity wdt:P577 ?date .\n" " BIND(YEAR(?date) as ?grouping) .\n" "} GROUP BY ?grouping\n" "HAVING (?count >= 20)\n" "ORDER BY DESC(?count)\n" "LIMIT 1000\n" ) result = stats.get_grouping_information() self.assert_query_called(query) self.assertEqual(result, expected) def test_get_grouping_information_year_unknown_value(self): stats = PropertyStatistics( columns=self.columns, grouping_configuration=YearGroupingConfiguration("P577"), selector_sparql="wdt:P31 wd:Q41960", grouping_type="year", property_threshold=10, ) self.mock_sparql_query.return_value.select.return_value = [ {"grouping": "2001", "count": "10"}, {"grouping": "2002", "count": "6"}, {"grouping": "", "count": "4"}, ] expected = { "2001": YearGrouping(title="2001", count=10), "2002": YearGrouping(title="2002", count=6), "UNKNOWN_VALUE": UnknownValueGrouping(count=4), } query = ( "\n" "SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n" " ?entity wdt:P31 wd:Q41960 .\n" " ?entity wdt:P577 ?date .\n" " BIND(YEAR(?date) as ?grouping) .\n" "} GROUP BY ?grouping\n" "HAVING (?count >= 20)\n" "ORDER BY DESC(?count)\n" "LIMIT 1000\n" ) result = stats.get_grouping_information() self.assert_query_called(query) self.assertEqual(result, expected) class TestGetHeader(PropertyStatisticsTest): def setUp(self): super().setUp() self.stats.grouping_configuration.grouping_threshold = 7 self.stats.property_threshold = 4 def test_get_header(self): result = self.stats.get_header() expected = ( '{| class="wikitable sortable"\n' '! colspan="2" |Top groupings (Minimum 7 items)\n' '! colspan="7"|Top Properties (used at least 4 times per grouping)\n' "|-\n" "! Name\n" "! Count\n" '! data-sort-type="number"|{{Property|P21}}\n' '! data-sort-type="number"|{{Property|P19}}\n' '! data-sort-type="number"|{{Property|P2}}\n' '! data-sort-type="number"|{{Property|P5}}\n' '! data-sort-type="number"|{{#language:br}}\n' '! data-sort-type="number"|{{#language:xy}}\n' '! data-sort-type="number"|{{Q|Q846871}}\n' ) self.assertEqual(result, expected) def test_get_header_with_higher_grouping(self): self.stats.grouping_configuration.higher_grouping = "wdt:P17/wdt:P298" result = self.stats.get_header() expected = ( '{| class="wikitable sortable"\n' '! colspan="3" |Top groupings (Minimum 7 items)\n' '! colspan="7"|Top Properties (used at least 4 times per grouping)\n' "|-\n" "! \n" "! Name\n" "! Count\n" '! data-sort-type="number"|{{Property|P21}}\n' '! data-sort-type="number"|{{Property|P19}}\n' '! data-sort-type="number"|{{Property|P2}}\n' '! data-sort-type="number"|{{Property|P5}}\n' '! data-sort-type="number"|{{#language:br}}\n' '! data-sort-type="number"|{{#language:xy}}\n' '! data-sort-type="number"|{{Q|Q846871}}\n' ) self.assertEqual(result, expected) class MakeTotalsTest(SparqlQueryTest, PropertyStatisticsTest): def setUp(self): super().setUp() self.mock_sparql_query.return_value.select.side_effect = [ [{"count": "120"}], [{"count": "30"}], [{"count": "80"}], [{"count": "10"}], [{"count": "12"}], [{"count": "24"}], [{"count": "36"}], [{"count": "24"}], ] def test_make_totals(self): result = self.stats.make_totals() expected = ( '|- class="sortbottom"\n' "| '''Totals''' (all items)\n" "| 120 \n" "| {{Integraality cell|25.0|30|column=P21|grouping=}}\n" "| {{Integraality cell|66.67|80|column=P19|grouping=}}\n" "| {{Integraality cell|8.33|10|column=P1/P2|grouping=}}\n" "| {{Integraality cell|10.0|12|column=P3/Q4/P5|grouping=}}\n" "| {{Integraality cell|20.0|24|column=Lbr|grouping=}}\n" "| {{Integraality cell|30.0|36|column=Dxy|grouping=}}\n" "| {{Integraality cell|20.0|24|column=brwiki|grouping=}}\n" ) self.assertEqual(result, expected) def test_make_totals_with_higher_grouping(self): self.stats.grouping_configuration.higher_grouping = "wdt:P17/wdt:P298" result = self.stats.make_totals() expected = ( '|- class="sortbottom"\n' "||\n" "| '''Totals''' (all items)\n" "| 120 \n" "| {{Integraality cell|25.0|30|column=P21|grouping=}}\n" "| {{Integraality cell|66.67|80|column=P19|grouping=}}\n" "| {{Integraality cell|8.33|10|column=P1/P2|grouping=}}\n" "| {{Integraality cell|10.0|12|column=P3/Q4/P5|grouping=}}\n" "| {{Integraality cell|20.0|24|column=Lbr|grouping=}}\n" "| {{Integraality cell|30.0|36|column=Dxy|grouping=}}\n" "| {{Integraality cell|20.0|24|column=brwiki|grouping=}}\n" ) self.assertEqual(result, expected) def test_make_totals_with_grouping_link(self): self.stats.grouping_link = "Foo" result = self.stats.make_totals() expected = ( '|- class="sortbottom"\n' "| '''Totals''' (all items)\n" "| 120 \n" "| {{Integraality cell|25.0|30|column=P21|grouping=}}\n" "| {{Integraality cell|66.67|80|column=P19|grouping=}}\n" "| {{Integraality cell|8.33|10|column=P1/P2|grouping=}}\n" "| {{Integraality cell|10.0|12|column=P3/Q4/P5|grouping=}}\n" "| {{Integraality cell|20.0|24|column=Lbr|grouping=}}\n" "| {{Integraality cell|30.0|36|column=Dxy|grouping=}}\n" "| {{Integraality cell|20.0|24|column=brwiki|grouping=}}\n" ) self.assertEqual(result, expected) class PopulateGroupingsTest(SparqlQueryTest, PropertyStatisticsTest): def test_populate_groupings_empty(self): result = self.stats.populate_groupings(None) self.assertEqual(result, None) def test_populate_groupings_no_columns(self): groupings = { "Q3115846": ItemGrouping(title="Q3115846", count=10), "Q5087901": ItemGrouping(title="Q5087901", count=6), "Q623333": ItemGrouping(title="Q623333", count=6), } result = self.stats.populate_groupings(groupings) self.assertEqual(result, groupings) def test_populate_groupings_with_columns(self): groupings = { "Q3115846": ItemGrouping(title="Q3115846", count=10), "Q5087901": ItemGrouping(title="Q5087901", count=6), "Q623333": ItemGrouping(title="Q623333", count=6), } self.mock_sparql_query.return_value.select.side_effect = [ [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "1"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "2"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "3"}, {"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "4"}, ], [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "5"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "6"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "7"}, {"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "8"}, ], [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "9"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "10"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "11"}, {"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "12"}, ], [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "13"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "14"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "15"}, {"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "16"}, ], [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "17"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "18"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "19"}, {"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "20"}, ], [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "21"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "22"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "23"}, {"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "24"}, ], [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "25"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "26"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "27"}, {"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "28"}, ], ] result = self.stats.populate_groupings(groupings) expected = { "Q3115846": ItemGrouping( title="Q3115846", count=10, cells=OrderedDict( [ ("P21", 1), ("P19", 5), - ("P1P2", 9), - ("P3Q4P5", 13), + ("P1/P2", 9), + ("P3/Q4/P5", 13), ("Lbr", 17), ("Dxy", 21), ("brwiki", 25), ] ), ), "Q5087901": ItemGrouping( title="Q5087901", count=6, cells=OrderedDict( [ ("P21", 2), ("P19", 6), - ("P1P2", 10), - ("P3Q4P5", 14), + ("P1/P2", 10), + ("P3/Q4/P5", 14), ("Lbr", 18), ("Dxy", 22), ("brwiki", 26), ] ), ), "Q623333": ItemGrouping( title="Q623333", count=6, cells=OrderedDict( [ ("P21", 3), ("P19", 7), - ("P1P2", 11), - ("P3Q4P5", 15), + ("P1/P2", 11), + ("P3/Q4/P5", 15), ("Lbr", 19), ("Dxy", 23), ("brwiki", 27), ] ), ), } self.assertEqual(result, expected) def test_populate_groupings_with_columns_one_empty(self): groupings = { "Q3115846": ItemGrouping(title="Q3115846", count=10), "Q5087901": ItemGrouping(title="Q5087901", count=6), "Q623333": ItemGrouping(title="Q623333", count=6), } self.mock_sparql_query.return_value.select.side_effect = [ [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "1"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "2"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "3"}, ], None, [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "9"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "10"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "11"}, ], [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "13"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "14"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "15"}, ], [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "17"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "18"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "19"}, ], [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "21"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "22"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "23"}, ], [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "24"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "25"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "26"}, ], ] result = self.stats.populate_groupings(groupings) expected = { "Q3115846": ItemGrouping( title="Q3115846", count=10, cells=OrderedDict( [ ("P21", 1), - ("P1P2", 9), - ("P3Q4P5", 13), + ("P1/P2", 9), + ("P3/Q4/P5", 13), ("Lbr", 17), ("Dxy", 21), ("brwiki", 24), ] ), ), "Q5087901": ItemGrouping( title="Q5087901", count=6, cells=OrderedDict( [ ("P21", 2), - ("P1P2", 10), - ("P3Q4P5", 14), + ("P1/P2", 10), + ("P3/Q4/P5", 14), ("Lbr", 18), ("Dxy", 22), ("brwiki", 25), ] ), ), "Q623333": ItemGrouping( title="Q623333", count=6, cells=OrderedDict( [ ("P21", 3), - ("P1P2", 11), - ("P3Q4P5", 15), + ("P1/P2", 11), + ("P3/Q4/P5", 15), ("Lbr", 19), ("Dxy", 23), ("brwiki", 26), ] ), ), } self.assertEqual(result, expected) class RetrieveDataTest(SparqlQueryTest, PropertyStatisticsTest): def test_retrieve_data_empty(self): result = self.stats.retrieve_data() expected = {} self.assertEqual(result, expected) def test_retrieve_data(self): self.mock_sparql_query.return_value.select.return_value = [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "10"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "6"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "6"}, ] result = self.stats.retrieve_data() print(result) expected = { "Q3115846": ItemGrouping( title="Q3115846", count=10, cells=OrderedDict( [ ("P21", 10), ("P19", 10), - ("P1P2", 10), - ("P3Q4P5", 10), + ("P1/P2", 10), + ("P3/Q4/P5", 10), ("Lbr", 10), ("Dxy", 10), ("brwiki", 10), ] ), ), "Q5087901": ItemGrouping( title="Q5087901", count=6, cells=OrderedDict( [ ("P21", 6), ("P19", 6), - ("P1P2", 6), - ("P3Q4P5", 6), + ("P1/P2", 6), + ("P3/Q4/P5", 6), ("Lbr", 6), ("Dxy", 6), ("brwiki", 6), ] ), ), "Q623333": ItemGrouping( title="Q623333", count=6, cells=OrderedDict( [ ("P21", 6), ("P19", 6), - ("P1P2", 6), - ("P3Q4P5", 6), + ("P1/P2", 6), + ("P3/Q4/P5", 6), ("Lbr", 6), ("Dxy", 6), ("brwiki", 6), ] ), ), } self.assertEqual(result, expected) class ProcessDataTest(SparqlQueryTest, PropertyStatisticsTest): def test_process_data_empty(self): result = self.stats.process_data({}) expected = ( '{| class="wikitable sortable"\n' '! colspan="2" |Top groupings (Minimum 20 items)\n' '! colspan="7"|Top Properties (used at least 10 times per grouping)\n' "|-\n" "! Name\n" "! Count\n" '! data-sort-type="number"|{{Property|P21}}\n' '! data-sort-type="number"|{{Property|P19}}\n' '! data-sort-type="number"|{{Property|P2}}\n' '! data-sort-type="number"|{{Property|P5}}\n' '! data-sort-type="number"|{{#language:br}}\n' '! data-sort-type="number"|{{#language:xy}}\n' '! data-sort-type="number"|{{Q|Q846871}}\n' '|- class="sortbottom"\n' "| '''Totals''' (all items)\n" "| 1 \n" "| {{Integraality cell|100.0|1|column=P21|grouping=}}\n" "| {{Integraality cell|100.0|1|column=P19|grouping=}}\n" "| {{Integraality cell|100.0|1|column=P1/P2|grouping=}}\n" "| {{Integraality cell|100.0|1|column=P3/Q4/P5|grouping=}}\n" "| {{Integraality cell|100.0|1|column=Lbr|grouping=}}\n" "| {{Integraality cell|100.0|1|column=Dxy|grouping=}}\n" "| {{Integraality cell|100.0|1|column=brwiki|grouping=}}\n" "|}\n" ) self.assertEqual(result, expected) def test_process_data(self): grouping_data = { "Q3115846": ItemGrouping( title="Q3115846", count=10, cells=OrderedDict( [ ("P21", 10), ("P19", 8), - ("P1P2", 2), - ("P3Q4P5", 7), + ("P1/P2", 2), + ("P3/Q4/P5", 7), ("Lbr", 1), ("Dxy", 2), ("brwiki", 1), ] ), ), "Q5087901": ItemGrouping( title="Q5087901", count=6, cells=OrderedDict( [ ("P21", 6), ("P19", 0), - ("P1P2", 0), - ("P3Q4P5", 0), + ("P1/P2", 0), + ("P3/Q4/P5", 0), ("Lbr", 0), ("Dxy", 0), ("brwiki", 0), ] ), ), } result = self.stats.process_data(grouping_data) expected = ( '{| class="wikitable sortable"\n' '! colspan="2" |Top groupings (Minimum 20 items)\n' '! colspan="7"|Top Properties (used at least 10 times per grouping)\n' "|-\n" "! Name\n" "! Count\n" '! data-sort-type="number"|{{Property|P21}}\n' '! data-sort-type="number"|{{Property|P19}}\n' '! data-sort-type="number"|{{Property|P2}}\n' '! data-sort-type="number"|{{Property|P5}}\n' '! data-sort-type="number"|{{#language:br}}\n' '! data-sort-type="number"|{{#language:xy}}\n' '! data-sort-type="number"|{{Q|Q846871}}\n' "|-\n" "| {{Q|Q3115846}}\n" "| 10 \n" "| {{Integraality cell|100.0|10|column=P21|grouping=Q3115846}}\n" "| {{Integraality cell|80.0|8|column=P19|grouping=Q3115846}}\n" "| {{Integraality cell|20.0|2|column=P1/P2|grouping=Q3115846}}\n" "| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=Q3115846}}\n" "| {{Integraality cell|10.0|1|column=Lbr|grouping=Q3115846}}\n" "| {{Integraality cell|20.0|2|column=Dxy|grouping=Q3115846}}\n" "| {{Integraality cell|10.0|1|column=brwiki|grouping=Q3115846}}\n" "|-\n" "| {{Q|Q5087901}}\n" "| 6 \n" "| {{Integraality cell|100.0|6|column=P21|grouping=Q5087901}}\n" "| {{Integraality cell|0|0|column=P19|grouping=Q5087901}}\n" "| {{Integraality cell|0|0|column=P1/P2|grouping=Q5087901}}\n" "| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=Q5087901}}\n" "| {{Integraality cell|0|0|column=Lbr|grouping=Q5087901}}\n" "| {{Integraality cell|0|0|column=Dxy|grouping=Q5087901}}\n" "| {{Integraality cell|0|0|column=brwiki|grouping=Q5087901}}\n" '|- class="sortbottom"\n' "| '''Totals''' (all items)\n" "| 1 \n" "| {{Integraality cell|100.0|1|column=P21|grouping=}}\n" "| {{Integraality cell|100.0|1|column=P19|grouping=}}\n" "| {{Integraality cell|100.0|1|column=P1/P2|grouping=}}\n" "| {{Integraality cell|100.0|1|column=P3/Q4/P5|grouping=}}\n" "| {{Integraality cell|100.0|1|column=Lbr|grouping=}}\n" "| {{Integraality cell|100.0|1|column=Dxy|grouping=}}\n" "| {{Integraality cell|100.0|1|column=brwiki|grouping=}}\n" "|}\n" ) self.assertEqual(result, expected) def test_process_data_year_grouping(self): grouping_data = { "2001": YearGrouping( title="2001", count=10, cells=OrderedDict( [ ("P21", 10), ("P19", 8), - ("P1P2", 2), - ("P3Q4P5", 7), + ("P1/P2", 2), + ("P3/Q4/P5", 7), ("Lbr", 1), ("Dxy", 2), ("brwiki", 1), ] ), ), "2018": YearGrouping( title="2018", count=6, cells=OrderedDict( [ ("P21", 6), ("P19", 0), - ("P1P2", 0), - ("P3Q4P5", 0), + ("P1/P2", 0), + ("P3/Q4/P5", 0), ("Lbr", 0), ("Dxy", 0), ("brwiki", 0), ] ), ), } result = self.stats.process_data(grouping_data) expected = ( '{| class="wikitable sortable"\n' '! colspan="2" |Top groupings (Minimum 20 items)\n' '! colspan="7"|Top Properties (used at least 10 times per grouping)\n' "|-\n" "! Name\n" "! Count\n" '! data-sort-type="number"|{{Property|P21}}\n' '! data-sort-type="number"|{{Property|P19}}\n' '! data-sort-type="number"|{{Property|P2}}\n' '! data-sort-type="number"|{{Property|P5}}\n' '! data-sort-type="number"|{{#language:br}}\n' '! data-sort-type="number"|{{#language:xy}}\n' '! data-sort-type="number"|{{Q|Q846871}}\n' "|-\n" "| 2001\n" "| 10 \n" "| {{Integraality cell|100.0|10|column=P21|grouping=2001}}\n" "| {{Integraality cell|80.0|8|column=P19|grouping=2001}}\n" "| {{Integraality cell|20.0|2|column=P1/P2|grouping=2001}}\n" "| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=2001}}\n" "| {{Integraality cell|10.0|1|column=Lbr|grouping=2001}}\n" "| {{Integraality cell|20.0|2|column=Dxy|grouping=2001}}\n" "| {{Integraality cell|10.0|1|column=brwiki|grouping=2001}}\n" "|-\n" "| 2018\n" "| 6 \n" "| {{Integraality cell|100.0|6|column=P21|grouping=2018}}\n" "| {{Integraality cell|0|0|column=P19|grouping=2018}}\n" "| {{Integraality cell|0|0|column=P1/P2|grouping=2018}}\n" "| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=2018}}\n" "| {{Integraality cell|0|0|column=Lbr|grouping=2018}}\n" "| {{Integraality cell|0|0|column=Dxy|grouping=2018}}\n" "| {{Integraality cell|0|0|column=brwiki|grouping=2018}}\n" '|- class="sortbottom"\n' "| '''Totals''' (all items)\n" "| 1 \n" "| {{Integraality cell|100.0|1|column=P21|grouping=}}\n" "| {{Integraality cell|100.0|1|column=P19|grouping=}}\n" "| {{Integraality cell|100.0|1|column=P1/P2|grouping=}}\n" "| {{Integraality cell|100.0|1|column=P3/Q4/P5|grouping=}}\n" "| {{Integraality cell|100.0|1|column=Lbr|grouping=}}\n" "| {{Integraality cell|100.0|1|column=Dxy|grouping=}}\n" "| {{Integraality cell|100.0|1|column=brwiki|grouping=}}\n" "|}\n" ) self.assertEqual(result, expected) class RetrieveAndProcessDataTest(SparqlQueryTest, PropertyStatisticsTest): def test_retrieve_and_process_data(self): self.mock_sparql_query.return_value.select.return_value = [ {"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "10"}, {"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "6"}, {"grouping": "http://www.wikidata.org/entity/Q623333", "count": "6"}, ] result = self.stats.retrieve_and_process_data() expected = ( '{| class="wikitable sortable"\n' '! colspan="2" |Top groupings (Minimum 20 items)\n' '! colspan="7"|Top Properties (used at least 10 times per grouping)\n' "|-\n" "! Name\n" "! Count\n" '! data-sort-type="number"|{{Property|P21}}\n' '! data-sort-type="number"|{{Property|P19}}\n' '! data-sort-type="number"|{{Property|P2}}\n' '! data-sort-type="number"|{{Property|P5}}\n' '! data-sort-type="number"|{{#language:br}}\n' '! data-sort-type="number"|{{#language:xy}}\n' '! data-sort-type="number"|{{Q|Q846871}}\n' "|-\n" "| {{Q|Q3115846}}\n" "| 10 \n" "| {{Integraality cell|100.0|10|column=P21|grouping=Q3115846}}\n" "| {{Integraality cell|100.0|10|column=P19|grouping=Q3115846}}\n" "| {{Integraality cell|100.0|10|column=P1/P2|grouping=Q3115846}}\n" "| {{Integraality cell|100.0|10|column=P3/Q4/P5|grouping=Q3115846}}\n" "| {{Integraality cell|100.0|10|column=Lbr|grouping=Q3115846}}\n" "| {{Integraality cell|100.0|10|column=Dxy|grouping=Q3115846}}\n" "| {{Integraality cell|100.0|10|column=brwiki|grouping=Q3115846}}\n" "|-\n" "| {{Q|Q5087901}}\n" "| 6 \n" "| {{Integraality cell|100.0|6|column=P21|grouping=Q5087901}}\n" "| {{Integraality cell|100.0|6|column=P19|grouping=Q5087901}}\n" "| {{Integraality cell|100.0|6|column=P1/P2|grouping=Q5087901}}\n" "| {{Integraality cell|100.0|6|column=P3/Q4/P5|grouping=Q5087901}}\n" "| {{Integraality cell|100.0|6|column=Lbr|grouping=Q5087901}}\n" "| {{Integraality cell|100.0|6|column=Dxy|grouping=Q5087901}}\n" "| {{Integraality cell|100.0|6|column=brwiki|grouping=Q5087901}}\n" "|-\n" "| {{Q|Q623333}}\n" "| 6 \n" "| {{Integraality cell|100.0|6|column=P21|grouping=Q623333}}\n" "| {{Integraality cell|100.0|6|column=P19|grouping=Q623333}}\n" "| {{Integraality cell|100.0|6|column=P1/P2|grouping=Q623333}}\n" "| {{Integraality cell|100.0|6|column=P3/Q4/P5|grouping=Q623333}}\n" "| {{Integraality cell|100.0|6|column=Lbr|grouping=Q623333}}\n" "| {{Integraality cell|100.0|6|column=Dxy|grouping=Q623333}}\n" "| {{Integraality cell|100.0|6|column=brwiki|grouping=Q623333}}\n" '|- class="sortbottom"\n' "| '''Totals''' (all items)\n" "| 10 \n" "| {{Integraality cell|100.0|10|column=P21|grouping=}}\n" "| {{Integraality cell|100.0|10|column=P19|grouping=}}\n" "| {{Integraality cell|100.0|10|column=P1/P2|grouping=}}\n" "| {{Integraality cell|100.0|10|column=P3/Q4/P5|grouping=}}\n" "| {{Integraality cell|100.0|10|column=Lbr|grouping=}}\n" "| {{Integraality cell|100.0|10|column=Dxy|grouping=}}\n" "| {{Integraality cell|100.0|10|column=brwiki|grouping=}}\n" "|}\n" ) self.assertEqual(result, expected) def test_retrieve_and_process_data_year_grouping(self): self.grouping_configuration = YearGroupingConfiguration("P551") self.stats = PropertyStatistics( columns=self.columns, grouping_configuration=self.grouping_configuration, selector_sparql="wdt:P31 wd:Q41960", property_threshold=10, ) self.mock_sparql_query.return_value.select.return_value = [ {"grouping": "2001", "count": "10"}, {"grouping": "2012", "count": "6"}, {"grouping": "2023", "count": "6"}, ] result = self.stats.retrieve_and_process_data() expected = ( '{| class="wikitable sortable"\n' '! colspan="2" |Top groupings (Minimum 20 items)\n' '! colspan="7"|Top Properties (used at least 10 times per grouping)\n' "|-\n" "! Name\n" "! Count\n" '! data-sort-type="number"|{{Property|P21}}\n' '! data-sort-type="number"|{{Property|P19}}\n' '! data-sort-type="number"|{{Property|P2}}\n' '! data-sort-type="number"|{{Property|P5}}\n' '! data-sort-type="number"|{{#language:br}}\n' '! data-sort-type="number"|{{#language:xy}}\n' '! data-sort-type="number"|{{Q|Q846871}}\n' "|-\n" "| 2001\n" "| 10 \n" "| {{Integraality cell|100.0|10|column=P21|grouping=2001}}\n" "| {{Integraality cell|100.0|10|column=P19|grouping=2001}}\n" "| {{Integraality cell|100.0|10|column=P1/P2|grouping=2001}}\n" "| {{Integraality cell|100.0|10|column=P3/Q4/P5|grouping=2001}}\n" "| {{Integraality cell|100.0|10|column=Lbr|grouping=2001}}\n" "| {{Integraality cell|100.0|10|column=Dxy|grouping=2001}}\n" "| {{Integraality cell|100.0|10|column=brwiki|grouping=2001}}\n" "|-\n" "| 2012\n" "| 6 \n" "| {{Integraality cell|100.0|6|column=P21|grouping=2012}}\n" "| {{Integraality cell|100.0|6|column=P19|grouping=2012}}\n" "| {{Integraality cell|100.0|6|column=P1/P2|grouping=2012}}\n" "| {{Integraality cell|100.0|6|column=P3/Q4/P5|grouping=2012}}\n" "| {{Integraality cell|100.0|6|column=Lbr|grouping=2012}}\n" "| {{Integraality cell|100.0|6|column=Dxy|grouping=2012}}\n" "| {{Integraality cell|100.0|6|column=brwiki|grouping=2012}}\n" "|-\n" "| 2023\n" "| 6 \n" "| {{Integraality cell|100.0|6|column=P21|grouping=2023}}\n" "| {{Integraality cell|100.0|6|column=P19|grouping=2023}}\n" "| {{Integraality cell|100.0|6|column=P1/P2|grouping=2023}}\n" "| {{Integraality cell|100.0|6|column=P3/Q4/P5|grouping=2023}}\n" "| {{Integraality cell|100.0|6|column=Lbr|grouping=2023}}\n" "| {{Integraality cell|100.0|6|column=Dxy|grouping=2023}}\n" "| {{Integraality cell|100.0|6|column=brwiki|grouping=2023}}\n" '|- class="sortbottom"\n' "| '''Totals''' (all items)\n" "| 10 \n" "| {{Integraality cell|100.0|10|column=P21|grouping=}}\n" "| {{Integraality cell|100.0|10|column=P19|grouping=}}\n" "| {{Integraality cell|100.0|10|column=P1/P2|grouping=}}\n" "| {{Integraality cell|100.0|10|column=P3/Q4/P5|grouping=}}\n" "| {{Integraality cell|100.0|10|column=Lbr|grouping=}}\n" "| {{Integraality cell|100.0|10|column=Dxy|grouping=}}\n" "| {{Integraality cell|100.0|10|column=brwiki|grouping=}}\n" "|}\n" ) self.assertEqual(result, expected)