diff --git a/integraality/column.py b/integraality/column.py
index 0dd7c62..33eb01a 100644
--- a/integraality/column.py
+++ b/integraality/column.py
@@ -1,282 +1,273 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Column configuration classes
"""
import json
import os
from enum import Enum
class GroupingType(Enum):
    """Enumeration of named grouping types; ``YEAR`` is the only member."""

    YEAR = "year"
class ColumnSyntaxException(Exception):
    """Raised when a column key does not match any known column syntax."""
class ColumnMaker:
    """Factory turning a textual column key into the matching column object."""

    @staticmethod
    def make(key, title):
        """
        Build the column described by ``key``.

        Supported syntaxes, checked in this order:

        * ``P...``: property column, optionally ``property/qualifier`` or
          ``property/value/qualifier``
        * ``L<language>``: label column
        * ``D<language>``: description column
        * a key of ``wikiprojects.json``: sitelink column

        :param key: (str) column key
        :param title: column title, forwarded to property and sitelink columns
        :return: the column object
        :raises ColumnSyntaxException: when the key matches no known syntax
        """
        if key.startswith("P"):
            parts = key.split("/")
            if len(parts) == 3:
                property_name, value, qualifier = parts
            elif len(parts) == 2:
                property_name, value, qualifier = parts[0], None, parts[1]
            else:
                # Any other number of parts: the whole key is the property
                property_name, value, qualifier = key, None, None
            return PropertyColumn(
                property=property_name, title=title, qualifier=qualifier, value=value
            )
        if key.startswith("L"):
            return LabelColumn(language=key[1:])
        if key.startswith("D"):
            return DescriptionColumn(language=key[1:])

        # The project list is only needed for sitelink keys, so it is read
        # here rather than on every call; the context manager makes sure the
        # file handle is closed again.
        wikiprojects_path = os.path.join(
            os.path.dirname(__file__), "wikiprojects.json"
        )
        with open(wikiprojects_path, "r", encoding="utf-8") as handle:
            wikiprojects = json.load(handle)
        if key in wikiprojects:
            return SitelinkColumn(project=key, title=title)
        raise ColumnSyntaxException("Unknown column syntax %s" % key)
class AbstractColumn:
"""
Base class for columns; builds the counting SPARQL queries shared by all column types.

Every query embeds self.get_filter_for_info() inside a FILTER(EXISTS {...}) clause.
That method is not defined in this class, so concrete column classes have to supply it.
"""
def get_info_query(self, property_statistics):
"""
Get the usage counts for a column for the groupings
:param property_statistics: object providing grouping_configuration,
selector_sparql and property_threshold
:return: (str) SPARQL query
"""
# get_grouping_selector() returns several query lines; join them into one fragment
grouping_selector = "\n".join(property_statistics.grouping_configuration.get_grouping_selector())
# Groupings counting fewer than property_threshold entities are dropped,
# and at most 1000 groupings are returned.
query = f"""
SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {{
?entity {property_statistics.selector_sparql} .
{grouping_selector}
FILTER(EXISTS {{{self.get_filter_for_info()}
}})
}}
GROUP BY ?grouping
HAVING (?count >= {property_statistics.property_threshold})
ORDER BY DESC(?count)
LIMIT 1000
"""
return query
def get_totals_query(self, property_statistics):
"""
Get the totals of entities with the column set.
:param property_statistics: object providing selector_sparql
:return: (str) SPARQL query
"""
query = f"""
SELECT (COUNT(*) as ?count) WHERE {{
?entity {property_statistics.selector_sparql}
FILTER(EXISTS {{{self.get_filter_for_info()}
}})
}}
"""
return query
def get_info_no_grouping_query(self, property_statistics):
"""
Get the usage counts for a column without a grouping
:param property_statistics: object providing selector_sparql and
grouping_configuration.property
:return: (str) SPARQL query
"""
# The MINUS clause removes every entity that has a value for the grouping property
query = f"""
SELECT (COUNT(*) AS ?count) WHERE {{
?entity {property_statistics.selector_sparql} .
MINUS {{ ?entity wdt:{property_statistics.grouping_configuration.property} _:b28. }}
FILTER(EXISTS {{{self.get_filter_for_info()}
}})
}}
"""
return query
class PropertyColumn(AbstractColumn):
def __init__(self, property, title=None, value=None, qualifier=None):
    """
    Store the column configuration.

    :param property: property id of the column
    :param title: optional title used in the column header
    :param value: optional value restricting the matched statements
    :param qualifier: optional qualifier property id
    """
    self.property, self.title = property, title
    self.value, self.qualifier = value, qualifier
def __eq__(self, other):
    """
    Compare property, title, value and qualifier with those of ``other``.

    :param other: object to compare with
    :return: True when all four attributes are equal, False when one
        differs, ``NotImplemented`` when ``other`` lacks one of them, so
        that ``==`` against an unrelated object evaluates to False
        instead of raising ``AttributeError``
    """
    compared = ("property", "title", "value", "qualifier")
    if not all(hasattr(other, name) for name in compared):
        return NotImplemented
    return all(getattr(self, name) == getattr(other, name) for name in compared)
- def get_title(self):
- return "/".join([x for x in [self.property, self.value, self.qualifier] if x])
-
def get_key(self):
"""Return the column key, joining the non-empty property, value and qualifier parts."""
- return "".join([x for x in [self.property, self.value, self.qualifier] if x])
+ return "/".join([x for x in [self.property, self.value, self.qualifier] if x])
def get_type_name(self):
    """Return the type name of this kind of column."""
    return "property"
def format_html_snippet(self):
    """
    Return the snippet describing this column: the property id as text.

    NOTE(review): despite the name, no HTML markup is produced here.
    """
    return "{}".format(self.property)
def make_column_header(self):
    """
    Build the wikitext header cell of the column.

    The header points at the qualifier when one is set, otherwise at the
    property. With a title a plain wikilink is produced, without one the
    ``Property`` template is used.

    :return: (str) wikitext header line
    """
    linked_property = self.qualifier if self.qualifier else self.property
    if not self.title:
        label = f"{{{{Property|{linked_property}}}}}"
    else:
        label = f"[[Property:{linked_property}|{self.title}]]"
    return f'! data-sort-type="number"|{label}\n'
def get_filter_for_info(self):
"""
Build the SPARQL pattern matching entities that have this column set.
:return: (str) SPARQL fragment
"""
if self.qualifier:
# Without a configured value, "[]" accepts any statement value
property_value = f"wd:{self.value}" if self.value else "[]"
return f"""
?entity p:{self.property} [ ps:{self.property} {property_value} ; pq:{self.qualifier} [] ]"""
else:
return f"""
?entity p:{self.property}[]"""
def get_filter_for_positive_query(self):
"""
Build the SPARQL fragment selecting the statements of the property, their
optional values and the label service.
Only self.property is used here; value and qualifier are not part of the fragment.
:return: (str) SPARQL fragment
"""
return f"""
?entity p:{self.property} ?prop . OPTIONAL {{ ?prop ps:{self.property} ?value }}
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
"""
def get_filter_for_negative_query(self):
"""
Build the SPARQL fragment matching entities that have the property set or
explicitly set to "no value" (wdno:), followed by the label service.
The fragment contains a closing brace that is not opened here.
NOTE(review): presumably it closes a "MINUS {" emitted by the caller - confirm.
:return: (str) SPARQL fragment
"""
return f"""
{{?entity a wdno:{self.property} .}} UNION
{{?entity wdt:{self.property} ?prop .}}
}}
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
"""
class TextColumn(AbstractColumn):
def __init__(self, language, title=None):
    """
    Store the column configuration.

    :param language: language code of the counted text
    :param title: optional title used in the column header
    """
    self.language, self.title = language, title
def __eq__(self, other):
    """
    Compare this column with ``other``.

    Two text columns are equal only when they are of the same concrete
    class and share language and title. Without the class check a label
    column and a description column of the same language would compare
    equal.

    :param other: object to compare with
    :return: True or False for objects of the same class,
        ``NotImplemented`` otherwise (``==`` then evaluates to False)
    """
    if type(other) is not type(self):
        return NotImplemented
    return self.language == other.language and self.title == other.title
- def get_title(self):
- return self.get_key()
-
def format_html_snippet(self):
    """Return the snippet describing this column: language code and type name."""
    return "{} {}".format(self.language, self.get_type_name())
def make_column_header(self):
    """
    Build the wikitext header cell of the column.

    The title is used when set; otherwise the ``#language`` parser
    function is emitted for the language code.

    :return: (str) wikitext header line
    """
    text = f"{self.title}" if self.title else f"{{{{#language:{self.language}}}}}"
    return f'! data-sort-type="number"|{text}\n'
def get_filter_for_info(self):
"""
Build the SPARQL pattern matching entities that have a text in the column language.
The predicate comes from self.get_selector(), which is not defined in this class.
:return: (str) SPARQL fragment
"""
return f"""
?entity {self.get_selector()} ?lang_label.
FILTER((LANG(?lang_label)) = '{self.language}')."""
def get_filter_for_positive_query(self):
"""
Build the SPARQL fragment keeping entities that have a text in the column
language, with the label service set to that language.
:return: (str) SPARQL fragment
"""
return f"""
FILTER(EXISTS {{
?entity {self.get_selector()} ?lang_label.
FILTER((LANG(?lang_label)) = "{self.language}").
}})
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{self.language}". }}
"""
def get_filter_for_negative_query(self):
"""
Build the SPARQL fragment matching entities that have a text in the column
language, followed by the label service.
The fragment contains a closing brace that is not opened here.
NOTE(review): presumably it closes a "MINUS {" emitted by the caller - confirm.
:return: (str) SPARQL fragment
"""
return f"""
{{ ?entity {self.get_selector()} ?lang_label.
FILTER((LANG(?lang_label)) = "{self.language}") }}
}}
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
"""
class LabelColumn(TextColumn):
    """Text column based on the ``rdfs:label`` predicate."""

    def get_key(self):
        """Return the column key: ``L`` followed by the language code."""
        return f"L{self.language}"

    def get_selector(self):
        """Return the SPARQL predicate holding the text."""
        return "rdfs:label"

    def get_type_name(self):
        """Return the type name of this kind of column."""
        return "label"
class DescriptionColumn(TextColumn):
    """Text column based on the ``schema:description`` predicate."""

    def get_key(self):
        """Return the column key: ``D`` followed by the language code."""
        return f"D{self.language}"

    def get_selector(self):
        """Return the SPARQL predicate holding the text."""
        return "schema:description"

    def get_type_name(self):
        """Return the type name of this kind of column."""
        return "description"
class SitelinkColumn(AbstractColumn):
def __init__(self, project, title=None):
    """
    Store the column configuration and look the project up in ``wikiprojects.json``.

    The JSON file is read from the directory of this module.

    :param project: key of the project in ``wikiprojects.json``
    :param title: optional title used in the column header
    :raises KeyError: when the project, or its ``url`` / ``item`` entry, is missing
    """
    wikiprojects_path = os.path.join(os.path.dirname(__file__), "wikiprojects.json")
    # The context manager closes the file handle, which a bare open() left dangling
    with open(wikiprojects_path, "r", encoding="utf-8") as handle:
        wikiprojects = json.load(handle)
    self.project = project
    project_info = wikiprojects[project]
    self.url = project_info["url"]
    self.item = project_info["item"]
    self.title = title
def __eq__(self, other):
    """
    Compare url and title with those of ``other``.

    :param other: object to compare with
    :return: True when both attributes are equal, False when one differs,
        ``NotImplemented`` when ``other`` lacks one of them, so that
        ``==`` against an unrelated object evaluates to False instead of
        raising ``AttributeError``
    """
    if not (hasattr(other, "url") and hasattr(other, "title")):
        return NotImplemented
    return self.url == other.url and self.title == other.title
def get_key(self):
    """Return the column key, which is the project name."""
    return self.project
- def get_title(self):
- return self.get_key()
-
def get_type_name(self):
    """Return the type name of this kind of column."""
    return "sitelink"
def format_html_snippet(self):
"""Return the snippet describing this column: an identifier of the column followed by its type name."""
- return f'{self.get_title()} {self.get_type_name()}'
+ return f'{self.get_key()} {self.get_type_name()}'
def make_column_header(self):
    """
    Build the wikitext header cell of the column.

    With a title a plain wikilink to the project item is produced,
    without one the ``Q`` template is used.

    :return: (str) wikitext header line
    """
    if not self.title:
        label = f"{{{{Q|{self.item}}}}}"
    else:
        label = f"[[{self.item}|{self.title}]]"
    return f'! data-sort-type="number"|{label}\n'
def get_filter_for_info(self):
"""
Build the SPARQL pattern matching entities that have a sitelink on the project url.
:return: (str) SPARQL fragment
"""
return f"""
?sitelink schema:about ?entity;
schema:isPartOf <{self.url}>."""
def get_filter_for_positive_query(self):
"""
Build the SPARQL fragment selecting the sitelinks on the project url, binding
the sitelink name to ?value, followed by the label service.
:return: (str) SPARQL fragment
"""
return f"""
?sitelink schema:about ?entity;
schema:isPartOf <{self.url}>;
schema:name ?value.
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
"""
def get_filter_for_negative_query(self):
"""
Build the SPARQL fragment matching entities that have a sitelink on the
project url, followed by the label service.
The fragment contains a closing brace that is not opened here.
NOTE(review): presumably it closes a "MINUS {" emitted by the caller - confirm.
:return: (str) SPARQL fragment
"""
return f"""
?sitelink schema:about ?entity;
schema:isPartOf <{self.url}>.
}}
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
"""
diff --git a/integraality/line.py b/integraality/line.py
index b6afdf9..4f47533 100644
--- a/integraality/line.py
+++ b/integraality/line.py
@@ -1,232 +1,232 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Line configuration classes
"""
import collections
import logging
import re
import pywikibot
class AbstractLine:
    """A line of the statistics table: a total count plus per-column cell counts."""

    def __init__(self, count, cells=None):
        """
        :param count: total used as the base of the percentages
        :param cells: mapping of column key to count; any falsy value
            (``None`` or an empty mapping) is replaced by a new OrderedDict
        """
        self.count = count
        self.cells = cells if cells else collections.OrderedDict()

    def get_percentage(self, value):
        """
        Express ``value`` as a percentage of the line count.

        :param value: count to convert; falsy values give 0
        :return: percentage rounded to two decimals
        """
        if value:
            # A count below 1 is treated as 1 to avoid dividing by zero
            denominator = max(self.count, 1)
            return round(1.0 * value / denominator * 100, 2)
        return 0
class Grouping(AbstractLine):
is_linkable = True
def __init__(self, count, cells=None, title=None, higher_grouping=None):
    """
    :param count: total of the line, forwarded to the parent class
    :param cells: mapping of column key to count, forwarded to the parent class
    :param title: title of the grouping
    :param higher_grouping: optional value of the higher grouping
    """
    super().__init__(count, cells)
    self.title, self.higher_grouping = title, higher_grouping
def __eq__(self, other):
    """
    Compare count, title, higher grouping and cells with those of ``other``.

    :param other: object to compare with
    :return: True when all four attributes are equal, False when one
        differs, ``NotImplemented`` when ``other`` lacks one of them, so
        that ``==`` against an unrelated object evaluates to False
        instead of raising ``AttributeError``
    """
    compared = ("count", "title", "higher_grouping", "cells")
    if not all(hasattr(other, name) for name in compared):
        return NotImplemented
    return all(getattr(self, name) == getattr(other, name) for name in compared)
def __repr__(self):
    """Return ``title:count - key:value,...`` listing every cell."""
    rendered_cells = ",".join("%s:%s" % item for item in self.cells.items())
    return f"{self.title}:{self.count} - {rendered_cells}"
def get_key(self):
    """Return the key of the grouping, which is its title."""
    return self.title
def format_header_cell(self, grouping_configuration, grouping_type):
    """
    Build the leading wikitext cell(s) of the line.

    The higher grouping cell is only emitted when both this line and the
    grouping configuration have a higher grouping.

    :param grouping_configuration: object exposing ``higher_grouping``
    :param grouping_type: passed on to ``format_higher_grouping_text``
    :return: (str) wikitext
    """
    show_higher = (
        self.higher_grouping is not None
        and grouping_configuration.higher_grouping
    )
    prefix = self.format_higher_grouping_text(grouping_type) if show_higher else ""
    return prefix + f"| {self.heading()}\n"
def format_cell(self, column_entry, cell_template):
"""
Format the wikitext table cell of one column for this line.
:param column_entry: column object; its get_key() is used to look up the count in self.cells
:param cell_template: first field of the emitted template call
:return: (str) wikitext table cell
"""
# Columns without an entry in self.cells count as 0
column_count = self.cells.get(column_entry.get_key(), 0)
percentage = self.get_percentage(column_count)
fields = [
cell_template,
str(percentage),
str(column_count),
- f"column={column_entry.get_title()}",
+ f"column={column_entry.get_key()}",
f"grouping={self.title}",
]
return f'| {{{{{"|".join(fields)}}}}}\n'
def row_opener(self):
    """Return the wikitext opening a table row."""
    return "|-\n"
def format_count_cell(self, grouping_link, repo):
    """
    Build the wikitext cell holding the count of the line.

    The count is rendered as a link when a grouping link is given and
    the line is linkable, otherwise as plain text.

    :param grouping_link: base page of the link, or a falsy value
    :param repo: passed on to ``format_grouping_link``
    :return: (str) wikitext cell
    """
    if not (grouping_link and self.is_linkable):
        return f"| {self.count} \n"
    return self.format_grouping_link(grouping_link, repo)
def format_grouping_link(self, grouping_link, repo=None):
    """
    Build the wikitext cell linking the count to the subpage named after the title.

    :param grouping_link: base page of the link
    :param repo: not used here
    :return: (str) wikitext cell
    """
    target = f"{grouping_link}/{self.title}"
    return f"| [[{target}|{self.count}]] \n"
def postive_query(self, selector_sparql, grouping_predicate=None, grouping=None):
    """
    Build the opening part of the positive query.

    :param selector_sparql: SPARQL selector of the entities
    :param grouping_predicate: passed on to ``postive_query_filter_out_fragment``
    :param grouping: passed on to ``postive_query_filter_out_fragment``
    :return: (str) SPARQL lines joined by newlines; the WHERE block is left open
    """
    lines = [
        "SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE {",
        f" ?entity {selector_sparql} .",
    ]
    lines += self.postive_query_filter_out_fragment(grouping_predicate, grouping)
    return "\n".join(lines)
def postive_query_filter_out_fragment(self, grouping_predicate=None, grouping=None):
    """Return the query lines restricting the grouping: none in this class."""
    return list()
def negative_query(self, selector_sparql, grouping_predicate=None, grouping=None):
    """
    Build the opening part of the negative query.

    :param selector_sparql: SPARQL selector of the entities
    :param grouping_predicate: passed on to ``negative_query_filter_out_fragment``
    :param grouping: passed on to ``negative_query_filter_out_fragment``
    :return: (str) SPARQL lines joined by newlines; the WHERE block is left open
    """
    lines = [
        "SELECT DISTINCT ?entity ?entityLabel WHERE {",
        f" ?entity {selector_sparql} .",
    ]
    lines += self.negative_query_filter_out_fragment(grouping_predicate, grouping)
    return "\n".join(lines)
def negative_query_filter_out_fragment(self, grouping_predicate=None, grouping=None):
    """Return the same grouping restriction lines as the positive query."""
    fragment = self.postive_query_filter_out_fragment(grouping_predicate, grouping)
    return fragment
class NoGroupGrouping(Grouping):
    """Group for items that do not belong to any group."""

    is_linkable = False

    def heading(self):
        """Return the heading text of the line."""
        return "No grouping"

    def format_higher_grouping_text(self, grouping_type=None):
        """Return an empty higher grouping cell."""
        return "|\n"

    def postive_query_filter_out_fragment(self, grouping_predicate, grouping=None):
        """Return the lines excluding every entity that has the grouping predicate."""
        exclusion = [" MINUS {"]
        exclusion.append(f" ?entity {grouping_predicate} [] .")
        exclusion.append(" }")
        return exclusion

    def negative_query_filter_out_fragment(self, grouping_predicate, grouping=None):
        """
        Return the lines opening a MINUS block on the grouping predicate.

        The block is left open and ends with ``UNION``; nothing in this
        method closes it.
        """
        opening = [" MINUS {"]
        opening.append(f" {{?entity {grouping_predicate} [] .}} UNION")
        return opening
class ItemGrouping(Grouping):
    """Grouping whose title is used as an item id (``wd:`` entity, ``Q`` template)."""

    def format_grouping_link(self, grouping_link, repo):
        """
        Build the count cell linking to the subpage named after the English item label.

        Falls back to the title when the label cannot be retrieved.

        :param grouping_link: base page of the link
        :param repo: repository handed to ``pywikibot.ItemPage``
        :return: (str) wikitext cell
        """
        label = self.title
        try:
            group_item = pywikibot.ItemPage(repo, self.title)
            group_item.get(get_redirect=True)
            label = group_item.labels["en"]
        except (
            pywikibot.exceptions.InvalidTitleError,
            pywikibot.exceptions.NoPageError,
            KeyError,
        ):
            logging.info(f"Could not retrieve label for {self.title}")
        return f"| [[{grouping_link}/{label}|{self.count}]] \n"

    def format_higher_grouping_text(self, grouping_type):
        """
        Build the wikitext cell of the higher grouping.

        Item ids are rendered with the ``Q`` template, Commons file paths
        as an image (sorted by file name), the ``country`` grouping type
        with the ``Flag`` template, anything else verbatim.

        :param grouping_type: grouping type name
        :return: (str) wikitext cell
        """
        sort_value = self.higher_grouping
        type_mapping = {
            "country": "{{Flag|%s}}" % sort_value,
        }
        if re.match(r"Q\d+", sort_value):
            text = f"{{{{Q|{sort_value}}}}}"
        else:
            file_match = re.match(
                r"http://commons.wikimedia.org/wiki/Special:FilePath/(.*?)$",
                sort_value,
            )
            if file_match:
                sort_value = file_match.group(1)
                text = f"[[File:{sort_value}|center|100px]]"
            elif grouping_type in type_mapping:
                text = type_mapping[grouping_type]
            else:
                text = sort_value
        return f'| data-sort-value="{sort_value}"| {text}\n'

    def heading(self):
        """Return the heading of the line: the ``Q`` template of the title."""
        return "{{Q|" + f"{self.title}" + "}}"

    def postive_query_filter_out_fragment(self, grouping_predicate, grouping):
        """Return the line restricting entities to the given grouping item."""
        restriction = f" ?entity {grouping_predicate} wd:{grouping} ."
        return [restriction]

    def negative_query_filter_out_fragment(self, grouping_predicate, grouping):
        """Return the same restriction as the positive query."""
        return self.postive_query_filter_out_fragment(grouping_predicate, grouping)
class YearGrouping(Grouping):
    """Grouping whose title is a year."""

    def heading(self):
        """Return the heading of the line: the title as text."""
        return "{}".format(self.title)

    def postive_query_filter_out_fragment(self, grouping_predicate, grouping):
        """Return the lines restricting entities to dates within the given year."""
        date_binding = f" ?entity {grouping_predicate} ?date."
        year_filter = f" FILTER(?year = {grouping})."
        return [date_binding, " BIND(YEAR(?date) as ?year).", year_filter]

    def negative_query_filter_out_fragment(self, grouping_predicate, grouping):
        """Return the same restriction as the positive query."""
        return self.postive_query_filter_out_fragment(grouping_predicate, grouping)
class UnknownValueGrouping(Grouping):
    """Grouping of the entities whose grouping value is a "some value" snak."""

    def get_key(self):
        """Return the fixed key of this grouping."""
        return "UNKNOWN_VALUE"

    def heading(self):
        """Return the heading of the line: an interface message."""
        return "{{int:wikibase-snakview-variations-somevalue-label}}"

    def postive_query_filter_out_fragment(self, grouping_predicate, grouping=None):
        """Return the lines restricting entities to "some value" groupings."""
        binding = f" ?entity {grouping_predicate} ?grouping."
        return [binding, " FILTER wikibase:isSomeValue(?grouping)."]

    def negative_query_filter_out_fragment(self, grouping_predicate, grouping):
        """Return the same restriction as the positive query."""
        return self.postive_query_filter_out_fragment(grouping_predicate, grouping)
class TotalsGrouping(Grouping):
    """Line holding the totals over all items."""

    is_linkable = False

    def heading(self):
        """Return the heading text of the line."""
        return "'''Totals''' (all items)"

    def format_higher_grouping_text(self, grouping_type=None):
        """Return the wikitext standing in for the higher grouping cell."""
        return "||\n"

    def row_opener(self):
        """Return the wikitext opening the row, flagged to stay at the bottom when sorting."""
        return '|- class="sortbottom"\n'
diff --git a/integraality/tests/test_property_statistics.py b/integraality/tests/test_property_statistics.py
index a16f63b..4466f13 100644
--- a/integraality/tests/test_property_statistics.py
+++ b/integraality/tests/test_property_statistics.py
@@ -1,1750 +1,1750 @@
# -*- coding: utf-8 -*-
"""Unit tests for functions.py."""
import unittest
from collections import OrderedDict
from unittest.mock import patch
import pywikibot
from column import (DescriptionColumn, LabelColumn, PropertyColumn,
SitelinkColumn)
from grouping import ItemGroupingConfiguration, YearGroupingConfiguration
from line import ItemGrouping, UnknownValueGrouping, YearGrouping
from property_statistics import PropertyStatistics
from sparql_utils import QueryException
class PropertyStatisticsTest(unittest.TestCase):
    """Base test case providing a PropertyStatistics fixture with seven columns."""

    def setUp(self):
        """Create the columns, the item grouping configuration and the statistics object."""
        property_columns = [
            PropertyColumn(property="P21"),
            PropertyColumn(property="P19"),
            PropertyColumn(property="P1", qualifier="P2"),
            PropertyColumn(property="P3", value="Q4", qualifier="P5"),
        ]
        other_columns = [
            LabelColumn(language="br"),
            DescriptionColumn(language="xy"),
            SitelinkColumn(project="brwiki"),
        ]
        self.columns = property_columns + other_columns
        self.grouping_configuration = ItemGroupingConfiguration("P551")
        stats_arguments = {
            "columns": self.columns,
            "grouping_configuration": self.grouping_configuration,
            "selector_sparql": "wdt:P31 wd:Q41960",
            "property_threshold": 10,
        }
        self.stats = PropertyStatistics(**stats_arguments)
class SparqlQueryTest(unittest.TestCase):
    """Base test case replacing ``pywikibot.data.sparql.SparqlQuery`` with a mock."""

    def setUp(self):
        """Start the patch and register its removal as a cleanup."""
        super().setUp()
        sparql_patcher = patch("pywikibot.data.sparql.SparqlQuery", autospec=True)
        self.mock_sparql_query = sparql_patcher.start()
        self.addCleanup(sparql_patcher.stop)

    def assert_query_called(self, query):
        """Assert that exactly one ``select`` call was made, with ``query``."""
        select_mock = self.mock_sparql_query.return_value.select
        select_mock.assert_called_once_with(query)
class TestLabelColumn(PropertyStatisticsTest):
"""Tests of the header, key and query generation of a LabelColumn for language "br"."""
def setUp(self):
super().setUp()
# Column under test; self.stats comes from the parent fixture
self.column = LabelColumn("br")
def test_simple(self):
result = self.column.make_column_header()
expected = '! data-sort-type="number"|{{#language:br}}\n'
self.assertEqual(result, expected)
def test_get_key(self):
result = self.column.get_key()
self.assertEqual(result, "Lbr")
def test_get_totals_query(self):
result = self.column.get_totals_query(self.stats)
query = (
"\n"
"SELECT (COUNT(*) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960\n"
" FILTER(EXISTS {\n"
" ?entity rdfs:label ?lang_label.\n"
" FILTER((LANG(?lang_label)) = 'br').\n"
" })\n"
"}\n"
)
self.assertEqual(result, query)
def test_get_info_query(self):
result = self.column.get_info_query(self.stats)
query = (
"\n"
"SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" ?entity wdt:P551 ?grouping .\n"
" FILTER(EXISTS {\n"
" ?entity rdfs:label ?lang_label.\n"
" FILTER((LANG(?lang_label)) = 'br').\n"
" })\n"
"}\n"
"GROUP BY ?grouping\n"
"HAVING (?count >= 10)\n"
"ORDER BY DESC(?count)\n"
"LIMIT 1000\n"
)
self.assertEqual(result, query)
def test_get_info_no_grouping_query(self):
result = self.column.get_info_no_grouping_query(self.stats)
query = (
"\n"
"SELECT (COUNT(*) AS ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" MINUS { ?entity wdt:P551 _:b28. }\n"
" FILTER(EXISTS {\n"
" ?entity rdfs:label ?lang_label.\n"
" FILTER((LANG(?lang_label)) = 'br').\n"
" })\n"
"}\n"
)
self.assertEqual(result, query)
class TestDescriptionColumn(PropertyStatisticsTest):
"""Tests of the header, key and query generation of a DescriptionColumn for language "br"."""
def setUp(self):
super().setUp()
# Column under test; self.stats comes from the parent fixture
self.column = DescriptionColumn("br")
def test_simple(self):
result = self.column.make_column_header()
expected = '! data-sort-type="number"|{{#language:br}}\n'
self.assertEqual(result, expected)
def test_get_key(self):
result = self.column.get_key()
self.assertEqual(result, "Dbr")
def test_get_totals_query(self):
result = self.column.get_totals_query(self.stats)
query = (
"\n"
"SELECT (COUNT(*) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960\n"
" FILTER(EXISTS {\n"
" ?entity schema:description ?lang_label.\n"
" FILTER((LANG(?lang_label)) = 'br').\n"
" })\n"
"}\n"
)
self.assertEqual(result, query)
def test_get_info_query(self):
result = self.column.get_info_query(self.stats)
query = (
"\n"
"SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" ?entity wdt:P551 ?grouping .\n"
" FILTER(EXISTS {\n"
" ?entity schema:description ?lang_label.\n"
" FILTER((LANG(?lang_label)) = 'br').\n"
" })\n"
"}\n"
"GROUP BY ?grouping\n"
"HAVING (?count >= 10)\n"
"ORDER BY DESC(?count)\n"
"LIMIT 1000\n"
)
self.assertEqual(result, query)
def test_get_info_no_grouping_query(self):
result = self.column.get_info_no_grouping_query(self.stats)
query = (
"\n"
"SELECT (COUNT(*) AS ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" MINUS { ?entity wdt:P551 _:b28. }\n"
" FILTER(EXISTS {\n"
" ?entity schema:description ?lang_label.\n"
" FILTER((LANG(?lang_label)) = 'br').\n"
" })\n"
"}\n"
)
self.assertEqual(result, query)
class MakeStatsForNoGroupTest(SparqlQueryTest, PropertyStatisticsTest):
"""Tests of PropertyStatistics.make_stats_for_no_group() with mocked query results."""
def setUp(self):
super().setUp()
# The total of the "no grouping" line is mocked to 20
patcher1 = patch(
"property_statistics.PropertyStatistics.get_totals_no_grouping",
autospec=True,
)
self.mock_get_totals_no_grouping = patcher1.start()
self.addCleanup(patcher1.stop)
self.mock_get_totals_no_grouping.return_value = 20
# One mocked query result per column of the fixture, in column order
self.mock_sparql_query.return_value.select.side_effect = [
[{"count": "2"}],
[{"count": "10"}],
[{"count": "15"}],
[{"count": "5"}],
[{"count": "4"}],
[{"count": "8"}],
[{"count": "4"}],
]
def test_make_stats_for_no_group(self):
result = self.stats.make_stats_for_no_group()
expected = (
"|-\n"
"| No grouping\n"
"| 20 \n"
"| {{Integraality cell|10.0|2|column=P21|grouping=None}}\n"
"| {{Integraality cell|50.0|10|column=P19|grouping=None}}\n"
"| {{Integraality cell|75.0|15|column=P1/P2|grouping=None}}\n"
"| {{Integraality cell|25.0|5|column=P3/Q4/P5|grouping=None}}\n"
"| {{Integraality cell|20.0|4|column=Lbr|grouping=None}}\n"
"| {{Integraality cell|40.0|8|column=Dxy|grouping=None}}\n"
"| {{Integraality cell|20.0|4|column=brwiki|grouping=None}}\n"
)
self.assertEqual(result, expected)
self.mock_get_totals_no_grouping.assert_called_once_with(self.stats)
self.assertEqual(self.mock_sparql_query.call_count, 7)
def test_make_stats_for_no_group_with_higher_grouping(self):
self.stats.grouping_configuration.higher_grouping = "wdt:P17/wdt:P298"
result = self.stats.make_stats_for_no_group()
expected = (
"|-\n"
"|\n"
"| No grouping\n"
"| 20 \n"
"| {{Integraality cell|10.0|2|column=P21|grouping=None}}\n"
"| {{Integraality cell|50.0|10|column=P19|grouping=None}}\n"
"| {{Integraality cell|75.0|15|column=P1/P2|grouping=None}}\n"
"| {{Integraality cell|25.0|5|column=P3/Q4/P5|grouping=None}}\n"
"| {{Integraality cell|20.0|4|column=Lbr|grouping=None}}\n"
"| {{Integraality cell|40.0|8|column=Dxy|grouping=None}}\n"
"| {{Integraality cell|20.0|4|column=brwiki|grouping=None}}\n"
)
self.assertEqual(result, expected)
self.mock_get_totals_no_grouping.assert_called_once_with(self.stats)
self.assertEqual(self.mock_sparql_query.call_count, 7)
def test_make_stats_for_no_group_with_grouping_link(self):
self.stats.grouping_link = "Foo"
result = self.stats.make_stats_for_no_group()
# The count cell is expected to stay unlinked even with a grouping link
expected = (
"|-\n"
"| No grouping\n"
"| 20 \n"
"| {{Integraality cell|10.0|2|column=P21|grouping=None}}\n"
"| {{Integraality cell|50.0|10|column=P19|grouping=None}}\n"
"| {{Integraality cell|75.0|15|column=P1/P2|grouping=None}}\n"
"| {{Integraality cell|25.0|5|column=P3/Q4/P5|grouping=None}}\n"
"| {{Integraality cell|20.0|4|column=Lbr|grouping=None}}\n"
"| {{Integraality cell|40.0|8|column=Dxy|grouping=None}}\n"
"| {{Integraality cell|20.0|4|column=brwiki|grouping=None}}\n"
)
self.assertEqual(result, expected)
self.mock_get_totals_no_grouping.assert_called_once_with(self.stats)
self.assertEqual(self.mock_sparql_query.call_count, 7)
class MakeStatsForOneGroupingTest(PropertyStatisticsTest):
"""Tests of PropertyStatistics.format_stats_for_one_grouping() for item, unknown value and year groupings."""
def setUp(self):
super().setUp()
# self.stats.column_data = {
# 'P21': OrderedDict([
# ('Q3115846', 10), ('Q5087901', 6),
# ('UNKNOWN_VALUE', 4)
# ]),
# 'P19': OrderedDict([('Q3115846', 8), ('Q2166574', 5)]),
- # 'P1P2': OrderedDict([('Q3115846', 2), ('Q2166574', 9)]),
- # 'P3Q4P5': OrderedDict([('Q3115846', 7), ('Q2166574', 1)]),
+ # 'P1/P2': OrderedDict([('Q3115846', 2), ('Q2166574', 9)]),
+ # 'P3/Q4/P5': OrderedDict([('Q3115846', 7), ('Q2166574', 1)]),
# 'Lbr': OrderedDict([('Q3115846', 1), ('Q2166574', 2)]),
# 'Dxy': OrderedDict([('Q3115846', 2), ('Q2166574', 1)]),
# }
def test_format_stats_for_one_grouping(self):
grouping = ItemGrouping(title="Q3115846", count=10)
grouping.cells = OrderedDict(
[
("P21", 10),
("P19", 8),
- ("P1P2", 2),
- ("P3Q4P5", 7),
+ ("P1/P2", 2),
+ ("P3/Q4/P5", 7),
("Lbr", 1),
("Dxy", 2),
("brwiki", 1),
]
)
result = self.stats.format_stats_for_one_grouping(grouping)
expected = (
"|-\n"
"| {{Q|Q3115846}}\n"
"| 10 \n"
"| {{Integraality cell|100.0|10|column=P21|grouping=Q3115846}}\n"
"| {{Integraality cell|80.0|8|column=P19|grouping=Q3115846}}\n"
"| {{Integraality cell|20.0|2|column=P1/P2|grouping=Q3115846}}\n"
"| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=Q3115846}}\n"
"| {{Integraality cell|10.0|1|column=Lbr|grouping=Q3115846}}\n"
"| {{Integraality cell|20.0|2|column=Dxy|grouping=Q3115846}}\n"
"| {{Integraality cell|10.0|1|column=brwiki|grouping=Q3115846}}\n"
)
self.assertEqual(result, expected)
def test_make_stats_for_unknown_grouping(self):
grouping = UnknownValueGrouping(title="UNKNOWN_VALUE", count=10)
# Only P21 has a count; the other columns are expected to render as 0
grouping.cells = OrderedDict(
[
("P21", 4),
]
)
result = self.stats.format_stats_for_one_grouping(grouping)
expected = (
"|-\n"
"| {{int:wikibase-snakview-variations-somevalue-label}}\n"
"| 10 \n"
"| {{Integraality cell|40.0|4|column=P21|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=P19|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=P1/P2|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=Lbr|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=Dxy|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=brwiki|grouping=UNKNOWN_VALUE}}\n"
)
self.assertEqual(result, expected)
def test_make_stats_for_unknown_grouping_with_grouping_link(self):
self.stats.grouping_link = "Foo"
grouping = UnknownValueGrouping(title="UNKNOWN_VALUE", count=10)
grouping.cells = OrderedDict(
[
("P21", 4),
]
)
result = self.stats.format_stats_for_one_grouping(grouping)
expected = (
"|-\n"
"| {{int:wikibase-snakview-variations-somevalue-label}}\n"
"| [[Foo/UNKNOWN_VALUE|10]] \n"
"| {{Integraality cell|40.0|4|column=P21|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=P19|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=P1/P2|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=Lbr|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=Dxy|grouping=UNKNOWN_VALUE}}\n"
"| {{Integraality cell|0|0|column=brwiki|grouping=UNKNOWN_VALUE}}\n"
)
self.assertEqual(result, expected)
def test_format_stats_for_one_grouping_with_higher_grouping(self):
self.stats.higher_grouping = "wdt:P17/wdt:P298"
self.stats.grouping_configuration = ItemGroupingConfiguration("P551", "XYZ")
grouping = ItemGrouping(title="Q3115846", count=10, higher_grouping="Q1")
grouping.cells = OrderedDict(
[
("P21", 10),
("P19", 8),
- ("P1P2", 2),
- ("P3Q4P5", 7),
+ ("P1/P2", 2),
+ ("P3/Q4/P5", 7),
("Lbr", 1),
("Dxy", 2),
("brwiki", 1),
]
)
result = self.stats.format_stats_for_one_grouping(grouping)
expected = (
"|-\n"
'| data-sort-value="Q1"| {{Q|Q1}}\n'
"| {{Q|Q3115846}}\n"
"| 10 \n"
"| {{Integraality cell|100.0|10|column=P21|grouping=Q3115846}}\n"
"| {{Integraality cell|80.0|8|column=P19|grouping=Q3115846}}\n"
"| {{Integraality cell|20.0|2|column=P1/P2|grouping=Q3115846}}\n"
"| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=Q3115846}}\n"
"| {{Integraality cell|10.0|1|column=Lbr|grouping=Q3115846}}\n"
"| {{Integraality cell|20.0|2|column=Dxy|grouping=Q3115846}}\n"
"| {{Integraality cell|10.0|1|column=brwiki|grouping=Q3115846}}\n"
)
self.assertEqual(result, expected)
@patch("pywikibot.ItemPage", autospec=True)
def test_format_stats_for_one_grouping_with_grouping_link(self, mock_item_page):
# The mocked item has the English label "Bar", expected in the link target
mock_item_page.return_value.labels = {"en": "Bar"}
self.stats.grouping_link = "Foo"
grouping = ItemGrouping(title="Q3115846", count=10)
grouping.cells = OrderedDict(
[
("P21", 10),
("P19", 8),
- ("P1P2", 2),
- ("P3Q4P5", 7),
+ ("P1/P2", 2),
+ ("P3/Q4/P5", 7),
("Lbr", 1),
("Dxy", 2),
("brwiki", 1),
]
)
result = self.stats.format_stats_for_one_grouping(grouping)
expected = (
"|-\n"
"| {{Q|Q3115846}}\n"
"| [[Foo/Bar|10]] \n"
"| {{Integraality cell|100.0|10|column=P21|grouping=Q3115846}}\n"
"| {{Integraality cell|80.0|8|column=P19|grouping=Q3115846}}\n"
"| {{Integraality cell|20.0|2|column=P1/P2|grouping=Q3115846}}\n"
"| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=Q3115846}}\n"
"| {{Integraality cell|10.0|1|column=Lbr|grouping=Q3115846}}\n"
"| {{Integraality cell|20.0|2|column=Dxy|grouping=Q3115846}}\n"
"| {{Integraality cell|10.0|1|column=brwiki|grouping=Q3115846}}\n"
)
self.assertEqual(result, expected)
@patch("pywikibot.ItemPage", autospec=True)
def test_format_stats_for_one_grouping_with_grouping_link_failure(
self, mock_item_page
):
# Creating the item fails, so the link is expected to fall back to the item id
mock_item_page.side_effect = pywikibot.exceptions.InvalidTitleError("Error")
self.stats.grouping_link = "Foo"
grouping = ItemGrouping(title="Q3115846", count=10)
grouping.cells = OrderedDict(
[
("P21", 10),
("P19", 8),
- ("P1P2", 2),
- ("P3Q4P5", 7),
+ ("P1/P2", 2),
+ ("P3/Q4/P5", 7),
("Lbr", 1),
("Dxy", 2),
("brwiki", 1),
]
)
with self.assertLogs(level="INFO") as cm:
result = self.stats.format_stats_for_one_grouping(grouping)
expected = (
"|-\n"
"| {{Q|Q3115846}}\n"
"| [[Foo/Q3115846|10]] \n"
"| {{Integraality cell|100.0|10|column=P21|grouping=Q3115846}}\n"
"| {{Integraality cell|80.0|8|column=P19|grouping=Q3115846}}\n"
"| {{Integraality cell|20.0|2|column=P1/P2|grouping=Q3115846}}\n"
"| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=Q3115846}}\n"
"| {{Integraality cell|10.0|1|column=Lbr|grouping=Q3115846}}\n"
"| {{Integraality cell|20.0|2|column=Dxy|grouping=Q3115846}}\n"
"| {{Integraality cell|10.0|1|column=brwiki|grouping=Q3115846}}\n"
)
self.assertEqual(result, expected)
self.assertEqual(cm.output, ["INFO:root:Could not retrieve label for Q3115846"])
def test_make_stats_for_year_grouping(self):
grouping = YearGrouping(title="2001", count=10)
grouping.cells = OrderedDict(
[
("P21", 4),
]
)
result = self.stats.format_stats_for_one_grouping(grouping)
expected = (
"|-\n"
"| 2001\n"
"| 10 \n"
"| {{Integraality cell|40.0|4|column=P21|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=P19|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=P1/P2|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=Lbr|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=Dxy|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=brwiki|grouping=2001}}\n"
)
self.assertEqual(result, expected)
def test_make_stats_for_year_grouping_with_grouping_link(self):
self.stats.grouping_link = "Foo"
grouping = YearGrouping(title="2001", count=10)
grouping.cells = OrderedDict(
[
("P21", 4),
]
)
result = self.stats.format_stats_for_one_grouping(grouping)
expected = (
"|-\n"
"| 2001\n"
"| [[Foo/2001|10]] \n"
"| {{Integraality cell|40.0|4|column=P21|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=P19|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=P1/P2|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=Lbr|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=Dxy|grouping=2001}}\n"
"| {{Integraality cell|0|0|column=brwiki|grouping=2001}}\n"
)
self.assertEqual(result, expected)
class GetQueryForItemsForPropertyPositive(PropertyStatisticsTest):
def test_get_query_for_items_for_property_positive(self):
result = self.stats.get_query_for_items_for_property_positive(
self.stats.columns.get("P21"), "Q3115846"
)
expected = """
SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE {
?entity wdt:P31 wd:Q41960 .
?entity wdt:P551 wd:Q3115846 .
?entity p:P21 ?prop . OPTIONAL { ?prop ps:P21 ?value }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_positive_no_grouping(self):
result = self.stats.get_query_for_items_for_property_positive(
self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.NO_GROUPING
)
expected = """
SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE {
?entity wdt:P31 wd:Q41960 .
MINUS {
?entity wdt:P551 [] .
}
?entity p:P21 ?prop . OPTIONAL { ?prop ps:P21 ?value }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_positive_totals(self):
result = self.stats.get_query_for_items_for_property_positive(
self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.TOTALS
)
expected = """
SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE {
?entity wdt:P31 wd:Q41960 .
?entity p:P21 ?prop . OPTIONAL { ?prop ps:P21 ?value }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_positive_label(self):
result = self.stats.get_query_for_items_for_property_positive(
self.stats.columns.get("Lbr"), "Q3115846"
)
expected = """
SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE {
?entity wdt:P31 wd:Q41960 .
?entity wdt:P551 wd:Q3115846 .
FILTER(EXISTS {
?entity rdfs:label ?lang_label.
FILTER((LANG(?lang_label)) = "br").
})
SERVICE wikibase:label { bd:serviceParam wikibase:language "br". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_positive_unknown_value_grouping(self):
result = self.stats.get_query_for_items_for_property_positive(
self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.UNKNOWN_VALUE
)
expected = """
SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE {
?entity wdt:P31 wd:Q41960 .
?entity wdt:P551 ?grouping.
FILTER wikibase:isSomeValue(?grouping).
?entity p:P21 ?prop . OPTIONAL { ?prop ps:P21 ?value }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_positive_year_grouping(self):
stats = PropertyStatistics(
columns=self.columns,
grouping_configuration=YearGroupingConfiguration("P577"),
selector_sparql="wdt:P31 wd:Q41960",
grouping_type="year",
property_threshold=10,
)
result = stats.get_query_for_items_for_property_positive(
self.stats.columns.get("P21"), 2006
)
expected = """
SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE {
?entity wdt:P31 wd:Q41960 .
?entity wdt:P577 ?date.
BIND(YEAR(?date) as ?year).
FILTER(?year = 2006).
?entity p:P21 ?prop . OPTIONAL { ?prop ps:P21 ?value }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_positive_sitelink(self):
result = self.stats.get_query_for_items_for_property_positive(
self.stats.columns.get("brwiki"), "Q3115846"
)
expected = """
SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE {
?entity wdt:P31 wd:Q41960 .
?entity wdt:P551 wd:Q3115846 .
?sitelink schema:about ?entity;
schema:isPartOf <https://br.wikipedia.org/>;
schema:name ?value.
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
class GetQueryForItemsForPropertyNegative(PropertyStatisticsTest):
def test_get_query_for_items_for_property_negative(self):
result = self.stats.get_query_for_items_for_property_negative(
self.stats.columns.get("P21"), "Q3115846"
)
expected = """
SELECT DISTINCT ?entity ?entityLabel WHERE {
?entity wdt:P31 wd:Q41960 .
?entity wdt:P551 wd:Q3115846 .
MINUS {
{?entity a wdno:P21 .} UNION
{?entity wdt:P21 ?prop .}
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_negative_no_grouping(self):
result = self.stats.get_query_for_items_for_property_negative(
self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.NO_GROUPING
)
expected = """
SELECT DISTINCT ?entity ?entityLabel WHERE {
?entity wdt:P31 wd:Q41960 .
MINUS {
{?entity wdt:P551 [] .} UNION
{?entity a wdno:P21 .} UNION
{?entity wdt:P21 ?prop .}
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_negative_totals(self):
result = self.stats.get_query_for_items_for_property_negative(
self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.TOTALS
)
expected = """
SELECT DISTINCT ?entity ?entityLabel WHERE {
?entity wdt:P31 wd:Q41960 .
MINUS {
{?entity a wdno:P21 .} UNION
{?entity wdt:P21 ?prop .}
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_negative_label(self):
result = self.stats.get_query_for_items_for_property_negative(
self.stats.columns.get("Lbr"), "Q3115846"
)
expected = """
SELECT DISTINCT ?entity ?entityLabel WHERE {
?entity wdt:P31 wd:Q41960 .
?entity wdt:P551 wd:Q3115846 .
MINUS {
{ ?entity rdfs:label ?lang_label.
FILTER((LANG(?lang_label)) = "br") }
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_negative_unknown_value_grouping(self):
result = self.stats.get_query_for_items_for_property_negative(
self.stats.columns.get("P21"), self.stats.GROUP_MAPPING.UNKNOWN_VALUE
)
expected = """
SELECT DISTINCT ?entity ?entityLabel WHERE {
?entity wdt:P31 wd:Q41960 .
?entity wdt:P551 ?grouping.
FILTER wikibase:isSomeValue(?grouping).
MINUS {
{?entity a wdno:P21 .} UNION
{?entity wdt:P21 ?prop .}
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_negative_year_grouping(self):
stats = PropertyStatistics(
columns=self.columns,
grouping_configuration=YearGroupingConfiguration("P577"),
selector_sparql="wdt:P31 wd:Q41960",
grouping_type="year",
property_threshold=10,
)
result = stats.get_query_for_items_for_property_negative(
self.stats.columns.get("P21"), 2006
)
expected = """
SELECT DISTINCT ?entity ?entityLabel WHERE {
?entity wdt:P31 wd:Q41960 .
?entity wdt:P577 ?date.
BIND(YEAR(?date) as ?year).
FILTER(?year = 2006).
MINUS {
{?entity a wdno:P21 .} UNION
{?entity wdt:P21 ?prop .}
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
def test_get_query_for_items_for_property_negative_sitelink(self):
result = self.stats.get_query_for_items_for_property_negative(
self.stats.columns.get("brwiki"), "Q3115846"
)
expected = """
SELECT DISTINCT ?entity ?entityLabel WHERE {
?entity wdt:P31 wd:Q41960 .
?entity wdt:P551 wd:Q3115846 .
MINUS {
?sitelink schema:about ?entity;
schema:isPartOf <https://br.wikipedia.org/>.
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
self.assertEqual(result, expected)
class GetCountFromSparqlTest(SparqlQueryTest, PropertyStatisticsTest):
def test_return_count(self):
self.mock_sparql_query.return_value.select.return_value = [{"count": "18"}]
result = self.stats._get_count_from_sparql("SELECT X")
self.assert_query_called("SELECT X")
self.assertEqual(result, 18)
def test_return_None(self):
self.mock_sparql_query.return_value.select.return_value = None
with self.assertRaises(QueryException):
self.stats._get_count_from_sparql("SELECT X")
self.assert_query_called("SELECT X")
class GetGroupingCountsFromSparqlTest(SparqlQueryTest, PropertyStatisticsTest):
def test_return_count(self):
self.mock_sparql_query.return_value.select.return_value = [
{"grouping": "http://www.wikidata.org/entity/Q1", "count": 10},
{"grouping": "http://www.wikidata.org/entity/Q2", "count": 5},
]
result = self.stats._get_grouping_counts_from_sparql("SELECT X")
self.assert_query_called("SELECT X")
expected = OrderedDict([("Q1", 10), ("Q2", 5)])
self.assertEqual(result, expected)
def test_return_None(self):
self.mock_sparql_query.return_value.select.return_value = None
result = self.stats._get_grouping_counts_from_sparql("SELECT X")
self.assert_query_called("SELECT X")
self.assertEqual(result, None)
def test_return_count_with_unknown(self):
self.mock_sparql_query.return_value.select.return_value = [
{"grouping": "http://www.wikidata.org/entity/Q1", "count": 10},
{"grouping": "http://www.wikidata.org/entity/Q2", "count": 5},
{
"grouping": "http://www.wikidata.org/.well-known/genid/6ab4c2d7cb4ac72721335af5b8ba09c7",
"count": 2,
},
{
"grouping": "http://www.wikidata.org/.well-known/genid/1469448a291c6fbe5df8306cb52ef18b",
"count": 1,
},
]
result = self.stats._get_grouping_counts_from_sparql("SELECT X")
self.assert_query_called("SELECT X")
expected = OrderedDict([("Q1", 10), ("Q2", 5), ("UNKNOWN_VALUE", 3)])
self.assertEqual(result, expected)
class SparqlCountTest(SparqlQueryTest, PropertyStatisticsTest):
def setUp(self):
super().setUp()
self.mock_sparql_query.return_value.select.return_value = [{"count": "18"}]
def test_get_totals_no_grouping(self):
result = self.stats.get_totals_no_grouping()
query = (
"\n"
"SELECT (COUNT(*) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960\n"
" MINUS { ?entity wdt:P551 _:b28. }\n"
"}\n"
)
self.assert_query_called(query)
self.assertEqual(result, 18)
def test_get_totals(self):
result = self.stats.get_totals()
query = (
"\n"
"SELECT (COUNT(*) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960\n"
"}\n"
)
self.assert_query_called(query)
self.assertEqual(result, 18)
class GetGroupingInformationTest(SparqlQueryTest, PropertyStatisticsTest):
def test_get_grouping_information(self):
self.mock_sparql_query.return_value.select.return_value = [
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "10"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "6"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "6"},
]
expected = {
"Q3115846": ItemGrouping(title="Q3115846", count=10),
"Q5087901": ItemGrouping(title="Q5087901", count=6),
"Q623333": ItemGrouping(title="Q623333", count=6),
}
query = (
"\n"
"SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" ?entity wdt:P551 ?grouping .\n"
"} GROUP BY ?grouping\n"
"HAVING (?count >= 20)\n"
"ORDER BY DESC(?count)\n"
"LIMIT 1000\n"
)
result = self.stats.get_grouping_information()
self.assert_query_called(query)
self.assertEqual(result, expected)
def test_get_grouping_information_with_grouping_threshold(self):
self.mock_sparql_query.return_value.select.return_value = [
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "10"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "6"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "6"},
]
expected = {
"Q3115846": ItemGrouping(title="Q3115846", count=10),
"Q5087901": ItemGrouping(title="Q5087901", count=6),
"Q623333": ItemGrouping(title="Q623333", count=6),
}
self.stats.grouping_configuration.grouping_threshold = 5
query = (
"\n"
"SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" ?entity wdt:P551 ?grouping .\n"
"} GROUP BY ?grouping\n"
"HAVING (?count >= 5)\n"
"ORDER BY DESC(?count)\n"
"LIMIT 1000\n"
)
result = self.stats.get_grouping_information()
self.assert_query_called(query)
self.assertEqual(result, expected)
def test_get_grouping_information_with_higher_grouping(self):
self.mock_sparql_query.return_value.select.return_value = [
{
"grouping": "http://www.wikidata.org/entity/Q3115846",
"higher_grouping": "NZL",
"count": "10",
},
{
"grouping": "http://www.wikidata.org/entity/Q5087901",
"higher_grouping": "USA",
"count": "6",
},
{
"grouping": "http://www.wikidata.org/entity/Q623333",
"higher_grouping": "USA",
"count": "6",
},
]
expected = {
"Q3115846": ItemGrouping(title="Q3115846", count=10, higher_grouping="NZL"),
"Q5087901": ItemGrouping(title="Q5087901", count=6, higher_grouping="USA"),
"Q623333": ItemGrouping(title="Q623333", count=6, higher_grouping="USA"),
}
self.stats.grouping_configuration.higher_grouping = "wdt:P17/wdt:P298"
query = (
"\n"
"SELECT ?grouping (SAMPLE(?_higher_grouping) as ?higher_grouping) "
"(COUNT(DISTINCT ?entity) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" ?entity wdt:P551 ?grouping .\n"
" OPTIONAL { ?grouping wdt:P17/wdt:P298 ?_higher_grouping }.\n"
"} GROUP BY ?grouping ?higher_grouping\n"
"HAVING (?count >= 20)\n"
"ORDER BY DESC(?count)\n"
"LIMIT 1000\n"
)
result = self.stats.get_grouping_information()
self.assert_query_called(query)
self.assertEqual(result, expected)
def test_get_grouping_information_empty_result(self):
self.mock_sparql_query.return_value.select.return_value = None
query = (
"\n"
"SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" ?entity wdt:P551 ?grouping .\n"
"} GROUP BY ?grouping\n"
"HAVING (?count >= 20)\n"
"ORDER BY DESC(?count)\n"
"LIMIT 1000\n"
)
with self.assertRaises(QueryException):
self.stats.get_grouping_information()
self.assert_query_called(query)
def test_get_grouping_information_timeout(self):
self.mock_sparql_query.return_value.select.side_effect = (
pywikibot.exceptions.TimeoutError("Error")
)
query = (
"\n"
"SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" ?entity wdt:P551 ?grouping .\n"
"} GROUP BY ?grouping\n"
"HAVING (?count >= 20)\n"
"ORDER BY DESC(?count)\n"
"LIMIT 1000\n"
)
with self.assertRaises(QueryException):
self.stats.get_grouping_information()
self.assert_query_called(query)
def test_get_grouping_information_unknown_value(self):
self.mock_sparql_query.return_value.select.return_value = [
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "10"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "6"},
{
"grouping": "http://www.wikidata.org/.well-known/genid/6ab4c2d7cb4ac72721335af5b8ba09c7",
"count": "2",
},
{
"grouping": "http://www.wikidata.org/.well-known/genid/1469448a291c6fbe5df8306cb52ef18b",
"count": "1",
},
]
expected = {
"Q3115846": ItemGrouping(title="Q3115846", count=10),
"Q5087901": ItemGrouping(title="Q5087901", count=6),
"UNKNOWN_VALUE": UnknownValueGrouping(count=3),
}
query = (
"\n"
"SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" ?entity wdt:P551 ?grouping .\n"
"} GROUP BY ?grouping\n"
"HAVING (?count >= 20)\n"
"ORDER BY DESC(?count)\n"
"LIMIT 1000\n"
)
result = self.stats.get_grouping_information()
self.assert_query_called(query)
self.assertEqual(result, expected)
def test_get_grouping_information_year(self):
stats = PropertyStatistics(
columns=self.columns,
grouping_configuration=YearGroupingConfiguration("P577"),
selector_sparql="wdt:P31 wd:Q41960",
grouping_type="year",
property_threshold=10,
)
self.mock_sparql_query.return_value.select.return_value = [
{"grouping": "2001", "count": "10"},
{"grouping": "2002", "count": "6"},
]
expected = {
"2001": YearGrouping(title="2001", count=10),
"2002": YearGrouping(title="2002", count=6),
}
query = (
"\n"
"SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" ?entity wdt:P577 ?date .\n"
" BIND(YEAR(?date) as ?grouping) .\n"
"} GROUP BY ?grouping\n"
"HAVING (?count >= 20)\n"
"ORDER BY DESC(?count)\n"
"LIMIT 1000\n"
)
result = stats.get_grouping_information()
self.assert_query_called(query)
self.assertEqual(result, expected)
def test_get_grouping_information_year_unknown_value(self):
stats = PropertyStatistics(
columns=self.columns,
grouping_configuration=YearGroupingConfiguration("P577"),
selector_sparql="wdt:P31 wd:Q41960",
grouping_type="year",
property_threshold=10,
)
self.mock_sparql_query.return_value.select.return_value = [
{"grouping": "2001", "count": "10"},
{"grouping": "2002", "count": "6"},
{"grouping": "", "count": "4"},
]
expected = {
"2001": YearGrouping(title="2001", count=10),
"2002": YearGrouping(title="2002", count=6),
"UNKNOWN_VALUE": UnknownValueGrouping(count=4),
}
query = (
"\n"
"SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {\n"
" ?entity wdt:P31 wd:Q41960 .\n"
" ?entity wdt:P577 ?date .\n"
" BIND(YEAR(?date) as ?grouping) .\n"
"} GROUP BY ?grouping\n"
"HAVING (?count >= 20)\n"
"ORDER BY DESC(?count)\n"
"LIMIT 1000\n"
)
result = stats.get_grouping_information()
self.assert_query_called(query)
self.assertEqual(result, expected)
class TestGetHeader(PropertyStatisticsTest):
def setUp(self):
super().setUp()
self.stats.grouping_configuration.grouping_threshold = 7
self.stats.property_threshold = 4
def test_get_header(self):
result = self.stats.get_header()
expected = (
'{| class="wikitable sortable"\n'
'! colspan="2" |Top groupings (Minimum 7 items)\n'
'! colspan="7"|Top Properties (used at least 4 times per grouping)\n'
"|-\n"
"! Name\n"
"! Count\n"
'! data-sort-type="number"|{{Property|P21}}\n'
'! data-sort-type="number"|{{Property|P19}}\n'
'! data-sort-type="number"|{{Property|P2}}\n'
'! data-sort-type="number"|{{Property|P5}}\n'
'! data-sort-type="number"|{{#language:br}}\n'
'! data-sort-type="number"|{{#language:xy}}\n'
'! data-sort-type="number"|{{Q|Q846871}}\n'
)
self.assertEqual(result, expected)
def test_get_header_with_higher_grouping(self):
self.stats.grouping_configuration.higher_grouping = "wdt:P17/wdt:P298"
result = self.stats.get_header()
expected = (
'{| class="wikitable sortable"\n'
'! colspan="3" |Top groupings (Minimum 7 items)\n'
'! colspan="7"|Top Properties (used at least 4 times per grouping)\n'
"|-\n"
"! \n"
"! Name\n"
"! Count\n"
'! data-sort-type="number"|{{Property|P21}}\n'
'! data-sort-type="number"|{{Property|P19}}\n'
'! data-sort-type="number"|{{Property|P2}}\n'
'! data-sort-type="number"|{{Property|P5}}\n'
'! data-sort-type="number"|{{#language:br}}\n'
'! data-sort-type="number"|{{#language:xy}}\n'
'! data-sort-type="number"|{{Q|Q846871}}\n'
)
self.assertEqual(result, expected)
class MakeTotalsTest(SparqlQueryTest, PropertyStatisticsTest):
def setUp(self):
super().setUp()
self.mock_sparql_query.return_value.select.side_effect = [
[{"count": "120"}],
[{"count": "30"}],
[{"count": "80"}],
[{"count": "10"}],
[{"count": "12"}],
[{"count": "24"}],
[{"count": "36"}],
[{"count": "24"}],
]
def test_make_totals(self):
result = self.stats.make_totals()
expected = (
'|- class="sortbottom"\n'
"| '''Totals''' (all items)\n"
"| 120 \n"
"| {{Integraality cell|25.0|30|column=P21|grouping=}}\n"
"| {{Integraality cell|66.67|80|column=P19|grouping=}}\n"
"| {{Integraality cell|8.33|10|column=P1/P2|grouping=}}\n"
"| {{Integraality cell|10.0|12|column=P3/Q4/P5|grouping=}}\n"
"| {{Integraality cell|20.0|24|column=Lbr|grouping=}}\n"
"| {{Integraality cell|30.0|36|column=Dxy|grouping=}}\n"
"| {{Integraality cell|20.0|24|column=brwiki|grouping=}}\n"
)
self.assertEqual(result, expected)
def test_make_totals_with_higher_grouping(self):
self.stats.grouping_configuration.higher_grouping = "wdt:P17/wdt:P298"
result = self.stats.make_totals()
expected = (
'|- class="sortbottom"\n'
"||\n"
"| '''Totals''' (all items)\n"
"| 120 \n"
"| {{Integraality cell|25.0|30|column=P21|grouping=}}\n"
"| {{Integraality cell|66.67|80|column=P19|grouping=}}\n"
"| {{Integraality cell|8.33|10|column=P1/P2|grouping=}}\n"
"| {{Integraality cell|10.0|12|column=P3/Q4/P5|grouping=}}\n"
"| {{Integraality cell|20.0|24|column=Lbr|grouping=}}\n"
"| {{Integraality cell|30.0|36|column=Dxy|grouping=}}\n"
"| {{Integraality cell|20.0|24|column=brwiki|grouping=}}\n"
)
self.assertEqual(result, expected)
def test_make_totals_with_grouping_link(self):
self.stats.grouping_link = "Foo"
result = self.stats.make_totals()
expected = (
'|- class="sortbottom"\n'
"| '''Totals''' (all items)\n"
"| 120 \n"
"| {{Integraality cell|25.0|30|column=P21|grouping=}}\n"
"| {{Integraality cell|66.67|80|column=P19|grouping=}}\n"
"| {{Integraality cell|8.33|10|column=P1/P2|grouping=}}\n"
"| {{Integraality cell|10.0|12|column=P3/Q4/P5|grouping=}}\n"
"| {{Integraality cell|20.0|24|column=Lbr|grouping=}}\n"
"| {{Integraality cell|30.0|36|column=Dxy|grouping=}}\n"
"| {{Integraality cell|20.0|24|column=brwiki|grouping=}}\n"
)
self.assertEqual(result, expected)
class PopulateGroupingsTest(SparqlQueryTest, PropertyStatisticsTest):
def test_populate_groupings_empty(self):
result = self.stats.populate_groupings(None)
self.assertEqual(result, None)
def test_populate_groupings_no_columns(self):
groupings = {
"Q3115846": ItemGrouping(title="Q3115846", count=10),
"Q5087901": ItemGrouping(title="Q5087901", count=6),
"Q623333": ItemGrouping(title="Q623333", count=6),
}
result = self.stats.populate_groupings(groupings)
self.assertEqual(result, groupings)
def test_populate_groupings_with_columns(self):
groupings = {
"Q3115846": ItemGrouping(title="Q3115846", count=10),
"Q5087901": ItemGrouping(title="Q5087901", count=6),
"Q623333": ItemGrouping(title="Q623333", count=6),
}
self.mock_sparql_query.return_value.select.side_effect = [
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "1"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "2"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "3"},
{"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "4"},
],
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "5"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "6"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "7"},
{"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "8"},
],
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "9"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "10"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "11"},
{"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "12"},
],
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "13"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "14"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "15"},
{"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "16"},
],
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "17"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "18"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "19"},
{"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "20"},
],
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "21"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "22"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "23"},
{"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "24"},
],
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "25"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "26"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "27"},
{"grouping": "http://www.wikidata.org/entity/Q11953090", "count": "28"},
],
]
result = self.stats.populate_groupings(groupings)
expected = {
"Q3115846": ItemGrouping(
title="Q3115846",
count=10,
cells=OrderedDict(
[
("P21", 1),
("P19", 5),
- ("P1P2", 9),
- ("P3Q4P5", 13),
+ ("P1/P2", 9),
+ ("P3/Q4/P5", 13),
("Lbr", 17),
("Dxy", 21),
("brwiki", 25),
]
),
),
"Q5087901": ItemGrouping(
title="Q5087901",
count=6,
cells=OrderedDict(
[
("P21", 2),
("P19", 6),
- ("P1P2", 10),
- ("P3Q4P5", 14),
+ ("P1/P2", 10),
+ ("P3/Q4/P5", 14),
("Lbr", 18),
("Dxy", 22),
("brwiki", 26),
]
),
),
"Q623333": ItemGrouping(
title="Q623333",
count=6,
cells=OrderedDict(
[
("P21", 3),
("P19", 7),
- ("P1P2", 11),
- ("P3Q4P5", 15),
+ ("P1/P2", 11),
+ ("P3/Q4/P5", 15),
("Lbr", 19),
("Dxy", 23),
("brwiki", 27),
]
),
),
}
self.assertEqual(result, expected)
def test_populate_groupings_with_columns_one_empty(self):
groupings = {
"Q3115846": ItemGrouping(title="Q3115846", count=10),
"Q5087901": ItemGrouping(title="Q5087901", count=6),
"Q623333": ItemGrouping(title="Q623333", count=6),
}
self.mock_sparql_query.return_value.select.side_effect = [
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "1"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "2"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "3"},
],
None,
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "9"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "10"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "11"},
],
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "13"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "14"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "15"},
],
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "17"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "18"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "19"},
],
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "21"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "22"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "23"},
],
[
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "24"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "25"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "26"},
],
]
result = self.stats.populate_groupings(groupings)
expected = {
"Q3115846": ItemGrouping(
title="Q3115846",
count=10,
cells=OrderedDict(
[
("P21", 1),
- ("P1P2", 9),
- ("P3Q4P5", 13),
+ ("P1/P2", 9),
+ ("P3/Q4/P5", 13),
("Lbr", 17),
("Dxy", 21),
("brwiki", 24),
]
),
),
"Q5087901": ItemGrouping(
title="Q5087901",
count=6,
cells=OrderedDict(
[
("P21", 2),
- ("P1P2", 10),
- ("P3Q4P5", 14),
+ ("P1/P2", 10),
+ ("P3/Q4/P5", 14),
("Lbr", 18),
("Dxy", 22),
("brwiki", 25),
]
),
),
"Q623333": ItemGrouping(
title="Q623333",
count=6,
cells=OrderedDict(
[
("P21", 3),
- ("P1P2", 11),
- ("P3Q4P5", 15),
+ ("P1/P2", 11),
+ ("P3/Q4/P5", 15),
("Lbr", 19),
("Dxy", 23),
("brwiki", 26),
]
),
),
}
self.assertEqual(result, expected)
class RetrieveDataTest(SparqlQueryTest, PropertyStatisticsTest):
def test_retrieve_data_empty(self):
result = self.stats.retrieve_data()
expected = {}
self.assertEqual(result, expected)
def test_retrieve_data(self):
self.mock_sparql_query.return_value.select.return_value = [
{"grouping": "http://www.wikidata.org/entity/Q3115846", "count": "10"},
{"grouping": "http://www.wikidata.org/entity/Q5087901", "count": "6"},
{"grouping": "http://www.wikidata.org/entity/Q623333", "count": "6"},
]
result = self.stats.retrieve_data()
print(result)
expected = {
"Q3115846": ItemGrouping(
title="Q3115846",
count=10,
cells=OrderedDict(
[
("P21", 10),
("P19", 10),
- ("P1P2", 10),
- ("P3Q4P5", 10),
+ ("P1/P2", 10),
+ ("P3/Q4/P5", 10),
("Lbr", 10),
("Dxy", 10),
("brwiki", 10),
]
),
),
"Q5087901": ItemGrouping(
title="Q5087901",
count=6,
cells=OrderedDict(
[
("P21", 6),
("P19", 6),
- ("P1P2", 6),
- ("P3Q4P5", 6),
+ ("P1/P2", 6),
+ ("P3/Q4/P5", 6),
("Lbr", 6),
("Dxy", 6),
("brwiki", 6),
]
),
),
"Q623333": ItemGrouping(
title="Q623333",
count=6,
cells=OrderedDict(
[
("P21", 6),
("P19", 6),
- ("P1P2", 6),
- ("P3Q4P5", 6),
+ ("P1/P2", 6),
+ ("P3/Q4/P5", 6),
("Lbr", 6),
("Dxy", 6),
("brwiki", 6),
]
),
),
}
self.assertEqual(result, expected)
class ProcessDataTest(SparqlQueryTest, PropertyStatisticsTest):
def test_process_data_empty(self):
result = self.stats.process_data({})
expected = (
'{| class="wikitable sortable"\n'
'! colspan="2" |Top groupings (Minimum 20 items)\n'
'! colspan="7"|Top Properties (used at least 10 times per grouping)\n'
"|-\n"
"! Name\n"
"! Count\n"
'! data-sort-type="number"|{{Property|P21}}\n'
'! data-sort-type="number"|{{Property|P19}}\n'
'! data-sort-type="number"|{{Property|P2}}\n'
'! data-sort-type="number"|{{Property|P5}}\n'
'! data-sort-type="number"|{{#language:br}}\n'
'! data-sort-type="number"|{{#language:xy}}\n'
'! data-sort-type="number"|{{Q|Q846871}}\n'
'|- class="sortbottom"\n'
"| '''Totals''' (all items)\n"
"| 1 \n"
"| {{Integraality cell|100.0|1|column=P21|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=P19|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=P1/P2|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=P3/Q4/P5|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=Lbr|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=Dxy|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=brwiki|grouping=}}\n"
"|}\n"
)
self.assertEqual(result, expected)
def test_process_data(self):
grouping_data = {
"Q3115846": ItemGrouping(
title="Q3115846",
count=10,
cells=OrderedDict(
[
("P21", 10),
("P19", 8),
- ("P1P2", 2),
- ("P3Q4P5", 7),
+ ("P1/P2", 2),
+ ("P3/Q4/P5", 7),
("Lbr", 1),
("Dxy", 2),
("brwiki", 1),
]
),
),
"Q5087901": ItemGrouping(
title="Q5087901",
count=6,
cells=OrderedDict(
[
("P21", 6),
("P19", 0),
- ("P1P2", 0),
- ("P3Q4P5", 0),
+ ("P1/P2", 0),
+ ("P3/Q4/P5", 0),
("Lbr", 0),
("Dxy", 0),
("brwiki", 0),
]
),
),
}
result = self.stats.process_data(grouping_data)
expected = (
'{| class="wikitable sortable"\n'
'! colspan="2" |Top groupings (Minimum 20 items)\n'
'! colspan="7"|Top Properties (used at least 10 times per grouping)\n'
"|-\n"
"! Name\n"
"! Count\n"
'! data-sort-type="number"|{{Property|P21}}\n'
'! data-sort-type="number"|{{Property|P19}}\n'
'! data-sort-type="number"|{{Property|P2}}\n'
'! data-sort-type="number"|{{Property|P5}}\n'
'! data-sort-type="number"|{{#language:br}}\n'
'! data-sort-type="number"|{{#language:xy}}\n'
'! data-sort-type="number"|{{Q|Q846871}}\n'
"|-\n"
"| {{Q|Q3115846}}\n"
"| 10 \n"
"| {{Integraality cell|100.0|10|column=P21|grouping=Q3115846}}\n"
"| {{Integraality cell|80.0|8|column=P19|grouping=Q3115846}}\n"
"| {{Integraality cell|20.0|2|column=P1/P2|grouping=Q3115846}}\n"
"| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=Q3115846}}\n"
"| {{Integraality cell|10.0|1|column=Lbr|grouping=Q3115846}}\n"
"| {{Integraality cell|20.0|2|column=Dxy|grouping=Q3115846}}\n"
"| {{Integraality cell|10.0|1|column=brwiki|grouping=Q3115846}}\n"
"|-\n"
"| {{Q|Q5087901}}\n"
"| 6 \n"
"| {{Integraality cell|100.0|6|column=P21|grouping=Q5087901}}\n"
"| {{Integraality cell|0|0|column=P19|grouping=Q5087901}}\n"
"| {{Integraality cell|0|0|column=P1/P2|grouping=Q5087901}}\n"
"| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=Q5087901}}\n"
"| {{Integraality cell|0|0|column=Lbr|grouping=Q5087901}}\n"
"| {{Integraality cell|0|0|column=Dxy|grouping=Q5087901}}\n"
"| {{Integraality cell|0|0|column=brwiki|grouping=Q5087901}}\n"
'|- class="sortbottom"\n'
"| '''Totals''' (all items)\n"
"| 1 \n"
"| {{Integraality cell|100.0|1|column=P21|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=P19|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=P1/P2|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=P3/Q4/P5|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=Lbr|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=Dxy|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=brwiki|grouping=}}\n"
"|}\n"
)
self.assertEqual(result, expected)
def test_process_data_year_grouping(self):
grouping_data = {
"2001": YearGrouping(
title="2001",
count=10,
cells=OrderedDict(
[
("P21", 10),
("P19", 8),
- ("P1P2", 2),
- ("P3Q4P5", 7),
+ ("P1/P2", 2),
+ ("P3/Q4/P5", 7),
("Lbr", 1),
("Dxy", 2),
("brwiki", 1),
]
),
),
"2018": YearGrouping(
title="2018",
count=6,
cells=OrderedDict(
[
("P21", 6),
("P19", 0),
- ("P1P2", 0),
- ("P3Q4P5", 0),
+ ("P1/P2", 0),
+ ("P3/Q4/P5", 0),
("Lbr", 0),
("Dxy", 0),
("brwiki", 0),
]
),
),
}
result = self.stats.process_data(grouping_data)
expected = (
'{| class="wikitable sortable"\n'
'! colspan="2" |Top groupings (Minimum 20 items)\n'
'! colspan="7"|Top Properties (used at least 10 times per grouping)\n'
"|-\n"
"! Name\n"
"! Count\n"
'! data-sort-type="number"|{{Property|P21}}\n'
'! data-sort-type="number"|{{Property|P19}}\n'
'! data-sort-type="number"|{{Property|P2}}\n'
'! data-sort-type="number"|{{Property|P5}}\n'
'! data-sort-type="number"|{{#language:br}}\n'
'! data-sort-type="number"|{{#language:xy}}\n'
'! data-sort-type="number"|{{Q|Q846871}}\n'
"|-\n"
"| 2001\n"
"| 10 \n"
"| {{Integraality cell|100.0|10|column=P21|grouping=2001}}\n"
"| {{Integraality cell|80.0|8|column=P19|grouping=2001}}\n"
"| {{Integraality cell|20.0|2|column=P1/P2|grouping=2001}}\n"
"| {{Integraality cell|70.0|7|column=P3/Q4/P5|grouping=2001}}\n"
"| {{Integraality cell|10.0|1|column=Lbr|grouping=2001}}\n"
"| {{Integraality cell|20.0|2|column=Dxy|grouping=2001}}\n"
"| {{Integraality cell|10.0|1|column=brwiki|grouping=2001}}\n"
"|-\n"
"| 2018\n"
"| 6 \n"
"| {{Integraality cell|100.0|6|column=P21|grouping=2018}}\n"
"| {{Integraality cell|0|0|column=P19|grouping=2018}}\n"
"| {{Integraality cell|0|0|column=P1/P2|grouping=2018}}\n"
"| {{Integraality cell|0|0|column=P3/Q4/P5|grouping=2018}}\n"
"| {{Integraality cell|0|0|column=Lbr|grouping=2018}}\n"
"| {{Integraality cell|0|0|column=Dxy|grouping=2018}}\n"
"| {{Integraality cell|0|0|column=brwiki|grouping=2018}}\n"
'|- class="sortbottom"\n'
"| '''Totals''' (all items)\n"
"| 1 \n"
"| {{Integraality cell|100.0|1|column=P21|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=P19|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=P1/P2|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=P3/Q4/P5|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=Lbr|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=Dxy|grouping=}}\n"
"| {{Integraality cell|100.0|1|column=brwiki|grouping=}}\n"
"|}\n"
)
self.assertEqual(result, expected)
class RetrieveAndProcessDataTest(SparqlQueryTest, PropertyStatisticsTest):
    """
    Compare the wikitext returned by retrieve_and_process_data with a
    fully spelled-out table, for item groupings and for year groupings.
    """

    @staticmethod
    def _expected_full_table(body_rows, total):
        """
        Assemble the wikitext table the tests in this class expect.

        Every cell of the expected table reports 100.0 percent.

        :param body_rows: list of (name cell, grouping key, count) tuples,
            one per grouping row, in display order
        :param total: count displayed in the totals row
        :return: (str) the expected wikitext
        """
        # (header cell, column key) pairs, in the order they are rendered.
        columns = (
            ("{{Property|P21}}", "P21"),
            ("{{Property|P19}}", "P19"),
            ("{{Property|P2}}", "P1/P2"),
            ("{{Property|P5}}", "P3/Q4/P5"),
            ("{{#language:br}}", "Lbr"),
            ("{{#language:xy}}", "Dxy"),
            ("{{Q|Q846871}}", "brwiki"),
        )
        # %-formatting keeps the literal double braces free of escaping.
        cell_template = "| {{Integraality cell|100.0|%s|column=%s|grouping=%s}}\n"

        pieces = [
            '{| class="wikitable sortable"\n',
            '! colspan="2" |Top groupings (Minimum 20 items)\n',
            '! colspan="7"|Top Properties (used at least 10 times per grouping)\n',
            "|-\n",
            "! Name\n",
            "! Count\n",
        ]
        for header_cell, _ in columns:
            pieces.append('! data-sort-type="number"|%s\n' % header_cell)

        for name_cell, grouping_key, count in body_rows:
            pieces.append("|-\n")
            pieces.append("| %s\n" % name_cell)
            pieces.append("| %s \n" % count)
            for _, column_key in columns:
                pieces.append(cell_template % (count, column_key, grouping_key))

        # The totals row leaves the grouping parameter empty.
        pieces.append('|- class="sortbottom"\n')
        pieces.append("| '''Totals''' (all items)\n")
        pieces.append("| %s \n" % total)
        for _, column_key in columns:
            pieces.append(cell_template % (total, column_key, ""))
        pieces.append("|}\n")
        return "".join(pieces)

    def test_retrieve_and_process_data(self):
        """
        Item groupings: grouping URIs are rendered as {{Q|...}} name cells.
        """
        item_counts = [("Q3115846", 10), ("Q5087901", 6), ("Q623333", 6)]
        canned_rows = []
        for item_id, count in item_counts:
            canned_rows.append(
                {
                    "grouping": "http://www.wikidata.org/entity/%s" % item_id,
                    "count": str(count),
                }
            )
        self.mock_sparql_query.return_value.select.return_value = canned_rows

        result = self.stats.retrieve_and_process_data()

        expected = self._expected_full_table(
            [("{{Q|%s}}" % item_id, item_id, count) for item_id, count in item_counts],
            total=10,
        )
        self.assertEqual(result, expected)

    def test_retrieve_and_process_data_year_grouping(self):
        """
        Year groupings: the year itself is used as the name cell.
        """
        # Swap in a year-based grouping before configuring the canned rows.
        self.grouping_configuration = YearGroupingConfiguration("P551")
        self.stats = PropertyStatistics(
            columns=self.columns,
            grouping_configuration=self.grouping_configuration,
            selector_sparql="wdt:P31 wd:Q41960",
            property_threshold=10,
        )
        year_counts = [("2001", 10), ("2012", 6), ("2023", 6)]
        self.mock_sparql_query.return_value.select.return_value = [
            {"grouping": year, "count": str(count)} for year, count in year_counts
        ]

        result = self.stats.retrieve_and_process_data()

        expected = self._expected_full_table(
            [(year, year, count) for year, count in year_counts],
            total=10,
        )
        self.assertEqual(result, expected)