Page MenuHomePhabricator
Paste P45844

parsevalue script
ActivePublic

Authored by matej_suchanek on Mar 14 2023, 8:19 AM.
Tags
None
Referenced Files
F36910717: parsevalue script
Mar 14 2023, 8:19 AM
Subscribers
None
from collections import namedtuple
from enum import IntEnum
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pywikibot
from pywikibot import WbTime
from pywikibot.tools.itertools import itergroup
class Status(IntEnum):
    """Outcome recorded for one (month, day) cell of the result matrix.

    The integer values are used directly as colormap indices when the
    matrix is drawn, so they must stay in step with the order of the
    ``colors`` and ``labels`` lists.
    """

    # Cell does not correspond to a tested day (e.g. day 31 of a 30-day month).
    UNDEFINED = 0
    # Initial state of every tested day; kept when no parsed value arrives.
    ERROR = 1
    # Parsed year, month and day all match the input date.
    OK = 2
    # Year matches, but month and day came back swapped.
    MONTH_DAY = 3
    # Parsed successfully, but to some other date.
    WRONG = 4
# Colour and legend text for each Status value; position i in both lists
# belongs to the status whose integer value is i.
colors = ['black', 'grey', 'green', 'yellow', 'red']
labels = ['Undefined', 'Error', 'OK', 'MDY', 'Different date']

# Discrete colormap used to draw the status matrix.
cmap = mcolors.ListedColormap(colors)

# One legend handle per status, pairing its colour with its label.
patches = [
    mpatches.Patch(label=caption, color=shade)
    for shade, caption in zip(colors, labels)
]
# Wikidata site object; its value parser is what the script exercises.
site = pywikibot.Site('wikidata', 'wikidata')

# Result grid indexed as matrix[month - 1, day - 1]; every cell starts out
# as UNDEFINED.
matrix = np.full((12, 31), Status.UNDEFINED, dtype=np.uint8)

# Number of days tested in each month. February is listed with 29 days,
# so 29 February is included in the test set as well.
months = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

# Mark every tested day as ERROR up front; the parsing loop overwrites the
# cells for which a parsed value is obtained.
for m, d in enumerate(months):
    matrix[m, :d] = Status.ERROR
# Language code passed to the parser.
language = 'cs'
# Year used for every generated date.
year = 2022
# str.format template applied to each date tuple to build the input string.
template = '{0.day}.{0.month}.{0.year}'
# Alternative template with zero-padded fields:
#template = '{0.day:02}.{0.month:02}.{0.year:02}'

# Every (year, month, day) combination to test, in calendar order.
dates = []
d_type = namedtuple('Date', ['year', 'month', 'day'])
for m, days in enumerate(months, start=1):
    for d in range(1, days + 1):
        dates.append(d_type(year, m, d))

# Formatted input strings; all_strings[i] corresponds to dates[i].
all_strings = [template.format(date) for date in dates]
length = len(all_strings)
# Number of strings sent to the parser per request.
batch = 16

# Work list of half-open index ranges [start, end) into all_strings.
# It is reversed so that pop() hands out the ranges in calendar order.
stack = [(i, min(i + batch, length)) for i in range(0, length, batch)]
stack.reverse()

while stack:
    start, end = stack.pop()
    strings = all_strings[start:end]
    try:
        parsed = site.parsevalue(
            'time', strings, options={'lang': language}, language=language)
    except ValueError:
        # NOTE(review): a ValueError is taken to mean that at least one
        # string in the batch was rejected -- confirm against pywikibot.
        # Split the range in half and retry both parts (first half first).
        # A single failing string is not retried; its cell keeps the
        # ERROR status assigned during initialisation.
        if end - start > 1:
            mid = start + (end - start) // 2
            stack.append((mid, end))
            stack.append((start, mid))
        continue

    for date, out in zip(dates[start:end], parsed):
        value = WbTime.fromWikibase(out, site)
        got = (value.year, value.month, value.day)
        row, col = date.month - 1, date.day - 1
        if got == (date.year, date.month, date.day):
            matrix[row, col] = Status.OK
        elif got == (date.year, date.day, date.month):
            # Same year, but day and month were interpreted the other way.
            matrix[row, col] = Status.MONTH_DAY
        else:
            matrix[row, col] = Status.WRONG
# Draw the status matrix. It is transposed so that months run along the
# x axis and days along the y axis.
plt.figure(tight_layout=True)
plt.imshow(matrix.T, cmap=cmap, interpolation='nearest', vmin=0, vmax=4)
plt.title(f'Format: "{all_strings[0]}"')

# Months are on the x axis, so their 1-based labels go on the x ticks.
# (Setting them with yticks would be overwritten by the day labels below
# and leave the x axis with default zero-based ticks.)
plt.xlabel('Month')
plt.xticks(range(12), [str(n) for n in range(1, 13)])

plt.ylabel('Day')
plt.yticks(range(31), [str(n) for n in range(1, 32)])

# Place the legend outside the axes, to the right of the image.
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1.0), loc='upper left')

# The file name is the template rendered for 1 January, with spaces
# replaced by underscores; the image is written to the parent directory.
plt.savefig(f"../{template.format(d_type(year, 1, 1)).replace(' ', '_')}.png",
            format='png')
plt.show()