Page MenuHomePhabricator

parsevalue script

Authored By
matej_suchanek
Mar 14 2023, 8:19 AM
Size
2 KB
Referenced Files
None
Subscribers
None

parsevalue script

from collections import namedtuple
from enum import IntEnum
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pywikibot
from pywikibot import WbTime
from pywikibot.tools.itertools import itergroup
class Status(IntEnum):
    """Outcome recorded for one (month, day) cell of the result matrix.

    The integer values are stored directly in the ``uint8`` matrix and are
    listed in the same order as the ``colors`` and ``labels`` lists used
    for plotting, so the numbering must stay contiguous from 0.
    """

    # Cell does not correspond to a day of that month (initial fill value).
    UNDEFINED = 0
    # Day exists but no parsed value was recorded for it.
    ERROR = 1
    # Parsed year, month and day all match the input date.
    OK = 2
    # Parsed year matches, but month and day came back swapped.
    MONTH_DAY = 3
    # Parsed to some other date.
    WRONG = 4
# One (colour, legend text) pair per Status value, listed in value order
# (0 through 4).
_legend_entries = [
    ('black', 'Undefined'),
    ('grey', 'Error'),
    ('green', 'OK'),
    ('yellow', 'MDY'),
    ('red', 'Different date'),
]
colors = [entry[0] for entry in _legend_entries]
labels = [entry[1] for entry in _legend_entries]

# Colormap for drawing the matrix and matching legend handles.
cmap = mcolors.ListedColormap(colors)
patches = []
for swatch, caption in zip(colors, labels):
    patches.append(mpatches.Patch(color=swatch, label=caption))
site = pywikibot.Site('wikidata', 'wikidata')

# Number of days in each month, January first; February is counted as 29.
months = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

# 12 x 31 grid of Status values: rows are months, columns are days.
# Everything starts as UNDEFINED; the days that exist in a month are then
# marked ERROR until a parsed value overwrites them.
matrix = np.full((12, 31), Status.UNDEFINED, dtype=np.uint8)
for month_row, day_count in zip(matrix, months):
    # month_row is a view into matrix, so this writes through.
    month_row[:day_count] = Status.ERROR
# Language passed to the parser, and the year used for every test date.
language = 'cs'
year = 2022

# Format applied to each date; fields are read from the Date namedtuple.
template = '{0.day}.{0.month}.{0.year}'
# Alternative format with zero-padded fields:
#template = '{0.day:02}.{0.month:02}.{0.year:02}'

# Every day listed in ``months`` for ``year``, in calendar order.
d_type = namedtuple('Date', ['year', 'month', 'day'])
dates = [
    d_type(year, month_number, day_number)
    for month_number, day_count in enumerate(months, start=1)
    for day_number in range(1, day_count + 1)
]
all_strings = list(map(template.format, dates))

# Split the strings into half-open (start, end) index ranges of at most
# ``batch`` items each.
length = len(all_strings)
batch = 16
stack = [
    (first, min(first + batch, length))
    for first in range(0, length, batch)
]
# Reversed so that popping from the end yields the ranges in calendar order.
stack.reverse()
# Work through the pending index ranges. A range that fails to parse as a
# whole is split in two and both halves are retried, which narrows a
# failure down to individual strings.
while stack:
    lo, hi = stack.pop()
    chunk = all_strings[lo:hi]
    try:
        results = site.parsevalue(
            'time', chunk, options={'lang': language}, language=language)
    except ValueError:
        half = lo + (hi - lo) // 2
        # A one-string range cannot be split further; its matrix cell
        # simply keeps whatever value it already holds.
        if half not in (lo, hi):
            # Push the upper half first so the lower half is popped next.
            stack.append((half, hi))
            stack.append((lo, half))
        continue

    for expected, _text, raw in zip(dates[lo:hi], chunk, results):
        value = WbTime.fromWikibase(raw, site)
        got = (value.year, value.month, value.day)
        if got == (expected.year, expected.month, expected.day):
            status = Status.OK
        elif got == (expected.year, expected.day, expected.month):
            # Same year, but day and month were interpreted the other
            # way round.
            status = Status.MONTH_DAY
        else:
            status = Status.WRONG
        # The matrix is indexed from zero; months and days count from one.
        matrix[expected.month - 1, expected.day - 1] = status
# Draw the result grid. The matrix is transposed so that months run along
# the x axis (12 columns) and days along the y axis (31 rows).
plt.figure(tight_layout=True)
# vmin/vmax pin the colour scale to the Status range 0..4, so each status
# always gets the same colour even if some statuses do not occur.
plt.imshow(matrix.T, cmap=cmap, interpolation='nearest', vmin=0, vmax=4)
plt.title(f'Format: "{all_strings[0]}"')

# Month labels go on the x axis. Setting them with yticks would be
# overwritten by the day labels below, leaving the month axis with
# zero-based default ticks.
plt.xlabel('Month')
plt.xticks(range(12), [str(n) for n in range(1, 13)])
plt.ylabel('Day')
plt.yticks(range(31), [str(n) for n in range(1, 32)])

plt.legend(handles=patches, bbox_to_anchor=(1.05, 1.0), loc='upper left')

# The image is written to the parent directory and named after the
# template applied to 1 January, with spaces replaced by underscores.
plt.savefig(f"../{template.format(d_type(year, 1, 1)).replace(' ', '_')}.png",
            format='png')
plt.show()

File Metadata

Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
10772208
Default Alt Text
parsevalue script (2 KB)

Event Timeline