Page MenuHomePhabricator

[WMSE-WLE] Bad ids and unused images generator

Authored By
Lokal_Profil
May 3 2018, 11:07 AM
Size
2 KB
Referenced Files
None
Subscribers
None

[WMSE-WLE] Bad ids and unused images generator

import pywikibot
import wikidataStuff.wdqsLookup as query
# retrieve info from Wikidata
def process_data(raw_data):
    """Index the raw output of query.make_select_wdqs_query by id.

    :param raw_data: iterable of dicts, each holding an 'id' entry, an
        'item' entry (the entity URI) and optionally a 'P18' entry.
    :return: dict mapping each id to {'image': bool, 'qid': str}, where
        'image' tells whether the entry had a non-empty 'P18' value.
    """
    data = {}
    for entry in raw_data:
        idno = entry['id']
        # Keep only what follows the last '/'. For the usual
        # 'http://www.wikidata.org/entity/Q…' URI this is the same as
        # stripping the prefix, but it does not mangle the value when the
        # URI uses another scheme or is already a bare id.
        qid = entry['item'].rpartition('/')[2]
        if idno in data and data[idno]['qid'] != qid:
            # Only reported: the later item still replaces the earlier one.
            print('duplicate idno for {}'.format(idno))
        data[idno] = {'image': bool(entry.get('P18')), 'qid': qid}
    return data
# format the bad ids / unused images results as plain text
def quick_output(output_data):
    """Format the results as plain text, one block per id.

    Every entry becomes a caption line (the id, followed by the qid in
    parentheses when there is one) and then its images, each on a line of
    its own and indented by a tab.

    :param output_data: dict mapping an id to a dict with the keys
        'images' (list of titles) and 'qid' (item id or None).
    :return: the formatted text; an empty string for empty input.
    """
    blocks = []
    for idno, info in output_data.items():
        qid = info.get('qid')
        caption = '{} ({})'.format(idno, qid) if qid else idno
        # A missing (or None) 'images' value is rendered like an empty
        # list instead of making str.join raise a TypeError.
        images = info.get('images') or []
        blocks.append('{}\n\t{}\n'.format(caption, '\n\t'.join(images)))
    return ''.join(blocks)
# format the bad ids / unused images results as wikitext galleries
def wikitext_output(output_data):
    """Format the results as wikitext, one <gallery> per id.

    The gallery caption is the id, followed by an interwiki link
    ([[:d:…]]) to the qid when there is one. The images are listed one
    per line inside the gallery.

    :param output_data: dict mapping an id to a dict with the keys
        'images' (list of titles) and 'qid' (item id or None).
    :return: the wikitext; an empty string for empty input.
    """
    galleries = []
    for idno, info in output_data.items():
        qid = info.get('qid')
        caption = '{} [[:d:{}]]'.format(idno, qid) if qid else idno
        # A missing (or None) 'images' value is rendered like an empty
        # list instead of making str.join raise a TypeError.
        images = info.get('images') or []
        galleries.append(
            '<gallery caption="{}">\n{}\n</gallery>\n\n'.format(
                caption, '\n'.join(images)))
    return ''.join(galleries)
# Query Wikidata for every item with a P3613 value (plus its P18 image, if
# any) and index the raw result by that value.
wle_items = process_data(
    query.make_select_wdqs_query(
        '?item wdt:P3613 ?id',
        select_value='id',
        optional_props=['P18'],
        allow_multiple=True,
        raw=True,
    )
)
# retrieve info from Commons
commons = pywikibot.Site('commons', 'commons')
cat_name = 'Category:Protected areas of Sweden with known IDs'
# Group the titles of the category members by their sort key prefix; the
# comparisons against wle_items further down use that prefix as the id.
commons_data = {}
for member in pywikibot.data.api.QueryGenerator(
        site=commons, list='categorymembers',
        cmtitle=cat_name, cmprop='title|sortkeyprefix'):
    commons_data.setdefault(
        member['sortkeyprefix'], []).append(member['title'])
# Ids used as sort key on Commons which are not among the Wikidata ids.
unknown_ids = {
    idno: {'images': titles, 'qid': None}
    for idno, titles in commons_data.items()
    if idno not in wle_items
}
# Ids known on Wikidata whose item has no image, although the Commons
# category has members for that id.
unused_images = {
    idno: {'images': titles, 'qid': wle_items[idno]['qid']}
    for idno, titles in commons_data.items()
    if idno in wle_items and not wle_items[idno]['image']
}
# print(quick_output(unknown_ids))
# print(quick_output(unused_images))
# print(wikitext_output(unknown_ids))
# print(wikitext_output(unused_images))

File Metadata

Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
5791773
Default Alt Text
[WMSE-WLE] Bad ids and unused images generator (2 KB)

Event Timeline