Page MenuHomePhabricator

[WMSE-WLE] Bad ids and unused images generator

Authored By
Lokal_Profil
Jul 6 2017, 9:50 AM
Size
1 KB
Referenced Files
None
Subscribers
None

[WMSE-WLE] Bad ids and unused images generator

import pywikibot
import wikidataStuff.helpers as helpers
import wikidataStuff.wdqsLookup as query
# retrieve info from Wikidata
def process_data(raw_data):
    """Index the raw output of query.make_select_wdqs_query by id.

    Each entry of ``raw_data`` must be a mapping with the keys ``'id'`` and
    ``'item'``; ``'P18'`` is optional. ``'item'`` is expected to be a full
    entity URL: the leading ``http://www.wikidata.org/entity/`` is dropped
    by length (it is not checked) to obtain the bare Q-id.

    If the same id occurs again with a different Q-id a warning is printed.
    In every case the later entry replaces the earlier one.

    :param raw_data: iterable of result entries (dict-like).
    :return: dict mapping each id to ``{'image': bool, 'qid': str}`` where
        ``image`` is True when the entry carries a non-empty ``'P18'``.
    """
    prefix_length = len('http://www.wikidata.org/entity/')
    data = {}
    for entry in raw_data:
        idno = entry['id']
        has_image = bool(entry.get('P18'))
        qid = entry['item'][prefix_length:]
        if idno in data and data[idno]['qid'] != qid:
            # print() with a single argument behaves the same on
            # Python 2 and Python 3.
            print('duplicate idno for {}'.format(idno))
        data[idno] = {'image': has_image, 'qid': qid}
    return data
# Ask WDQS for every item with a P3613 statement (optionally with P18) and
# index the result by the P3613 value.
# NOTE(review): the exact meaning of select_value/allow_multiple/raw is
# defined by wikidataStuff.wdqsLookup -- confirm there before changing them.
wle_items = process_data(query.make_select_wdqs_query(
    '?item wdt:P3613 ?id',
    optional_props=['P18'],
    select_value='id',
    allow_multiple=True,
    raw=True,
))
# retrieve info from Commons
commons = pywikibot.Site('commons', 'commons')
cat_name = 'Category:Protected areas of Sweden with known IDs'

# Group the titles of the category members by their sort key prefix; the
# prefixes are later compared against the ids retrieved from Wikidata.
commons_data = {}
for member in pywikibot.data.api.QueryGenerator(
        site=commons,
        list='categorymembers',
        cmprop='title|sortkeyprefix',
        cmtitle=cat_name):
    commons_data.setdefault(
        member['sortkeyprefix'], []).append(member['title'])
# generate list of bad ids and unused images
def quick_output(output_data):
    """Print each key followed by its values, one tab-indented value per line.

    :param output_data: dict mapping a key to an iterable of strings.
    """
    # .items() and single-argument print() work on both Python 2 and 3,
    # unlike the iteritems()/print-statement forms.
    for key, values in output_data.items():
        print(key)
        print(u'\t{}'.format(u'\n\t'.join(values)))
# Classify every id found on Commons in a single pass:
#   unknown_ids   -- id is not present in wle_items at all
#   unused_images -- id is present in wle_items but its entry has no image
# .items() is used instead of .iteritems() so this runs on Python 2 and 3.
unknown_ids = {}
unused_images = {}
for idno, titles in commons_data.items():
    if idno not in wle_items:
        unknown_ids[idno] = titles
    elif not wle_items[idno]['image']:
        unused_images[idno] = titles

# Uncomment the report you want printed.
# quick_output(unknown_ids)
# quick_output(unused_images)

File Metadata

Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
4759951
Default Alt Text
[WMSE-WLE] Bad ids and unused images generator (1 KB)

Event Timeline