import pywikibot
import wikidataStuff.helpers as helpers
import wikidataStuff.wdqsLookup as query
# retrieve info from Wikidata
def process_data(raw_data):
    """Process the raw output of query.make_select_wdqs_query.

    Every entry must provide an 'id' and an 'item' value; 'P18' is optional.
    The leading 'http://www.wikidata.org/entity/' part of 'item' is cut off
    to obtain the qid (the value is assumed to start with that prefix).

    If the same id shows up for two different qids a notice is printed and
    the later entry replaces the earlier one.

    @param raw_data: iterable of dict-like result rows
    @return: dict of the form {id: {'image': bool, 'qid': str}}
    """
    # Loop invariant: number of characters to drop from each entity URI.
    prefix_length = len('http://www.wikidata.org/entity/')
    data = {}
    for entry in raw_data:
        idno = entry['id']
        # A missing or empty P18 value both count as "no image".
        image = bool(entry.get('P18'))
        qid = entry['item'][prefix_length:]
        if idno in data and data[idno]['qid'] != qid:
            # Single-argument call form: valid on both Python 2 and 3.
            print('duplicate idno for {}'.format(idno))
        data[idno] = {'image': image, 'qid': qid}
    return data
# Fetch every item that has a P3613 value (together with its P18 value, if
# any) and index the raw result rows by that id.
wle_items = process_data(
    query.make_select_wdqs_query(
        '?item wdt:P3613 ?id',
        optional_props=['P18'],
        select_value='id',
        allow_multiple=True,
        raw=True
    )
)
# retrieve info from Commons
commons = pywikibot.Site('commons', 'commons')
# Full page title of the category, namespace prefix included.
cat_name = 'Category:Protected areas of Sweden with known IDs'

# Group the titles of the category members by their sort key prefix.
commons_data = {}
for member in pywikibot.data.api.QueryGenerator(
        site=commons, list='categorymembers',
        cmprop='title|sortkeyprefix',
        # cat_name already carries the 'Category:' prefix; adding it a
        # second time would request 'Category:Category:...'.
        cmtitle=cat_name):
    commons_data.setdefault(member['sortkeyprefix'], []).append(
        member['title'])

# NOTE: wle_items is deliberately not rebuilt via helpers.fill_cache_wdqs
# here. The checks further down read wle_items[...]['image'], which needs
# the per-id dicts produced by process_data.
# generate list of bad ids
# Sort key prefixes used on Commons that are not among the ids known from
# Wikidata, mapped to the Commons titles filed under them.
# dict.items() is used since iteritems() only exists on Python 2.
unknown_ids = {
    idno: titles
    for idno, titles in commons_data.items()
    if idno not in wle_items
}
# generate list of unused images
# Ids known from Wikidata whose item has no image flag set, mapped to the
# Commons titles available for them.
# dict.items() is used since iteritems() only exists on Python 2.
unused_images = {
    idno: titles
    for idno, titles in commons_data.items()
    if idno in wle_items and not wle_items[idno]['image']
}