import pywikibot
import wikidataStuff.wdqsLookup as query


def process_data(raw_data):
    """Process the raw output of query.make_select_wdqs_query.

    Each entry must provide an 'id' and an 'item' (full entity URI) and
    may provide a 'P18' value.

    Returns a dict mapping each id to {'image': bool, 'qid': str}, where
    'image' tells whether the entry carried a truthy 'P18' value and
    'qid' is the item URI with the entity prefix removed. If the same id
    is seen with a different qid a warning is printed and the later
    entry replaces the earlier one.
    """
    entity_prefix = 'http://www.wikidata.org/entity/'
    data = {}
    for entry in raw_data:
        idno = entry['id']
        qid = entry['item'][len(entity_prefix):]
        if idno in data and data[idno]['qid'] != qid:
            print('duplicate idno for {}'.format(idno))
        data[idno] = {'image': bool(entry.get('P18')), 'qid': qid}
    return data


def quick_output(output_data):
    """Format the data as plain text, one tab-indented title per line.

    output_data maps an id to a dict with an 'images' list and an
    optional 'qid'. Each id becomes a heading line ("id" or "id (qid)")
    followed by its images.
    """
    chunks = []
    for k, v in output_data.items():
        qid = v.get('qid')
        caption = '{} ({})'.format(k, qid) if qid else k
        chunks.append('{}\n'.format(caption))
        chunks.append('\t{}\n'.format('\n\t'.join(v.get('images'))))
    return ''.join(chunks)


def wikitext_output(output_data):
    """Format the data as wikitext, one captioned gallery per id.

    output_data has the same shape as for quick_output. The caption is
    the id, followed by an interwiki link to the Wikidata item when a
    qid is known.
    """
    chunks = []
    for k, v in output_data.items():
        qid = v.get('qid')
        caption = '{} [[:d:{}]]'.format(k, qid) if qid else k
        # The caption used to be passed to a format string without any
        # placeholder and was therefore silently dropped; emit it as the
        # gallery caption instead.
        chunks.append('<gallery caption="{}">\n'.format(caption))
        chunks.append('\n'.join(v.get('images')))
        chunks.append('\n</gallery>\n\n')
    return ''.join(chunks)


# retrieve info from Wikidata
wle_items = process_data(
    query.make_select_wdqs_query(
        '?item wdt:P3613 ?id',
        optional_props=['P18'],
        select_value='id',
        allow_multiple=True,
        raw=True))

# retrieve info from Commons
commons = pywikibot.Site('commons', 'commons')
cat_name = 'Category:Protected areas of Sweden with known IDs'
commons_data = {}
for member in pywikibot.data.api.QueryGenerator(
        site=commons,
        list='categorymembers',
        cmprop='title|sortkeyprefix',
        cmtitle=cat_name):
    # The sort key prefix of each category member is used as its id;
    # group the member titles per id.
    sortkey = member['sortkeyprefix']
    commons_data.setdefault(sortkey, []).append(member['title'])

# generate list of bad ids and unused images
unknown_ids = {}    # ids found on Commons but not in the Wikidata result
unused_images = {}  # ids whose Wikidata item has no P18 value
for k, v in commons_data.items():
    if k not in wle_items:
        unknown_ids[k] = {
            'images': v,
            'qid': None
        }
    elif not wle_items[k]['image']:
        unused_images[k] = {
            'images': v,
            'qid': wle_items[k]['qid']
        }

# Output: uncomment the report(s) wanted.
print(quick_output(unknown_ids))
# print(quick_output(unused_images))
# print(wikitext_output(unknown_ids))
# print(wikitext_output(unused_images))