Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F35564788
T292270 gadgets gadgets code code!
No One
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Authored By
Aklapper
Oct 12 2022, 4:08 PM
2022-10-12 16:08:31 (UTC+0)
Size
5 KB
Referenced Files
None
Subscribers
None
T292270 gadgets gadgets code code!
View Options
# based on https://paws-public.wmflabs.org/paws-public/User:Harej_(WMF)/Lists%20of%20gadgets%20for%20every%20Wikimedia%20wiki.ipynb
# based on https://public.paws.wmcloud.org/User:SSethi_(WMF)/List%20of%20most%20used%20gadgets%20across%20wikimedia%20wikis.ipynb
# if this was my code (I am Andre Klapper, <aklapper@wikimedia.org>) this was licensed under Creative Commons Zero.
import requests
import re
import json
from collections import OrderedDict, Counter
# Download the Site Matrix to get the list of Wikimedia wikis.
sitematrix = 'https://en.wikipedia.org/w/api.php?action=sitematrix&format=json'
# requests never times out by default; the explicit timeout keeps a stalled
# connection from hanging the script, and raise_for_status() reports an HTTP
# failure directly instead of as a confusing JSON/KeyError further down.
r = requests.get(sitematrix, timeout=60)
r.raise_for_status()
r = r.json()['sitematrix']

# Collect the base URL of every wiki that is not flagged as closed.
site_urls = []
for blob in r.values():
    if isinstance(blob, dict):
        # Dict-shaped entries carry their wikis under the 'site' key.
        sites = blob['site']
    elif isinstance(blob, list):
        # "Special" wikis, including Commons and Wikidata, come as a bare list.
        sites = blob
    else:
        # Neither a dict nor a list: there is nothing to collect from it.
        continue
    # Exclude closed sites.
    site_urls.extend(site['url'] for site in sites if 'closed' not in site)
# Download [[MediaWiki:Gadgets-definition]] from each of these wikis.
# gadget_defs maps a wiki's base URL to the raw wikitext of that page.
api_request_gadgets_definition = ('/w/api.php?action=parse&format=json&prop=wikitext&page=MediaWiki%3AGadgets-definition')
gadget_defs = {}
for url in site_urls:
    definition_page = url + '/wiki/MediaWiki:Gadgets-definition'
    q = url + api_request_gadgets_definition
    try:
        # The timeout stops one unresponsive wiki from blocking the whole run.
        r = requests.get(q, timeout=60).json()
    except (requests.RequestException, ValueError) as err:
        # Network failure or a non-JSON reply: report it and move on rather
        # than losing the definitions already collected from other wikis.
        print(' ❎ ' + definition_page + ' (request failed: ' + str(err) + ')')
        continue
    if 'error' in r:
        # An API error reply is treated as "no Gadgets-definition page here".
        print(' ❎ ' + definition_page)
        continue
    gadget_defs[url] = r['parse']['wikitext']['*']
    print('✅ ' + definition_page)
print('\nGadget definition pages on active wikis: ' + str(len(gadget_defs)) + '\n')
# Data cleanup: turn every gadget line of every Gadgets-definition page into
# an entry, then ask each wiki whether the pages that line refers to exist.
#
# Resulting structure:
#   gadget_entries[canonical name][definition line][site URL][page title] = site URL
# where the canonical name is the upper-cased gadget name with '-', '_' and
# spaces removed, so that spelling variants of one gadget share a key.
gadget_entries = {}
for site, blob in gadget_defs.items():
    for entry in blob.split('\n'):
        # Only lines that start with '*' and contain a '|' are gadget
        # definitions; everything else (including empty lines) is skipped.
        if not (entry.startswith('*') and '|' in entry):
            continue

        gadget = entry.replace('*', '').strip()
        canonical = (gadget.upper()
                     .replace('-', '')
                     .replace('_', '')
                     .replace(' ', '')
                     .split('|')[0]
                     .split('[')[0]
                     .strip())

        # Create the nested levels on demand; the per-site dict is (re)set to
        # empty for every definition line, exactly as before.
        per_site = gadget_entries.setdefault(canonical, {}).setdefault(gadget, {})
        per_site[site] = {}

        # Reduce the line to its '|'-separated parts. All '*' were already
        # removed above, so no second asterisk pass is needed.
        gadget_pages = re.sub(r'\[.*?\]', '', gadget)           # bracketed options
        gadget_pages = re.sub(r'<!--.*?-->', '', gadget_pages)  # complete comments: must be first
        gadget_pages = re.sub(r'<!--.*$', '', gadget_pages)     # unterminated comment: must be after
        gadget_pages = re.sub(r'^.*-->', '', gadget_pages)      # dangling comment end: must be after
        gadget_pages = ['MediaWiki:Gadget-' + part.strip().replace(' ', '_')
                        for part in gadget_pages.split('|')
                        if part.strip() != '']

        for gadget_page in gadget_pages:
            print(site + '/wiki/' + gadget_page)
            # NOTE(review): the source's indentation was lost; this assignment
            # is assumed to run once per page regardless of the API result.
            per_site[site][gadget_page] = site

            try:
                # Passing the title via params= lets requests percent-encode
                # it, so '&', '#', '+' or '%' in a title cannot corrupt the
                # query string. The timeout bounds each of the many requests.
                r = requests.get(
                    site + '/w/api.php',
                    params={
                        'action': 'parse',
                        'format': 'json',
                        'prop': 'wikitext',
                        'page': gadget_page,
                    },
                    timeout=60,
                ).json()
            except (requests.RequestException, ValueError) as err:
                # Do not throw away everything gathered so far because a
                # single request failed.
                print('ERROR: Request failed for ' + site + '/wiki/' + gadget_page + ' : ' + str(err))
                continue

            if 'error' not in r:
                continue
            if '=' in gadget_page:
                # Check for values with an equal sign as they imply a typo
                # (incorrect ResourceLoader array parameters).
                print('ERROR: Gadget definition for ' + gadget_page + ' includes an equal sign, potential typo on ' + site)
            elif gadget_page.endswith(('.js', '.css')):
                # Check if listed pages actually exist, but exclude
                # description/translation pages, which don't end in .js/.css.
                print('ERROR: Non-existing gadget page on ' + site + '/wiki/MediaWiki:Gadgets-definition : ' + gadget_page)

gadget_entries = OrderedDict(sorted(gadget_entries.items()))
print('\nDone')
# Report: for every canonical gadget name recorded more than ten times, print
# a heading, one of its definition lines and the number of occurrences.
for canonical, variants in gadget_entries.items():
    # One occurrence per (definition line, site) pair stored under this name.
    counter = sum(len(sites) for sites in variants.values())
    if counter <= 10:
        continue
    # Only the last definition line stored for this name is shown.
    *_, gadget = variants
    print('== ' + canonical.lower() + ' ==\n')
    print(gadget)
    print(counter)
    # To list the individual wikis as well, enable:
    # for sites in variants.values():
    #     for site in sites:
    #         print(site)
File Metadata
Details
Attached
Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
9772037
Default Alt Text
T292270 gadgets gadgets code code! (5 KB)
Attached To
Mode
P35447 T292270 gadgets gadgets code code!
Attached
Detach File
Event Timeline
Log In to Comment