Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F37108085
Simple python script to migrate spam blacklist
No One
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Authored By
Ladsgroup
Jun 17 2023, 10:16 PM
2023-06-17 22:16:38 (UTC+0)
Size
1 KB
Referenced Files
None
Subscribers
None
Simple python script to migrate spam blacklist
View Options
import requests
import json
import re
# Migrate simple entries of MediaWiki:Spam-blacklist to
# MediaWiki:BlockedExternalDomains.json.
#
# A blacklist line is migrated only when it is a plain literal host name
# wrapped in word boundaries, e.g. ``\bexample\.com\b``.  Every other line
# (comments, blanks, real regexes, patterns containing a path) is kept in the
# blacklist untouched.  The script prints the remaining blacklist followed by
# the new JSON document; it does not write anything back to the wiki.

# Base URL of the wiki whose spam blacklist is being migrated.
domain = 'https://en.wikipedia.beta.wmflabs.org'

# Seconds to wait on each HTTP request before giving up.
REQUEST_TIMEOUT = 30

# A pattern is a "plain domain" when it is a non-empty sequence of either an
# escaped dot (``\.``) or a character that is neither a backslash, a slash,
# nor a regex metacharacter.  Using one anchored whitelist also rejects a
# metacharacter in first position, a trailing lone backslash and the empty
# pattern, which separate "search for a bad pair" checks let through.
_PLAIN_DOMAIN = re.compile(r'(?:[^\\/.$^{\[\]()|*+?]|\\\.)+')


def extract_plain_domain(line):
    """Return the literal domain encoded by a blacklist line, or None.

    ``line`` is one raw line of the spam blacklist.  A domain is returned
    only if the stripped line starts and ends with ``\\b`` and what lies in
    between consists solely of ordinary characters and escaped dots.  For any
    other line (comment, blank, genuine regex, path pattern) None is
    returned, meaning the line must stay in the blacklist.
    """
    if line.startswith('#'):
        return None
    pattern = line.strip()
    if not (pattern.startswith('\\b') and pattern.endswith('\\b')):
        return None
    pattern = pattern[2:-2]
    if _PLAIN_DOMAIN.fullmatch(pattern) is None:
        return None
    # Only ``\.`` escapes survive validation, so dropping the backslashes
    # turns the pattern into the literal host name.
    return pattern.replace('\\', '')


def split_blacklist(blacklist_text, rev_id):
    """Split blacklist text into lines to keep and domain entries to move.

    Returns ``(remaining_lines, migrated_entries)``.  ``remaining_lines`` are
    the original lines, unmodified and in order, that cannot be migrated.
    ``migrated_entries`` are ``{'domain': ..., 'notes': ...}`` dicts whose
    note links to revision ``rev_id`` of the blacklist.
    """
    remaining_lines = []
    migrated_entries = []
    for line in blacklist_text.split('\n'):
        plain = extract_plain_domain(line)
        if plain is None:
            remaining_lines.append(line)
            continue
        migrated_entries.append({
            'domain': plain,
            'notes': 'Moved from [[Special:PermaLink/{}|MediaWiki:Spam-blacklist]]'.format(rev_id),
        })
    return remaining_lines, migrated_entries


def main():
    """Fetch both pages, migrate the simple entries and print the results."""
    blacklist_response = requests.get(
        domain + '/wiki/MediaWiki:Spam-blacklist?action=raw',
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly instead of treating an error page as blacklist content.
    blacklist_response.raise_for_status()
    current_spam_blacklist = blacklist_response.text

    blocked_response = requests.get(
        domain + '/wiki/MediaWiki:BlockedExternalDomains.json?action=raw',
        timeout=REQUEST_TIMEOUT,
    )
    if blocked_response.status_code == 404:
        # The JSON page does not exist yet: start from an empty list.
        current_blocked_domains = []
    else:
        blocked_response.raise_for_status()
        current_blocked_domains = blocked_response.json()

    query_response = requests.get(
        domain + '/w/api.php',
        params={
            'action': 'query',
            'format': 'json',
            'prop': 'revisions',
            'titles': 'MediaWiki:Spam-blacklist',
            'rvslots': 'main',
            # The parameter is named "rvprop"; "rvprops" is not recognised
            # by the API and only worked because ids are returned by default.
            'rvprop': 'ids',
        },
        timeout=REQUEST_TIMEOUT,
    )
    query_response.raise_for_status()
    pages = query_response.json()['query']['pages']
    # "pages" is keyed by page id; exactly one title was requested.
    spam_blacklist_rev_id = next(iter(pages.values()))['revisions'][0]['revid']

    new_spam_blacklist, migrated = split_blacklist(
        current_spam_blacklist, spam_blacklist_rev_id
    )
    current_blocked_domains.extend(migrated)

    print('\n'.join(new_spam_blacklist))
    print(json.dumps(current_blocked_domains, ensure_ascii=False, indent='\t'))


if __name__ == '__main__':
    main()
File Metadata
Details
Attached
Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
10900801
Default Alt Text
Simple python script to migrate spam blacklist (1 KB)
Attached To
Mode
P49299 Simple python script to migrate spam blacklist
Attached
Detach File
Event Timeline
Log In to Comment