Page MenuHomePhabricator
Paste P50570

Simple Python script to migrate the spam blacklist (frwiki)
ActivePublic

Authored by LD on Aug 13 2023, 8:43 PM.
import re
import json
import requests
# Where the raw wikitext of the frwiki spam blacklist page is fetched from.
WIKI_BASE_URL = 'https://fr.wikipedia.org'
BLACKLIST_PATH = '/w/index.php?title=MediaWiki:Spam-blacklist&action=raw'

# One blacklist line of the form "<pattern> <whitespace> #<notes>".
# The separator is "[^\S\n]+" (whitespace except newline) and the match is
# anchored at the start of a line, so a pattern can never be paired with a
# comment that sits on a *following* line.
_ENTRY_RE = re.compile(r'^(.*?)[^\S\n]+#(.*)', re.MULTILINE)

# If any of these characters remain after unescaping, the entry is skipped
# (presumably because it is a real regex rather than a plain domain).
_SPECIAL_CHARS = '()[]{}*\\'


def parse_spam_entries(spam_text):
    """Extract plain-domain entries and their notes from blacklist wikitext.

    Only lines that carry both a pattern and a trailing ``#`` comment are
    considered. The pattern is unescaped (``\\b`` removed, ``\\.`` -> ``.``,
    ``\\-`` -> ``-``); entries that still contain a character from
    ``_SPECIAL_CHARS`` are dropped.

    Args:
        spam_text: Raw text of the blacklist page.

    Returns:
        A list of ``{"domain": ..., "notes": ...}`` dicts, in page order,
        with both values stripped of surrounding whitespace and non-empty.
    """
    entries = []
    for match in _ENTRY_RE.finditer(spam_text):
        raw_pattern, raw_notes = match.groups()

        cleaned = (
            raw_pattern
            .replace('\\b', '')
            .replace('\\.', '.')
            .replace('\\-', '-')
            .strip()
        )
        notes = raw_notes.strip()

        # Check emptiness after cleaning so that whitespace-only notes or a
        # pattern that unescapes to nothing are not emitted as empty strings.
        if not cleaned or not notes:
            continue
        if any(char in cleaned for char in _SPECIAL_CHARS):
            continue

        entries.append({
            "domain": cleaned,
            "notes": notes,
        })
    return entries


def main():
    """Download the blacklist page and print the parsed entries as JSON."""
    # A timeout keeps the script from hanging forever on a stalled connection;
    # raise_for_status() stops an HTTP error page from being parsed as data.
    response = requests.get(WIKI_BASE_URL + BLACKLIST_PATH, timeout=30)
    response.raise_for_status()
    spam_list = parse_spam_entries(response.text)
    print(json.dumps(spam_list, ensure_ascii=False, indent='\t'))


if __name__ == '__main__':
    main()