import requests import json import re domain = 'https://pl.wikipedia.org' current_spam_blacklist = requests.get(domain + '/wiki/MediaWiki:Spam-blacklist?action=raw&oldid=71023370').text current_blocked_domains = requests.get(domain + '/wiki/MediaWiki:BlockedExternalDomains.json?action=raw') query_res = requests.get(domain + '/w/api.php', params= { 'action': 'query', 'format': 'json', 'prop': 'revisions', 'titles': 'MediaWiki:Spam-blacklist', 'rvslots': 'main', 'rvprops': 'ids'}).json()['query']['pages'] spam_blacklist_rev_id = query_res[list(query_res.keys())[0]]['revisions'][0]['revid'] if current_blocked_domains.status_code == 404: current_blocked_domains = [] else: current_blocked_domains = current_blocked_domains.json() new_spam_blacklist = [] section_notes = '' for line in current_spam_blacklist.split('\n'): if line.startswith('#'): # plwiki: Migrate section comments (comment on a line by itself after an empty line) if section_notes == '': section_notes = line.strip(' #') + ' / ' new_spam_blacklist.append(line) continue # plwiki: Migrate comments notes = '' if '#' in line: (domain_regex, _, notes) = line.partition('#') notes = notes.strip() + ' / ' else: domain_regex = line domain_regex = domain_regex.strip() if len(domain_regex) == 0: # plwiki: End of section, clear the section comment section_notes = '' continue if domain_regex.startswith('\\b') and domain_regex.endswith('\\b'): domain_regex = domain_regex[2:-2] # plwiki: Alternative syntax for word boundary if domain_regex.startswith('(?