Page MenuHomePhabricator
Paste P29487

linttrap.py
ActivePublic

Authored by Legoktm on Jun 7 2022, 6:59 PM.
Tags
None
Referenced Files
F35217111: linttrap.py
Jun 8 2022, 2:52 AM
F35216095: fix_font.py
Jun 7 2022, 6:59 PM
F35216094: fix_font.py
Jun 7 2022, 6:59 PM
import tempfile
import time
import mwparserfromhell as mwph
import pywikibot
from selenium import webdriver
from selenium.webdriver.common.by import By
# Module-level pywikibot handle for English Wikipedia, shared by the functions in this script.
site = pywikibot.Site('en', 'wikipedia')
def screenshot(old: tempfile.NamedTemporaryFile, new: tempfile.NamedTemporaryFile):
    """Save screenshots of two local HTML files rendered in headless Firefox.

    The browser window is resized to the scroll width/height of ``old`` and
    that same size is kept for ``new`` so the two images can be compared.
    The images are written to ``screenshot-old.png`` and
    ``screenshot-new.png`` in the current working directory.

    Args:
        old: Open named temporary file holding the original rendering. Only
            its ``name`` attribute and ``close()`` method are used.
        new: Open named temporary file holding the modified rendering.

    Both files are closed before this function returns, whether or not an
    error occurred. For files created with ``NamedTemporaryFile``'s default
    ``delete=True`` that also removes them from disk.
    """
    options = webdriver.FirefoxOptions()
    # Pass the flag directly instead of setting ``options.headless``: that
    # property was deprecated and later removed in Selenium 4 releases,
    # while the command-line argument works regardless of version.
    options.add_argument('-headless')
    driver = webdriver.Firefox(options=options)
    try:
        driver.get(f'file://{old.name}')
        # Crude fixed wait to give the page time to finish rendering.
        time.sleep(1)

        # Via https://pythonbasics.org/selenium-screenshot/
        def scroll(attr):
            return driver.execute_script(
                'return document.body.parentNode.scroll' + attr
            )

        driver.set_window_size(scroll('Width'), scroll('Height'))
        driver.find_element(By.TAG_NAME, 'body').screenshot('screenshot-old.png')
        old.close()

        # Now do new, with the same window size
        driver.get(f'file://{new.name}')
        time.sleep(1)
        driver.find_element(By.TAG_NAME, 'body').screenshot('screenshot-new.png')
        new.close()
    finally:
        # Closing an already-closed file is a no-op, so this is safe on the
        # success path and cleans up on the failure path.
        old.close()
        new.close()
        # Always shut the browser down so a failure does not leave a stray
        # Firefox process behind.
        driver.quit()
def parse(wikitext: str, page: pywikibot.Page) -> tempfile.NamedTemporaryFile:
    """Render wikitext to HTML via the MediaWiki parse API and store it in a temp file.

    Args:
        wikitext: The wikitext to render.
        page: Page whose title is sent as the ``title`` parameter of the
            ``action=parse`` request.

    Returns:
        An open named temporary file containing the returned HTML. The
        content is flushed, so the file can be read by name (for example by
        a browser) straight away. The caller is responsible for closing it.
    """
    html = site.simple_request(
        action='parse', title=page.title(), text=wikitext, prop='text'
    ).submit()['parse']['text']['*']
    # Explicit UTF-8 avoids encode errors under non-UTF-8 locales; the .html
    # suffix lets a browser pick the content type from the file extension.
    tmp = tempfile.NamedTemporaryFile(mode='w', suffix='.html', encoding='utf-8')
    try:
        tmp.write(html)
        # Without a flush the data may still sit in Python's write buffer,
        # and anything opening the file by name would see it empty/truncated.
        tmp.flush()
    except BaseException:
        # Do not leak the temp file if writing fails.
        tmp.close()
        raise
    return tmp
def _css_color(value: str) -> str:
    """Return *value* as a CSS colour, adding ``#`` to bare six-digit hex values."""
    candidate = value.strip()
    # Only the unambiguous six-hex-digit form is rewritten. Shorter bare
    # values are left untouched because HTML's legacy colour parsing does not
    # treat them like CSS shorthand hex.
    if len(candidate) == 6 and all(ch in "0123456789abcdefABCDEF" for ch in candidate):
        return f"#{candidate}"
    return value


def _merge_style(tag, declarations: str) -> None:
    """Put *declarations* into the tag's ``style`` attribute, merging with any existing one."""
    if not declarations:
        # Nothing to add; avoid emitting an empty style="".
        return
    if tag.has('style'):
        existing = tag.get('style')
        current = "" if existing.value is None else str(existing.value).strip()
        # Converted declarations go first so the author's explicit style
        # still wins, as it did over the presentational attribute.
        existing.value = f"{declarations} {current}".strip()
    else:
        tag.add('style', declarations)


def fix_deprecated(wikitext: str) -> str:
    """Replace obsolete HTML tags in *wikitext* with modern equivalents.

    Conversions performed (tag names are matched case-insensitively):

    * ``<font color=… face=…>`` -> ``<span style="color: …; font-family:…;">``
    * ``<strike>`` -> ``<s>``
    * ``<center>`` -> ``<div style="text-align:center;">``
    * ``<tt>`` -> ``<code>``

    Every tag visited is printed, and a diff between the input and the
    result is shown via ``pywikibot.showDiff``.

    Args:
        wikitext: The wikitext to convert.

    Returns:
        The converted wikitext.

    Raises:
        NotImplementedError: If a ``<font>`` tag carries a ``size`` attribute.
    """
    original = wikitext
    code = mwph.parse(wikitext)
    for tag in code.filter_tags():
        # Debug trace of each tag encountered.
        print(tag)
        name = str(tag.tag).strip().lower()
        if name == "font":
            # Bail out before touching the tag so nothing is half-converted.
            if tag.has('size'):
                raise NotImplementedError("font size")
            # Turn attributes into CSS
            declarations = []
            if tag.has('color'):
                value = tag.get('color').value
                if value is not None:
                    declarations.append(f"color: {_css_color(str(value))};")
                tag.remove('color')
            if tag.has('face'):
                value = tag.get('face').value
                if value is not None:
                    declarations.append(f"font-family:{value};")
                tag.remove('face')
            # Turn it into a <span>
            tag.tag = "span"
            _merge_style(tag, " ".join(declarations))
        elif name == "strike":
            tag.tag = "s"
        elif name == "center":
            tag.tag = "div"
            _merge_style(tag, "text-align:center;")
        elif name == "tt":
            tag.tag = "code"
    ret = str(code)
    pywikibot.showDiff(original, ret)
    return ret
def main():
    """Convert deprecated tags on one hard-coded page and screenshot before/after renderings."""
    page = pywikibot.Page(site, "Wikipedia:Articles for deletion/EJ Topping, Author")
    # Fetch the text once so both renderings start from the same revision.
    original = page.get()
    # Run the conversion first: it can raise (e.g. on <font size>), and doing
    # so before any temp file exists means there is nothing to clean up.
    wikitext = fix_deprecated(original)
    # The context managers guarantee both temp files are closed even if the
    # second parse or the screenshot step fails.
    with parse(original, page) as tmp_old, parse(wikitext, page) as tmp_new:
        screenshot(tmp_old, tmp_new)
    # TODO: compare both screenshots before saving


# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()

Event Timeline

Legoktm edited the content of this paste. (Show Details)
Legoktm changed the title of this paste from fix_font.py to linttrap.py. Jun 8 2022, 2:52 AM
Legoktm edited the content of this paste. (Show Details)