Page Menu | Home | Phabricator

spellcheck-pageSkipWordsStorage[1].diff

Authored By
bzimport
Nov 22 2014, 2:10 AM
Size
4 KB
Referenced Files
None
Subscribers
None

spellcheck-pageSkipWordsStorage[1].diff

Index: spellcheck.py
===================================================================
--- spellcheck.py (revision 10724)
+++ spellcheck.py (working copy)
@@ -63,6 +63,7 @@
import wikipedia as pywikibot
from pywikibot import i18n
import pagegenerators
+import json
class SpecialTerm(object):
@@ -142,7 +143,7 @@
if word[0].isupper():
pywikibot.output(u"c: Add '%s' as correct" % (uncap(word)))
pywikibot.output(u"i: Ignore once (default)")
- pywikibot.output(u"p: Ignore on this page")
+ pywikibot.output(u"p: Always ignore on this page")
pywikibot.output(u"r: Replace text")
pywikibot.output(u"s: Replace text, but do not save as alternative")
pywikibot.output(u"g: Guess (give me a list of similar words)")
@@ -156,7 +157,10 @@
knownwords[word] = word
newwords.append(word)
elif answer in "pP":
- pageskip.append(word)
+ try:
+ pageSkipWords[title].append(word)
+ except:
+ pageSkipWords[title] = [word]
elif answer in "rRsS":
correct = pywikibot.input(u"What should I replace it by?")
if answer in "rR":
@@ -244,7 +248,6 @@
return result
def spellcheck(page, checknames = True, knownonly = False, title=''):
- pageskip = []
text = page
if correct_html_codes:
text = removeHTML(text)
@@ -258,7 +261,7 @@
loc += len(match.group(1))
bigword = Word(match.group(2))
smallword = bigword.derive()
- if not Word(smallword).isCorrect(checkalternative = knownonly) and \
+ if not Word(smallword).isCorrect(checkalternative = knownonly, title=title) and \
(checknames or not smallword[0].isupper()):
replacement = askAlternative(smallword,
context=text[max(0,loc-40):loc + len(match.group(2))+40],
@@ -283,7 +286,6 @@
loc += len(match.group(2))
if correct_html_codes:
text = removeHTML(text)
- pageskip = []
return text
@@ -349,14 +351,14 @@
% (self.derive(), rep, self.word))
return self.word.replace(self.derive(),rep)
- def isCorrect(self,checkalternative = False):
+ def isCorrect(self,checkalternative = False, title = ''):
# If checkalternative is True, the word will only be found incorrect if
# it is on the spelling list as a spelling error. Otherwise it will
# be found incorrect if it is not on the list as a correctly spelled
# word.
if self.word == "":
return True
- if self.word in pageskip:
+ if title in pageSkipWords and self.word in pageSkipWords[title]:
return True
try:
if knownwords[self.word] == self.word:
@@ -419,12 +421,12 @@
page.put(text, summary)
try:
- pageskip = []
edit = SpecialTerm("edit")
endpage = SpecialTerm("end page")
title = []
knownwords = {}
newwords = []
+ pageSkipWords = {}
start = None
newpages = False
longpages = False
@@ -459,6 +461,7 @@
mysite = pywikibot.getSite()
if not checklang:
checklang = mysite.language()
+
filename = pywikibot.config.datafilepath('spelling',
'spelling-' + checklang + '.txt')
print "Getting wordlist"
@@ -488,6 +491,17 @@
print "Warning! There is no wordlist for your language!"
else:
print "Wordlist successfully loaded."
+
+ pageSkipWordsFilepath = pywikibot.config.datafilepath('spelling', 'spelling-{}-{}.txt'.format(checklang, mysite.family.name))
+ print "Loading the page-specific wordlist for your language and site"
+ try:
+ with codecs.open(pageSkipWordsFilepath, 'r', encoding = mysite.encoding()) as f:
+ pageSkipWords = json.loads(f.read())
+ except IOError:
+ print "Warning! There is no page-specific wordlist for the combination of your language and site!"
+ else:
+ print "Page-specific wordlist successfully loaded."
+
# This is a purely interactive bot, we therefore do not want to put-throttle
pywikibot.put_throttle.setDelay(1)
except:
@@ -520,6 +534,8 @@
title = pywikibot.input(u"Which page to check now? (enter to stop)")
finally:
pywikibot.stopme()
+
+ # Language-level known words.
filename = pywikibot.config.datafilepath('spelling',
'spelling-' + checklang + '.txt')
if rebuild:
@@ -540,3 +556,7 @@
else:
f.write("0 %s %s\n"%(word," ".join(knownwords[word])))
f.close()
+
+ # Per-page skip words (keyed by page title) for this language and site family.
+ with codecs.open(pageSkipWordsFilepath, 'w', encoding = mysite.encoding()) as f:
+ f.write(json.dumps(pageSkipWords))

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
11365
Default Alt Text
spellcheck-pageSkipWordsStorage[1].diff (4 KB)

Event Timeline