Commit f43e9ffc authored by Manuel Cortez's avatar Manuel Cortez

Added bak the enchant based spelling correction module

parent 45465b24
Pipeline #921 failed with stage
in 10 minutes and 28 seconds
......@@ -2,6 +2,16 @@
## News in this version
## new additions
* the spelling correction module is able to add words to the dictionary so it will learn which words should start to ignore.
### Changes
* The spelling correction module has been rewritten to take advantage of the newest enchant Python module which is more stable and can be added properly to the distribution, as opposed to the first enchant module we have tried.
## News in Version 0.24
### New additions
* Socializer will ask for confirmation before closing the application.
......
......@@ -16,9 +16,8 @@ yandex.translate
mutagen
mock
babel
# cx_freeze for building the executable files.
cx_freeze
symspellpy
pyenchant
# forked repositories previously found at http://q-continuum.net
git+https://code.manuelcortez.net/manuelcortez/libloader
git+https://code.manuelcortez.net/manuelcortez/platform_utils
......
from __future__ import absolute_import
from __future__ import unicode_literals
from . import spellchecker
import platform
if platform.system() == "Windows":
from .wx_ui import *
\ No newline at end of file
from .wx_ui import *
\ No newline at end of file
# -*- coding: utf-8 -*-
""" High level Spell checker module by using the SymSpellPy library. """
import os
import glob
import shutil
import logging
import paths
from symspellpy.symspellpy import SymSpell, Verbosity
from codecs import open as open_
log = logging.getLogger("SpellChecker.checker")
loaded_dicts = dict()
ready = False
def load_dicts():
global loaded_dicts, ready
log.debug("Start dictionary loading for spelling checker module...")
if len(loaded_dicts) > 0:
loaded_dicts = dict()
path = os.path.join(paths.config_path(), "dicts")
if os.path.isdir(path):
log.debug("Loading language dictionaries from path %s" % (path,))
files = glob.glob(os.path.join(path, "*.txt"))
log.debug("%r files found." % (len(files)))
for i in files:
key = os.path.splitext(os.path.basename(i))[0]
dictionary = SymSpell()
dictionary.load_dictionary(i, 0, 1, encoding="utf-8")
loaded_dicts[key] = dictionary
log.debug("Added dictionary for language %s " % (key,))
ready = True
log.debug("All dicts were loaded.")
def prepare_dicts(language):
""" Copy the main dictionary file to the user's config directory so it can be modified and read without needing to require privileged sessions.
@ language: two letter language code.
"""
log.debug("preparing dictionary data...")
path = os.path.join(paths.config_path(), "dicts")
if os.path.exists(path) == False:
log.debug("Creating dicts folder in config directory...")
os.mkdir(path)
original_file = os.path.join(paths.app_path(), "dictionaries", language+".txt")
if os.path.exists(original_file) and os.path.exists(os.path.join(paths.config_path(), "dicts", language+".txt")) == False:
log.debug("Dictionary for language %s is not present in user config. Coppying... " % (language,))
dst_file = shutil.copy(original_file, os.path.join(paths.config_path(), "dicts"))
class SpellChecker(object):
def __init__(self, wordlist=None, *args, **kwargs):
self.kwargs = kwargs
self.dictionary = None
self.ignored_words = []
self.word_index = 0
def set_language(self, lang):
global loaded_dicts
if loaded_dicts.get(lang) != None:
self.dictionary = loaded_dicts[lang]
else:
raise ValueError("Dictionary not found for the specified language")
def set_text(self, text):
self.transformed_words = text.split()
self.word_index = 0
def check_words(self):
for word in range(0, len(self.transformed_words)):
if self.transformed_words[word] in self.ignored_words:
continue
suggestions = self.dictionary.lookup(self.transformed_words[word], Verbosity.CLOSEST, 2, transfer_casing=True)
valid_word = True
if len(suggestions) == 0:
continue
for s in suggestions:
if s.distance == 0:
valid_word = False
if valid_word == False:
continue
if word <= 10:
if len(self.transformed_words) <= 10:
context = " ".join(self.transformed_words)
else:
context = " ".join(self.transformed_words[0:10])
elif word >= len(self.transformed_words)-9:
context = " ".join(self.transformed_words[-10])
else:
context = " ".join(self.transformed_words[word-5:word+5])
self.word_index = word
yield (suggestions, context, word)
def replace(self, suggestion):
if len(self.transformed_words) < self.word_index:
raise ValueError("Word index is not present in the current text")
self.transformed_words[self.word_index] = suggestion
def replace_all(self, word):
existing_word = self.word
for i in range(0, len(self.transformed_words)):
if self.transformed_words[i] == existing_word:
self.transformed_words[i] = word
def ignore_word(self, word):
self.ignored_words.append(word)
@property
def text(self):
return " ".join(self.transformed_words)
@property
def word(self):
if len(self.transformed_words) == 0 or self.word_index >= len(self.transformed_words):
return None
return self.transformed_words[self.word_index]
\ No newline at end of file
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
from builtins import next
from builtins import object
import os
import logging
from . import wx_ui
import widgetUtils
import output
import config
import languageHandler
from platform_utils import paths
from . import checker
from . import wx_ui
import enchant
import paths
from . import twitterFilter
from enchant.checker import SpellChecker
from enchant.errors import DictNotFoundError
from enchant import tokenize
log = logging.getLogger("extra.SpellChecker.spellChecker")
class spellChecker(object):
def __init__(self, text):
super(spellChecker, self).__init__()
self.active = True
self.checker = checker.SpellChecker()
log.debug("Using language: %s" % (languageHandler.getLanguage(),))
try:
self.checker.set_language(languageHandler.curLang[:2])
except ValueError:
log.exception("Dictionary for language %s not found." % (languageHandler.curLang,))
wx_ui.dict_not_found_error()
self.active = False
self.checker.set_text(text)
self.generator = self.checker.check_words()
if self.active == True:
log.debug("Creating dialog...")
self.dialog = wx_ui.spellCheckerDialog()
widgetUtils.connect_event(self.dialog.ignore, widgetUtils.BUTTON_PRESSED, self.ignore)
widgetUtils.connect_event(self.dialog.ignoreAll, widgetUtils.BUTTON_PRESSED, self.ignoreAll)
widgetUtils.connect_event(self.dialog.replace, widgetUtils.BUTTON_PRESSED, self.replace)
widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
self.check()
self.dialog.get_response()
self.fixed_text = self.checker.text
def __init__(self, text):
super(spellChecker, self).__init__()
self.active = True
try:
if config.app["app-settings"]["language"] == "system":
log.debug("Using the system language")
self.dict = enchant.DictWithPWL(languageHandler.curLang[:2], os.path.join(paths.config_path(), "wordlist.dict"))
else:
log.debug("Using language: %s" % (languageHandler.getLanguage(),))
self.dict = enchant.DictWithPWL(languageHandler.getLanguage()[:2], os.path.join(paths.config_path(), "wordlist.dict"))
except DictNotFoundError:
log.exception("Dictionary for language %s not found." % (dictionary,))
wx_ui.dict_not_found_error()
self.active = False
self.checker = SpellChecker(self.dict, filters=[twitterFilter.TwitterFilter, tokenize.EmailFilter, tokenize.URLFilter])
self.checker.set_text(text)
if self.active == True:
log.debug("Creating dialog...")
self.dialog = wx_ui.spellCheckerDialog()
widgetUtils.connect_event(self.dialog.ignore, widgetUtils.BUTTON_PRESSED, self.ignore)
widgetUtils.connect_event(self.dialog.ignoreAll, widgetUtils.BUTTON_PRESSED, self.ignoreAll)
widgetUtils.connect_event(self.dialog.replace, widgetUtils.BUTTON_PRESSED, self.replace)
widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
widgetUtils.connect_event(self.dialog.add, widgetUtils.BUTTON_PRESSED, self.add)
self.check()
self.dialog.get_response()
self.fixed_text = self.checker.get_text()
def check(self):
try:
suggestions, context, self.wordIndex = next(self.generator)
textToSay = _("Misspelled word: %s") % (self.checker.word,)
context = context
self.dialog.set_title(textToSay)
output.speak(textToSay)
self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=[suggestion.term for suggestion in suggestions])
except StopIteration:
log.debug("Process finished.")
wx_ui.finished()
self.dialog.Destroy()
def check(self):
try:
next(self.checker)
textToSay = _(u"Misspelled word: %s") % (self.checker.word,)
context = u"... %s %s %s" % (self.checker.leading_context(10), self.checker.word, self.checker.trailing_context(10))
self.dialog.set_title(textToSay)
output.speak(textToSay)
self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=self.checker.suggest())
except StopIteration:
log.debug("Process finished.")
wx_ui.finished()
self.dialog.Destroy()
def ignore(self, ev):
self.check()
def ignore(self, ev):
self.check()
def ignoreAll(self, ev):
self.checker.ignore_word(word=self.checker.word)
self.check()
def ignoreAll(self, ev):
self.checker.ignore_always(word=self.checker.word)
self.check()
def replace(self, ev):
self.checker.replace(self.dialog.get_selected_suggestion())
self.check()
def replace(self, ev):
self.checker.replace(self.dialog.get_selected_suggestion())
self.check()
def replaceAll(self, ev):
self.checker.replace_all(self.dialog.get_selected_suggestion())
self.check()
def replaceAll(self, ev):
self.checker.replace_always(self.dialog.get_selected_suggestion())
self.check()
def clean(self):
if hasattr(self, "dialog"):
self.dialog.Destroy()
\ No newline at end of file
def add(self, ev):
self.checker.add()
self.check()
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from enchant.tokenize import Filter
class TwitterFilter(Filter):
"""Filter skipping over twitter usernames and hashtags.
This filter skips any words matching the following regular expression:
^[#@](\S){1, }$
That is, any words that resemble users and hashtags.
"""
_pattern = re.compile(r"^[#@](\S){1,}$")
def _skip(self,word):
if self._pattern.match(word):
return True
return False
......@@ -21,60 +21,63 @@ import wx
import application
class spellCheckerDialog(wx.Dialog):
def __init__(self):
super(spellCheckerDialog, self).__init__(None, 1)
panel = wx.Panel(self)
sizer = wx.BoxSizer(wx.VERTICAL)
word = wx.StaticText(panel, -1, _("&Misspelled word"))
self.word = wx.TextCtrl(panel, -1)
wordBox = wx.BoxSizer(wx.HORIZONTAL)
wordBox.Add(word, 0, wx.ALL, 5)
wordBox.Add(self.word, 0, wx.ALL, 5)
context = wx.StaticText(panel, -1, _("Con&text"))
self.context = wx.TextCtrl(panel, -1)
contextBox = wx.BoxSizer(wx.HORIZONTAL)
contextBox.Add(context, 0, wx.ALL, 5)
contextBox.Add(self.context, 0, wx.ALL, 5)
suggest = wx.StaticText(panel, -1, _("&Suggestions"))
self.suggestions = wx.ListBox(panel, -1, choices=[], style=wx.LB_SINGLE)
suggestionsBox = wx.BoxSizer(wx.HORIZONTAL)
suggestionsBox.Add(suggest, 0, wx.ALL, 5)
suggestionsBox.Add(self.suggestions, 0, wx.ALL, 5)
self.ignore = wx.Button(panel, -1, _("&Ignore"))
self.ignoreAll = wx.Button(panel, -1, _("Ignore &all"))
self.replace = wx.Button(panel, -1, _("&Replace"))
self.replaceAll = wx.Button(panel, -1, _("Replace a&ll"))
close = wx.Button(panel, wx.ID_CANCEL)
btnBox = wx.BoxSizer(wx.HORIZONTAL)
btnBox.Add(self.ignore, 0, wx.ALL, 5)
btnBox.Add(self.ignoreAll, 0, wx.ALL, 5)
btnBox.Add(self.replace, 0, wx.ALL, 5)
btnBox.Add(self.replaceAll, 0, wx.ALL, 5)
btnBox.Add(close, 0, wx.ALL, 5)
sizer.Add(wordBox, 0, wx.ALL, 5)
sizer.Add(contextBox, 0, wx.ALL, 5)
sizer.Add(suggestionsBox, 0, wx.ALL, 5)
sizer.Add(btnBox, 0, wx.ALL, 5)
panel.SetSizer(sizer)
self.SetClientSize(sizer.CalcMin())
def __init__(self):
super(spellCheckerDialog, self).__init__(None, 1)
panel = wx.Panel(self)
sizer = wx.BoxSizer(wx.VERTICAL)
word = wx.StaticText(panel, -1, _(u"Misspelled word"))
self.word = wx.TextCtrl(panel, -1)
wordBox = wx.BoxSizer(wx.HORIZONTAL)
wordBox.Add(word, 0, wx.ALL, 5)
wordBox.Add(self.word, 0, wx.ALL, 5)
context = wx.StaticText(panel, -1, _(u"Context"))
self.context = wx.TextCtrl(panel, -1)
contextBox = wx.BoxSizer(wx.HORIZONTAL)
contextBox.Add(context, 0, wx.ALL, 5)
contextBox.Add(self.context, 0, wx.ALL, 5)
suggest = wx.StaticText(panel, -1, _(u"Suggestions"))
self.suggestions = wx.ListBox(panel, -1, choices=[], style=wx.LB_SINGLE)
suggestionsBox = wx.BoxSizer(wx.HORIZONTAL)
suggestionsBox.Add(suggest, 0, wx.ALL, 5)
suggestionsBox.Add(self.suggestions, 0, wx.ALL, 5)
self.ignore = wx.Button(panel, -1, _(u"&Ignore"))
self.ignoreAll = wx.Button(panel, -1, _(u"I&gnore all"))
self.replace = wx.Button(panel, -1, _(u"&Replace"))
self.replaceAll = wx.Button(panel, -1, _(u"R&eplace all"))
self.add = wx.Button(panel, -1, _(u"&Add to personal dictionary"))
close = wx.Button(panel, wx.ID_CANCEL)
btnBox = wx.BoxSizer(wx.HORIZONTAL)
btnBox.Add(self.ignore, 0, wx.ALL, 5)
btnBox.Add(self.ignoreAll, 0, wx.ALL, 5)
btnBox.Add(self.replace, 0, wx.ALL, 5)
btnBox.Add(self.replaceAll, 0, wx.ALL, 5)
btnBox.Add(self.add, 0, wx.ALL, 5)
btnBox.Add(close, 0, wx.ALL, 5)
sizer.Add(wordBox, 0, wx.ALL, 5)
sizer.Add(contextBox, 0, wx.ALL, 5)
sizer.Add(suggestionsBox, 0, wx.ALL, 5)
sizer.Add(btnBox, 0, wx.ALL, 5)
panel.SetSizer(sizer)
self.SetClientSize(sizer.CalcMin())
def get_response(self):
return self.ShowModal()
def set_title(self, title):
return self.SetTitle(title)
def get_response(self):
return self.ShowModal()
def set_word_and_suggestions(self, word, context, suggestions):
self.word.SetValue(word)
self.context.ChangeValue(context)
self.suggestions.Set(suggestions)
self.suggestions.SetFocus()
def set_title(self, title):
return self.SetTitle(title)
def get_selected_suggestion(self):
return self.suggestions.GetStringSelection()
def set_word_and_suggestions(self, word, context, suggestions):
self.word.SetValue(word)
self.context.ChangeValue(context)
self.suggestions.Set(suggestions)
self.suggestions.SetFocus()
def get_selected_suggestion(self):
return self.suggestions.GetStringSelection()
def dict_not_found_error():
wx.MessageDialog(None, _("An error has occurred. There are no dictionaries available for the selected language in {0}").format(application.name,), _("Error"), wx.ICON_ERROR).ShowModal()
wx.MessageDialog(None, _(u"An error has occurred. There are no dictionaries available for the selected language in {0}").format(application.name,), _(u"Error"), wx.ICON_ERROR).ShowModal()
def finished():
wx.MessageDialog(None, _("Spell check complete."), application.name, style=wx.OK).ShowModal()
wx.MessageDialog(None, _(u"Spell check complete."), application.name, style=wx.OK).ShowModal()
......@@ -18,7 +18,6 @@ if hasattr(sys, "frozen"):
sys.excepthook = lambda x, y, z: logging.critical(''.join(traceback.format_exception(x, y, z)))
from mysc.thread_utils import call_threaded
from wxUI import commonMessages
from extra.SpellChecker import checker # Load dictionaries in advance for spelling correction
log = logging.getLogger("main")
......@@ -57,8 +56,6 @@ def setup():
del sm
log.debug("Loading dictionaries for spelling correction...")
# Let's copy dictionary files for the selected language just in case it is not present already.
checker.prepare_dicts(languageHandler.curLang[:2])
call_threaded(checker.load_dicts)
r = mainController.Controller()
call_threaded(r.login)
app.run()
......
......@@ -36,7 +36,7 @@ build_exe_options = dict(
include_msvcr=True,
zip_include_packages=["accessible_output2", "sound_lib", "arrow"],
replace_paths = [("*", "")],
include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles(), ("../windows-dependencies/msvc32", ".")],
include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles(), ("../windows-dependencies/msvc32", "."), ("../windows-dependencies/dictionaries", "lib/enchant/data/mingw32/share/enchant/hunspell")],
packages=["interactors", "presenters", "views", "wxUI"],
)
......
......@@ -46,5 +46,5 @@
## Other
* distribution:
- Create an installer for alpha versions
- (done) Create an installer for alpha versions
- create a 64 bits distribution for both alpha and stable versions.
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
SET UTF-8
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
ICONV 1
ICONV ’ '
NOSUGGEST !
# ordinal numbers
COMPOUNDMIN 1
# only in compounds: 1th, 2th, 3th
ONLYINCOMPOUND c
# compound rules:
# 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.)
# 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.)
COMPOUNDRULE 2
COMPOUNDRULE n*1t
COMPOUNDRULE n*mp
WORDCHARS 0123456789
PFX A Y 1
PFX A 0 re .
PFX I Y 1
PFX I 0 in .
PFX U Y 1
PFX U 0 un .
PFX C Y 1
PFX C 0 de .
PFX E Y 1
PFX E 0 dis .
PFX F Y 1
PFX F 0 con .
PFX K Y 1
PFX K 0 pro .
SFX V N 2
SFX V e ive e
SFX V 0 ive [^e]
SFX N Y 3
SFX N e ion e
SFX N y ication y
SFX N 0 en [^ey]
SFX X Y 3
SFX X e ions e
SFX X y ications y
SFX X 0 ens [^ey]
SFX H N 2
SFX H y ieth y
SFX H 0 th [^y]
SFX Y Y 1
SFX Y 0 ly .
SFX G Y 2
SFX G e ing e
SFX G 0 ing [^e]
SFX J Y 2
SFX J e ings e
SFX J 0 ings [^e]
SFX D Y 4
SFX D 0 d e
SFX D y ied [^aeiou]y
SFX D 0 ed [^ey]
SFX D 0 ed [aeiou]y
SFX T N 4
SFX T 0 st e
SFX T y iest [^aeiou]y
SFX T 0 est [aeiou]y
SFX T 0 est [^ey]
SFX R Y 4
SFX R 0 r e
SFX R y ier [^aeiou]y
SFX R 0 er [aeiou]y
SFX R 0 er [^ey]
SFX Z Y 4
SFX Z 0 rs e
SFX Z y iers [^aeiou]y
SFX Z 0 ers [aeiou]y
SFX Z 0 ers [^ey]
SFX S Y 4
SFX S y ies [^aeiou]y
SFX S 0 s [aeiou]y
SFX S 0 es [sxzh]
SFX S 0 s [^sxzhy]
SFX P Y 3
SFX P y iness [^aeiou]y
SFX P 0 ness [aeiou]y
SFX P 0 ness [^y]
SFX M Y 1
SFX M 0 's .
SFX B Y 3
SFX B 0 able [^aeiou]
SFX B 0 able ee
SFX B e able [^aeiou]e
SFX L Y 1
SFX L 0 ment .
REP 90
REP a ei
REP ei a
REP a ey
REP ey a
REP ai ie
REP ie ai
REP alot a_lot
REP are air
REP are ear
REP are eir
REP air are
REP air ere
REP ere air
REP ere ear
REP ere eir
REP ear are
REP ear air
REP ear ere
REP eir are
REP eir ere
REP ch te
REP te ch
REP ch ti
REP ti ch
REP ch tu
REP tu ch
REP ch s
REP s ch
REP ch k
REP k ch
REP f ph
REP ph f
REP gh f
REP f gh
REP i igh
REP igh i
REP i uy
REP uy i
REP i ee
REP ee i
REP j di
REP di j
REP j gg
REP gg j
REP j ge
REP ge j
REP s ti
REP ti s
REP s ci
REP ci s
REP k cc
REP cc k
REP k qu
REP qu k
REP kw qu
REP o eau
REP eau o
REP o ew
REP ew o
REP oo ew
REP ew oo
REP ew ui
REP ui ew
REP oo ui
REP ui oo
REP ew u
REP u ew
REP oo u
REP u oo
REP u oe
REP oe u
REP u ieu
REP ieu u
REP ue ew
REP ew ue