# Origin: Products/Utf8Splitter/txtng3normalizer.py in Utf8Splitter.git
# (https://scm.cri.minesparis.psl.eu/git/Utf8Splitter.git), added as a new
# file in blobdiff d25275c7a6284a8da05e40f231f2e9a3a30d93b5..
# 50b88da70954fb7827784be1ce14d6f75ae9072e.
"""TextIndexNG3 normalizer that delegates to ``Utf8Utils``."""

from zope.interface import implements
from zopyx.txng3.core.interfaces.normalizer import INormalizer
from Utf8Splitter import Utf8Utils


class _Normalizer(object):
    """``INormalizer`` implementation backed by ``Utf8Utils``.

    None of the methods consult the ``language`` argument: the same
    behaviour applies whatever language is requested.
    """

    # Class-advice declaration; it must stay directly in the class body.
    implements(INormalizer)

    def availableLanguages(self):
        """Return the string ``"all"``."""
        return "all"

    def process(self, words, language):
        """Normalize a word or a sequence of words.

        Return the normalized word, or a sequence of normalized words, as
        produced by ``Utf8Utils.udesacc``.  ``language`` is accepted for
        interface compatibility but is not used here.

        NOTE(review): ``udesacc`` presumably strips accents -- confirm
        against ``Utf8Utils``.
        """
        normalized = Utf8Utils.udesacc(words)
        return normalized

    def translationTable(self, language):
        """Return the translation table as (from_str, repl_str) pairs.

        The pairs are the items of ``Utf8Utils._cache``; ``language`` is
        not used to select a table.
        """
        table = Utf8Utils._cache
        return table.items()


# Module-level singleton instance of the normalizer.
Normalizer = _Normalizer()