]> CRI, Mines Paris - PSL - Utf8Splitter.git/blobdiff - Products/Utf8Splitter/txtng3normalizer.py
eggification
[Utf8Splitter.git] / Products / Utf8Splitter / txtng3normalizer.py
diff --git a/Products/Utf8Splitter/txtng3normalizer.py b/Products/Utf8Splitter/txtng3normalizer.py
new file mode 100755 (executable)
index 0000000..65890e8
--- /dev/null
@@ -0,0 +1,26 @@
+from zope.interface import implements
+from zopyx.txng3.core.interfaces.normalizer import INormalizer
+from Utf8Splitter import Utf8Utils
+
+
+class _Normalizer(object) :
+       """ zopyx.txng3 INormalizer implementation that delegates to Utf8Utils. """
+       implements(INormalizer)
+       
+       def availableLanguages(self) :
+               return "all"  # the literal string "all", not a list of language codes
+
+       def process(self, words, language) :
+               """ Normalize a word or a sequence of words. Return the normalized word
+                       or a sequence of normalized words. `language` is never read: the input
+                       always goes through Utf8Utils.udesacc (presumably strips accents -- TODO confirm).
+               """
+               return Utf8Utils.udesacc(words)
+
+       def translationTable(self, language) :
+               """ Return the translation table as a list of tuples (from_str, repl_str).
+                       `language` is never read; this returns Utf8Utils._cache.items() (assumes _cache is a dict -- TODO confirm).
+               """
+               return Utf8Utils._cache.items()
+
+Normalizer = _Normalizer()  # module-level instance created once at import time
\ No newline at end of file