รวมมอดูลที่น่าสนใจ
$ wget http://www.bermi.org/downloads/python_inflector-0.1.tar.gz
$ tar xfz python_inflector-0.1.tar.gz
$ cd python_inflector-0.1
$ python
Python 2.5.2 (r252:60911, Jul 31 2008, 07:39:27)
[GCC 4.3.1] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import Inflector
>>> inf = Inflector.English()
>>> inf.singularize('oxen')
'ox'
>>> inf.pluralize('sheep')
'sheep'
>>>
ยังมีอีกหลายฟังก์ชัน เอาไว้ค่อยศึกษาอีกที
update:
$ vi Rules/English.py
# Copyright (c) 2006 Bermi Ferrer Martinez
# info at bermi dot org
# See the end of this file for the free software, open source license (BSD-style).
import re
from Base import Base
class English (Base):
    """
    Inflector for pluralizing and singularizing English nouns.
    This is the default Inflector for the Inflector obj.

    Both ``pluralize`` and ``singularize`` consult the word tables in the
    same order and return on the first hit:

    1. ``uncountable_words`` -- the word is returned unchanged,
    2. ``irregular_words``   -- suffix match, case-insensitive,
    3. ``reserve_words``     -- whole-word match, case-sensitive,
    4. the ordered regular-expression rules of the method.
    """

    # singular -> plural. Matched case-insensitively against the END of the
    # word, so 'woman' is handled by the 'man' entry.
    irregular_words = {
        'person' : 'people',
        'man' : 'men',
        'child' : 'children',
        'sex' : 'sexes',
        'foot' : 'feet',
        'goose' : 'geese',
        'tooth' : 'teeth',
    }

    # singular -> plural. Only an exact, case-sensitive match of the whole
    # word is looked up here.
    reserve_words = {
        'ox' : 'oxen',
        'genus': 'genera',
        'corpus': 'corpora',
        'concerto': 'concerti',
        'bus' : 'buses',
        'virus' : 'viruses',
        'apparatus': 'apparatuses',
        'die' : 'dice',
        'this' : 'these',
        'that' : 'those',
        'quiz' : 'quizzes',
        'polka' : 'polkas',
        'passerby' : 'passersby',
        'sarcoma' : 'sarcomata',
        'schema' : 'schemata',
        'stigma' : 'stigmata',
        'stoma' : 'stomata',
        'cherub' : 'cherubim',
        'kibbutz' : 'kibbutzim',
        'seraph' : 'seraphim',
        'mum' : 'mums',
        'boa' : 'boe',
    }

    # Words returned unchanged in both directions. The test is a lower-cased
    # suffix comparison, so any word that merely ENDS with one of these
    # (e.g. 'price' ends with 'rice') is also treated as uncountable.
    uncountable_words = [
        'aircraft', 'equipment', 'information', 'rice',
        'money', 'species', 'series', 'fish', 'sheep', 'shrimp', 'sms',
        'moose', 'bison', 'deer', 'means', 'scissors', 'swine',
        'salmon',
        'meerschaum',
        'scum', 'talcum',
        'offspring',
        'sorghum', 'wampum',
        'phoenix',
    ]

    def _is_uncountable(self, word):
        '''Return True when the lower-cased word ends with an uncountable entry.'''
        return word.lower().endswith(tuple(self.uncountable_words))

    def _apply_irregular(self, word, mapping):
        '''Rewrite an irregular suffix of ``word`` using ``mapping``.

        ``mapping`` maps a source form to its target form. The first key
        that matches the end of ``word`` (ignoring case) wins. Returns the
        rewritten word, or None when no key matches.
        '''
        for source, target in mapping.items():
            match = re.search('(' + source + ')$', word, re.IGNORECASE)
            if match:
                # Keep the first letter exactly as typed so that
                # capitalisation survives ('Man' -> 'Men').
                return (word[:match.start()] + match.group(1)[0]
                        + target[1:] + word[match.end():])
        return None

    def _apply_rules(self, word, rules):
        '''Apply the first rule whose pattern matches ``word``.

        ``rules`` is an ordered sequence of (pattern, replacement) pairs.
        Returns ``word`` unchanged when no pattern matches.
        '''
        for pattern, replacement in rules:
            match = re.search(pattern, word, re.IGNORECASE)
            if not match:
                continue
            # Drop back-references to groups that did not take part in the
            # match; older Python versions raise on them inside re.sub.
            for index, group in enumerate(match.groups()):
                if group is None:
                    replacement = replacement.replace('\\' + str(index + 1), '')
            return re.sub(pattern, replacement, word)
        return word

    def pluralize(self, word) :
        '''Pluralizes English nouns.

        Returns the plural form of ``word``. The empty string and
        uncountable words are returned unchanged.
        '''
        #get rules from
        #http://web2.uvcs.uvic.ca/elc/studyzone/330/grammar/irrplu.htm
        #http://www2.gsu.edu/~wwwesl/egw/pluralsn.htm
        # Order matters: the first matching pattern is the only one applied.
        rules = (
            ('(?i)eau$' , 'eaux'),
            ('(?i)menon$' , 'mena'),
            ('(?i)terion$' , 'teria'),
            ('(?i)(m|[^b]l)ouse$' , '\\1ice'),
            ('(?i)(d|l|r)ix$' , 'ices'),
            ('(?i)(d|p|t)ex$' , 'ices'),
            ('(?i)(ar|f|oo)f$' , '\\1fs'),
            ('(?i)(f|fe)$' , 'ves'),
            ('(?i)(pian|sol|temp)o$' , '\\1os'),
            ('(?i)(a|e|i|o|u|n)o$' , '\\1os'),
            ('(?i)(o|x|z|ch|ss|sh)$' , '\\1es'),
            ('(?i)-in-law$' , 's-in-law'),
            ('(?i)ful$' , 'sful'),
            ('(?i)is$' , 'es'),
            ('(?i)(d|(a|e|i|o|u)n|p|pl|r|s|t)us$' , '\\1uses'),
            ('(?i)(bu)s$' , '\\1ses'),
            ('(?i)us$' , 'i'),
            ('(?i)(a|b|dumd|e|g|h|k|(d|l|p|s|sy)l|o|r|s)um$' , '\\1ums'),
            ('(?i)um$' , 'a'),
            ('(?i)(a|e|i|o|u)a$' , '\\1as'),
            ('(?i)a$' , 'ae'),
            ('(?i)([^aeiouy]|qu)y$' , '\\1ies'),
            ('(?i)s$' , 'ses'),
            ('(?i)$' , 's'),
        )

        # Without this guard the catch-all '$' rule would turn '' into 's'.
        if word == '':
            return word

        if self._is_uncountable(word):
            return word

        irregular = self._apply_irregular(word, self.irregular_words)
        if irregular is not None:
            return irregular

        if word in self.reserve_words:
            return self.reserve_words[word]

        return self._apply_rules(word, rules)

    def singularize (self, word) :
        '''Singularizes English nouns.

        Returns the singular form of ``word``. Uncountable words, and words
        that no table or rule recognises, are returned unchanged.
        '''
        # Order matters: the first matching pattern is the only one applied.
        rules = (
            ('(?i)eaux$' , 'eau'),
            ('(?i)mena$' , 'menon'),
            ('(?i)teria$' , 'terion'),
            ('(?i)([ml])ice$' , '\\1ouse'),
            # NOTE(review): the next two replacements are inserted literally,
            # so 'indices' becomes 'ind|ix|ex' -- apparently a marker for an
            # ambiguous singular; confirm the intended behaviour.
            ('(?i)ices$' , '|ix|ex'),
            ('(?i)ves$' , '|f|fe'),
            ('(?i)os$' , 'o'),
            ('(?i)(o|x|z|ch|ss|sh)es$' , '\\1'),
            ('(?i)s-in-law$' , '-in-law'),
            ('(?i)sful$' , 'ful'),
            ('(?i)(d|(a|e|i|o|u)n|p|pl|r|s|t)uses$' , '\\1us'),
            ('(?i)(a|b|dumd|e|g|h|k|(d|l|p|s|sy)l|o|r|s)ums$' , '\\1um'),
            ('(?i)a$' , 'um'),
            ('(?i)i$' , 'us'),
            ('(?i)ses$' , 's'),
            # 'ies$' has to be tried before the broader 'es$', which would
            # otherwise always match first and leave this rule unreachable.
            ('(?i)ies$' , 'y'),
            ('(?i)es$' , 'is'),
            ('(?i)(a|e|i|o|u)as$' , '\\1a'),
            ('(?i)ae$' , 'a'),
            ('(?i)s$' , ''),
        )

        # The class tables map singular -> plural; invert them per call so
        # that later changes to the tables are picked up.
        irregular_words = dict(
            (plural, singular)
            for singular, plural in self.irregular_words.items())
        reserve_words = dict(
            (plural, singular)
            for singular, plural in self.reserve_words.items())

        if self._is_uncountable(word):
            return word

        irregular = self._apply_irregular(word, irregular_words)
        if irregular is not None:
            return irregular

        # Look up the INVERTED table: the forward table would map a singular
        # such as 'ox' to its plural here.
        if word in reserve_words:
            return reserve_words[word]

        return self._apply_rules(word, rules)
# Copyright (c) 2006 Bermi Ferrer Martinez
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software to deal in this software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of this software, and to permit
# persons to whom this software is furnished to do so, subject to the following
# condition:
#
# THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THIS SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THIS SOFTWARE.