check_ch_type()으로 입력된 문자가 한글 음절인지 확인하고,
음절인 경우 conv_jaso()로 자모를 분해한 뒤 호환 자모(compatibility jamo)로 변환한다.
import unicodedata
def check_ch_type(ch):
    """Return True if *ch* is a precomposed Hangul syllable.

    A character qualifies when its Unicode general category is 'Lo'
    (Letter, other) and its Unicode name contains both the words
    'HANGUL' and 'SYLLABLE'.

    Args:
        ch: A single character (a string of length 1).

    Returns:
        True for a Hangul syllable, False for any other character.

    Raises:
        TypeError: If *ch* is not a single character (raised by
            ``unicodedata.category``).
    """
    if unicodedata.category(ch) != 'Lo':
        return False
    # unicodedata.name() raises ValueError when the character has no name
    # in the database; passing a default makes such characters simply
    # "not a Hangul syllable" instead of an error.
    name_words = unicodedata.name(ch, '').split()
    return 'HANGUL' in name_words and 'SYLLABLE' in name_words
def conv_jaso(ch=u'각'):
    """Decompose a Hangul syllable into a string of compatibility jamo.

    The syllable's offset from U+AC00 is split arithmetically into
    initial (choseong), medial (jungseong) and optional final (jongseong)
    indices. Each index is mapped to its conjoining jamo (bases U+1100,
    U+1161 and U+11A7) and then to the Hangul compatibility letter that
    has the same Unicode name.

    Args:
        ch: A single character. Defaults to u'각'.

    Returns:
        The two or three compatibility jamo of the syllable joined into
        one string, e.g. u'각' -> u'ㄱㅏㄱ'. A character outside the
        precomposed syllable range U+AC00..U+D7A3 is returned unchanged.

    Raises:
        TypeError: If *ch* is not a single character (raised by ``ord``).
    """
    def conv_compatibility_jamo(jamo):
        # A conjoining jamo and its compatibility letter share a Unicode
        # name except for the position word, so swap that word and look
        # the new name up.
        name = unicodedata.name(jamo)
        for position in ('CHOSEONG', 'JUNGSEONG', 'JONGSEONG'):
            if position in name:
                name = name.replace(position, 'LETTER')
                break
        return unicodedata.lookup(name)

    # Python 2 needs unichr() for code points above 255; Python 3 only
    # has chr().
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    jung_count = 21
    jong_count = 28  # 27 finals plus "no final"
    syllable_count = 19 * jung_count * jong_count

    index = ord(ch) - 0xAC00
    if not 0 <= index < syllable_count:
        # Not a precomposed syllable: the arithmetic below would yield
        # meaningless jamo, so hand the character back untouched.
        return ch

    # divmod keeps everything an int on both Python 2 and 3 (plain `/`
    # is true division on Python 3).
    cho_jung, jong = divmod(index, jong_count)
    cho, jung = divmod(cho_jung, jung_count)

    jaso = [
        conv_compatibility_jamo(to_char(0x1100 + cho)),
        conv_compatibility_jamo(to_char(0x1161 + jung)),
    ]
    if jong:
        # jong == 0 means the syllable has no final consonant.
        jaso.append(conv_compatibility_jamo(to_char(0x11A7 + jong)))
    return ''.join(jaso)
'Computer > Python' 카테고리의 다른 글
inline if (0) | 2015.03.11 |
---|---|
파이썬 json.dumps를 한글에 사용하는 방법 (0) | 2015.02.01 |
nested list comprehension in python (0) | 2015.01.10 |
python OrderedDict (0) | 2014.12.04 |
파이썬 표준 에러(stderr) 출력 (0) | 2014.11.21 |