한글의 자모 분리

check_ch_type()으로 입력된 문자가 한글 음절인지 확인하고,

음절인 경우 conv_jaso()를 통해 초성·중성·종성 자모로 분해한 뒤 호환용 자모(compatibility jamo)로 변환한다.

import unicodedata

def check_ch_type(ch):
    """Return True if *ch* is a precomposed Hangul syllable, else False.

    A character qualifies when its Unicode general category is ``Lo``
    (Letter, other) and its Unicode name contains both the words
    ``HANGUL`` and ``SYLLABLE`` (e.g. ``HANGUL SYLLABLE GAG``).

    Args:
        ch: A single character. ``unicodedata.category`` raises
            ``TypeError`` for anything else.

    Returns:
        bool: True for a Hangul syllable, False for every other character
        (including standalone jamo, whose names lack ``SYLLABLE``).
    """
    if unicodedata.category(ch) != 'Lo':
        return False

    # Pass a default: a character can be categorised as 'Lo' and still have
    # no name in the interpreter's Unicode database, in which case
    # unicodedata.name(ch) without a default raises ValueError.
    name_parts = unicodedata.name(ch, '').split()
    return 'HANGUL' in name_parts and 'SYLLABLE' in name_parts

def conv_jaso(ch=u'각'):
    """Split one Hangul syllable into a string of compatibility jamo.

    The syllable is decomposed into its initial consonant (choseong),
    vowel (jungseong) and optional final consonant (jongseong); each
    conjoining jamo is then mapped to the Hangul Compatibility Jamo letter
    of the same name, e.g. ``u'각'`` -> ``u'ㄱㅏㄱ'``.

    Args:
        ch: A single character. ``ord`` raises ``TypeError`` for a string
            of any other length.

    Returns:
        The two or three compatibility jamo joined into one string. A
        character outside the precomposed syllable range U+AC00..U+D7A3
        is returned unchanged.
    """

    def conv_compatibility_jamo(jamo):
        # Conjoining jamo are named 'HANGUL CHOSEONG X', 'HANGUL JUNGSEONG X'
        # or 'HANGUL JONGSEONG X'; the compatibility letter is looked up
        # under the name 'HANGUL LETTER X'.
        name = unicodedata.name(jamo)
        for position in ('CHOSEONG', 'JUNGSEONG', 'JONGSEONG'):
            if position in name:
                return unicodedata.lookup(name.replace(position, 'LETTER'))
        return jamo

    # Without this guard the index arithmetic on a non-syllable goes
    # negative and yields unrelated jamo instead of a meaningful result.
    if not 0xAC00 <= ord(ch) <= 0xD7A3:
        return ch

    # Canonical decomposition (NFD) of a precomposed syllable is exactly its
    # choseong + jungseong (+ jongseong) sequence. Using it avoids both
    # `unichr` and `/`-as-integer-division, which only work on Python 2.
    conjoining = unicodedata.normalize('NFD', ch)
    return ''.join(conv_compatibility_jamo(jamo) for jamo in conjoining)

저작자표시 비영리 변경금지 (새창열림)

'Computer > Python' 카테고리의 다른 글

inline if (0)	2015.03.11
파이썬 json.dumps를 한글에 사용하는 방법 (0)	2015.02.01
nested list comprehension in python (0)	2015.01.10
python OrderedDict (0)	2014.12.04
파이썬 표준 에러(stderr) 출력 (0)	2014.11.21

Dani's stack

한글의 자모 분리

'Computer > Python' 카테고리의 다른 글

티스토리툴바

한글의 자모 분리

'Computer > Python' 카테고리의 다른 글

관련글

티스토리툴바