# -*- coding: euc-jis-2004 -*- # behave.py (¤ª¹Ôµ·Îɤ¯¤·¤Æ¤Í) # ÆüËܸì¥á¡¼¥ë¤Ë¤è¤¯½Ð¤Æ¤¯¤ë JISX-0208 ¤Ç¤Ê¤¤Ê¸»ú¤ò # JISX-0208 ʸ»ú¤ÇÂåÂØ¤¹¤ë¡£ # # ɬÍפʤâ¤Î: pykf, cjkcodecs (Python2.3¤Î¾ì¹ç) # »È¤¤Êý: # ¤³¤Î¥â¥¸¥å¡¼¥ë¤ò Mailman/Handlers ¤ËÆþ¤ì¤ë # ¼¡¤Î¹Ô¤ò mm_cfg.py ¤ËÆþ¤ì¤ë # # GLOBAL_PIPELINE.insert(0,'behave') # # TODO: Subject ¤âÌÌÅݤߤƤ¢¤²¤ë¤È¤¤¤¤¤«¤â. import re import pykf # import cjkcodecs # ¤è¤¯»È¤ï¤ì¤ë¡Öµ¡¼ï°Í¸ʸ»ú¡×¤È¤½¤ÎÂåÂØÊ¸»ú replace = { '­¡': '(1)', '­¢': '(2)', '­£': '(3)', '­¤': '(4)', '­¥': '(5)', '­¦': '(6)', '­§': '(7)', '­¨': '(8)', '­©': '(9)', '­ª': '(10)', '­«': '(11)', '­¬': '(12)', '­­': '(13)', '­®': '(14)', '­¯': '(15)', '­°': '(16)', '­±': '(17)', '­²': '(18)', '­³': '(19)', '­´': '(20)', '­µ': '_I_', '­¶': '_II_', '­·': '_III_', '­¸': '_IV_', '­¹': '_V_', '­º': '_VI_', '­»': '_VII_', '­¼': '_VIII_', '­½': '_IX_', '­¾': '_X_', '­À': '¥ß¥ê', '­Á': '¥­¥í', '­Â': '¥»¥ó¥Á', '­Ã': '¥á¡¼¥È¥ë', '­Ä': '¥°¥é¥à', '­Å': '¥È¥ó', '­Æ': '¥¢¡¼¥ë', '­Ç': '¥Ø¥¯¥¿¡¼¥ë', '­È': '¥ê¥Ã¥È¥ë', '­É': '¥ï¥Ã¥È', '­Ê': '¥«¥í¥ê¡¼', '­Ë': '¥É¥ë', '­Ì': '¥»¥ó¥È', '­Í': '¥Ñ¡¼¥»¥ó¥È', '­Î': '¥ß¥ê¥Ð¡¼¥ë', '­Ï': '¥Ú¡¼¥¸', '­Ð': 'mm', '­Ñ': 'cm', '­Ò': 'km', '­Ó': 'mg', '­Ô': 'kg', '­Õ': 'cc', '­Ö': 'm2', '­ß': 'Ê¿À®', '­à': "''", '­á': ',,', '­â': 'No.', '­ã': 'K.K.', '­ä': 'Tel', '­å': '(¾å)', '­æ': '(Ãæ)', '­ç': '(²¼)', '­è': '(º¸)', '­é': '(±¦)', '­ê': '(³ô)', '­ë': '(Í­)', '­ì': '(Âå)', '­í': 'ÌÀ¼£', '­î': 'ÂçÀµ', '­ï': '¾¼ÏÂ', 'ùÞ': '¤½¤Í', 'ùõ': 'ºê', 'üâ': '¹â' } replpat = '(' + '|'.join(replace.keys()) + ')' replpat = unicode(replpat, 'euc-jis-2004') def replobj(matchobj): return unicode(replace[matchobj.group(0).encode('euc-jis-2004')], 'euc-jis-2004') def process(mlist, msg, msgdata): for part in msg.walk(): if not part.get_content_type().startswith('text/plain'): continue cset = part.get_content_charset() if cset is None: continue if cset <> 'iso-2022-jp': continue s = part.get_payload(decode=True) s = pykf.toeuc(s, pykf.JIS) # Ⱦ³Ñ¥«¥Ê -> Á´³Ñ s = pykf.tofull_kana(s, pykf.EUC) s = unicode(s, 'euc-jis-2004', 'replace') # iso-2022-jp ¤ËÆþ¤é¤Ê¤¤Ê¸»ú¤òÂåÂØÊ¸»ú¤Ë s = re.sub(replpat, replobj, s) s = s.encode(cset, 'replace') del part['content-transfer-encoding'] del part['content-type'] part.set_payload(s, cset) if __name__ == '__main__': import sys from email import message_from_file msg = message_from_file(file(sys.argv[1])) process(None, msg, None) print msg