แปลงไฟล์ จาก utf8 กลับเป็น tis620
Submitted by wd on Wed, 2006-11-22 20:15
มีงานต้องแปลงไฟล์กลับ เลยเขียนโค้ดอีกทีนึง
( งานที่ทำคือ สันติรำลึก เป็นการแปลงไฟล์กลับจาก Word มาเป็น HTML แบบ tis-620 )
#!/usr/bin/env python
# CONVERT FILE CONTENT FROM utf8 TO tis620
#
# The whole pipeline works on raw bytes (files are opened in binary mode and
# the replacement table uses bytes literals), so the same source runs on
# Python 2.6+ and Python 3.
import os
import sys

# GLOBAL VARS
decodec = "utf8"
encodec = "cp874"
#
# VARIABLES decodec AND encodec CAN BE CHANGED.
# ALL STANDARD ENCODINGS ARE:
# ascii, big5, big5hkscs, cp037, cp424, cp437, cp500, cp737, cp775, cp850,
# cp852, cp855, cp856, cp857, cp860, cp861, cp862, cp863, cp864, cp865,
# cp866, cp869, cp874, cp875, cp932, cp949, cp950, cp1006, cp1026, cp1140,
# cp1250, cp1251, cp1252, cp1253, cp1254, cp1255, cp1256, cp1257, cp1258,
# euc_jp, euc_jis_2004, euc_jisx0213, euc_kr, gb2312, gbk, gb18030, hz,
# iso2022_jp, iso2022_jp_1, iso2022_jp_2, iso2022_jp_2004, iso2022_jp_3,
# iso2022_jp_ext, iso2022_kr, latin_1, iso8859_2, iso8859_3, iso8859_4,
# iso8859_5, iso8859_6, iso8859_7, iso8859_8, iso8859_9, iso8859_10,
# iso8859_13, iso8859_14, iso8859_15, johab, koi8_r, koi8_u, mac_cyrillic,
# mac_greek, mac_iceland, mac_latin2, mac_roman, mac_turkish, ptcp154,
# shift_jis, shift_jis_2004, shift_jisx0213, utf_16, utf_16_be, utf_16_le,
# utf_7, utf_8, utf_8_sig
#
# SEE http://docs.python.org/lib/standard-encodings.html
# FOR MORE INFORMATION.

# UTF-8 byte sequences that are rewritten BEFORE decoding, paired with their
# substitutes.  The keys are UTF-8 encoded, so this table only makes sense
# while decodec is "utf8".
_REPLACEMENTS = (
    (b"\xe2\x80\x98", b"'"),             # U+2018 left single quote
    (b"\xe2\x80\x99", b"'"),             # U+2019 right single quote
    (b"\xe2\x80\x9c", b'"'),             # U+201C left double quote
    (b"\xe2\x80\x9d", b'"'),             # U+201D right double quote
    (b"\xe2\x80\xa6", b"..."),           # U+2026 ellipsis
    (b"\xef\x9c\x8f", b"\xe0\xb8\x8d"),  # U+F70F -> U+0E0D (YOR YING)
    (b"\xef\x9c\x9a", b"\xe0\xb8\xba"),  # U+F71A -> U+0E3A (PINTU)
    (b"\xe2\x80\x93", b"-"),             # U+2013 en dash
    (b"\xef\x82\xae", b"->"),            # U+F0AE private-use glyph
    (b"\xef\xa3\x82", b""),              # U+F8C2 UNKNOWN, dropped
    (b"\xef\xa3\x83", b""),              # U+F8C3 UNKNOWN, dropped
)


def usage(progname):
    """Print a two-line usage message for the program called *progname*."""
    print("Usage: %s FILE" % (progname))
    print("Convert FILE from %s to %s, save old file in FILE.bak"
          % (decodec, encodec))


def cannotopenfile(filename):
    """Print the standard "cannot open" message for *filename*."""
    print("Cannot open file %s" % (filename))


def genfilename(filename="", ext="new"):
    """Return an unused path of the form ``filename.ext[N]``.

    *ext* is matched case-insensitively; anything other than "new" or "bak"
    is treated as "bak".  If ``filename.ext`` already exists, the suffixes
    0..999 are tried in order.

    Returns "" when *filename* is empty or when every candidate exists.
    """
    if filename == "":
        return ""

    ext = ext.lower()
    if ext not in ("new", "bak"):
        ext = "bak"

    candidate = filename + "." + ext
    if not os.path.exists(candidate):
        return candidate

    for i in range(1000):
        numbered = candidate + str(i)
        if not os.path.exists(numbered):
            return numbered
    return ""


def replace_invalid_char(line, utf_char, tis_char):
    """Return *line* with every occurrence of *utf_char* replaced by *tis_char*.

    All three arguments are byte strings.
    """
    return line.replace(utf_char, tis_char)


def convertline(line):
    """Convert one line of bytes from ``decodec`` to ``encodec``.

    The sequences listed in ``_REPLACEMENTS`` are substituted first, then the
    result is decoded and re-encoded.

    Raises UnicodeError (UnicodeDecodeError / UnicodeEncodeError) when the
    line is not valid in ``decodec`` or contains a character that
    ``encodec`` cannot represent.
    """
    for utf_char, tis_char in _REPLACEMENTS:
        line = replace_invalid_char(line, utf_char, tis_char)
    return line.decode(decodec).encode(encodec)


def convertfile(fs_old, fs_new):
    """Convert every line read from *fs_old* and write it to *fs_new*.

    *fs_old* is any iterable of byte-string lines (e.g. a file opened with
    "rb"); *fs_new* is any object with a ``write`` method accepting bytes.

    Returns True on success.  Returns False as soon as a line cannot be
    converted; lines before the failing one have already been written.
    """
    try:
        for eachline in fs_old:
            fs_new.write(convertline(eachline))
    except UnicodeError:
        return False
    return True


def _remove_quietly(path):
    """Delete *path*, ignoring the error if that is not possible."""
    try:
        os.remove(path)
    except OSError:
        pass


def main(argv=None):
    """Convert the file named in ``argv[1]`` in place, keeping a backup.

    The converted content is first written to a fresh ``FILE.new*`` file;
    only after a complete, successful conversion is the original renamed to
    ``FILE.bak*`` and the new file moved into its place.

    Returns the process exit status: 0 on success, 1 on any failure.
    """
    if argv is None:
        argv = sys.argv
    progname = os.path.basename(argv[0])
    if len(argv) < 2:
        usage(progname)
        return 1
    oldfile = argv[1]

    # Binary mode: the content is converted byte-for-byte, so no newline
    # translation or implicit text decoding may happen on either side.
    try:
        fsold = open(oldfile, "rb")
    except (IOError, OSError):
        cannotopenfile(oldfile)
        return 1

    try:
        newfile = genfilename(oldfile, "new")
        if newfile == "":
            print("Cannot create new file name for %s" % (oldfile))
            return 1

        try:
            fsnew = open(newfile, "wb")
        except (IOError, OSError):
            cannotopenfile(newfile)
            return 1

        try:
            converted = convertfile(fsold, fsnew)
        finally:
            fsnew.close()
    finally:
        fsold.close()

    if not converted:
        # Do not leave a half-written output file behind.
        _remove_quietly(newfile)
        print("Convert file %s failed" % (oldfile))
        return 1

    bakfile = genfilename(oldfile, "bak")
    if bakfile == "":
        print("Cannot create backup file, so converted file is %s" % (newfile))
        return 1

    os.rename(oldfile, bakfile)
    os.rename(newfile, oldfile)
    print("Convert %s success, save backup file in %s" % (oldfile, bakfile))
    return 0


if __name__ == "__main__":
    sys.exit(main())
โค้ดยังไม่เรียบร้อยดี แต่ขอแปะโค้ดไว้ก่อน
- Printer-friendly version
- Log in or register to post comments
- 6975 reads
Recent comments