แปลงไฟล์ จาก utf8 กลับเป็น tis620
Submitted by wd on Wed, 2006-11-22 20:15
มีงานต้องแปลงไฟล์กลับ เลยเขียนโค้ดอีกทีนึง
( งานที่ทำคือ สันติรำลึก เป็นการแปลงไฟล์กลับจาก Word มาเป็น HTML แบบ tis-620 )
#!/usr/bin/env python
# CONVERT FILE CONTENT FROM utf8 TO tis620
import sys,os
# GLOBAL VARS
# decodec: NAME OF THE CODEC USED TO DECODE EACH INPUT LINE.
# encodec: NAME OF THE CODEC USED TO ENCODE EACH OUTPUT LINE.
# NOTE(review): THE HEADER SAYS tis620 BUT encodec IS cp874 (WINDOWS THAI);
# PYTHON ALSO SHIPS A tis_620 CODEC -- CONFIRM WHICH ONE IS INTENDED.
decodec="utf8"
encodec="cp874"
#
# VARIABLES decodec AND encodec CAN BE CHANGED.
# THE STANDARD ENCODINGS ARE:
# ascii, big5, big5hkscs, cp037, cp424, cp437, cp500, cp737, cp775, cp850,
# cp852, cp855, cp856, cp857, cp860, cp861, cp862, cp863, cp864, cp865,
# cp866, cp869, cp874, cp875, cp932, cp949, cp950, cp1006, cp1026, cp1140,
# cp1250, cp1251, cp1252, cp1253, cp1254, cp1255, cp1256, cp1257, cp1258,
# euc_jp, euc_jis_2004, euc_jisx0213, euc_kr, gb2312, gbk, gb18030, hz,
# iso2022_jp, iso2022_jp_1, iso2022_jp_2, iso2022_jp_2004, iso2022_jp_3,
# iso2022_jp_ext, iso2022_kr, latin_1, iso8859_2, iso8859_3, iso8859_4,
# iso8859_5, iso8859_6, iso8859_7, iso8859_8, iso8859_9, iso8859_10,
# iso8859_13, iso8859_14, iso8859_15, johab, koi8_r, koi8_u, mac_cyrillic,
# mac_greek, mac_iceland, mac_latin2, mac_roman, mac_turkish, ptcp154,
# shift_jis, shift_jis_2004, shift_jisx0213, utf_16, utf_16_be, utf_16_le,
# utf_7, utf_8, utf_8_sig
#
# SEE http://docs.python.org/lib/standard-encodings.html
# (CURRENT LOCATION: https://docs.python.org/3/library/codecs.html#standard-encodings)
# FOR MORE INFORMATION.
def usage(progname):
    """Print the two-line command-line help text.

    progname -- name to show as the command in the "Usage:" line.

    The second line names the source and target codecs taken from the
    module-level ``decodec`` and ``encodec`` settings.
    """
    # Single-argument print(...) behaves the same as the print statement.
    print("Usage: %s FILE" % progname)
    codec_pair = (decodec, encodec)
    print("Convert FILE from %s to %s, save old file in FILE.bak" % codec_pair)
def cannotopenfile(filename):
    """Report on standard output that *filename* could not be opened."""
    message = "Cannot open file %s" % filename
    print(message)
def genfilename(filename="", ext="new"):
    """Return a not-yet-existing sibling name for *filename*.

    filename -- path the new name is derived from; "" yields "".
    ext      -- wanted suffix, "new" or "bak" in any letter case.  Any other
                value falls back to "bak".

    The result is ``filename.<ext>`` when that path is free, otherwise
    ``filename.<ext>N`` for the first free N in 0..999.  Returns "" when
    *filename* is empty or all 1001 candidates already exist.

    The suffix is always lower-cased, so "BAK" and "New" produce ".bak" and
    ".new" rather than leaking the caller's spelling into the file name.
    """
    if filename == "":
        return ""

    # Normalise once; everything that is not "new"/"bak" becomes "bak".
    ext = ext.lower()
    if ext not in ("new", "bak"):
        ext = "bak"

    base = filename + "." + ext
    if not os.path.exists(base):
        return base

    # Plain name is taken: probe numbered variants, giving up after 999.
    for i in range(1000):
        candidate = base + str(i)
        if not os.path.exists(candidate):
            return candidate
    return ""
def replace_invalid_char(line, utf_char, tis_char):
    """Return *line* with every occurrence of *utf_char* swapped for *tis_char*.

    Thin wrapper around the ``replace`` method of *line*.
    """
    cleaned = line.replace(utf_char, tis_char)
    return cleaned
def convertline(line, src_codec=None, dst_codec=None):
    """Re-encode one line of bytes, replacing characters the target lacks.

    line      -- encoded input line (a byte string).
    src_codec -- codec used to decode *line*; defaults to the module-level
                 ``decodec`` setting.
    dst_codec -- codec used to encode the result; defaults to the
                 module-level ``encodec`` setting.

    Returns the converted line as a byte string.  Raises UnicodeDecodeError
    or UnicodeEncodeError when the line cannot be decoded or still contains
    a character the target codec cannot represent.

    The substitutions are applied to the decoded text, not to the raw bytes,
    so they stay correct when the source codec is changed from UTF-8.
    """
    if src_codec is None:
        src_codec = decodec
    if dst_codec is None:
        dst_codec = encodec

    text = line.decode(src_codec)

    # (character found in the input, replacement that is safe to encode)
    substitutions = (
        (u"\u2018", u"'"),       # left single quotation mark
        (u"\u2019", u"'"),       # right single quotation mark
        (u"\u201c", u'"'),       # left double quotation mark
        (u"\u201d", u'"'),       # right double quotation mark
        (u"\u2026", u"..."),     # horizontal ellipsis
        (u"\uf70f", u"\u0e0d"),  # private-use glyph -> THAI YO YING
        (u"\uf71a", u"\u0e3a"),  # private-use glyph -> THAI PHINTHU
        (u"\u2013", u"-"),       # en dash
        (u"\uf0ae", u"->"),      # private-use glyph, written out as an arrow
        (u"\uf8c2", u""),        # unknown private-use glyph, dropped
        (u"\uf8c3", u""),        # unknown private-use glyph, dropped
    )
    for bad, good in substitutions:
        text = text.replace(bad, good)

    return text.encode(dst_codec)
def convertfile(fs_old, fs_new):
    """Convert every line of *fs_old* and write the result to *fs_new*.

    fs_old -- iterable of input lines (e.g. an open file), each passed to
              convertline().
    fs_new -- object with a ``write`` method that receives each converted
              line.

    Returns True when all lines were converted.  Returns False, after
    printing the offending line number, as soon as a line cannot be decoded
    or encoded; lines written before the failure stay in *fs_new*.
    """
    lineno = 0
    for eachline in fs_old:
        lineno += 1
        try:
            newline = convertline(eachline)
        except UnicodeError as err:
            # Report failure through the return value the caller checks
            # instead of writing a line in the wrong encoding.
            print("Cannot convert line %d: %s" % (lineno, err))
            return False
        fs_new.write(newline)
    return True
if __name__ == "__main__":
    # Script entry point: convert the file named on the command line in
    # place, keeping the original content under a ".bak" name.
    progname = os.path.basename(sys.argv[0])
    if len(sys.argv) < 2:
        usage(progname)
        sys.exit(1)
    oldfile = sys.argv[1]

    # Binary mode: the conversion works on encoded bytes, so no newline or
    # text translation may happen on the way in or out.
    try:
        fsold = open(oldfile, "rb")
    except (IOError, OSError):
        cannotopenfile(oldfile)
        sys.exit(1)

    # The converted text is first written next to the original as ".new".
    newfile = genfilename(oldfile, "new")
    if newfile == "":
        fsold.close()
        print("Cannot find a free name for the converted file")
        sys.exit(1)

    try:
        fsnew = open(newfile, "wb")
    except (IOError, OSError):
        fsold.close()
        cannotopenfile(newfile)
        sys.exit(1)

    # Close both handles whether the conversion succeeds, fails or raises.
    try:
        converted = convertfile(fsold, fsnew)
    finally:
        fsold.close()
        fsnew.close()

    if not converted:
        print("Convert file %s failed" % (oldfile))
        sys.exit(1)

    bakfile = genfilename(oldfile, "bak")
    if bakfile == "":
        print("Cannot create backup file, so converted file is %s" % (newfile))
        sys.exit(1)

    # Swap: original -> backup name, converted -> original name.
    os.rename(oldfile, bakfile)
    os.rename(newfile, oldfile)
    print("Convert %s success, save backup file in %s" % (oldfile, bakfile))
โค้ดยังไม่เรียบร้อยดี แต่ขอแปะโค้ดไว้ก่อน
- Printer-friendly version
- Log in or register to post comments
- 7276 reads







Recent comments