แปลงรหัส Id3 จาก cp874 เป็น utf8

จาก ThaiLinuxCafe: แก้ไข ID3Tags ใน mp3 ให้ใช้กับ Amarok 1.4 และ Noatun

ใช้แปลงไฟล์ mp3 จากการเข้ารหัสแบบ cp874 มาเป็นยูนิโค้ด utf8
ตัวโปรแกรมจะแปลงชื่อไฟล์และ ID3 Tags ในไฟล์
ถ้าจะนำไปใช้ โปรดใช้ด้วยความระมัดระวัง
เพราะไม่ได้เขียนฟังก์ชันการเตือนไว้ด้วยครับ

ตั้งชื่อไฟล์ว่า d.tags2utf8
$ sudo touch /usr/local/bin/d.tags2utf8
$ sudo chmod 755 /usr/local/bin/d.tags2utf8
$ sudo vi /usr/local/bin/d.tags2utf8

#!/usr/bin/env python
"""
Convert ID3 tags from CP874 to UTF-8 and rename the files to match,
recursing into subdirectories.

Coding from Khun pong_th's article at:
http://www.thailinuxhosting.com/yabbse/index.php?board=6;action=display;threadid=9429
"""
# 49-11-18 ADD ID3V1 CONVERSION

import os

# GLOBAL VARIABLE
# File-name suffix process_dir() compares against to pick the files to convert.
skel=".mp3"
# Codec that file names and tag text are decoded from in d_convert().
decodec="cp874"
# Codec the decoded text is re-encoded to in d_convert().
encodec="utf8"


def d_passcheck_invalid_char(string):
    """Tell whether `string` is usable as tag text.

    Returns False for an empty string and for a string containing a NUL
    ('\\x00') or a '\\xff' character; in the latter case a diagnostic line
    with the 1-based index of the offending character class is printed.
    Returns True otherwise.
    """
    if string == "":
        return False
    # The index is only used in the diagnostic: 1 -> NUL, 2 -> 0xFF.
    for index, ch in ((1, '\x00'), (2, '\xff')):
        if ch in string:
            # Single-argument print(...) parses on both Python 2 and 3.
            print("%d CH IN STRING %s" % (index, string))
            return False
    return True


def d_convert(string):
    """Re-encode a byte string from `decodec` to `encodec`.

    Everything from the first NUL byte onwards is dropped first (fixed-width
    tag fields are NUL padded). The trimmed text is returned unchanged when
    d_passcheck_invalid_char() rejects it, or when it cannot be decoded
    with `decodec`.
    """
    string = string.split('\x00')[0]     # TRIM '\x00' CHARACTER
    if not d_passcheck_invalid_char(string):
        return string
    try:
        return string.decode(decodec).encode(encodec)
    except UnicodeError:
        # Some byte values are undefined in the source code page; one such
        # string must not abort the whole batch run.
        print("cannot decode %r as %s, left unchanged" % (string, decodec))
        return string


def d_rename2utf8(dir,strname):
    """Rename file `strname` inside directory `dir` to its UTF-8 spelling.

    `strname` is the name relative to `dir`. Returns the name the file has
    afterwards: the converted name when a rename happened, otherwise
    `strname` itself.
    """
    # A name that already decodes as UTF-8 (plain ASCII included) is left
    # alone. Testing for a leading 0xE0 byte is not enough: that byte is
    # also an ordinary Thai letter in cp874.
    try:
        strname.decode(encodec)
    except UnicodeError:
        pass
    else:
        return strname

    newstr = d_convert(strname)
    if newstr == strname:
        # d_convert() could not convert the name; nothing to rename.
        return strname
    print("rename file: %s -> %s" % (strname, newstr))
    os.rename(os.path.join(dir, strname), os.path.join(dir, newstr))
    return newstr


def d_getID3V1data(fstream):
    """Read the six ID3v1 fields from the last 128 bytes of `fstream`.

    From: http://www.faqs.org/docs/diveintopython/fileinfo_files.html

    `fstream` must be a seekable binary stream of at least 128 bytes. The
    stream is rewound to offset 0 before returning.

    Returns a list of raw, still padded byte strings:
    [songname, artist, album, year, comment, genre].
    """

    # ID3V1 format (from:http://www.id3.org/id3v2-00.txt)
    # Field      Length    Offsets (inclusive)
    # Tag        3           0-2
    # Songname   30          3-32
    # Artist     30         33-62
    # Album      30         63-92
    # Year       4          93-96
    # Comment    30         97-126
    # Genre      1           127

    fstream.seek(-128, 2)           # whence 2: relative to the end of file
    tags = fstream.read(128)
    fstream.seek(0)
    # Python slices exclude the end index, so each end is the inclusive
    # offset from the table above plus one.
    bounds = ((3, 33), (33, 63), (63, 93), (93, 97), (97, 127), (127, 128))
    return [tags[start:end] for start, end in bounds]

    
def d_write_eachtags(fstream,tagstitle,tagsdata):
    """Write one ID3v2.4 text frame to `fstream` at its current position.

    `tagstitle` is the 4-character frame ID and `tagsdata` the UTF-8 text.
    Layout written: ID (4) + size (4) + flags (2, zero) + encoding byte
    0x03 (UTF-8) + text. The size counts the encoding byte plus the text.
    """
    size = len(tagsdata) + 1
    # ID3v2.4 sizes are "synchsafe": 7 bits per byte, most significant
    # first. A single chr(size) is invalid from 128 and fails above 255.
    sizefield = ''.join([chr((size >> shift) & 0x7f) for shift in (21, 14, 7, 0)])
    fstream.write(tagstitle + sizefield + '\x00\x00' + '\x03' + tagsdata)
    return


# Frame IDs whose text is re-encoded when they lead an ID3v2 tag.
_D_TEXT_FRAMES = ("TIT2", "TPE1", "TALB")
# Zero bytes left after the frames of a freshly created ID3v2 tag.
_D_NEW_TAG_PADDING = 1016


def _d_is_utf8(data):
    """Return True when the byte string `data` decodes cleanly as UTF-8."""
    try:
        data.decode("utf8")
    except UnicodeError:
        return False
    return True


def _d_pack_size(value, bits):
    """Encode `value` as a 4-byte big-endian field holding `bits` bits per byte."""
    mask = (1 << bits) - 1
    return ''.join([chr((value >> (bits * n)) & mask) for n in (3, 2, 1, 0)])


def _d_unpack_size(raw, bits):
    """Decode a big-endian size field holding `bits` bits per byte."""
    mask = (1 << bits) - 1
    value = 0
    for ch in raw:
        value = (value << bits) | (ord(ch) & mask)
    return value


def _d_frame(name, payload):
    """Build an ID3v2.4 frame: ID + synchsafe size + zero flags + payload."""
    return name + _d_pack_size(len(payload), 7) + '\x00\x00' + payload


def _d_convert_id3v2(fstream, filename):
    """Re-encode the leading text frames of an existing ID3v2 tag.

    `fstream` is positioned just after the 'ID3' marker. Returns False when
    the frames are already UTF-8, None otherwise.
    """
    header = fstream.read(7)    # version (2) + flags (1) + synchsafe size (4)
    if len(header) < 7:
        print("ID3 Tags not found in %s" % (filename))
        return None
    major = ord(header[0])
    body_size = _d_unpack_size(header[3:7], 7)
    body = fstream.read(body_size)
    if ord(header[2]) & 0x10 or len(body) < body_size:
        # A v2.4 footer repeats the size after the tag, and a truncated tag
        # cannot be parsed; neither is rewritten.
        print("ID3 Tags in %s not supported, file left untouched" % (filename))
        return None
    # Frame sizes are synchsafe from v2.4 on, plain 32-bit in v2.3.
    if major >= 4:
        frame_bits = 7
    else:
        frame_bits = 8

    # READ ID3 TAGS DATA: collect the run of text frames opening the tag.
    pos = 0
    frames = []
    while body[pos:pos + 4] in _D_TEXT_FRAMES:
        size = _d_unpack_size(body[pos + 4:pos + 8], frame_bits)
        end = pos + 10 + size
        if size < 1 or end > len(body):
            break               # malformed frame: keep it verbatim in `rest`
        frames.append((body[pos:pos + 4],       # frame ID
                       body[pos + 8:pos + 10],  # frame flags
                       body[pos + 10],          # text-encoding byte
                       body[pos + 11:end]))     # text
        pos = end
    rest = body[pos:]           # remaining frames and padding, kept as-is
    if not frames:
        # Nothing recognised: the tag must not be overwritten.
        print("No convertible ID3v2 frames found in %s" % (filename))
        return None

    # CONVERT TO utf8
    changed = False
    rebuilt = []
    for name, fflags, enc, text in frames:
        # Skip UTF-16 frames (encodings 1 and 2), frames with format flags
        # (compressed, encrypted, ...) and text that is valid UTF-8 already.
        if fflags[1] == '\x00' and enc in ('\x00', '\x03') and not _d_is_utf8(text):
            newtext = d_convert(text)
            if _d_is_utf8(newtext):     # accept only a successful conversion
                enc = '\x03'
                text = newtext
                changed = True
        rebuilt.append(name + _d_pack_size(len(text) + 1, frame_bits)
                       + fflags + enc + text)
    if not changed:
        print("File %s already in utf8 format." % (filename))
        return False

    # WRITE BACK CONVERTED DATA
    newbody = ''.join(rebuilt) + rest
    if len(newbody) < body_size:
        newbody = newbody + '\x00' * (body_size - len(newbody))
    if len(newbody) == body_size:
        fstream.seek(10)
        fstream.write(newbody)
    else:
        # The tag grew: shift the audio instead of overwriting its start.
        audio = fstream.read()
        fstream.seek(6)
        fstream.write(_d_pack_size(len(newbody), 7) + newbody + audio)
    print("Id3 Tags: file %s converted" % (filename))
    return None


def _d_convert_id3v1(fstream, filename):
    """Prepend an ID3v2.4 tag built from the ID3v1 tag ending the file."""
    fstream.seek(0)
    wholefile = fstream.read()
    # CHECK FOR ID3V1: the tag is the last 128 bytes and starts with 'TAG'.
    if wholefile[-128:-125] != "TAG":
        print("ID3 Tags not found in %s" % (filename))
        return None
    fields = d_getID3V1data(fstream)
    # Songname   :TIT2
    # Artist     :TPE1
    # Album      :TALB
    # Year       :TDRC (recording time)
    # Comment    :COMM
    # Genre      :---- (discarded)
    frames = []
    for name, raw in zip(("TIT2", "TPE1", "TALB", "TDRC"), fields):
        text = d_convert(raw)
        if d_passcheck_invalid_char(text):
            frames.append(_d_frame(name, '\x03' + text))
    comment = d_convert(fields[4])
    if d_passcheck_invalid_char(comment):
        # COMM payload: encoding, 3-letter language ('XXX' = unknown),
        # NUL-terminated (here empty) description, then the text.
        frames.append(_d_frame("COMM", '\x03' + 'XXX' + '\x00' + comment))
    if not frames:
        print("ID3 Tags not found in %s" % (filename))
        return None
    body = ''.join(frames) + '\x00' * _D_NEW_TAG_PADDING
    fstream.seek(0)
    # Header: 'ID3', version 2.4.0, no flags, synchsafe size of the body.
    fstream.write('ID3' + '\x04\x00' + '\x00' + _d_pack_size(len(body), 7)
                  + body + wholefile)
    print("Id3 Tags: file %s converted" % (filename))
    return None


def d_change_tags2utf8(filename):
    """Change ID3 Tags content from cp874 to utf8.

    A file starting with an ID3v2 tag has its leading TIT2/TPE1/TALB frames
    re-encoded; every other byte of the tag and the audio is preserved.
    Otherwise, if the file ends with an ID3v1 tag, an ID3v2.4 tag built
    from it is prepended (the ID3v1 tag itself is kept).

    Returns False when the ID3v2 text is already UTF-8, None in every other
    case. Errors from open() and from file I/O propagate to the caller.
    """
    fstream = open(filename, "r+b")
    try:
        if fstream.read(3) == "ID3":
            return _d_convert_id3v2(fstream, filename)
        return _d_convert_id3v1(fstream, filename)
    finally:
        # Close the file on every path, including exceptions.
        fstream.close()
    

def process_dir(dir):
    """Process all files in the folder `dir`, recursing into subfolders.

    Every entry whose name ends with `skel` is renamed to UTF-8 and then
    has its ID3 tags converted.
    """

    for f in os.listdir(dir):
        path = os.path.join(dir, f)
        if os.path.isdir(path):
            print("Enter directory %s" % (path))
            process_dir(path)
            print("---exit directory %s" % (path))
            # A directory is never a music file, even if named "*.mp3".
            continue
        if f.endswith(skel):
            # DO CONVERT FILENAME
            # d_rename2utf8() joins `dir` and the name itself, so it gets
            # the bare name; passing the full path doubles the directory.
            newname = d_rename2utf8(dir, f)
            # DO CHANGE ID3 TAGS
            d_change_tags2utf8(os.path.join(dir, newname))
    return

def main():
    """Script entry point: convert everything under the current directory."""
    process_dir('.')

# Run the conversion only when executed as a script, not when imported.
if __name__=='__main__':
    main()