python: ลองเขียนสคริปต์ลบสแปม
Submitted by wd on Wed, 2007-10-03 19:25
ลองเขียนสคริปต์ลบสแปม
ใช้กับบอร์ด yabbse กับ smf
ของ smf ยังไม่เสร็จ บันทึกเอาไว้เพื่อลองดูผลเท่านั้น
ต้องเปลี่ยนแปลงสคริปต์ตามธีมที่ใช้ด้วย
*** สคริปต์นี้ใช้กับ thailinuxhosting.com/yabbse เท่านั้น เพราะใส่โค้ดที่แก้ปัญหาบอร์ดไว้ด้วยครับ
#!/usr/bin/env python # -*- coding: utf-8 -*- user = "wd" password = "mypassword" enc_password = "XXXXXXXXXX" # *** GET ENCRYPTED PASSWORD FROM BROWSER COOKIE site = "http://www.thailinuxhosting.com/yabbse" #"http://www.thaitux.info/smf" board = "yabbse" # "smf", "yabbse" charset = "tis620" # "utf8", "tis620" max_loop = 5 # = RECENT LIST OF BOARD root = "/home/wd/spam" backup_file = root+"/thailinuxhosting-bak.txt" spamtext_file = root+"/spamlist.txt" cookie_file = root+"/thailinuxhosting-cookie" import sys import os import time ##### PRE RUN FOR RETRIEVE COOKIE ##### import urllib2 import cookielib login = "/index.php?action=login2;user=%s;passwrd=%s;cookielength=302400" % (user, password,) cj = cookielib.MozillaCookieJar() opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) sock = opener.open(site+login) cj.save(cookie_file, ignore_discard=True, ignore_expires=True) sock.close() ####################################### spamlist = [] def decoding(txt): if charset == "tis620": return txt.decode("utf8").encode("tis620") elif charset == "utf8": return txt else: print "Error, CHARSET is not defined" sys.exit[0] def search_line(txt, l, occur=1): for i in range(len(l)): if txt in l[i]: if occur > 1: occur=occur-1 else: return i return -1 def get_msgid(url): if board == "smf": # ...#msgXX return url.split("#msg")[-1] elif board == "yabbse": # ...;start=XX return url.split(";start=")[-1] def check_spam(txt): global spamlist for i in spamlist: if i in txt: return True, i return False, '' def save_backup(txt): f = open(backup_file,'a') f.write(txt+'\n\n\n') f.close() return def die_board(): print "board not exist" sys.exit[0] if board == "smf": # recent_str = "กระททู้เมมื่อเร็วๆ นนี้" recent_str = "\xe0\xb8\x81\xe0\xb8\xa3\xe0\xb8\xb0\xe0\xb8\x97\xe0\xb8\xb9\ \xe0\xb9\x89\xe0\xb9\x80\xe0\xb8\xa1\xe0\xb8\xb7\xe0\xb9\x88\xe0\xb8\xad\ \xe0\xb9\x80\xe0\xb8\xa3\xe0\xb9\x87\xe0\xb8\xa7\xe0\xb9\x86 \xe0\xb8\x99\ \xe0\xb8\xb5\xe0\xb9\x89" elif board == "yabbse": # 
recent_str = "โพสต์เมมื่อเร็วๆนนี้" recent_str = "\xe0\xb9\x82\xe0\xb8\x9e\xe0\xb8\xaa\xe0\xb8\x95\xe0\xb9\x8c\ \xe0\xb9\x80\xe0\xb8\xa1\xe0\xb8\xb7\xe0\xb9\x88\xe0\xb8\xad\xe0\xb9\x80\ \xe0\xb8\xa3\xe0\xb9\x87\xe0\xb8\xa7\xe0\xb9\x86\xe0\xb8\x99\xe0\xb8\xb5\xe0\xb9\x89" else: die_board() #LOAD SPAM DATA if not os.path.exists(spamtext_file): f = open(spamtext_file,'w') f.close() f = open(spamtext_file) for i in f: if i!='' and len(i)>3: spamlist.append(decoding(i.strip())) f.close() recent_str = decoding(recent_str) #INIT COOKIE & OPENER cj = cookielib.MozillaCookieJar() cj.load(cookie_file) opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) loop_count = 0 url_list_pair = [] #SOLVE yabbse ONLY INDEX TO LAST MESSAGE, SO WE CREATE OUR OWN while loop_count < max_loop: #FIRST PAGE sock = opener.open(site) #HACK: SOLVE yabbse'S BOARD COOKIE ERROR cj._cookies['thailinuxhosting.com']['/yabbse']['YaBBSE140usernamev14'].value = user cj._cookies['thailinuxhosting.com']['/yabbse']['YaBBSE140passwordv14'].value = enc_password html = sock.read() sock.close() l = html.split('\n') #SESSIONID if board == "smf": sstr = "sesc=" line = search_line(sstr, l) if line < 0: sys.exit[0] session_id = l[line].split(sstr)[1].split('">')[0] else: session_id = "" #SEARCH FOR RECENT POST sstr = recent_str line = search_line(sstr, l) if line < 0: sys.exit[0] if board == "smf": url = l[line+9+loop_count].split('<a href="')[1].split('">')[0] author = "" date_submitted = "" elif board == "yabbse": url = l[line+4+loop_count].split('<td valign="top"><a href="')[1].split('">')[0] url_list = [ i[0] for i in url_list_pair ] # SOLVE yabbse MESSAGE INDEX if url in url_list: i = url_list.index(url) url_list_pair[i][1] += 1 index_dec = url_list_pair[i][1] else: url_list_pair.append([url,0]) index_dec = 0 # tmp = 'โดย ' tmp = decoding('\xe0\xb9\x82\xe0\xb8\x94\xe0\xb8\xa2 ') author = l[line+4+loop_count].split(tmp)[1].split('</td>')[0] else: die_board() msgid = get_msgid(url) sock = 
opener.open(url) html = sock.read() sock.close() l = html.split('\n') #PARSE HTML is_spam = False spam_keyword = '' if board == "smf": sstr = "msg_%s" % (msgid,) line = search_line(sstr, l) elif board == "yabbse": sstr = '<hr width="100%" size="1" class="windowbg3">' count = (int(msgid)-index_dec) % 20 + 1 # 20 MESSAGES PER PAGE - yabbse INDEX DECREMENT print 'loop=',loop_count,' /// count=',count line = search_line(sstr, l, count) tmp = decoding("javascript:DoConfirm('") try: delete_url = l[line-3].split(tmp)[1].split("','")[1].split("""');"><img src""")[0] date_submitted = l[line-4].split('</B> ')[1].split(' »')[0] title = l[line-5].split('<B>')[1].split('</b>')[0] process_line = line+1 is_spam, spam_keyword = check_spam(l[process_line]) if is_spam: print 'line=',line,' /// l[line-3]=', l[line-3] print 'delete_url=',delete_url print "is_spam=",is_spam," /// keyword=",spam_keyword," /// line=",l[process_line] except: is_spam = False else: die_board() if is_spam: if board == "smf": pass elif board == "yabbse": save_backup('delete url: '+delete_url+\ '\nspam keyword: '+spam_keyword+\ '\nscan date: '+time.ctime(time.time())+\ '\ntitle: '+title+\ '\nauthor: '+author+\ '\nsubmitted date: '+date_submitted+\ '\n'+l[process_line]) sock = opener.open(delete_url) sock.close() url_list = [ i[0] for i in url_list_pair ] # RESET yabbse MESSAGE INDEX if url in url_list: i = url_list.index(url) url_list_pair.remove(url_list_pair[i]) else: loop_count = loop_count+1
- Printer-friendly version
- Log in or register to post comments
- 7431 reads
Recent comments