python: ลองเขียนสคริปต์ลบสแปม
Submitted by wd on Wed, 2007-10-03 19:25
ลองเขียนสคริปต์ลบสแปม
ใช้กับบอร์ด yabbse กับ smf
ของ smf ยังไม่เสร็จ บันทึกเอาไว้เพื่อลองดูผลเท่านั้น
ต้องเปลี่ยนแปลงสคริปต์ตามธีมที่ใช้ด้วย
*** สคริปต์นี้ใช้กับ thailinuxhosting.com/yabbse เท่านั้น เพราะใส่โค้ดที่แก้ปัญหาบอร์ดไว้ด้วยครับ
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Settings for the spam-removal script; every name below is read as a
# module-level global by the code that follows.
# Forum account used for the login request.
user = "wd"
password = "mypassword"
# Value the main loop writes into the board's password cookie on each pass.
enc_password = "XXXXXXXXXX" # *** GET ENCRYPTED PASSWORD FROM BROWSER COOKIE
# Base URL of the forum; the commented-out value is the alternative smf site.
site = "http://www.thailinuxhosting.com/yabbse" #"http://www.thaitux.info/smf"
# Forum software; selects the page markers and URL format used for parsing.
board = "yabbse" # "smf", "yabbse"
# Charset that literals are converted to before being matched against pages.
charset = "tis620" # "utf8", "tis620"
# Number of entries of the board's recent list to examine.
max_loop = 5 # = RECENT LIST OF BOARD
# Working directory holding the backup log, the keyword list and the cookies.
root = "/home/wd/spam"
backup_file = root+"/thailinuxhosting-bak.txt"
spamtext_file = root+"/spamlist.txt"
cookie_file = root+"/thailinuxhosting-cookie"
import sys
import os
import time
##### PRE RUN FOR RETRIEVE COOKIE #####
import urllib2
import cookielib
# Log in once with the plain credentials and store the cookies the board
# hands back, so the main pass can reload them from cookie_file.
cj = cookielib.MozillaCookieJar()
cookie_handler = urllib2.HTTPCookieProcessor(cj)
opener = urllib2.build_opener(cookie_handler)
login = "/index.php?action=login2;user=%s;passwrd=%s;cookielength=302400" % (user, password)
sock = opener.open(site + login)
# Session-only and expired cookies are written out as well.
cj.save(cookie_file, ignore_discard=True, ignore_expires=True)
sock.close()
#######################################
# Spam keywords; filled from spamtext_file further down and read by check_spam().
spamlist = []
def decoding(txt, target_charset=None):
    """Convert a UTF-8 byte string from this source file to the board's charset.

    This file is declared as UTF-8, so literals that are matched against
    downloaded pages are passed through here first.

    txt            -- UTF-8 encoded byte string.
    target_charset -- "tis620" or "utf8"; when omitted, the module-level
                      ``charset`` setting is used.

    Returns ``txt`` re-encoded as TIS-620, or unchanged for "utf8".  For any
    other charset an error line is printed and the script terminates with
    exit status 1.
    """
    if target_charset is None:
        target_charset = charset
    if target_charset == "utf8":
        return txt
    if target_charset == "tis620":
        return txt.decode("utf8").encode("tis620")
    print("Error, CHARSET is not defined")
    # The original wrote ``sys.exit[0]``, which subscripts the function and
    # raises TypeError instead of exiting; call it and report failure.
    sys.exit(1)
def search_line(txt, l, occur=1):
    """Return the index of the ``occur``-th line in ``l`` that contains ``txt``.

    txt   -- substring to look for.
    l     -- sequence of lines.
    occur -- 1-based number of the wanted match; values below 1 act like 1.

    Returns -1 when ``l`` holds fewer than ``occur`` matching lines.
    """
    remaining = occur
    for index, text in enumerate(l):
        if txt not in text:
            continue
        if remaining <= 1:
            return index
        # Not the requested occurrence yet; keep counting down.
        remaining -= 1
    return -1
def get_msgid(url):
    """Extract the message number from a post URL of the configured board.

    smf links carry it after ``#msg`` and yabbse links after ``;start=``;
    the text following the last such marker is returned as a string.  For
    any other ``board`` value the result is None.
    """
    if board == "yabbse":
        marker = ";start="  # ...;start=XX
    elif board == "smf":
        marker = "#msg"  # ...#msgXX
    else:
        return None
    pieces = url.split(marker)
    return pieces[-1]
def check_spam(txt):
    """Tell whether ``txt`` contains any keyword from ``spamlist``.

    Returns ``(True, keyword)`` for the first keyword, in list order, that
    occurs in ``txt``; otherwise ``(False, '')``.
    """
    hits = [keyword for keyword in spamlist if keyword in txt]
    if hits:
        return True, hits[0]
    return False, ''
def save_backup(txt, path=None):
    """Append one record to the backup log.

    txt  -- record text; three newlines are written after it as a separator.
    path -- file to append to; when omitted, the module-level
            ``backup_file`` setting is used.

    Returns None.  The file is closed even if the write raises.
    """
    if path is None:
        path = backup_file
    f = open(path, 'a')
    try:
        f.write(txt + '\n\n\n')
    finally:
        # try/finally rather than ``with`` keeps this usable on old Python 2.
        f.close()
def die_board():
    """Abort the script because ``board`` names an unsupported forum type.

    Prints "board not exist" and terminates with exit status 1.
    """
    print("board not exist")
    # The original wrote ``sys.exit[0]``, which subscripts the function and
    # raises TypeError instead of exiting; call it and report failure.
    sys.exit(1)
# Choose the caption that the main loop searches for on the index page to
# locate the recent-post list.  The captions are Thai text stored as escaped
# UTF-8 bytes.
if board not in ("smf", "yabbse"):
    die_board()
elif board == "smf":
    # Thai caption meaning "recent topics"
    recent_str = (
        "\xe0\xb8\x81\xe0\xb8\xa3\xe0\xb8\xb0\xe0\xb8\x97\xe0\xb8\xb9"
        "\xe0\xb9\x89\xe0\xb9\x80\xe0\xb8\xa1\xe0\xb8\xb7\xe0\xb9\x88\xe0\xb8\xad"
        "\xe0\xb9\x80\xe0\xb8\xa3\xe0\xb9\x87\xe0\xb8\xa7\xe0\xb9\x86 \xe0\xb8\x99"
        "\xe0\xb8\xb5\xe0\xb9\x89"
    )
else:
    # Thai caption meaning "recent posts"
    recent_str = (
        "\xe0\xb9\x82\xe0\xb8\x9e\xe0\xb8\xaa\xe0\xb8\x95\xe0\xb9\x8c"
        "\xe0\xb9\x80\xe0\xb8\xa1\xe0\xb8\xb7\xe0\xb9\x88\xe0\xb8\xad\xe0\xb9\x80"
        "\xe0\xb8\xa3\xe0\xb9\x87\xe0\xb8\xa7\xe0\xb9\x86\xe0\xb8\x99\xe0\xb8\xb5\xe0\xb9\x89"
    )
# Load the spam keywords, one per line; an empty list file is created when
# none exists yet.
if not os.path.exists(spamtext_file):
    open(spamtext_file, 'w').close()
f = open(spamtext_file)
for entry in f:
    # Raw lines of three characters or fewer (line ending included) are skipped.
    if len(entry) <= 3:
        continue
    spamlist.append(decoding(entry.strip()))
f.close()
# Convert the caption the same way as the keywords before it is searched for.
recent_str = decoding(recent_str)
# Loop state: position in the recent list, and [url, times_seen] pairs kept
# because a yabbse link only points at the last message of its topic.
loop_count = 0
url_list_pair = []
# Reload the cookies saved by the login step and build an opener that sends them.
cj = cookielib.MozillaCookieJar()
cj.load(cookie_file)
cookie_handler = urllib2.HTTPCookieProcessor(cj)
opener = urllib2.build_opener(cookie_handler)
# Main pass: walk the board's recent list, open each linked post, and delete
# it when its body contains a spam keyword.
# NOTE(review): the source lost its indentation; the nesting below is
# reconstructed from syntax and intent and should be confirmed.
# NOTE(review): loop_count only advances for posts that are not spam, so a
# delete request that does not actually remove the post would repeat the same
# pass indefinitely - consider capping retries.
while loop_count < max_loop:
    # Fetch the index page again on every pass.
    sock = opener.open(site)
    #HACK: SOLVE yabbse'S BOARD COOKIE ERROR
    cj._cookies['thailinuxhosting.com']['/yabbse']['YaBBSE140usernamev14'].value = user
    cj._cookies['thailinuxhosting.com']['/yabbse']['YaBBSE140passwordv14'].value = enc_password
    html = sock.read()
    sock.close()
    l = html.split('\n')

    # Session id (smf only; not used further in this script yet).
    if board == "smf":
        sstr = "sesc="
        line = search_line(sstr, l)
        if line < 0:
            # Was ``sys.exit[0]``, which raised TypeError instead of exiting.
            sys.exit("session id marker not found on the index page")
        session_id = l[line].split(sstr)[1].split('">')[0]
    else:
        session_id = ""

    # Locate the recent-post list and take the entry at position loop_count.
    sstr = recent_str
    line = search_line(sstr, l)
    if line < 0:
        # Was ``sys.exit[0]``, which raised TypeError instead of exiting.
        sys.exit("recent-post caption not found on the index page")
    if board == "smf":
        url = l[line+9+loop_count].split('<a href="')[1].split('">')[0]
        author = ""
        date_submitted = ""
    elif board == "yabbse":
        url = l[line+4+loop_count].split('<td valign="top"><a href="')[1].split('">')[0]
        # Count how many times this URL has been seen; the count is later
        # subtracted from the message index taken from the URL.
        for pair in url_list_pair:
            if pair[0] == url:
                pair[1] += 1
                index_dec = pair[1]
                break
        else:
            url_list_pair.append([url, 0])
            index_dec = 0
        # Thai word for "by", followed by a space; it precedes the author name.
        tmp = decoding('\xe0\xb9\x82\xe0\xb8\x94\xe0\xb8\xa2 ')
        author = l[line+4+loop_count].split(tmp)[1].split('</td>')[0]
    else:
        die_board()

    # Download the page that holds the post.
    msgid = get_msgid(url)
    sock = opener.open(url)
    html = sock.read()
    sock.close()
    l = html.split('\n')

    # Parse the post and test it against the keyword list.
    is_spam = False
    spam_keyword = ''
    if board == "smf":
        # smf support is unfinished: the message anchor is located but never
        # checked, so is_spam stays False.
        sstr = "msg_%s" % (msgid,)
        line = search_line(sstr, l)
    elif board == "yabbse":
        sstr = '<hr width="100%" size="1" class="windowbg3">'
        # 20 MESSAGES PER PAGE - yabbse INDEX DECREMENT
        count = (int(msgid)-index_dec) % 20 + 1
        print("loop= %s  /// count= %s" % (loop_count, count))
        line = search_line(sstr, l, count)
        tmp = decoding("javascript:DoConfirm('")
        try:
            delete_url = l[line-3].split(tmp)[1].split("','")[1].split("""');"><img src""")[0]
            # NOTE(review): this non-ASCII marker is not passed through
            # decoding(), unlike the other Thai literals - confirm it matches
            # the bytes the page actually contains.
            date_submitted = l[line-4].split('</B> ')[1].split(' »')[0]
            title = l[line-5].split('<B>')[1].split('</b>')[0]
            process_line = line+1
            is_spam, spam_keyword = check_spam(l[process_line])
            if is_spam:
                print("line= %s  /// l[line-3]= %s" % (line, l[line-3]))
                print("delete_url= %s" % (delete_url,))
                # NOTE(review): placed inside the spam branch; whether the
                # original printed this for every post cannot be told from
                # the flattened source.
                print("is_spam= %s  /// keyword= %s  /// line= %s"
                      % (is_spam, spam_keyword, l[process_line]))
        except Exception:
            # Best effort: a page that does not match the expected layout is
            # treated as not spam.  The original bare ``except:`` also caught
            # KeyboardInterrupt and SystemExit, so Ctrl-C could not stop it.
            is_spam = False
    else:
        die_board()

    if is_spam:
        if board == "smf":
            pass
        elif board == "yabbse":
            # Record what is about to be removed, then request the deletion.
            record = '\n'.join([
                'delete url: ' + delete_url,
                'spam keyword: ' + spam_keyword,
                'scan date: ' + time.ctime(time.time()),
                'title: ' + title,
                'author: ' + author,
                'submitted date: ' + date_submitted,
                l[process_line],
            ])
            save_backup(record)
            sock = opener.open(delete_url)
            sock.close()
            # RESET yabbse MESSAGE INDEX: forget the counter kept for this URL.
            url_list = [pair[0] for pair in url_list_pair]
            if url in url_list:
                del url_list_pair[url_list.index(url)]
    else:
        # Not spam: move on to the next entry of the recent list.
        loop_count = loop_count+1
- Printer-friendly version
- Log in or register to post comments
- 8764 reads







Recent comments