为什么要批量写入MySQL数据库?因为一条一条地单独插入,每次都要和数据库交互一次(甚至重新建立一次连接),效率低下,出错的可能性也更大;数据量大的时候,两种写法的耗时会有比较明显的差异。批量写入,就是先用list把数据攒起来,格式如[(1,3),(2,5),(6,2)],最后使用executemany批量插入。ps:rank是查alibaba的排名。
# -*- coding: utf-8 -*-
"""Look up Alibaba keyword rankings and bulk-insert them into MySQL.

For every keyword listed in ``word.txt`` the script posts a query to the
Alibaba rank-search page, extracts the "about page N, position M" text from
the response, collects ``(keyword, rank)`` tuples in a list, and finally
writes all of them with a single ``executemany`` call.

The target database/table are expected to exist already, e.g.::

    create database if not exists alirank;
    create table alibaba_rank(keywords varchar(50), `rank` varchar(50));

NOTE: this is a Python 2 script. It relies on ``reload(sys)`` /
``sys.setdefaultencoding`` and on ``requests`` response bodies being byte
strings that are matched against byte-string patterns.
"""
import sys
reload(sys)
sys.setdefaultencoding("utf-8")
import urllib
import requests
import re
import time
import MySQLdb

# Printed when the account has no product within the first 20 result pages.
info = u'前20页无产品'


class chapaiming:
    """Collect keyword rankings from the rank-search page and store them."""

    def __init__(self):
        self.url = 'https://hz.productposting.alibaba.com/product/ranksearch/rankSearch.htm'
        # Placeholder value; a real session cookie has to be filled in.
        # NOTE(review): requests treats this dict as cookie-name -> value, so
        # this sends a cookie literally named "Cookie" - confirm that is intended.
        self.cookies = {'Cookie': 'xxxxxx'}
        # Accumulated (keyword, rank) tuples, consumed by write().
        self.l = []
        # Set by conndb(); stays None until a connection has been opened.
        self.conn = None

    def get_urldata(self):
        """Fetch the search page and return the value of its hidden input.

        get_rank() sends the returned value as the ``_csrf_token_`` form field.

        Raises:
            ValueError: if the page contains no matching hidden ``<input>``.
        """
        html = requests.get(url=self.url, cookies=self.cookies).content
        match = re.search(r"<input name='.+?' type='hidden' value='(.+?)'>", html)
        if match is None:
            # Fail with a descriptive error instead of a bare IndexError.
            raise ValueError('hidden token input not found in %s' % self.url)
        return match.group(1)

    def conndb(self):
        """Connect to the local MySQL server and return a cursor.

        The connection object is stored on ``self.conn``. No database is
        selected here; write() selects it.

        Returns:
            A cursor, or ``None`` if ``MySQLdb.Error`` was raised (the error
            is printed in that case).
        """
        try:
            conn = MySQLdb.connect(host='localhost', user='root', passwd='123', charset='utf8')
            self.conn = conn
            return conn.cursor()
        except MySQLdb.Error as err_msg:
            print(err_msg)
            return None

    def get_rank(self):
        """Query the rank of every keyword in ``word.txt``.

        Each non-blank line of the file is one keyword. Keywords with a rank
        are appended to ``self.l`` as ``(keyword, rank_text)``; keywords for
        which the page reports no product in the first 20 pages only get the
        ``info`` message printed.
        """
        # Compiled once; byte-string pattern matched against the raw response.
        rank_pattern = re.compile(r'约第\d+页,第\d+位')
        # Compare bytes with bytes so the whole response is never implicitly
        # decoded just for the containment test.
        no_product_mark = u'您公司或账号在前20页无产品'.encode('utf-8')

        with open('word.txt') as word_file:
            for keyword in word_file:
                keyword = keyword.strip()
                if not keyword:
                    # Blank lines would otherwise be sent as empty queries.
                    continue

                # A fresh token is fetched for every query, as before.
                postdata = self.get_urldata()
                data = {'_csrf_token_': '%s' % postdata, 'queryString': '%s' % keyword}
                html2 = requests.post(url=self.url, cookies=self.cookies, data=data).content

                if no_product_mark in html2:
                    print(info)
                    continue

                match = rank_pattern.search(html2)
                if match is None:
                    # Unexpected response (neither a rank nor the no-product
                    # notice): skip this keyword instead of crashing the run.
                    print('%s rank not found in response' % keyword)
                    continue

                word_rank = match.group(0)
                print('%s %s' % (keyword, word_rank))
                value = (keyword, word_rank)
                # Debug output kept from the original script.
                print(type(value))
                self.l.append(value)
                print(self.l)

    def write(self):
        """Insert all collected rows into ``alirank.alibaba_rank`` in one batch.

        Raises:
            RuntimeError: if conndb() could not open a connection.
        """
        cur = self.conndb()
        if cur is None:
            # conndb() already printed the MySQL error; stop with a clear
            # message rather than an AttributeError on self.conn.
            raise RuntimeError(
                'MySQL connection failed; %d collected rows were not written' % len(self.l))
        try:
            self.conn.select_db('alirank')
            # `rank` is a reserved word in MySQL 8.0+, so it must be quoted.
            sql = 'insert into alibaba_rank(keywords,`rank`) values (%s,%s)'
            cur.executemany(sql, self.l)
            self.conn.commit()
        finally:
            # Always release the cursor and the connection, even on failure.
            try:
                cur.close()
            finally:
                self.conn.close()


def mget():
    """Run the whole job: collect the rankings, then write them to MySQL."""
    p = chapaiming()
    p.get_rank()
    p.write()


mget()