Python批量查询alibaba关键词排名

beautiful

#encoding=utf-8
import urllib
import re
import threading
import requests
import time
def get_ranK(b):
    url='http://www.alibaba.com/products/'+b+'.html'
    url01='http://www.alibaba.com/products/'+b+'/2.html'
    url02='http://www.alibaba.com/products/'+b+'/3.html'
    html=requests.get(url=url).content
    html01=requests.get(url=url01).content
    html02=requests.get(url=url02).content

    company=re.compile(r'<a href="http://gzxibolai\.en\.alibaba\.com/company_profile\.html.*?"[\s\S]*?target="_blank"[\s\S]*?n=(\d+)[\s\S]*?')
    if "http://gzxibolai.en.alibaba.com" in html:
        page=re.findall(company,html)
        for x in page:
            print b,u'约第1页%s位\n'%x
            op_paiming_txt.writelines("%s,约第1页%s位\n"%(b,x))
        time.sleep(2)
        return 
    elif "http://gzxibolai.en.alibaba.com" in html01:
        page01=re.findall(company,html01)
        for m in page01:
            print b,u'约第2页%s位\n'%m
            op_paiming_txt.writelines("%s,约第2页%s位\n"%(b,m))
        time.sleep(2)
        return
    elif "http://gzxibolai.en.alibaba.com" in html02:
        page02=re.findall(company,html02)
        for o in page02:
            print b,u'约第3页%s位\n'%o
            op_paiming_txt.writelines("%s,约第3页%s位\n"%(b,o))
        time.sleep(2)
        return

    else:
        print b,u'前3页无排名\n'
        op_paiming_txt.writelines("%s,前3页无排名\n"%b)
        time.sleep(2)
        return
# if __name__ == '__main__':
#     with open('kfc.txt','r') as c:
#         for i in c.readlines():
#             f=i.strip()
#             get_ranK(f)

# Script entry point: read one keyword per line from kfc.txt and look each
# one up in its own thread, appending the results to paiming.csv.
if __name__ == '__main__':
    with open('kfc.txt', 'r') as c:
        # Blank lines would otherwise be queried as an empty keyword.
        keywords = [i.strip() for i in c if i.strip()]

    # get_ranK writes to the module-level name op_paiming_txt, so the file
    # must be open before the first worker starts, and must stay open until
    # the last worker has finished.
    with open('paiming.csv', 'a') as op_paiming_txt:
        workers = []
        for f in keywords:
            p = threading.Thread(target=get_ranK, args=(f,))
            p.start()
            workers.append(p)
        # Wait for every lookup so the file is not closed under a worker.
        for p in workers:
            p.join()

发表评论

电子邮件地址不会被公开。