# Collects recent procurement / bidding notices from four portals and appends
# them to one dated text file per portal:
#   Procurement and bidding information network
#     https://www.chinabidding.cn/
#   China Telecom external portal, bidding information
#     https://42.99.33.26/MSS-PORTAL/account/login.do
#   China Mobile procurement and bidding network
#     https://b2b.10086.cn/b2b/main/showBiao!preIndex.html?noticeType=list1
#   China Mobile Guangdong supplier portal, bidding information
#     http://www.telewiki.cn/supplier/viewLogin.action

import urllib.parse
import urllib.request
import re
import datetime
import ssl

from lxml import etree

# SECURITY NOTE: this turns off TLS certificate verification for every HTTPS
# request made by this process. It is kept because the script depends on it
# (NOTE(review): presumably for the portal that is addressed by bare IP --
# confirm), but it should be narrowed to a per-request context if possible.
ssl._create_default_https_context = ssl._create_unverified_context

# Keywords that are searched on every portal.
keywdL = ['移动', '宽带', '电信']

# Date format used in query-string date parameters.
dateFormat = '%Y-%m-%d'

# Browser-like user agent sent with every request.
_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0"


def get_today():
    """Return the current local date formatted with ``dateFormat``."""
    return datetime.datetime.now().strftime(dateFormat)


def get_startdate(days=10):
    """Return the date ``days`` days ago (default 10) formatted with ``dateFormat``."""
    start = datetime.datetime.now() - datetime.timedelta(days=days)
    return start.strftime(dateFormat)


def get_day():
    """Return the current local date as a compact ``YYYYMMDD`` string."""
    return datetime.datetime.now().strftime('%Y%m%d')


# Output file names, one per portal; the date suffix is fixed at import time.
fileName1 = "采购与招标网招标信息" + get_day() + '.txt'
fileName2 = "中国电信外部门户网站招标信息" + get_day() + ".txt"
fileName3 = "中国移动采购与招标网招标信息" + get_day() + ".txt"
fileName4 = "中国移动广东公司供应商门户招标信息" + get_day() + ".txt"

# NOTE(review): the three regular-expression literals originally used by
# get_content1 (and any flags passed to re.compile) are not recoverable from
# the reviewed copy of this file -- the markup inside the literals was
# stripped. Restore them here before relying on get_content1. Each constant
# may be a pattern string or a compiled pattern (to carry flags); each must
# contain exactly one capture group.
CHINABIDDING_URL_PAT = None    # captures the site-relative link of a notice
CHINABIDDING_TITLE_PAT = None  # captures the notice title
CHINABIDDING_TIME_PAT = None   # captures the publication time


def url_open(url, timeout=30):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    Undecodable bytes are dropped. On any failure the error is printed and
    ``None`` is returned, so callers must check the result.

    ``timeout`` is the socket timeout in seconds; it keeps one unresponsive
    portal from hanging the whole run.
    """
    try:
        opener = urllib.request.build_opener()
        opener.addheaders = [("user-agent", _USER_AGENT)]
        with opener.open(url, timeout=timeout) as response:
            return response.read().decode("utf-8", "ignore")
    except Exception as e:
        print(str(e))
        return None


def _append_lines(path, lines):
    """Append already newline-terminated ``lines`` to ``path`` as UTF-8.

    Does nothing when ``lines`` is empty. The encoding is explicit so the
    Chinese text does not depend on the platform default codec.
    """
    if not lines:
        return
    with open(path, 'a', encoding='utf-8') as fh:
        fh.writelines(lines)


def _fetch_tree(url):
    """Download ``url`` and parse it as HTML; return ``None`` if nothing usable came back."""
    data = url_open(url)
    if not data:
        return None
    return etree.HTML(data)


def get_content1(url):
    """Append the notices found on one chinabidding.cn result page to ``fileName1``.

    Link, title and time are extracted with the three ``CHINABIDDING_*_PAT``
    patterns. While those patterns are unset the page is skipped with a
    message. Errors are printed, not raised.
    """
    try:
        patterns = (CHINABIDDING_URL_PAT, CHINABIDDING_TITLE_PAT, CHINABIDDING_TIME_PAT)
        if not all(patterns):
            print("采购与招标网的解析正则尚未配置,已跳过:" + url)
            return
        data = url_open(url)
        if not data:
            return
        urlL = re.findall(CHINABIDDING_URL_PAT, data)
        titleL = re.findall(CHINABIDDING_TITLE_PAT, data)
        timeL = re.findall(CHINABIDDING_TIME_PAT, data)
        lines = []
        # zip stops at the shortest list, so a partial match cannot raise IndexError.
        for link, page_title, pub_time in zip(urlL, titleL, timeL):
            page_url = "https://www.chinabidding.cn" + link
            lines.append("招标主题:" + page_title + "\n")
            lines.append("招标内容:" + page_url + "\n")
            lines.append("发布时间:" + pub_time + "\n")
            lines.append("------------------------\n")
        _append_lines(fileName1, lines)
    except Exception as e:
        print(str(e))


def caigouwang():
    """Search chinabidding.cn for every keyword and save the results to ``fileName1``."""
    try:
        _append_lines(fileName1, ["采购与招标网:https://www.chinabidding.cn/\n"])
        for keywd in keywdL:
            keywords = urllib.parse.quote(keywd)
            _append_lines(fileName1, ["关键词:" + keywd + "\n"])
            url = "https://www.chinabidding.cn/search/searchzbw/search2?keywords=" + keywords + "&table_type=&areaid=&categoryid=&b_date=week"
            print("正在查询关键词:" + keywd)
            get_content1(url)
            print("完成")
    except Exception as e:
        print(e)


def get_content2(url):
    """Append the notice titles found on one China Telecom result page to ``fileName2``.

    Only titles are saved; detail-page links are not extracted. Errors are
    printed, not raised.
    """
    try:
        html = _fetch_tree(url)
        if html is None:
            return
        page_titleL = html.xpath('//table[@class="table_data"]//td/a[@href="#"]/text()')
        lines = []
        for page_title in page_titleL:
            lines.append("招标主题:" + page_title + "\n")
            lines.append("-----------------\n")
        _append_lines(fileName2, lines)
    except Exception as e:
        print(e)


def dianxin():
    """Search the China Telecom portal for every keyword and save the results to ``fileName2``.

    Each keyword is queried once per value of the ``provinceJT`` parameter,
    restricted to notices dated from ``get_startdate()`` onward.
    """
    try:
        _append_lines(fileName2, ["中国电信外部门户网站:https://42.99.33.26/MSS-PORTAL/account/login.do\n"])
        proL = ['JT', 'NJT']
        for keywd in keywdL:
            keywords = urllib.parse.quote(keywd)
            for province in proL:
                url = "https://42.99.33.26/MSS-PORTAL/announcementjoin/list.do?provinceJT=" + province + "&docTitle=" + keywords + "&docCode=&provinceCode=&startDate=" + get_startdate() + "&endDate=&docType=&paging.start=1&paging.pageSize=40&pageNum=40&goPageNum=1"
                print("正在查询关键词(" + province + "):" + keywd)
                _append_lines(fileName2, ["关键词(" + province + "):" + keywd + "\n"])
                get_content2(url)
                print("完成")
    except Exception as e:
        print(e)


def get_content3(url):
    """Append the notice titles found on one China Mobile result page to ``fileName3``.

    Errors are printed, not raised.
    """
    try:
        html = _fetch_tree(url)
        if html is None:
            return
        page_titleL = html.xpath('//td[@style="width:280px;"]/a[@href="#this"]/@title')
        lines = []
        for page_title in page_titleL:
            lines.append("招标主题:" + page_title + "\n")
            lines.append("-----------------\n")
        _append_lines(fileName3, lines)
    except Exception as e:
        print(e)


def yidong():
    """Search the China Mobile procurement site for every keyword and save the results to ``fileName3``."""
    try:
        _append_lines(fileName3, ["中国移动采购与招标:https://b2b.10086.cn/b2b/main/showBiao!preIndex.html?noticeType=list1\n"])
        for keywd in keywdL:
            keywords = urllib.parse.quote(keywd)
            # The parameters are "&noticeBean.*"; a copy of this URL in which
            # "&not" had been decoded to the "¬" character silently dropped
            # the title and date filters.
            url = "https://b2b.10086.cn/b2b/main/listVendorNoticeResult.html?page.currentPage=1&page.perPageSize=40&noticeBean.sourceCH=&noticeBean.source=&noticeBean.title=" + keywords + "&noticeBean.startDate=" + get_startdate() + "&noticeBean.endDate="
            _append_lines(fileName3, ["关键词:" + keywd + "\n"])
            print("正在查询关键词:" + keywd)
            get_content3(url)
            print("完成")
    except Exception as e:
        print(e)


def get_content4(url):
    """Append the notices found on one China Mobile Guangdong result page to ``fileName4``.

    The detail URL is built from each link's ``onclick`` attribute with its
    first five and last two characters removed. Errors are printed, not raised.
    """
    try:
        html = _fetch_tree(url)
        if html is None:
            return
        page_titleL = html.xpath("//span[@class='ptitle']/a/text()")
        page_urlL = html.xpath("//span[@class='ptitle']/a/@onclick")
        lines = []
        # zip stops at the shorter list, so a link without text cannot raise IndexError.
        for page_title, onclick in zip(page_titleL, page_urlL):
            page_url = "http://www.telewiki.cn/notice/notice!queryNoticeDetail.action?noticeSO.noticeid=" + onclick[5:-2]
            lines.append("招标主题:" + page_title + "\n")
            lines.append("招标内容:" + page_url + "\n")
            lines.append("-----------------\n")
        _append_lines(fileName4, lines)
    except Exception as e:
        print(e)


def guangdong():
    """Search the China Mobile Guangdong supplier portal for every keyword and save the results to ``fileName4``.

    Queries are restricted to the range ``get_startdate()`` .. ``get_today()``.
    """
    try:
        _append_lines(fileName4, ["中国移动广东公司供应商门户网站:http://www.telewiki.cn/supplier/viewLogin.action\n"])
        for keywd in keywdL:
            keywords = urllib.parse.quote(keywd)
            url = "http://www.telewiki.cn/notice/notice!queryPurchaseList.action?random=0.000059963069461321794&queryListSO.queryProjectName=" + keywords + "&queryListSO.queryRegionCompany=&queryListSO.queryOpMethod=&queryListSO.queryBegindate=" + get_startdate() + "&queryListSO.queryEnddate=" + get_today() + "&queryListSO.step=&queryListSO.applyState=&queryListSO.purchaseType=&queryListSO.status=0"
            _append_lines(fileName4, ["关键词:" + keywd + "\n"])
            print("正在查询关键词:" + keywd)
            get_content4(url)
            print("完成")
    except Exception as e:
        print(e)


def main():
    """Run the four portal scrapers in sequence."""
    print("-----开始爬取采购与招标网------")
    caigouwang()
    print("-----开始爬取中国电信外部门户网站------")
    dianxin()
    print("-----开始爬取中国移动采购与招标网------")
    yidong()
    print("-----开始爬取中国移动广东公司供应商门户网站------")
    guangdong()


if __name__ == '__main__':
    main()