diff --git "a/\347\224\265\344\277\241\347\247\273\345\212\250\351\207\207\350\264\255\346\213\233\346\240\207\344\277\241\346\201\257\351\207\207\351\233\206.py" "b/\347\224\265\344\277\241\347\247\273\345\212\250\351\207\207\350\264\255\346\213\233\346\240\207\344\277\241\346\201\257\351\207\207\351\233\206.py"
new file mode 100644
index 0000000000000000000000000000000000000000..ed5bd544f5484825af65d2e59ed8a6301da6db3a
--- /dev/null
+++ "b/\347\224\265\344\277\241\347\247\273\345\212\250\351\207\207\350\264\255\346\213\233\346\240\207\344\277\241\346\201\257\351\207\207\351\233\206.py"
@@ -0,0 +1,210 @@
+
+#采购与招标信息网
+#https://www.chinabidding.cn/
+#中国电信外部门户招标信息
+#https://42.99.33.26/MSS-PORTAL/account/login.do
+#中国移动采购与招标网
+#https://b2b.10086.cn/b2b/main/showBiao!preIndex.html?noticeType=list1
+#中国移动广东门户招标信息
+#http://www.telewiki.cn/supplier/viewLogin.action
+
+import urllib.request
+import re
+import datetime
+from lxml import etree
+import ssl
+# Swap the ssl module's default HTTPS context factory for the unverified one,
+# so HTTPS requests made through urllib skip certificate verification.
+# NOTE(review): this turns TLS verification off for the whole process.
+# Presumably it is here because one portal is addressed by bare IP over
+# HTTPS - confirm, and prefer a per-request context if that is the only reason.
+ssl._create_default_https_context = ssl._create_unverified_context
+
+# Keywords that are searched for on every portal.
+keywdL = [
+    '移动',
+    '宽带',
+    '电信',
+]
+
+# strftime pattern used for the dates embedded in query URLs.
+dateFormat = '%Y-%m-%d'
+
+def get_today():
+    """Return the current local date formatted with ``dateFormat``."""
+    return datetime.datetime.now().strftime(dateFormat)
+
+def get_startdate(days=10):
+    """Return the first day of the query window, formatted with ``dateFormat``.
+
+    Args:
+        days: How many days before the current local time the window starts.
+            Defaults to 10, the look-back that used to be hard-coded.
+
+    Returns:
+        The start date as a string.
+    """
+    start = datetime.datetime.now() - datetime.timedelta(days=days)
+    return start.strftime(dateFormat)
+
+def get_day():
+    """Return the current local date as a compact ``YYYYMMDD`` string."""
+    return format(datetime.datetime.now(), '%Y%m%d')
+
+# Output files, one per portal; each name ends with today's date stamp.
+fileName1 = "采购与招标网招标信息{}.txt".format(get_day())
+fileName2 = "中国电信外部门户网站招标信息{}.txt".format(get_day())
+fileName3 = "中国移动采购与招标网招标信息{}.txt".format(get_day())
+fileName4 = "中国移动广东公司供应商门户招标信息{}.txt".format(get_day())
+
+def url_open(url, timeout=30):
+    """Fetch ``url`` and return the response body as text.
+
+    The request carries a desktop-browser User-Agent header. The body is
+    decoded as UTF-8 and undecodable bytes are dropped.
+
+    Args:
+        url: Address to fetch.
+        timeout: Seconds to wait on blocking network operations, so that a
+            stalled server cannot hang the whole run.
+
+    Returns:
+        The decoded body, or ``None`` when the request fails. The error is
+        printed rather than raised.
+    """
+    headers=("user-agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0")
+    opener = urllib.request.build_opener()
+    opener.addheaders = [headers]
+    try:
+        # Use the opener directly instead of installing it process-wide on
+        # every call, and close the response once it has been read.
+        with opener.open(url, timeout=timeout) as response:
+            return response.read().decode("utf-8", "ignore")
+    except Exception as e:
+        # Best effort: callers treat a missing page as "no results".
+        print(str(e))
+        return None
+
+# Fetch one chinabidding.cn search-result page and append every hit (title,
+# absolute link, publish time) to fileName1. Exceptions are printed, not raised.
+# NOTE(review): in this copy of the file the text between the opening quote
+# of `url_pat` and `0):` is missing, so the patterns that fill urlL, titleL
+# and timeL are not visible here. Restore them from the original source
+# before relying on this function.
+def get_content1(url):
+ try:
+ data=url_open(url)
+ url_pat='
0):
+ for i in range(len(urlL)):
+ page_url="https://www.chinabidding.cn"+urlL[i]
+ page_title=titleL[i]
+ time=timeL[i]
+ with open(fileName1,'a') as fh:
+ fh.write("招标主题:"+page_title+"\n")
+ fh.write("招标内容:"+page_url+"\n")
+ fh.write("发布时间:"+time+"\n")
+ fh.write("------------------------\n")
+ except Exception as e:
+ print(str(e))
+
+# Search chinabidding.cn for every keyword in keywdL and save the results to
+# fileName1: a site header line first, then one "关键词" line per keyword
+# followed by whatever get_content1 appends for that keyword's result page.
+# Exceptions are printed, not raised.
+def caigouwang():
+ try:
+ with open(fileName1,'a') as f:
+ f.write("采购与招标网:https://www.chinabidding.cn/\n")
+ for keywd in keywdL:
+ # Percent-encode the keyword so it can be placed in the query string.
+ keywords=urllib.request.quote(keywd)
+ with open(fileName1,'a') as fh:
+ fh.write("关键词:"+keywd+"\n")
+ # b_date=week presumably limits hits to the last week - TODO confirm.
+ url="https://www.chinabidding.cn/search/searchzbw/search2?keywords="+keywords+"&table_type=&areaid=&categoryid=&b_date=week"
+ print("正在查询关键词:"+keywd)
+ get_content1(url)
+ print("完成")
+ except Exception as e:
+ print(e)
+
+def get_content2(url):
+    """Append the announcement titles on a China Telecom list page to ``fileName2``.
+
+    Titles are the text of the ``href="#"`` anchors inside the
+    ``table_data`` table. Only the title is recorded; no link is written.
+    Any exception is printed and swallowed.
+    """
+    try:
+        tree = etree.HTML(url_open(url))
+        titles = tree.xpath('//table[@class="table_data"]//td/a[@href="#"]/text()')
+        for title in titles:
+            with open(fileName2, 'a') as out:
+                out.write("招标主题:" + title + "\n")
+                out.write("-----------------\n")
+    except Exception as err:
+        print(err)
+
+# Query the China Telecom portal's announcement list for every keyword in
+# keywdL, once per provinceJT value ('JT' and 'NJT'), and save the results to
+# fileName2: a site header line first, then one "关键词" line per query
+# followed by the titles get_content2 appends. Each query starts at
+# get_startdate() and leaves endDate empty. Exceptions are printed, not raised.
+def dianxin():
+ try:
+ with open(fileName2,'a') as f:
+ f.write("中国电信外部门户网站:https://42.99.33.26/MSS-PORTAL/account/login.do\n")
+ for keywd in keywdL:
+ # Percent-encode the keyword so it can be placed in the query string.
+ keywords=urllib.request.quote(keywd)
+ # NOTE(review): 'JT' / 'NJT' presumably mean group-level vs. non-group announcements - confirm.
+ proL=['JT','NJT']
+ for province in proL:
+ url="https://42.99.33.26/MSS-PORTAL/announcementjoin/list.do?provinceJT="+province+"&docTitle="+keywords+"&docCode=&provinceCode=&startDate="+get_startdate()+"&endDate=&docType=&paging.start=1&paging.pageSize=40&pageNum=40&goPageNum=1"
+ print("正在查询关键词("+province+"):"+keywd)
+ with open(fileName2,'a') as fh:
+ fh.write("关键词("+province+"):"+keywd+"\n")
+ get_content2(url)
+ print("完成")
+ except Exception as e:
+ print(e)
+
+def get_content3(url):
+    """Append the notice titles on a China Mobile bidding list page to ``fileName3``.
+
+    Titles are taken from the ``title`` attribute of the ``href="#this"``
+    anchors inside the 280px-wide table cells. Any exception is printed and
+    swallowed.
+    """
+    try:
+        tree = etree.HTML(url_open(url))
+        titles = tree.xpath('//td[@style="width:280px;"]/a[@href="#this"]/@title')
+        for title in titles:
+            with open(fileName3, 'a') as out:
+                out.write("招标主题:" + title + "\n")
+                out.write("-----------------\n")
+    except Exception as err:
+        print(err)
+
+# Search the China Mobile procurement and bidding site (b2b.10086.cn) for
+# every keyword in keywdL and save the results to fileName3: a site header
+# line first, then one "关键词" line per keyword followed by the titles
+# get_content3 appends. Each query starts at get_startdate() and leaves the
+# end date empty. Exceptions are printed, not raised.
+def yidong():
+ try:
+ with open(fileName3,'a') as fh:
+ fh.write("中国移动采购与招标:https://b2b.10086.cn/b2b/main/showBiao!preIndex.html?noticeType=list1\n")
+ for keywd in keywdL:
+ # Percent-encode the keyword so it can be placed in the query string.
+ keywords=urllib.request.quote(keywd)
+ # Every noticeBean.* parameter must be introduced by "&". The separators
+ # had been corrupted into "¬" (an HTML-entity decoding of "&not"), which
+ # fused the title and date filters into a single malformed parameter.
+ url="https://b2b.10086.cn/b2b/main/listVendorNoticeResult.html?page.currentPage=1&page.perPageSize=40&noticeBean.sourceCH=&noticeBean.source=&noticeBean.title="+keywords+"&noticeBean.startDate="+get_startdate()+"&noticeBean.endDate="
+ with open(fileName3,'a') as fh:
+ fh.write("关键词:"+keywd+"\n")
+ print("正在查询关键词:"+keywd)
+ get_content3(url)
+ print("完成")
+ except Exception as e:
+ print(e)
+
+def get_content4(url):
+    """Append title and detail link of each notice on a Guangdong Mobile list page to ``fileName4``.
+
+    Notices are the ``<a>`` elements inside ``span.ptitle``. The title is the
+    anchor's text and the detail link is built from its ``onclick``
+    attribute. Anchors lacking either one are skipped. Any exception is
+    printed and swallowed.
+
+    Args:
+        url: Address of the list page to fetch.
+    """
+    try:
+        data = url_open(url)
+        if not data:
+            # url_open has already reported the failure; nothing to parse.
+            return
+        html = etree.HTML(data)
+        # Read title and onclick from the same element. Two independent
+        # XPath result lists can differ in length, which would raise
+        # IndexError or pair a title with another notice's link.
+        for anchor in html.xpath("//span[@class='ptitle']/a"):
+            page_title = "".join(anchor.xpath("text()"))
+            onclick = anchor.get("onclick")
+            if not page_title or not onclick:
+                continue
+            # NOTE(review): the slice drops 5 leading and 2 trailing
+            # characters, presumably a call wrapper around the notice id -
+            # confirm against the live markup.
+            page_url = "http://www.telewiki.cn/notice/notice!queryNoticeDetail.action?noticeSO.noticeid=" + onclick[5:-2]
+            with open(fileName4, 'a') as fh:
+                fh.write("招标主题:" + page_title + "\n")
+                fh.write("招标内容:" + page_url + "\n")
+                fh.write("-----------------\n")
+    except Exception as e:
+        print(e)
+
+# Search the China Mobile Guangdong supplier portal (telewiki.cn) for every
+# keyword in keywdL and save the results to fileName4: a site header line
+# first, then one "关键词" line per keyword followed by the entries
+# get_content4 appends. Each query covers get_startdate() through
+# get_today(). Exceptions are printed, not raised.
+def guangdong():
+ try:
+ with open(fileName4,'a') as fh:
+ fh.write("中国移动广东公司供应商门户网站:http://www.telewiki.cn/supplier/viewLogin.action\n")
+ for keywd in keywdL:
+ # Percent-encode the keyword so it can be placed in the query string.
+ keywords=urllib.request.quote(keywd)
+ # NOTE(review): the fixed `random=` value is presumably a cache-buster copied from the site's own request - confirm.
+ url="http://www.telewiki.cn/notice/notice!queryPurchaseList.action?random=0.000059963069461321794&queryListSO.queryProjectName="+keywords+"&queryListSO.queryRegionCompany=&queryListSO.queryOpMethod=&queryListSO.queryBegindate="+get_startdate()+"&queryListSO.queryEnddate="+get_today()+"&queryListSO.step=&queryListSO.applyState=&queryListSO.purchaseType=&queryListSO.status=0"
+ with open(fileName4,'a') as fh:
+ fh.write("关键词:"+keywd+"\n")
+ print("正在查询关键词:"+keywd)
+ get_content4(url)
+ print("完成")
+ except Exception as e:
+ print(e)
+
+if __name__ == '__main__':
+    # Run the four scrapers in a fixed order, announcing each one first.
+    for banner, scrape in (
+        ("-----开始爬取采购与招标网------", caigouwang),
+        ("-----开始爬取中国电信外部门户网站------", dianxin),
+        ("-----开始爬取中国移动采购与招标网------", yidong),
+        ("-----开始爬取中国移动广东公司供应商门户网站------", guangdong),
+    ):
+        print(banner)
+        scrape()
|