import http.client
import pandas as pd
import requests
from lxml import etree
from bs4 import BeautifulSoup as bs
# Scrape company listings from www.tansent.com and append one
# "<name> <detail>" line per listing to a local text file.

HOST = "www.tansent.com"

# Listing query with every filter parameter set to 0; the page number is
# appended to the end of this string.
LISTING_PATH = "/tscom/?cid=0&sch_v=0&edu_v=0&hosp_v=0&gov_v=0&com_v=0&ind_v=0&ofc_v=0&ZYCF=0&ISO9001=0&ISO14001=0&ISO45001=0&ISO22000=0&HACCP=0&ys_v=0&vip_v=0&page="

# NOTE(review): hard-coded Cookie header. It looks like a captured browser
# session and carries a `userinfo` field -- confirm whether it is still valid
# and consider loading it from configuration instead of committing it.
COOKIE = '__yjs_duid=1_a833f2824b55d291e5bc1f3e985223171681358466408; Hm_lvt_a3a2d3af6a33805cae54b13133559a7d=1681358452; userinfo=235989%7C-%7C18602737865%7C-%7CUJcA8iPL%7C-%7C3%7C-%7Chttps%3A%2F%2Fimg.tansent.com%2FPublic%2Fupload%2Fcoimg%2Flogo%2F20230220%2F2023-02-20-16768767669274.png%7C-%7C%E6%AD%A6%E6%B1%89%E6%99%AE%E5%8F%8A%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8%7C-%7C0%7C-%7C9%7C-%7C0; userinfo=235989%7C-%7C18602737865%7C-%7CUJcA8iPL%7C-%7C3%7C-%7Chttps%3A%2F%2Fimg.tansent.com%2FPublic%2Fupload%2Fcoimg%2Flogo%2F20230220%2F2023-02-20-16768767669274.png%7C-%7C%E6%AD%A6%E6%B1%89%E6%99%AE%E5%8F%8A%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8%7C-%7C0%7C-%7C9%7C-%7C0; npv=17; Hm_lpvt_a3a2d3af6a33805cae54b13133559a7d=1681367543; __yjs_duid=1_742b5741321a4957d2b86878a66e39e71681367586786'

OUTPUT_PATH = r"E:\test.txt"


def listing_path(page):
    """Return the request path (with query string) of listing page *page*."""
    return LISTING_PATH + str(page)


def fetch_page(page):
    """Download listing page *page* and return its body decoded as UTF-8.

    Raises:
        OSError / http.client.HTTPException: on connection or protocol errors.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    conn = http.client.HTTPSConnection(HOST)
    try:
        # The empty-string body is kept from the original request so the
        # bytes sent on the wire do not change.
        conn.request("GET", listing_path(page), "", {"Cookie": COOKIE})
        return conn.getresponse().read().decode("utf-8")
    finally:
        # Always release the socket, even when the request fails.
        conn.close()


def tag_text(tag):
    """Return the text of a parsed tag, or None when *tag* is None."""
    if tag is None:
        return None
    # `.string` is None when the tag has more than one child; fall back to
    # the concatenated text instead of failing on `None + str`.
    text = tag.string
    return tag.get_text() if text is None else text


def format_row(name, detail):
    """Join a listing's name and detail into the output line (no newline)."""
    return name + " " + detail


def extract_rows(html):
    """Yield one "<name> <detail>" string per complete listing in *html*.

    A listing is a `div.tscomItem`; its name is the `a.line1` inside
    `div.t2` and its detail is the first `span` inside `div.bottom`.
    Listings missing any of these elements are skipped.
    """
    soup = bs(html, "html.parser")
    for item in soup.find_all("div", {"class": "tscomItem"}):
        title_box = item.find("div", {"class": "t2"})
        footer = item.find("div", {"class": "bottom"})
        if title_box is None or footer is None:
            continue
        name = tag_text(title_box.find("a", {"class": "line1"}))
        detail = tag_text(footer.find("span"))
        if name is None or detail is None:
            continue
        yield format_row(name, detail)


def main(pages=range(2316, 2317)):
    """Scrape every page in *pages*, printing and appending each listing.

    Args:
        pages: iterable of listing page numbers; defaults to the single
            page 2316.
    """
    # Open the output once and close it deterministically. The encoding is
    # explicit so non-ASCII names do not depend on the platform default.
    with open(OUTPUT_PATH, "a", encoding="utf-8") as out:
        for page in pages:
            for row in extract_rows(fetch_page(page)):
                print(row)
                out.write(row + "\n")


if __name__ == "__main__":
    main()
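# NOTE: the lines below are footer residue from the web page this script was
# copied from; they are not part of the program.
# "This article has ... words; average reading time is about ... minutes;
#  time read so far: 0 h 0 min 0 s."
# 649494848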