
Python Web Scraping: Fetching, Parsing, and Storing Data Explained

2022-01-19 00:24 · 萬(wàn)能守恒定律 · Python

This article walks through the fetch, parse, and store steps of a Python web scraper. The code is commented in detail; readers who need it are welcome to use it as a reference.

1. Fetching Data

The requests library does the fetching here; a browser-style User-Agent header is sent so the request looks like ordinary browser traffic.

import requests

def drg(url):
    try:
        head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                              'AppleWebKit/537.36 (KHTML, like Gecko) '
                              'Chrome/91.0.4472.164 Safari/537.36'}
        r = requests.get(url, headers=head, timeout=30)
        r.raise_for_status()  # raise HTTPError if the status code is not 200
        r.encoding = r.apparent_encoding  # guess the real encoding from the body
        return r.text
    except requests.RequestException as error:
        return "Request failed: {0}".format(error)

url = "https://www.ip138.com/mobile.asp?mobile=13018305773&action=mobile"
print(drg(url))
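
As a side note, requests can assemble the query string itself instead of it being hard-coded into the URL. A minimal sketch of the same request (the parameter names are simply the ones visible in the URL above, not taken from any ip138 documentation):

import requests

# Same request as above, but with the query string built by requests
base_url = "https://www.ip138.com/mobile.asp"
params = {"mobile": "13018305773", "action": "mobile"}
r = requests.get(base_url, params=params, timeout=30)
print(r.url)  # .../mobile.asp?mobile=13018305773&action=mobile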

2. Parsing Data

Pages behind a login have to be fetched with an authenticated session first. The example below signs in with a requests session, then downloads the logged-in user's page; the HTML it returns is what the parsing step works on.

import requests

def login(form, headers):
    try:
        # URL the login form posts to (the credentials are repeated in its query string)
        urllogin="http://www.cqooc.com/user/login?username=12608199000635&password=48C032612C2A6777D28A969307B52127E198D59AA78522943C1B283CF7B89E69&nonce=6BA36BBB1F623279&cnonce=8257070573EFE28F"
        s = requests.session()
        r = s.post(urllogin, data=form, headers=headers)
        r.encoding = r.apparent_encoding
        r.raise_for_status()
        return s  # the session now carries the login cookies
    except Exception as error:
        print(error)

def get_html(s, url, headers):
    try:
        r = s.get(url, headers=headers)
        r.encoding = r.apparent_encoding
        r.raise_for_status()
        return r.text
    except Exception as error:
        print(error)

if __name__ == "__main__":
    # User-Agent copied from the logged-in browser session
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36",
    }
    # Replace these fields with the values from your own account
    Form = {
        "username": "12608199000635",
        "password": "48C032612C2A6777D28A969307B52127E198D59AA78522943C1B283CF7B89E69",
        "nonce": "6BA36BBB1F623279",
        "cnonce": "8257070573EFE28F"
    }
    session = login(Form, headers)
    # URL of the personal "My Courses" page
    url = "http://www.cqooc.com/my/learn"
    html = get_html(session, url, headers)
    print(html)
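
With the logged-in HTML in hand, the actual parsing could use lxml, just as the later examples do. This is only a sketch: the course-title class in the XPath is a hypothetical placeholder, since the article never shows the real markup of the cqooc page.

from lxml import etree

def parse_courses(html):
    doc = etree.HTML(html)
    # Hypothetical XPath: inspect the real page in the browser's
    # developer tools and adjust the expression to its actual markup
    return [t.strip() for t in doc.xpath("//*[@class='course-title']/text()")]

# courses = parse_courses(html)
# print(courses)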

3. Saving Data as CSV and to a Database

Saving as CSV

This example scrapes several pages of Qidian's "fengyun" ranking and appends each page's rows to a CSV file.

import requests
from lxml import etree
import csv

# Fetch the page
def get_html(url, time=30):
    try:
        r = requests.get(url, timeout=time)
        r.encoding = r.apparent_encoding
        r.raise_for_status()
        return r.text
    except Exception as error:
        print(error)

def parser(html):  # parsing function
    doc = etree.HTML(html)  # parse the HTML into an lxml element tree
    out_list = []  # list holding the parsed rows
    # Two-step lookup: find each book node, then read its fields
    for row in doc.xpath("//*[@class='book-img-text']//li/*[@class='book-mid-info']"):
        row_data = [
            row.xpath("h4/a/text()")[0],  # title
            row.xpath("p[@class='author']/a/text()")[0],  # author
            row.xpath("p[2]/text()")[0].strip(),  # synopsis
            row.xpath("p[@class='update']/span/text()")[0]  # last updated
        ]
        out_list.append(row_data)  # append each parsed row to the output list
    return out_list

def save_csv(item, path):  # write the list to a UTF-8 file to avoid mojibake
    with open(path, "a+", newline='', encoding="utf-8") as f:  # UTF-8 encoded file
        csv_write = csv.writer(f)  # create the writer object
        csv_write.writerows(item)  # write all rows at once

if __name__ == "__main__":
    for i in range(1, 6):
        url = "https://www.qidian.com/rank/fengyun?style=1&page={0}".format(i)
        html = get_html(url)  # fetch the page
        out_list = parser(html)  # parse it into a list of rows
        save_csv(out_list, "d:\\book.csv")  # append to the CSV file
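
One thing to keep in mind: the file is opened in "a+" (append) mode, so rerunning the script keeps adding rows rather than overwriting. A quick way to verify the output is to read the file back (a minimal sketch; the path matches the one used above):

import csv

# Read the file back to verify what was written
with open("d:\\book.csv", newline='', encoding="utf-8") as f:
    for row in csv.reader(f):
        print(row)  # each row is [title, author, synopsis, updated]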

Saving to a Database

This example scrapes Douban's latest-books listing and batch-inserts the collected rows into MySQL via pymysql.

import pymysql
import requests
from lxml import etree

def get_html(url, time=30):
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36 Edg/94.0.992.31"
        }
        r = requests.get(url, timeout=time, headers=headers)
        r.encoding = r.apparent_encoding
        r.raise_for_status()
        return r.text
    except Exception as err:
        print(err)

result = []

def parse_html(html):
    html = etree.HTML(html)
    for row in html.xpath('//*[@id="content"]/div/div[1]/ul/li'):
        Naame = row.xpath("div[2]/h2/a/text()")[0].strip()  # book title
        score = row.xpath("div[2]/p[2]/span[2]/text()")[0].strip()  # rating
        # the info line reads "author / publisher / date / price"; keep the
        # pieces in a separate list so the price string is not re-indexed
        parts = row.xpath("div[2]/p[1]/text()")[0].strip().split("/")
        price = parts[0]
        content = parts[1]
        a = parts[2]
        b = parts[-1]
        detail = [Naame, score, price, content, a, b]
        result.append(detail)

def join_all(sql_insert, vals, **dbinfo):
    connet = pymysql.connect(**dbinfo)
    cursor = connet.cursor()
    try:
        cursor.executemany(sql_insert, vals)  # batch-insert every row at once
        connet.commit()
        print("Insert succeeded!")
    except Exception as err:
        print(err)
        connet.rollback()
    finally:
        cursor.close()
        connet.close()

if __name__ == "__main__":
    parms = {
        "host": "127.0.0.1",
        "port": 3306,
        "user": "root",
        "passwd": "123456",
        "db": "db",
        "charset": "utf8"
    }
    for page in range(1, 16):
        url = "https://book.douban.com/latest?subcat=%E5%85%A8%E9%83%A8&p={0}".format(page)
        html = get_html(url)
        if html:
            parse_html(html)
    sql_insert = "INSERT INTO db(Naame,score,price,content,a,b) \
                  VALUES(%s,%s,%s,%s,%s,%s)"
    join_all(sql_insert, result, **parms)
    print(result)
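
The script assumes a MySQL table already exists whose columns match the INSERT statement. The article never shows the real DDL, so the schema below is only a guess with arbitrary column widths (the column names, including the original "Naame" spelling, are kept so the INSERT above still matches):

import pymysql

# One-off setup: create the table the INSERT above expects (schema is a guess)
ddl = """
CREATE TABLE IF NOT EXISTS db (
    Naame   VARCHAR(255),  -- book title
    score   VARCHAR(16),   -- rating
    price   VARCHAR(128),  -- first field of the info line (actually the author)
    content VARCHAR(128),  -- second field (publisher)
    a       VARCHAR(64),   -- third field (publication date)
    b       VARCHAR(64)    -- last field (list price)
) DEFAULT CHARSET=utf8
"""
conn = pymysql.connect(host="127.0.0.1", port=3306, user="root",
                       passwd="123456", db="db", charset="utf8")
with conn.cursor() as cur:
    cur.execute(ddl)
conn.commit()
conn.close()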

Summary

That's all for this article. I hope it has been helpful, and I hope you will keep following 服務(wù)器之家 for more content!

Original article: https://blog.csdn.net/qq_50951790/article/details/120643441
