python爬取B站關注列表及數據庫的設計與操作
一、數據庫的設計與操作
1、數據的分析

B站的關注列表在
https://api.bilibili.com/x/relation/followings?vmid=UID&pn=1&ps=50&order=desc&order_type=attention
中,一頁最多50條信息。
我們大致分析一下信息,
{
"code": 0,
"message": "0",
"ttl": 1,
"data": {
"list": [{……
首先,列表內容存在data:list里。
其次,對于列表中每一項,有如下信息
"mid": 672353429,
"attribute": 2,
"mtime": 1630510107,
"tag": null,
"special": 0,
"contract_info": {
"is_contractor": false,
"ts": 0,
"is_contract": false,
"user_attr": 0
},
"uname": "貝拉kira",
"face": "http://i2.hdslb.com/bfs/face/668af440f8a8065743d3fa79cfa8f017905d0065.jpg",
"sign": "元氣滿滿的A-SOUL舞擔(dān)參上~目標(biāo)TOP IDOL,一起加油!",
"official_verify": {
"type": 0,
"desc": "虛擬偶像團(tuán)體A-SOUL 所屬藝人"
},
"vip": {
"vipType": 2,
"vipDueDate": 1674576000000,
"dueRemark": "",
"accessStatus": 0,
"vipStatus": 1,
"vipStatusWarn": "",
"themeType": 0,
"label": {
"path": "",
"text": "年度大會員",
"label_theme": "annual_vip",
"text_color": "#FFFFFF",
"bg_style": 1,
"bg_color": "#FB7299",
"border_color": ""
},
"avatar_subscript": 1,
"nickname_color": "#FB7299",
"avatar_subscript_url": "http://i0.hdslb.com/bfs/vip/icon_Certification_big_member_22_3x.png"
}
其中,mid為用戶獨一無二的UID;vipType中,0是什么都沒開,1是大會員,2是年度大會員;official_verify中,type 0代表官方認證,-1代表沒有官方認證。
同時我們發現,如果對方鎖了列表,會返回
{"code":-400,"message":"請求錯誤","ttl":1}
2、數據庫設計
基于這些,我們先設計數據庫,包含兩張表:用戶信息的基本屬性表和關注的關系表。
def createDB():
    """Create the SQLite database file and its two tables if they are missing.

    Opens (creating it if necessary) ``BiliFollowDB.db`` in the current
    working directory and issues ``create table if not exists`` for:

    * ``user``     -- one row per account: UID, NAME, SIGN, vipType,
      verifyType, verifyDesc.
    * ``relation`` -- one row per (follower, following) pair plus the
      follow timestamp.

    Both statements use ``if not exists``, so calling this repeatedly is
    harmless; a table that already exists keeps the schema it was created
    with.
    """
    link = sqlite3.connect('BiliFollowDB.db')
    try:
        print("database open success")
        UserTableDDL = '''
        create table if not exists user(
        UID int PRIMARY KEY NOT NULL,
        NAME varchar NOT NULL,
        SIGN varchar DEFAULT NULL,
        vipType int NOT NULL,
        verifyType int NOT NULL,
        verifyDesc varchar DEFAULT NULL)
        '''
        # Each side of the relation references user.UID on its own.  A single
        # composite key "(follower,following) REFERENCES user(UID,UID)" has no
        # matching unique parent key and makes every insert fail with
        # "foreign key mismatch" as soon as PRAGMA foreign_keys is enabled.
        RelationTableDDL = '''
        create table if not exists relation(
        follower int NOT NULL,
        following int NOT NULL,
        followTime int NOT NULL,
        PRIMARY KEY (follower,following),
        FOREIGN KEY(follower) REFERENCES user(UID),
        FOREIGN KEY(following) REFERENCES user(UID)
        )
        '''
        # create user table
        link.execute(UserTableDDL)
        # create relation table
        link.execute(RelationTableDDL)
        print("database create success")
        link.commit()
    finally:
        # Release the file handle even when one of the statements fails.
        link.close()
3、數據庫操作
其次是插入新用戶的列表,我的思路是爬完一個人的關注列表,把一整個list丟給該函數,判斷是否存在新增用戶,存在則把新增用戶傳回,作為下一次爬蟲的起點。
def insertUser(infos):
    """Insert the users that are not stored yet and report which ones were new.

    Args:
        infos: iterable of dicts with the keys ``uid``, ``name``,
            ``vipType``, ``verifyType``, ``sign`` and ``verifyDesc``.

    Returns:
        list: the ``uid`` of every row that was actually inserted, in input
        order.  A uid that is already in the ``user`` table (or that
        appeared earlier in ``infos``) is skipped.
    """
    InsertCmd = "insert into user (UID,NAME,vipType,verifyType,sign,verifyDesc) values (?,?,?,?,?,?);"
    # The uid is bound as a parameter rather than %-formatted into the SQL.
    ExistCmd = "select count(UID) from user where UID=?;"
    newID = []
    conn = sqlite3.connect('BiliFollowDB.db')
    try:
        link = conn.cursor()
        for info in infos:
            # count() always yields exactly one row.
            exist_ID = link.execute(ExistCmd, (info['uid'],)).fetchone()[0]
            if exist_ID == 0:
                newID.append(info['uid'])
                link.execute(
                    InsertCmd,
                    (info['uid'], info['name'], info['vipType'],
                     info['verifyType'], info['sign'], info['verifyDesc']),
                )
        conn.commit()
    finally:
        # Close the connection even if a row is malformed or an insert fails.
        conn.close()
    return newID
然后是插入關系的函數,這個比較簡單。
def insertFollowing(uid:int, subscribe):
    """Store the accounts followed by ``uid`` in the ``relation`` table.

    Args:
        uid: UID of the follower.
        subscribe: iterable of ``(following_uid, follow_time)`` pairs.

    A row that SQLite rejects -- for instance because the
    (follower, following) pair is already stored -- is printed and skipped
    so that one duplicate does not abort the whole batch.
    """
    InsertCmd = "insert into relation (follower,following,followTime) values (?,?,?);"
    conn = sqlite3.connect('BiliFollowDB.db')
    try:
        link = conn.cursor()
        for follow in subscribe:
            try:
                link.execute(InsertCmd, (uid, follow[0], follow[1]))
            except sqlite3.Error:
                # Report the offending row and carry on with the rest.
                print((uid, follow[0], follow[1]))
        conn.commit()
    finally:
        conn.close()
二、爬蟲
通過觀察,我們發現睿叔叔鎖了5頁的關注列表

即使是人工操作也只能訪問5頁,那沒辦法啦,我們就爬5頁吧。
def getFollowingList(uid:int):
    """Fetch the visible following list of ``uid`` and store it.

    Requests up to five pages of 50 entries, collects the profile fields
    and follow timestamps of every listed account, then passes them to
    ``insertUser`` and ``insertFollowing``.

    Paging stops at the first page that cannot be used (non-200 status,
    the ``-400`` reply, or a page without entries); whatever was collected
    before that point is still stored.

    Args:
        uid: UID whose following list is requested.

    Returns:
        The value returned by ``insertUser`` for the collected accounts.
    """
    url="https://api.bilibili.com/x/relation/followings?vmid=%d&pn=%d&ps=50&order=desc&order_type=attention&jsonp=jsonp"# % (UID, Page Number)
    infos = []
    subscribe = []
    for i in range(1, 6):
        # Without a timeout a stalled connection would block the crawl forever.
        html = requests.get(url % (uid, i), timeout=10)
        if html.status_code != 200:
            print("GET ERROR!")
            # An error page is not worth parsing.
            break
        dic = json.loads(html.text)
        if dic['code'] == -400:
            break
        # "data" / "list" can be absent or null when the reply carries no entries.
        users = (dic.get('data') or {}).get('list')
        if not users:
            # Nothing on this page, so the later pages are not requested.
            break
        for usr in users:
            verify = usr['official_verify']
            info = {
                'uid': usr['mid'],
                'name': usr['uname'],
                'vipType': usr['vip']['vipType'],
                'verifyType': verify['type'],
                'sign': usr['sign'],
            }
            if info['verifyType'] == -1:
                # NOTE(review): this stores the literal text 'NULL', not SQL NULL.
                info['verifyDesc'] = 'NULL'
            else:
                info['verifyDesc'] = verify['desc']
            subscribe.append((usr['mid'], usr['mtime']))
            infos.append(info)
    newID = insertUser(infos)
    insertFollowing(uid, subscribe)
    return newID
三、完整代碼
#by concyclics
# -*- coding:UTF-8 -*-
import sqlite3
import json
import requests
def createDB():
    """Create the SQLite database file and its two tables if they are missing.

    Opens (creating it if necessary) ``BiliFollowDB.db`` in the current
    working directory and issues ``create table if not exists`` for:

    * ``user``     -- one row per account: UID, NAME, SIGN, vipType,
      verifyType, verifyDesc.
    * ``relation`` -- one row per (follower, following) pair plus the
      follow timestamp.

    Both statements use ``if not exists``, so calling this repeatedly is
    harmless; a table that already exists keeps the schema it was created
    with.
    """
    link = sqlite3.connect('BiliFollowDB.db')
    try:
        print("database open success")
        UserTableDDL = '''
        create table if not exists user(
        UID int PRIMARY KEY NOT NULL,
        NAME varchar NOT NULL,
        SIGN varchar DEFAULT NULL,
        vipType int NOT NULL,
        verifyType int NOT NULL,
        verifyDesc varchar DEFAULT NULL)
        '''
        # Each side of the relation references user.UID on its own.  A single
        # composite key "(follower,following) REFERENCES user(UID,UID)" has no
        # matching unique parent key and makes every insert fail with
        # "foreign key mismatch" as soon as PRAGMA foreign_keys is enabled.
        RelationTableDDL = '''
        create table if not exists relation(
        follower int NOT NULL,
        following int NOT NULL,
        followTime int NOT NULL,
        PRIMARY KEY (follower,following),
        FOREIGN KEY(follower) REFERENCES user(UID),
        FOREIGN KEY(following) REFERENCES user(UID)
        )
        '''
        # create user table
        link.execute(UserTableDDL)
        # create relation table
        link.execute(RelationTableDDL)
        print("database create success")
        link.commit()
    finally:
        # Release the file handle even when one of the statements fails.
        link.close()
def insertUser(infos):
    """Insert the users that are not stored yet and report which ones were new.

    Args:
        infos: iterable of dicts with the keys ``uid``, ``name``,
            ``vipType``, ``verifyType``, ``sign`` and ``verifyDesc``.

    Returns:
        list: the ``uid`` of every row that was actually inserted, in input
        order.  A uid that is already in the ``user`` table (or that
        appeared earlier in ``infos``) is skipped.
    """
    InsertCmd = "insert into user (UID,NAME,vipType,verifyType,sign,verifyDesc) values (?,?,?,?,?,?);"
    # The uid is bound as a parameter rather than %-formatted into the SQL.
    ExistCmd = "select count(UID) from user where UID=?;"
    newID = []
    conn = sqlite3.connect('BiliFollowDB.db')
    try:
        link = conn.cursor()
        for info in infos:
            # count() always yields exactly one row.
            exist_ID = link.execute(ExistCmd, (info['uid'],)).fetchone()[0]
            if exist_ID == 0:
                newID.append(info['uid'])
                link.execute(
                    InsertCmd,
                    (info['uid'], info['name'], info['vipType'],
                     info['verifyType'], info['sign'], info['verifyDesc']),
                )
        conn.commit()
    finally:
        # Close the connection even if a row is malformed or an insert fails.
        conn.close()
    return newID
def insertFollowing(uid:int, subscribe):
    """Store the accounts followed by ``uid`` in the ``relation`` table.

    Args:
        uid: UID of the follower.
        subscribe: iterable of ``(following_uid, follow_time)`` pairs.

    A row that SQLite rejects -- for instance because the
    (follower, following) pair is already stored -- is printed and skipped;
    the remaining rows are still inserted and committed.
    """
    InsertCmd = "insert into relation (follower,following,followTime) values (?,?,?);"
    conn = sqlite3.connect('BiliFollowDB.db')
    try:
        link = conn.cursor()
        for follow in subscribe:
            try:
                link.execute(InsertCmd, (uid, follow[0], follow[1]))
            except sqlite3.Error:
                # Only database errors are swallowed; a bare "except:" would
                # also hide KeyboardInterrupt and programming mistakes.
                print((uid, follow[0], follow[1]))
        conn.commit()
    finally:
        conn.close()
def getFollowingList(uid:int):
    """Fetch the visible following list of ``uid`` and store it.

    Requests up to five pages of 50 entries, collects the profile fields
    and follow timestamps of every listed account, then passes them to
    ``insertUser`` and ``insertFollowing``.

    Args:
        uid: UID whose following list is requested.

    Returns:
        list: ``[]`` when a page answers with a non-200 status, with code
        ``-400``, or without a ``data.list`` entry (nothing is stored in
        those cases); otherwise the value returned by ``insertUser``.
    """
    url="https://api.bilibili.com/x/relation/followings?vmid=%d&pn=%d&ps=50&order=desc&order_type=attention&jsonp=jsonp"# % (UID, Page Number)
    infos = []
    subscribe = []
    for i in range(1, 6):
        # Without a timeout a stalled connection would block the crawl forever.
        html = requests.get(url % (uid, i), timeout=10)
        if html.status_code != 200:
            print("GET ERROR!")
            return []
        dic = json.loads(html.text)
        if dic['code'] == -400:
            return []
        try:
            users = dic['data']['list']
        except (KeyError, TypeError):
            # "data" missing (KeyError) or null (TypeError).
            return []
        if not users:
            # Empty or null page: nothing more to read, skip the later pages.
            break
        for usr in users:
            verify = usr['official_verify']
            info = {
                'uid': usr['mid'],
                'name': usr['uname'],
                'vipType': usr['vip']['vipType'],
                'verifyType': verify['type'],
                'sign': usr['sign'],
            }
            if info['verifyType'] == -1:
                # NOTE(review): this stores the literal text 'NULL', not SQL NULL.
                info['verifyDesc'] = 'NULL'
            else:
                info['verifyDesc'] = verify['desc']
            subscribe.append((usr['mid'], usr['mtime']))
            infos.append(info)
    newID = insertUser(infos)
    insertFollowing(uid, subscribe)
    return newID
def getFollowingUid(uid:int):
    """Return the UIDs on the visible pages of ``uid``'s following list.

    Requests up to five pages of 50 entries and gathers the ``mid`` of
    every entry.  Nothing is written to the database.

    Args:
        uid: UID whose following list is requested.

    Returns:
        list: the collected UIDs in page order, or ``[]`` when a page
        answers with a non-200 status, with code ``-400``, or without a
        ``data.list`` entry.
    """
    url="https://api.bilibili.com/x/relation/followings?vmid=%d&pn=%d&ps=50&order=desc&order_type=attention&jsonp=jsonp"# % (UID, Page Number)
    # Accumulate across pages; resetting this per page would keep only the
    # UIDs of the last page requested.
    IDs = []
    for i in range(1, 6):
        # Without a timeout a stalled connection would block the crawl forever.
        html = requests.get(url % (uid, i), timeout=10)
        if html.status_code != 200:
            print("GET ERROR!")
            return []
        dic = json.loads(html.text)
        if dic['code'] == -400:
            return []
        try:
            users = dic['data']['list']
        except (KeyError, TypeError):
            # "data" missing (KeyError) or null (TypeError).
            return []
        if not users:
            # Empty or null page: nothing more to read, skip the later pages.
            break
        IDs.extend(usr['mid'] for usr in users)
    return IDs
def work(root):
    """Crawl following lists level by level, starting from ``root``.

    Every UID of the current level is printed and handed to
    ``getFollowingList``; the UIDs those calls return form the next level.
    The loop ends once a level comes back empty.

    Args:
        root: sequence of UIDs to start from.
    """
    frontier = root
    while len(frontier) > 0:
        discovered = []
        for ID in frontier:
            print(ID)
            discovered.extend(getFollowingList(ID))
        frontier = discovered
def rework():
    """Find followed accounts that are missing from the ``user`` table.

    Reads every stored UID, prints that list, asks ``getFollowingUid`` for
    the accounts each stored user follows, and collects the UIDs that are
    not stored yet.

    Returns:
        list: the missing UIDs in discovery order, each listed once.
    """
    conn = sqlite3.connect('BiliFollowDB.db')
    try:
        IDs = [row[0] for row in conn.execute("select uid from user;")]
    finally:
        # The connection is only needed for the read above.
        conn.close()
    print(IDs)
    # Set membership keeps the check O(1); adding each discovery to it also
    # stops a UID followed by several stored users from being reported twice.
    known = set(IDs)
    newID = []
    for ID in IDs:
        for followed in getFollowingUid(ID):
            if followed not in known:
                known.add(followed)
                newID.append(followed)
    return newID
# Script entry point: runs only when the file is executed directly.
if __name__=="__main__":
    # Make sure the database file and both tables exist.
    createDB()
    # To start a crawl, uncomment the call below and put the root UID(s) in the list.
    #work([**put root UID here**,])
四、項目倉庫
https://github.com/Concyclics/BiliBiliFollowSpider
以上就是python爬取B站關注列表及數據庫的設計與操作的詳細內容,更多關于python爬取B站關注列表的資料請關注腳本之家其它相關文章!
相關文章
python基于機器學習預測股票交易信號
近年來,隨著技術的發展,機器學習和深度學習在金融資產量化研究上的應用越來越廣泛和深入。目前,大量數據科學家在Kaggle網站上發布了使用機器學習/深度學習模型對股票、期貨、比特幣等金融資產做預測和分析的文章。本文就來看看如何用python預測股票交易信號 2021-05-05
如何把外網python虛擬環境遷移到內網
這篇文章主要介紹了如何把外網python虛擬環境遷移到內網,文中通過示例代碼介紹的非常詳細,對大家的學習或者工作具有一定的參考學習價值,需要的朋友可以參考下 2020-05-05
Python實現115網盤自動下載的方法
這篇文章主要介紹了Python實現115網盤自動下載的方法,可實現自動調用115客戶端進行下載的功能,非常實用,需要的朋友可以參考下 2014-09-09
Python 腳本實現淘寶準點秒殺功能
這篇文章主要介紹了python實現淘寶準點秒殺腳本,本文圖文實例相結合給大家介紹的非常詳細,具有一定的參考借鑒價值,需要的朋友可以參考下 2019-11-11

