python怎么爬取B站关注列表及数据库的设计与操作

python怎么爬取B站关注列表及数据库的设计与操作

这篇文章主要介绍了python怎么爬取B站关注列表及数据库的设计与操作的相关知识,内容详细易懂,操作简单快捷,具有一定借鉴价值,相信大家阅读完这篇python怎么爬取B站关注列表及数据库的设计与操作文章都会有所收获,下面我们一起来看看吧。

    一、数据库的设计与操作

    1、数据的分析

    B站的关注列表在

    https://api.bilibili.com/x/relation/followings?vmid=UID&pn=1&ps=50&order=desc&order_type=attention

    中,一页最多50条信息。其中 vmid 为目标用户的 UID,pn 为页码,ps 为每页条数。

    我们大致分析一下信息,

    {"code":0,"message":"0","ttl":1,"data":{"list":[{……

    首先,列表内容存在data:list里。

    其次,对于列表中每一项,有如下信息

    "mid":672353429,"attribute":2,"mtime":1630510107,"tag":null,"special":0,"contract_info":{"is_contractor":false,"ts":0,"is_contract":false,"user_attr":0},"uname":"贝拉kira","face":"http://i2.hdslb.com/bfs/face/668af440f8a8065743d3fa79cfa8f017905d0065.jpg","sign":"元气满满的A-SOUL舞担参上~目标TOPIDOL,一起加油!","official_verify":{"type":0,"desc":"虚拟偶像团体A-SOUL所属艺人"},"vip":{"vipType":2,"vipDueDate":1674576000000,"dueRemark":"","accessStatus":0,"vipStatus":1,"vipStatusWarn":"","themeType":0,"label":{"path":"","text":"年度大会员","label_theme":"annual_vip","text_color":"#FFFFFF","bg_style":1,"bg_color":"#FB7299","border_color":""},"avatar_subscript":1,"nickname_color":"#FB7299","avatar_subscript_url":"http://i0.hdslb.com/bfs/vip/icon_Certification_big_member_22_3x.png"}

    其中,mid为用户独一无二的UID;vipType表示会员类型,0是什么都没开,1是大会员,2是年度大会员;official_verify中的type为0代表有官方认证,为-1代表没有官方认证。

    同时我们发现,如果对方锁了列表,会返回

    {"code":-400,"message":"请求错误","ttl":1}

    2、数据库设计

    基于这些,我们先设计数据库,包含两张表,用户信息的基本属性表和关注的关系表。

    def createDB():
        """Create ``BiliFollowDB.db`` and its two tables if they do not exist.

        ``user`` holds one row per Bilibili account (UID, name, signature, VIP
        type and official-verification info); ``relation`` holds one row per
        "follower follows following" edge together with the follow timestamp.

        Both statements use ``create table if not exists``, so calling this
        function repeatedly is safe.
        """
        user_table_ddl = '''
            create table if not exists user(
                UID int PRIMARY KEY NOT NULL,
                NAME varchar NOT NULL,
                SIGN varchar DEFAULT NULL,
                vipType int NOT NULL,
                verifyType int NOT NULL,
                verifyDesc varchar DEFAULT NULL
            )'''
        # Each end of an edge references user(UID) on its own.  A single
        # composite key "(follower, following) REFERENCES user(UID, UID)"
        # would name the same parent column twice and describe neither
        # relationship.  SQLite only enforces foreign keys when
        # "PRAGMA foreign_keys" is ON, which this code never sets.
        relation_table_ddl = '''
            create table if not exists relation(
                follower int NOT NULL,
                following int NOT NULL,
                followTime int NOT NULL,
                PRIMARY KEY (follower, following),
                FOREIGN KEY (follower) REFERENCES user(UID),
                FOREIGN KEY (following) REFERENCES user(UID)
            )'''

        link = sqlite3.connect('BiliFollowDB.db')
        try:
            print("database open success")
            # create user table
            link.execute(user_table_ddl)
            # create relation table
            link.execute(relation_table_ddl)
            print("database create success")
            link.commit()
        finally:
            # Release the file handle even if one of the statements fails.
            link.close()

    3、数据库操作

    首先是插入新用户列表的函数。我的思路是爬完一个人的关注列表后,把一整个list丢给该函数,由它判断是否存在新增用户,存在则把新增用户的UID传回,作为下一次爬虫的起点。

    def insertUser(infos):
        """Insert the users in ``infos`` that are not yet in the ``user`` table.

        Args:
            infos: Iterable of dicts with the keys ``uid``, ``name``,
                ``vipType``, ``verifyType``, ``sign`` and ``verifyDesc``.

        Returns:
            The UIDs that were newly inserted, in input order.  A UID that
            appears more than once in ``infos`` is inserted (and reported)
            only the first time.
        """
        insert_cmd = (
            "insert into user(UID, NAME, vipType, verifyType, sign, verifyDesc) "
            "values(?, ?, ?, ?, ?, ?);"
        )
        # Bound as a parameter rather than formatted into the SQL text.
        exist_cmd = "select count(UID) from user where UID = ?;"

        new_ids = []
        conn = sqlite3.connect('BiliFollowDB.db')
        try:
            cursor = conn.cursor()
            for info in infos:
                (count,) = cursor.execute(exist_cmd, (info['uid'],)).fetchone()
                if count:
                    continue
                new_ids.append(info['uid'])
                cursor.execute(
                    insert_cmd,
                    (
                        info['uid'],
                        info['name'],
                        info['vipType'],
                        info['verifyType'],
                        info['sign'],
                        info['verifyDesc'],
                    ),
                )
            conn.commit()
        finally:
            # Close the connection even when a lookup or insert raises.
            conn.close()
        return new_ids

    然后是插入关系的函数,这个比较简单

    def insertFollowing(uid: int, subscribe):
        """Record that ``uid`` follows every user listed in ``subscribe``.

        Args:
            uid: UID of the follower.
            subscribe: Iterable of ``(following_uid, follow_time)`` pairs.

        A row that violates a table constraint (typically a
        ``(follower, following)`` pair that is already stored) is printed and
        skipped, so one duplicate does not abort the rest of the batch.
        """
        insert_cmd = (
            "insert into relation(follower, following, followTime) "
            "values(?, ?, ?);"
        )
        conn = sqlite3.connect('BiliFollowDB.db')
        try:
            cursor = conn.cursor()
            for follow in subscribe:
                row = (uid, follow[0], follow[1])
                try:
                    cursor.execute(insert_cmd, row)
                except sqlite3.IntegrityError:
                    print(row)
            conn.commit()
        finally:
            # Close the connection even when an unexpected error is raised.
            conn.close()

    二、爬虫

    通过观察,我们发现睿叔叔(B站)对关注列表做了限制,最多只能查看前5页,

    即使是人工操作也只能访问5页,那没办法啦,我们就爬5页吧。

    def getFollowingList(uid: int):
        """Crawl up to five pages of ``uid``'s following list and store them.

        Every followed account is passed to ``insertUser`` and every
        (uid, followed account, follow time) edge to ``insertFollowing``.
        Paging stops early when a request does not return HTTP 200, when the
        API answers with code -400, or when a page carries no entries;
        whatever was collected up to that point is still stored.

        Args:
            uid: UID whose following list is crawled.

        Returns:
            Whatever ``insertUser`` returns for the collected users (the UIDs
            it newly inserted).
        """
        # % (UID, PageNumber)
        url = (
            "https://api.bilibili.com/x/relation/followings"
            "?vmid=%d&pn=%d&ps=50&order=desc&order_type=attention&jsonp=jsonp"
        )
        infos = []
        subscribe = []
        for page in range(1, 6):
            response = requests.get(url % (uid, page))
            if response.status_code != 200:
                print("GET ERROR!")
                # Do not try to parse the body of a failed request.
                break
            dic = json.loads(response.text)
            if dic['code'] == -400:
                break
            # Error replies carry no "data" object; treat them as an empty page.
            users = (dic.get('data') or {}).get('list') or []
            if not users:
                break
            for usr in users:
                verify = usr['official_verify']
                infos.append({
                    'uid': usr['mid'],
                    'name': usr['uname'],
                    'vipType': usr['vip']['vipType'],
                    'verifyType': verify['type'],
                    'sign': usr['sign'],
                    # None is stored as SQL NULL; the literal string 'NULL'
                    # would be stored as ordinary text.
                    'verifyDesc': None if verify['type'] == -1 else verify['desc'],
                })
                subscribe.append((usr['mid'], usr['mtime']))

        new_ids = insertUser(infos)
        insertFollowing(uid, subscribe)
        return new_ids

    三、完整代码

    # by concyclics
    # -*- coding: UTF-8 -*-
    import sqlite3
    import json

    import requests

    # % (UID, PageNumber)
    _FOLLOWINGS_URL = (
        "https://api.bilibili.com/x/relation/followings"
        "?vmid=%d&pn=%d&ps=50&order=desc&order_type=attention&jsonp=jsonp"
    )
    # Pages 1..5 are requested for each user.
    _MAX_PAGES = 5


    def createDB():
        """Create ``BiliFollowDB.db`` and its two tables if they do not exist.

        ``user`` holds one row per account; ``relation`` holds one row per
        "follower follows following" edge together with the follow timestamp.
        """
        user_table_ddl = '''
            create table if not exists user(
                UID int PRIMARY KEY NOT NULL,
                NAME varchar NOT NULL,
                SIGN varchar DEFAULT NULL,
                vipType int NOT NULL,
                verifyType int NOT NULL,
                verifyDesc varchar DEFAULT NULL
            )'''
        # One single-column foreign key per end of the edge; a composite key
        # "REFERENCES user(UID, UID)" would name the same parent column twice.
        # SQLite only enforces these when "PRAGMA foreign_keys" is ON, which
        # this script never sets.
        relation_table_ddl = '''
            create table if not exists relation(
                follower int NOT NULL,
                following int NOT NULL,
                followTime int NOT NULL,
                PRIMARY KEY (follower, following),
                FOREIGN KEY (follower) REFERENCES user(UID),
                FOREIGN KEY (following) REFERENCES user(UID)
            )'''

        link = sqlite3.connect('BiliFollowDB.db')
        try:
            print("database open success")
            # create user table
            link.execute(user_table_ddl)
            # create relation table
            link.execute(relation_table_ddl)
            print("database create success")
            link.commit()
        finally:
            link.close()


    def insertUser(infos):
        """Insert the users in ``infos`` that are not yet in the ``user`` table.

        Args:
            infos: Iterable of dicts with the keys ``uid``, ``name``,
                ``vipType``, ``verifyType``, ``sign`` and ``verifyDesc``.

        Returns:
            The UIDs that were newly inserted, in input order.
        """
        insert_cmd = (
            "insert into user(UID, NAME, vipType, verifyType, sign, verifyDesc) "
            "values(?, ?, ?, ?, ?, ?);"
        )
        # Bound as a parameter rather than formatted into the SQL text.
        exist_cmd = "select count(UID) from user where UID = ?;"

        new_ids = []
        conn = sqlite3.connect('BiliFollowDB.db')
        try:
            cursor = conn.cursor()
            for info in infos:
                (count,) = cursor.execute(exist_cmd, (info['uid'],)).fetchone()
                if count:
                    continue
                new_ids.append(info['uid'])
                cursor.execute(
                    insert_cmd,
                    (
                        info['uid'],
                        info['name'],
                        info['vipType'],
                        info['verifyType'],
                        info['sign'],
                        info['verifyDesc'],
                    ),
                )
            conn.commit()
        finally:
            conn.close()
        return new_ids


    def insertFollowing(uid: int, subscribe):
        """Record that ``uid`` follows every user listed in ``subscribe``.

        Args:
            uid: UID of the follower.
            subscribe: Iterable of ``(following_uid, follow_time)`` pairs.

        A row the database rejects (typically an edge that is already stored)
        is printed and skipped so the rest of the batch is still written.
        """
        insert_cmd = (
            "insert into relation(follower, following, followTime) "
            "values(?, ?, ?);"
        )
        conn = sqlite3.connect('BiliFollowDB.db')
        try:
            cursor = conn.cursor()
            for follow in subscribe:
                row = (uid, follow[0], follow[1])
                try:
                    cursor.execute(insert_cmd, row)
                except sqlite3.Error:
                    # Best effort: report the rejected row and carry on.
                    print(row)
            conn.commit()
        finally:
            conn.close()


    def _fetchFollowingPage(uid, page):
        """Return the raw entries on one page of ``uid``'s following list.

        Returns ``None`` when the page cannot be read: the request fails or
        does not return HTTP 200, the body is not the expected JSON, the API
        answers with code -400, or the reply has no ``data.list``.
        """
        try:
            # Without a timeout a stalled connection would block the crawl.
            response = requests.get(_FOLLOWINGS_URL % (uid, page), timeout=10)
        except requests.RequestException:
            print("GET ERROR!")
            return None
        if response.status_code != 200:
            print("GET ERROR!")
            return None
        try:
            dic = json.loads(response.text)
            if dic['code'] == -400:
                return None
            return dic['data']['list']
        except (ValueError, KeyError, TypeError):
            return None


    def getFollowingList(uid: int):
        """Crawl up to five pages of ``uid``'s following list and store them.

        If any page cannot be read, nothing is stored and ``[]`` is returned.
        Otherwise every followed account is passed to ``insertUser`` and every
        edge to ``insertFollowing``.

        Returns:
            The UIDs ``insertUser`` newly inserted, or ``[]`` on failure.
        """
        infos = []
        subscribe = []
        for page in range(1, _MAX_PAGES + 1):
            users = _fetchFollowingPage(uid, page)
            if users is None:
                return []
            if not users:
                # An empty page means there is nothing further to fetch.
                break
            for usr in users:
                verify = usr['official_verify']
                infos.append({
                    'uid': usr['mid'],
                    'name': usr['uname'],
                    'vipType': usr['vip']['vipType'],
                    'verifyType': verify['type'],
                    'sign': usr['sign'],
                    # None is stored as SQL NULL; the literal string 'NULL'
                    # would be stored as ordinary text.
                    'verifyDesc': None if verify['type'] == -1 else verify['desc'],
                })
                subscribe.append((usr['mid'], usr['mtime']))

        new_ids = insertUser(infos)
        insertFollowing(uid, subscribe)
        return new_ids


    def getFollowingUid(uid: int):
        """Return the UIDs found on up to five pages of ``uid``'s following list.

        Nothing is written to the database.  UIDs from all fetched pages are
        accumulated; if any page cannot be read, ``[]`` is returned.
        """
        ids = []
        for page in range(1, _MAX_PAGES + 1):
            users = _fetchFollowingPage(uid, page)
            if users is None:
                return []
            if not users:
                break
            ids.extend(usr['mid'] for usr in users)
        return ids


    def work(root):
        """Crawl level by level, starting from the UIDs in ``root``.

        Each UID is printed and crawled with ``getFollowingList``; the UIDs it
        reports as newly inserted form the next level.  The loop ends when a
        level yields no new users.
        """
        id_list = root
        while id_list:
            next_list = []
            for uid in id_list:
                print(uid)
                next_list += getFollowingList(uid)
            id_list = next_list


    def rework():
        """Find followed accounts that are missing from the ``user`` table.

        Reads every UID stored in ``user``, prints that list, fetches each
        user's following UIDs with ``getFollowingUid`` and collects those not
        present in the table.

        Returns:
            The missing UIDs in discovery order, each listed once.
        """
        conn = sqlite3.connect('BiliFollowDB.db')
        try:
            known_ids = [row[0] for row in conn.execute("select uid from user;")]
        finally:
            conn.close()
        print(known_ids)

        # A set gives O(1) membership tests and also filters repeats.
        seen = set(known_ids)
        new_ids = []
        for uid in known_ids:
            for followed in getFollowingUid(uid):
                if followed not in seen:
                    seen.add(followed)
                    new_ids.append(followed)
        return new_ids


    if __name__ == "__main__":
        createDB()
        # work([**put root UID here**, ])

    关于“python怎么爬取B站关注列表及数据库的设计与操作”这篇文章的内容就介绍到这里,感谢各位的阅读!相信大家对“python怎么爬取B站关注列表及数据库的设计与操作”知识都有一定的了解,大家如果还想学习更多知识,欢迎关注恰卡编程网行业资讯频道。

    发布于 2022-05-19 10:35:16
    收藏
    分享
    海报
    0 条评论
    25
    上一篇:jquery如何移除前一个元素 下一篇:Android单选多选按钮怎么使用
    目录

      0 条评论

      本站已关闭游客评论,请登录或者注册后再评论吧~

      忘记密码?

      图形验证码