51工具盒子

依楼听风雨
笑看云卷云舒,淡观潮起潮落

Python学习之路-网易云音乐评论爬取

爬取指定歌单内所有歌曲的热评，代码写得比较粗糙，暂时不打算修改。

import csv
import json
import time
import urllib.request

# Character encoding name passed to http_r()/getContent() to decode HTTP response bodies.
code = 'utf-8'


def http_r(url, c, timeout=30):
    """Fetch *url* with a desktop-browser User-Agent and return the decoded body.

    Args:
        url: URL to request.
        c: Name of the character encoding used to decode the response bytes.
        timeout: Socket timeout in seconds, forwarded to ``urlopen`` so that a
            stalled server cannot block the script indefinitely.

    Returns:
        The response body decoded to ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid in encoding *c*.
    """
    # Set the User-Agent header to a desktop Chrome string.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/90.0.4430.212 Safari/537.36'
    }
    request = urllib.request.Request(url=url, headers=headers)
    # The context manager closes the response (and its socket) even when
    # read() or decode() raises.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode(c)


def getUrls(playListID):
    """Fetch a playlist through the third-party API and return its song IDs.

    Args:
        playListID: Playlist identifier; converted with ``str()`` before being
            appended to the request URL, so an ``int`` works as well.

    Returns:
        A list of song-ID strings, one per playlist entry, obtained by
        stripping the API's song-URL prefix from each entry's ``'url'`` field.

    Raises:
        ValueError: If the response body is not valid JSON.
        KeyError: If a playlist entry has no ``'url'`` field.
    """
    # Prefix the API puts in front of each song ID inside the 'url' field.
    song_url_prefix = 'https://api.fczbl.vip/163/?type=url&id='
    print('开始获取歌曲列表')
    playlist = json.loads(
        http_r('https://api.fczbl.vip/163/?type=playlist&id=' + str(playListID), code)
    )
    s = [entry['url'].replace(song_url_prefix, '') for entry in playlist]
    print('已获取 ' + str(len(s)) + ' 首歌曲')
    return s


def _is_failed_response(back):
    """Return True when *back* is not a JSON object or reports a negative ``code``."""
    if back[0:1] != '{':
        return True
    try:
        payload = json.loads(back)
    except ValueError:
        # Starts with '{' but is not parseable JSON (e.g. a truncated body).
        return True
    status = payload.get("code")
    # A response without a "code" field is treated as a success.
    return isinstance(status, int) and status < 0


def getContent(url, c):
    """Fetch *url*, retrying once per second while the response looks failed.

    A response counts as failed when it is not a JSON object or when its
    ``"code"`` field is a negative integer. The two checks are alternatives:
    requiring both at once would try to parse a non-JSON body and would never
    retry a JSON error payload.

    Args:
        url: URL to request.
        c: Name of the character encoding used to decode the response.

    Returns:
        The raw (still JSON-encoded) response text of the first response that
        does not look failed.

    Note:
        The loop has no retry limit; it keeps polling until a response passes.
    """
    back = http_r(url, c)
    while _is_failed_response(back):
        print('获取失败, 重试中')
        time.sleep(1)
        back = http_r(url, c)
    return back


def getComment(urls):
    """Fetch details and hot comments for every song ID and write them to CSV.

    For each ID the song details are requested from the third-party API and
    the comment list from the NetEase Cloud Music comment API. The song's
    hot comments are then passed to ``writeOut``. The CSV file is (re)created
    via ``newCsv`` before the first song is processed.

    Args:
        urls: Sequence of song-ID strings.

    Returns:
        None.
    """
    total = str(len(urls))
    url_1 = 'https://api.fczbl.vip/163/?type=single&id='  # third-party API (song details)
    url_2 = 'https://music.163.com/api/v1/resource/comments/R_SO_4_'  # NetEase Cloud Music comment API
    print("开始获取歌曲详情")
    newCsv()
    for i, mID in enumerate(urls, start=1):
        # Shared progress prefix, e.g. "12345 (3/40): ".
        tag = mID + ' (' + str(i) + '/' + total + '): '
        print(tag + '正在获取歌曲详情')
        info_m = json.loads(getContent(url_1 + mID, code))
        print(tag + '正在获取评论列表')
        raw_comments = getContent(url_2 + mID, code)

        try:
            hotCommentsList = json.loads(raw_comments)["hotComments"]  # list of hot comments
        except (ValueError, KeyError, TypeError):
            # Invalid JSON, a missing "hotComments" field, or a non-object
            # payload: skip this song instead of aborting the whole run.
            print(tag + '获取失败已跳过')
            continue

        print(tag + '已获取到 ' + str(len(hotCommentsList)) + ' 条热评')
        print(tag + '正在解析热评列表')
        # NOTE: comments are keyed by nickname, so when several hot comments
        # share one nickname only the last one is kept.
        hotComments = {val["user"]["nickname"]: val["content"] for val in hotCommentsList}
        info = {"title": info_m["name"], "author": info_m["artist"], "comment": hotComments}
        print(tag + str(info))
        writeOut(info)
        time.sleep(2)  # pause between songs




def writeOut(info, path='infos.csv'):
    """Append one song's comments to the CSV file, one row per comment.

    Args:
        info: Mapping with keys ``'title'``, ``'author'`` and ``'comment'``;
            ``'comment'`` maps a commenter name to the comment text.
        path: CSV file to append to. Defaults to ``infos.csv`` in the current
            working directory.

    Returns:
        None.
    """
    name = info['title']
    author = info['author']
    commentList = info['comment']
    # newline='' hands line-ending control to the csv module; without it,
    # Windows output gets blank rows and embedded newlines are mangled.
    with open(path, 'a', encoding='utf-8', newline='') as file_obj:
        f_csv = csv.writer(file_obj)
        f_csv.writerows([name, author, user, text] for user, text in commentList.items())




def newCsv(path='infos.csv'):
    """Create (or truncate) the CSV output file and write its header row.

    Args:
        path: CSV file to create. Defaults to ``infos.csv`` in the current
            working directory.

    Returns:
        None.
    """
    # newline='' hands line-ending control to the csv module, avoiding the
    # blank rows that otherwise appear on Windows.
    with open(path, 'w', encoding='utf-8', newline='') as f:
        csv.writer(f).writerow(['歌曲名', '歌曲作者', '评论者', '评论'])

# Script entry point: scrape the hot comments of every song in one playlist.
# The guard keeps the scrape from starting when this module is imported.
if __name__ == "__main__":
    musicIDList = getUrls("926056136")  # playlist ID
    getComment(musicIDList)

赞(0)
未经允许不得转载:工具盒子 » Python学习之路-网易云音乐评论爬取