# Packages used
import re
from time import sleep
from urllib.parse import quote

import lxml
import requests
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Run Selenium headless (no visible browser window)
# Chrome launch options: each flag below is passed to the browser as a
# command-line argument (headless mode, GPU disabled).
chrome_options = Options()
for _chrome_flag in ('--headless', '--disable-gpu'):
    chrome_options.add_argument(_chrome_flag)
# Main code
# HTTP headers sent with the audio download request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36'
}

# Captures the text between "id=" and the next double quote inside an <a> tag.
ANCHOR_ID_PATTERN = re.compile('<a.*?id=(.*?)"', re.M)

# Which regex match is used as the song id. The script has always taken the
# second match; presumably the first match in the live markup is not a usable
# song id (e.g. an empty capture from an `id="..."` attribute) -- verify
# against the page before changing.
SONG_MATCH_INDEX = 1

# Characters that cannot appear in a Windows file name.
INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def build_search_url(name: str) -> str:
    """Return the search-page URL for the song title *name*.

    The title is percent-encoded so characters such as ``&`` or ``#`` cannot
    break the URL structure.
    """
    return 'https://music.163.com/#/search/m/?s=' + quote(name, safe='') + '&type=1'


def extract_anchor_ids(fragment: str) -> list:
    """Return every ``id=`` capture found in the ``<a>`` tags of *fragment*."""
    return ANCHOR_ID_PATTERN.findall(fragment)


def pick_song_id(music_id: list):
    """Return the match at ``SONG_MATCH_INDEX``, or ``None`` if it is missing or empty."""
    if len(music_id) <= SONG_MATCH_INDEX:
        return None
    return music_id[SONG_MATCH_INDEX] or None


def build_media_url(song_id: str) -> str:
    """Return the outer-link (direct media) URL for *song_id*."""
    return 'http://music.163.com/song/media/outer/url?id=' + song_id + '.mp3'


def build_filepath(name: str) -> str:
    """Return the output file name for *name*, with invalid characters replaced by ``_``."""
    return INVALID_FILENAME_CHARS.sub('_', name) + '.mp3'


def fetch_search_page(search_url: str) -> str:
    """Load *search_url* in headless Chrome and return the result frame's HTML.

    The browser is always shut down, even when navigation fails.
    """
    # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of
    # a Service object -- confirm the installed version before changing it.
    browser = webdriver.Chrome(executable_path='chromedriver.exe', options=chrome_options)
    try:
        browser.get(url=search_url)
        # The HTML is read from inside the 'g_iframe' frame.
        browser.switch_to.frame('g_iframe')
        # Short pause before reading the DOM; presumably gives the page time
        # to render its results.
        sleep(0.5)
        return browser.execute_script('return document.documentElement.outerHTML')
    finally:
        browser.quit()


def download_song(url: str, filepath: str) -> None:
    """Download *url* and write the response body to *filepath*.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
    """
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    with open(filepath, 'wb') as f:
        f.write(response.content)


def main() -> None:
    """Prompt for a song title, look it up, and save the audio as ``<title>.mp3``."""
    name = input('请输入歌曲名:')
    page_text = fetch_search_page(build_search_url(name))

    result_div = bs(page_text, 'lxml').find('div', class_="ztag j-flag")
    music_id = extract_anchor_ids(str(result_div)) if result_div is not None else []

    # A loop over `music_id` could be added here to download several versions.
    song_id = pick_song_id(music_id)
    if song_id is None:
        raise SystemExit('未找到歌曲: ' + name)

    # Outer-link address of the audio file.
    url = build_media_url(song_id)
    print(url)
    try:
        download_song(url, build_filepath(name))
    except requests.RequestException as err:
        raise SystemExit('下载失败: ' + str(err)) from err
    print('爬取完成!')


if __name__ == '__main__':
    main()
# End
# Note: VIP-only songs cannot be scraped.