"""Scrape app data from the Wandoujia category listing.

url: https://www.wandoujia.com/category/6001

Fields collected per app:
    app_name      - name
    detail_url    - detail page URL
    download_num  - number of installs
    app_size      - APP size
"""
import re

import requests

CATEGORY_URL = 'https://www.wandoujia.com/category/6001'
OUTPUT_PATH = 'wandoujia.text'

# NOTE(review): this pattern contains a single capture group, so
# re.findall() returns plain strings, while save_data() unpacks four
# fields per record. It looks like the HTML tags (and the other three
# groups) were lost from this literal -- restore them against the live
# page markup before relying on the output.
_APP_PATTERN = re.compile('.*?.*?(.*?)万人安装 ・ .*?MB', re.S)


def get_page(url, timeout=10):
    """Send a GET request to *url* and return the response object.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before requests raises a
            timeout error. Without it a stalled connection would block
            the script indefinitely.

    Returns:
        The ``requests.Response`` returned by ``requests.get``.
    """
    return requests.get(url, timeout=timeout)


def parse_index(html):
    """Return every match of the app-card pattern found in *html*.

    Args:
        html: Text of the category index page.

    Returns:
        The list produced by ``findall`` on the module-level pattern; it
        is empty when nothing matches.
    """
    return _APP_PATTERN.findall(html)


def save_data(movie):
    """Print one app record and append it to the output file.

    Args:
        movie: A 4-item sequence unpacked as
            ``(detail_url, app_name, download_num, app_size)``.

    Raises:
        ValueError: If *movie* does not unpack into exactly four items.
    """
    detail_url, app_name, download_num, app_size = movie
    data = f'''
    游戏名称:{app_name}
    详情页url:{detail_url}
    下载人数:{download_num}万人
    APP大小:{app_size}
    \n
    \n
    '''
    print(data)
    # Append mode so that records from successive calls accumulate.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
        f.write(data)


def main():
    """Fetch the category page, parse it, and save every record found."""
    print(CATEGORY_URL)
    index_res = get_page(CATEGORY_URL)
    for app in parse_index(index_res.text):
        save_data(app)


if __name__ == '__main__':
    main()