Python 爬取京东商品

2020-07-07 15:01:10 LanceLee 数据爬虫 1105

- N +

最近写了个专门爬取百度的爬虫，后来又想爬取京东的商品信息，这次仍然采用上次使用的 BeautifulSoup + requests 模块。

下面直接上代码，看不懂的地方可以参考这篇文章或代码中的注释来学习。

很多人学习 Python，不知道从何学起。
很多人学习 Python，掌握了基本语法之后，不知道去哪里寻找案例上手。
很多已经做过案例的人，却不知道如何去学习更加高深的知识。
那么针对这三类人，我给大家提供一个好的学习平台，可以免费领取视频教程、电子书籍以及课程的源代码！
QQ群：1097524789

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#written by DY
#http://dyblog.tk
#e-mail:[email protected]
########import###############
import requests
from bs4 import BeautifulSoup
#from openpyxl import *#写入表格使用	，写入txt时报错
import time
from tkinter import * 
import tkinter.messagebox
from  tkinter import ttk
########import结束############

#----------全局变量-----------
# Scheme prefix that searchstart() prepends to the scraped product links.
https = 'https:'

# Browser-like request headers, sent so the site does not block the request.
headers = {
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.9"
    ),
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/80.0.3987.16 Safari/537.36"
    ),
}

# Result lists filled by searchstart() and read row-by-row by genxin():
# product name, price, description text and product link.
name, price, introduct, urlss = [], [], [], []
#----------全局变量结束-------

#===============函数区域==============
#--------图形界面函数开始--------
def genxin():
    """Open a window that lists the scraped search results in a table.

    Reads the module-level lists ``name``, ``price``, ``introduct`` and
    ``urlss`` and inserts one table row per position, stopping at the
    shortest list. The window title contains the current text of the
    ``E1`` entry widget.

    The window is a ``Toplevel`` child of the existing application window.
    It is served by the event loop that is already running, so no second
    ``Tk()`` root and no nested ``mainloop()`` are needed.
    """
    top = Toplevel()
    top.title("'%s'在'京东'中查询结果"%E1.get())
    top.geometry("800x600+600+100")

    columns = ("物品名", "价格", "简介", "链接")
    treeview = ttk.Treeview(top, show="headings", columns=columns, height='100')

    # Column ids double as the visible heading texts.
    for column, width in zip(columns, (200, 50, 200, 50)):
        treeview.column(column, width=width, anchor='center')
        treeview.heading(column, text=column)
    treeview.pack()

    # Debug output of the raw result lists.
    print(name)
    print(price)
    print(introduct)
    print(urlss)

    # zip() stops at the shortest list, so only complete rows are shown.
    for row in zip(name, price, introduct, urlss):
        treeview.insert('', 'end', values=row)
#--------图形界面函数结束--------
					
def searchstart():
    """Search JD.com for the keyword in ``E1`` and show the results.

    Downloads the search result page, extracts price, description, product
    name and link from it into the module-level lists ``price``,
    ``introduct``, ``name`` and ``urlss``, then calls ``genxin()`` to
    display them. Empty or missing values are stored as '无'.

    If the HTTP request fails, an error dialog is shown and nothing is
    displayed.
    """
    # Start from empty lists so rows from an earlier search are not shown again.
    for results in (name, price, introduct, urlss):
        results.clear()

    try:
        response = requests.get(
            'https://search.jd.com/Search',
            params={'keyword': E1.get()},  # lets requests URL-encode the keyword
            headers=headers,
            timeout=10,  # do not freeze the GUI forever on a dead connection
        )
    except requests.RequestException as error:
        tkinter.messagebox.showerror('京东商品 查询', str(error))
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    for wrap in soup.find_all('div', class_="ml-wrap"):
        for goods in wrap.find_all('div', class_="goods-list-v2 gl-type-1 J-goods-list"):
            # Prices.
            for price_box in goods.find_all('div', class_="p-price"):
                for price_tag in price_box.find_all('i'):
                    price.append(price_tag.text or '无')

            for name_box in goods.find_all('div', class_="p-name p-name-type-2"):
                # Description text.
                for em_tag in name_box.find_all('em'):
                    introduct.append(em_tag.text or '无')

                # Product name, taken from the link's title attribute.
                for link in name_box.find_all('a', target="_blank"):
                    name.append(link.get('title') or '无')

                # Product link.
                # NOTE(review): this selects every <a>, while the name loop only
                # selects target="_blank" links; rows stay aligned only if both
                # match the same tags - confirm against the live page.
                for link in name_box.find_all('a'):
                    href = link.get('href')
                    if not href:
                        urlss.append('无')
                    elif href.startswith(('http://', 'https://')):
                        urlss.append(href)  # already absolute, do not prefix again
                    else:
                        urlss.append(https + href)

    # Debug output of the raw result lists.
    print(introduct)
    print(name)
    genxin()
#===============函数区域结束==========


##########图形界面开始#########
# Main window: a keyword entry plus a button that starts the search.
root = Tk()
root.title('京东商品 查询')
root.geometry('250x160')
L1 = Label(root, text="商品名： ")
L1.place(x = 5,y = 15)
E1 = Entry(root, bd =2)
E1.place(x = 60,y = 15)

# Confirm button. It has to sit inside the 250x160 window; at x=350 it would
# be outside the visible area and could not be clicked.
A = Button(root, text ="确定",font=('Arial', 12), width=10, height=1,command=searchstart)
A.place(x = 75,y = 60)

root.mainloop()
###########图形界面结束#########
#written by DY
#http://dyblog.tk
#e-mail:[email protected]