Python-Crawler-Demo/TaobaoProductPrice.py at master · cuidezhu/Python-Crawler-Demo · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# -*- coding: utf-8 -*-
# @Author: cuidezhu
# @Date:   2018-03-30 14:24:01
# @Last Modified by:   cuidezhu
# @Last Modified time: 2018-03-30 14:50:26

import json
import re
from urllib.parse import quote

import requests

def getHTMLText(url):
  """Download *url* and return its body as text, or "" when the request fails.

  Args:
    url: Address to fetch with an HTTP GET (30 second timeout).

  Returns:
    The decoded response body, or an empty string if the request could not
    be completed or the server answered with an error status.
  """
  try:
    r = requests.get(url, timeout=30)
    # Turn 4xx/5xx answers into an exception so they take the failure path.
    r.raise_for_status()
    # Decode with the encoding detected from the body rather than the one
    # declared in the response headers.
    r.encoding = r.apparent_encoding
    return r.text
  except requests.RequestException:
    # Only network/HTTP failures are treated as "no page"; programming errors
    # and KeyboardInterrupt are no longer silently swallowed.
    return ""

def parsePage(ilt, html):
  """Extract [price, title] pairs from a result page and append them to *ilt*.

  Prices come from the ``view_price`` fields and titles from the ``raw_title``
  fields found in *html*; the n-th price is paired with the n-th title. If the
  two counts differ, the surplus entries are ignored.

  Args:
    ilt: List that receives one ``[price, title]`` entry (two strings) per
      product. It is modified in place.
    html: Page source. Anything that is not a non-empty ``str`` is ignored.
  """
  if not isinstance(html, str) or not html:
    return
  # Capture groups return the value directly, so a ':' inside a title can no
  # longer truncate it the way splitting on ':' did.
  prices = re.findall(r'"view_price":"([\d.]*)"', html)
  # The title pattern steps over backslash escapes so an escaped quote does
  # not end the match early.
  raw_titles = re.findall(r'"raw_title":"((?:[^"\\\n]|\\.)*)"', html)
  for price, raw_title in zip(prices, raw_titles):
    try:
      # Decode escapes (e.g. \uXXXX) as a JSON string instead of eval()-ing
      # text that came from a remote server.
      title = json.loads('"' + raw_title + '"')
    except ValueError:
      # Not a valid JSON string body: keep the text as scraped.
      title = raw_title
    ilt.append([price, title])

def printGoodsList(ilt):
  """Print the collected products as a tab-separated, numbered table.

  Args:
    ilt: Sequence of entries whose first item is the price and whose second
      item is the title.
  """
  row_format = "{:4}\t{:8}\t{:16}"
  header = row_format.format("序号", "价格", "商品名称")
  print(header)
  # Rows are numbered from 1 in the order they appear in the list.
  for number, entry in enumerate(ilt, 1):
    line = row_format.format(number, entry[0], entry[1])
    print(line)

def main(goods="书包", depth=2):
  """Crawl Taobao search results for a keyword and print a price table.

  Args:
    goods: Search keyword. Defaults to the keyword the script was written for.
    depth: Number of result pages to request.
  """
  # Percent-encode the keyword so characters such as '&', '#' or spaces
  # cannot corrupt the query string.
  start_url = 'https://s.taobao.com/search?q=' + quote(goods)
  # The 's' offset advances by 44 per page -- presumably the number of items
  # Taobao shows per result page; confirm against the live site.
  page_step = 44
  infoList = []
  for page in range(depth):
    url = start_url + '&s=' + str(page_step * page)
    try:
      html = getHTMLText(url)
      parsePage(infoList, html)
    except Exception:
      # Best effort: a page that fails is skipped, but Ctrl-C and interpreter
      # exit are no longer swallowed as they were by a bare except.
      continue
  printGoodsList(infoList)

# Run the crawler only when executed as a script, so importing this module
# does not trigger network requests.
if __name__ == "__main__":
  main()