爬虫神器PyQuery的使用方法
参考: https://segmentfault.com/a/1190000005182997
参考: https://blog.csdn.net/qq_42739440/article/details/90209084
其他方式: https://zhuanlan.zhihu.com/p/68650066
# coding:utf-8
import requests
from pyquery import PyQuery
from random_useragent import UserAgent # pip install r-useragent
# Request headers handed to PyQuery by test(); the User-Agent value is
# whatever UserAgent().android() returns when this module is loaded.
headers = {"User-Agent": UserAgent().android()}
def test(content):
    """Load a page with PyQuery and return its title, keywords and description.

    Args:
        content: Passed to PyQuery as ``url=``, i.e. the address of the page
            to load.

    Returns:
        A ``(title, keywords, description)`` tuple. ``title`` is the text of
        the ``title`` selector; ``keywords`` and ``description`` are the
        ``content`` attribute of the elements matched by
        ``[name=keywords]`` / ``[name=description]``, falling back to the
        capitalised spellings ``[name=Keywords]`` / ``[name=Description]``.
        If anything raises while loading or querying the page, the error is
        printed and ``(False, False, False)`` is returned instead.
    """

    def meta_content(doc, selector, fallback_selector):
        # Mirrors `a or b`: keep the first lookup when it is truthy,
        # otherwise return the fallback lookup's result as-is.
        found = doc(selector).attr('content')
        if found:
            return found
        return doc(fallback_selector).attr('content')

    try:
        # Per the original author's note, these keyword arguments behave the
        # same way as they do for requests.
        # NOTE(review): verify=False presumably disables TLS certificate
        # verification, as it does in requests -- confirm this is intended.
        page = PyQuery(url=content, headers=headers, verify=False)

        title = page('title').text()
        if not title:
            # NOTE(review): the fallback selector is an empty string; it looks
            # like the intended selector was lost -- confirm what it should be.
            title = page('').text()

        keywords = meta_content(page, '[name=keywords]', '[name=Keywords]')
        description = meta_content(
            page, '[name=description]', '[name=Description]'
        )
    except Exception as err:
        # Best-effort scraping: report the failure and return the sentinel.
        print('页面分析出现错误,错误内容:{}'.format(err))
        return False, False, False
    return title, keywords, description
# Disabled alternative kept from the original notes: fetch the page with
# requests first and hand the response object to test().
# NOTE(review): test() forwards its argument to PyQuery as url=, so passing a
# response object presumably would not work as written -- confirm before
# re-enabling.
# r = requests.get('https://zhihu.com', verify=False)
# print(test(r))
# Demo run: call test() on a sample URL and print the returned tuple.
# This statement is at module level, so it runs whenever the file is executed
# or imported.
print(test(content='https://www.9ku.com'))
Last updated