爬虫神器PyQuery的使用方法

  • 参考: https://segmentfault.com/a/1190000005182997

  • 参考: https://blog.csdn.net/qq_42739440/article/details/90209084

  • 其他方式: https://zhuanlan.zhihu.com/p/68650066

# coding:utf-8
import requests
from pyquery import PyQuery
from random_useragent import UserAgent  # pip install r-useragent

# Request headers shared by every fetch in this script; the User-Agent value
# is whatever random_useragent's UserAgent().android() returns at import time.
headers = {"User-Agent": UserAgent().android()}


def test(content):
    """Fetch a page and extract its title, keywords and description.

    Args:
        content: URL of the page to fetch; it is passed to ``PyQuery(url=...)``.

    Returns:
        A ``(title, keywords, description)`` tuple. ``title`` is the text of
        the ``<title>`` element. ``keywords`` and ``description`` are the
        ``content`` attribute of the first element whose ``name`` attribute
        equals ``keywords`` / ``description`` (compared case-insensitively),
        or ``None`` when no such element carries a non-empty ``content``.
        If anything raises while fetching or parsing, the error is printed
        and ``(False, False, False)`` is returned instead.
    """
    try:
        # Extra keyword arguments are forwarded to the underlying HTTP request.
        # NOTE(review): verify=False turns off TLS certificate verification;
        # kept for compatibility with sites that have broken certificates.
        req = PyQuery(url=content, headers=headers, verify=False)
        title = req('title').text()

        # Collect name -> content once, lower-casing the name so that
        # "keywords", "Keywords", "KEYWORDS", ... are all recognised.
        named_content = {}
        for element in req('[name]'):
            name = element.get('name')
            value = element.get('content')
            if name and value:
                # setdefault keeps the first occurrence in document order.
                named_content.setdefault(name.strip().lower(), value)

        keywords = named_content.get('keywords')
        description = named_content.get('description')
    except Exception as e:
        # Deliberate best-effort: report the failure and signal it with a
        # triple of False values rather than propagating the exception.
        print('页面分析出现错误,错误内容:{}'.format(e))
        return False, False, False
    return title, keywords, description




# Demo run: fetch one page and print its (title, keywords, description) tuple.
# test() takes a URL string (it hands it to PyQuery(url=...)), so a
# requests.Response object such as requests.get('https://zhihu.com',
# verify=False) must not be passed in directly.
page_info = test("https://www.9ku.com")
print(page_info)

Last updated