域名备案查询
# -*- coding: utf-8 -*-
import re
import os
import cv2
import time
import base64
import hashlib
import requests
import openpyxl as xl
from openpyxl.styles import Alignment
os.environ['no_proxy'] = '*'
def query_base(val):
try:
# info = input("请完整输入公司全称 / 域名以查询备案信息:\n\n").replace(" ", "").replace("https://www.", "").replace("http://www.", "").replace("http://", "")
info = val
# 过滤空值和特殊字符,只允许 - . () 分别用于域名和公司名
if info == "":
raise ValueError("InputNone")
info = re.sub("[^\\u4e00-\\u9fa5-A-Za-z0-9,-.()()]", "", info)
input_zh = re.compile(u'[\u4e00-\u9fa5]')
zh_match = input_zh.search(info)
if zh_match:
info_result = info
else:
# 检测是否为可备案的域名类型(类型同步日期2022/01/06)
# TODO 部分特殊域名, 如51.la也能备案, 可能是特事特办
input_url = re.compile(
r'([^.]+)(?:\.(?:GOV\.cn|ORG\.cn|AC\.cn|MIL\.cn|NET\.cn|EDU\.cn|COM\.cn|BJ\.cn|TJ\.cn|SH\.cn|CQ\.cn|HE\.cn|SX\.cn|NM\.cn|LN\.cn|JL\.cn|HL\.cn|JS\.cn|ZJ\.cn|AH\.cn|FJ\.cn|JX\.cn|SD\.cn|HA\.cn|HB\.cn|HN\.cn|GD\.cn|GX\.cn|HI\.cn|SC\.cn|GZ\.cn|YN\.cn|XZ\.cn|SN\.cn|GS\.cn|QH\.cn|NX\.cn|XJ\.cn|TW\.cn|HK\.cn|MO\.cn|cn|REN|WANG|CITIC|TOP|SOHU|XIN|COM|NET|CLUB|XYZ|VIP|SITE|SHOP|INK|INFO|MOBI|RED|PRO|KIM|LTD|GROUP|BIZ|AUTO|LINK|WORK|LAW|BEER|STORE|TECH|FUN|ONLINE|ART|DESIGN|WIKI|LOVE|CENTER|VIDEO|SOCIAL|TEAM|SHOW|COOL|ZONE|WORLD|TODAY|CITY|CHAT|COMPANY|LIVE|FUND|GOLD|PLUS|GURU|RUN|PUB|EMAIL|LIFE|CO|FASHION|FIT|LUXE|YOGA|BAIDU|CLOUD|HOST|SPACE|PRESS|WEBSITE|ARCHI|ASIA|BIO|BLACK|BLUE|GREEN|LOTTO|ORGANIC|PET|PINK|POKER|PROMO|SKI|VOTE|VOTO|ICU))',
flags=re.IGNORECASE)
info_result = input_url.search(info)
if info_result is None:
if info.split(".")[0] == "":
raise ValueError("OnlyDomainInput")
raise ValueError("ValidType")
else:
info_result = info_result.group()
info_data = {'pageNum': '1', 'pageSize': '40', 'unitName': info_result}
return info_data
except ValueError as e:
if str(e) == 'InputNone' or str(e) == 'OnlyDomainInput':
print("\n ************** 请正确输入域名 **************\n")
else:
print("\n*** 该域名不支持备案,请查阅:http://xn--fiq8ituh5mn9d1qbc28lu5dusc.xn--vuq861b/ ***\n")
def get_cookies():
cookie_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32'}
err_num = 0
while err_num < 3:
try:
cookie = requests.utils.dict_from_cookiejar(requests.get('https://beian.miit.gov.cn/', headers=cookie_headers).cookies)['__jsluid_s']
return cookie
except:
err_num += 1
time.sleep(3)
return -1
def get_token():
timeStamp = round(time.time() * 1000)
authSecret = 'testtest' + str(timeStamp)
authKey = hashlib.md5(authSecret.encode(encoding='UTF-8')).hexdigest()
auth_data = {'authKey': authKey, 'timeStamp': timeStamp}
url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/auth'
try:
t_response = requests.post(url=url, data=auth_data, headers=base_header).json()
token = t_response['params']['bussiness']
except:
return -1
return token
def get_check_pic(token):
url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/getCheckImage'
base_header['Accept'] = 'application/json, text/plain, */*'
base_header.update({'Content-Length': '0', 'token': token})
try:
p_request = requests.post(url=url, data='', headers=base_header).json()
p_uuid = p_request['params']['uuid']
big_image = p_request['params']['bigImage']
small_image = p_request['params']['smallImage']
except:
return -1
# 解码图片,写入并计算图片缺口位置
with open('bigImage.jpg', 'wb') as f:
f.write(base64.b64decode(big_image))
with open('smallImage.jpg', 'wb') as f:
f.write(base64.b64decode(small_image))
background_image = cv2.imread('bigImage.jpg', cv2.COLOR_GRAY2RGB)
fill_image = cv2.imread('smallImage.jpg', cv2.COLOR_GRAY2RGB)
position_match = cv2.matchTemplate(background_image, fill_image, cv2.TM_CCOEFF_NORMED)
max_loc = cv2.minMaxLoc(position_match)[3][0]
mouse_length = max_loc + 1
os.remove('bigImage.jpg')
os.remove('smallImage.jpg')
check_data = {'key': p_uuid, 'value': mouse_length}
return check_data
def get_sign(check_data, token):
check_url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/checkImage'
base_header.update({'Content-Length': '60', 'token': token, 'Content-Type': 'application/json'})
try:
pic_sign = requests.post(check_url, json=check_data, headers=base_header).json()
sign = pic_sign['params']
except:
return -1
return sign
def get_beian_info(info_data, p_uuid, token, sign):
domain_list = []
info_url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/icpAbbreviateInfo/queryByCondition'
base_header.update({'Content-Length': '78', 'uuid': p_uuid, 'token': token, 'sign': sign})
try:
beian_info = requests.post(url=info_url, json=info_data, headers=base_header).json()
if not beian_info["success"]:
# print(f'请求错误: CODE {beian_info["code"]} MSG {beian_info["msg"]}')
return domain_list
domain_total = beian_info['params']['total']
page_total = beian_info['params']['lastPage']
end_row = beian_info['params']['endRow']
info = info_data['unitName']
# print(f"\n查询对象:{info} 共有 {domain_total} 个已备案域名\n")
for i in range(0, page_total):
# print(f"正在查询第{i+1}页……\n")
for k in range(0, end_row + 1):
info_base = beian_info['params']['list'][k]
domain_name = info_base['domain']
domain_type = info_base['natureName']
domain_licence = info_base['mainLicence']
website_licence = info_base['serviceLicence']
domain_status = info_base['limitAccess']
domain_approve_date = info_base['updateRecordTime']
domain_owner = info_base['unitName']
try:
domain_content_approved = info_base['contentTypeName']
if domain_content_approved == "":
domain_content_approved = "无"
except KeyError:
domain_content_approved = "无"
row_data = domain_owner, domain_name, domain_licence, website_licence, domain_type, domain_content_approved, domain_status, domain_approve_date
domain_list.append(row_data)
info_data = {'pageNum': i + 2, 'pageSize': '40', 'unitName': info}
if beian_info['params']['isLastPage'] is not True:
beian_info = requests.post(info_url, json=info_data, headers=base_header).json()
end_row = beian_info['params']['endRow']
time.sleep(3)
except Exception as e:
# print(f"意外错误: {e}")
return domain_list
return domain_list
def main(val):
cookie = get_cookies()
info = query_base(val)
try:
global base_header
base_header = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32',
'Origin': 'https://beian.miit.gov.cn',
'Referer': 'https://beian.miit.gov.cn/',
'Cookie': f'__jsluid_s={cookie}'
}
# -1代表对应步骤失败了,不是-1则正常执行下一步
if cookie != -1:
token = get_token()
if token != -1:
check_data = get_check_pic(token)
if check_data != -1:
sign = get_sign(check_data, token)
p_uuid = check_data['key']
if sign != -1:
domain_list = get_beian_info(info, p_uuid, token, sign)
# ['域名主办方', '域名', '备案许可证号', '网站备案号', '域名类型', '网站前置审批项', '是否限制接入', '审核通过日期']
return domain_list
# data_saver(domain_list)
else:
raise ValueError("获取Sign遇到错误,请重试!")
else:
raise ValueError("计算图片缺口位置错误,请重试!")
else:
raise ValueError("获取Token失败,如频繁失败请关闭程序后等待几分钟再试!")
else:
cookie = get_cookies()
raise ValueError("获取Cookie失败,请重试!")
except Exception as e:
print(f'{e}\n')
if __name__ == '__main__':
key = ['域名主办方', '域名', '备案许可证号', '网站备案号', '域名类型', '网站前置审批项', '是否限制接入', '审核通过日期']
result = main(val="31idc.com")
data = list(map(lambda x:dict(zip(key,x)), result))
print(data)
Last updated