.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.engines.baidu Namespace Reference

Functions

 init (_)
 
 request (query, params)
 
 response (resp)
 
 parse_general (data)
 
 parse_images (data)
 
 parse_it (data)
 

Variables

dict about
 
bool paging = True
 
list categories = []
 
int results_per_page = 10
 
str baidu_category = 'general'
 
bool time_range_support = True
 
dict time_range_dict = {"day": 86400, "week": 604800, "month": 2592000, "year": 31536000}
 

Detailed Description

Baidu_

.. _Baidu: https://www.baidu.com

Function Documentation

◆ init()

searx.engines.baidu.init ( _)

Definition at line 39 of file baidu.py.

39def init(_):
40 if baidu_category not in ('general', 'images', 'it'):
41 raise SearxEngineAPIException(f"Unsupported category: {baidu_category}")
42
43

◆ parse_general()

searx.engines.baidu.parse_general ( data)

Definition at line 107 of file baidu.py.

107def parse_general(data):
108 results = []
109 if not data.get("feed", {}).get("entry"):
110 raise SearxEngineAPIException("Invalid response")
111
112 for entry in data["feed"]["entry"]:
113 if not entry.get("title") or not entry.get("url"):
114 continue
115
116 published_date = None
117 if entry.get("time"):
118 try:
119 published_date = datetime.fromtimestamp(entry["time"])
120 except (ValueError, TypeError):
121 published_date = None
122
123 # title and content sometimes contain HTML-escaped characters such as &amp; &#x27; &quot; etc.
124 title = unescape(entry["title"])
125 content = unescape(entry.get("abs", ""))
126
127 results.append(
128 {
129 "title": title,
130 "url": entry["url"],
131 "content": content,
132 "publishedDate": published_date,
133 }
134 )
135 return results
136
137

◆ parse_images()

searx.engines.baidu.parse_images ( data)

Definition at line 138 of file baidu.py.

138def parse_images(data):
139 results = []
140 if "data" in data:
141 for item in data["data"]:
142 if not item:
143 # the last item in the JSON list is empty, the JSON string ends with "}, {}]"
144 continue
145 replace_url = item.get("replaceUrl", [{}])[0]
146 width = item.get("width")
147 height = item.get("height")
148 img_date = item.get("bdImgnewsDate")
149 publishedDate = None
150 if img_date:
151 publishedDate = datetime.strptime(img_date, "%Y-%m-%d %H:%M")
152 results.append(
153 {
154 "template": "images.html",
155 "url": replace_url.get("FromURL"),
156 "thumbnail_src": item.get("thumbURL"),
157 "img_src": replace_url.get("ObjURL"),
158 "title": html_to_text(item.get("fromPageTitle")),
159 "source": item.get("fromURLHost"),
160 "resolution": f"{width} x {height}",
161 "img_format": item.get("type"),
162 "filesize": item.get("filesize"),
163 "publishedDate": publishedDate,
164 }
165 )
166 return results
167
168

◆ parse_it()

searx.engines.baidu.parse_it ( data)

Definition at line 169 of file baidu.py.

169def parse_it(data):
170 results = []
171 if not data.get("data", {}).get("documents", {}).get("data"):
172 raise SearxEngineAPIException("Invalid response")
173
174 for entry in data["data"]["documents"]["data"]:
175 results.append(
176 {
177 'title': entry["techDocDigest"]["title"],
178 'url': entry["techDocDigest"]["url"],
179 'content': entry["techDocDigest"]["summary"],
180 }
181 )
182 return results

◆ request()

searx.engines.baidu.request ( query,
params )

Definition at line 44 of file baidu.py.

44def request(query, params):
45 page_num = params["pageno"]
46
47 category_config = {
48 'general': {
49 'endpoint': 'https://www.baidu.com/s',
50 'params': {
51 "wd": query,
52 "rn": results_per_page,
53 "pn": (page_num - 1) * results_per_page,
54 "tn": "json",
55 },
56 },
57 'images': {
58 'endpoint': 'https://image.baidu.com/search/acjson',
59 'params': {
60 "word": query,
61 "rn": results_per_page,
62 "pn": (page_num - 1) * results_per_page,
63 "tn": "resultjson_com",
64 },
65 },
66 'it': {
67 'endpoint': 'https://kaifa.baidu.com/rest/v1/search',
68 'params': {
69 "wd": query,
70 "pageSize": results_per_page,
71 "pageNum": page_num,
72 "paramList": f"page_num={page_num},page_size={results_per_page}",
73 "position": 0,
74 },
75 },
76 }
77
78 query_params = category_config[baidu_category]['params']
79 query_url = category_config[baidu_category]['endpoint']
80
81 if params.get("time_range") in time_range_dict:
82 now = int(time.time())
83 past = now - time_range_dict[params["time_range"]]
84
85 if baidu_category == 'general':
86 query_params["gpc"] = f"stf={past},{now}|stftype=1"
87
88 if baidu_category == 'it':
89 query_params["paramList"] += f",timestamp_range={past}-{now}"
90
91 params["url"] = f"{query_url}?{urlencode(query_params)}"
92 return params
93
94

◆ response()

searx.engines.baidu.response ( resp)

Definition at line 95 of file baidu.py.

95def response(resp):
96
97 text = resp.text
98 if baidu_category == 'images':
99 # baidu's JSON encoder wrongly escapes the / and ' characters as \/ and \'
100 text = text.replace(r"\/", "/").replace(r"\'", "'")
101 data = json.loads(text, strict=False)
102 parsers = {'general': parse_general, 'images': parse_images, 'it': parse_it}
103
104 return parsers[baidu_category](data)
105
106

Variable Documentation

◆ about

dict searx.engines.baidu.about
Initial value:
1= {
2 "website": "https://www.baidu.com",
3 "wikidata_id": "Q14772",
4 "official_api_documentation": None,
5 "use_official_api": False,
6 "require_api_key": False,
7 "results": "JSON",
8 "language": "zh",
9}

Definition at line 19 of file baidu.py.

◆ baidu_category

str searx.engines.baidu.baidu_category = 'general'

Definition at line 33 of file baidu.py.

◆ categories

list searx.engines.baidu.categories = []

Definition at line 30 of file baidu.py.

◆ paging

bool searx.engines.baidu.paging = True

Definition at line 29 of file baidu.py.

◆ results_per_page

int searx.engines.baidu.results_per_page = 10

Definition at line 31 of file baidu.py.

◆ time_range_dict

dict searx.engines.baidu.time_range_dict = {"day": 86400, "week": 604800, "month": 2592000, "year": 31536000}

Definition at line 36 of file baidu.py.

◆ time_range_support

bool searx.engines.baidu.time_range_support = True

Definition at line 35 of file baidu.py.