.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.engines.quark Namespace Reference

Functions

 is_alibaba_captcha (html)
 
 init (_)
 
 request (query, params)
 
 response (resp)
 
 parse_addition (data)
 
 parse_ai_page (data)
 
 parse_baike_sc (data)
 
 parse_finance_shuidi (data)
 
 parse_kk_yidian_all (data)
 
 parse_life_show_general_image (data)
 
 parse_med_struct (data)
 
 parse_music_new_song (data)
 
 parse_nature_result (data)
 
 parse_news_uchq (data)
 
 parse_ss_doc (data)
 
 parse_ss_note (data)
 
 parse_travel_dest_overview (data)
 
 parse_travel_ranking_list (data)
 

Variables

dict about
 
list categories = []
 
bool paging = True
 
int results_per_page = 10
 
str quark_category = 'general'
 
bool time_range_support = True
 
dict time_range_dict = {'day': '4', 'week': '3', 'month': '2', 'year': '1'}
 
str CAPTCHA_PATTERN = r'\{[^{]*?"action"\s*:\s*"captcha"\s*,\s*"url"\s*:\s*"([^"]+)"[^{]*?\}'
 

Detailed Description

Quark (Shenma) search engine for SearXNG

Function Documentation

◆ init()

searx.engines.quark.init ( _)

Definition at line 46 of file quark.py.

def init(_):
    """Validate the engine configuration at load time.

    Raises SearxEngineAPIException when ``quark_category`` is set to
    anything other than the two supported categories.
    """
    if quark_category not in {'general', 'images'}:
        raise SearxEngineAPIException(f"Unsupported category: {quark_category}")

◆ is_alibaba_captcha()

searx.engines.quark.is_alibaba_captcha ( html)
Detects if the response contains an Alibaba X5SEC CAPTCHA page.

Quark may return a CAPTCHA challenge after 9 requests in a short period.

Typically, the ban duration is around 15 minutes.

Definition at line 35 of file quark.py.

def is_alibaba_captcha(html):
    """Return True when *html* contains an Alibaba X5SEC CAPTCHA page.

    Quark may return a CAPTCHA challenge after 9 requests in a short
    period; the ban typically lasts around 15 minutes.
    """
    return re.search(CAPTCHA_PATTERN, html) is not None

Referenced by response().

+ Here is the caller graph for this function:

◆ parse_addition()

searx.engines.quark.parse_addition ( data)

Definition at line 165 of file quark.py.

def parse_addition(data):
    """Map an ``addition`` result card to a standard result dict."""
    title = data.get('title', {})
    source = data.get('source', {})
    summary = data.get('summary', {})
    return {
        "title": html_to_text(title.get('content')),
        "url": source.get('url'),
        "content": html_to_text(summary.get('content')),
    }

◆ parse_ai_page()

searx.engines.quark.parse_ai_page ( data)

Definition at line 173 of file quark.py.

def parse_ai_page(data):
    """Map an ``ai_page`` result card to a list of result dicts."""
    results = []
    for entry in data.get('list', []):
        raw = entry.get('content')
        if isinstance(raw, list):
            content = " | ".join(map(str, raw))
        else:
            content = str(raw)

        # timestamps arrive as strings; fall back to None when absent/invalid
        try:
            published_date = datetime.fromtimestamp(int(entry.get('source', {}).get('time')))
        except (ValueError, TypeError):
            published_date = None

        results.append(
            {
                "title": html_to_text(entry.get('title')),
                "url": entry.get('url'),
                "content": html_to_text(content),
                "publishedDate": published_date,
            }
        )
    return results

◆ parse_baike_sc()

searx.engines.quark.parse_baike_sc ( data)

Definition at line 198 of file quark.py.

def parse_baike_sc(data):
    """Map a ``baike_sc`` (encyclopedia) result card to a result dict.

    Fix: ``data['data'].get('img')`` can be ``None``; calling ``.replace``
    on it raised ``AttributeError``. The thumbnail is now ``None`` when
    no image is present.
    """
    payload = data.get('data', {})

    thumbnail = payload.get('img')
    if thumbnail:
        # Quark sometimes serves plain-http image URLs; upgrade them.
        thumbnail = thumbnail.replace("http://", "https://")

    return {
        "title": html_to_text(payload.get('title')),
        "url": payload.get('url'),
        "content": html_to_text(payload.get('abstract')),
        "thumbnail": thumbnail,
    }

◆ parse_finance_shuidi()

searx.engines.quark.parse_finance_shuidi ( data)

Definition at line 207 of file quark.py.

def parse_finance_shuidi(data):
    """Map a ``finance_shuidi`` (company info) card to a result dict."""
    detail_keys = (
        'establish_time',
        'company_status',
        'controled_type',
        'company_type',
        'capital',
        'address',
        'business_scope',
    )
    # keep only the fields that are present and non-empty
    details = [data.get(key) for key in detail_keys]
    content = " | ".join(value for value in details if value)

    return {
        "title": html_to_text(data.get('company_name')),
        "url": data.get('title_url'),
        "content": html_to_text(content),
    }

◆ parse_kk_yidian_all()

searx.engines.quark.parse_kk_yidian_all ( data)

Definition at line 230 of file quark.py.

def parse_kk_yidian_all(data):
    """Map a ``kk_yidian_all`` card to a result dict.

    The card nests two levels of ``list_container``; the text fragments
    live in the inner items' ``dot_text`` fields.
    """
    fragments = [
        item['dot_text']
        for section in data.get('list_container', [])
        for item in section.get('list_container', [])
        if 'dot_text' in item
    ]
    return {
        "title": html_to_text(data.get('title')),
        "url": data.get('title_url'),
        "content": html_to_text(' '.join(fragments)),
    }

◆ parse_life_show_general_image()

searx.engines.quark.parse_life_show_general_image ( data)

Definition at line 244 of file quark.py.

def parse_life_show_general_image(data):
    """Map a ``life_show_general_image`` card to a list of image results.

    Fix: ``item['width']`` / ``item['height']`` raised ``KeyError`` when a
    dimension was missing, while every other field is read defensively
    with ``.get``. The resolution is now ``None`` when either dimension
    is absent.
    """
    results = []
    for item in data.get('image', []):
        # publish_time arrives as a string timestamp; invalid/missing -> None
        try:
            published_date = datetime.fromtimestamp(int(item.get("publish_time")))
        except (ValueError, TypeError):
            published_date = None

        width = item.get('width')
        height = item.get('height')
        resolution = f"{width} x {height}" if width is not None and height is not None else None

        results.append(
            {
                "template": "images.html",
                "url": item.get("imgUrl"),
                "thumbnail_src": item.get("img"),
                "img_src": item.get("bigPicUrl"),
                "title": item.get("title"),
                "source": item.get("site"),
                "resolution": resolution,
                "publishedDate": published_date,
            }
        )
    return results

◆ parse_med_struct()

searx.engines.quark.parse_med_struct ( data)

Definition at line 267 of file quark.py.

def parse_med_struct(data):
    """Map a ``med_struct`` (medical) result card to a result dict.

    Fix: ``message.get('video_img')`` can be ``None``; calling ``.replace``
    on it raised ``AttributeError``. The thumbnail is now ``None`` when no
    video image is present.
    """
    message = data.get('message', {})

    thumbnail = message.get('video_img')
    if thumbnail:
        # Quark sometimes serves plain-http image URLs; upgrade them.
        thumbnail = thumbnail.replace("http://", "https://")

    return {
        "title": html_to_text(data.get('title')),
        "url": message.get('statistics', {}).get('nu'),
        "content": html_to_text(message.get('content_text')),
        "thumbnail": thumbnail,
    }

◆ parse_music_new_song()

searx.engines.quark.parse_music_new_song ( data)

Definition at line 276 of file quark.py.

def parse_music_new_song(data):
    """Map a ``music_new_song`` card to a list of result dicts.

    Fix: ``item.get('image_url')`` can be ``None``; calling ``.replace``
    on it raised ``AttributeError``. The thumbnail is now ``None`` when
    no image URL is present.
    """
    results = []
    for item in data.get('hit3', []):
        thumbnail = item.get("image_url")
        if thumbnail:
            # Quark sometimes serves plain-http image URLs; upgrade them.
            thumbnail = thumbnail.replace("http://", "https://")

        results.append(
            {
                "title": f"{item['song_name']} | {item['song_singer']}",
                "url": item.get("play_url"),
                "content": html_to_text(item.get("lyrics")),
                "thumbnail": thumbnail,
            }
        )
    return results

◆ parse_nature_result()

searx.engines.quark.parse_nature_result ( data)

Definition at line 290 of file quark.py.

def parse_nature_result(data):
    """Map a ``nature_result`` card to a result dict."""
    return {
        "title": html_to_text(data.get('title')),
        "url": data.get('url'),
        "content": html_to_text(data.get('desc')),
    }

◆ parse_news_uchq()

searx.engines.quark.parse_news_uchq ( data)

Definition at line 294 of file quark.py.

def parse_news_uchq(data):
    """Map a ``news_uchq`` (news feed) card to a list of result dicts.

    Fix: ``item.get('image')`` can be ``None``; calling ``.replace`` on it
    raised ``AttributeError``. The thumbnail is now ``None`` when no image
    is present.
    """
    results = []
    for item in data.get('feed', []):
        try:
            published_date = datetime.strptime(item.get('time'), "%Y-%m-%d")
        except (ValueError, TypeError):
            # Quark sometimes returns non-standard dates like "1天前";
            # treat those as "no date".
            published_date = None

        thumbnail = item.get('image')
        if thumbnail:
            # Quark sometimes serves plain-http image URLs; upgrade them.
            thumbnail = thumbnail.replace("http://", "https://")

        results.append(
            {
                "title": html_to_text(item.get('title')),
                "url": item.get('url'),
                "content": html_to_text(item.get('summary')),
                "thumbnail": thumbnail,
                "publishedDate": published_date,
            }
        )
    return results

◆ parse_ss_doc()

searx.engines.quark.parse_ss_doc ( data)

Definition at line 315 of file quark.py.

def parse_ss_doc(data):
    """Map an ``ss_doc`` card (and its variants ss_kv, ss_pic, ss_text,
    ss_video, baike, structure_web_novel) to a result dict.

    Each field can live at a different path depending on the variant, so
    the candidate locations are probed in order of preference.
    """
    published_date = None
    try:
        timestamp = int(data.get('sourceProps', {}).get('time'))
        # Quark sometimes reports 0 for "no date"; keep None in that case
        if timestamp != 0:
            published_date = datetime.fromtimestamp(timestamp)
    except (ValueError, TypeError):
        pass

    try:
        thumbnail = data.get('picListProps', [])[0].get('src').replace("http://", "https://")
    except (ValueError, TypeError, IndexError):
        thumbnail = None

    title = (
        data.get('titleProps', {}).get('content')
        # ss_kv variant 1 & 2
        or data.get('title')
    )
    url = (
        data.get('sourceProps', {}).get('dest_url')
        # ss_kv variant 1
        or data.get('normal_url')
        # ss_kv variant 2
        or data.get('url')
    )
    content = (
        data.get('summaryProps', {}).get('content')
        # ss_doc variant 1
        or data.get('message', {}).get('replyContent')
        # ss_kv variant 1
        or data.get('show_body')
        # ss_kv variant 2
        or data.get('desc')
    )

    return {
        "title": html_to_text(title),
        "url": url,
        "content": html_to_text(content),
        "publishedDate": published_date,
        "thumbnail": thumbnail,
    }

◆ parse_ss_note()

searx.engines.quark.parse_ss_note ( data)

Definition at line 356 of file quark.py.

def parse_ss_note(data):
    """Map an ``ss_note`` card to a result dict."""
    source = data.get('source', {})

    # timestamp arrives as a string; invalid/missing -> None
    try:
        published_date = datetime.fromtimestamp(int(source.get('time')))
    except (ValueError, TypeError):
        published_date = None

    return {
        "title": html_to_text(data.get('title', {}).get('content')),
        "url": source.get('dest_url'),
        "content": html_to_text(data.get('summary', {}).get('content')),
        "publishedDate": published_date,
    }

◆ parse_travel_dest_overview()

searx.engines.quark.parse_travel_dest_overview ( data)

Definition at line 370 of file quark.py.

def parse_travel_dest_overview(data):
    """Map a ``travel_dest_overview`` card to a result dict."""
    strong = data.get('strong', {})
    return {
        "title": html_to_text(strong.get('title')),
        "url": strong.get('baike_url'),
        "content": html_to_text(strong.get('baike_text')),
    }

◆ parse_travel_ranking_list()

searx.engines.quark.parse_travel_ranking_list ( data)

Definition at line 378 of file quark.py.

def parse_travel_ranking_list(data):
    """Map a ``travel_ranking_list`` card to a result dict."""
    title = data.get('title', {})
    return {
        "title": html_to_text(title.get('text')),
        "url": title.get('url'),
        "content": html_to_text(title.get('title_tag')),
    }

◆ request()

searx.engines.quark.request ( query,
params )

Definition at line 51 of file quark.py.

def request(query, params):
    """Assemble the outgoing search request for the configured category.

    Sets ``params['url']`` and ``params['headers']`` and returns *params*.
    """
    page = params["pageno"]

    # per-category endpoint and query parameters
    config = {
        'general': {
            'endpoint': 'https://m.quark.cn/s',
            'params': {"q": query, "layout": "html", "page": page},
        },
        'images': {
            'endpoint': 'https://vt.sm.cn/api/pic/list',
            'params': {
                "query": query,
                "limit": results_per_page,
                "start": (page - 1) * results_per_page,
            },
        },
    }[quark_category]

    query_params = config['params']

    # time-range filtering is only supported by the general endpoint
    time_range_value = time_range_dict.get(params['time_range'])
    if quark_category == 'general' and time_range_value:
        query_params["tl_request"] = time_range_value

    params["url"] = f"{config['endpoint']}?{urlencode(query_params)}"
    params["headers"] = {"User-Agent": gen_useragent()}
    return params

◆ response()

searx.engines.quark.response ( resp)

Definition at line 86 of file quark.py.

def response(resp):
    """Parse a Quark response into a list of SearXNG result dicts.

    Raises SearxEngineCaptchaException when an Alibaba X5SEC CAPTCHA page
    is detected (suspends the engine for 15 minutes).
    """
    text = resp.text

    if is_alibaba_captcha(text):
        raise SearxEngineCaptchaException(
            suspended_time=900, message="Alibaba CAPTCHA detected. Please try again later."
        )

    results = []

    if quark_category == 'images':
        data = json.loads(text)
        items = data.get('data', {}).get('hit', {}).get('imgInfo', {}).get('item', [])
        for item in items:
            try:
                published_date = datetime.fromtimestamp(int(item.get("publish_time")))
            except (ValueError, TypeError):
                published_date = None

            results.append(
                {
                    "template": "images.html",
                    "url": item.get("imgUrl"),
                    "thumbnail_src": item.get("img"),
                    "img_src": item.get("bigPicUrl"),
                    "title": item.get("title"),
                    "source": item.get("site"),
                    "resolution": f"{item['width']} x {item['height']}",
                    "publishedDate": published_date,
                }
            )

    if quark_category == 'general':
        # Quark mixes several "sc" card types on a single page; dispatch
        # each embedded JSON island to the matching parser.
        sc_dispatch = {
            'addition': parse_addition,
            'ai_page': parse_ai_page,
            'baike_sc': parse_baike_sc,
            'finance_shuidi': parse_finance_shuidi,
            'kk_yidian_all': parse_kk_yidian_all,
            'life_show_general_image': parse_life_show_general_image,
            'med_struct': parse_med_struct,
            'music_new_song': parse_music_new_song,
            'nature_result': parse_nature_result,
            'news_uchq': parse_news_uchq,
            'ss_note': parse_ss_note,
            # ss_kv, ss_pic, ss_text, ss_video, baike, structure_web_novel
            # all share the ss_doc structure
            'ss_doc': parse_ss_doc,
            'ss_kv': parse_ss_doc,
            'ss_pic': parse_ss_doc,
            'ss_text': parse_ss_doc,
            'ss_video': parse_ss_doc,
            'baike': parse_ss_doc,
            'structure_web_novel': parse_ss_doc,
            'travel_dest_overview': parse_travel_dest_overview,
            'travel_ranking_list': parse_travel_ranking_list,
        }

        pattern = r'<script\s+type="application/json"\s+id="s-data-[^"]+"\s+data-used-by="hydrate">(.*?)</script>'
        for blob in re.findall(pattern, text, re.DOTALL):
            payload = json.loads(blob)
            initial_data = payload.get('data', {}).get('initialData', {})
            source_category = payload.get('extraData', {}).get('sc')

            parser = sc_dispatch.get(source_category)
            if parser is None:
                continue

            parsed = parser(initial_data)
            # parsers return either one result dict or a list of them
            if isinstance(parsed, list):
                results.extend(parsed)
            else:
                results.append(parsed)

    return results

References is_alibaba_captcha().

+ Here is the call graph for this function:

Variable Documentation

◆ about

dict searx.engines.quark.about
Initial value:
1= {
2 "website": "https://m.quark.cn/",
3 "wikidata_id": "Q48816502",
4 "use_official_api": False,
5 "require_api_key": False,
6 "results": "HTML",
7 "language": "zh",
8}

Definition at line 13 of file quark.py.

◆ CAPTCHA_PATTERN

str searx.engines.quark.CAPTCHA_PATTERN = r'\{[^{]*?"action"\s*:\s*"captcha"\s*,\s*"url"\s*:\s*"([^"]+)"[^{]*?\}'

Definition at line 32 of file quark.py.

◆ categories

list searx.engines.quark.categories = []

Definition at line 23 of file quark.py.

◆ paging

bool searx.engines.quark.paging = True

Definition at line 24 of file quark.py.

◆ quark_category

str searx.engines.quark.quark_category = 'general'

Definition at line 27 of file quark.py.

◆ results_per_page

int searx.engines.quark.results_per_page = 10

Definition at line 25 of file quark.py.

◆ time_range_dict

dict searx.engines.quark.time_range_dict = {'day': '4', 'week': '3', 'month': '2', 'year': '1'}

Definition at line 30 of file quark.py.

◆ time_range_support

bool searx.engines.quark.time_range_support = True

Definition at line 29 of file quark.py.