.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.engines.quark Namespace Reference

Functions

 is_alibaba_captcha (html)
 init (_)
 request (query, params)
 response (resp)
 parse_addition (data)
 parse_ai_page (data)
 parse_baike_sc (data)
 parse_finance_shuidi (data)
 parse_kk_yidian_all (data)
 parse_life_show_general_image (data)
 parse_med_struct (data)
 parse_music_new_song (data)
 parse_nature_result (data)
 parse_news_uchq (data)
 parse_ss_doc (data)
 parse_ss_note (data)
 parse_travel_dest_overview (data)
 parse_travel_ranking_list (data)

Variables

dict about
list categories = []
bool paging = True
int results_per_page = 10
str quark_category = 'general'
bool time_range_support = True
dict time_range_dict = {'day': '4', 'week': '3', 'month': '2', 'year': '1'}
str CAPTCHA_PATTERN = r'\{[^{]*?"action"\s*:\s*"captcha"\s*,\s*"url"\s*:\s*"([^"]+)"[^{]*?\}'

Detailed Description

Quark (Shenma) search engine for SearXNG

Function Documentation

◆ init()

searx.engines.quark.init ( _)

Definition at line 46 of file quark.py.

def init(_):
    """Validate the configured ``quark_category``.

    The positional argument is accepted but not used.

    Raises:
        SearxEngineAPIException: if ``quark_category`` is neither
            ``'general'`` nor ``'images'``.
    """
    if quark_category in ('general', 'images'):
        return
    raise SearxEngineAPIException(f"Unsupported category: {quark_category}")
49
50

◆ is_alibaba_captcha()

searx.engines.quark.is_alibaba_captcha ( html)
Detects if the response contains an Alibaba X5SEC CAPTCHA page.

Quark may return a CAPTCHA challenge after 9 requests in a short period.

Typically, the ban duration is around 15 minutes.

Definition at line 35 of file quark.py.

def is_alibaba_captcha(html):
    """Report whether *html* contains an Alibaba X5SEC CAPTCHA page.

    Quark may return a CAPTCHA challenge after 9 requests in a short
    period; the ban typically lasts around 15 minutes.

    Returns:
        ``True`` when ``CAPTCHA_PATTERN`` matches anywhere in *html*,
        ``False`` otherwise.
    """
    captcha_match = re.search(CAPTCHA_PATTERN, html)
    return captcha_match is not None
44
45

Referenced by response().

Here is the caller graph for this function:

◆ parse_addition()

searx.engines.quark.parse_addition ( data)

Definition at line 162 of file quark.py.

def parse_addition(data):
    """Build one result dict from an ``addition`` block.

    Reads ``title.content``, ``source.url`` and ``summary.content`` from
    *data*; title and summary are passed through ``html_to_text``.
    """
    title = html_to_text(data.get('title', {}).get('content'))
    url = data.get('source', {}).get('url')
    content = html_to_text(data.get('summary', {}).get('content'))
    return dict(title=title, url=url, content=content)
168
169

◆ parse_ai_page()

searx.engines.quark.parse_ai_page ( data)

Definition at line 170 of file quark.py.

def parse_ai_page(data):
    """Build a list of result dicts from an ``ai_page`` block.

    Each entry of ``data['list']`` yields one result. A list-valued
    ``content`` is joined with ``" | "``; a missing ``content`` becomes an
    empty string instead of the literal text ``"None"``.

    Returns:
        list of dicts with ``title``, ``url``, ``content`` and
        ``publishedDate`` (``None`` when the timestamp is missing or
        unusable).
    """
    results = []
    for item in data.get('list') or []:
        raw_content = item.get('content')
        if isinstance(raw_content, list):
            content = " | ".join(map(str, raw_content))
        elif raw_content is None:
            # str(None) would leak the word "None" into the result text.
            content = ""
        else:
            content = str(raw_content)

        try:
            # ``source`` may be absent or explicitly null.
            published_date = datetime.fromtimestamp(int((item.get('source') or {}).get('time')))
        except (ValueError, TypeError, OverflowError, OSError):
            # fromtimestamp() raises OverflowError/OSError for out-of-range values.
            published_date = None

        results.append(
            {
                "title": html_to_text(item.get('title')),
                "url": item.get('url'),
                "content": html_to_text(content),
                "publishedDate": published_date,
            }
        )
    return results
193
194

◆ parse_baike_sc()

searx.engines.quark.parse_baike_sc ( data)

Definition at line 195 of file quark.py.

def parse_baike_sc(data):
    """Build one result dict from a ``baike_sc`` block.

    Fields are read from the nested ``data['data']`` mapping. The
    thumbnail URL is upgraded from ``http://`` to ``https://``; when no
    image is present the thumbnail is ``None`` instead of raising
    ``AttributeError``.
    """
    entry = data.get('data') or {}
    img = entry.get('img')
    return {
        "title": html_to_text(entry.get('title')),
        "url": entry.get('url'),
        "content": html_to_text(entry.get('abstract')),
        "thumbnail": img.replace("http://", "https://") if isinstance(img, str) else None,
    }
202
203

◆ parse_finance_shuidi()

searx.engines.quark.parse_finance_shuidi ( data)

Definition at line 204 of file quark.py.

def parse_finance_shuidi(data):
    """Build one result dict from a ``finance_shuidi`` block.

    The content is the ``" | "``-joined sequence of the truthy company
    fields, in the fixed order listed below.
    """
    field_names = (
        'establish_time',
        'company_status',
        'controled_type',
        'company_type',
        'capital',
        'address',
        'business_scope',
    )
    details = [data.get(name) for name in field_names]
    # filter(None, ...) drops empty / missing fields.
    content = " | ".join(filter(None, details))
    return {
        "title": html_to_text(data.get('company_name')),
        "url": data.get('title_url'),
        "content": html_to_text(content),
    }
225
226

◆ parse_kk_yidian_all()

searx.engines.quark.parse_kk_yidian_all ( data)

Definition at line 227 of file quark.py.

def parse_kk_yidian_all(data):
    """Build one result dict from a ``kk_yidian_all`` block.

    Collects every ``dot_text`` found in the two-level ``list_container``
    nesting and joins the pieces with single spaces to form the content.
    """
    dot_texts = [
        entry['dot_text']
        for group in data.get('list_container', [])
        for entry in group.get('list_container', [])
        if 'dot_text' in entry
    ]

    return {
        "title": html_to_text(data.get('title')),
        "url": data.get('title_url'),
        "content": html_to_text(' '.join(dot_texts)),
    }
239
240

◆ parse_life_show_general_image()

searx.engines.quark.parse_life_show_general_image ( data)

Definition at line 241 of file quark.py.

def parse_life_show_general_image(data):
    """Build image results from a ``life_show_general_image`` block.

    Each entry of ``data['image']`` yields one ``images.html`` result.

    Returns:
        list of dicts with ``template``, ``url``, ``thumbnail_src``,
        ``img_src``, ``title``, ``source``, ``resolution`` and
        ``publishedDate``. ``resolution`` is ``None`` when width or height
        is missing (instead of raising ``KeyError``); ``publishedDate`` is
        ``None`` when ``publish_time`` is missing or unusable.
    """
    results = []
    for item in data.get('image') or []:
        try:
            published_date = datetime.fromtimestamp(int(item.get("publish_time")))
        except (ValueError, TypeError, OverflowError, OSError):
            # fromtimestamp() raises OverflowError/OSError for out-of-range values.
            published_date = None

        width, height = item.get("width"), item.get("height")
        resolution = f"{width} x {height}" if width and height else None

        results.append(
            {
                "template": "images.html",
                "url": item.get("imgUrl"),
                "thumbnail_src": item.get("img"),
                "img_src": item.get("bigPicUrl"),
                "title": item.get("title"),
                "source": item.get("site"),
                "resolution": resolution,
                "publishedDate": published_date,
            }
        )
    return results
262
263

◆ parse_med_struct()

searx.engines.quark.parse_med_struct ( data)

Definition at line 264 of file quark.py.

def parse_med_struct(data):
    """Build one result dict from a ``med_struct`` block.

    The URL comes from ``message.statistics.nu`` and the content from
    ``message.content_text``. The thumbnail (``message.video_img``) is
    upgraded to ``https://``; when it is missing the thumbnail is ``None``
    instead of raising ``AttributeError``.
    """
    message = data.get('message') or {}
    video_img = message.get('video_img')
    return {
        "title": html_to_text(data.get('title')),
        "url": (message.get('statistics') or {}).get('nu'),
        "content": html_to_text(message.get('content_text')),
        "thumbnail": video_img.replace("http://", "https://") if isinstance(video_img, str) else None,
    }
271
272

◆ parse_music_new_song()

searx.engines.quark.parse_music_new_song ( data)

Definition at line 273 of file quark.py.

def parse_music_new_song(data):
    """Build a list of result dicts from a ``music_new_song`` block.

    Each entry of ``data['hit3']`` yields one result whose title is
    ``"<song_name> | <song_singer>"``. A missing name or singer is left
    out of the title and a missing ``image_url`` gives a ``None``
    thumbnail, so one incomplete entry no longer aborts the whole block.
    """
    results = []
    for item in data.get('hit3') or []:
        title_parts = (item.get('song_name'), item.get('song_singer'))
        image_url = item.get("image_url")
        results.append(
            {
                "title": " | ".join(str(part) for part in title_parts if part),
                "url": item.get("play_url"),
                "content": html_to_text(item.get("lyrics")),
                "thumbnail": image_url.replace("http://", "https://") if isinstance(image_url, str) else None,
            }
        )
    return results
285
286

◆ parse_nature_result()

searx.engines.quark.parse_nature_result ( data)

Definition at line 287 of file quark.py.

def parse_nature_result(data):
    """Build one result dict from a ``nature_result`` block.

    ``title`` and ``desc`` are passed through ``html_to_text``; ``url`` is
    copied unchanged.
    """
    title = html_to_text(data.get('title'))
    url = data.get('url')
    content = html_to_text(data.get('desc'))
    return {"title": title, "url": url, "content": content}
289
290

◆ parse_news_uchq()

searx.engines.quark.parse_news_uchq ( data)

Definition at line 291 of file quark.py.

def parse_news_uchq(data):
    """Build a list of result dicts from a ``news_uchq`` block.

    Each entry of ``data['feed']`` yields one result. The thumbnail is
    upgraded to ``https://``; an entry without an image gets a ``None``
    thumbnail instead of raising ``AttributeError``.
    """
    results = []
    for item in data.get('feed') or []:
        try:
            published_date = datetime.strptime(item.get('time'), "%Y-%m-%d")
        except (ValueError, TypeError):
            # Quark sometimes returns a relative, non-standard value such as
            # "1天前" ("1 day ago"); no date is reported in that case.
            published_date = None

        image = item.get('image')
        results.append(
            {
                "title": html_to_text(item.get('title')),
                "url": item.get('url'),
                "content": html_to_text(item.get('summary')),
                "thumbnail": image.replace("http://", "https://") if isinstance(image, str) else None,
                "publishedDate": published_date,
            }
        )
    return results
310
311

◆ parse_ss_doc()

searx.engines.quark.parse_ss_doc ( data)

Definition at line 312 of file quark.py.

def parse_ss_doc(data):
    """Build one result dict from an ``ss_doc``-shaped block.

    The same structure is used for several source categories, so title,
    URL and content each fall back through the variants noted inline.

    Returns:
        dict with ``title``, ``url``, ``content``, ``publishedDate`` and
        ``thumbnail``. ``publishedDate`` is ``None`` when the timestamp is
        missing, zero or unusable; ``thumbnail`` is ``None`` when the first
        picture entry has no usable ``src``.
    """
    source_props = data.get('sourceProps') or {}

    published_date = None
    try:
        timestamp = int(source_props.get('time'))

        # Quark sometimes returns 0; treat that as "no date".
        if timestamp != 0:
            published_date = datetime.fromtimestamp(timestamp)
    except (ValueError, TypeError, OverflowError, OSError):
        # fromtimestamp() raises OverflowError/OSError for out-of-range values.
        pass

    # Only the first picture is used. Check each level explicitly so that
    # a missing ``src`` cannot raise AttributeError.
    thumbnail = None
    pictures = data.get('picListProps')
    if isinstance(pictures, list) and pictures and isinstance(pictures[0], dict):
        src = pictures[0].get('src')
        if isinstance(src, str):
            thumbnail = src.replace("http://", "https://")

    return {
        "title": html_to_text(
            (data.get('titleProps') or {}).get('content')
            # ss_kv variant 1 & 2
            or data.get('title')
        ),
        "url": source_props.get('dest_url')
        # ss_kv variant 1
        or data.get('normal_url')
        # ss_kv variant 2
        or data.get('url'),
        "content": html_to_text(
            (data.get('summaryProps') or {}).get('content')
            # ss_doc variant 1
            or (data.get('message') or {}).get('replyContent')
            # ss_kv variant 1
            or data.get('show_body')
            # ss_kv variant 2
            or data.get('desc')
        ),
        "publishedDate": published_date,
        "thumbnail": thumbnail,
    }
351
352

◆ parse_ss_note()

searx.engines.quark.parse_ss_note ( data)

Definition at line 353 of file quark.py.

def parse_ss_note(data):
    """Build one result dict from an ``ss_note`` block.

    Reads ``title.content``, ``source.dest_url``, ``summary.content`` and
    ``source.time``. ``publishedDate`` is ``None`` when the timestamp is
    missing or unusable. Nested objects that are absent or null are
    treated as empty.
    """
    source = data.get('source') or {}

    try:
        published_date = datetime.fromtimestamp(int(source.get('time')))
    except (ValueError, TypeError, OverflowError, OSError):
        # fromtimestamp() raises OverflowError/OSError for out-of-range values.
        published_date = None

    return {
        "title": html_to_text((data.get('title') or {}).get('content')),
        "url": source.get('dest_url'),
        "content": html_to_text((data.get('summary') or {}).get('content')),
        "publishedDate": published_date,
    }
365
366

◆ parse_travel_dest_overview()

searx.engines.quark.parse_travel_dest_overview ( data)

Definition at line 367 of file quark.py.

def parse_travel_dest_overview(data):
    """Build one result dict from a ``travel_dest_overview`` block.

    All fields come from the nested ``strong`` mapping: ``title``,
    ``baike_url`` and ``baike_text``.
    """
    strong = data.get('strong', {})
    title = html_to_text(strong.get('title'))
    url = strong.get('baike_url')
    content = html_to_text(strong.get('baike_text'))
    return {"title": title, "url": url, "content": content}
373
374

◆ parse_travel_ranking_list()

searx.engines.quark.parse_travel_ranking_list ( data)

Definition at line 375 of file quark.py.

def parse_travel_ranking_list(data):
    """Build one result dict from a ``travel_ranking_list`` block.

    All fields come from the nested ``title`` mapping: ``text``, ``url``
    and ``title_tag``.
    """
    heading = data.get('title', {})
    title = html_to_text(heading.get('text'))
    url = heading.get('url')
    content = html_to_text(heading.get('title_tag'))
    return {"title": title, "url": url, "content": content}

◆ request()

searx.engines.quark.request ( query,
params )

Definition at line 51 of file quark.py.

def request(query, params):
    """Fill ``params["url"]`` with the Quark request URL and return *params*.

    The endpoint and query arguments depend on ``quark_category``. For the
    ``'general'`` category a ``tl_request`` argument is added when
    ``params['time_range']`` maps to a value in ``time_range_dict``.
    """
    pageno = params["pageno"]

    general_cfg = {
        'endpoint': 'https://quark.sm.cn/s',
        'params': {
            "q": query,
            "layout": "html",
            "page": pageno,
        },
    }
    images_cfg = {
        'endpoint': 'https://vt.sm.cn/api/pic/list',
        'params': {
            "query": query,
            "limit": results_per_page,
            "start": (pageno - 1) * results_per_page,
        },
    }

    selected = {'general': general_cfg, 'images': images_cfg}[quark_category]
    args = selected['params']
    endpoint = selected['endpoint']

    time_limit = time_range_dict.get(params['time_range'])
    if time_limit and quark_category == 'general':
        args["tl_request"] = time_limit

    params["url"] = endpoint + "?" + urlencode(args)
    return params
81
82

◆ response()

searx.engines.quark.response ( resp)

Definition at line 83 of file quark.py.

def response(resp):
    """Turn a Quark HTTP response into a list of result dicts.

    For the ``'images'`` category the body is parsed as JSON and every
    item under ``data.hit.imgInfo.item`` becomes an ``images.html``
    result. For the ``'general'`` category the JSON payloads embedded in
    ``<script type="application/json" id="s-data-...">`` tags are
    extracted and dispatched on their ``extraData.sc`` value to the
    matching ``parse_*`` function; payloads with an unknown ``sc`` are
    ignored.

    Raises:
        SearxEngineCaptchaException: when the body is an Alibaba CAPTCHA
            page (suspends the engine for 900 seconds).
    """
    text = resp.text

    if is_alibaba_captcha(text):
        raise SearxEngineCaptchaException(
            suspended_time=900, message="Alibaba CAPTCHA detected. Please try again later."
        )

    results = []

    if quark_category == 'images':
        data = json.loads(text)
        for item in data.get('data', {}).get('hit', {}).get('imgInfo', {}).get('item', []):
            try:
                published_date = datetime.fromtimestamp(int(item.get("publish_time")))
            except (ValueError, TypeError, OverflowError, OSError):
                # fromtimestamp() raises OverflowError/OSError for out-of-range values.
                published_date = None

            # An item without dimensions must not abort the whole page.
            width, height = item.get("width"), item.get("height")
            resolution = f"{width} x {height}" if width and height else None

            results.append(
                {
                    "template": "images.html",
                    "url": item.get("imgUrl"),
                    "thumbnail_src": item.get("img"),
                    "img_src": item.get("bigPicUrl"),
                    "title": item.get("title"),
                    "source": item.get("site"),
                    "resolution": resolution,
                    "publishedDate": published_date,
                }
            )

    if quark_category == 'general':
        # Quark returns a variety of different sc values on a single page, depending on the query type.
        source_category_parsers = {
            'addition': parse_addition,
            'ai_page': parse_ai_page,
            'baike_sc': parse_baike_sc,
            'finance_shuidi': parse_finance_shuidi,
            'kk_yidian_all': parse_kk_yidian_all,
            'life_show_general_image': parse_life_show_general_image,
            'med_struct': parse_med_struct,
            'music_new_song': parse_music_new_song,
            'nature_result': parse_nature_result,
            'news_uchq': parse_news_uchq,
            'ss_note': parse_ss_note,
            # ss_kv, ss_pic, ss_text, ss_video, baike, structure_web_novel use the same struct as ss_doc
            'ss_doc': parse_ss_doc,
            'ss_kv': parse_ss_doc,
            'ss_pic': parse_ss_doc,
            'ss_text': parse_ss_doc,
            'ss_video': parse_ss_doc,
            'baike': parse_ss_doc,
            'structure_web_novel': parse_ss_doc,
            'travel_dest_overview': parse_travel_dest_overview,
            'travel_ranking_list': parse_travel_ranking_list,
        }

        pattern = r'<script\s+type="application/json"\s+id="s-data-[^"]+"\s+data-used-by="hydrate">(.*?)</script>'

        for match in re.findall(pattern, text, re.DOTALL):
            try:
                data = json.loads(match)
            except ValueError:
                # json.JSONDecodeError is a ValueError; one malformed payload
                # should not discard the results from the other payloads.
                continue

            initial_data = data.get('data', {}).get('initialData', {})
            source_category = data.get('extraData', {}).get('sc')

            parser = source_category_parsers.get(source_category)
            if not parser:
                continue

            parsed = parser(initial_data)
            if isinstance(parsed, list):
                # Some parsers return several results for one payload.
                results.extend(parsed)
            else:
                results.append(parsed)

    return results
160
161

References is_alibaba_captcha().

Here is the call graph for this function:

Variable Documentation

◆ about

dict searx.engines.quark.about
Initial value:
1= {
2 "website": "https://quark.sm.cn/",
3 "wikidata_id": "Q48816502",
4 "use_official_api": False,
5 "require_api_key": False,
6 "results": "HTML",
7 "language": "zh",
8}

Definition at line 13 of file quark.py.

◆ CAPTCHA_PATTERN

str searx.engines.quark.CAPTCHA_PATTERN = r'\{[^{]*?"action"\s*:\s*"captcha"\s*,\s*"url"\s*:\s*"([^"]+)"[^{]*?\}'

Definition at line 32 of file quark.py.

◆ categories

list searx.engines.quark.categories = []

Definition at line 23 of file quark.py.

◆ paging

bool searx.engines.quark.paging = True

Definition at line 24 of file quark.py.

◆ quark_category

str searx.engines.quark.quark_category = 'general'

Definition at line 27 of file quark.py.

◆ results_per_page

int searx.engines.quark.results_per_page = 10

Definition at line 25 of file quark.py.

◆ time_range_dict

dict searx.engines.quark.time_range_dict = {'day': '4', 'week': '3', 'month': '2', 'year': '1'}

Definition at line 30 of file quark.py.

◆ time_range_support

bool searx.engines.quark.time_range_support = True

Definition at line 29 of file quark.py.