def request(query, params):
    """Build the outgoing Quark request for the configured category.

    Populates ``params["url"]`` and ``params["headers"]`` in place and
    returns ``params`` (SearXNG engine convention).

    NOTE(review): reconstructed from a line-mangled source dump with gaps —
    the 'general' category query parameters and the trailing ``return``
    were inferred and must be verified against the upstream file.

    :param query: the user's search terms (plain string).
    :param params: SearXNG request params dict; ``pageno`` and
        ``time_range`` are read, ``url`` and ``headers`` are written.
    """
    page_num = params["pageno"]

    # Endpoint and query parameters differ per engine category.
    category_config = {
        'general': {
            'endpoint': 'https://m.quark.cn/s',
            # NOTE(review): these keys were not visible in the dump — confirm.
            'params': {
                "q": query,
                "layout": "html",
                "page": page_num,
            },
        },
        'images': {
            'endpoint': 'https://vt.sm.cn/api/pic/list',
            'params': {
                "query": query,
                "limit": results_per_page,
                # The image API paginates by absolute offset, not page number.
                "start": (page_num - 1) * results_per_page,
            },
        },
    }

    query_params = category_config[quark_category]['params']
    query_url = category_config[quark_category]['endpoint']

    # Time-range filtering is only supported by the 'general' endpoint.
    if time_range_dict.get(params['time_range']) and quark_category == 'general':
        query_params["tl_request"] = time_range_dict.get(params['time_range'])

    params["url"] = f"{query_url}?{urlencode(query_params)}"
    params["headers"] = {
        # A browser-like UA avoids immediate bot rejection.
        "User-Agent": gen_useragent(),
    }
    return params
def response(resp):
    """Parse a Quark HTTP response into a list of SearXNG result dicts.

    NOTE(review): reconstructed from a line-mangled source dump with gaps —
    the function header, the ``results``/``text`` initialization, the
    CAPTCHA-detection condition, and a few control-flow lines (``try:``,
    ``if``/``else`` around the parser dispatch) were inferred and must be
    verified against the upstream file.

    :param resp: the HTTP response object (``.text`` is read).
    :raises SearxEngineCaptchaException: when Alibaba serves its CAPTCHA
        interstitial; the engine is suspended for 900 seconds.
    :return: list of result dicts (image results or parsed general results).
    """
    results = []
    text = resp.text

    # Alibaba serves a CAPTCHA interstitial when it suspects automation.
    # NOTE(review): the detection helper name was not visible — confirm.
    if is_alibaba_captcha(text):
        raise SearxEngineCaptchaException(
            suspended_time=900, message="Alibaba CAPTCHA detected. Please try again later."
        )

    if quark_category == 'images':
        # The images endpoint returns plain JSON.
        data = json.loads(text)
        for item in data.get('data', {}).get('hit', {}).get('imgInfo', {}).get('item', []):
            try:
                published_date = datetime.fromtimestamp(int(item.get("publish_time")))
            except (ValueError, TypeError):
                # publish_time missing or not numeric — leave the date unset.
                published_date = None

            results.append(
                {
                    "template": "images.html",
                    "url": item.get("imgUrl"),
                    "thumbnail_src": item.get("img"),
                    "img_src": item.get("bigPicUrl"),
                    "title": item.get("title"),
                    "source": item.get("site"),
                    "resolution": f"{item['width']} x {item['height']}",
                    "publishedDate": published_date,
                }
            )

    if quark_category == 'general':
        # Quark embeds several JSON payloads per page; each carries an
        # 'sc' (source-category) tag that selects the matching parser.
        source_category_parsers = {
            'addition': parse_addition,
            'ai_page': parse_ai_page,
            'baike_sc': parse_baike_sc,
            'finance_shuidi': parse_finance_shuidi,
            'kk_yidian_all': parse_kk_yidian_all,
            'life_show_general_image': parse_life_show_general_image,
            'med_struct': parse_med_struct,
            'music_new_song': parse_music_new_song,
            'nature_result': parse_nature_result,
            'news_uchq': parse_news_uchq,
            'ss_note': parse_ss_note,
            # Several source categories share the generic document parser.
            'ss_doc': parse_ss_doc,
            'ss_kv': parse_ss_doc,
            'ss_pic': parse_ss_doc,
            'ss_text': parse_ss_doc,
            'ss_video': parse_ss_doc,
            'baike': parse_ss_doc,
            'structure_web_novel': parse_ss_doc,
            'travel_dest_overview': parse_travel_dest_overview,
            'travel_ranking_list': parse_travel_ranking_list,
        }

        # Hydration payloads are embedded as JSON inside <script> tags;
        # DOTALL lets '.' span the multi-line JSON bodies.
        pattern = r'<script\s+type="application/json"\s+id="s-data-[^"]+"\s+data-used-by="hydrate">(.*?)</script>'
        matches = re.findall(pattern, text, re.DOTALL)

        for match in matches:
            data = json.loads(match)
            initial_data = data.get('data', {}).get('initialData', {})
            extra_data = data.get('extraData', {})

            source_category = extra_data.get('sc')

            parser = source_category_parsers.get(source_category)
            # Unknown source categories are silently skipped.
            if parser:
                parsed_results = parser(initial_data)
                if isinstance(parsed_results, list):
                    # Parser produced a list of results.
                    results.extend(parsed_results)
                else:
                    # Parser produced a single result dict.
                    results.append(parsed_results)

    return results