.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
acfun.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Acfun search engine for searxng"""
3
4from urllib.parse import urlencode
5import re
6import json
7from datetime import datetime, timedelta
8from lxml import html
9
10from searx.utils import extract_text
11
# Engine metadata describing the upstream site.
about = {
    "website": "https://www.acfun.cn/",
    "wikidata_id": "Q3077675",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
    "language": "zh",
}

# Engine configuration.
categories = ["videos"]
# request() reads params["pageno"] to select the result page.
paging = True

# Root of the Acfun site; the search, video and player URLs are built on it.
base_url = "https://www.acfun.cn"
28
29
def request(query, params):
    """Fill in ``params["url"]`` with the Acfun search URL for *query*.

    The search terms are sent as the ``keyword`` argument and
    ``params["pageno"]`` as ``pCursor``.  The (mutated) ``params`` dict is
    returned.
    """
    search_args = urlencode({"keyword": query, "pCursor": params["pageno"]})
    params["url"] = base_url + "/search?" + search_args
    return params
34
35
def response(resp):
    """Parse an Acfun search page into a list of video result dicts.

    ``resp.text`` is scanned for ``bigPipe.onPageletArrive({...});`` calls.
    Each JSON payload that carries an ``html`` fragment is parsed, and every
    ``div`` whose class contains ``search-video`` is handed to
    ``extract_video_data``.  Only results with a non-empty title and URL are
    kept.

    Pagelets whose payload is not valid JSON, or whose HTML fragment cannot
    be parsed, are skipped so that one bad pagelet does not discard the
    results of the others.
    """
    # Local import: the module only imports ``lxml.html`` at the top.
    from lxml.etree import ParserError  # pylint: disable=import-outside-toplevel

    results = []

    pagelet_pattern = r'bigPipe\.onPageletArrive\((\{.*?\})\);'
    for payload in re.findall(pagelet_pattern, resp.text, re.DOTALL):
        try:
            pagelet = json.loads(payload)
        except json.JSONDecodeError:
            continue

        raw_html = pagelet.get("html", "")
        if not raw_html or not isinstance(raw_html, str):
            continue

        try:
            tree = html.fromstring(raw_html)
        except ParserError:
            # lxml raises ParserError for fragments it considers empty
            # (e.g. whitespace only); there is nothing to extract from them.
            continue

        for video_block in tree.xpath('//div[contains(@class, "search-video")]'):
            video_info = extract_video_data(video_block)
            if video_info and video_info["title"] and video_info["url"]:
                results.append(video_info)

    return results
65
66
def _parse_duration(text):
    """Convert a ``MM:SS`` or ``HH:MM:SS`` string to a ``timedelta``, else ``None``."""
    try:
        fields = [int(field) for field in text.strip().split(":")]
    except ValueError:
        return None

    if len(fields) not in (2, 3) or any(field < 0 for field in fields):
        return None

    # Fold the fields base-60 so a minute field above 59 (e.g. "75:30") is
    # accepted; ``strptime("%M:%S")`` would reject it.
    total_seconds = 0
    for field in fields:
        total_seconds = total_seconds * 60 + field
    return timedelta(seconds=total_seconds)


def extract_video_data(video_block):
    """Build a result dict from one ``search-video`` element.

    The element's ``data-exposure-log`` attribute is decoded as JSON and its
    ``content_id`` and ``title`` keys are read; the creation date, cover
    image, duration and introduction text are taken from child elements.

    Returns a dict with the keys ``title``, ``url``, ``content``,
    ``thumbnail``, ``length``, ``publishedDate`` and ``iframe_src``.
    ``length`` and ``publishedDate`` are ``None`` when the corresponding text
    is missing or cannot be parsed, and ``thumbnail`` is an empty string when
    the block has no cover image.

    Returns ``None`` when the attribute is missing or not valid JSON, when it
    carries no ``content_id``, or when extraction otherwise fails.
    """
    try:
        data_exposure_log = video_block.get('data-exposure-log')
        video_data = json.loads(data_exposure_log)

        content_id = video_data.get("content_id", "")
        title = video_data.get("title", "")

        # Without an id the URLs below would end in a bare "/ac" and point
        # nowhere, so the block is not usable as a result.
        if not content_id:
            return None

        url = f"{base_url}/v/ac{content_id}"
        iframe_src = f"{base_url}/player/ac{content_id}"

        create_time = extract_text(video_block.xpath('.//span[contains(@class, "info__create-time")]'))
        video_duration = extract_text(video_block.xpath('.//span[contains(@class, "video__duration")]'))
        video_intro = extract_text(video_block.xpath('.//div[contains(@class, "video__main__intro")]'))

        # The cover image is optional: indexing an empty XPath result would
        # raise IndexError, which is not handled below.
        cover_sources = video_block.xpath('.//div[contains(@class, "video__cover")]/a/img/@src')
        video_cover = extract_text(cover_sources[0]) if cover_sources else ""

        published_date = None
        if create_time:
            try:
                published_date = datetime.strptime(create_time.strip(), "%Y-%m-%d")
            except (ValueError, TypeError):
                pass

        length = _parse_duration(video_duration) if video_duration else None

        return {
            "title": title,
            "url": url,
            "content": video_intro,
            "thumbnail": video_cover,
            "length": length,
            "publishedDate": published_date,
            "iframe_src": iframe_src,
        }

    except (json.JSONDecodeError, AttributeError, TypeError, ValueError):
        return None
request(query, params)
Definition acfun.py:30
extract_video_data(video_block)
Definition acfun.py:67