.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
soundcloud.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""SoundCloud is a German audio streaming service."""
3
4import re
5import datetime
6
7from urllib.parse import quote_plus, urlencode
8
9from dateutil import parser
10from lxml import html
11
12from searx.network import get as http_get
13from searx.enginelib import EngineCache
14
# Engine metadata.  "use_official_api": False is consistent with ``search_url``
# pointing at the API used by the web frontend rather than the developer API,
# and "results": 'JSON' is consistent with ``response()`` decoding the reply
# via ``resp.json()``.
about = {
    "website": "https://soundcloud.com",
    "wikidata_id": "Q568769",
    "official_api_documentation": "https://developers.soundcloud.com/docs/api/guide",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}
23
# NOTE(review): presumably the result category this engine is listed under
# -- confirm against the engine loader.
categories = ["music"]
# request() derives the result offset from params['pageno'].
paging = True

search_url = "https://api-v2.soundcloud.com/search"
"""This is not the official (developer) url, it is the API which is used by the
HTML frontend of the common WEB site.
"""

# Captures the value of ``client_id:"..."``; get_client_id() applies it to the
# content of the JavaScript assets it downloads.
cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
# Sent as the "limit" query argument and used as the page stride when
# request() computes "offset".
results_per_page = 10

# Sent unchanged as the "facet" query argument in request().
soundcloud_facet = "model"
36
# Maps the part of params["language"] before the first "-" to the value sent
# as the "app_locale" query argument.  request() falls back to "en" for any
# language that has no entry here.  Note that "oc", "szl" and "pap" are mapped
# to the locale of a different language ("fr", "pl" and "pt_BR").
app_locale_map = {
    "de": "de",
    "en": "en",
    "es": "es",
    "fr": "fr",
    "oc": "fr",
    "it": "it",
    "nl": "nl",
    "pl": "pl",
    "szl": "pl",
    "pt": "pt_BR",
    "pap": "pt_BR",
    "sv": "sv",
}
51
# Only declared (annotated) here; init() binds the actual EngineCache instance,
# so request() must not run before init() has been called.
CACHE: EngineCache
"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
seconds."""
55
56
def request(query, params):
    """Assemble the SoundCloud search URL and store it in ``params['url']``.

    The guest ``client_id`` is read from :py:obj:`CACHE`; when the cache yields
    ``None`` it is looked up with :py:func:`get_client_id` and, if that lookup
    produced a value, written back to the cache.

    :param query: the search terms, sent as the ``q`` argument.
    :param params: request parameters; ``pageno`` and ``language`` are read,
        ``url`` is written.
    :return: the same ``params`` mapping.
    """

    # missing attributes: user_id, app_version
    # - user_id=451561-497874-703312-310156
    # - app_version=1740727428

    client_id = CACHE.get("guest_client_id")
    if client_id is None:
        client_id = get_client_id()
        if client_id:
            CACHE.set(key="guest_client_id", value=client_id)

    # pages are 1-based, the API expects an item offset
    page_offset = (params['pageno'] - 1) * results_per_page
    # only the part before the first "-" selects the app locale
    language_tag = params["language"].split("-")[0]

    query_args = [
        ("q", query),
        ("offset", page_offset),
        ("limit", results_per_page),
        ("facet", soundcloud_facet),
        ("client_id", client_id),
        ("app_locale", app_locale_map.get(language_tag, "en")),
    ]

    params['url'] = search_url + "?" + urlencode(query_args)
    return params
80
81
def response(resp):
    """Convert a SoundCloud search reply into a list of result dicts.

    Only items of kind ``track`` or ``playlist`` that carry a
    ``permalink_url`` are turned into results.  Every other field is treated
    as optional, so one incomplete item no longer aborts the whole page with a
    ``KeyError`` / ``TypeError``.

    :param resp: HTTP response object; only its ``json()`` method is used.
    :return: list of dicts with the keys ``url``, ``title``, ``content``,
        ``publishedDate``, ``iframe_src``, ``views``, ``thumbnail``,
        ``author`` and, when the item has a non-zero duration, ``length``.
    """
    results = []
    data = resp.json()

    for result in data.get("collection", []):

        if result.get("kind") not in ("track", "playlist"):
            continue

        url = result.get("permalink_url")
        if not url:
            continue

        # "user" may be absent or null; normalise it to a dict once
        user = result.get("user") or {}

        # a missing or unparsable date yields None instead of an exception
        published = None
        last_modified = result.get("last_modified")
        if last_modified:
            try:
                published = parser.parse(last_modified)
            except (ValueError, OverflowError, TypeError):
                published = None

        # quote_plus() raises on None, so only build the player URL when the
        # item actually has an API uri
        uri = result.get("uri")
        iframe_src = None
        if uri:
            iframe_src = "https://w.soundcloud.com/player/?url=" + quote_plus(uri)

        content = [
            result.get("description"),
            result.get("label_name"),
        ]
        res = {
            "url": url,
            "title": result.get("title") or "",
            "content": " / ".join(c for c in content if c),
            "publishedDate": published,
            "iframe_src": iframe_src,
            # the play count is reported as views (0 or missing becomes None)
            "views": result.get("playback_count") or None,
            # fall back to the uploader's avatar when there is no artwork
            "thumbnail": result.get("artwork_url") or user.get("avatar_url") or None,
            "author": user.get("full_name") or None,
        }

        # the duration is divided by 1000 to get whole seconds; "length" is
        # only set for a non-zero duration
        length = int((result.get("duration") or 0) / 1000)
        if length:
            res["length"] = datetime.timedelta(seconds=length)

        results.append(res)

    return results
116
117
def init(engine_settings):
    """Bind the module-level :py:obj:`CACHE`.

    The cache is created as ``EngineCache(engine_settings["name"])``.

    :param engine_settings: engine configuration mapping; only its ``name``
        entry is read.
    """
    global CACHE  # pylint: disable=global-statement
    engine_name = engine_settings["name"]
    CACHE = EngineCache(engine_name)  # type:ignore
121
122
def get_client_id() -> str | None:
    """Scrape a guest ``client_id`` from the SoundCloud web frontend.

    Fetches ``https://soundcloud.com``, collects the ``src`` of every
    ``<script>`` tag whose ``src`` contains ``/assets/`` and downloads those
    scripts, last one first, until :py:obj:`cid_re` finds a non-empty
    ``client_id``.

    :return: the client id, or ``None`` when the landing page could not be
        fetched or no script contained a client id.  Both failure paths return
        ``None`` (the landing-page failure used to return ``""``), which
        matches the ``str | None`` annotation.
    """
    # NOTE(review): ``logger`` is not defined in the visible part of this file;
    # presumably it is provided from outside the module -- confirm.

    url = "https://soundcloud.com"
    resp = http_get(url, timeout=10)

    if not resp.ok:
        logger.error("init: GET %s failed", url)
        return None

    # extracts valid app_js urls from soundcloud.com content
    tree = html.fromstring(resp.content)
    script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
    app_js_urls = [tag.get("src") for tag in script_tags if tag is not None]

    client_id = None
    for app_js_url in reversed(app_js_urls):

        # gets app_js and search for the client_id
        resp = http_get(app_js_url)

        if not resp.ok:
            logger.error("init: app_js GET %s failed", app_js_url)
            continue

        match = cid_re.search(resp.content.decode())
        # the pattern also matches an empty id (client_id:""); keep looking in
        # the remaining scripts in that case instead of giving up
        if match and match.group(1):
            client_id = match.group(1)
            break

    if client_id:
        logger.info("using client_id '%s' for soundclud queries", client_id)
    else:
        logger.warning("missing valid client_id for soundclud queries")
    return client_id
request(query, params)
Definition soundcloud.py:57
init(engine_settings)