.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
soundcloud.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""SoundCloud is a German audio streaming service."""
3
4import re
5from urllib.parse import quote_plus, urlencode
6import datetime
7
8from dateutil import parser
9from lxml import html
10
11from searx.network import get as http_get
12
# Engine metadata: where the service lives and how this engine talks to it.
about = {
    # Fixed: the scheme was misspelled ("ttps://"), yielding an unusable link.
    "website": "https://soundcloud.com",
    "wikidata_id": "Q568769",
    "official_api_documentation": "https://developers.soundcloud.com/docs/api/guide",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}
21
categories = ["music"]
paging = True

search_url = "https://api-v2.soundcloud.com/search"
"""This is not the official (developer) URL; it is the API that the HTML
frontend of the regular web site uses.
"""

# Pattern used to pull the client id out of the frontend's JavaScript assets.
cid_re = re.compile(r'client_id:"([^"]*)"', re.IGNORECASE | re.UNICODE)

# Filled in by init(); stays empty until a client id has been scraped.
guest_client_id = ""

# Page size; also used to compute the offset of a result page.
results_per_page = 10

soundcloud_facet = "model"

# Language code (the part of the request language before any "-") mapped to
# the value sent as ``app_locale``.  Codes not listed fall back to "en".
app_locale_map = {
    "de": "de",
    "en": "en",
    "es": "es",
    "fr": "fr",
    "oc": "fr",
    "it": "it",
    "nl": "nl",
    "pl": "pl",
    "szl": "pl",
    "pt": "pt_BR",
    "pap": "pt_BR",
    "sv": "sv",
}
50
51
def request(query, params):
    """Store the search URL for ``query`` in ``params['url']``.

    The offset is derived from ``params['pageno']`` and the page size; the
    ``app_locale`` argument is looked up from the part of
    ``params['language']`` before the first ``-`` (``"en"`` when unmapped).

    :return: the same ``params`` dict, with ``url`` set
    """

    # missing attributes: user_id, app_version
    # - user_id=451561-497874-703312-310156
    # - app_version=1740727428

    page_index = params['pageno'] - 1
    lang_code = params["language"].partition("-")[0]

    # An ordered list of pairs keeps the query-string order explicit.
    query_args = [
        ("q", query),
        ("offset", page_index * results_per_page),
        ("limit", results_per_page),
        ("facet", soundcloud_facet),
        ("client_id", guest_client_id),
        ("app_locale", app_locale_map.get(lang_code, "en")),
    ]

    params['url'] = search_url + "?" + urlencode(query_args)
    return params
69
70
def response(resp):
    """Turn a SoundCloud search response into a list of result dicts.

    Only ``collection`` entries of kind ``track`` or ``playlist`` that carry
    a ``permalink_url`` are reported.  Optional fields that are missing or
    ``null`` in the JSON are tolerated, so one incomplete entry no longer
    aborts the whole result page.

    :param resp: response object whose ``json()`` returns the decoded payload
    :return: list of result dicts (``url``, ``title``, ``content``,
        ``publishedDate``, ``thumbnail``, ``views``, ``author`` and, when
        available, ``iframe_src`` and ``length``)
    """
    results = []
    data = resp.json()

    for result in data.get("collection", []):

        if result.get("kind") not in ("track", "playlist"):
            continue

        url = result.get("permalink_url")
        if not url:
            continue

        # Keys may be absent or explicitly null, so normalise before use.
        user = result.get("user") or {}
        last_modified = result.get("last_modified")
        uri = result.get("uri")
        content = (result.get("description"), result.get("label_name"))

        res = {
            "url": url,
            "title": result.get("title") or "",
            "content": " / ".join(c for c in content if c),
            "publishedDate": parser.parse(last_modified) if last_modified else None,
            "thumbnail": result.get("artwork_url") or user.get("avatar_url") or None,
            # The play count is what gets reported; 0 is mapped to None.
            "views": result.get("playback_count") or None,
            "author": user.get("full_name") or None,
        }

        # Without a resource URI there is nothing to embed in the player.
        if uri:
            res["iframe_src"] = "https://w.soundcloud.com/player/?url=" + quote_plus(uri)

        # Duration is divided by 1000 to get whole seconds; 0 means no length.
        length = int((result.get("duration") or 0) / 1000)
        if length:
            res["length"] = datetime.timedelta(seconds=length)

        results.append(res)

    return results
105
106
def init(engine_settings=None):  # pylint: disable=unused-argument
    """Fetch a guest client id and store it in the module-level
    ``guest_client_id``.

    ``engine_settings`` is accepted but not used.
    """
    global guest_client_id  # pylint: disable=global-statement
    fetched_id = get_client_id()
    guest_client_id = fetched_id
110
111
def get_client_id() -> str:
    """Scrape a ``client_id`` from the SoundCloud web frontend.

    Loads the start page, collects the ``<script>`` tags whose ``src``
    contains ``/assets/`` and downloads those scripts, last one first, until
    one of them matches ``cid_re``.

    :return: the client id, or an empty string when the start page could not
        be loaded or no script contained a match
    """
    # NOTE(review): ``logger`` is not defined in the visible source;
    # presumably it is provided to the module from outside -- confirm.

    client_id = ""
    start_url = "https://soundcloud.com"
    resp = http_get(start_url, timeout=10)

    if not resp.ok:
        logger.error("init: GET %s failed", start_url)
        return client_id

    # extract the app_js URLs from the soundcloud.com start page
    tree = html.fromstring(resp.content)
    script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
    app_js_urls = [tag.get("src") for tag in script_tags]

    for app_js_url in reversed(app_js_urls):

        # Fetch the script with the same timeout as the start page, so a
        # stalled asset download cannot hold up initialisation.
        resp = http_get(app_js_url, timeout=10)

        if not resp.ok:
            logger.error("init: app_js GET %s failed", app_js_url)
            continue

        # Undecodable bytes are replaced rather than raising: only the
        # client_id pattern matters here.
        match = cid_re.search(resp.content.decode(errors="replace"))
        if match:
            client_id = match.group(1)
            break

    if client_id:
        logger.info("using client_id '%s' for soundcloud queries", client_id)
    else:
        logger.warning("missing valid client_id for soundcloud queries")
    return client_id
init(engine_settings=None)
request(query, params)
Definition soundcloud.py:52