.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
soundcloud.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""SoundCloud is a German audio streaming service."""
3from __future__ import annotations
4
5import re
6import typing
7import datetime
8
9from urllib.parse import quote_plus, urlencode
10
11from dateutil import parser
12from lxml import html
13
14from searx.network import get as http_get
15from searx.enginelib import EngineCache
16
17if typing.TYPE_CHECKING:
18 import logging
19
20 logger: logging.Logger
21
# Engine metadata: the service's website, its Wikidata id, the official API
# documentation, and flags stating that this engine neither uses the official
# API nor needs an API key.
about = {
    "website": "https://soundcloud.com",
    "wikidata_id": "Q568769",
    "official_api_documentation": "https://developers.soundcloud.com/docs/api/guide",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ["music"]
# request() reads params['pageno'] to compute the "offset" query argument.
paging = True

search_url = "https://api-v2.soundcloud.com/search"
"""This is not the official (developer) url, it is the API which is used by the
HTML frontend of the common WEB site.
"""

# Captures the value of a ``client_id:"..."`` assignment; get_client_id()
# applies it to the script assets referenced by the soundcloud.com start page.
cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
# Page size: sent as "limit" and used as the step width of "offset" in
# request().
results_per_page = 10

# Sent unchanged as the "facet" query argument in request().
soundcloud_facet = "model"

# Maps the part of params["language"] before the first "-" to the value sent
# as "app_locale"; request() falls back to "en" for languages not listed here.
app_locale_map = {
    "de": "de",
    "en": "en",
    "es": "es",
    "fr": "fr",
    "oc": "fr",
    "it": "it",
    "nl": "nl",
    "pl": "pl",
    "szl": "pl",
    "pt": "pt_BR",
    "pap": "pt_BR",
    "sv": "sv",
}

# Assigned in init(); request() stores the scraped guest client id in it under
# the key "guest_client_id".
CACHE: EngineCache
"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
seconds."""
62
63
def request(query, params):
    """Build the search URL for ``query`` and store it in ``params['url']``.

    The guest client id is read from :py:obj:`CACHE`; on a cache miss it is
    scraped with :py:func:`get_client_id` and, if one was found, written back
    to the cache.

    :param query: search terms entered by the user.
    :param params: request parameters; ``pageno`` and ``language`` are read,
        ``url`` is written.
    :return: the same ``params`` mapping, with ``url`` set.
    """

    # missing attributes: user_id, app_version
    # - user_id=451561-497874-703312-310156
    # - app_version=1740727428

    client_id = CACHE.get("guest_client_id")
    if client_id is None:
        client_id = get_client_id()
        # only a usable (non-empty) id is worth caching
        if client_id:
            CACHE.set(key="guest_client_id", value=client_id)

    page_index = params['pageno'] - 1
    # "de-DE" -> "de"; languages without a mapping are requested in English
    language_prefix = params["language"].split("-")[0]
    app_locale = app_locale_map.get(language_prefix, "en")

    query_args = {
        "q": query,
        "offset": page_index * results_per_page,
        "limit": results_per_page,
        "facet": soundcloud_facet,
        "client_id": client_id,
        "app_locale": app_locale,
    }

    params['url'] = search_url + "?" + urlencode(query_args)
    return params
87
88
def response(resp):
    """Convert a SoundCloud search response into a list of result dicts.

    Only items of kind ``track`` or ``playlist`` that carry a
    ``permalink_url`` are converted; every other entry of ``collection`` is
    skipped.  Optional fields that are missing or ``null`` in the payload
    yield ``None`` (or an empty string for ``title``) instead of raising.

    :param resp: HTTP response object whose ``json()`` method returns the
        decoded payload (a mapping with an optional ``collection`` list).
    :return: list of dicts with the keys ``url``, ``title``, ``content``,
        ``publishedDate``, ``iframe_src``, ``views``, ``thumbnail``,
        ``author`` and, when a non-zero duration is known, ``length``.
    """
    results = []
    data = resp.json()

    # "collection" may be absent or null
    for item in data.get("collection") or []:

        if item.get("kind") not in ("track", "playlist"):
            continue
        url = item.get("permalink_url")
        if not url:
            continue

        # the payload may carry "user": null, so .get("user", {}) is not enough
        user = item.get("user") or {}
        uri = item.get("uri")
        last_modified = item.get("last_modified")
        content = (item.get("description"), item.get("label_name"))

        res = {
            "url": url,
            "title": item.get("title") or "",
            "content": " / ".join(c for c in content if c),
            "publishedDate": parser.parse(last_modified) if last_modified else None,
            # without an API uri there is nothing to hand to the player widget
            "iframe_src": ("https://w.soundcloud.com/player/?url=" + quote_plus(uri)) if uri else None,
            "views": item.get("playback_count") or None,
            # prefer the item's artwork, fall back to the uploader's avatar
            "thumbnail": item.get("artwork_url") or user.get("avatar_url") or None,
        }

        # duration is divided by 1000 to get whole seconds for the timedelta;
        # unknown or zero durations add no "length" key
        seconds = int((item.get("duration") or 0) / 1000)
        if seconds:
            res["length"] = datetime.timedelta(seconds=seconds)

        res["author"] = user.get("full_name") or None
        results.append(res)

    return results
123
124
def init(engine_settings):
    """Create the engine's cache and bind it to the module-level ``CACHE``.

    :param engine_settings: engine configuration; its ``name`` entry is passed
        to :py:obj:`EngineCache`.
    """
    global CACHE  # pylint: disable=global-statement
    engine_name = engine_settings["name"]
    CACHE = EngineCache(engine_name)  # type:ignore
128
129
def get_client_id() -> str | None:
    """Scrape a guest ``client_id`` from the soundcloud.com web frontend.

    Fetches the start page, collects the ``<script>`` tags whose ``src``
    contains ``/assets/`` and searches those scripts, last one first, for a
    ``client_id:"..."`` assignment (see ``cid_re``).

    :return: the first non-empty client id found, or ``None`` when the start
        page can't be fetched or no script yields a client id.
    """
    start_url = "https://soundcloud.com"
    resp = http_get(start_url, timeout=10)

    if not resp.ok:
        logger.error("init: GET %s failed", start_url)
        # None (not "") so that every failure path matches the annotation
        return None

    # extracts valid app_js urls from soundcloud.com content
    tree = html.fromstring(resp.content)
    script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
    app_js_urls = [tag.get("src") for tag in script_tags]

    for app_js_url in reversed(app_js_urls):

        # gets app_js and search for the client_id
        resp = http_get(app_js_url)

        if not resp.ok:
            logger.error("init: app_js GET %s failed", app_js_url)
            continue

        # a stray undecodable byte must not abort the whole lookup
        match = cid_re.search(resp.content.decode(errors="replace"))
        # an empty client_id:"" is useless, keep looking in the next script
        if match and match.group(1):
            client_id = match.group(1)
            logger.info("using client_id '%s' for soundcloud queries", client_id)
            return client_id

    logger.warning("missing valid client_id for soundcloud queries")
    return None
request(query, params)
Definition soundcloud.py:64
init(engine_settings)