.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
tubearchivist.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""`Tube Archivist`_ - *Your self hosted YouTube media server.*
3
4.. _Tube Archivist: https://www.tubearchivist.com
5
6This engine connects with a self-hosted instance of `Tube Archivist`_ to allow
7searching for your hosted videos.
8
9`Tube Archivist`_ (TA) requires authentication for all image loads via cookie
10authentication. What this means is that by default, SearXNG will have no way to
11pull images from TA (as there is no way to pass cookies in a URL string only).
12
13In the meantime while work is done on the TA side, this can be worked around by
14bypassing auth for images in TA by altering the default TA nginx file.
15
16This is located in the main tubearchivist docker container at::
17
18 /etc/nginx/sites-available/default
19
20It is **strongly** recommended to first set up the initial connection and
21verify that searching works (with broken images), and only then attempt this
22change. This will limit any debugging to only images, rather than
23tokens/networking.
24
25Steps to enable **unauthenticated** metadata access for channels and videos:
26
27#. Perform any backups of TA before editing core configurations.
28
29#. Copy the contents of the file ``/etc/nginx/sites-available/default`` in the
30 TA docker container
31
32#. Edit ``location /cache/videos`` and ``location /cache/channels``. Comment
33 out the line ``auth_request /api/ping/;`` so it reads ``# auth_request /api/ping/;``.
34
35#. Save the file to wherever you normally store your docker configuration.
36
37#. Mount this new configuration over the default configuration. With ``docker
38 run``, this would be::
39
40 -v ./your-new-config.yml:/etc/nginx/sites-available/default
41
42 With ``docker compose``, this would be::
43
44 - "./your-new-config.yml:/etc/nginx/sites-available/default:ro"
45
46#. Start the TA container.
47
48After these steps, double check that TA works as normal (nothing should be
49different on the TA side). Searching again should now show images.
50
51
52Configuration
53=============
54
55The engine has the following required settings:
56
57- :py:obj:`base_url`
58- :py:obj:`ta_token`
59
60Optional settings:
61
62- :py:obj:`ta_link_to_mp4`
63
64.. code:: yaml
65
66 - name: tubearchivist
67 engine: tubearchivist
68 shortcut: tuba
69 base_url:
70 ta_token:
71 ta_link_to_mp4: true
72
73Implementations
74===============
75"""
76
77
78from urllib.parse import urlencode
79from dateutil.parser import parse
80from searx.utils import html_to_text, humanize_number
81from searx.result_types import EngineResults
82
# Engine metadata for the Tube Archivist engine.
about = {
    # pylint: disable=line-too-long
    "website": 'https://www.tubearchivist.com',
    "official_api_documentation": 'https://docs.tubearchivist.com/api/introduction/',
    "use_official_api": True,
    # The engine cannot run without ``ta_token`` (``init`` raises when it is
    # unset), so an API key is in fact required.
    "require_api_key": True,
    "results": 'JSON',
}
91
# engine dependent config
# Results of this engine are listed in the "videos" category.
categories = ["videos"]
# NOTE(review): paging is declared here, but ``request`` in this file does not
# read a page number from ``params`` -- confirm this flag is intended.
paging = True

# Required setting: ``init`` raises ValueError while this is empty.
base_url: str = ""
"""Base URL of the Tube Archivist instance. Fill this in with your own
Tube Archivist URL (``http://your-instance:port``)."""

# Required setting: ``init`` raises ValueError while this is empty.  The value
# is sent as ``Authorization: Token <ta_token>`` and is also appended to the
# thumbnail URLs as an ``auth`` query parameter.
ta_token: str = ""
"""The API key to use for Authorization_ header. Can be found under:

 :menuselection:`Settings --> User --> Admin Interface`.

.. _Authorization: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Authorization
"""

# Optional setting: selects which URL a video result links to.
ta_link_to_mp4: bool = False
"""Optional, if true SearXNG will link directly to the mp4 of the video to play
in the browser. The default behavior is to link into TubeArchivist's interface
directly."""
112
113
def absolute_url(relative_url):
    """Prefix ``relative_url`` with the configured :py:obj:`base_url`.

    Trailing slashes are removed from ``base_url`` before joining;
    ``relative_url`` is appended unchanged, so it is expected to carry its own
    leading ``/``.
    """
    instance_root = base_url.rstrip("/")
    return f'{instance_root}{relative_url}'
116
117
def init(_):
    """Validate the engine configuration.

    The single argument is ignored.

    :raises ValueError: if :py:obj:`base_url` or :py:obj:`ta_token` is empty
        (``base_url`` is checked first).
    """
    required_settings = (
        ('base_url', base_url),
        ('ta_token', ta_token),
    )
    for setting_name, setting_value in required_settings:
        if not setting_value:
            raise ValueError(f'tubearchivist engine: {setting_name} is unset')
123
124
def request(query, params):
    """Build the search request for the Tube Archivist instance.

    Sets ``params['url']`` to the ``/api/search`` endpoint of
    :py:obj:`base_url` with ``query`` URL-encoded, and adds the
    ``Authorization: Token ...`` header built from :py:obj:`ta_token`.

    :returns: the updated ``params``, or ``False`` (leaving ``params``
        untouched) when ``query`` is empty.
    """
    if query:
        encoded_args = urlencode({'query': query})
        search_endpoint = base_url.rstrip('/') + '/api/search'
        params['url'] = f"{search_endpoint}?{encoded_args}"
        params['headers']['Authorization'] = f'Token {ta_token}'
        return params

    # nothing to search for
    return False
134
135
def response(resp) -> EngineResults:
    """Turn the HTTP response of the instance into an :py:obj:`EngineResults`.

    A fresh result container is created, filled by :py:func:`video_response`
    and returned.
    """
    collected = EngineResults()
    video_response(resp, collected)
    return collected
140
141
def video_response(resp, results: EngineResults) -> None:
    """Parse video response from Tubearchivist instances.

    Reads the JSON body of ``resp`` and appends one result per entry of
    ``results.channel_results`` (as ``MainResult``) and one per entry of
    ``results.video_results`` (as ``LegacyResult`` using the ``videos.html``
    template) to ``results``.

    A payload without a ``results`` key, or whose ``channel_results`` /
    ``video_results`` lists are missing or null, adds nothing for the missing
    part instead of raising.

    :param resp: HTTP response object providing a ``json()`` method.
    :param results: result container that is extended in place.
    """

    json_data = resp.json()

    if 'results' not in json_data:
        return

    # tolerate ``"results": null`` as well as absent result lists
    found = json_data['results'] or {}

    for channel_result in found.get('channel_results') or []:
        channel_url = absolute_url(f'/channel/{channel_result["channel_id"]}')

        res = results.types.MainResult(
            url=channel_url,
            title=channel_result['channel_name'],
            content=html_to_text(channel_result['channel_description']),
            author=channel_result['channel_name'],
            views=humanize_number(channel_result['channel_subs']),
            # the API token is appended as ``auth`` query parameter
            thumbnail=f'{absolute_url(channel_result["channel_thumb_url"])}?auth={ta_token}',
        )

        results.add(result=res)

    for video_result in found.get('video_results') or []:
        channel_name = video_result['channel']['channel_name']

        # channel name followed by the tags, empty entries dropped, at most
        # five entries in total
        candidates = [channel_name, *(video_result.get('tags') or [])]
        metadata = [entry for entry in candidates if entry][:5]

        # link either to the media file itself or into the TA web interface
        if ta_link_to_mp4:
            url = absolute_url(video_result["media_url"])
        else:
            url = absolute_url(f'/?videoId={video_result["youtube_id"]}')

        # a type for the video.html template is not yet implemented
        # --> using LegacyResult

        kwargs = {
            'template': 'videos.html',
            'url': url,
            'title': video_result['title'],
            'content': html_to_text(video_result['description']),
            'author': channel_name,
            'length': video_result['player']['duration_str'],
            'views': humanize_number(video_result['stats']['view_count']),
            'publishedDate': parse(video_result['published']),
            # the API token is appended as ``auth`` query parameter
            'thumbnail': f'{absolute_url(video_result["vid_thumb_url"])}?auth={ta_token}',
            'metadata': ' | '.join(metadata),
        }
        results.add(results.types.LegacyResult(**kwargs))
None video_response(resp, EngineResults results)
EngineResults response(resp)