.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
tubearchivist.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""`Tube Archivist`_ - *Your self hosted YouTube media server.*
3
4.. _Tube Archivist: https://www.tubearchivist.com
5
6This engine connects with a self-hosted instance of `Tube Archivist`_ to allow
7searching for your hosted videos.
8
9`Tube Archivist`_ (TA) requires authentication for all image loads via cookie
10authentication. What this means is that by default, SearXNG will have no way to
11pull images from TA (as there is no way to pass cookies in a URL string only).
12
13In the meantime while work is done on the TA side, this can be worked around by
14bypassing auth for images in TA by altering the default TA nginx file.
15
16This is located in the main tubearchivist docker container at::
17
18 /etc/nginx/sites-available/default
19
20It is **strongly** recommended to first set up the initial connection and
21verify that searching works (with broken images), and only then attempt this
22change. This will limit any debugging to only images, rather than
23tokens/networking.
24
25Steps to enable **unauthenticated** metadata access for channels and videos:
26
27#. Perform any backups of TA before editing core configurations.
28
29#. Copy the contents of the file ``/etc/nginx/sites-available/default`` in the
30 TA docker container
31
32#. Edit ``location /cache/videos`` and ``location /cache/channels``. Comment
33 out the line ``auth_request /api/ping/;`` to ``# auth_request /api/ping/;``.
34
35#. Save the file to wherever you normally store your docker configuration.
36
37#. Mount this new configuration over the default configuration. With ``docker
38 run``, this would be::
39
40 -v ./your-new-config.yml:/etc/nginx/sites-available/default
41
42 With ``docker compose``, this would be::
43
44 - "./your-new-config.yml:/etc/nginx/sites-available/default:ro"
45
46#. Start the TA container.
47
48After these steps, double check that TA works as normal (nothing should be
49different on the TA side). Searching again should now show images.
50
51
52Configuration
53=============
54
55The engine has the following required settings:
56
57- :py:obj:`base_url`
58- :py:obj:`ta_token`
59
60Optional settings:
61
62- :py:obj:`ta_link_to_mp4`
63
64.. code:: yaml
65
66 - name: tubearchivist
67 engine: tubearchivist
68 shortcut: tuba
69 base_url:
70 ta_token:
71 ta_link_to_mp4: true
72
73Implementations
74===============
75"""
76
77from __future__ import annotations
78
79from urllib.parse import urlencode
80from dateutil.parser import parse
81from searx.utils import html_to_text, humanize_number
82from searx.result_types import EngineResults
83
# Engine metadata describing the upstream service and how it is queried.
about = {
    # pylint: disable=line-too-long
    "website": 'https://www.tubearchivist.com',
    "official_api_documentation": 'https://docs.tubearchivist.com/api/introduction/',
    "use_official_api": True,
    # The engine cannot work without ``ta_token`` (``init`` raises when it is
    # unset), so an API key is required.
    "require_api_key": True,
    "results": 'JSON',
}
92
# engine dependent config
categories = ["videos"]
# NOTE(review): ``request`` builds its URL from the query only and never reads
# ``params['pageno']`` -- confirm that paging is really supported by the API
# before relying on this flag.
paging = True

base_url: str = ""
"""Base URL of the Tube Archivist instance. Fill this in with your own
Tube Archivist URL (``http://your-instance:port``)."""

ta_token: str = ""
"""The API key to use for Authorization_ header. Can be found under:

 :menuselection:`Settings --> User --> Admin Interface`.

.. _Authorization: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Authorization
"""

ta_link_to_mp4: bool = False
"""Optional, if true SearXNG will link directly to the mp4 of the video to play
in the browser. The default behavior is to link into TubeArchivist's interface
directly."""
113
114
def absolute_url(relative_url):
    """Return ``relative_url`` prefixed with the configured ``base_url``.

    Trailing slashes are stripped from ``base_url`` before joining;
    ``relative_url`` is appended unchanged.
    """
    prefix = base_url.rstrip("/")
    return f'{prefix}{relative_url}'
117
118
def init(_):
    """Check that the required engine settings are present.

    The argument is ignored.

    Raises:
        ValueError: if ``base_url`` or ``ta_token`` is empty (``base_url`` is
            checked first).
    """
    required_settings = (
        (base_url, 'tubearchivist engine: base_url is unset'),
        (ta_token, 'tubearchivist engine: ta_token is unset'),
    )
    for value, message in required_settings:
        if not value:
            raise ValueError(message)
124
125
def request(query, params):
    """Fill ``params`` with the URL and auth header for a search request.

    Returns ``False`` when ``query`` is empty; otherwise sets ``params['url']``
    to the ``/api/search`` endpoint below ``base_url`` and adds the
    ``Authorization`` header built from ``ta_token``, then returns ``params``.
    """
    if query:
        query_string = urlencode({'query': query})
        params['url'] = f"{base_url.rstrip('/')}/api/search?{query_string}"
        params['headers']['Authorization'] = f'Token {ta_token}'
        return params

    return False
135
136
def response(resp) -> EngineResults:
    """Collect the results parsed from ``resp`` into a new ``EngineResults``."""
    engine_results = EngineResults()
    video_response(resp, engine_results)
    return engine_results
141
142
def video_response(resp, results: EngineResults) -> None:
    """Parse video response from Tubearchivist instances.

    Reads the JSON body of ``resp`` and appends to ``results`` one entry per
    item of ``results.channel_results`` (channels) and
    ``results.video_results`` (videos).

    A body without a ``results`` key adds nothing.  A missing or empty
    ``channel_results`` / ``video_results`` list is treated as "no hits of that
    kind" instead of raising ``KeyError``.
    """

    json_data = resp.json()

    if 'results' not in json_data:
        return

    # tolerate ``"results": null`` as well as absent sub-lists
    found = json_data['results'] or {}

    # NOTE(review): the API token is embedded in every thumbnail URL below
    # (``?auth=<ta_token>``) -- confirm this exposure is acceptable.

    for channel_result in found.get('channel_results') or []:
        channel_url = absolute_url(f'/channel/{channel_result["channel_id"]}')

        res = results.types.MainResult(
            url=channel_url,
            title=channel_result['channel_name'],
            content=html_to_text(channel_result['channel_description']),
            author=channel_result['channel_name'],
            views=humanize_number(channel_result['channel_subs']),
            thumbnail=f'{absolute_url(channel_result["channel_thumb_url"])}?auth={ta_token}',
        )

        results.add(result=res)

    for video_result in found.get('video_results') or []:
        channel_name = video_result['channel']['channel_name']
        # ``tags`` may be absent or null; either way there is nothing to add
        tags = video_result.get('tags') or []
        # channel name first, then tags; empty entries dropped, max. 5 items
        metadata = [item for item in (channel_name, *tags) if item][:5]

        if ta_link_to_mp4:
            # link straight to the media file
            url = absolute_url(video_result["media_url"])
        else:
            # link into the Tube Archivist web interface
            url = absolute_url(f'/?videoId={video_result["youtube_id"]}')

        # a type for the video.html template is not yet implemented
        # --> using LegacyResult

        kwargs = {
            'template': 'videos.html',
            'url': url,
            'title': video_result['title'],
            'content': html_to_text(video_result['description']),
            'author': channel_name,
            'length': video_result['player']['duration_str'],
            'views': humanize_number(video_result['stats']['view_count']),
            'publishedDate': parse(video_result['published']),
            'thumbnail': f'{absolute_url(video_result["vid_thumb_url"])}?auth={ta_token}',
            'metadata': ' | '.join(metadata),
        }
        results.add(results.types.LegacyResult(**kwargs))
None video_response(resp, EngineResults results)
EngineResults response(resp)