.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
yacy.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""YaCy_ is a free distributed search engine, built on the principles of
3peer-to-peer (P2P) networks.
4
5API: Dev:APIyacysearch_
6
7Releases:
8
9- https://github.com/yacy/yacy_search_server/tags
10- https://download.yacy.net/
11
12.. _Yacy: https://yacy.net/
13.. _Dev:APIyacysearch: https://wiki.yacy.net/index.php/Dev:APIyacysearch
14
15Configuration
16=============
17
18The engine has the following (additional) settings:
19
20- :py:obj:`http_digest_auth_user`
21- :py:obj:`http_digest_auth_pass`
22- :py:obj:`search_mode`
23- :py:obj:`search_type`
24
25The :py:obj:`base_url` has to be set in the engine named `yacy` and is used by
26all yacy engines.
27
.. code:: yaml

  - name: yacy
    engine: yacy
    categories: general
    search_type: text
    shortcut: ya
    base_url:
      - https://yacy.searchlab.eu
      - https://search.lomig.me
      - https://yacy.ecosys.eu
      - https://search.webproject.link

  - name: yacy images
    engine: yacy
    categories: images
    search_type: image
    shortcut: yai
    disabled: true
47
48
49Implementations
50===============
51"""
52# pylint: disable=fixme
53
54from __future__ import annotations
55
56import random
57from json import loads
58from urllib.parse import urlencode
59from dateutil import parser
60
61from httpx import DigestAuth
62
63from searx.utils import html_to_text
64
# Engine metadata: project website, Wikidata entry and API characteristics.
about = {
    "website": "https://yacy.net/",
    "wikidata_id": "Q1759675",
    "official_api_documentation": "https://wiki.yacy.net/index.php/Dev:API",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}
74
# Engine-specific settings (defaults; the docstrings below document each one).
categories = ["general"]
paging = True
number_of_results = 10

http_digest_auth_user = ""
"""User name for HTTP digest authentication on the local YaCy instance."""

http_digest_auth_pass = ""
"""Password for HTTP digest authentication on the local YaCy instance."""

search_mode = "global"
"""YaCy search mode, either ``global`` or ``local``.  The default is
``global``.

``global``
  Peer-to-Peer search

``local``
  Privacy or Stealth mode, restricts the search to the local YaCy instance.
"""

search_type = "text"
"""One of ``text`` or ``image``.  The search types ``app``, ``audio`` and
``video`` are not yet implemented (pull requests are welcome).
"""

base_url: list | str = "https://yacy.searchlab.eu"
"""Either a single URL or a list of URLs.  When a list is given, one instance
is selected randomly.
"""
103
104
def init(_):
    """Check that the configured ``search_type`` is one this engine supports.

    The single positional argument is ignored.

    :raises ValueError: if ``search_type`` is neither ``text`` nor ``image``.
    """
    # 'app', 'audio' and 'video' are not implemented yet
    supported = ['text', 'image']
    if search_type in supported:
        return
    raise ValueError('search_type "%s" is not one of %s' % (search_type, supported))
113
114
def _base_url() -> str:
    """Return the base URL to query, with one trailing ``/`` removed if present.

    The value is read from the ``base_url`` attribute of the engine registered
    under the name ``yacy``.  If that attribute is a list, one entry is picked
    at random on every call.
    """
    from searx.engines import engines  # pylint: disable=import-outside-toplevel

    configured = engines['yacy'].base_url  # type: ignore
    instance = random.choice(configured) if isinstance(configured, list) else configured
    return instance[:-1] if instance.endswith("/") else instance
124
125
def request(query, params):
    """Fill ``params`` with the URL (and optional auth) for a YaCy search.

    :param query: the search terms.
    :param params: request parameters; ``pageno`` and ``language`` are read,
        ``url`` and (when digest credentials are configured) ``auth`` are set.
    :returns: the same ``params`` mapping.
    """
    # pages are 1-based, YaCy expects a 0-based record offset
    first_record = (params['pageno'] - 1) * number_of_results
    query_args = {
        'query': query,
        'startRecord': first_record,
        'maximumRecords': number_of_results,
        'contentdom': search_type,
        'resource': search_mode,
    }

    language = params['language']
    if language != 'all':
        # only the part before the first '-' is sent (e.g. 'en' for 'en-US')
        query_args['lr'] = 'lang_' + language.partition('-')[0]

    params["url"] = f"{_base_url()}/yacysearch.json?{urlencode(query_args)}"

    # digest auth is only attached when both user and password are set
    if http_digest_auth_user and http_digest_auth_pass:
        params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)

    return params
147
148
def response(resp):
    """Convert a YaCy JSON answer into a list of result dicts.

    Only the items of the first entry in ``channels`` are read.  Depending on
    ``search_type`` each item becomes either an image result (rendered with
    the ``images.html`` template) or a general text result.

    :param resp: HTTP response whose ``text`` attribute holds the JSON body.
    :returns: list of result dicts; empty when the answer has no channels.
    """
    raw_search_results = loads(resp.text)

    # return empty list if there are no results
    if not raw_search_results:
        return []

    # `not channels` also covers a present-but-null "channels" value
    channels = raw_search_results.get('channels', [])
    if not channels:
        return []

    results = []
    for item in channels[0].get('items', []):
        # parse image results
        if search_type == 'image':
            if 'url' in item:
                result_url = item['url']
            elif 'link' in item:
                result_url = item['link']
            else:
                # an image item without any target URL is skipped
                continue

            results.append(
                {
                    'url': result_url,
                    'title': item['title'],
                    'content': '',
                    'img_src': item['image'],
                    'template': 'images.html',
                }
            )
            continue

        # parse general results
        published_date = None
        if 'pubDate' in item:
            try:
                published_date = parser.parse(item['pubDate'])
            except (ValueError, OverflowError, TypeError):
                # A malformed date from a remote peer must not discard the
                # whole result page; treat the date as unknown instead.
                published_date = None

        results.append(
            {
                'url': item['link'] or '',
                'title': item['title'],
                'content': html_to_text(item['description']),
                'publishedDate': published_date,
            }
        )

        # TODO parse video, audio and file results

    return results
str _base_url()
Definition yacy.py:115
request(query, params)
Definition yacy.py:126
::1337x
Definition 1337x.py:1