.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
yacy.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""YaCy_ is a free distributed search engine, built on the principles of
3peer-to-peer (P2P) networks.
4
5API: Dev:APIyacysearch_
6
7Releases:
8
9- https://github.com/yacy/yacy_search_server/tags
10- https://download.yacy.net/
11
12.. _Yacy: https://yacy.net/
13.. _Dev:APIyacysearch: https://wiki.yacy.net/index.php/Dev:APIyacysearch
14
15Configuration
16=============
17
18The engine has the following (additional) settings:
19
20- :py:obj:`http_digest_auth_user`
21- :py:obj:`http_digest_auth_pass`
22- :py:obj:`search_mode`
23- :py:obj:`search_type`
24
25The :py:obj:`base_url` has to be set in the engine named `yacy` and is used by
26all yacy engines.
27
28.. code:: yaml
29
30 - name: yacy
31 engine: yacy
32 categories: general
33 search_type: text
34 shortcut: ya
35 base_url:
36 - https://yacy.searchlab.eu
37 - https://search.lomig.me
38 - https://yacy.ecosys.eu
39 - https://search.webproject.link
40
41 - name: yacy images
42 engine: yacy
43 categories: images
44 search_type: image
45 shortcut: yai
46 disabled: true
47
48
49Implementations
50===============
51"""
52# pylint: disable=fixme
53
54
55import random
56from json import loads
57from urllib.parse import urlencode
58from dateutil import parser
59
60from httpx import DigestAuth
61
62from searx.utils import html_to_text
63
# metadata about the YaCy service and the API this engine talks to
about = {
    "website": "https://yacy.net/",
    "wikidata_id": "Q1759675",
    "official_api_documentation": "https://wiki.yacy.net/index.php/Dev:API",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}
73
74# engine dependent config
75categories = ['general']
76paging = True
77number_of_results = 10
78http_digest_auth_user = ""
79"""HTTP digest user for the local YACY instance"""
80http_digest_auth_pass = ""
81"""HTTP digest password for the local YACY instance"""
82
83search_mode = 'global'
84"""Yacy search mode ``global`` or ``local``. By default, Yacy operates in ``global``
85mode.
86
87``global``
88 Peer-to-Peer search
89
90``local``
91 Privacy or Stealth mode, restricts the search to local yacy instance.
92"""
93search_type = 'text'
94"""One of ``text``, ``image`` / The search-types ``app``, ``audio`` and
95``video`` are not yet implemented (Pull-Requests are welcome).
96"""
97
98base_url: list | str = 'https://yacy.searchlab.eu'
99"""The value is an URL or a list of URLs. In the latter case instance will be
100selected randomly.
101"""
102
103
def init(_):
    """Check the engine configuration.

    The positional argument is accepted but not used.

    Raises:
        ValueError: if the module-level ``search_type`` is not one of the
            implemented content types.
    """
    # 'app', 'audio' and 'video' are not implemented yet
    implemented = ['text', 'image']
    if search_type in implemented:
        return
    raise ValueError('search_type "%s" is not one of %s' % (search_type, implemented))
112
113
def _base_url() -> str:
    """Return the URL of the YaCy instance to query.

    The value is read from the ``base_url`` attribute of the engine registered
    under the name ``yacy``.  If that value is a list, one entry is chosen at
    random.  A single trailing ``/`` is removed from the result.
    """
    from searx.engines import engines  # pylint: disable=import-outside-toplevel

    configured = engines['yacy'].base_url  # type: ignore
    instance = random.choice(configured) if isinstance(configured, list) else configured
    # strip at most one trailing slash so the request path can be appended
    return instance[:-1] if instance.endswith("/") else instance
123
124
def request(query, params):
    """Prepare the HTTP request for a YaCy search.

    Sets ``params["url"]`` to the ``yacysearch.json`` endpoint of the selected
    instance, with the query, paging window, content domain and search mode
    encoded as query arguments.  A language restriction is added unless the
    requested language is ``all``.  If both digest credentials are configured,
    ``params['auth']`` is set as well.

    Args:
        query: the search terms.
        params: request parameters; ``pageno`` and ``language`` are read.

    Returns:
        The same ``params`` mapping, updated in place.
    """
    args = {
        'query': query,
        # pages are 1-based, records are 0-based
        'startRecord': (params['pageno'] - 1) * number_of_results,
        'maximumRecords': number_of_results,
        'contentdom': search_type,
        'resource': search_mode,
    }

    # restrict by language: only the primary subtag is sent
    language = params['language']
    if language != 'all':
        primary_subtag = language.split('-')[0]
        args['lr'] = 'lang_' + primary_subtag

    query_string = urlencode(args)
    params["url"] = f"{_base_url()}/yacysearch.json?{query_string}"

    credentials_configured = http_digest_auth_user and http_digest_auth_pass
    if credentials_configured:
        params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)

    return params
146
147
def response(resp):
    """Convert a YaCy JSON answer into a list of result dicts.

    Only the first entry of ``channels`` is evaluated.  Depending on the
    module-level ``search_type`` each item becomes either an image result
    (``images.html`` template) or a general text result.

    Incomplete items no longer abort the whole page: missing ``title`` /
    ``description`` / ``link`` fields fall back to an empty string, image
    items without a target URL or without an ``image`` are skipped, and a
    ``pubDate`` that cannot be parsed yields ``publishedDate = None``.

    Args:
        resp: response object; only its ``text`` attribute (JSON) is read.

    Returns:
        A list of result dicts, empty if the answer carries no items.
    """
    raw_search_results = loads(resp.text)

    # empty document
    if not raw_search_results:
        return []

    # missing, null or empty channel list
    channels = raw_search_results.get('channels')
    if not channels:
        return []

    results = []
    for item in channels[0].get('items') or []:

        # parse image results
        if search_type == 'image':
            result_url = item.get('url') or item.get('link')
            img_src = item.get('image')
            # an image result is useless without a target and a picture
            if not result_url or not img_src:
                continue

            results.append(
                {
                    'url': result_url,
                    'title': item.get('title', ''),
                    'content': '',
                    'img_src': img_src,
                    'template': 'images.html',
                }
            )
            continue

        # parse general results
        published_date = None
        raw_date = item.get('pubDate')
        if raw_date:
            try:
                published_date = parser.parse(raw_date)
            except (ValueError, OverflowError, TypeError):
                # a single malformed date must not discard the whole page
                published_date = None

        results.append(
            {
                'url': item.get('link') or '',
                'title': item.get('title', ''),
                'content': html_to_text(item.get('description') or ''),
                'publishedDate': published_date,
            }
        )

        # TODO parse video, audio and file results

    return results
str _base_url()
Definition yacy.py:114
request(query, params)
Definition yacy.py:125
::1337x
Definition 1337x.py:1