.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
yacy.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""YaCy_ is a free distributed search engine, built on the principles of
3peer-to-peer (P2P) networks.
4
5API: Dev:APIyacysearch_
6
7Releases:
8
9- https://github.com/yacy/yacy_search_server/tags
10- https://download.yacy.net/
11
12.. _Yacy: https://yacy.net/
13.. _Dev:APIyacysearch: https://wiki.yacy.net/index.php/Dev:APIyacysearch
14
15Configuration
16=============
17
18The engine has the following (additional) settings:
19
20- :py:obj:`http_digest_auth_user`
21- :py:obj:`http_digest_auth_pass`
22- :py:obj:`search_mode`
23- :py:obj:`search_type`
24
25The :py:obj:`base_url` has to be set in the engine named `yacy` and is used by
26all yacy engines.
27
28.. code:: yaml
29
30 - name: yacy
31 engine: yacy
32 categories: general
33 search_type: text
34 shortcut: ya
35 base_url:
36 - https://yacy.searchlab.eu
37 - https://search.lomig.me
38 - https://yacy.ecosys.eu
39 - https://search.webproject.link
40
41 - name: yacy images
42 engine: yacy
43 categories: images
44 search_type: image
45 shortcut: yai
46 disabled: true
47
48
49Implementations
50===============
51"""
52# pylint: disable=fixme
53
54from __future__ import annotations
55
56import random
57from json import loads
58from urllib.parse import urlencode
59from dateutil import parser
60
61from httpx import DigestAuth
62
63from searx.utils import html_to_text
64
65# about
# Engine metadata: project website, Wikidata item, API documentation link,
# whether the official API is used / needs a key, and the result format.
about = {
    "website": "https://yacy.net/",
    "wikidata_id": "Q1759675",
    "official_api_documentation": "https://wiki.yacy.net/index.php/Dev:API",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}
74
75# engine dependent config
categories = ["general"]
paging = True
number_of_results = 10

http_digest_auth_user = ""
"""User name for HTTP digest authentication against the local YaCy instance."""

http_digest_auth_pass = ""
"""Password for HTTP digest authentication against the local YaCy instance."""

search_mode = "global"
"""YaCy search mode, either ``global`` or ``local``.  The default is ``global``.

``global``
  Peer-to-Peer search

``local``
  Privacy or Stealth mode, restricts the search to the local YaCy instance.
"""

search_type = "text"
"""One of ``text`` or ``image``.  The search types ``app``, ``audio`` and
``video`` are not yet implemented (Pull-Requests are welcome).
"""

base_url: list | str = "https://yacy.searchlab.eu"
"""Either a single URL or a list of URLs.  When a list is given, one instance
is selected randomly.
"""
103
104
def init(_):
    """Check that the configured ``search_type`` is one this engine supports.

    The positional argument is ignored.

    :raises ValueError: if ``search_type`` is neither ``text`` nor ``image``.
    """
    # 'app', 'audio' and 'video' are not implemented yet
    supported = ['text', 'image']
    if search_type in supported:
        return
    raise ValueError(f'search_type "{search_type}" is not one of {supported}')
113
114
def _base_url() -> str:
    """Return the base URL of the YaCy instance to send the request to.

    The value is read from the ``base_url`` attribute of the engine registered
    under the name ``yacy``.  If that attribute is a list, one of its entries
    is picked at random; otherwise the value is returned unchanged.
    """
    from searx.engines import engines  # pylint: disable=import-outside-toplevel

    configured = engines['yacy'].base_url  # type: ignore
    return random.choice(configured) if isinstance(configured, list) else configured
122
123
def request(query, params):
    """Fill *params* with the URL (and optional auth) for a YaCy search.

    :param query: the search terms, sent as the ``query`` argument.
    :param params: request parameters; ``pageno`` and ``language`` are read,
        ``url`` and (when digest credentials are configured) ``auth`` are set.
    :returns: the same *params* mapping.
    """
    args = {
        'query': query,
        # pages are 1-based, YaCy expects a 0-based record offset
        'startRecord': (params['pageno'] - 1) * number_of_results,
        'maximumRecords': number_of_results,
        'contentdom': search_type,
        'resource': search_mode,
    }

    # restrict by language unless every language was requested; only the
    # part in front of the first '-' of the language tag is sent
    language = params['language']
    if language != 'all':
        args['lr'] = 'lang_' + language.partition('-')[0]

    instance = _base_url()
    params["url"] = f"{instance}/yacysearch.json?{urlencode(args)}"

    # digest auth is only attached when both user and password are set
    if http_digest_auth_user and http_digest_auth_pass:
        params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)

    return params
145
146
def response(resp):
    """Convert a ``yacysearch.json`` reply into a list of result dicts.

    Only the items of the first entry in ``channels`` are read.  Depending on
    the module-level ``search_type`` each item becomes either an image result
    (``images.html`` template) or a general text result.

    :param resp: HTTP response whose ``text`` attribute holds the JSON body.
    :returns: list of result dicts; empty when the reply has no channels.
    """
    raw_search_results = loads(resp.text)

    # return empty array if there are no results
    if not raw_search_results:
        return []

    channels = raw_search_results.get('channels', [])
    if not channels:
        return []

    results = []
    for item in channels[0].get('items', []):
        # parse image results
        if search_type == 'image':
            # the page URL is taken from 'url', falling back to 'link';
            # items that have neither are skipped
            if 'url' in item:
                page_url = item['url']
            elif 'link' in item:
                page_url = item['link']
            else:
                continue

            results.append(
                {
                    'url': page_url,
                    'title': item['title'],
                    'content': '',
                    'img_src': item['image'],
                    'template': 'images.html',
                }
            )
            continue

        # parse general results
        published_date = None
        if 'pubDate' in item:
            try:
                published_date = parser.parse(item['pubDate'])
            except (ValueError, OverflowError, TypeError):
                # one malformed date must not discard every result of the
                # page; keep the item and leave its date unset
                published_date = None

        results.append(
            {
                'url': item['link'] or '',
                'title': item['title'],
                'content': html_to_text(item['description']),
                'publishedDate': published_date,
            }
        )

        # TODO parse video, audio and file results

    return results
str _base_url()
Definition yacy.py:115
request(query, params)
Definition yacy.py:124
::1337x
Definition 1337x.py:1