.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
gentoo.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""
3 Gentoo Wiki
4"""
5
6from urllib.parse import urlencode, urljoin
7from lxml import html
8from searx.utils import extract_text
9
10# about
# Descriptive metadata about the Gentoo Wiki source: where the site lives,
# its Wikidata id, where its API is documented, and that results are taken
# from HTML without using the API or an API key.
about = dict(
    website="https://wiki.gentoo.org/",
    wikidata_id="Q1050637",
    official_api_documentation="https://wiki.gentoo.org/api.php",
    use_official_api=False,
    require_api_key=False,
    results="HTML",
)
19
20# engine dependent config
# Category labels attached to this engine.
categories = ["it", "software wikis"]

# Paging is enabled: request() converts the 1-based page number into a
# result offset.
paging = True

# Root address of the wiki.
base_url = "https://wiki.gentoo.org"
24
25# xpath queries
# One node per search hit: the <li> children of the "mw-search-results" list.
xpath_results = '//ul[@class="mw-search-results"]/li'

# Evaluated relative to a hit: the anchor inside its heading <div>, which
# supplies both the title text and the href.
xpath_link = './/div[@class="mw-search-result-heading"]/a'

# Evaluated relative to a hit: the <div> used as the result's content text.
xpath_content = './/div[@class="searchresult"]'
29
30
31# cut 'en' from 'en-US', 'de' from 'de-CH', and so on
def locale_to_lang_code(locale):
    """Return the language part of a locale string.

    Everything from the first ``-`` onwards is dropped, so ``'en-US'``
    becomes ``'en'`` and ``'de-CH'`` becomes ``'de'``.  A value that
    contains no ``-`` is returned unchanged.

    :param locale: locale string such as ``'en-US'`` or ``'en'``.
    :returns: the text before the first ``-``, or *locale* itself.
    """
    if '-' in locale:
        locale = locale.split('-')[0]
    return locale
36
37
38# wikis for some languages were moved off from the main site, we need to make
39# requests to correct URLs to be able to get results in those languages
# URL templates keyed by language group.  'base' is the site root and
# 'search' is a str.format() template appended to it; the placeholders are
# {offset}, {query} (an already URL-encoded "search=..." pair) and, for
# non-English searches, {language}.
lang_urls = {
    'en': {
        'base': 'https://wiki.gentoo.org',
        'search': '/index.php?title=Special:Search&offset={offset}&{query}',
    },
    'others': {
        'base': 'https://wiki.gentoo.org',
        # Built from adjacent literals on purpose: a backslash continuation
        # *inside* the quotes keeps the next line's leading whitespace as
        # part of the string, which put blanks into the generated URL.
        'search': (
            '/index.php?title=Special:Search&offset={offset}&{query}'
            '&profile=translation&languagefilter={language}'
        ),
    },
}
48
49
50# get base & search URLs for selected language
def get_lang_urls(language):
    """Return the ``{'base': ..., 'search': ...}`` URL entry for *language*.

    ``'en'`` selects the English entry of ``lang_urls``; every other value
    selects the ``'others'`` entry.
    """
    group = 'en' if language == 'en' else 'others'
    return lang_urls[group]
55
56
57# Language names to build search requests for
58# those languages which are hosted on the main site.
# Maps a two-letter language code to that language's native name.  request()
# appends the name, in parentheses, to the search terms for these languages.
main_langs = {
    "ar": "العربية",
    "bg": "Български",
    "cs": "Česky",
    "da": "Dansk",
    "el": "Ελληνικά",
    "es": "Español",
    "he": "עברית",
    "hr": "Hrvatski",
    "hu": "Magyar",
    "it": "Italiano",
    "ko": "한국어",
    "lt": "Lietuviškai",
    "nl": "Nederlands",
    "pl": "Polski",
    "pt": "Português",
    "ru": "Русский",
    "sl": "Slovenský",
    "th": "ไทย",
    "uk": "Українська",
    "zh": "简体中文",
}
81
82
83# do search-request
def request(query, params):
    """Store the Gentoo Wiki search URL for *query* in ``params['url']``.

    Reads ``params['language']`` (a locale such as ``'en-US'``) and
    ``params['pageno']`` (1-based page number), and returns the same
    *params* dict with ``'url'`` set.
    """
    lang = locale_to_lang_code(params['language'])

    # For languages listed in main_langs, the language's native name is
    # appended in parentheses to narrow the results to that language.
    if lang in main_langs:
        query = query + ' (' + main_langs[lang] + ')'

    endpoints = get_lang_urls(lang)
    template = endpoints['base'] + endpoints['search']

    params['url'] = template.format(
        query=urlencode({'search': query}),
        # the offset advances by 20 for each page after the first
        offset=20 * (params['pageno'] - 1),
        language=lang,
    )
    return params
104
105
106# get response from search-request
def response(resp):
    """Parse a search results page into a list of result dicts.

    Reads ``resp.search_params['language']`` to pick the base URL that
    relative links are resolved against, and ``resp.text`` as the HTML to
    parse.

    :returns: a list of ``{'url': ..., 'title': ..., 'content': ...}`` dicts,
        one per result item that has a heading link.
    """
    # base URL for the language in which the request was made
    lang = locale_to_lang_code(resp.search_params['language'])
    base = get_lang_urls(lang)['base']

    results = []
    dom = html.fromstring(resp.text)

    for item in dom.xpath(xpath_results):
        anchors = item.xpath(xpath_link)
        if not anchors:
            # Without a heading anchor there is neither a title nor a target.
            # Indexing [0] here would raise IndexError and lose every other
            # result on the page, so the item is skipped instead.
            continue
        anchor = anchors[0]

        results.append(
            {
                'url': urljoin(base, anchor.attrib.get('href')),
                'title': extract_text(anchor),
                'content': extract_text(item.xpath(xpath_content)),
            }
        )

    return results
locale_to_lang_code(locale)
Definition gentoo.py:32
get_lang_urls(language)
Definition gentoo.py:51
request(query, params)
Definition gentoo.py:84