.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
sjp.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Słownik Języka Polskiego
3
4Dictionary of the Polish language from PWN (sjp.pwn)
5"""
6
from html import escape
from urllib.parse import quote

from lxml.html import fromstring

from searx import logger
from searx.utils import extract_text
from searx.network import raise_for_httperror
11
logger = logger.getChild('sjp engine')

# Engine metadata.
about = dict(
    website='https://sjp.pwn.pl',
    wikidata_id='Q55117369',
    official_api_documentation=None,
    use_official_api=False,
    require_api_key=False,
    results='HTML',
    language='pl',
)

# Engine settings.
categories = ['dictionaries']
paging = False

# Site root and the search URL template; ``{query}`` is filled in by
# ``request()``.
URL = 'https://sjp.pwn.pl'
SEARCH_URL = f'{URL}/szukaj/{{query}}.html'

# XPath of the element whose text is used as the looked-up word.
word_xpath = '//div[@class="query"]'

# XPaths of the result containers scanned by ``response()``, in this order.
# The three class lists differ only in their prefix (presumably one per
# dictionary on the site -- verify against the page markup).
dict_xpath = [
    f'//div[@class="wyniki {prefix}-wyniki {prefix}-anchor"]'
    for prefix in ('sjp-so', 'sjp', 'sjp-doroszewski')
]
37
38
def request(query, params):
    """Fill in the request URL for a dictionary lookup of ``query``.

    The query is inserted as a single path segment of ``SEARCH_URL``.  It is
    percent-encoded first (including ``/``) so that characters such as ``?``,
    ``#``, ``%`` or ``/`` in the user's input cannot change the structure of
    the URL.

    :param query: the search terms entered by the user.
    :param params: request parameters; ``params['url']`` is set in place.
    :return: the same ``params`` object.
    """
    params['url'] = SEARCH_URL.format(query=quote(query, safe=''))
    logger.debug("query_url --> %s", params['url'])
    return params
43
44
def response(resp):
    """Convert the dictionary result page into a single infobox result.

    The page is parsed, the looked-up word is read from ``word_xpath`` and
    the definitions are gathered from every container matched by
    ``dict_xpath``.

    :param resp: HTTP response; ``resp.text`` is parsed as HTML after
        ``raise_for_httperror(resp)`` has checked it.
    :return: an empty list when no definitions are found, otherwise a list
        holding one dict with the keys ``'infobox'`` (the word) and
        ``'content'`` (the definitions rendered as an HTML fragment).
    """
    raise_for_httperror(resp)
    dom = fromstring(resp.text)
    word = extract_text(dom.xpath(word_xpath))

    definitions = _collect_definitions(dom, word)
    if not definitions:
        return []

    return [
        {
            'infobox': word,
            'content': _render_infobox(definitions),
        }
    ]


def _collect_definitions(dom, word):
    """Gather ``(source_title, [(item_html, [sub_item_html, ...]), ...])`` tuples.

    All text taken from the page (and the link target) is HTML-escaped here,
    because the infobox content is later assembled as HTML markup and the
    page content is remote, untrusted input.
    """
    word_html = escape(word or '')
    definitions = []

    for container_xpath in dict_xpath:
        for container in dom.xpath(container_xpath):
            title = extract_text(container.xpath('.//span[@class="entry-head-title"]/text()')).strip()

            entries = []
            for item in container.xpath('.//div[contains(@class, "ribbon-element")]'):
                meanings = item.xpath('./div[@class="znacz"]')
                if meanings:
                    # Numbered meanings: drop the leading "1. " style
                    # enumeration, the list markup provides the numbering.
                    sub_items = [escape(extract_text(meaning).lstrip('0123456789. ')) for meaning in meanings]
                    entries.append((word_html, sub_items))
                    continue

                item_html = escape(extract_text(item).strip())
                links = item.xpath('./span/a/@href')
                # An item may carry no link at all; only index into the
                # result once we know it is non-empty.
                if links and 'doroszewski' in links[0]:
                    item_html = f"<a href='{escape(links[0])}'>{item_html}</a>"
                entries.append((item_html, []))

            definitions.append((escape(title), entries))

    return definitions


def _render_infobox(definitions):
    """Render the tuples from ``_collect_definitions`` as one HTML fragment."""
    parts = []
    for title_html, entries in definitions:
        parts.append(f"<div><small>{title_html}</small><ul>")
        for item_html, sub_items in entries:
            parts.append(f"<li>{item_html}</li>")
            if sub_items:
                parts.append("<ol>")
                parts.extend(f"<li>{sub_item}</li>" for sub_item in sub_items)
                parts.append("</ol>")
        parts.append("</ul></div>")
    return ''.join(parts)
request(query, params)
Definition sjp.py:39
response(resp)
Definition sjp.py:45