.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
yahoo.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Yahoo Search (Web)
3
4Languages are supported by mapping the language to a domain. If no domain is
5found for the language in :py:obj:`lang2domain`, the URL ``<lang>.search.yahoo.com`` is used.
6
7"""
8
9from urllib.parse import (
10 unquote,
11 urlencode,
12)
13from lxml import html
14
15from searx.utils import (
16 eval_xpath_getindex,
17 eval_xpath_list,
18 extract_text,
19 html_to_text,
20)
21from searx.enginelib.traits import EngineTraits
22
23traits: EngineTraits
24
# Engine metadata record ("about" section of the engine).
about = {
    "website": "https://search.yahoo.com/",
    "wikidata_id": None,
    "official_api_documentation": "https://developer.yahoo.com/api/",
    # The engine parses Yahoo's HTML result pages instead of calling an API,
    # hence no official API usage and no API key.
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}
34
# Engine dependent configuration.
categories = ["general", "web"]
paging = True
time_range_support = True
# send_accept_language_header = True

# Maps the time range name to the value pair that ``request`` unpacks as
# ``(age, btf)`` and sends in the query arguments of the same names.
time_range_dict = {
    "day": ("1d", "d"),
    "week": ("1w", "w"),
    "month": ("1m", "m"),
}
46
region2domain = {
    region: f"{subdomain}.search.yahoo.com"
    for region, subdomain in (
        ("CO", "co"),  # Colombia
        ("TH", "th"),  # Thailand
        ("VE", "ve"),  # Venezuela
        ("CL", "cl"),  # Chile
        ("HK", "hk"),  # Hong Kong
        ("PE", "pe"),  # Peru
        ("CA", "ca"),  # Canada
        ("DE", "de"),  # Germany
        ("FR", "fr"),  # France
        ("TW", "tw"),  # Taiwan
        ("GB", "uk"),  # United Kingdom
        ("UK", "uk"),  # alias, same host as GB
        ("BR", "br"),  # Brazil
        ("IN", "in"),  # India
        ("ES", "espanol"),  # Espanol
        ("PH", "ph"),  # Philippines
        ("AR", "ar"),  # Argentina
        ("MX", "mx"),  # Mexico
        ("SG", "sg"),  # Singapore
    )
}
"""Map the region part of a language tag to the Yahoo search host"""
69
lang2domain = {
    # The two Chinese variants have dedicated hosts.
    'zh_chs': 'hk.search.yahoo.com',
    'zh_cht': 'tw.search.yahoo.com',
    # All of these languages are served by the main host.
    **dict.fromkeys(
        (
            'any',
            'en',
            'bg',
            'cs',
            'da',
            'el',
            'et',
            'he',
            'hr',
            'ja',
            'ko',
            'sk',
            'sl',
        ),
        'search.yahoo.com',
    ),
}
"""Map the Yahoo language code to the search host"""
88
# Maps a language tag to the Yahoo language code that ``request`` sends as
# ``vl=lang_<code>``.
yahoo_languages = {
    "all": "any",
    # Languages whose Yahoo code is identical to the language tag.
    **{
        code: code
        for code in (
            "ar",  # Arabic
            "bg",  # Bulgarian
            "cs",  # Czech
            "da",  # Danish
            "de",  # German
            "el",  # Greek
            "en",  # English
            "es",  # Spanish
            "et",  # Estonian
            "fi",  # Finnish
            "fr",  # French
            "he",  # Hebrew
            "hr",  # Croatian
            "hu",  # Hungarian
            "it",  # Italian
            "ja",  # Japanese
            "ko",  # Korean
            "lt",  # Lithuanian
            "lv",  # Latvian
            "nl",  # Dutch
            "no",  # Norwegian
            "pl",  # Polish
            "pt",  # Portuguese
            "ro",  # Romanian
            "ru",  # Russian
            "sk",  # Slovak
            "sl",  # Slovenian
            "sv",  # Swedish
            "th",  # Thai
            "tr",  # Turkish
        )
    },
    # Chinese (Simplified)
    "zh": "zh_chs",
    "zh_Hans": "zh_chs",
    "zh-CN": "zh_chs",
    # Chinese (Traditional)
    "zh_Hant": "zh_cht",
    "zh-HK": "zh_cht",
    "zh-TW": "zh_cht",
}
128
129
def request(query, params):
    """Build the Yahoo search request.

    Reads ``params['language']``, ``params['pageno']`` and
    ``params['time_range']``.  Writes the request URL to ``params['url']`` and
    the selected Yahoo host to ``params['domain']`` (:py:obj:`response` reads
    the host back to choose its XPath selectors).

    The host is taken from :py:obj:`region2domain` when the language tag has a
    known region, otherwise from :py:obj:`lang2domain`, and finally falls back
    to ``<lang>.search.yahoo.com``.
    """

    language_tag = params["language"]
    lang, region = (language_tag.split("-") + [None])[:2]

    # Look up the complete tag first: entries such as ``zh-TW`` / ``zh-HK``
    # (traditional Chinese) would otherwise never be reached, because the bare
    # ``zh`` prefix maps to simplified Chinese.
    lang = yahoo_languages.get(language_tag) or yahoo_languages.get(lang, "any")

    # ``b`` is the 1-based index of the first result; pages advance by 7.
    offset = (params['pageno'] - 1) * 7 + 1
    # Unknown or unset time ranges send empty ``age`` / ``btf`` values.
    age, btf = time_range_dict.get(params['time_range'], ('', ''))

    args = urlencode(
        {
            'p': query,
            'ei': 'UTF-8',
            'fl': 1,
            'vl': 'lang_' + lang,
            'btf': btf,
            'fr2': 'time',
            'age': age,
            'b': offset,
            'xargs': 0,
        }
    )

    domain = region2domain.get(region)
    if not domain:
        domain = lang2domain.get(lang, f'{lang}.search.yahoo.com')
    params['url'] = f'https://{domain}/search?{args}'
    params['domain'] = domain
158
159
def parse_url(url_string):
    """Remove the Yahoo-specific tracking wrapper from a result URL.

    The target URL is searched for as the first ``http`` occurrence after the
    ``/RU=`` marker (or from the start of the string when the marker is
    missing) and is taken to end at the first ``/RS`` or ``/RK`` marker that
    follows it.  The extracted part is percent-decoded.

    :param url_string: URL taken from the ``href`` of a result link.
    :return: the decoded target URL, or ``url_string`` unchanged when no
        wrapped target can be located.
    """

    start = url_string.find('http', url_string.find('/RU=') + 1)

    # start == 0: the string already is a plain URL.
    # start == -1: there is no ``http`` to extract at all; slicing from -1
    # would return an empty / garbage string, so hand back the input instead.
    if start <= 0:
        return url_string

    # Only markers located *after* the target's start can terminate it; a
    # marker in front of it would produce an empty slice.
    endpositions = [
        endpos
        for endpos in (url_string.rfind(ending) for ending in ('/RS', '/RK'))
        if endpos > start
    ]
    if not endpositions:
        return url_string

    return unquote(url_string[start : min(endpositions)])
177
178
def response(resp):
    """Convert a Yahoo result page into a list of result dicts.

    Every ``algo-sr`` container that has a link yields a dict with the keys
    ``url``, ``title`` and ``content``; every link in the ``AlsoTry`` table
    yields a ``{'suggestion': ...}`` dict.  The host stored by
    :py:obj:`request` in ``resp.search_params['domain']`` selects the XPath
    expressions used for the link and the title.
    """

    dom = html.fromstring(resp.text)

    # The selectors for link and title differ between ``search.yahoo.com``
    # and all other hosts.
    if resp.search_params['domain'] == "search.yahoo.com":
        url_xpath = './/div[contains(@class,"compTitle")]/a/@href'
        title_xpath = './/div[contains(@class,"compTitle")]/a/h3/span'
    else:
        url_xpath = './/div[contains(@class,"compTitle")]/h3/a/@href'
        title_xpath = './/h3//a/@aria-label'

    def squeeze(text):
        """Drop HTML tags and collapse whitespace runs into single blanks."""
        return " ".join(html_to_text(text).strip().split())

    results = []

    # regular results
    for hit in eval_xpath_list(dom, '//div[contains(@class,"algo-sr")]'):
        tracking_url = eval_xpath_getindex(hit, url_xpath, 0, default=None)
        if tracking_url is None:
            # container without a link
            continue
        target_url = parse_url(tracking_url)

        title_node = eval_xpath_getindex(hit, title_xpath, 0, default='')
        title_text = extract_text(title_node)
        content_node = eval_xpath_getindex(hit, './/div[contains(@class, "compText")]', 0, default='')
        content_text = extract_text(content_node, allow_none=True)

        results.append(
            {
                'url': target_url,
                # title sometimes contains HTML tags / see
                # https://github.com/searxng/searxng/issues/3790
                'title': squeeze(title_text),
                'content': squeeze(content_text),
            }
        )

    # suggestions
    results.extend(
        {'suggestion': extract_text(link)}
        for link in eval_xpath_list(dom, '//div[contains(@class, "AlsoTry")]//table//a')
    )

    return results
parse_url(url_string)
Definition yahoo.py:160
request(query, params)
Definition yahoo.py:130