duckduckgo_definitions.py
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
DuckDuckGo Instant Answer API
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The `DDG-API <https://duckduckgo.com/api>`__ is no longer documented, but from
reverse engineering we can see that some services (e.g. instant answers) are
still in use by the DDG search engine.

As far as we can tell, the *instant answers* API does not support languages, or
at least we could not find out how language support should work.  It seems that
most of the features are based on English terms.
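
A request built by :py:obj:`request` looks like, for example::

    https://api.duckduckgo.com/?q=tower+bridge&format=json&pretty=0&no_redirect=1&d=1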

"""

from typing import TYPE_CHECKING

from urllib.parse import urlencode, urlparse, urljoin
from lxml import html

from searx.data import WIKIDATA_UNITS
from searx.utils import extract_text, html_to_text, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
from searx.result_types import EngineResults

if TYPE_CHECKING:
    import logging

    logger: logging.Logger

# about
about = {
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
    "official_api_documentation": 'https://duckduckgo.com/api',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

send_accept_language_header = True

URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/']

replace_http_by_https = get_string_replaces_function({'http:': 'https:'})


51 """duckduckgo may return something like ``<a href="xxxx">http://somewhere Related website<a/>``
52
53 The href URL is broken, the "Related website" may contains some HTML.
54
55 The best solution seems to ignore these results.
56 """
57 return text.startswith('http') and ' ' in text
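
# Illustrative behaviour (example values, not from the API):
#   is_broken_text('http://example.org Related website')  -> True   (result is dropped)
#   is_broken_text('Related website')                      -> False  (result is kept)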


def result_to_text(text, htmlResult):
    # TODO : remove result ending with "Meaning" or "Category"  # pylint: disable=fixme
    result = None
    dom = html.fromstring(htmlResult)
    a = dom.xpath('//a')
    if len(a) >= 1:
        result = extract_text(a[0])
    else:
        result = text
    if not is_broken_text(result):
        return result
    return None
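
# Illustrative examples (assumed input shapes):
#   result_to_text('Paris', '<a href="https://duckduckgo.com/Paris">Paris</a>')  -> 'Paris'
#   result_to_text('http://x y', '<span>no anchor</span>')                       -> None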


def request(query, params):
    params['url'] = URL.format(query=urlencode({'q': query}))
    return params
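
# Illustrative: request('tower bridge', {}) sets params['url'] to
#   https://api.duckduckgo.com/?q=tower+bridge&format=json&pretty=0&no_redirect=1&d=1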


def response(resp) -> EngineResults:
    # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    results = EngineResults()

    search_res = resp.json()

    # search_res.get('Entity') possible values (not exhaustive):
    # * continent / country / department / location / waterfall
    # * actor / musician / artist
    # * book / performing art / film / television / media franchise / concert tour / playwright
    # * prepared food
    # * website / software / os / programming language / file format / software engineer
    # * company

    content = ''
    heading = search_res.get('Heading', '')
    attributes = []
    urls = []
    infobox_id = None
    relatedTopics = []

    # add answer if there is one
    answer = search_res.get('Answer', '')
    if answer:
        answer_type = search_res.get('AnswerType')
        logger.debug('AnswerType="%s" Answer="%s"', answer_type, answer)
        if isinstance(answer, str) and answer_type not in ['calc', 'ip']:
            results.add(
                results.types.Answer(
                    answer=html_to_text(answer),
                    url=search_res.get('AbstractURL', ''),
                )
            )

    # add infobox
    if 'Definition' in search_res:
        content = content + search_res.get('Definition', '')

    if 'Abstract' in search_res:
        content = content + search_res.get('Abstract', '')

    # image
    image = search_res.get('Image')
    image = None if image == '' else image
    if image is not None and urlparse(image).netloc == '':
        image = urljoin('https://duckduckgo.com', image)
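        # Illustrative: a relative path such as '/i/example.jpg' becomes
        # 'https://duckduckgo.com/i/example.jpg'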

    # urls
    # Official website, Wikipedia page
    for ddg_result in search_res.get('Results', []):
        firstURL = ddg_result.get('FirstURL')
        text = ddg_result.get('Text')
        if firstURL is not None and text is not None:
            urls.append({'title': text, 'url': firstURL})
            results.append({'title': heading, 'url': firstURL})

    # related topics
    for ddg_result in search_res.get('RelatedTopics', []):
        if 'FirstURL' in ddg_result:
            firstURL = ddg_result.get('FirstURL')
            text = ddg_result.get('Text')
            if not is_broken_text(text):
                suggestion = result_to_text(text, ddg_result.get('Result'))
                if suggestion != heading and suggestion is not None:
                    results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            suggestions = []
            relatedTopics.append({'name': ddg_result.get('Name', ''), 'suggestions': suggestions})
            for topic_result in ddg_result.get('Topics', []):
                suggestion = result_to_text(topic_result.get('Text'), topic_result.get('Result'))
                if suggestion != heading and suggestion is not None:
                    suggestions.append(suggestion)

    # abstract
    abstractURL = search_res.get('AbstractURL', '')
    if abstractURL != '':
        # add as result? problem: always in English
        infobox_id = abstractURL
        urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL, 'official': True})
        results.append({'url': abstractURL, 'title': heading})

    # definition
    definitionURL = search_res.get('DefinitionURL', '')
    if definitionURL != '':
        # add as result? as answer? problem: always in English
        infobox_id = definitionURL
        urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL})

    # to merge with wikidata's infobox
    if infobox_id:
        infobox_id = replace_http_by_https(infobox_id)
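        # Illustrative: 'http://en.wikipedia.org/wiki/X' becomes 'https://en.wikipedia.org/wiki/X'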

    # attributes
    # some will be converted to urls
    if 'Infobox' in search_res:
        infobox = search_res.get('Infobox')
        if 'content' in infobox:
            osm_zoom = 17
            coordinates = None
            for info in infobox.get('content'):
                data_type = info.get('data_type')
                data_label = info.get('label')
                data_value = info.get('value')

                # Workaround: ddg may return a double quote
                if data_value == '""':
                    continue

                # Is it an external URL?
                # * imdb_id / facebook_profile / youtube_channel / youtube_video / twitter_profile
                # * instagram_profile / rotten_tomatoes / spotify_artist_id / itunes_artist_id / soundcloud_id
                # * netflix_id
                external_url = get_external_url(data_type, data_value)
                if external_url is not None:
                    urls.append({'title': data_label, 'url': external_url})
                elif data_type in ['instance', 'wiki_maps_trigger', 'google_play_artist_id']:
                    # ignore instance: Wikidata value from "Instance Of" (Qxxxx)
                    # ignore wiki_maps_trigger: reference to a javascript
                    # ignore google_play_artist_id: service shutdown
                    pass
                elif data_type == 'string' and data_label == 'Website':
                    # There is already a URL for the website
                    pass
                elif data_type == 'area':
                    attributes.append({'label': data_label, 'value': area_to_str(data_value), 'entity': 'P2046'})
                    osm_zoom = area_to_osm_zoom(data_value.get('amount'))
                elif data_type == 'coordinates':
                    if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2':
                        # coordinate on Earth
                        # get the zoom information from the area
                        coordinates = info
                    else:
                        # coordinate NOT on Earth
                        attributes.append({'label': data_label, 'value': data_value, 'entity': 'P625'})
                elif data_type == 'string':
                    attributes.append({'label': data_label, 'value': data_value})

            if coordinates:
                data_label = coordinates.get('label')
                data_value = coordinates.get('value')
                latitude = data_value.get('latitude')
                longitude = data_value.get('longitude')
                url = get_earth_coordinates_url(latitude, longitude, osm_zoom)
                urls.append({'title': 'OpenStreetMap', 'url': url, 'entity': 'P625'})

    if len(heading) > 0:
        # TODO get infobox.meta.value where .label='article_title'    # pylint: disable=fixme
        if image is None and len(attributes) == 0 and len(urls) == 1 and len(relatedTopics) == 0 and len(content) == 0:
            results.append({'url': urls[0]['url'], 'title': heading, 'content': content})
        else:
            results.append(
                {
                    'infobox': heading,
                    'id': infobox_id,
                    'content': content,
                    'img_src': image,
                    'attributes': attributes,
                    'urls': urls,
                    'relatedTopics': relatedTopics,
                }
            )

    return results
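
# Illustrative (abridged, assumed) payload:
#   {"Heading": "Tower Bridge",
#    "Abstract": "Tower Bridge is a ... bridge in London ...",
#    "AbstractSource": "Wikipedia",
#    "AbstractURL": "https://en.wikipedia.org/wiki/Tower_Bridge", ...}
# response() then yields a plain link result for the abstract URL plus an
# infobox whose content is the abstract and whose urls include the Wikipedia page.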


def unit_to_str(unit):
    """Convert a Wikidata unit entity URL to its unit symbol, if known."""
    for prefix in WIKIDATA_PREFIX:
        if unit.startswith(prefix):
            wikidata_entity = unit[len(prefix) :]
            real_unit = WIKIDATA_UNITS.get(wikidata_entity)
            if real_unit is None:
                return unit
            return real_unit['symbol']
    return unit
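
# Illustrative (assuming WIKIDATA_UNITS maps Q712226, "square kilometre", to km²):
#   unit_to_str('https://www.wikidata.org/entity/Q712226')  -> 'km²'
#   unit_to_str('metre')                                    -> 'metre'  (no prefix match)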


def area_to_str(area):
    """parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``"""
    unit = unit_to_str(area.get('unit'))
    if unit is not None:
        try:
            amount = float(area.get('amount'))
            return '{} {}'.format(amount, unit)
        except ValueError:
            pass
    return '{} {}'.format(area.get('amount', ''), area.get('unit', ''))
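
# Illustrative (same assumption as above):
#   area_to_str({'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'})
#   -> '20.99 km²'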