.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
yahoo_news.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Yahoo (News)
3
4Yahoo News is "English only" and does not offer localized or language-specific queries.
5
6"""
7
8# pylint: disable=invalid-name
9
10import re
11from urllib.parse import urlencode
12from datetime import datetime, timedelta
13from dateutil import parser
14from lxml import html
15
16from searx.utils import (
17 eval_xpath_list,
18 eval_xpath_getindex,
19 extract_text,
20)
21
22from searx.engines.yahoo import parse_url
23
# Engine metadata.
about = dict(
    website='https://news.yahoo.com',
    wikidata_id='Q3044717',
    official_api_documentation='https://developer.yahoo.com/api/',
    use_official_api=False,
    require_api_key=False,
    results='HTML',
)

# Feature flags: only paging is enabled for this engine.
categories = ['news']
paging = True
language_support = False
time_range_support = False
safesearch = False

# Search URL template; ``{query}`` takes an already URL-encoded query string
# and ``{offset}`` the value of the ``b`` parameter.
search_url = 'https://news.search.yahoo.com/search?{query}&b={offset}'
47
# Matches a relative age: a number followed (optionally after whitespace) by
# a time-unit name, e.g. the "3 hour" in "3 hours ago".
AGO_RE = re.compile(r'([0-9]+)\s*(year|month|week|day|minute|hour)')

# Duration of one unit for every unit name AGO_RE can capture.  Months and
# years are fixed approximations (30 and 365 days).
AGO_TIMEDELTA = dict(
    minute=timedelta(minutes=1),
    hour=timedelta(hours=1),
    day=timedelta(days=1),
    week=timedelta(days=7),
    month=timedelta(days=30),
    year=timedelta(days=365),
)
57
58
def request(query, params):
    """Fill ``params['url']`` with the Yahoo News search URL for *query*.

    :param query: search terms; URL-encoded here as the ``p`` parameter.
    :param params: request parameters; ``params['pageno']`` is read and
        ``params['url']`` is written.
    :return: the same ``params`` mapping, updated in place.
    """
    # Value of the ``b`` parameter: page 1 -> 1, page 2 -> 11, page 3 -> 21, ...
    first_result = 10 * (params['pageno'] - 1) + 1
    encoded_query = urlencode({'p': query})

    params['url'] = search_url.format(query=encoded_query, offset=first_result)
    # NOTE(review): ``logger`` is not defined in this module's visible source;
    # presumably it is injected by the engine loader -- confirm.
    logger.debug("query_url --> %s", params['url'])
    return params
65
66
def response(resp):
    """Parse a Yahoo News result page into a list of result dicts.

    Every ``<li>`` below the ``searchCenterMiddle`` list that contains an
    ``h4/a`` link yields a dict with ``url``, ``title``, ``content`` and
    ``img_src``; ``publishedDate`` is added when the item's time stamp can be
    interpreted.  Entries of the "AlsoTry" box are appended as
    ``{'suggestion': ...}`` dicts.

    :param resp: HTTP response whose ``text`` attribute holds the HTML page.
    :return: list of result and suggestion dicts.
    """
    results = []
    dom = html.fromstring(resp.text)

    # parse results
    for result in eval_xpath_list(dom, '//ol[contains(@class,"searchCenterMiddle")]//li'):

        url = eval_xpath_getindex(result, './/h4/a/@href', 0, None)
        if url is None:
            # list items without a headline link are not news results
            continue

        item = {
            # NOTE(review): parse_url comes from searx.engines.yahoo; presumably
            # it resolves Yahoo's redirect link to the target URL -- confirm.
            'url': parse_url(url),
            'title': extract_text(result.xpath('.//h4/a')),
            'content': extract_text(result.xpath('.//p')),
            'img_src': eval_xpath_getindex(result, './/img/@data-src', 0, None),
        }

        pub_date = extract_text(result.xpath('.//span[contains(@class,"s-time")]'))
        ago = AGO_RE.search(pub_date)
        if ago:
            # relative time stamp ("<number> <unit> ..."): subtract from now
            number = int(ago.group(1))
            delta = AGO_TIMEDELTA[ago.group(2)]
            pub_date = datetime.now() - delta * number
        else:
            try:
                pub_date = parser.parse(pub_date)
            except (parser.ParserError, OverflowError):
                # dateutil raises ParserError for unknown formats and
                # OverflowError for out-of-range numbers; a single bad time
                # stamp must not abort parsing of the whole page.
                pub_date = None

        if pub_date is not None:
            item['publishedDate'] = pub_date
        results.append(item)

    for suggestion in eval_xpath_list(dom, '//div[contains(@class,"AlsoTry")]//td'):
        results.append({'suggestion': extract_text(suggestion)})

    return results
request(query, params)
Definition yahoo_news.py:59