.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
ina.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""
3 INA (Videos)
4"""
5
from html import unescape
from urllib.parse import urlencode, urljoin

from lxml import html

from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
10
# about
# Engine metadata: the upstream site, how results are obtained (HTML scraping,
# no official API, no API key) and the language of the indexed content.
about = {
    "website": 'https://www.ina.fr/',
    "wikidata_id": 'Q1665109',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
    "language": 'fr',
}
21
# engine dependent config
categories = ['videos']
paging = True
# number of hits per result page; used by request() to compute the offset
page_size = 12

# search-url
base_url = 'https://www.ina.fr'
# ``{query}`` takes an already URL-encoded ``q=...`` pair, ``{start}`` the hit offset
search_url = base_url + '/ajax/recherche?{query}&espace=1&sort=pertinence&order=desc&offset={start}&modified=size'

# specific xpath variables
# ``results_xpath`` selects one node per hit; the other expressions are
# evaluated relative to such a hit node.
results_xpath = '//div[@id="searchHits"]/div'
url_xpath = './/a/@href'
title_xpath = './/div[contains(@class,"title-bloc-small")]'
content_xpath = './/div[contains(@class,"sous-titre-fonction")]'
thumbnail_xpath = './/img/@data-src'
publishedDate_xpath = './/div[contains(@class,"dateAgenda")]'
38
39
# do search-request
def request(query, params):
    """Build the INA search URL for ``query`` and store it in ``params['url']``.

    Args:
        query: Search terms; URL-encoded as the ``q`` parameter.
        params: Request parameters. ``params['pageno']`` is read and
            ``params['url']`` is written.

    Returns:
        The same ``params`` dict, with ``'url'`` set.
    """
    # SearXNG page numbers start at 1, so the first page has to map to
    # offset 0; multiplying pageno directly would skip the first page of hits.
    offset = (params['pageno'] - 1) * page_size
    params['url'] = search_url.format(start=offset, query=urlencode({'q': query}))
    return params
44
45
# get response from search-request
def response(resp):
    """Turn the INA search response into a list of video results.

    Args:
        resp: Response of the search request; only ``resp.text`` is read and
            it is parsed as HTML.

    Returns:
        A list of dicts, one per hit, with the keys ``url``, ``title``,
        ``content``, ``template`` (always ``'videos.html'``) and ``thumbnail``.
        Hits without a link are left out.
    """
    results = []

    # the response body is parsed directly as an HTML fragment
    dom = html.fromstring(resp.text)

    # parse results
    for result in eval_xpath_list(dom, results_xpath):
        # A hit without a link cannot be shown; skip it instead of letting a
        # single malformed hit raise and discard the whole result page.
        url_relative = eval_xpath_getindex(result, url_xpath, 0, default=None)
        if not url_relative:
            continue

        # urljoin copes with absolute hrefs and hrefs lacking a leading slash,
        # which plain string concatenation would turn into broken URLs.
        url = urljoin(base_url, url_relative)
        title = unescape(extract_text(eval_xpath(result, title_xpath)))
        thumbnail = extract_text(eval_xpath(result, thumbnail_xpath))

        # Date and subtitle are separate elements; join them with a space so
        # they do not run together, and drop whichever one is empty.
        published = extract_text(eval_xpath(result, publishedDate_xpath))
        summary = extract_text(eval_xpath(result, content_xpath))
        content = ' '.join(part for part in (published, summary) if part)

        # append result
        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
                'template': 'videos.html',
                'thumbnail': thumbnail,
            }
        )

    # return results
    return results
request(query, params)
Definition ina.py:41
response(resp)
Definition ina.py:47