.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
google_videos.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""This is the implementation of the Google Videos engine.
3
4.. admonition:: Content-Security-Policy (CSP)
5
6 This engine needs to allow images from the `data URLs`_ (prefixed with the
7 ``data:`` scheme)::
8
9 Header set Content-Security-Policy "img-src 'self' data: ;"
10
11.. _data URLs:
12 https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
13
14"""
15from __future__ import annotations
16
17from typing import TYPE_CHECKING
18
19from urllib.parse import urlencode
20from lxml import html
21
22from searx.utils import (
23 eval_xpath,
24 eval_xpath_list,
25 eval_xpath_getindex,
26 extract_text,
27)
28
29from searx.engines.google import fetch_traits # pylint: disable=unused-import
30from searx.engines.google import (
31 get_google_info,
32 time_range_dict,
33 filter_mapping,
34 suggestion_xpath,
35 detect_google_sorry,
36 ui_async,
37 parse_data_images,
38)
39from searx.enginelib.traits import EngineTraits
40from searx.utils import get_embeded_stream_url
41
42if TYPE_CHECKING:
43 import logging
44
45 logger: logging.Logger
46
47traits: EngineTraits
48
# about: engine metadata shown in the SearXNG preferences
about = {
    "website": 'https://www.google.com',
    "wikidata_id": 'Q219885',
    "official_api_documentation": 'https://developers.google.com/custom-search',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config

categories = ['videos', 'web']
language_support = True
time_range_support = True
safesearch = True
paging = True
max_page = 50
"""Google stops serving results after 50 pages, see `Google: max 50 pages`_.

.. _Google: max 50 pages: https://github.com/searxng/searxng/issues/2982
"""
72
def request(query, params):
    """Google-Video search request.

    Builds the request URL for Google's video vertical (``tbm=vid``) and
    attaches the locale dependent cookies and headers returned by
    :py:obj:`get_google_info`.
    """

    google_info = get_google_info(params, traits)
    # Google counts results from 0: page 1 starts at offset 0.
    start = (params['pageno'] - 1) * 10

    query_url = (
        'https://'
        + google_info['subdomain']
        + '/search'
        + "?"
        + urlencode(
            {
                'q': query,
                'tbm': "vid",
                # Use the same zero-based offset here as in the 'async'
                # argument below; the previous value (10 * pageno) was
                # inconsistent with ui_async(start) and skipped the first
                # ten results on page 1.
                'start': start,
                **google_info['params'],
                'asearch': 'arc',
                'async': ui_async(start),
            }
        )
    )

    if params['time_range'] in time_range_dict:
        query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
    if 'safesearch' in params:
        query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
    params['url'] = query_url

    params['cookies'] = google_info['cookies']
    params['headers'].update(google_info['headers'])
    return params
105
106
def response(resp):
    """Parse Google's video search page into SearXNG result items."""
    results = []

    detect_google_sorry(resp)
    # id -> image mapping for thumbnails delivered as inline data: URLs
    data_image_map = parse_data_images(resp.text)

    # build a DOM from the HTML response
    dom = html.fromstring(resp.text)

    # parse the result containers
    for res_dom in eval_xpath_list(dom, '//div[contains(@class, "g ")]'):

        # a thumbnail is either a direct URL or a ``data:image`` placeholder
        # that has to be resolved through the data-image map by its img id
        thumbnail = eval_xpath_getindex(res_dom, './/img/@src', 0, None)
        if thumbnail and thumbnail.startswith('data:image'):
            img_id = eval_xpath_getindex(res_dom, './/img/@id', 0, None)
            thumbnail = data_image_map.get(img_id) if img_id else None

        title = extract_text(eval_xpath_getindex(res_dom, './/a/h3[1]', 0))
        url = eval_xpath_getindex(res_dom, './/a/h3[1]/../@href', 0)
        content = extract_text(eval_xpath_getindex(res_dom, './/div[contains(@class, "ITZIwc")]', 0))
        pub_info = extract_text(eval_xpath(res_dom, './/div[contains(@class, "gqF9jc")]'))

        item = {
            'url': url,
            'title': title,
            'content': content,
            'author': pub_info,
            'thumbnail': thumbnail,
            'iframe_src': get_embeded_stream_url(url),
            'template': 'videos.html',
        }
        results.append(item)

    # append query suggestions
    for suggestion in eval_xpath_list(dom, suggestion_xpath):
        results.append({'suggestion': extract_text(suggestion)})

    return results
152
153 return results