.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
google_videos.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""This is the implementation of the Google Videos engine.
3
4.. admonition:: Content-Security-Policy (CSP)
5
6 This engine needs to allow images from the `data URLs`_ (prefixed with the
7 ``data:`` scheme)::
8
9 Header set Content-Security-Policy "img-src 'self' data: ;"
10
11.. _data URLs:
12 https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
13"""
14from __future__ import annotations
15
16from urllib.parse import urlencode, urlparse, parse_qs
17from lxml import html
18
19from searx.utils import (
20 eval_xpath_list,
21 eval_xpath_getindex,
22 extract_text,
23)
24
25from searx.engines.google import fetch_traits # pylint: disable=unused-import
26from searx.engines.google import (
27 get_google_info,
28 time_range_dict,
29 filter_mapping,
30 suggestion_xpath,
31 detect_google_sorry,
32 ui_async,
33 parse_data_images,
34)
35from searx.enginelib.traits import EngineTraits
36from searx.utils import get_embeded_stream_url
37
# Engine traits handed to ``get_google_info()`` by ``request()``.  Only declared
# here, never assigned in this module -- presumably injected by the engine
# loader at runtime (TODO confirm).
traits: EngineTraits

# about: metadata describing the upstream service.  The values state that the
# engine does not use the official API and needs no API key; results are HTML,
# which matches ``response()`` parsing the page with lxml.
about = {
    "website": 'https://www.google.com',
    "wikidata_id": 'Q219885',
    "official_api_documentation": 'https://developers.google.com/custom-search',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['videos', 'web']
# ``request()`` converts params['pageno'] into a ``start`` offset (10 per page).
paging = True
# Presumably the highest page number that may be requested -- TODO confirm
# against the engine loader; nothing in this module reads it.
max_page = 50
language_support = True
# ``request()`` maps params['time_range'] onto a ``tbs=qdr:...`` URL argument.
time_range_support = True
# ``request()`` maps params['safesearch'] onto the ``safe`` URL argument.
safesearch = True
57
58
def request(query, params):
    """Assemble the Google-Videos search request.

    Fills ``params['url']``, ``params['cookies']`` and ``params['headers']``
    from the values returned by ``get_google_info()`` and returns the same
    ``params`` object.

    :param query: search terms, sent as the ``q`` URL argument.
    :param params: request parameters; ``pageno`` and ``time_range`` are
        read unconditionally, ``safesearch`` only when present.
    :return: the updated ``params``.
    """
    google_info = get_google_info(params, traits)

    # Result offset: ten results per page, page numbers start at 1.
    offset = (params['pageno'] - 1) * 10

    # Key order matters for the generated URL: ``asearch`` and ``async`` come
    # after the Google-info arguments so they win if a key is duplicated.
    main_args = {
        'q': query,
        'tbm': "vid",
        'start': offset,
        **google_info['params'],
        'asearch': 'arc',
        'async': ui_async(offset),
    }
    segments = [urlencode(main_args)]

    # Optional filters are appended as separate segments rather than merged
    # into ``main_args``, so they never replace an argument already present.
    time_range = params['time_range']
    if time_range in time_range_dict:
        segments.append(urlencode({'tbs': 'qdr:' + time_range_dict[time_range]}))
    if 'safesearch' in params:
        segments.append(urlencode({'safe': filter_mapping[params['safesearch']]}))

    params['url'] = 'https://' + google_info['subdomain'] + '/search' + "?" + '&'.join(segments)

    params['cookies'] = google_info['cookies']
    params['headers'].update(google_info['headers'])
    return params
90
91
def _youtube_id_from_url(url):
    """Return the ``v`` query argument of a YouTube *url*, or ``None``.

    The host of the parsed URL must be ``youtube.com`` or one of its
    subdomains.  A plain substring test is not enough: it would also match
    URLs that merely mention ``youtube.com`` in their path or query string.
    """
    # Cheap pre-filter, so URLs that cannot match are never parsed.
    if not url or 'youtube.com' not in url:
        return None
    parsed_url = urlparse(url)
    hostname = parsed_url.hostname or ''
    if hostname != 'youtube.com' and not hostname.endswith('.youtube.com'):
        return None
    return parse_qs(parsed_url.query).get('v', [None])[0]


def response(resp):
    """Parse the HTML of a Google-Videos result page.

    :param resp: HTTP response; ``resp.text`` is parsed with lxml and also
        passed to ``parse_data_images()``.  ``detect_google_sorry()`` is
        called on it first -- presumably it raises when Google answers with
        its block page (TODO confirm).
    :return: list of result dicts using the ``videos.html`` template (keys
        ``url``, ``title``, ``content``, ``author``, ``thumbnail``,
        ``length``, ``iframe_src``), followed by one ``{'suggestion': ...}``
        dict per suggestion found in the page.
    """
    results = []

    detect_google_sorry(resp)
    # Maps an <img> ``id`` to the image that replaces its inline placeholder.
    data_image_map = parse_data_images(resp.text)

    # convert the text to dom
    dom = html.fromstring(resp.text)

    # parse results
    for result in eval_xpath_list(dom, '//div[contains(@class, "MjjYud")]'):
        title = extract_text(
            eval_xpath_getindex(result, './/h3[contains(@class, "LC20lb")]', 0, default=None), allow_none=True
        )
        url = eval_xpath_getindex(result, './/a[@jsname="UWckNb"]/@href', 0, default=None)

        # Only results with a title and a URL are reported; skip the rest
        # before spending work on thumbnail and embed resolution.
        if not (title and url):
            continue

        content = extract_text(
            eval_xpath_getindex(result, './/div[contains(@class, "ITZIwc")]', 0, default=None), allow_none=True
        )
        pub_info = extract_text(
            eval_xpath_getindex(result, './/div[contains(@class, "gqF9jc")]', 0, default=None), allow_none=True
        )
        # Broader XPath to find any <img> element
        thumbnail = eval_xpath_getindex(result, './/img/@src', 0, default=None)
        duration = extract_text(
            eval_xpath_getindex(result, './/span[contains(@class, "k1U36b")]', 0, default=None), allow_none=True
        )

        # Prefer the ID from the markup; fall back to the ``v`` argument of
        # the result URL when that URL really points at YouTube.
        # NOTE(review): a ``data-vid`` value is treated as a YouTube ID
        # below -- confirm Google only emits it for YouTube results.
        video_id = eval_xpath_getindex(result, './/div[@jscontroller="rTuANe"]/@data-vid', 0, default=None)
        if not video_id:
            video_id = _youtube_id_from_url(url)

        # An inline ``data:image`` src is replaced by the image registered
        # under the <img> id, or dropped when no such image exists.
        if thumbnail and thumbnail.startswith('data:image'):
            img_id = eval_xpath_getindex(result, './/img/@id', 0, default=None)
            if img_id and img_id in data_image_map:
                thumbnail = data_image_map[img_id]
            else:
                thumbnail = None
        if not thumbnail and video_id:
            thumbnail = f"https://img.youtube.com/vi/{video_id}/hqdefault.jpg"

        # Embed URL: built from the video ID when known, else from the
        # result URL itself.
        if video_id:
            embed_url = get_embeded_stream_url(f"https://www.youtube.com/watch?v={video_id}")
        else:
            embed_url = get_embeded_stream_url(url)

        results.append(
            {
                'url': url,
                'title': title,
                'content': content or '',
                'author': pub_info,
                'thumbnail': thumbnail,
                'length': duration,
                'iframe_src': embed_url,
                'template': 'videos.html',
            }
        )

    # parse suggestion
    for suggestion in eval_xpath_list(dom, suggestion_xpath):
        results.append({'suggestion': extract_text(suggestion)})

    return results