.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
tineye.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""This engine implements *Tineye - reverse image search*
3
4Using TinEye, you can search by image or perform what we call a reverse image
5search. You can do that by uploading an image or searching by URL. You can also
6simply drag and drop your images to start your search. TinEye constantly crawls
7the web and adds images to its index. Today, the TinEye index is over 50.2
8billion images `[tineye.com] <https://tineye.com/how>`_.
9
10.. hint::
11
12 This SearXNG engine only supports *'searching by URL'* and it does not use
13 the official API `[api.tineye.com] <https://api.tineye.com/python/docs/>`_.
14
15"""
16
17from urllib.parse import urlencode
18from datetime import datetime
19from flask_babel import gettext
20
21from searx.result_types import EngineResults
22
# Engine metadata: where the service lives and how this engine talks to it
# (no official API, no API key, JSON responses).
about = {
    "website": "https://tineye.com",
    "wikidata_id": "Q2382535",
    "official_api_documentation": "https://api.tineye.com/python/docs/",
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

engine_type = "online_url_search"
""":py:obj:`searx.search.processors.online_url_search`"""

# Engine settings.
categories = ["general"]
paging = True
safesearch = False

# The request URL is ``base_url`` + ``search_string``; ``{query}`` is filled
# with an already URL-encoded ``url=...`` pair and ``{page}`` with the page
# number (see :py:func:`request`).
base_url = "https://tineye.com"
search_string = "/api/v1/result_json/?page={page}&{query}"

FORMAT_NOT_SUPPORTED = gettext(
    "Could not read that image url. This may be due to an unsupported file format."
    " TinEye only supports images that are JPEG, PNG, GIF, BMP, TIFF or WebP."
)
"""TinEye error message"""

NO_SIGNATURE_ERROR = gettext(
    "The image is too simple to find matches."
    " TinEye requires a basic level of visual detail to successfully identify matches."
)
"""TinEye error message"""

DOWNLOAD_ERROR = gettext("The image could not be downloaded.")
"""TinEye error message"""
55
56
def request(query, params):
    """Build TinEye HTTP request using ``search_urls`` of a :py:obj:`engine_type`.

    The image URL to look up is taken from ``params['search_urls']``: the
    ``data:image`` entry is preferred, then the ``http`` entry.  If both are
    empty the raw ``query`` is sent unchanged.

    :param query: search term as entered by the user (fallback image URL).
    :param params: request parameters; ``search_urls``, ``pageno`` and
        ``headers`` are read, ``raise_for_httperror``, ``url`` and ``headers``
        are written.
    :return: the updated ``params`` mapping.
    """

    # 400 / 422 answers carry a JSON error description that response()
    # evaluates itself, so the HTTP layer must not raise on them.
    params['raise_for_httperror'] = False

    search_urls = params['search_urls']
    if search_urls['data:image']:
        query = search_urls['data:image']
    elif search_urls['http']:
        query = search_urls['http']

    # NOTE(review): ``logger`` is not defined in the visible part of this
    # module; presumably it is provided as a module global -- confirm.
    logger.debug("query URL: %s", query)
    query = urlencode({'url': query})

    # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
    params['url'] = base_url + search_string.format(query=query, page=params['pageno'])

    params['headers'].update(
        {
            'Connection': 'keep-alive',
            # 'deflate' was misspelled 'defalte', which is not a valid
            # content-coding name
            'Accept-Encoding': 'gzip, deflate, br',
            'Host': 'tineye.com',
            'DNT': '1',
            'TE': 'trailers',
        }
    )
    return params
83
84
def parse_tineye_match(match_json):
    """Takes parsed JSON from the API server and turns it into a :py:obj:`dict`
    object.

    Attributes `(class Match) <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__

    - `image_url`, link to the result image.
    - `domain`, domain this result was found on.
    - `score`, a number (0 to 100) that indicates how closely the images match.
    - `width`, image width in pixels.
    - `height`, image height in pixels.
    - `size`, image area in pixels.
    - `image_format`, image format (taken from the `format` field of the JSON).
    - `filesize`, image size in bytes.
    - `overlay`, overlay URL.
    - `tags`, whether this match belongs to a collection or stock domain.

    - `backlinks`, a list of Backlink objects pointing to the original websites
      and image URLs. List items are instances of :py:obj:`dict`, (`Backlink
      <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__):

      - `url`, the image URL to the image.
      - `backlink`, the original website URL.
      - `crawl_date`, the date the image was crawled (:py:obj:`datetime`);
        :py:obj:`datetime.min` when the date is missing or can not be parsed.
      - `image_name`, value of the `image_name` field of the backlink.

    Fields missing in ``match_json`` are returned as ``None``; a missing or
    ``null`` ``backlinks`` field results in an empty list.
    """

    # HINT: there exists an alternative backlink dict in the domains list / e.g.::
    #
    #     match_json['domains'][0]['backlinks']

    backlinks = []

    # ``or []`` also covers an explicit ``"backlinks": null`` in the JSON
    for backlink_json in match_json.get("backlinks") or []:
        if not isinstance(backlink_json, dict):
            continue

        # One malformed date must not discard the whole result list: fall back
        # to the same placeholder that is used for a missing date.
        crawl_date = datetime.min
        raw_crawl_date = backlink_json.get("crawl_date")
        if raw_crawl_date:
            try:
                crawl_date = datetime.strptime(raw_crawl_date, '%Y-%m-%d')
            except (TypeError, ValueError):
                crawl_date = datetime.min

        backlinks.append(
            {
                'url': backlink_json.get("url"),
                'backlink': backlink_json.get("backlink"),
                'crawl_date': crawl_date,
                'image_name': backlink_json.get("image_name"),
            }
        )

    return {
        'image_url': match_json.get("image_url"),
        'domain': match_json.get("domain"),
        'score': match_json.get("score"),
        'width': match_json.get("width"),
        'height': match_json.get("height"),
        'size': match_json.get("size"),
        'image_format': match_json.get("format"),
        'filesize': match_json.get("filesize"),
        'overlay': match_json.get("overlay"),
        'tags': match_json.get("tags"),
        'backlinks': backlinks,
    }
151
152
def response(resp) -> EngineResults:
    """Parse HTTP response from TinEye.

    For status 400 and 422 the JSON error body is turned into a message that
    is logged, and an empty result list is returned.  Any other error status
    is raised by ``resp.raise_for_status()``.  Otherwise every match that has
    at least one backlink becomes an ``images.html`` result built from its
    first backlink, followed by the number of matches if TinEye reports one.
    """
    results = EngineResults()
    status = resp.status_code

    # handle the 422 client side errors, and the possible 400 status code error
    if status in (400, 422):
        suggestions = resp.json().get('suggestions', {})
        message = f'HTTP Status Code: {status}'

        if status == 422:
            # suggestion key --> translated message; compared with ``==`` one
            # by one, an unknown key keeps the generic status message
            known_errors = (
                # test https://docs.searxng.org/_static/searxng-wordmark.svg
                ("Invalid image URL", FORMAT_NOT_SUPPORTED),
                # test https://pngimg.com/uploads/dot/dot_PNG4.png
                ('NO_SIGNATURE_ERROR', NO_SIGNATURE_ERROR),
                # test https://notexists
                ('Download Error', DOWNLOAD_ERROR),
            )
            s_key = suggestions.get('key', '')
            for error_key, error_message in known_errors:
                if s_key == error_key:
                    message = error_message
                    break
            else:
                logger.warning("Unknown suggestion key encountered: %s", s_key)
        else:  # 400
            description = suggestions.get('description')
            if isinstance(description, list):
                message = ','.join(description)

        # The message is only logged, it is not added to the results as an
        # answer, see
        # https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
        logger.info(message)
        return results

    # Raise for all other responses
    resp.raise_for_status()

    payload = resp.json()

    for raw_match in payload['matches']:
        parsed = parse_tineye_match(raw_match)
        found_on = parsed['backlinks']
        if not found_on:
            # without a backlink there is no page to link the result to
            continue

        first = found_on[0]
        item = {
            'template': 'images.html',
            'url': first['backlink'],
            'thumbnail_src': parsed['image_url'],
            'source': first['url'],
            'title': first['image_name'],
            'img_src': first['url'],
            'format': parsed['image_format'],
            'width': parsed['width'],
            'height': parsed['height'],
            'publishedDate': first['crawl_date'],
        }
        results.append(item)

    # append number of results
    total = payload.get('num_matches')
    if total:
        results.append({'number_of_results': total})

    return results
parse_tineye_match(match_json)
Definition tineye.py:85
request(query, params)
Definition tineye.py:57
EngineResults response(resp)
Definition tineye.py:153