def _filter_urls(
    result: Result | LegacyResult,
    filter_func: Callable[[Result | LegacyResult, str, str], str | bool],
) -> None:
    """Filter and/or rewrite every URL carried by ``result`` (in place).

    ``filter_func`` is called as ``filter_func(result, field_name, url)`` for
    each non-empty URL and decides what happens to it by its return value:

    - ``True``: the URL is left unchanged.
    - ``False``: the URL is dropped.  A plain URL field is set to ``None``; an
      entry of the infobox lists ``urls`` / ``attributes`` is removed from the
      list.
    - ``str``: the URL is replaced by the returned string.

    Three groups of URLs are processed, in this order:

    1. the plain fields ``url``, ``iframe_src``, ``audio_src``, ``img_src``,
       ``thumbnail_src`` and ``thumbnail`` (field name passed to the filter is
       the attribute name),
    2. the ``url`` key of each item in ``result.urls`` (field name passed to
       the filter is ``"infobox_urls"``),
    3. the ``image.src`` value of each item in ``result.attributes`` (field
       name passed to the filter is ``"infobox_attributes"``).

    Finally ``result.normalize_result_fields()`` is called.
    """
    _filter_url_fields(result, filter_func)
    _filter_infobox_urls(result, filter_func)
    _filter_infobox_attributes(result, filter_func)

    result.normalize_result_fields()


def _filter_url_fields(
    result: Result | LegacyResult,
    filter_func: Callable[[Result | LegacyResult, str, str], str | bool],
) -> None:
    """Apply ``filter_func`` to the plain URL attributes of ``result``."""
    url_fields = ("url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail")

    for field_name in url_fields:
        url_src = getattr(result, field_name, "")
        if not url_src:
            # field is missing or empty: nothing to filter
            continue

        new_url = filter_func(result, field_name, url_src)

        if new_url is True:
            # keep the URL untouched
            continue
        if new_url is False:
            log.debug("filter_urls: drop field %s URL %s", field_name, url_src)
            new_url = None
        else:
            log.debug("filter_urls: modify field %s URL %s -> %s", field_name, url_src, new_url)

        setattr(result, field_name, new_url)

        if field_name == "url":
            # keep parsed_url in sync with the new value of url
            if not new_url:
                result.parsed_url = None
            elif isinstance(new_url, str):
                result.parsed_url = urllib.parse.urlparse(new_url)


def _filter_infobox_urls(
    result: Result | LegacyResult,
    filter_func: Callable[[Result | LegacyResult, str, str], str | bool],
) -> None:
    """Apply ``filter_func`` to the ``url`` of each item in ``result.urls``."""
    infobox_urls: list[dict[str, str]] = getattr(result, "urls", [])
    if not infobox_urls:
        return

    kept_urls: list[dict[str, str]] = []

    for item in infobox_urls:
        url_src = item.get("url")
        if not url_src:
            # items without a URL are passed through unfiltered
            kept_urls.append(item)
            continue

        new_url = filter_func(result, "infobox_urls", url_src)

        if new_url is True:
            kept_urls.append(item)
        elif new_url is False:
            log.debug("filter_urls: remove URL from field 'urls' ('infobox_urls') URL %s", url_src)
        elif new_url:
            log.debug("filter_urls: modify URL from field 'urls' ('infobox_urls') URL %s -> %s", url_src, new_url)
            item["url"] = new_url  # the item dict is modified in place
            kept_urls.append(item)
        # any other falsy value (e.g. an empty string) removes the item silently

    setattr(result, "urls", kept_urls)


def _filter_infobox_attributes(
    result: Result | LegacyResult,
    filter_func: Callable[[Result | LegacyResult, str, str], str | bool],
) -> None:
    """Apply ``filter_func`` to ``image.src`` of each item in ``result.attributes``."""
    infobox_attributes: list[dict[str, dict]] = getattr(result, "attributes", [])
    if not infobox_attributes:
        return

    kept_attributes: list[dict[str, dict]] = []

    for item in infobox_attributes:
        # "image" may be absent or explicitly None; both mean "no image URL"
        image = item.get("image") or {}
        url_src = image.get("src", "")
        if not url_src:
            # attributes without an image URL are passed through unfiltered
            kept_attributes.append(item)
            continue

        new_url = filter_func(result, "infobox_attributes", url_src)

        if new_url is True:
            kept_attributes.append(item)
        elif new_url is False:
            # note: the whole attribute item is removed, not only its image
            log.debug("filter_urls: drop field 'image.src' ('infobox_attributes') URL %s", url_src)
        elif new_url:
            log.debug(
                "filter_urls: modify 'image.src' ('infobox_attributes') URL %s -> %s",
                url_src,
                new_url,
            )
            item["image"]["src"] = new_url  # the item dict is modified in place
            kept_attributes.append(item)
        # any other falsy value (e.g. an empty string) removes the item silently

    setattr(result, "attributes", kept_attributes)
216
217