114):
115
116
117
118
119
120 url_fields = ["url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"]
121
122 url_src: str
123
124 for field_name in url_fields:
125 url_src = getattr(result, field_name, "")
126 if not url_src:
127 continue
128
129 new_url = filter_func(result, field_name, url_src)
130
131 if isinstance(new_url, bool):
132 if new_url:
133
134 continue
135 log.debug("filter_urls: drop field %s URL %s", field_name, url_src)
136 new_url = None
137 else:
138 log.debug("filter_urls: modify field %s URL %s -> %s", field_name, url_src, new_url)
139
140 setattr(result, field_name, new_url)
141 if field_name == "url":
142
143 if not new_url:
144 result.parsed_url = None
145 elif isinstance(new_url, str):
146 result.parsed_url = urllib.parse.urlparse(new_url)
147
148
149
150
151
152
153 infobox_urls: list[dict[str, str]] = getattr(result, "urls", [])
154
155 if infobox_urls:
156
157 new_infobox_urls: list[dict[str, str]] = []
158
159 for item in infobox_urls:
160 url_src = item.get("url", "")
161 if not url_src:
162 new_infobox_urls.append(item)
163 continue
164
165 new_url = filter_func(result, "infobox_urls", url_src)
166 if isinstance(new_url, bool):
167 if new_url:
168 new_infobox_urls.append(item)
169
170 continue
171 log.debug("filter_urls: remove URL from field 'urls' ('infobox_urls') URL %s", url_src)
172 new_url = None
173 if new_url:
174 log.debug("filter_urls: modify URL from field 'urls' ('infobox_urls') URL %s -> %s", url_src, new_url)
175 item["url"] = new_url
176 new_infobox_urls.append(item)
177
178 setattr(result, "urls", new_infobox_urls)
179
180
181
182
183
184 infobox_attributes: list[dict[str, t.Any]] = getattr(result, "attributes", [])
185
186 if infobox_attributes:
187
188 new_infobox_attributes: list[dict[str, str | list[dict[str, str]]]] = []
189
190 for item in infobox_attributes:
191 image: dict[str, str] = item.get("image", {})
192 url_src = image.get("src", "")
193 if not url_src:
194 new_infobox_attributes.append(item)
195 continue
196
197 new_url = filter_func(result, "infobox_attributes", url_src)
198 if isinstance(new_url, bool):
199 if new_url:
200 new_infobox_attributes.append(item)
201
202 continue
203 log.debug("filter_urls: drop field 'image.src' ('infobox_attributes') URL %s", url_src)
204 new_url = None
205
206 if new_url:
207 log.debug(
208 "filter_urls: modify 'image.src' ('infobox_attributes') URL %s -> %s",
209 url_src,
210 new_url,
211 )
212 item["image"]["src"] = new_url
213 new_infobox_attributes.append(item)
214
215 setattr(result, "attributes", new_infobox_attributes)
216
217 result.normalize_result_fields()
218
219