# NOTE(review): garbled extraction fragment. Each line carries a stray embedded
# line number from the original file ("39", "45", ...) and the numbering is
# non-contiguous, so intervening statements (loop header over `matches`, the
# `try:` opener, the assignment of `video_info`, early-exit guards) are missing
# from this view. Code is left byte-identical; comments only.
#
# Purpose (as far as visible): parse a search response. Pulls every JSON
# object passed to bigPipe.onPageletArrive(...) out of resp.text (non-greedy,
# DOTALL so payloads may span lines), json-parses each, reads its "html"
# field, builds an lxml tree from it, selects the per-result
# <div class="...search-video..."> blocks, and appends each video_info dict
# that has both a truthy "title" and "url" to `results`.
39 matches = re.findall(
# regex captures the {...} argument of each onPageletArrive call;
# assumes `resp` has a .text attribute (requests-style response) — TODO confirm
r'bigPipe\.onPageletArrive\((\{.*?\})\);', resp.text, re.DOTALL)
# `match` presumably comes from a missing `for match in matches:` loop — verify
45 json_data = json.loads(match)
# embedded HTML payload; defaults to "" when the key is absent
46 raw_html = json_data.get(
"html",
"")
# `html` here is presumably lxml.html (module-level import not visible) — TODO confirm
50 tree = html.fromstring(raw_html)
52 video_blocks = tree.xpath(
'//div[contains(@class, "search-video")]')
# `video_info` is assigned by a missing line (embedded 57?) — likely a call to
# the extractor fragment below; verify against the full file
56 for video_block
in video_blocks:
# keep only entries with a non-empty title AND url
58 if video_info
and video_info[
"title"]
and video_info[
"url"]:
59 results.append(video_info)
# malformed payloads are skipped; the except body (embedded line 62?) is not visible
61 except json.JSONDecodeError:
# NOTE(review): garbled extraction fragment (same mangling as above: embedded
# original line numbers, statements split mid-expression — e.g. the f-string
# prefixes at "75"/"76" are separated from their literals). The enclosing
# function header, the `try:` openers, the result-dict opener and several of
# its keys (embedded lines ~96-100, 103, 106-107) are missing from this view.
# Code is left byte-identical; comments only.
#
# Purpose (as far as visible): extract one search result's metadata from a
# video_block element. Reads the element's 'data-exposure-log' attribute as
# JSON, derives the watch URL and embeddable player URL from content_id,
# xpath-extracts creation time / cover image / duration / intro text, parses
# the date and duration leniently, and returns a result dict (None on failure,
# judging by the broad except at the end).
69 data_exposure_log = video_block.get(
'data-exposure-log')
# attribute value is itself a JSON blob; json.loads(None) raising TypeError is
# presumably caught by the trailing broad except
70 video_data = json.loads(data_exposure_log)
72 content_id = video_data.get(
"content_id",
"")
73 title = video_data.get(
"title",
"")
# f-string split across lines by the extraction; `base_url` is a module-level
# name not visible here — TODO confirm it is defined at file scope
75 url = f
"{base_url}/v/ac{content_id}"
76 iframe_src = f
"{base_url}/player/ac{content_id}"
# `extract_text` is an external helper (import not visible); assumed to
# flatten an xpath result into a plain string — verify
78 create_time = extract_text(video_block.xpath(
'.//span[contains(@class, "info__create-time")]'))
# NOTE(review): [0] on the xpath result here (unlike the other three calls)
# would raise IndexError on an empty result — presumably absorbed by the
# trailing except since ValueError/TypeError are caught elsewhere; confirm
79 video_cover = extract_text(video_block.xpath(
'.//div[contains(@class, "video__cover")]/a/img/@src')[0])
80 video_duration = extract_text(video_block.xpath(
'.//span[contains(@class, "video__duration")]'))
81 video_intro = extract_text(video_block.xpath(
'.//div[contains(@class, "video__main__intro")]'))
# lenient date parse; the guarding `try:` and the except body are not visible
86 published_date = datetime.strptime(create_time.strip(),
"%Y-%m-%d")
87 except (ValueError, TypeError):
# duration parsed as MM:SS via strptime, then converted to a timedelta;
# NOTE(review): this caps parseable durations at 59:59 — confirm upstream
# format before changing
93 timediff = datetime.strptime(video_duration.strip(),
"%M:%S")
94 length = timedelta(minutes=timediff.minute, seconds=timediff.second)
95 except (ValueError, TypeError):
# tail of the returned result dict; the opener and keys such as "url",
# "title", "length" (embedded lines 96-103) are missing from this view
101 "content": video_intro,
102 "thumbnail": video_cover,
104 "publishedDate": published_date,
105 "iframe_src": iframe_src,
# broad failure guard for the whole extraction; its body (likely `return None`)
# is not visible
108 except (json.JSONDecodeError, AttributeError, TypeError, ValueError):