def _parse_duration(text):
    """Convert a ``MM:SS`` or ``HH:MM:SS`` string into a ``timedelta``.

    Unlike ``datetime.strptime(text, "%M:%S")``, the minute field is not
    limited to 0-59, so durations of an hour or more (e.g. ``"75:30"`` or
    ``"1:02:03"``) are parsed instead of being discarded.

    Returns ``None`` when *text* does not match either layout.
    """
    parts = text.strip().split(":")
    if len(parts) not in (2, 3) or not all(part.isdecimal() for part in parts):
        return None

    numbers = [int(part) for part in parts]
    if len(numbers) == 2:
        # No hour field present: treat it as zero.
        numbers.insert(0, 0)
    hours, minutes, seconds = numbers
    return timedelta(hours=hours, minutes=minutes, seconds=seconds)


def extract_video_data(video_block):
    """Build a result dict from a single video element.

    *video_block* must provide ``.get()`` for attributes and ``.xpath()``
    (presumably an lxml HTML element -- confirm against the caller).  The
    element's ``data-exposure-log`` attribute is decoded as JSON to obtain
    ``content_id`` and ``title``; the remaining fields are read from child
    nodes via XPath.

    Returns a dict with the keys ``title``, ``url``, ``content``,
    ``thumbnail``, ``length``, ``publishedDate`` and ``iframe_src``, or
    ``None`` when the ``data-exposure-log`` attribute is missing, is not
    valid JSON, or another ``AttributeError``/``TypeError``/``ValueError``
    occurs while extracting the fields.
    """
    try:
        data_exposure_log = video_block.get('data-exposure-log')
        # json.loads(None) raises TypeError, which the handler below turns
        # into a ``None`` result.
        video_data = json.loads(data_exposure_log)

        content_id = video_data.get("content_id", "")
        title = video_data.get("title", "")

        url = f"{base_url}/v/ac{content_id}"
        iframe_src = f"{base_url}/player/ac{content_id}"

        create_time = extract_text(video_block.xpath('.//span[contains(@class, "info__create-time")]'))
        video_duration = extract_text(video_block.xpath('.//span[contains(@class, "video__duration")]'))
        video_intro = extract_text(video_block.xpath('.//div[contains(@class, "video__main__intro")]'))

        # A video without a cover image yields an empty match list; indexing
        # it blindly would raise an IndexError that the handler below does
        # not catch, so fall back to an empty thumbnail instead.
        cover_matches = video_block.xpath('.//div[contains(@class, "video__cover")]/a/img/@src')
        video_cover = extract_text(cover_matches[0]) if cover_matches else ""

        published_date = None
        if create_time:
            try:
                published_date = datetime.strptime(create_time.strip(), "%Y-%m-%d")
            except (ValueError, TypeError):
                # Unrecognised date format: leave the date unset.
                pass

        length = _parse_duration(video_duration) if video_duration else None

        return {
            "title": title,
            "url": url,
            "content": video_intro,
            "thumbnail": video_cover,
            "length": length,
            "publishedDate": published_date,
            "iframe_src": iframe_src,
        }

    except (json.JSONDecodeError, AttributeError, TypeError, ValueError):
        return None