41 start_tag =
'window.MESON.initialState = {'
44 dom = html.fromstring(resp.text)
45 script = utils.eval_xpath_getindex(dom,
'//script', 0, default=
None).text
47 pos = script.index(start_tag) + len(start_tag) - 1
49 pos = script.index(end_tag) + len(end_tag) - 1
52 json_resp = utils.js_variable_to_python(script)
56 for item
in json_resp[
'search'][
'webResults'][
'results']:
58 pubdate_original = item.get(
'pubdate_original')
60 pubdate_original = dateutil.parser.parse(pubdate_original)
61 metadata = [item.get(field)
for field
in [
'category_l1',
'catsy']
if item.get(field)]
65 "url": item[
'url'].split(
'&ueid')[0],
66 "title": item[
'title'],
67 "content": item[
'abstract'],
68 "publishedDate": pubdate_original,
70 "metadata":
' | '.join(metadata),