42 start_tag =
'window.MESON.initialState = {'
45 dom = html.fromstring(resp.text)
46 script = utils.eval_xpath_getindex(dom,
'//script', 0, default=
None).text
48 pos = script.index(start_tag) + len(start_tag) - 1
50 pos = script.index(end_tag) + len(end_tag) - 1
53 json_resp = utils.js_variable_to_python(script)
57 for item
in json_resp[
'search'][
'webResults'][
'results']:
59 pubdate_original = item.get(
'pubdate_original')
61 pubdate_original = dateutil.parser.parse(pubdate_original)
62 metadata = [item.get(field)
for field
in [
'category_l1',
'catsy']
if item.get(field)]
66 "url": item[
'url'].split(
'&ueid')[0],
67 "title": item[
'title'],
68 "content": item[
'abstract'],
69 "publishedDate": pubdate_original,
71 "metadata":
' | '.join(metadata),