#!/usr/bin/env python3
"""Print one author's Arxiv publications as YAML on standard output."""

import sys
from operator import itemgetter
from urllib.parse import urlencode

import feedparser
from ruamel.yaml import YAML

API_URL = "http://export.arxiv.org/api/query"
DOI_URL = "http://dx.doi.org"


def author_query(author):
    """Return an Arxiv query fragment for an author.

    Parameters
    ----------
    author: tuple
        (firstname, surname)

    Returns
    -------
    The name parts in reverse order, joined by underscores and
    prefixed with "au:"
    """
    return "au:" + "_".join(reversed(author))


def search(author=(), max_results=100):
    """Return all articles written by the author on Arxiv.

    Parameters
    ----------
    author: tuple
        (firstname, surname)
    max_results: int
        Value sent as the ``max_results`` query parameter

    Returns
    -------
    Parsed Atom feed of articles
    """
    # Percent-encode the parameters so that name parts containing spaces,
    # '&', '#' or non-ASCII characters cannot corrupt the request URL.
    query = urlencode(
        {"search_query": author_query(author), "max_results": max_results}
    )
    return feedparser.parse("{}?{}".format(API_URL, query))


def extract_publication(feed_article):
    """Convert one feed entry into a plain dict describing a publication.

    Parameters
    ----------
    feed_article:
        Feed entry exposing ``title``, ``authors``, ``summary``, ``date``
        and ``link`` attributes, and optionally ``arxiv_journal_ref`` and
        ``arxiv_doi``

    Returns
    -------
    dict with the keys "title", "authors", "abstract", "date", "link" and
    "ref"; "jref" is added when the entry has a journal reference, and
    "jlink" when it additionally has a DOI
    """
    pub = {
        "title": feed_article.title,
        "authors": [a.name for a in feed_article.authors],
        "abstract": feed_article.summary,
        "date": feed_article.date,
        "link": feed_article.link,
        # Last path component of the article link.
        "ref": feed_article.link.split("/")[-1],
    }

    # The journal fields are optional.  The DOI link is only emitted
    # together with a journal reference, so "jlink" never appears
    # without "jref".
    journal_ref = getattr(feed_article, "arxiv_journal_ref", None)
    if journal_ref is not None:
        pub["jref"] = journal_ref
        doi = getattr(feed_article, "arxiv_doi", None)
        if doi is not None:
            pub["jlink"] = "/".join((DOI_URL, doi))

    return pub


def main():
    """Fetch Joseph Weston's articles and dump them, newest first, as YAML."""
    feed = search("Joseph Weston".split())

    # feedparser reports fetch/parse problems through the ``bozo`` flag
    # instead of raising.  Abort rather than emit an empty list that could
    # overwrite previously generated output.
    if feed.get("bozo") and not feed.entries:
        sys.exit(
            "Arxiv query failed: {!r}".format(feed.get("bozo_exception"))
        )

    publications = sorted(
        map(extract_publication, feed.entries),
        key=itemgetter("date"),
        reverse=True,
    )
    YAML().dump(publications, sys.stdout)


if __name__ == "__main__":
    main()