scripts/fetch-publications.py
22e9c45d
 #!/usr/bin/env python3
 
 import sys
 
 from operator import itemgetter
 import feedparser
 from ruamel.yaml import YAML
 
 # Endpoint that search() appends its query string to.
 API_URL = "http://export.arxiv.org/api/query"
 # Prefix that extract_publication() joins with an article's DOI to form a link.
 DOI_URL = "http://dx.doi.org"
 
 
 def author_query(author):
     """Build the ``au:`` search term for one author.

     The name parts are emitted in reverse order and glued together with
     underscores, so ``(firstname, surname)`` becomes ``au:surname_firstname``.

     Parameters
     ----------
     author: tuple
         (firstname, surname)

     Returns
     -------
     str
         The query fragment, e.g. ``"au:Weston_Joseph"``.
     """
     surname_first = "_".join(reversed(author))
     return f"au:{surname_first}"
 
 
 def search(author=(), max_results=100):
     """Fetch the Arxiv feed of articles written by the author.

     Parameters
     ----------
     author: tuple
         (firstname, surname)
     max_results: int
         Value sent as the ``max_results`` query parameter.

     Returns
     -------
     Parsed Atom feed of articles, as returned by ``feedparser.parse``
     """
     query = author_query(author)
     url = f"{API_URL}?search_query={query}&max_results={max_results}"
     return feedparser.parse(url)
 
 
 def extract_publication(feed_article):
     """Convert one feed entry into a plain publication record.

     Parameters
     ----------
     feed_article: feed entry
         Object exposing ``title``, ``authors`` (each with a ``name``),
         ``summary``, ``date`` and ``link`` attributes, and optionally
         ``arxiv_journal_ref`` and ``arxiv_doi``.

     Returns
     -------
     dict
         Always contains ``title``, ``authors``, ``abstract``, ``date``,
         ``link`` and ``ref`` (the last path component of ``link``).
         ``jref`` is added when the entry has a journal reference and
         ``jlink`` when it has a DOI.

     Raises
     ------
     AttributeError
         If one of the mandatory attributes is missing from the entry.
     """
     link = feed_article.link
     pub = {
         "title": feed_article.title,
         "authors": [a.name for a in feed_article.authors],
         "abstract": feed_article.summary,
         "date": feed_article.date,
         "link": link,
         "ref": link.split("/")[-1],
     }

     # The two optional fields are looked up independently: an entry may carry
     # a DOI without a journal reference (or vice versa), and a missing one
     # must not cause the other to be dropped.
     journal_ref = getattr(feed_article, "arxiv_journal_ref", None)
     if journal_ref is not None:
         pub["jref"] = journal_ref

     doi = getattr(feed_article, "arxiv_doi", None)
     if doi is not None:
         pub["jlink"] = "/".join((DOI_URL, doi))

     return pub
 
 
 def main():
     """Fetch the author's articles and dump them to stdout as YAML.

     The publication records are written newest first, ordered by their
     ``date`` field.
     """
     feed = search("Joseph Weston".split())
     publications = [extract_publication(entry) for entry in feed.entries]
     publications.sort(key=itemgetter("date"), reverse=True)
     YAML().dump(publications, sys.stdout)
 
 
 # Run the fetch only when this file is executed as a script, not on import.
 if __name__ == "__main__":
     main()