22e9c45d |
#!/usr/bin/env python3
import sys
from operator import itemgetter
import feedparser
from ruamel.yaml import YAML
# Base URL of the arXiv query API; search() appends the query string to it.
API_URL = "http://export.arxiv.org/api/query"
# Base URL of the DOI resolver; extract_publication() appends an article's DOI to it.
DOI_URL = "http://dx.doi.org"
def author_query(author):
    """Build the arXiv search term that matches a single author.

    Parameters
    ----------
    author: tuple
        Name parts ordered ``(firstname, surname)``.

    Returns
    -------
    str
        ``"au:"`` followed by the name parts in reverse order, joined
        with underscores.
    """
    surname_first = reversed(author)
    return "au:{}".format("_".join(surname_first))
def search(author=(), max_results=100):
    """Return the articles on arXiv written by the given author.

    Parameters
    ----------
    author: tuple
        (firstname, surname)
    max_results: int
        Value sent to the API as the ``max_results`` query parameter.

    Returns
    -------
    Parsed Atom feed of articles
    """
    # Imported locally so this function stays self-contained and does not
    # rely on a new file-level import.
    from urllib.parse import urlencode

    # Percent-encode the parameters so that name parts containing spaces,
    # "&", "#" or non-ASCII characters still produce a valid URL. ":" is
    # kept literal so plain-ASCII names yield exactly the same URL as an
    # unencoded interpolation would.
    query_string = urlencode(
        {"search_query": author_query(author), "max_results": max_results},
        safe=":",
    )
    return feedparser.parse("{}?{}".format(API_URL, query_string))
def extract_publication(feed_article):
    """Convert one feed entry into a plain publication dictionary.

    Parameters
    ----------
    feed_article:
        Feed entry exposing ``title``, ``authors`` (items with a ``name``),
        ``summary``, ``date`` and ``link`` attributes, and optionally
        ``arxiv_journal_ref`` and ``arxiv_doi``.

    Returns
    -------
    dict
        Always holds "title", "authors", "abstract", "date", "link" and
        "ref" (the last "/"-separated component of the link). "jref" is
        added when the entry has a journal reference, and "jlink" (the DOI
        resolver URL) when it additionally has a DOI.
    """
    # Key order is kept deliberately: it is the order the mapping is
    # emitted in when the result is serialised.
    pub = {
        "title": feed_article.title,
        "authors": [person.name for person in feed_article.authors],
        "abstract": feed_article.summary,
        "date": feed_article.date,
        "link": feed_article.link,
        "ref": feed_article.link.rsplit("/", 1)[-1],
    }

    # The journal reference is looked up first; without it the DOI is
    # never consulted.
    try:
        journal_ref = feed_article.arxiv_journal_ref
    except AttributeError:
        return pub
    pub["jref"] = journal_ref

    try:
        doi = feed_article.arxiv_doi
    except AttributeError:
        return pub
    pub["jlink"] = "/".join((DOI_URL, doi))
    return pub
def main():
    """Fetch Joseph Weston's arXiv articles and write them to stdout as YAML.

    The publications are ordered by their "date" value, descending.
    """
    name_parts = "Joseph Weston".split()
    entries = search(name_parts).entries
    publications = [extract_publication(entry) for entry in entries]
    publications.sort(key=itemgetter("date"), reverse=True)
    YAML().dump(publications, sys.stdout)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|