Source code for gepyto.db.ensembl

# Utilities to interact with the Ensembl database.
# This module contains code strictly to interact with Ensembl and not to
# interpret the response or the underlying biology.

# This file is part of gepyto.
#
# This work is licensed under the Creative Commons Attribution-NonCommercial
# 4.0 International License. To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to Creative
# Commons, PO Box 1866, Mountain View, CA 94042, USA.


__author__ = "Marc-Andre Legault"
__copyright__ = ("Copyright 2014 Marc-Andre Legault and Louis-Philippe "
                 "Lemieux Perreault. All rights reserved.")
__license__ = "Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)"


import contextlib
import json
import logging
import time

try:
    # Python 2 support
    from urllib2 import urlopen, HTTPError
except ImportError:
    # Python 3 support
    from urllib.request import urlopen, HTTPError


__all__ = ["query_ensembl", "get_url_prefix"]

# Wall-clock time (``time.time()``) at which the most recent call to
# ``query_ensembl`` started; 0 until the first query is made.
LAST_QUERY = 0


def _header_as_number(headers, name):
    """Return header *name* as a float, or None if absent or non-numeric."""
    if headers is None:
        return None

    # ``.get`` exists on both the Python 2 and the Python 3 header objects,
    # unlike ``getheader`` which is Python 2 only.
    value = headers.get(name)
    if value is None:
        return None

    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _throttle_delay(headers, delta_t):
    """Return the number of seconds to sleep to stay within the quota.

    ``delta_t`` is the time elapsed (in seconds) since the previous query.
    Returns 0.0 when the rate limit headers are missing or unusable.

    """
    # NOTE(review): X-RateLimit-Reset is treated as "seconds until the quota
    # resets" and X-RateLimit-Remaining as "requests left in this window" --
    # confirm against the Ensembl REST documentation.
    reset = _header_as_number(headers, "X-RateLimit-Reset")
    remaining = _header_as_number(headers, "X-RateLimit-Remaining")
    if reset is None or remaining is None:
        return 0.0

    if remaining <= 0:
        # Quota exhausted: avoid the division by zero and wait for the reset.
        return max(reset, 0.0) + 0.5

    # Max time for request (s / request) to not exceed quota.
    max_t = reset / remaining
    if delta_t < max_t:
        return max_t - delta_t + 0.5  # We add a buffer of 0.5s.

    return 0.0


def query_ensembl(url):
    """Query the given (Ensembl rest api) url and get a json response.

    :param url: The API url to query.
    :type url: str

    :returns: A python object loaded from the JSON response from the server,
              or ``None`` if the request failed with an HTTP error other
              than 429 (too many requests).

    After a successful request, the function sleeps if needed so that the
    quota advertised in the ``X-RateLimit-*`` response headers is not
    exceeded. If these headers are absent, no throttling is applied. On a
    429 response, it waits for the duration given by the ``Retry-After``
    header and retries the same url.

    """
    global LAST_QUERY

    this_query_time = time.time()
    delta_t = this_query_time - LAST_QUERY
    LAST_QUERY = this_query_time

    try:
        with contextlib.closing(urlopen(url)) as stream:
            response = json.loads(stream.read().decode("utf-8"))
            response_info = stream.info()

    except HTTPError as e:
        logging.warning("Request '{}' failed.".format(url))
        logging.warning("[{}] {}".format(e.code, e.reason))

        # If we busted we wait what they ask us to wait.
        if e.code == 429:
            sleep_time = _header_as_number(e.info(), "Retry-After")
            if sleep_time is None or sleep_time < 0:
                # Header missing or not a number of seconds: wait briefly
                # instead of crashing or hammering the server.
                sleep_time = 1.0

            logging.warning("Waiting {}s before next Ensembl request (at "
                            "the server's request).".format(sleep_time))
            time.sleep(sleep_time)
            return query_ensembl(url)

        return None

    # Throttle only once the connection has been closed so that it is not
    # held open while sleeping.
    delay = _throttle_delay(response_info, delta_t)
    if delay > 0:
        time.sleep(delay)

    return response


def get_url_prefix(build):
    """Generate an Ensembl REST API URL prefix for the given build.

    :param build: The genome build name (case insensitive). Accepted values
                  are ``GRCh37``/``hg19`` and ``GRCh38``/``hg38``.
    :type build: str

    :returns: The URL prefix (with a trailing slash) for this build.
    :rtype: str

    :raises ValueError: If the build is not recognized.

    """
    build_name = build.lower()

    if build_name in ("grch37", "hg19"):
        return "http://grch37.rest.ensembl.org/"

    elif build_name in ("grch38", "hg38"):
        return "http://rest.ensembl.org/"

    else:
        raise ValueError("Invalid build '{}'. Valid builds are: GRCh37 and "
                         "GRCh38.".format(build))