From dc4a2bfcb5146145ce4bceddcd3b105d963dfe2a Mon Sep 17 00:00:00 2001 From: Ilya Shakhat Date: Thu, 8 Oct 2015 18:15:30 +0300 Subject: [PATCH] Use requests.Session() to reuse connections to Launchpad Using Session() allows reusing the same connection to the HTTP server, making subsequent requests faster (especially over HTTPS). This patch optimizes communication with Launchpad (since it allows keep-alive connections). It does not do the same for either git.openstack.org (it accepts only 1 HTTP request per connection) or lists.openstack.org. Change-Id: Id7ecc2de3b3604e5ace58e2d4c199a0378edfc44 --- stackalytics/processor/launchpad_utils.py | 14 ++++++---- stackalytics/processor/utils.py | 33 ++++++++++++++--------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/stackalytics/processor/launchpad_utils.py b/stackalytics/processor/launchpad_utils.py index a07f6a08c..71a1851c8 100644 --- a/stackalytics/processor/launchpad_utils.py +++ b/stackalytics/processor/launchpad_utils.py @@ -14,6 +14,7 @@ # limitations under the License. 
from oslo_log import log as logging +import requests import six from stackalytics.processor import utils @@ -29,6 +30,8 @@ BUG_STATUSES = ['New', 'Incomplete', 'Opinion', 'Invalid', 'Won\'t Fix', LP_URI_V1 = 'https://api.launchpad.net/1.0/%s' LP_URI_DEVEL = 'https://api.launchpad.net/devel/%s' +launchpad_session = requests.Session() + def link_to_launchpad_id(link): return link[link.find('~') + 1:] @@ -37,7 +40,7 @@ def link_to_launchpad_id(link): def lp_profile_by_launchpad_id(launchpad_id): LOG.debug('Lookup user id %s at Launchpad', launchpad_id) uri = LP_URI_V1 % ('~' + launchpad_id) - lp_profile = utils.read_json_from_uri(uri) + lp_profile = utils.read_json_from_uri(uri, session=launchpad_session) utils.validate_lp_display_name(lp_profile) return lp_profile @@ -45,7 +48,7 @@ def lp_profile_by_launchpad_id(launchpad_id): def lp_profile_by_email(email): LOG.debug('Lookup user email %s at Launchpad', email) uri = LP_URI_V1 % ('people/?ws.op=getByEmail&email=' + email) - lp_profile = utils.read_json_from_uri(uri) + lp_profile = utils.read_json_from_uri(uri, session=launchpad_session) utils.validate_lp_display_name(lp_profile) return lp_profile @@ -63,7 +66,7 @@ def lp_blueprint_generator(module): uri = LP_URI_DEVEL % (module + '/all_specifications') while uri: LOG.debug('Reading chunk from uri %s', uri) - chunk = utils.read_json_from_uri(uri) + chunk = utils.read_json_from_uri(uri, session=launchpad_session) if not chunk: LOG.warn('No data was read from uri %s', uri) @@ -84,7 +87,7 @@ def lp_bug_generator(module, modified_since): while uri: LOG.debug('Reading chunk from uri %s', uri) - chunk = utils.read_json_from_uri(uri) + chunk = utils.read_json_from_uri(uri, session=launchpad_session) if not chunk: LOG.warn('No data was read from uri %s', uri) @@ -95,7 +98,8 @@ def lp_bug_generator(module, modified_since): related_tasks_uri = record['related_tasks_collection_link'] LOG.debug('Reading related task from uri %s', related_tasks_uri) - related_tasks = 
utils.read_json_from_uri(related_tasks_uri) + related_tasks = utils.read_json_from_uri(related_tasks_uri, + session=launchpad_session) if not related_tasks: LOG.warn('No data was read from uri %s', uri) elif related_tasks['entries']: diff --git a/stackalytics/processor/utils.py b/stackalytics/processor/utils.py index ad5011797..7aec825b8 100644 --- a/stackalytics/processor/utils.py +++ b/stackalytics/processor/utils.py @@ -109,33 +109,40 @@ def check_email_validity(email): user_agents = [ - 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/32.0', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_6) AppleWebKit/537.78.2', - 'Mozilla/5.0 (Windows NT 6.3; WOW64) Gecko/20100101 Firefox/32.0', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/37.0.2062.120', + 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/41.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9', + 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/45.0.2062.120', 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko' ] -def do_request(uri, method='get'): - with requests.Session() as session: - session.mount('file://', requests_file.FileAdapter()) - user_agent = random.choice(user_agents) +def _session_request(session, uri, method): + session.mount('file://', requests_file.FileAdapter()) + user_agent = random.choice(user_agents) - return session.request(method, uri, headers={'User-Agent': user_agent}) + return session.request(method, uri, headers={'User-Agent': user_agent}) -def read_uri(uri): +def do_request(uri, method='get', session=None): + if session: + return _session_request(session, uri, method) + else: + with requests.Session() as session: + return _session_request(session, uri, method) + + +def read_uri(uri, session=None): try: - return do_request(uri).text + return do_request(uri, session=session).text except Exception as e: LOG.warn('Error "%(error)s" 
retrieving uri %(uri)s', {'error': e, 'uri': uri}) -def read_json_from_uri(uri): +def read_json_from_uri(uri, session=None): try: - return do_request(uri).json() + return do_request(uri, session=session).json() except Exception as e: LOG.warn('Error "%(error)s" parsing json from uri %(uri)s', {'error': e, 'uri': uri})