diff --git a/stackalytics/processor/launchpad_utils.py b/stackalytics/processor/launchpad_utils.py
index a07f6a08c..71a1851c8 100644
--- a/stackalytics/processor/launchpad_utils.py
+++ b/stackalytics/processor/launchpad_utils.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 from oslo_log import log as logging
+import requests
 import six
 
 from stackalytics.processor import utils
@@ -29,6 +30,8 @@ BUG_STATUSES = ['New', 'Incomplete', 'Opinion', 'Invalid', 'Won\'t Fix',
 LP_URI_V1 = 'https://api.launchpad.net/1.0/%s'
 LP_URI_DEVEL = 'https://api.launchpad.net/devel/%s'
 
+launchpad_session = requests.Session()
+
 
 def link_to_launchpad_id(link):
     return link[link.find('~') + 1:]
@@ -37,7 +40,7 @@ def link_to_launchpad_id(link):
 def lp_profile_by_launchpad_id(launchpad_id):
     LOG.debug('Lookup user id %s at Launchpad', launchpad_id)
     uri = LP_URI_V1 % ('~' + launchpad_id)
-    lp_profile = utils.read_json_from_uri(uri)
+    lp_profile = utils.read_json_from_uri(uri, session=launchpad_session)
     utils.validate_lp_display_name(lp_profile)
     return lp_profile
 
@@ -45,7 +48,7 @@ def lp_profile_by_launchpad_id(launchpad_id):
 def lp_profile_by_email(email):
     LOG.debug('Lookup user email %s at Launchpad', email)
     uri = LP_URI_V1 % ('people/?ws.op=getByEmail&email=' + email)
-    lp_profile = utils.read_json_from_uri(uri)
+    lp_profile = utils.read_json_from_uri(uri, session=launchpad_session)
     utils.validate_lp_display_name(lp_profile)
     return lp_profile
 
@@ -63,7 +66,7 @@ def lp_blueprint_generator(module):
     uri = LP_URI_DEVEL % (module + '/all_specifications')
     while uri:
         LOG.debug('Reading chunk from uri %s', uri)
-        chunk = utils.read_json_from_uri(uri)
+        chunk = utils.read_json_from_uri(uri, session=launchpad_session)
 
         if not chunk:
             LOG.warn('No data was read from uri %s', uri)
@@ -84,7 +87,7 @@ def lp_bug_generator(module, modified_since):
 
     while uri:
         LOG.debug('Reading chunk from uri %s', uri)
-        chunk = utils.read_json_from_uri(uri)
+        chunk = utils.read_json_from_uri(uri, session=launchpad_session)
 
         if not chunk:
             LOG.warn('No data was read from uri %s', uri)
@@ -95,7 +98,8 @@ def lp_bug_generator(module, modified_since):
 
             related_tasks_uri = record['related_tasks_collection_link']
             LOG.debug('Reading related task from uri %s', related_tasks_uri)
-            related_tasks = utils.read_json_from_uri(related_tasks_uri)
+            related_tasks = utils.read_json_from_uri(related_tasks_uri,
+                                                     session=launchpad_session)
             if not related_tasks:
                 LOG.warn('No data was read from uri %s', uri)
             elif related_tasks['entries']:
diff --git a/stackalytics/processor/utils.py b/stackalytics/processor/utils.py
index ad5011797..7aec825b8 100644
--- a/stackalytics/processor/utils.py
+++ b/stackalytics/processor/utils.py
@@ -109,33 +109,40 @@ def check_email_validity(email):
 
 
 user_agents = [
-    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/32.0',
-    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_6) AppleWebKit/537.78.2',
-    'Mozilla/5.0 (Windows NT 6.3; WOW64) Gecko/20100101 Firefox/32.0',
-    'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/37.0.2062.120',
+    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/41.0',
+    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9',
+    'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0',
+    'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/45.0.2062.120',
     'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
 ]
 
 
-def do_request(uri, method='get'):
-    with requests.Session() as session:
-        session.mount('file://', requests_file.FileAdapter())
-        user_agent = random.choice(user_agents)
+def _session_request(session, uri, method):
+    session.mount('file://', requests_file.FileAdapter())
+    user_agent = random.choice(user_agents)
 
-        return session.request(method, uri, headers={'User-Agent': user_agent})
+    return session.request(method, uri, headers={'User-Agent': user_agent})
 
 
-def read_uri(uri):
+def do_request(uri, method='get', session=None):
+    if session:
+        return _session_request(session, uri, method)
+    else:
+        with requests.Session() as session:
+            return _session_request(session, uri, method)
+
+
+def read_uri(uri, session=None):
     try:
-        return do_request(uri).text
+        return do_request(uri, session=session).text
     except Exception as e:
         LOG.warn('Error "%(error)s" retrieving uri %(uri)s',
                  {'error': e, 'uri': uri})
 
 
-def read_json_from_uri(uri):
+def read_json_from_uri(uri, session=None):
     try:
-        return do_request(uri).json()
+        return do_request(uri, session=session).json()
     except Exception as e:
         LOG.warn('Error "%(error)s" parsing json from uri %(uri)s',
                  {'error': e, 'uri': uri})