diff --git a/dashboard/templates/commits.html b/dashboard/templates/commits.html index 879065ed8..22258de80 100644 --- a/dashboard/templates/commits.html +++ b/dashboard/templates/commits.html @@ -34,9 +34,9 @@
{% if rec.launchpad_id %} - {{ link('/engineers/' + rec.launchpad_id, rec.author) }} + {{ link('/engineers/' + rec.launchpad_id, rec.author_name) }} {% else %} - {{ rec.author }} + {{ rec.author_name }} {% endif %} {% if rec.company %} ( diff --git a/dashboard/templates/layout.html b/dashboard/templates/layout.html index 6ebdd2bc1..413df6003 100644 --- a/dashboard/templates/layout.html +++ b/dashboard/templates/layout.html @@ -282,19 +282,17 @@
diff --git a/dashboard/templates/module_details.html b/dashboard/templates/module_details.html index 6ebc8207f..a1fb2fcbf 100644 --- a/dashboard/templates/module_details.html +++ b/dashboard/templates/module_details.html @@ -44,9 +44,9 @@
{% if rec.launchpad_id %} - {{ rec.author|link('/engineers/' + rec.launchpad_id)|safe }} + {{ rec.author_name|link('/engineers/' + rec.launchpad_id)|safe }} {% else %} - {{ rec.author }} + {{ rec.author_name }} {% endif %} {% if rec.company_name %} ( diff --git a/dashboard/web.py b/dashboard/web.py index a847d19c1..39861bf4d 100644 --- a/dashboard/web.py +++ b/dashboard/web.py @@ -124,7 +124,7 @@ def init_project_types(vault): for repo in persistent_storage_inst.find('repos'): project_type = repo['project_type'].lower() project_group = None - if 'project_group' in repo: + if ('project_group' in repo) and (repo['project_group']): project_group = repo['project_group'].lower() if project_type in project_type_options: @@ -158,6 +158,12 @@ def get_project_type_options(): return get_vault()['project_type_options'] +def get_release_options(): + releases = list((get_vault()['persistent_storage']).find('releases'))[1:] + releases.reverse() + return releases + + def is_project_type_valid(project_type): if not project_type: return False @@ -337,13 +343,17 @@ def templated(template=None, return_code=200): releases = vault['releases'] if release: release = release.lower() - if release not in releases: - release = None - else: - release = releases[release]['release_name'] + if release != 'all': + if release not in releases: + release = None + else: + release = releases[release]['release_name'] ctx['release'] = (release or get_default('release')).lower() ctx['project_type_options'] = get_project_type_options() + ctx['release_options'] = get_release_options() + ctx['metric_options'] = sorted(METRIC_LABELS.items(), + key=lambda x: x[0]) return flask.render_template(template_name, **ctx), return_code diff --git a/etc/default_data.json b/etc/default_data.json index 322d99ae4..e34974f04 100644 --- a/etc/default_data.json +++ b/etc/default_data.json @@ -14053,339 +14053,56 @@ } ] }, - - { - "branches": ["master"], - "module": "devstack", - "project_type": "openstack", - "project_group": "other", - "uri": "git://github.com/openstack-dev/devstack.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "896eb66", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "tempest", - "project_type": "openstack", - "project_group": "other", - "uri": "git://github.com/openstack/tempest.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "017e95c", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "requirements", - "project_type": "openstack", - "project_group": "other", - "uri": "git://github.com/openstack/requirements.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "aea036d", - "tag_to": "HEAD" - } - ] - }, - { "branches": ["master"], "module": "trove", "project_type": "openstack", "project_group": "incubation", - "uri": "git://github.com/openstack/trove.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "ca978a0f", - "tag_to": "HEAD" - } - ] + "uri": "git://github.com/openstack/trove.git" }, { "branches": ["master"], "module": "trove-integration", "project_type": "openstack", "project_group": "incubation", - "uri": "git://github.com/openstack/trove-integration.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "bac5b1b", - "tag_to": "HEAD" - } - ] + "uri": "git://github.com/openstack/trove-integration.git" }, { "branches": ["master"], "module": "python-troveclient", "project_type": "openstack", "project_group": "incubation", - "uri": "git://github.com/openstack/python-troveclient.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "6222dea", - "tag_to": "HEAD" - } - ] + "uri": "git://github.com/openstack/python-troveclient.git" }, { "branches": ["master"], "module": "ironic", "project_type": "openstack", "project_group": "incubation", - "uri": "git://github.com/openstack/ironic.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "8e05dbf", - "tag_to": "HEAD" - } - ] - }, + "uri": "git://github.com/openstack/ironic.git" + } + ], + "project_types": [ { - "branches": ["master"], - "module": "savanna", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/savanna.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "0.1", - "tag_to": "HEAD" - } - ] + "project_type": "openstack", + "project_group": "other", + "uri": "https://api.github.com/orgs/openstack/repos" }, { - "branches": ["master"], - "module": "designate", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/designate.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "411f1da0a391a25cf9a1aa3a838377a5fc186b98", - "tag_to": "HEAD" - } - ] + "project_type": "openstack", + "project_group": "other", + "uri": "https://api.github.com/orgs/openstack-dev/repos" }, { - "branches": ["master"], - "module": "diskimage-builder", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/diskimage-builder.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "b196f1bb", - "tag_to": "HEAD" - } - ] + "project_type": "openstack", + "project_group": "infrastructure", + "uri": "https://api.github.com/orgs/openstack-infra/repos" }, { - "branches": ["master"], - "module": "marconi", "project_type": "stackforge", - "uri": "git://github.com/stackforge/marconi.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "1a15292c", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "stackalytics", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/stackalytics.git", - "releases": [ - { - "release_name": "Havana", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "murano-api", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/murano-api.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "0.1", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "tripleo-image-elements", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/tripleo-image-elements.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "ea57869d3e1ae899631f6d55b24f35c3d88b635d", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "tripleo-heat-templates", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/tripleo-heat-templates.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "7877d912ced0141c3b0812aa9110d7b2d9f95297", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "puppet-openstack", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/puppet-openstack.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "c95da8fc509a3a290ea052899e22123384289130", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "puppet-nova", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/puppet-nova.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "3fe4294a857bc2433543467e86e1760955f8fb71", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "puppet-keystone", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/puppet-keystone.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "d21f4282afee2c0af737204119c5a7e09cbd4a45", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "puppet-glance", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/puppet-glance.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "3ce02dbd69cc3827e80455568901e7bb030f137d", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "puppet-cinder", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/puppet-cinder.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "b0fb0456e08412adba1497ab5778291b2f665a00", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "puppet-quantum", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/puppet-quantum.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "ad2f2099251502cfbfcae00e2286e4f43863e438", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "puppet-horizon", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/puppet-horizon.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "44b2416f6e3af420f1c52681e528af0440255c66", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "puppet-swift", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/puppet-swift.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "6d88ca53095fd4a6f0ea0d2bd3446a7d0e401ac2", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "puppet-ceilometer", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/puppet-ceilometer.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "07c9d444ef816bba26dda86b651980a44a4da2d4", - "tag_to": "HEAD" - } - ] - }, - { - "branches": ["master"], - "module": "packstack", - "project_type": "stackforge", - "uri": "git://github.com/stackforge/packstack.git", - "releases": [ - { - "release_name": "Havana", - "tag_from": "aa8398c865846e3fd2abc8ed52d0865704d7ce08", - "tag_to": "HEAD" - } - ] + "project_group": null, + "uri": "https://api.github.com/orgs/stackforge/repos" } ], diff --git a/etc/stackalytics.conf b/etc/stackalytics.conf index e78bdfce8..4ae588e6c 100644 --- a/etc/stackalytics.conf +++ b/etc/stackalytics.conf @@ -3,7 +3,7 @@ # debug = False # Default data -# default_data = /etc/stackalytics/default_data.json +# default_data_uri = /etc/stackalytics/default_data.json # The folder that holds all project sources to analyze # sources_root = /var/local/stackalytics diff --git a/etc/test_default_data.json b/etc/test_default_data.json index 01d7f98f9..219e0df5a 100644 --- a/etc/test_default_data.json +++ b/etc/test_default_data.json @@ -45,6 +45,14 @@ } ], + "project_types": [ + { + "project_type": "openstack", + "project_group": null, + "uri": "https://api.github.com/orgs/openstack-dev/repos" + } + ], + "releases": [ { "release_name": "prehistory", diff --git a/stackalytics/processor/config.py b/stackalytics/processor/config.py index 80e2525dd..5d699cd3c 100644 --- a/stackalytics/processor/config.py +++ b/stackalytics/processor/config.py @@ -16,8 +16,9 @@ from oslo.config import cfg OPTS = [ - cfg.StrOpt('default-data', default='etc/default_data.json', - help='Default data'), + cfg.StrOpt('default-data-uri', + default='file:///usr/local/etc/default_data.json', + help='URI for default data'), cfg.StrOpt('sources-root', default='/var/local/stackalytics', help='The folder that holds all project sources to analyze'), cfg.StrOpt('runtime-storage-uri', default='memcached://127.0.0.1:11211', @@ -34,8 +35,8 @@ OPTS = [ help='The address of file with corrections data'), cfg.StrOpt('review-uri', default='gerrit://review.openstack.org', help='URI of review system'), - cfg.StrOpt('ssh-key-filename', default='/home/ishakhat/.ssh/4launchpad_id', + cfg.StrOpt('ssh-key-filename', default='/home/user/.ssh/id_rsa', help='SSH key for gerrit review system access'), - cfg.StrOpt('ssh-username', default='ishakhat', + cfg.StrOpt('ssh-username', default='user', help='SSH username for gerrit review system access'), ] diff --git a/stackalytics/processor/default_data_processor.py b/stackalytics/processor/default_data_processor.py index c4b54d86c..74af3f788 100644 --- a/stackalytics/processor/default_data_processor.py +++ b/stackalytics/processor/default_data_processor.py @@ -12,6 +12,8 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +import json +import urllib from stackalytics.openstack.common import log as logging from stackalytics.processor import normalizer @@ -22,41 +24,85 @@ from stackalytics.processor import vcs LOG = logging.getLogger(__name__) +def items_match(item, p_item): + if not p_item: + return True + for key, value in item.iteritems(): + if (key not in p_item) or (p_item[key] != value): + return False + return True + + def _update_persistent_storage(persistent_storage_inst, default_data): need_update = False for table, primary_key in persistent_storage.PRIMARY_KEYS.iteritems(): - for item in default_data[table]: - param = {primary_key: item[primary_key]} - for p_item in persistent_storage_inst.find(table, **param): - break - else: - p_item = None - - if item != p_item: - need_update = True - if p_item: - persistent_storage_inst.update(table, item) + if table in default_data: + for item in default_data[table]: + param = {primary_key: item[primary_key]} + for p_item in persistent_storage_inst.find(table, **param): + break else: - persistent_storage_inst.insert(table, item) + p_item = None + + if not items_match(item, p_item): + need_update = True + if p_item: + persistent_storage_inst.update(table, item) + else: + persistent_storage_inst.insert(table, item) return need_update +def _retrieve_project_list(default_data): + + if 'project_types' not in default_data: + return + + repo_index = {} + for repo in default_data['repos']: + repo_index[repo['uri']] = repo + + for project_type in default_data['project_types']: + uri = project_type['uri'] + repos_fd = urllib.urlopen(uri) + raw = repos_fd.read() + repos_fd.close() + repos = json.loads(raw) + + for repo in repos: + repo_uri = repo['git_url'] + repo_name = repo['name'] + + if repo_uri not in repo_index: + r = { + 'branches': ['master'], + 'module': repo_name, + 'project_type': project_type['project_type'], + 'project_group': project_type['project_group'], + 'uri': repo_uri + } + default_data['repos'].append(r) + + def process(persistent_storage_inst, runtime_storage_inst, default_data, sources_root): + _retrieve_project_list(default_data) + normalizer.normalize_default_data(default_data) if _update_persistent_storage(persistent_storage_inst, default_data): + persistent_storage_inst.reset(default_data) + release_index = {} for repo in persistent_storage_inst.find('repos'): vcs_inst = vcs.get_vcs(repo, sources_root) release_index.update(vcs_inst.get_release_index()) - persistent_storage_inst.reset(default_data) record_processor_inst = record_processor.RecordProcessor( persistent_storage_inst) updated_records = record_processor_inst.update( diff --git a/stackalytics/processor/main.py b/stackalytics/processor/main.py index 1eefaa997..46d293521 100644 --- a/stackalytics/processor/main.py +++ b/stackalytics/processor/main.py @@ -139,11 +139,12 @@ def apply_corrections(uri, runtime_storage_inst): runtime_storage_inst.apply_corrections(corrections) -def _read_default_persistent_storage(file_name): +def _read_default_data(uri): try: - with open(file_name, 'r') as content_file: - content = content_file.read() - return json.loads(content) + fd = urllib.urlopen(uri) + raw = fd.read() + fd.close() + return json.loads(raw) except Exception as e: LOG.error('Error while reading config: %s' % e) @@ -164,7 +165,7 @@ def main(): runtime_storage_inst = runtime_storage.get_runtime_storage( cfg.CONF.runtime_storage_uri) - default_data = _read_default_persistent_storage(cfg.CONF.default_data) + default_data = _read_default_data(cfg.CONF.default_data_uri) default_data_processor.process(persistent_storage_inst, runtime_storage_inst, default_data, diff --git a/stackalytics/processor/normalizer.py b/stackalytics/processor/normalizer.py index 569430bc4..0d15acfd9 100644 --- a/stackalytics/processor/normalizer.py +++ b/stackalytics/processor/normalizer.py @@ -19,6 +19,10 @@ from stackalytics.processor import utils LOG = logging.getLogger(__name__) +def get_user_id(launchpad_id, email): + return launchpad_id or email + + def normalize_user(user): user['emails'] = [email.lower() for email in user['emails']] if user['launchpad_id']: @@ -40,7 +44,7 @@ def normalize_user(user): return cmp(x["end_date"], y["end_date"]) user['companies'].sort(cmp=end_date_comparator) - user['user_id'] = user['launchpad_id'] or user['emails'][0] + user['user_id'] = get_user_id(user['launchpad_id'], user['emails'][0]) def _normalize_users(users): diff --git a/stackalytics/processor/persistent_storage.py b/stackalytics/processor/persistent_storage.py index cf2bd1a3e..7ed75fcb8 100644 --- a/stackalytics/processor/persistent_storage.py +++ b/stackalytics/processor/persistent_storage.py @@ -25,6 +25,7 @@ PRIMARY_KEYS = { 'repos': 'uri', 'users': 'user_id', 'releases': 'release_name', + 'project_types': 'uri', } diff --git a/stackalytics/processor/rcs.py b/stackalytics/processor/rcs.py index f82b3cc0f..fd4542631 100644 --- a/stackalytics/processor/rcs.py +++ b/stackalytics/processor/rcs.py @@ -35,10 +35,10 @@ class Rcs(object): pass def log(self, branch, last_id): - pass + return [] def get_last_id(self, branch): - pass + return -1 class Gerrit(Rcs): @@ -96,6 +96,8 @@ class Gerrit(Rcs): review['module'] = module yield review + self.client.close() + def get_last_id(self, branch): module = self.repo['module'] LOG.debug('Get last id for module %s', module) @@ -107,12 +109,19 @@ class Gerrit(Rcs): {'module': module, 'branch': branch}) stdin, stdout, stderr = self.client.exec_command(cmd) + last_id = None for line in stdout: review = json.loads(line) if 'sortKey' in review: - return int(review['sortKey'], 16) + last_id = int(review['sortKey'], 16) + break - raise Exception('Last id is not found for module %s' % module) + self.client.close() + + if not last_id: + raise Exception('Last id is not found for module %s' % module) + + return last_id def get_rcs(repo, uri): @@ -121,4 +130,5 @@ def get_rcs(repo, uri): if match: return Gerrit(repo, uri) else: - raise Exception('Unknown review control system for uri %s' % uri) + LOG.warning('Unsupported review control system, fallback to dummy') + return Rcs(repo, uri) diff --git a/stackalytics/processor/record_processor.py b/stackalytics/processor/record_processor.py index ff9791304..db81c9600 100644 --- a/stackalytics/processor/record_processor.py +++ b/stackalytics/processor/record_processor.py @@ -37,8 +37,6 @@ class RecordProcessor(object): users = persistent_storage_inst.find('users') self.users_index = {} for user in users: - if 'launchpad_id' in user: - self.users_index[user['launchpad_id']] = user for email in user['emails']: self.users_index[email] = user @@ -65,10 +63,33 @@ class RecordProcessor(object): return self.domains_index[m] return None - def _persist_user(self, launchpad_id, email, user_name): - # check if user with launchpad_id exists in persistent storage + def _create_user(self, launchpad_id, email, user_name): + company = (self._get_company_by_email(email) or + self._get_independent()) + user = { + 'user_id': normalizer.get_user_id(launchpad_id, email), + 'launchpad_id': launchpad_id, + 'user_name': user_name, + 'emails': [email], + 'companies': [{ + 'company_name': company, + 'end_date': 0, + }], + } + normalizer.normalize_user(user) + self.persistent_storage_inst.insert('users', user) + return user + + def _persist_user(self, record): + launchpad_id = record['launchpad_id'] + email = record['author_email'] + user_name = record['author_name'] + + # check if user with user_id exists in persistent storage + user_id = normalizer.get_user_id(launchpad_id, email) persistent_user_iterator = self.persistent_storage_inst.find( - 'users', launchpad_id=launchpad_id) + 'users', user_id=user_id) + for persistent_user in persistent_user_iterator: break else: @@ -76,35 +97,23 @@ class RecordProcessor(object): if persistent_user: # user already exist, merge - LOG.debug('User exists in persistent storage, add new email %s', - email) + LOG.debug('User %s (%s) exists, add new email %s', + launchpad_id, user_name, email) persistent_user_email = persistent_user['emails'][0] if persistent_user_email not in self.users_index: - return persistent_user + raise Exception('Email %s not found in user index' % + persistent_user_email) user = self.users_index[persistent_user_email] user['emails'].append(email) self.persistent_storage_inst.update('users', user) else: # add new user - LOG.debug('Add new user into persistent storage') - company = (self._get_company_by_email(email) or - self._get_independent()) - user = { - 'launchpad_id': launchpad_id, - 'user_name': user_name, - 'emails': [email], - 'companies': [{ - 'company_name': company, - 'end_date': 0, - }], - } - normalizer.normalize_user(user) - self.persistent_storage_inst.insert('users', user) + LOG.debug('Add new user %s (%s)', launchpad_id, user_name) + user = self._create_user(launchpad_id, email, user_name) return user - def _unknown_user_email(self, email, user_name): - + def _get_lp_info(self, email): lp_profile = None if not re.match(r'[\w\d_\.-]+@([\w\d_\.-]+\.)+[\w]+', email): LOG.debug('User email is not valid %s' % email) @@ -118,78 +127,47 @@ class RecordProcessor(object): LOG.warn('Lookup of email %s failed %s', email, error.message) if not lp_profile: - # user is not found in Launchpad, create dummy record for commit - # update - LOG.debug('Email is not found at Launchpad, mapping to nobody') - user = { - 'launchpad_id': None, - 'user_name': user_name, - 'emails': [email], - 'companies': [{ - 'company_name': self._get_independent(), - 'end_date': 0 - }] - } - normalizer.normalize_user(user) - # add new user - self.persistent_storage_inst.insert('users', user) - else: - # get user's launchpad id from his profile - launchpad_id = lp_profile.name - user_name = lp_profile.display_name - LOG.debug('Found user %s', launchpad_id) + return None, None - user = self._persist_user(launchpad_id, email, user_name) - - # update local index - self.users_index[email] = user - return user + return lp_profile.name, lp_profile.display_name def _get_independent(self): return self.domains_index[''] - def _update_commit_with_user_data(self, commit): - email = commit['author_email'].lower() - if email in self.users_index: - user = self.users_index[email] - else: - user = self._unknown_user_email(email, commit['author_name']) - - commit['launchpad_id'] = user['launchpad_id'] - commit['user_id'] = user['user_id'] - - company = self._get_company_by_email(email) - if not company: - company = self._find_company(user['companies'], commit['date']) - commit['company_name'] = company - - if 'user_name' in user: - commit['author_name'] = user['user_name'] - - def _process_commit(self, record): - self._update_commit_with_user_data(record) - - record['primary_key'] = record['commit_id'] - record['loc'] = record['lines_added'] + record['lines_deleted'] - - yield record - - def _process_user(self, record): - email = record['author_email'] + def _update_record_and_user(self, record): + email = record['author_email'].lower() + record['author_email'] = email if email in self.users_index: user = self.users_index[email] + record['launchpad_id'] = user['launchpad_id'] else: - user = self._persist_user(record['launchpad_id'], email, - record['author_name']) + if ('launchpad_id' not in record) or (not record['launchpad_id']): + launchpad_id, user_name = self._get_lp_info(email) + record['launchpad_id'] = launchpad_id + if user_name: + record['author_name'] = user_name + + user = self._persist_user(record) self.users_index[email] = user + record['user_id'] = user['user_id'] + company = self._get_company_by_email(email) if not company: company = self._find_company(user['companies'], record['date']) - record['company_name'] = company - record['user_id'] = user['user_id'] + + if 'user_name' in user: + record['author_name'] = user['user_name'] + + def _process_commit(self, record): + record['primary_key'] = record['commit_id'] + record['loc'] = record['lines_added'] + record['lines_deleted'] + + self._update_record_and_user(record) + + yield record def _spawn_review(self, record): # copy everything except pathsets and flatten user data @@ -202,10 +180,10 @@ class RecordProcessor(object): review['primary_key'] = review['id'] review['launchpad_id'] = owner['username'] review['author_name'] = owner['name'] - review['author_email'] = owner['email'].lower() + review['author_email'] = owner['email'] review['date'] = record['createdOn'] - self._process_user(review) + self._update_record_and_user(review) yield review @@ -232,11 +210,11 @@ class RecordProcessor(object): mark['type']) mark['launchpad_id'] = reviewer['username'] mark['author_name'] = reviewer['name'] - mark['author_email'] = reviewer['email'].lower() + mark['author_email'] = reviewer['email'] mark['module'] = module mark['review_id'] = review_id - self._process_user(mark) + self._update_record_and_user(mark) yield mark @@ -278,7 +256,7 @@ class RecordProcessor(object): company_name = record['company_name'] user_id = record['user_id'] - self._process_user(record) + self._update_record_and_user(record) if ((record['company_name'] != company_name) or (record['user_id'] != user_id)): diff --git a/stackalytics/processor/runtime_storage.py b/stackalytics/processor/runtime_storage.py index cebc1b698..4783f7ad7 100644 --- a/stackalytics/processor/runtime_storage.py +++ b/stackalytics/processor/runtime_storage.py @@ -74,6 +74,7 @@ class MemcachedStorage(RuntimeStorage): # update record_id = self.record_index[record['primary_key']] if not merge_handler: + record['record_id'] = record_id self.memcached.set(self._get_record_name(record_id), record) else: diff --git a/stackalytics/processor/vcs.py b/stackalytics/processor/vcs.py index 60c276a84..1ca4bc215 100644 --- a/stackalytics/processor/vcs.py +++ b/stackalytics/processor/vcs.py @@ -182,9 +182,10 @@ class Git(Vcs): def get_vcs(repo, sources_root): uri = repo['uri'] - LOG.debug('Factory is asked for Vcs uri %s' % uri) + LOG.debug('Factory is asked for VCS uri %s' % uri) match = re.search(r'\.git$', uri) if match: return Git(repo, sources_root) else: - raise Exception("Unknown Vcs uri %s" % uri) + LOG.warning('Unsupported VCS, fallback to dummy') + return Vcs(repo, uri) diff --git a/tests/unit/test_record_processor.py b/tests/unit/test_record_processor.py index c9347acd4..578e8f257 100644 --- a/tests/unit/test_record_processor.py +++ b/tests/unit/test_record_processor.py @@ -121,14 +121,14 @@ class TestRecordProcessor(testtools.TestCase): def test_update_commit_existing_user(self): commit = self._make_commit() - self.commit_processor._update_commit_with_user_data(commit) + self.commit_processor._update_record_and_user(commit) self.assertEquals('SuperCompany', commit['company_name']) self.assertEquals('john_doe', commit['launchpad_id']) def test_update_commit_existing_user_old_job(self): commit = self._make_commit(date=1000000000) - self.commit_processor._update_commit_with_user_data(commit) + self.commit_processor._update_record_and_user(commit) self.assertEquals('*independent', commit['company_name']) self.assertEquals('john_doe', commit['launchpad_id']) @@ -149,7 +149,7 @@ class TestRecordProcessor(testtools.TestCase): # tell storage to return existing user self.get_users.return_value = [user] - self.commit_processor._update_commit_with_user_data(commit) + self.commit_processor._update_record_and_user(commit) self.persistent_storage.update.assert_called_once_with('users', user) lp_mock.people.getByEmail.assert_called_once_with(email=email) @@ -173,7 +173,7 @@ class TestRecordProcessor(testtools.TestCase): # tell storage to return existing user self.get_users.return_value = [user] - self.commit_processor._update_commit_with_user_data(commit) + self.commit_processor._update_record_and_user(commit) self.persistent_storage.update.assert_called_once_with('users', user) lp_mock.people.getByEmail.assert_called_once_with(email=email) @@ -196,7 +196,7 @@ class TestRecordProcessor(testtools.TestCase): lp_mock.people.getByEmail = mock.Mock(return_value=lp_profile) self.get_users.return_value = [] - self.commit_processor._update_commit_with_user_data(commit) + self.commit_processor._update_record_and_user(commit) lp_mock.people.getByEmail.assert_called_once_with(email=email) self.assertEquals('NEC', commit['company_name']) @@ -214,7 +214,7 @@ class TestRecordProcessor(testtools.TestCase): lp_mock.people.getByEmail = mock.Mock(return_value=None) self.get_users.return_value = [] - self.commit_processor._update_commit_with_user_data(commit) + self.commit_processor._update_record_and_user(commit) lp_mock.people.getByEmail.assert_called_once_with(email=email) self.assertEquals('*independent', commit['company_name']) @@ -232,7 +232,7 @@ class TestRecordProcessor(testtools.TestCase): side_effect=Exception) self.get_users.return_value = [] - self.commit_processor._update_commit_with_user_data(commit) + self.commit_processor._update_record_and_user(commit) lp_mock.people.getByEmail.assert_called_once_with(email=email) self.assertEquals('*independent', commit['company_name']) @@ -249,7 +249,7 @@ class TestRecordProcessor(testtools.TestCase): lp_mock.people.getByEmail = mock.Mock(return_value=None) self.get_users.return_value = [] - self.commit_processor._update_commit_with_user_data(commit) + self.commit_processor._update_record_and_user(commit) self.assertEquals(0, lp_mock.people.getByEmail.called) self.assertEquals('*independent', commit['company_name'])