Merge user profiles during default data update
* Existing user profiles are updated with data read from default-data * Refactoring of default data processor * Removed unused code from normalizer (validity is enforced by schema) * Add more restrictions into the schema Fixes bug 1260696 Change-Id: Iab1f190ff8a7e2a0c0800712174565437149e119
This commit is contained in:
parent
0ff9761a64
commit
82600d8805
@ -39,8 +39,10 @@
|
|||||||
"pattern": "20\\d{2}-\\w{3}-[0-3]\\d"
|
"pattern": "20\\d{2}-\\w{3}-[0-3]\\d"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"required": ["company_name", "end_date"],
|
||||||
"additionalProperties": false
|
"additionalProperties": false
|
||||||
}
|
},
|
||||||
|
"minItems": 1
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": ["launchpad_id", "user_name", "emails"],
|
"required": ["launchpad_id", "user_name", "emails"],
|
||||||
|
@ -34,7 +34,7 @@ def _check_default_data_change(runtime_storage_inst, default_data):
|
|||||||
|
|
||||||
p_digest = runtime_storage_inst.get_by_key('default_data_digest')
|
p_digest = runtime_storage_inst.get_by_key('default_data_digest')
|
||||||
if digest == p_digest:
|
if digest == p_digest:
|
||||||
LOG.debug('No changes in default data detected, sha1: %s', digest)
|
LOG.debug('No changes in default data, sha1: %s', digest)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
LOG.debug('Default data has changes, sha1: %s', digest)
|
LOG.debug('Default data has changes, sha1: %s', digest)
|
||||||
@ -42,21 +42,16 @@ def _check_default_data_change(runtime_storage_inst, default_data):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def _retrieve_project_list(default_data):
|
def _retrieve_project_list_from_github(project_sources):
|
||||||
|
|
||||||
LOG.info('Retrieving project list from GitHub')
|
LOG.info('Retrieving project list from GitHub')
|
||||||
|
|
||||||
repo_index = {}
|
|
||||||
for repo in default_data['repos']:
|
|
||||||
repo_index[repo['uri']] = repo
|
|
||||||
|
|
||||||
github = MainClass.Github(timeout=60)
|
github = MainClass.Github(timeout=60)
|
||||||
|
|
||||||
for project_source in default_data['project_sources']:
|
repos = []
|
||||||
|
for project_source in project_sources:
|
||||||
organization = project_source['organization']
|
organization = project_source['organization']
|
||||||
LOG.debug('Get list of projects for organization %s', organization)
|
LOG.debug('Get list of projects for organization %s', organization)
|
||||||
try:
|
try:
|
||||||
repos = github.get_organization(organization).get_repos()
|
github_repos = github.get_organization(organization).get_repos()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
LOG.exception(e)
|
LOG.exception(e)
|
||||||
LOG.warn('Fail to retrieve list of projects. Keep it unmodified')
|
LOG.warn('Fail to retrieve list of projects. Keep it unmodified')
|
||||||
@ -64,42 +59,42 @@ def _retrieve_project_list(default_data):
|
|||||||
|
|
||||||
exclude = set(project_source.get('exclude', []))
|
exclude = set(project_source.get('exclude', []))
|
||||||
|
|
||||||
for repo in repos:
|
for repo in github_repos:
|
||||||
repo_uri = repo.git_url
|
if repo.name not in exclude:
|
||||||
repo_name = repo.name
|
|
||||||
|
|
||||||
if (repo_uri not in repo_index) and (repo_name not in exclude):
|
|
||||||
r = {
|
r = {
|
||||||
'branches': ['master'],
|
'branches': ['master'],
|
||||||
'module': repo_name,
|
'module': repo.name,
|
||||||
'organization': organization,
|
'organization': organization,
|
||||||
'project_type': project_source['project_type'],
|
'project_type': project_source['project_type'],
|
||||||
'project_group': project_source['project_group'],
|
'project_group': project_source['project_group'],
|
||||||
'uri': repo_uri,
|
'uri': repo.git_url,
|
||||||
'releases': []
|
'releases': []
|
||||||
}
|
}
|
||||||
default_data['repos'].append(r)
|
repos.append(r)
|
||||||
LOG.debug('Project is added to default data: %s', r)
|
LOG.debug('Project is added to default data: %s', r)
|
||||||
|
return repos
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def _process_users(runtime_storage_inst, users):
|
def _update_project_list(default_data):
|
||||||
users_index = {}
|
|
||||||
|
configured_repos = set([r['uri'] for r in default_data['repos']])
|
||||||
|
|
||||||
|
repos = _retrieve_project_list_from_github(default_data['project_sources'])
|
||||||
|
if repos:
|
||||||
|
default_data['repos'] += [r for r in repos
|
||||||
|
if r['uri'] not in configured_repos]
|
||||||
|
|
||||||
|
|
||||||
|
def _store_users(runtime_storage_inst, users):
|
||||||
for user in users:
|
for user in users:
|
||||||
|
stored_user = utils.load_user(runtime_storage_inst, user['user_id'])
|
||||||
|
if stored_user:
|
||||||
|
stored_user.update(user)
|
||||||
|
user = stored_user
|
||||||
utils.store_user(runtime_storage_inst, user)
|
utils.store_user(runtime_storage_inst, user)
|
||||||
if 'user_id' in user:
|
|
||||||
users_index[user['user_id']] = user
|
|
||||||
if 'launchpad_id' in user:
|
|
||||||
users_index[user['launchpad_id']] = user
|
|
||||||
if 'gerrit_id' in user:
|
|
||||||
users_index[user['gerrit_id']] = user
|
|
||||||
for email in user['emails']:
|
|
||||||
users_index[email] = user
|
|
||||||
runtime_storage_inst.set_by_key('users', users_index)
|
|
||||||
|
|
||||||
|
|
||||||
def _process_companies(runtime_storage_inst, companies):
|
def _store_companies(runtime_storage_inst, companies):
|
||||||
domains_index = {}
|
domains_index = {}
|
||||||
for company in companies:
|
for company in companies:
|
||||||
for domain in company['domains']:
|
for domain in company['domains']:
|
||||||
@ -107,50 +102,54 @@ def _process_companies(runtime_storage_inst, companies):
|
|||||||
runtime_storage_inst.set_by_key('companies', domains_index)
|
runtime_storage_inst.set_by_key('companies', domains_index)
|
||||||
|
|
||||||
|
|
||||||
KEYS = {
|
STORE_FUNCS = {
|
||||||
'users': _process_users,
|
'users': _store_users,
|
||||||
'companies': _process_companies,
|
'companies': _store_companies,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _update_default_data(runtime_storage_inst, default_data):
|
def _store_default_data(runtime_storage_inst, default_data):
|
||||||
|
normalizer.normalize_default_data(default_data)
|
||||||
|
|
||||||
LOG.debug('Update runtime storage with default data')
|
LOG.debug('Update runtime storage with default data')
|
||||||
for key, value in default_data.iteritems():
|
for key, value in default_data.iteritems():
|
||||||
if key in KEYS:
|
if key in STORE_FUNCS:
|
||||||
KEYS[key](runtime_storage_inst, value)
|
STORE_FUNCS[key](runtime_storage_inst, value)
|
||||||
else:
|
else:
|
||||||
runtime_storage_inst.set_by_key(key, value)
|
runtime_storage_inst.set_by_key(key, value)
|
||||||
|
|
||||||
|
|
||||||
|
def _update_records(runtime_storage_inst, sources_root):
|
||||||
|
LOG.debug('Gather release index for all repos')
|
||||||
|
release_index = {}
|
||||||
|
for repo in utils.load_repos(runtime_storage_inst):
|
||||||
|
vcs_inst = vcs.get_vcs(repo, sources_root)
|
||||||
|
release_index.update(vcs_inst.get_release_index())
|
||||||
|
|
||||||
|
record_processor_inst = record_processor.RecordProcessor(
|
||||||
|
runtime_storage_inst)
|
||||||
|
|
||||||
|
# need to iterate over full view of records and generate valid
|
||||||
|
# users profiles
|
||||||
|
LOG.debug('Iterate all records to create valid users profiles')
|
||||||
|
for record in runtime_storage_inst.get_all_records():
|
||||||
|
record_processor_inst.update_user(record)
|
||||||
|
|
||||||
|
# update records according to generated users profiles
|
||||||
|
LOG.debug('Update all records according to users profiles')
|
||||||
|
updated_records = record_processor_inst.update(
|
||||||
|
runtime_storage_inst.get_all_records(), release_index)
|
||||||
|
runtime_storage_inst.set_records(updated_records)
|
||||||
|
|
||||||
|
|
||||||
def process(runtime_storage_inst, default_data, sources_root, force_update):
|
def process(runtime_storage_inst, default_data, sources_root, force_update):
|
||||||
LOG.debug('Process default data')
|
LOG.debug('Process default data')
|
||||||
|
|
||||||
normalizer.normalize_default_data(default_data)
|
|
||||||
|
|
||||||
dd_changed = _check_default_data_change(runtime_storage_inst, default_data)
|
dd_changed = _check_default_data_change(runtime_storage_inst, default_data)
|
||||||
|
|
||||||
if 'project_sources' in default_data:
|
if 'project_sources' in default_data:
|
||||||
if not _retrieve_project_list(default_data):
|
_update_project_list(default_data)
|
||||||
raise Exception('Unable to retrieve project list')
|
|
||||||
|
|
||||||
_update_default_data(runtime_storage_inst, default_data)
|
if dd_changed or force_update:
|
||||||
|
_store_default_data(runtime_storage_inst, default_data)
|
||||||
if (dd_changed or force_update):
|
_update_records(runtime_storage_inst, sources_root)
|
||||||
LOG.debug('Gather release index for all repos')
|
|
||||||
release_index = {}
|
|
||||||
for repo in utils.load_repos(runtime_storage_inst):
|
|
||||||
vcs_inst = vcs.get_vcs(repo, sources_root)
|
|
||||||
release_index.update(vcs_inst.get_release_index())
|
|
||||||
|
|
||||||
record_processor_inst = record_processor.RecordProcessor(
|
|
||||||
runtime_storage_inst)
|
|
||||||
# need to iterate over full view of records and generate valid
|
|
||||||
# users profiles
|
|
||||||
LOG.debug('Iterate all records to create valid users profiles')
|
|
||||||
for record in runtime_storage_inst.get_all_records():
|
|
||||||
record_processor_inst.update_user(record)
|
|
||||||
# update records according to generated users profiles
|
|
||||||
LOG.debug('Update all records according to users profiles')
|
|
||||||
updated_records = record_processor_inst.update(
|
|
||||||
runtime_storage_inst.get_all_records(), release_index)
|
|
||||||
runtime_storage_inst.set_records(updated_records)
|
|
||||||
|
@ -19,20 +19,9 @@ from stackalytics.processor import utils
|
|||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def get_user_id(launchpad_id, email):
|
def _normalize_user(user):
|
||||||
return launchpad_id or email
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_user(user):
|
|
||||||
user['emails'] = [email.lower() for email in user['emails']]
|
|
||||||
if user['launchpad_id']:
|
|
||||||
user['launchpad_id'] = user['launchpad_id'].lower()
|
|
||||||
|
|
||||||
for c in user['companies']:
|
for c in user['companies']:
|
||||||
end_date_numeric = 0
|
c['end_date'] = utils.date_to_timestamp(c['end_date'])
|
||||||
if c['end_date']:
|
|
||||||
end_date_numeric = utils.date_to_timestamp(c['end_date'])
|
|
||||||
c['end_date'] = end_date_numeric
|
|
||||||
|
|
||||||
# sort companies by end_date
|
# sort companies by end_date
|
||||||
def end_date_comparator(x, y):
|
def end_date_comparator(x, y):
|
||||||
@ -44,19 +33,12 @@ def normalize_user(user):
|
|||||||
return cmp(x["end_date"], y["end_date"])
|
return cmp(x["end_date"], y["end_date"])
|
||||||
|
|
||||||
user['companies'].sort(cmp=end_date_comparator)
|
user['companies'].sort(cmp=end_date_comparator)
|
||||||
if user['emails']:
|
user['user_id'] = user['launchpad_id']
|
||||||
user['user_id'] = get_user_id(user['launchpad_id'], user['emails'][0])
|
|
||||||
else:
|
|
||||||
user['user_id'] = user['launchpad_id']
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_users(users):
|
def _normalize_users(users):
|
||||||
for user in users:
|
for user in users:
|
||||||
if ('launchpad_id' not in user) or ('emails' not in user):
|
_normalize_user(user)
|
||||||
LOG.warn('Skipping invalid user: %s', user)
|
|
||||||
continue
|
|
||||||
|
|
||||||
normalize_user(user)
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_releases(releases):
|
def _normalize_releases(releases):
|
||||||
|
@ -18,7 +18,6 @@ import time
|
|||||||
|
|
||||||
from stackalytics.openstack.common import log as logging
|
from stackalytics.openstack.common import log as logging
|
||||||
from stackalytics.processor import launchpad_utils
|
from stackalytics.processor import launchpad_utils
|
||||||
from stackalytics.processor import normalizer
|
|
||||||
from stackalytics.processor import utils
|
from stackalytics.processor import utils
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
@ -82,7 +81,7 @@ class RecordProcessor(object):
|
|||||||
company = (self._get_company_by_email(email) or
|
company = (self._get_company_by_email(email) or
|
||||||
self._get_independent())
|
self._get_independent())
|
||||||
user = {
|
user = {
|
||||||
'user_id': normalizer.get_user_id(launchpad_id, email),
|
'user_id': launchpad_id or email,
|
||||||
'launchpad_id': launchpad_id,
|
'launchpad_id': launchpad_id,
|
||||||
'user_name': user_name or '',
|
'user_name': user_name or '',
|
||||||
'companies': [{
|
'companies': [{
|
||||||
@ -94,7 +93,6 @@ class RecordProcessor(object):
|
|||||||
user['emails'] = [email]
|
user['emails'] = [email]
|
||||||
else:
|
else:
|
||||||
user['emails'] = []
|
user['emails'] = []
|
||||||
normalizer.normalize_user(user)
|
|
||||||
return user
|
return user
|
||||||
|
|
||||||
def _get_lp_info(self, email):
|
def _get_lp_info(self, email):
|
||||||
|
@ -28,6 +28,8 @@ LOG = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def date_to_timestamp(d):
|
def date_to_timestamp(d):
|
||||||
|
if not d:
|
||||||
|
return 0
|
||||||
if d == 'now':
|
if d == 'now':
|
||||||
return int(time.time())
|
return int(time.time())
|
||||||
return int(time.mktime(
|
return int(time.mktime(
|
||||||
|
@ -139,7 +139,7 @@ class Git(Vcs):
|
|||||||
LOG.debug('Get release index for repo uri: %s', self.repo['uri'])
|
LOG.debug('Get release index for repo uri: %s', self.repo['uri'])
|
||||||
os.chdir(self.folder)
|
os.chdir(self.folder)
|
||||||
if not self.release_index:
|
if not self.release_index:
|
||||||
for release in self.repo['releases']:
|
for release in self.repo.get('releases', []):
|
||||||
release_name = release['release_name'].lower()
|
release_name = release['release_name'].lower()
|
||||||
|
|
||||||
if 'branch' in release:
|
if 'branch' in release:
|
||||||
|
@ -17,6 +17,7 @@ import copy
|
|||||||
import mock
|
import mock
|
||||||
import testtools
|
import testtools
|
||||||
|
|
||||||
|
from stackalytics.processor import default_data_processor
|
||||||
from stackalytics.processor import normalizer
|
from stackalytics.processor import normalizer
|
||||||
from tests.unit import test_data
|
from tests.unit import test_data
|
||||||
|
|
||||||
@ -49,3 +50,23 @@ class TestDefaultDataProcessor(testtools.TestCase):
|
|||||||
self.assertEqual(test_data.USERS[0]['launchpad_id'],
|
self.assertEqual(test_data.USERS[0]['launchpad_id'],
|
||||||
data['users'][0]['user_id'],
|
data['users'][0]['user_id'],
|
||||||
message='User id should be set')
|
message='User id should be set')
|
||||||
|
|
||||||
|
def test_update_project_list(self):
|
||||||
|
with mock.patch('stackalytics.processor.default_data_processor.'
|
||||||
|
'_retrieve_project_list_from_github') as retriever:
|
||||||
|
retriever.return_value = [
|
||||||
|
{'module': 'nova', 'uri': 'git://github.com/openstack/nova'},
|
||||||
|
{'module': 'qa', 'uri': 'git://github.com/openstack/qa'},
|
||||||
|
]
|
||||||
|
dd = {
|
||||||
|
'repos': [
|
||||||
|
{'module': 'qa', 'uri': 'git://github.com/openstack/qa'},
|
||||||
|
],
|
||||||
|
'project_sources': ['any']
|
||||||
|
}
|
||||||
|
|
||||||
|
default_data_processor._update_project_list(dd)
|
||||||
|
|
||||||
|
self.assertEqual(2, len(dd['repos']))
|
||||||
|
self.assertIn('qa', set([r['module'] for r in dd['repos']]))
|
||||||
|
self.assertIn('nova', set([r['module'] for r in dd['repos']]))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user