Implemented review processing and stats calculation

* Added review control system module that retrieves reviews from Gerrit
* Refactored runtime storage so that it no longer depends on record contents
* Refactored the commit processor and added a new review processor
* The review record processor spawns review and mark records
* Memory storage can now store reviews and marks along with commits
* Migrated to hacking 0.6
* Marks are aggregated in contribution details and shown on the engineer details screen

Implements blueprint review-stats

Change-Id: I80b70de372752cd3d1790da1904c0d423d61e81a
Ilya Shakhat 2013-07-18 15:08:12 +04:00
parent 95833ec196
commit 5c554a35f9
20 changed files with 642 additions and 343 deletions

View File

@@ -13,8 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from stackalytics.processor import user_utils
-
 MEMORY_STORAGE_CACHED = 0
@@ -27,38 +25,42 @@ class CachedMemoryStorage(MemoryStorage):
     def __init__(self, records):
         super(CachedMemoryStorage, self).__init__(records)
+        # common indexes
         self.records = {}
-        self.company_index = {}
-        self.date_index = {}
+        self.record_types_index = {}
         self.module_index = {}
         self.launchpad_id_index = {}
+        self.company_index = {}
         self.release_index = {}
-        self.dates = []
+
+        self.indexes = {
+            'record_type': self.record_types_index,
+            'company_name': self.company_index,
+            'module': self.module_index,
+            'launchpad_id': self.launchpad_id_index,
+            'release': self.release_index,
+        }
+
         for record in records:
             self._save_record(record)
-        self.dates = sorted(self.date_index)
         self.company_name_mapping = dict((c.lower(), c)
                                          for c in self.company_index.keys())
 
     def _save_record(self, record):
         self.records[record['record_id']] = record
-        self._add_to_index(self.company_index, record, 'company_name')
-        self._add_to_index(self.module_index, record, 'module')
-        self._add_to_index(self.launchpad_id_index, record, 'launchpad_id')
-        self._add_to_index(self.release_index, record, 'release')
-        self._add_to_index(self.date_index, record, 'date')
-        record['week'] = user_utils.timestamp_to_week(record['date'])
-        record['loc'] = record['lines_added'] + record['lines_deleted']
+        for key, index in self.indexes.iteritems():
+            self._add_to_index(index, record, key)
+
+    def update(self, records):
+        for record in records:
+            if record['record_id'] in self.records:
+                self._remove_record_from_index(record)
+            self._save_record(record)
 
     def _remove_record_from_index(self, record):
-        self.company_index[record['company_name']].remove(record['record_id'])
-        self.module_index[record['module']].remove(record['record_id'])
-        self.launchpad_id_index[record['launchpad_id']].remove(
-            record['record_id'])
-        self.release_index[record['release']].remove(record['record_id'])
-        self.date_index[record['date']].remove(record['record_id'])
+        for key, index in self.indexes.iteritems():
+            index[record[key]].remove(record['record_id'])
 
     def _add_to_index(self, record_index, record, key):
         record_key = record[key]
@@ -70,9 +72,8 @@ class CachedMemoryStorage(MemoryStorage):
     def _get_record_ids_from_index(self, items, index):
         record_ids = set()
         for item in items:
-            if item not in index:
-                raise Exception('Parameter %s not valid' % item)
-            record_ids |= index[item]
+            if item in index:
+                record_ids |= index[item]
         return record_ids
 
     def get_record_ids_by_modules(self, modules):
@@ -91,7 +92,16 @@ class CachedMemoryStorage(MemoryStorage):
         return self._get_record_ids_from_index(releases, self.release_index)
 
     def get_record_ids(self):
-        return set(self.records.keys())
+        return self.records.keys()
+
+    def get_commit_ids(self):
+        return self.record_types_index['commit']
+
+    def get_review_ids(self):
+        return self.record_types_index['review']
+
+    def get_mark_ids(self):
+        return self.record_types_index['mark']
 
     def get_records(self, record_ids):
         for i in record_ids:
@@ -112,12 +122,6 @@ class CachedMemoryStorage(MemoryStorage):
     def get_launchpad_ids(self):
        return self.launchpad_id_index.keys()
 
-    def update(self, records):
-        for record in records:
-            if record['record_id'] in self.records:
-                self._remove_record_from_index(record)
-            self._save_record(record)
-
 
 def get_memory_storage(memory_storage_type, records):
     if memory_storage_type == MEMORY_STORAGE_CACHED:
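The refactoring above replaces five hand-maintained indexes with a single indexes map, so saving and removing a record becomes one loop instead of one line per field. A minimal standalone sketch of the same inverted-index pattern (hypothetical data, not the module's actual API):

# Sketch of the inverted-index pattern: field value -> set of record ids.
indexes = {'record_type': {}, 'module': {}}

def save_record(record):
    for key, index in indexes.items():
        index.setdefault(record[key], set()).add(record['record_id'])

def remove_record(record):
    for key, index in indexes.items():
        index[record[key]].discard(record['record_id'])

save_record({'record_id': 1, 'record_type': 'mark', 'module': 'nova'})
print(indexes['record_type']['mark'])  # set([1])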

View File

@@ -84,4 +84,9 @@
 {% endif %}
 
+<h4>Marks</h4>
+{% for mark in [-2, -1, 0, 1, 2] %}
+<div>{{ mark }}: {{ marks[mark] }}</div>
+{% endfor %}
+
 {% endblock %}

View File

@@ -284,6 +284,8 @@
         <label for="metric">Metric&nbsp;</label><select id="metric" name="metric">
             <option value="commits">Commits</option>
             <option value="loc">Lines of code</option>
+            <option value="reviews">Reviews</option>
+            <option value="marks">Marks</option>
         </select>
     </span>
     <span class="drop_release" style="float: right;">

View File

@@ -45,6 +45,8 @@ DEFAULTS = {
 METRIC_LABELS = {
     'loc': 'Lines of code',
     'commits': 'Commits',
+    'reviews': 'Reviews',
+    'marks': 'Marks',
 }
 
 DEFAULT_RECORDS_LIMIT = 10
@@ -173,12 +175,13 @@ def get_default(param_name):
         return None
 
 
-def get_parameter(kwargs, singular_name, plural_name, use_default=True):
+def get_parameter(kwargs, singular_name, plural_name=None, use_default=True):
     if singular_name in kwargs:
         p = kwargs[singular_name]
     else:
-        p = (flask.request.args.get(singular_name) or
-             flask.request.args.get(plural_name))
+        p = flask.request.args.get(singular_name)
+        if (not p) and plural_name:
+            p = flask.request.args.get(plural_name)
     if p:
         return p.split(',')
     elif use_default:
@@ -200,7 +203,7 @@ def record_filter(ignore=None, use_default=True):
             vault = get_vault()
             memory_storage = vault['memory_storage']
-            record_ids = memory_storage.get_record_ids()
+            record_ids = set(memory_storage.get_record_ids())  # make a copy
 
             if 'module' not in ignore:
                 param = get_parameter(kwargs, 'module', 'modules', use_default)
@@ -242,6 +245,15 @@ def record_filter(ignore=None, use_default=True):
                         memory_storage.get_record_ids_by_releases(
                             c.lower() for c in param))
 
+            if 'metric' not in ignore:
+                param = get_parameter(kwargs, 'metric')
+                if 'reviews' in param:
+                    record_ids &= memory_storage.get_review_ids()
+                elif 'marks' in param:
+                    record_ids &= memory_storage.get_mark_ids()
+                elif ('loc' in param) or ('commits' in param):
+                    record_ids &= memory_storage.get_commit_ids()
+
             kwargs['records'] = memory_storage.get_records(record_ids)
             return f(*args, **kwargs)
@@ -258,7 +270,7 @@ def aggregate_filter():
             metric_param = (flask.request.args.get('metric') or
                             get_default('metric'))
             metric = metric_param.lower()
-            if metric == 'commits':
+            if metric in ['commits', 'reviews', 'marks']:
                 metric_filter = lambda r: 1
             elif metric == 'loc':
                 metric_filter = lambda r: r['loc']
@@ -288,7 +300,7 @@ def exception_handler():
     return decorator
 
 
-def templated(template=None):
+def templated(template=None, return_code=200):
     def decorator(f):
         @functools.wraps(f)
         def templated_decorated_function(*args, **kwargs):
@@ -326,7 +338,7 @@ def templated(template=None):
             ctx['project_type_options'] = get_project_type_options()
 
-            return flask.render_template(template_name, **ctx)
+            return flask.render_template(template_name, **ctx), return_code
 
         return templated_decorated_function
@@ -342,8 +354,9 @@ def overview():
 @app.errorhandler(404)
+@templated('404.html', 404)
 def page_not_found(e):
-    return flask.render_template('404.html'), 404
+    pass
 
 
 def contribution_details(records, limit=DEFAULT_RECORDS_LIMIT):
@@ -351,32 +364,37 @@ def contribution_details(records, limit=DEFAULT_RECORDS_LIMIT):
     bugs_map = {}
     companies_map = {}
     commits = []
+    marks = dict((m, 0) for m in [-2, -1, 0, 1, 2])
     loc = 0
 
     for record in records:
-        loc += record['loc']
-        commits.append(record)
-        blueprint = record['blueprint_id']
-        if blueprint:
-            if blueprint in blueprints_map:
-                blueprints_map[blueprint].append(record)
-            else:
-                blueprints_map[blueprint] = [record]
+        if record['record_type'] == 'commit':
+            loc += record['loc']
+            commits.append(record)
+            blueprint = record['blueprint_id']
+            if blueprint:
+                if blueprint in blueprints_map:
+                    blueprints_map[blueprint].append(record)
+                else:
+                    blueprints_map[blueprint] = [record]
 
-        bug = record['bug_id']
-        if bug:
-            if bug in bugs_map:
-                bugs_map[bug].append(record)
-            else:
-                bugs_map[bug] = [record]
+            bug = record['bug_id']
+            if bug:
+                if bug in bugs_map:
+                    bugs_map[bug].append(record)
+                else:
+                    bugs_map[bug] = [record]
 
-        company = record['company_name']
-        if company:
-            if company in companies_map:
-                companies_map[company]['loc'] += record['loc']
-                companies_map[company]['commits'] += 1
-            else:
-                companies_map[company] = {'loc': record['loc'], 'commits': 1}
+            company = record['company_name']
+            if company:
+                if company in companies_map:
+                    companies_map[company]['loc'] += record['loc']
+                    companies_map[company]['commits'] += 1
+                else:
+                    companies_map[company] = {'loc': record['loc'],
+                                              'commits': 1}
+
+        elif record['record_type'] == 'mark':
+            marks[int(record['value'])] += 1
 
     blueprints = sorted([{'id': key,
                           'module': value[0]['module'],
@@ -395,6 +413,7 @@ def contribution_details(records, limit=DEFAULT_RECORDS_LIMIT):
         'commit_count': len(commits),
         'companies': companies_map,
         'loc': loc,
+        'marks': marks,
     }
     return result
@@ -423,7 +442,7 @@ def module_details(module, records):
 @app.route('/engineers/<launchpad_id>')
 @exception_handler()
 @templated()
-@record_filter()
+@record_filter(ignore='metric')
 def engineer_details(launchpad_id, records):
     persistent_storage = get_vault()['persistent_storage']
     user = list(persistent_storage.get_users(launchpad_id=launchpad_id))[0]
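Two behavioral changes above are easy to miss: get_record_ids() now returns the raw key list, so record_filter copies it into a set before intersecting, and get_parameter accepts a missing plural name, which is what lets the metric filter call get_parameter(kwargs, 'metric'). A rough standalone sketch of the lookup order get_parameter implements (default handling simplified; args stands in for flask.request.args):

def get_parameter(kwargs, singular_name, plural_name=None, args=None):
    # explicit view argument wins, then the singular query parameter,
    # then the plural one if a plural name was given
    if singular_name in kwargs:
        p = kwargs[singular_name]
    else:
        p = args.get(singular_name)
        if (not p) and plural_name:
            p = args.get(plural_name)
    return p.split(',') if p else None

print(get_parameter({}, 'metric', args={'metric': 'marks'}))  # ['marks']
print(get_parameter({}, 'module', 'modules', {'modules': 'nova,glance'}))
# ['nova', 'glance']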

View File

@@ -1,10 +1,9 @@
 {
     "corrections": [
         {
-            "commit_id": "ee3fe4e836ca1c81e50a8324a9b5f982de4fa97f",
+            "primary_key": "ee3fe4e836ca1c81e50a8324a9b5f982de4fa97f",
             "correction_comment": "Reset LOC to 0",
-            "lines_added": 0,
-            "lines_deleted": 0
+            "loc": 0
         }
     ]
 }

View File

@@ -14378,11 +14378,6 @@
     ],
     "releases": [
-        {
-            "release_name": "ALL",
-            "start_date": "2010-May-01",
-            "end_date": "now"
-        },
         {
             "release_name": "Essex",
             "start_date": "2011-Oct-01",

View File

@@ -3,7 +3,7 @@
 # debug = False
 
 # Default data
-# default-data = /etc/stackalytics/default_data.json
+# default_data = /etc/stackalytics/default_data.json
 
 # The folder that holds all project sources to analyze
 # sources_root = /var/local/stackalytics
@@ -24,4 +24,13 @@
 # listen_port = 8080
 
 # The address of file with corrections data
-# corrections-uri = https://raw.github.com/stackforge/stackalytics/master/etc/corrections.json
+# corrections_uri = https://raw.github.com/stackforge/stackalytics/master/etc/corrections.json
+
+# URI of review system
+# review_uri = gerrit://review.openstack.org
+
+# SSH key for gerrit review system access
+# ssh_key_filename = /home/user/.ssh/id_rsa
+
+# SSH username for gerrit review system access
+# ssh_username = user

View File

@@ -39,51 +39,13 @@
     "repos": [
         {
             "branches": ["master"],
-            "module": "python-quantumclient",
-            "type": "core",
-            "uri": "git://github.com/openstack/python-quantumclient.git",
+            "module": "stackalytics",
+            "project_type": "stackforge",
+            "uri": "git://github.com/stackforge/stackalytics.git",
             "releases": [
-                {
-                    "release_name": "Folsom",
-                    "tag_from": "folsom-1",
-                    "tag_to": "2.1"
-                },
-                {
-                    "release_name": "Grizzly",
-                    "tag_from": "2.1",
-                    "tag_to": "2.2.1"
-                },
                 {
                     "release_name": "Havana",
-                    "tag_from": "2.2.1",
-                    "tag_to": "HEAD"
-                }
-            ]
-        },
-        {
-            "branches": ["master"],
-            "module": "keystone",
-            "type": "core",
-            "uri": "git://github.com/openstack/keystone.git",
-            "releases": [
-                {
-                    "release_name": "Essex",
-                    "tag_from": "2011.3",
-                    "tag_to": "2012.1"
-                },
-                {
-                    "release_name": "Folsom",
-                    "tag_from": "2012.1",
-                    "tag_to": "2012.2"
-                },
-                {
-                    "release_name": "Grizzly",
-                    "tag_from": "2012.2",
-                    "tag_to": "2013.1"
-                },
-                {
-                    "release_name": "Havana",
-                    "tag_from": "2013.1",
+                    "tag_from": "5a1376ca",
                     "tag_to": "HEAD"
                 }
             ]
@@ -91,11 +53,6 @@
     ],
     "releases": [
-        {
-            "release_name": "ALL",
-            "start_date": "2010-May-01",
-            "end_date": "now"
-        },
         {
             "release_name": "Essex",
             "start_date": "2011-Oct-01",

View File

@@ -3,7 +3,8 @@ Flask>=0.9
 Flask-Gravatar
 iso8601
 launchpadlib
-http://tarballs.openstack.org/oslo.config/oslo.config-1.2.0a2.tar.gz#egg=oslo.config-1.2.0a2
+oslo.config
+paramiko>=1.8.0
 pbr>=0.5.16,<0.6
 psutil
 python-memcached

View File

@@ -1,182 +0,0 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import re

from launchpadlib import launchpad
from oslo.config import cfg

LOG = logging.getLogger(__name__)

COMMIT_PROCESSOR_DUMMY = 0
COMMIT_PROCESSOR_CACHED = 1


class CommitProcessor(object):
    def __init__(self, persistent_storage):
        self.persistent_storage = persistent_storage

    def process(self, commit_iterator):
        pass


class DummyProcessor(CommitProcessor):
    def __init__(self, persistent_storage):
        super(DummyProcessor, self).__init__(persistent_storage)

    def process(self, commit_iterator):
        return commit_iterator


class CachedProcessor(CommitProcessor):
    def __init__(self, persistent_storage):
        super(CachedProcessor, self).__init__(persistent_storage)

        companies = persistent_storage.get_companies()
        self.domains_index = {}
        for company in companies:
            for domain in company['domains']:
                self.domains_index[domain] = company['company_name']

        users = persistent_storage.get_users()
        self.users_index = {}
        for user in users:
            for email in user['emails']:
                self.users_index[email] = user

        LOG.debug('Cached commit processor is instantiated')

    def _find_company(self, companies, date):
        for r in companies:
            if date < r['end_date']:
                return r['company_name']
        return companies[-1]['company_name']

    def _get_company_by_email(self, email):
        name, at, domain = email.partition('@')
        if domain:
            parts = domain.split('.')
            for i in range(len(parts), 1, -1):
                m = '.'.join(parts[len(parts) - i:])
                if m in self.domains_index:
                    return self.domains_index[m]
        return None

    def _unknown_user_email(self, email):
        lp_profile = None
        if not re.match(r'[\w\d_\.-]+@([\w\d_\.-]+\.)+[\w]+', email):
            LOG.debug('User email is not valid %s' % email)
        else:
            LOG.debug('Lookup user email %s at Launchpad' % email)
            lp = launchpad.Launchpad.login_anonymously('stackalytics')
            try:
                lp_profile = lp.people.getByEmail(email=email)
            except Exception as error:
                LOG.warn('Lookup of email %s failed %s' %
                         (email, error.message))

        if not lp_profile:
            # user is not found in Launchpad, create dummy record for commit
            # update
            LOG.debug('Email is not found at Launchpad, mapping to nobody')
            user = {
                'launchpad_id': None,
                'companies': [{
                    'company_name': self.domains_index[''],
                    'end_date': 0
                }]
            }
        else:
            # get user's launchpad id from his profile
            launchpad_id = lp_profile.name
            LOG.debug('Found user %s' % launchpad_id)

            # check if user with launchpad_id exists in persistent storage
            persistent_user_iterator = self.persistent_storage.get_users(
                launchpad_id=launchpad_id)
            for persistent_user in persistent_user_iterator:
                break
            else:
                persistent_user = None

            if persistent_user:
                # user already exist, merge
                LOG.debug('User exists in persistent storage, add new email')
                persistent_user_email = persistent_user['emails'][0]
                if persistent_user_email not in self.users_index:
                    raise Exception('User index is not valid')
                user = self.users_index[persistent_user_email]
                user['emails'].append(email)
                self.persistent_storage.update_user(user)
            else:
                # add new user
                LOG.debug('Add new user into persistent storage')
                company = (self._get_company_by_email(email) or
                           self.domains_index[''])
                user = {
                    'launchpad_id': launchpad_id,
                    'user_name': lp_profile.display_name,
                    'emails': [email],
                    'companies': [{
                        'company_name': company,
                        'end_date': 0,
                    }],
                }
                self.persistent_storage.insert_user(user)

        # update local index
        self.users_index[email] = user
        return user

    def _update_commit_with_user_data(self, commit):
        email = commit['author_email'].lower()
        if email in self.users_index:
            user = self.users_index[email]
        else:
            user = self._unknown_user_email(email)

        commit['launchpad_id'] = user['launchpad_id']
        company = self._get_company_by_email(email)
        if not company:
            company = self._find_company(user['companies'], commit['date'])
        commit['company_name'] = company
        if 'user_name' in user:
            commit['author_name'] = user['user_name']

    def process(self, commit_iterator):
        for commit in commit_iterator:
            self._update_commit_with_user_data(commit)
            if cfg.CONF.filter_robots and commit['company_name'] == '*robots':
                continue
            yield commit


class CommitProcessorFactory(object):
    @staticmethod
    def get_processor(commit_processor_type, persistent_storage):
        LOG.debug('Factory is asked for commit processor type %s' %
                  commit_processor_type)
        if commit_processor_type == COMMIT_PROCESSOR_DUMMY:
            return DummyProcessor(persistent_storage)
        elif commit_processor_type == COMMIT_PROCESSOR_CACHED:
            return CachedProcessor(persistent_storage)
        else:
            raise Exception('Unknown commit processor type %s' %
                            commit_processor_type)

View File

@@ -40,4 +40,10 @@ OPTS = [
                default=('https://raw.github.com/stackforge/stackalytics/'
                         'master/etc/corrections.json'),
                help='The address of file with corrections data'),
+    cfg.StrOpt('review-uri', default='gerrit://review.openstack.org',
+               help='URI of review system'),
+    cfg.StrOpt('ssh-key-filename', default='/home/ishakhat/.ssh/4launchpad_id',
+               help='SSH key for gerrit review system access'),
+    cfg.StrOpt('ssh-username', default='ishakhat',
+               help='SSH username for gerrit review system access'),
 ]
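A detail worth noting, since the sample config above uses underscores while these options are declared with dashes: oslo.config normalizes dashes to underscores for attribute access, so review-uri is read as cfg.CONF.review_uri in the processor. A minimal standalone sketch:

# Dashes in option names become underscores on the CONF object.
from oslo.config import cfg

conf = cfg.ConfigOpts()
conf.register_opts([cfg.StrOpt('review-uri',
                               default='gerrit://review.openstack.org')])
conf([])  # parse an empty command line, keeping defaults
print(conf.review_uri)  # gerrit://review.openstack.org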

View File

@@ -12,17 +12,20 @@
 # implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 import json
+import urllib
+import urllib2
 
 from oslo.config import cfg
 import psutil
 from psutil import _error
-import urllib2
 
 from stackalytics.openstack.common import log as logging
-from stackalytics.processor import commit_processor
 from stackalytics.processor import config
 from stackalytics.processor import persistent_storage
+from stackalytics.processor import rcs
+from stackalytics.processor import record_processor
 from stackalytics.processor import runtime_storage
 from stackalytics.processor import vcs
@@ -57,41 +60,74 @@ def update_pids(runtime_storage):
     runtime_storage.active_pids(pids)
 
 
-def process_repo(repo, runtime_storage, processor):
+def _merge_commits(original, new):
+    if new['branches'] < original['branches']:
+        return False
+    else:
+        original['branches'] |= new['branches']
+        return True
+
+
+def process_repo(repo, runtime_storage, commit_processor, review_processor):
     uri = repo['uri']
     LOG.debug('Processing repo uri %s' % uri)
 
     vcs_inst = vcs.get_vcs(repo, cfg.CONF.sources_root)
     vcs_inst.fetch()
 
+    rcs_inst = rcs.get_rcs(repo, cfg.CONF.review_uri)
+    rcs_inst.setup(key_filename=cfg.CONF.ssh_key_filename,
+                   username=cfg.CONF.ssh_username)
+
     for branch in repo['branches']:
-        LOG.debug('Processing repo %s, branch %s' % (uri, branch))
+        LOG.debug('Processing repo %s, branch %s', uri, branch)
 
-        head_commit_id = runtime_storage.get_head_commit_id(uri, branch)
+        vcs_key = 'vcs:' + str(urllib.quote_plus(uri) + ':' + branch)
+        last_id = runtime_storage.get_last_id(vcs_key)
 
-        commit_iterator = vcs_inst.log(branch, head_commit_id)
-        processed_commit_iterator = processor.process(commit_iterator)
-        runtime_storage.set_records(processed_commit_iterator)
+        commit_iterator = vcs_inst.log(branch, last_id)
+        processed_commit_iterator = commit_processor.process(commit_iterator)
+        runtime_storage.set_records(processed_commit_iterator, _merge_commits)
 
-        head_commit_id = vcs_inst.get_head_commit_id(branch)
-        runtime_storage.set_head_commit_id(uri, branch, head_commit_id)
+        last_id = vcs_inst.get_last_id(branch)
+        runtime_storage.set_last_id(vcs_key, last_id)
+
+        LOG.debug('Processing reviews for repo %s, branch %s', uri, branch)
+
+        rcs_key = 'rcs:' + str(urllib.quote_plus(uri) + ':' + branch)
+        last_id = runtime_storage.get_last_id(rcs_key)
+
+        reviews_iterator = rcs_inst.log(branch, last_id)
+        processed_review_iterator = review_processor.process(reviews_iterator)
+        runtime_storage.set_records(processed_review_iterator)
+
+        last_id = rcs_inst.get_last_id(branch)
+        runtime_storage.set_last_id(rcs_key, last_id)
 
 
 def update_repos(runtime_storage, persistent_storage):
     repos = persistent_storage.get_repos()
-    processor = commit_processor.CommitProcessorFactory.get_processor(
-        commit_processor.COMMIT_PROCESSOR_CACHED,
-        persistent_storage)
+    commit_processor = record_processor.get_record_processor(
+        record_processor.COMMIT_PROCESSOR, persistent_storage)
+    review_processor = record_processor.get_record_processor(
+        record_processor.REVIEW_PROCESSOR, persistent_storage)
 
     for repo in repos:
-        process_repo(repo, runtime_storage, processor)
+        process_repo(repo, runtime_storage, commit_processor,
+                     review_processor)
 
 
 def apply_corrections(uri, runtime_storage_inst):
     corrections_fd = urllib2.urlopen(uri)
     raw = corrections_fd.read()
     corrections_fd.close()
-    runtime_storage_inst.apply_corrections(json.loads(raw)['corrections'])
+    corrections = json.loads(raw)['corrections']
+    valid_corrections = []
+    for c in corrections:
+        if 'primary_key' in c:
+            valid_corrections.append(c)
+        else:
+            LOG.warn('Correction misses primary key: %s', c)
+    # apply only the corrections that carry a primary key
+    runtime_storage_inst.apply_corrections(valid_corrections)
 
 
 def main():
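_merge_commits is the merge handler handed to set_records: a commit that is seen again (for example from another branch) only overwrites the stored record when it brings new branch information. For example (hypothetical records):

original = {'primary_key': 'abc123', 'branches': set(['master'])}
new = {'primary_key': 'abc123',
       'branches': set(['master', 'stable/grizzly'])}

# new['branches'] is not a proper subset of original['branches'], so
# _merge_commits merges the sets and returns True (record gets updated)
assert not (new['branches'] < original['branches'])
original['branches'] |= new['branches']
print(original['branches'])  # set(['master', 'stable/grizzly'])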
def main(): def main():

View File

@@ -0,0 +1,124 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import re

import paramiko

from stackalytics.openstack.common import log as logging

LOG = logging.getLogger(__name__)

DEFAULT_PORT = 29418
GERRIT_URI_PREFIX = r'^gerrit:\/\/'


class Rcs(object):
    def __init__(self, repo, uri):
        self.repo = repo

    def setup(self, **kwargs):
        pass

    def log(self, branch, last_id):
        pass

    def get_last_id(self, branch):
        pass


class Gerrit(Rcs):
    def __init__(self, repo, uri):
        super(Gerrit, self).__init__(repo, uri)

        stripped = re.sub(GERRIT_URI_PREFIX, '', uri)
        if stripped:
            self.hostname, semicolon, self.port = stripped.partition(':')
            if not self.port:
                self.port = DEFAULT_PORT
            else:
                self.port = int(self.port)  # paramiko expects an int port
        else:
            raise Exception('Invalid rcs uri %s' % uri)

        self.client = paramiko.SSHClient()
        self.client.load_system_host_keys()
        self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    def setup(self, **kwargs):
        if 'key_filename' in kwargs:
            self.key_filename = kwargs['key_filename']
        else:
            self.key_filename = None

        if 'username' in kwargs:
            self.username = kwargs['username']
        else:
            self.username = None

    def _connect(self):
        self.client.connect(self.hostname, port=self.port,
                            key_filename=self.key_filename,
                            username=self.username)
        LOG.debug('Successfully connected to Gerrit')

    def log(self, branch, last_id):
        module = self.repo['module']
        LOG.debug('Retrieve reviews from gerrit for project %s', module)

        self._connect()

        cmd = ('gerrit query --all-approvals --patch-sets --format JSON '
               '%(module)s '
               'branch:%(branch)s' %
               {'module': module, 'branch': branch})
        if last_id:
            cmd += ' NOT resume_sortkey:%016x' % (last_id + 1)
        stdin, stdout, stderr = self.client.exec_command(cmd)

        for line in stdout:
            review = json.loads(line)
            if 'sortKey' in review:
                review['module'] = module
                yield review

    def get_last_id(self, branch):
        module = self.repo['module']
        LOG.debug('Get last id for module %s', module)

        self._connect()

        cmd = ('gerrit query --all-approvals --patch-sets --format JSON '
               '%(module)s branch:%(branch)s limit:1' %
               {'module': module, 'branch': branch})

        stdin, stdout, stderr = self.client.exec_command(cmd)
        for line in stdout:
            review = json.loads(line)
            if 'sortKey' in review:
                return int(review['sortKey'], 16)

        raise Exception('Last id is not found for module %s' % module)


def get_rcs(repo, uri):
    LOG.debug('Review control system is requested for uri %s' % uri)
    match = re.search(GERRIT_URI_PREFIX, uri)
    if match:
        return Gerrit(repo, uri)
    else:
        raise Exception('Unknown review control system for uri %s' % uri)
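Putting the module together, typical usage looks like the sketch below (repo and credentials are illustrative; the gerrit query runs over SSH, port 29418 by default):

# Illustrative usage of the rcs module (repo values are made up).
from stackalytics.processor import rcs

repo = {'module': 'stackalytics',
        'uri': 'git://github.com/stackforge/stackalytics.git'}
rcs_inst = rcs.get_rcs(repo, 'gerrit://review.openstack.org')
rcs_inst.setup(key_filename='/home/user/.ssh/id_rsa', username='user')

# iterate reviews for a branch, resuming after the last seen sortKey
for review in rcs_inst.log('master', last_id=None):
    print(review['id'])

print(rcs_inst.get_last_id('master'))  # highest sortKey as an int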

View File

@@ -0,0 +1,278 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bisect
import logging
import re

from launchpadlib import launchpad
from oslo.config import cfg

from stackalytics.processor import user_utils

LOG = logging.getLogger(__name__)

COMMIT_PROCESSOR = 1
REVIEW_PROCESSOR = 2


class RecordProcessor(object):
    def __init__(self, persistent_storage):
        self.persistent_storage = persistent_storage

    def process(self, record_iterator):
        pass


class CachedProcessor(RecordProcessor):
    def __init__(self, persistent_storage):
        super(CachedProcessor, self).__init__(persistent_storage)

        companies = persistent_storage.get_companies()
        self.domains_index = {}
        for company in companies:
            for domain in company['domains']:
                self.domains_index[domain] = company['company_name']

        users = persistent_storage.get_users()
        self.users_index = {}
        for user in users:
            for email in user['emails']:
                self.users_index[email] = user

    def _find_company(self, companies, date):
        for r in companies:
            if date < r['end_date']:
                return r['company_name']
        return companies[-1]['company_name']

    def _get_company_by_email(self, email):
        name, at, domain = email.partition('@')
        if domain:
            parts = domain.split('.')
            for i in range(len(parts), 1, -1):
                m = '.'.join(parts[len(parts) - i:])
                if m in self.domains_index:
                    return self.domains_index[m]
        return None

    def _persist_user(self, launchpad_id, email, user_name):
        # check if user with launchpad_id exists in persistent storage
        persistent_user_iterator = self.persistent_storage.get_users(
            launchpad_id=launchpad_id)
        for persistent_user in persistent_user_iterator:
            break
        else:
            persistent_user = None

        if persistent_user:
            # user already exist, merge
            LOG.debug('User exists in persistent storage, add new email %s',
                      email)
            persistent_user_email = persistent_user['emails'][0]
            if persistent_user_email not in self.users_index:
                raise Exception('User index is not valid')
            user = self.users_index[persistent_user_email]
            user['emails'].append(email)
            self.persistent_storage.update_user(user)
        else:
            # add new user
            LOG.debug('Add new user into persistent storage')
            company = (self._get_company_by_email(email) or
                       self.domains_index[''])
            user = {
                'launchpad_id': launchpad_id,
                'user_name': user_name,
                'emails': [email],
                'companies': [{
                    'company_name': company,
                    'end_date': 0,
                }],
            }
            self.persistent_storage.insert_user(user)
        return user

    def _unknown_user_email(self, email):
        lp_profile = None
        if not re.match(r'[\w\d_\.-]+@([\w\d_\.-]+\.)+[\w]+', email):
            LOG.debug('User email is not valid %s' % email)
        else:
            LOG.debug('Lookup user email %s at Launchpad' % email)
            lp = launchpad.Launchpad.login_anonymously('stackalytics')
            try:
                lp_profile = lp.people.getByEmail(email=email)
            except Exception as error:
                LOG.warn('Lookup of email %s failed %s' %
                         (email, error.message))

        if not lp_profile:
            # user is not found in Launchpad, create dummy record for commit
            # update
            LOG.debug('Email is not found at Launchpad, mapping to nobody')
            user = {
                'launchpad_id': None,
                'companies': [{
                    'company_name': self.domains_index[''],
                    'end_date': 0
                }]
            }
        else:
            # get user's launchpad id from his profile
            launchpad_id = lp_profile.name
            user_name = lp_profile.display_name
            LOG.debug('Found user %s' % launchpad_id)
            user = self._persist_user(launchpad_id, email, user_name)

        # update local index
        self.users_index[email] = user
        return user


class CommitProcessor(CachedProcessor):
    def __init__(self, persistent_storage):
        super(CommitProcessor, self).__init__(persistent_storage)
        LOG.debug('Commit processor is instantiated')

    def _update_commit_with_user_data(self, commit):
        email = commit['author_email'].lower()
        if email in self.users_index:
            user = self.users_index[email]
        else:
            user = self._unknown_user_email(email)

        commit['launchpad_id'] = user['launchpad_id']
        company = self._get_company_by_email(email)
        if not company:
            company = self._find_company(user['companies'], commit['date'])
        commit['company_name'] = company
        if 'user_name' in user:
            commit['author_name'] = user['user_name']

    def process(self, record_iterator):
        for record in record_iterator:
            self._update_commit_with_user_data(record)

            if cfg.CONF.filter_robots and record['company_name'] == '*robots':
                continue

            record['record_type'] = 'commit'
            record['primary_key'] = record['commit_id']
            record['week'] = user_utils.timestamp_to_week(record['date'])
            record['loc'] = record['lines_added'] + record['lines_deleted']

            yield record


class ReviewProcessor(CachedProcessor):
    def __init__(self, persistent_storage):
        super(ReviewProcessor, self).__init__(persistent_storage)

        self.launchpad_to_company_index = {}
        users = persistent_storage.get_users()
        for user in users:
            self.launchpad_to_company_index[user['launchpad_id']] = user

        self.releases = []
        for release in persistent_storage.get_releases():
            r = release.copy()
            r['end_date_ts'] = user_utils.date_to_timestamp(r['end_date'])
            r['release_name'] = r['release_name'].lower()
            self.releases.append(r)
        self.releases.sort(key=lambda x: x['end_date_ts'])
        self.releases_dates = [r['end_date_ts'] for r in self.releases]

        LOG.debug('Review processor is instantiated')

    def _get_release(self, timestamp):
        release_index = bisect.bisect(self.releases_dates, timestamp)
        return self.releases[release_index]['release_name']

    def _process_user(self, email, launchpad_id, user_name, date):
        if email in self.users_index:
            user = self.users_index[email]
        else:
            user = self._persist_user(launchpad_id, email, user_name)
            self.users_index[email] = user

        company = self._get_company_by_email(email)
        if not company:
            company = self._find_company(user['companies'], date)
        return company

    def _spawn_review(self, record):
        # copy everything except patch sets and flatten user data
        review = dict([(k, v) for k, v in record.iteritems()
                       if k not in ['patchSets', 'owner']])

        owner = record['owner']
        company = self._process_user(owner['email'].lower(),
                                     owner['username'],
                                     owner['name'],
                                     record['createdOn'])

        review['record_type'] = 'review'
        review['primary_key'] = record['id']
        review['company_name'] = company
        review['launchpad_id'] = owner['username']
        review['release'] = self._get_release(review['createdOn'])
        yield review

    def _spawn_marks(self, record):
        review_id = record['id']
        for patch in record['patchSets']:
            if 'approvals' not in patch:
                continue  # not reviewed by anyone
            for approval in patch['approvals']:
                # copy everything and flatten user data
                mark = dict([(k, v) for k, v in approval.iteritems()
                             if k != 'by'])
                reviewer = approval['by']

                mark['record_type'] = 'mark'
                mark['primary_key'] = (record['id'] +
                                       str(mark['grantedOn']) +
                                       mark['type'])
                mark['launchpad_id'] = reviewer['username']
                mark['module'] = record['module']

                if 'email' not in reviewer:
                    continue

                company = self._process_user(reviewer['email'],
                                             reviewer['username'],
                                             reviewer['name'],
                                             mark['grantedOn'])
                mark['company_name'] = company
                mark['review_id'] = review_id
                mark['release'] = self._get_release(mark['grantedOn'])

                yield mark

    def process(self, record_iterator):
        """Process a review. A review spawns records of two types:
          * review - a record that a user created a review request
          * mark - a record that a user set an approval mark on the review
        """
        for record in record_iterator:
            for gen in [self._spawn_review, self._spawn_marks]:
                for r in gen(record):
                    yield r


def get_record_processor(processor_type, persistent_storage):
    LOG.debug('Record processor is requested of type %s' % processor_type)
    if processor_type == COMMIT_PROCESSOR:
        return CommitProcessor(persistent_storage)
    elif processor_type == REVIEW_PROCESSOR:
        return ReviewProcessor(persistent_storage)
    else:
        raise Exception('Unknown record processor type %s' % processor_type)
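To make the spawning concrete: given a hypothetical, heavily trimmed Gerrit record as yielded by rcs.Gerrit.log, ReviewProcessor.process derives one review record plus one mark record per approval:

# Hypothetical trimmed Gerrit JSON record (field names as used above).
gerrit_record = {
    'id': 'I80b70de3...',
    'module': 'stackalytics',
    'createdOn': 1374145692,
    'owner': {'username': 'jdoe', 'name': 'John Doe',
              'email': 'jdoe@example.com'},
    'patchSets': [
        {'approvals': [
            {'type': 'CRVW', 'value': '2', 'grantedOn': 1374149000,
             'by': {'username': 'rev', 'name': 'Rev Iewer',
                    'email': 'rev@example.com'}}]}],
}

# _spawn_review -> {'record_type': 'review', 'primary_key': 'I80b70de3...',
#                   'launchpad_id': 'jdoe', 'release': ..., ...}
# _spawn_marks  -> {'record_type': 'mark', 'value': '2',
#                   'primary_key': 'I80b70de3...' + '1374149000' + 'CRVW',
#                   'review_id': 'I80b70de3...', ...}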

View File

@@ -16,7 +16,6 @@
 import logging
 import re
-import urllib
 
 import memcache
@@ -25,6 +24,7 @@ LOG = logging.getLogger(__name__)
 BULK_READ_SIZE = 64
 RECORD_ID_PREFIX = 'record:'
 UPDATE_ID_PREFIX = 'update:'
+MEMCACHED_URI_PREFIX = r'^memcached:\/\/'
 
 
 class RuntimeStorage(object):
@@ -37,10 +37,10 @@ class RuntimeStorage(object):
     def apply_corrections(self, corrections_iterator):
         pass
 
-    def get_head_commit_id(self, uri, branch):
+    def get_last_id(self, key):
         pass
 
-    def set_head_commit_id(self, uri, branch, head_commit_id):
+    def set_last_id(self, key, head_commit_id):
         pass
 
     def get_update(self, pid):
@@ -55,7 +55,7 @@ class MemcachedStorage(RuntimeStorage):
     def __init__(self, uri):
         super(MemcachedStorage, self).__init__(uri)
 
-        stripped = re.sub(r'memcached:\/\/', '', uri)
+        stripped = re.sub(MEMCACHED_URI_PREFIX, '', uri)
         if stripped:
             storage_uri = stripped.split(',')
             self.memcached = memcache.Client(storage_uri)
@@ -63,15 +63,22 @@ class MemcachedStorage(RuntimeStorage):
         else:
             raise Exception('Invalid storage uri %s' % uri)
 
-    def set_records(self, records_iterator):
+    def _build_index(self):
+        self.record_index = {}
+        for record in self._get_all_records():
+            self.record_index[record['primary_key']] = record['record_id']
+
+    def set_records(self, records_iterator, merge_handler=None):
         for record in records_iterator:
-            if record['commit_id'] in self.commit_id_index:
+            if record['primary_key'] in self.record_index:
                 # update
-                record_id = self.commit_id_index[record['commit_id']]
+                record_id = self.record_index[record['primary_key']]
                 original = self.memcached.get(self._get_record_name(record_id))
-                original['branches'] |= record['branches']
-                LOG.debug('Update record %s' % record)
-                self.memcached.set(self._get_record_name(record_id), original)
+                if merge_handler:
+                    if merge_handler(original, record):
+                        LOG.debug('Update record %s' % record)
+                        self.memcached.set(self._get_record_name(record_id),
                                           original)
             else:
                 # insert record
                 record_id = self._get_record_count()
@@ -84,10 +91,10 @@ class MemcachedStorage(RuntimeStorage):
     def apply_corrections(self, corrections_iterator):
         for correction in corrections_iterator:
-            if correction['commit_id'] not in self.commit_id_index:
+            if correction['primary_key'] not in self.record_index:
                 continue
 
-            record_id = self.commit_id_index[correction['commit_id']]
+            record_id = self.record_index[correction['primary_key']]
             original = self.memcached.get(self._get_record_name(record_id))
             need_update = False
@@ -100,12 +107,10 @@ class MemcachedStorage(RuntimeStorage):
             self.memcached.set(self._get_record_name(record_id), original)
             self._commit_update(record_id)
 
-    def get_head_commit_id(self, uri, branch):
-        key = str(urllib.quote_plus(uri) + ':' + branch)
+    def get_last_id(self, key):
         return self.memcached.get(key)
 
-    def set_head_commit_id(self, uri, branch, head_commit_id):
-        key = str(urllib.quote_plus(uri) + ':' + branch)
+    def set_last_id(self, key, head_commit_id):
         self.memcached.set(key, head_commit_id)
 
     def get_update(self, pid):
@@ -186,15 +191,10 @@ class MemcachedStorage(RuntimeStorage):
         self.memcached.set(UPDATE_ID_PREFIX + str(count), record_id)
         self.memcached.set('update:count', count + 1)
 
-    def _build_index(self):
-        self.commit_id_index = {}
-        for record in self._get_all_records():
-            self.commit_id_index[record['commit_id']] = record['record_id']
-
 
 def get_runtime_storage(uri):
     LOG.debug('Runtime storage is requested for uri %s' % uri)
-    match = re.search(r'^memcached:\/\/', uri)
+    match = re.search(MEMCACHED_URI_PREFIX, uri)
     if match:
         return MemcachedStorage(uri)
     else:
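With records keyed by the generic primary_key, the storage can upsert any record type; the optional merge_handler decides whether a record seen again actually replaces the stored copy. A minimal in-memory sketch of that contract (not the memcached implementation):

def set_records(store, index, records, merge_handler=None):
    for record in records:
        key = record['primary_key']
        if key in index:
            if merge_handler and merge_handler(store[index[key]], record):
                pass  # merge_handler mutated the stored record in place
        else:
            record_id = len(store)
            record['record_id'] = record_id
            store[record_id] = record
            index[key] = record_id

store, index = {}, {}
set_records(store, index,
            [{'primary_key': 'abc', 'branches': set(['master'])}])
set_records(store, index,
            [{'primary_key': 'abc', 'branches': set(['master', 'feature'])}],
            lambda orig, new: orig['branches'].update(new['branches']) or True)
print(store[0]['branches'])  # set(['master', 'feature'])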

View File

@@ -13,13 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import logging
 import os
 import re
 
 import sh
 
+from stackalytics.openstack.common import log as logging
+
 LOG = logging.getLogger(__name__)
@@ -41,7 +42,7 @@ class Vcs(object):
     def log(self, branch, head_commit_id):
         pass
 
-    def get_head_commit_id(self, branch):
+    def get_last_id(self, branch):
         pass
@@ -162,7 +163,7 @@ class Git(Vcs):
             yield commit
 
-    def get_head_commit_id(self, branch):
+    def get_last_id(self, branch):
         LOG.debug('Get head commit for repo uri %s' % self.repo['uri'])
         self._chdir()

View File

@@ -2,7 +2,7 @@
 pep8==1.4.5
 pyflakes==0.7.2
 flake8==2.0
-hacking>=0.5.3,<0.6
+hacking>=0.5.3,<0.7
 coverage
 discover

View File

@@ -1,10 +1,25 @@
+# Copyright (c) 2013 Mirantis Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from launchpadlib import launchpad
 import mock
 from oslo.config import cfg
 import testtools
 
-from stackalytics.processor import commit_processor
 from stackalytics.processor import persistent_storage
+from stackalytics.processor import record_processor
 
 
 class TestCommitProcessor(testtools.TestCase):
@@ -41,7 +56,7 @@ class TestCommitProcessor(testtools.TestCase):
         ])
         self.persistent_storage = p_storage
 
-        self.commit_processor = commit_processor.CachedProcessor(p_storage)
+        self.commit_processor = record_processor.CommitProcessor(p_storage)
         self.launchpad_patch = mock.patch('launchpadlib.launchpad.Launchpad')
         self.launchpad_patch.start()
         cfg.CONF = mock.MagicMock()

View File

@@ -1,3 +1,18 @@
+# Copyright (c) 2013 Mirantis Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import mock
 import os
 import testtools

View File

@@ -1,3 +1,18 @@
+# Copyright (c) 2013 Mirantis Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import testtools
 
 from dashboard import web