Revert "Remove Foundation members report"

The Foundation members report was useful for getting data on new registrations.
Unfortunately, the user registration date can only be retrieved from HTML
and is not available in the OpenStackID-Resources API.

This reverts commit fd2ba439947d754f21c5ec4c555f29519810258e.

Change-Id: I8d86cec906f516be5696c679176ba4919f18edc7
This commit is contained in:
Ilya Shakhat 2017-09-07 14:59:51 +00:00
parent fd2ba43994
commit 43ffa83fe4
24 changed files with 823 additions and 14 deletions

@ -1,12 +1,14 @@
usage: stackalytics-processor [-h] [--config-dir DIR] [--config-file PATH]
[--corrections-uri CORRECTIONS_URI] [--debug]
[--default-data-uri DEFAULT_DATA_URI]
[--corrections-uri CORRECTIONS_URI]
[--days_to_update_members DAYS_TO_UPDATE_MEMBERS]
[--debug] [--default-data-uri DEFAULT_DATA_URI]
[--fetching-user-source FETCHING_USER_SOURCE]
[--gerrit-retry GERRIT_RETRY]
[--git-base-uri GIT_BASE_URI]
[--log-config-append PATH]
[--log-date-format DATE_FORMAT]
[--log-dir LOG_DIR] [--log-file PATH]
[--members-look-ahead MEMBERS_LOOK_AHEAD]
[--nodebug] [--nouse-journal] [--nouse-syslog]
[--nowatch-log-file]
[--read-timeout READ_TIMEOUT]
@ -33,6 +35,8 @@ optional arguments:
precedence. Defaults to None.
--corrections-uri CORRECTIONS_URI
The address of file with corrections data
--days_to_update_members DAYS_TO_UPDATE_MEMBERS
Number of days to update members
--debug, -d If set to true, the logging level will be set to DEBUG
instead of the default INFO level.
--default-data-uri DEFAULT_DATA_URI
@ -67,6 +71,8 @@ optional arguments:
If no default is set, logging will go to stderr as
defined by use_stderr. This option is ignored if
log_config_append is set.
--members-look-ahead MEMBERS_LOOK_AHEAD
How many member profiles to look ahead after the last
--nodebug The inverse of --debug
--nouse-journal The inverse of --use-journal
--nouse-syslog The inverse of --use-syslog

@ -30958,6 +30958,7 @@
"https://lists.opnfv.org/pipermail/opnfv-users/",
"https://lists.opnfv.org/pipermail/test-wg/"
],
"member_lists": ["https://www.openstack.org/community/members/profile/"],
"project_types": [
{
"id": "all",

@ -152,6 +152,9 @@
# The folder that holds all project sources to analyze (string value)
#sources_root = /var/local/stackalytics
# Number of days to update members (integer value)
#days_to_update_members = 30
# The address of file with corrections data (string value)
#corrections_uri = https://git.openstack.org/cgit/openstack/stackalytics/plain/etc/corrections.json
@ -174,6 +177,9 @@
# Allowed values: launchpad, <None>
#fetching_user_source = launchpad
# How many member profiles to look ahead after the last (integer value)
#members_look_ahead = 250
# Number of seconds to wait for remote response (integer value)
#read_timeout = 120

@ -293,6 +293,7 @@
],
"mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"],
"member_lists": ["http://www.openstack.org/community/members/profile/"],
"project_types": [
{

@ -365,6 +365,7 @@ def aggregate_filter():
'bpc': (incremental_filter, None),
'filed-bugs': (incremental_filter, None),
'resolved-bugs': (incremental_filter, None),
'members': (incremental_filter, None),
'person-day': (person_day_filter, None),
'patches': (None, None),
'translations': (loc_filter, None),

@ -50,6 +50,7 @@ METRIC_TO_RECORD_TYPE = {
'bpc': ['bpc'],
'filed-bugs': ['bugf'],
'resolved-bugs': ['bugr'],
'members': ['member'],
'person-day': ['mark', 'patch', 'email', 'bpd', 'bugf'],
'patches': ['patch'],
'translations': ['tr'],

@ -28,6 +28,9 @@ from stackalytics.dashboard import vault
from stackalytics.processor import utils
DEFAULT_DAYS_COUNT = 7
FIRST_MEMBER_DATE = "2012-Jul-18"
blueprint = flask.Blueprint('reports', __name__, url_prefix='/report')
@ -145,6 +148,20 @@ def contribution(module, days):
}
@blueprint.route('/members')
@decorators.exception_handler()
@decorators.templated()
def members():
    """Build the template context for the Foundation members report.

    Returns a dict with two keys:

    * ``days`` - the reporting period, read from the ``days`` query
      argument; ``DEFAULT_DAYS_COUNT`` is used when the argument is
      missing or empty.
    * ``all_days`` - the whole number of days elapsed since
      ``FIRST_MEMBER_DATE``, plus one.
    """
    seconds_per_day = 24 * 60 * 60

    requested_period = flask.request.args.get('days') or DEFAULT_DAYS_COUNT
    days = int(requested_period)

    first_member_timestamp = utils.date_to_timestamp_ext(FIRST_MEMBER_DATE)
    elapsed_seconds = time.time() - first_member_timestamp
    all_days = int(elapsed_seconds / seconds_per_day) + 1

    return {'days': days, 'all_days': all_days}
@blueprint.route('/affiliation_changes')
@decorators.exception_handler()
@decorators.templated()

@ -424,7 +424,7 @@ ul#menu-stackamenu li {
div.stackamenu {
text-align: left;
padding-bottom: 10px;
margin-left: 315px;
margin-left: 240px;
}
div.stackamenu a {

@ -192,6 +192,8 @@ show_twitter=False) -%}
<div class="header">Bug &ldquo;${title}&rdquo; (<a href="${web_link}" class="ext_link">${number}</a>)</div>
<div>Status: <span class="status${status_class}">${status}</span></div>
<div>Importance: <span class="importance${importance}">${importance}</span></div>
{%elif record_type == "member" %}
<div class="header"><a href="${member_uri}" target="_blank">Registered</a> in OpenStack Foundation</div>
{%elif record_type == "tr" %}
<div class="header">Translated ${loc} words into ${language}</div>
{%/if%}

@ -36,10 +36,11 @@
<div style="float: left;">
<span id="logo"><a href="{{ url_for('overview') }}"><img src="{{ url_for('static', filename='images/stackalytics_logo.png') }}" alt="Stackalytics" style="width: 100%; max-width: 190px;"></a></span>
</div>
<div class="stackamenu">
<div class="stackamenu" style="margin-left: 240px">
<ul id="menu-stackamenu">
<li class="menu-item current-menu-item"><a href="/"><span class="icon-pie"></span>Code Contribution</a></li>
<li class="menu-item"><a href="/report/driverlog"><span class="icon-cogs"></span>Vendor Drivers <span style="vertical-align: top; font-size: 60%;">&beta;</span></a></li>
<li class="menu-item"><a href="/report/members"><span class="icon-users"></span>Member Directory</a></li>
</ul>
</div>
</div>

@ -28,10 +28,11 @@
<div style="float: left;">
<span id="logo"><a href="{{ url_for('overview') }}"><img src="{{ url_for('static', filename='images/stackalytics_logo.png') }}" alt="Stackalytics" style="width: 100%; max-width: 190px;"></a></span>
</div>
<div class="stackamenu">
<div class="stackamenu" style="margin-left: 240px">
<ul id="menu-stackamenu">
<li class="menu-item"><a href="/"><span class="icon-pie"></span>Code Contribution</a></li>
<li class="menu-item current-menu-item"><a href="/report/driverlog"><span class="icon-cogs"></span>Vendor Drivers <span style="vertical-align: top; font-size: 60%;">&beta;</span></a></li>
<li class="menu-item"><a href="/report/members"><span class="icon-users"></span>Member Directory</a></li>
</ul>
</div>
</div>

@ -0,0 +1,405 @@
{% extends "base.html" %}
{% set active_tab = 'members' %}
{% set page_title = 'OpenStack Foundation members' %}
{% block head %}
<script type="text/javascript">
function get_start_date() {
var days = {{ days }};
return Math.round(new Date().getTime() / 1000) - days * 24 * 60 * 60;
}
function show_engineers_table(options) {
    // Loads the member list from /api/1.0/members and renders it into the
    // "members_table" DataTable, sorted by the date column, newest first.
    //   options - query parameters passed to makeURI() for the API call.
    // The unused company selector lookup and the always-true table id
    // guard of the earlier version were removed.
    var column_names = ["index", "link", "date", "company"];
    var table_id = "members_table";
    var sort_by_column = 2; // position of "date" in column_names

    $.ajax({
        url: makeURI("/api/1.0/members", options),
        dataType: "json",
        success: function (data) {
            var rows = data["members"];
            var columns = [];
            var i;

            for (i = 0; i < column_names.length; i++) {
                columns.push({"mData": column_names[i]});
            }

            // Derive the display fields the table columns refer to.
            for (i = 0; i < rows.length; i++) {
                var row = rows[i];
                row.link = "<a href=\"" + row.member_uri + "\">" + row.author_name + "</a>";
                row.date = row.date_str;
                row.company = row.company_name;
            }

            $("#" + table_id).dataTable({
                "aaSorting": [
                    [ sort_by_column, "desc" ]
                ],
                "bFilter": true,
                "bInfo": true,
                "bAutoWidth": false,
                "aaData": rows,
                "aoColumns": columns,
                "bDestroy": true,
                'bPaginate': true,
                "sPaginationType": "full_numbers",
                "aLengthMenu": [[10, 25, 50, -1], [10, 25, 50, "All"]],
                "iDisplayLength": 10
            });
        }
    });
}
function show_new_companies_table(options) {
    // Loads /api/1.0/new_companies and renders the result into the
    // "new_companies_table" DataTable, sorted by the date column,
    // newest first. Each company name links to the members report
    // filtered by that company.
    //   options - query parameters passed to makeURI() for the API call.
    var column_names = ["index", "link", "date"];
    var table_selector = "#new_companies_table";
    var date_column = 2;

    function on_loaded(data) {
        var rows = data["stats"];
        var columns = [];
        var n;

        for (n = 0; n < column_names.length; n++) {
            columns.push({"mData": column_names[n]});
        }

        for (n = 0; n < rows.length; n++) {
            var entry = rows[n];
            var report_uri = makeURI('/report/members', {company: entry.name});
            entry.link = "<a href=\"" + report_uri + "\">" + entry.name + "</a>";
            entry.date = entry.date_str;
        }

        $(table_selector).dataTable({
            "aaSorting": [
                [ date_column, "desc" ]
            ],
            "bFilter": true,
            "bInfo": true,
            "bAutoWidth": false,
            "aaData": rows,
            "aoColumns": columns,
            "bDestroy": true,
            'bPaginate': true,
            "sPaginationType": "full_numbers",
            "aLengthMenu": [[10, 25, 50, -1], [10, 25, 50, "All"]],
            "iDisplayLength": 10
        });
    }

    $.ajax({
        url: makeURI("/api/1.0/new_companies", options),
        dataType: "json",
        success: on_loaded
    });
}
function show_companies_table(options) {
    // Loads /api/1.0/stats/companies and renders the result into the
    // "companies_table" DataTable, sorted by the count column, largest
    // first. Each company name links to the members report filtered by
    // that company.
    //   options - query parameters passed to makeURI() for the API call.
    var column_names = ["index", "link", "count"];
    var table_selector = "#companies_table";
    var count_column = 2;

    function on_loaded(data) {
        var rows = data["stats"];
        var columns = [];
        var n;

        for (n = 0; n < column_names.length; n++) {
            columns.push({"mData": column_names[n]});
        }

        for (n = 0; n < rows.length; n++) {
            var entry = rows[n];
            var report_uri = makeURI('/report/members', {company: entry.name});
            entry.link = "<a href=\"" + report_uri + "\">" + entry.name + "</a>";
            entry.count = entry.metric;
        }

        $(table_selector).dataTable({
            "aaSorting": [
                [ count_column, "desc" ]
            ],
            "bFilter": true,
            "bInfo": true,
            "bAutoWidth": false,
            "aaData": rows,
            "aoColumns": columns,
            "bDestroy": true,
            'bPaginate': true,
            "sPaginationType": "full_numbers",
            "aLengthMenu": [[10, 25, 50, -1], [10, 25, 50, "All"]],
            "iDisplayLength": 10
        });
    }

    $.ajax({
        url: makeURI("/api/1.0/stats/companies", options),
        dataType: "json",
        success: on_loaded
    });
}
function renderChart(url, chart_id, options) {
    // Once the document is ready, fetches statistics from the given API
    // url and draws them as a pie chart in the element named by chart_id.
    // At most `limit` slices are drawn: the first limit - 1 entries are
    // shown individually; a tenth entry is shown as-is when it is the
    // last one, otherwise all remaining entries are summed into "others".
    //   url      - API path passed to makeURI()
    //   chart_id - id of the target element; nothing is drawn when falsy
    //   options  - query parameters passed to makeURI()
    $(document).ready(function () {
        $.ajax({
            url: makeURI(url, options),
            dataType: "jsonp",
            success: function (data) {
                var limit = 10;
                var stats = data["stats"];
                var total = stats.length;
                var slices = [];
                var others_sum = 0;
                var idx;

                for (idx = 0; idx < total; idx++) {
                    if (idx < limit - 1) {
                        slices.push([stats[idx].name, stats[idx].metric]);
                    } else {
                        others_sum += stats[idx].metric;
                    }
                }

                if (total == limit) {
                    var last = stats[total - 1];
                    slices.push([last.name, last.metric]);
                } else if (total > limit) {
                    slices.push(["others", others_sum]);
                }

                if (chart_id) {
                    $.jqplot(chart_id, [slices], {
                        seriesDefaults: {
                            renderer: jQuery.jqplot.PieRenderer,
                            rendererOptions: {
                                showDataLabels: true
                            }
                        },
                        legend: { show: true, location: 'e' }
                    });
                }
            }
        });
    });
}
function make_options() {
    // Collects the query parameters describing the current page state:
    // fixed release and metric, the project type substituted by the
    // template, and the current values of the company and period selectors.
    // Key order is kept as-is because reload() serialises keys in order.
    return {
        'release': 'all',
        'metric': 'members',
        'project_type': '{{ project_type }}',
        'company': $('#company_selector').val(),
        'days': $('#days_selector').val()
    };
}
function reload() {
    // Reloads the page with the current selector values as the query string.
    // Keys and values are URL-encoded so that company names containing
    // characters such as "&", "#", "+" or "=" do not corrupt the query.
    // A missing value (null/undefined) is sent as an empty string rather
    // than the literal text "null".
    var pairs = $.map(make_options(), function (value, key) {
        var text = (value === null || value === undefined) ? "" : value;
        return encodeURIComponent(key) + "=" + encodeURIComponent(text);
    });
    window.location.search = pairs.join("&");
}
// Renders all dynamic parts of the members report: the timeline and the
// members table always; the company tables and the pie chart only when the
// template was rendered without a company filter. With a company filter the
// company-related blocks are hidden instead.
function show_page() {
var start_date = get_start_date();
// Common query parameters shared by every API request made below.
var base_options = { metric: 'members', project_type: '{{ project_type }}', release: 'all', start_date: start_date };
renderTimeline(base_options);
show_engineers_table(base_options);
{% if not company %}
show_companies_table(base_options);
show_new_companies_table(base_options);
renderChart("/api/1.0/stats/companies", "members_chart", base_options);
{% else %}
// A single company is selected: per-company breakdowns are hidden.
$('#companies_block').hide();
$('#new_companies_table_header').hide();
$('#new_companies_table').hide();
{% endif %}
}
$(document).ready(function () {
    // Only the company selector is initialised here. The period selector
    // set-up, the change handlers and the initial show_page() call are
    // performed by the second script block of this template; repeating
    // them here issued every API request and rendered every table and
    // chart twice, and called reload() twice per period change.
    var start_date = get_start_date();
    var base_options = { metric: 'members', project_type: '{{ project_type }}', release: 'all', start_date: start_date };
    initSingleSelector("company", makeURI("/api/1.0/companies", base_options), {allowClear: true});
});
</script>
<style type="text/css">
table.dataTable tr.even {
background-color: #EEF1F4;
}
table.dataTable tr.even:hover, table.dataTable tr.odd:hover {
background-color: #F8FFEC;
}
table.dataTable tr.even td.sorting_1 {
background-color: #E0E8E8;
}
</style>
<script type='text/javascript'>
// On page load: preselect the current period in the period selector,
// turn it into a select2 widget and render the report.
$(document).ready(function () {
$('#days_selector').val({{ days }});
$("#days_selector").select2();
show_page();
});
// Changing the period reloads the page with the new query string.
$(document).on('change', '#days_selector', function (evt) {
reload();
});
// Changing the company filter reloads the page with the new query string.
$(document).on('change', '#company_selector', function (evt) {
reload();
});
</script>
{% endblock %}
{% block body %}
<div class="page">
<div class="aheader">
<div style="float: right; margin-top: 10px; margin-right: 20px;">
<a href="https://wiki.openstack.org/wiki/Stackalytics" target="_blank">About</a>
</div>
<div id="analytics_header">
<div style="float: left;">
<span id="logo"><a href="{{ url_for('overview') }}"><img src="{{ url_for('static', filename='images/stackalytics_logo.png') }}" alt="Stackalytics" style="width: 100%; max-width: 190px;"></a></span>
</div>
<div class="stackamenu" style="margin-left: 240px;">
<ul id="menu-stackamenu">
<li class="menu-item"><a href="{{ url_for('overview') }}"><span class="icon-pie"></span>Code Contribution</a></li>
<li class="menu-item"><a href="/report/driverlog"><span class="icon-cogs"></span>Vendor Drivers <span style="vertical-align: top; font-size: 60%;">&beta;</span></a></li>
<li class="menu-item current-menu-item"><a href="/report/members"><span class="icon-users"></span>Member Directory</a></li>
</ul>
</div>
</div>
<div class="navigation">
<div id="timeline"
style="width: 100%; height: 120px; margin-top: 15px;"></div>
</div>
<div class="drops">
<div class="drop" style="margin-top: 1em;">
<label for="days_selector">Joined during period</label>
<select id="days_selector" name="days_selector"
style="min-width: 140px;"
data-placeholder="Select period">
<option value="7">week</option>
<option value="14">two weeks</option>
<option value="31">month</option>
<option value="93">quarter</option>
<option value="183">half year</option>
<option value="365">year</option>
<option value="{{ all_days }}">all</option>
</select>
</div>
<div class="drop" style="margin-top: 1em;">
<label for="company_selector">Company</label>
<input id="company_selector" style="width: 140px"
data-placeholder="Any company"/>
</div>
</div>
<table style="width: 100%" cellspacing="0" id="companies_block">
<tr>
<td style="width: 50%; vertical-align: top; padding-right: 3em;">
<h2>OpenStack Foundation member companies</h2>
<div class="body" style="margin-right: 1em;">
<table id="companies_table">
<thead>
<tr>
<th>#</th>
<th>Company</th>
<th>Members Count</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
</div>
</td>
<td style="width: 50%; vertical-align: top; padding-left: 3em">
<h2>Members by company</h2>
<div class="body" style="margin-left: 1em;">
<div id="members_container">
<div id="members_chart"
style="width: 100%; height: 350px; margin-bottom: 1em;"></div>
</div>
</div>
</td>
</tr>
</table>
<table style="width: 100%" cellspacing="0">
<tr>
<td style="width: 50%; vertical-align: top; padding-right: 3em;">
<h2>Individual Members</h2>
<div class="body" style="margin-right: 1em;">
<table id="members_table">
<thead>
<tr>
<th>#</th>
<th>Engineer</th>
<th>Date Joined</th>
<th>Company</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
</div>
</td>
<td style="width: 50%; vertical-align: top; padding-right: 3em;">
<div id="new_companies_table_header">
<h2>New Companies</h2>
</div>
<div class="body" style="margin-right: 1em;">
<table id="new_companies_table">
<thead>
<tr>
<th>#</th>
<th>Company</th>
<th>First Member Joined</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
</div>
</td>
</tr>
</table>
</div>
</div>
{% endblock %}

@ -197,6 +197,12 @@
href="https://review.openstack.org/#/c/{{ record.review_number }}"
target="_blank">{{ record.review_id }}</a></div>
{% elif record_type == "member" %}
<div class="header"><a href="{{ record.member_uri }}"
target="_blank">Registered</a> in OpenStack
Foundation
</div>
{% elif record_type == "tr" %}
<div class="header">Translated {{ record.loc }} words
into {{ record.language }}</div>

@ -350,6 +350,27 @@ def get_module(module_id, **kwargs):
return module
@app.route('/api/1.0/members')
@decorators.exception_handler()
@decorators.response()
@decorators.cached(ignore=['release', 'project_type', 'module'])
@decorators.jsonify('members')
@decorators.record_filter(ignore=['release', 'project_type', 'module'])
def get_members(records, **kwargs):
    """Return member entries for the given records, newest first.

    Every record is extended via ``vault.extend_record`` and reduced to the
    fields ``author_name``, ``date``, ``company_name`` and ``member_uri``,
    plus ``date_str`` holding the formatted ``date``. The resulting list is
    sorted by ``date`` in descending order and passed through
    ``utils.add_index`` before being returned.
    """
    exported_fields = ('author_name', 'date', 'company_name', 'member_uri')

    members = []
    for raw_record in records:
        extended = vault.extend_record(raw_record)
        entry = {field: extended[field] for field in exported_fields}
        entry['date_str'] = helpers.format_date(entry['date'])
        members.append(entry)

    members.sort(key=lambda entry: entry['date'], reverse=True)
    utils.add_index(members)
    return members
@app.route('/api/1.0/stats/bp')
@decorators.exception_handler()
@decorators.response()
@ -564,6 +585,8 @@ def _get_week(kwargs, param_name):
def timeline(records, **kwargs):
# find start and end dates
metric = parameters.get_parameter(kwargs, 'metric')
start_date = int(parameters.get_single_parameter(kwargs, 'start_date')
or 0)
release_name = parameters.get_single_parameter(kwargs, 'release') or 'all'
releases = vault.get_vault()['releases']
@ -627,10 +650,14 @@ def timeline(records, **kwargs):
if start_week <= week < end_week:
week_stat_loc[week] += loc_handler(record)
week_stat_commits[week] += commits_handler(record)
if record.release == release_name:
week_stat_commits_hl[week] += commits_handler(record)
if 'members' in metric:
if record.date >= start_date:
week_stat_commits_hl[week] += 1
else:
if record.release == release_name:
week_stat_commits_hl[week] += commits_handler(record)
if 'all' == release_name:
if 'all' == release_name and 'members' not in metric:
week_stat_commits_hl = week_stat_commits
# form arrays in format acceptable to timeline plugin

@ -32,6 +32,8 @@ PROCESSOR_OPTS = [
'default_data_uri = file:///path/to/default_data.json'),
cfg.StrOpt('sources-root', default='/var/local/stackalytics',
help='The folder that holds all project sources to analyze'),
cfg.IntOpt('days_to_update_members', default=30,
help='Number of days to update members'),
cfg.StrOpt('corrections-uri',
default=('https://git.openstack.org/cgit/'
'openstack/stackalytics/plain/etc/corrections.json'),
@ -51,6 +53,8 @@ PROCESSOR_OPTS = [
cfg.StrOpt("fetching-user-source", default='launchpad',
choices=['launchpad', '<None>'],
help="Source for fetching user profiles"),
cfg.IntOpt('members-look-ahead', default=250,
help='How many member profiles to look ahead after the last'),
cfg.IntOpt('read-timeout', default=120,
help='Number of seconds to wait for remote response'),
cfg.IntOpt('gerrit-retry', default=10,

@ -43,7 +43,8 @@ OPTS = [
SINGLE_KEYS = ['module_groups', 'project_types', 'repos', 'releases',
'companies', 'runtime_storage_update_time']
'companies', 'last_update_members_date', 'last_member_index',
'runtime_storage_update_time']
ARRAY_KEYS = ['record', 'user']
BULK_READ_SIZE = 64
MEMCACHED_URI_PREFIX = r'^memcached:\/\/'
@ -139,6 +140,8 @@ def export_data(memcached_inst, fd):
pickle.dump(('user:%s' % user['launchpad_id'], user), fd)
if user.get('gerrit_id'):
pickle.dump(('user:gerrit:%s' % user['gerrit_id'], user), fd)
if user.get('member_id'):
pickle.dump(('user:member:%s' % user['member_id'], user), fd)
for email in user.get('emails') or []:
pickle.dump((('user:%s' % email).encode('utf8'), user), fd)

@ -27,6 +27,7 @@ from stackalytics.processor import default_data_processor
from stackalytics.processor import governance
from stackalytics.processor import lp
from stackalytics.processor import mls
from stackalytics.processor import mps
from stackalytics.processor import rcs
from stackalytics.processor import record_processor
from stackalytics.processor import runtime_storage
@ -192,6 +193,23 @@ def _process_translation_stats(runtime_storage_inst, record_processor_inst):
runtime_storage_inst.set_records(processed_translation_iterator)
def _process_member_list(uri, runtime_storage_inst, record_processor_inst):
    """Fetch, process and store member records from one member list URI.

    Member profiles produced by ``mps.log`` are tagged with the ``member``
    record type, run through the record processor and written to the
    runtime storage.
    """
    raw_members = mps.log(uri, runtime_storage_inst,
                          CONF.days_to_update_members,
                          CONF.members_look_ahead)
    typed_members = _record_typer(raw_members, 'member')
    processed_members = record_processor_inst.process(typed_members)
    runtime_storage_inst.set_records(processed_members)
def update_members(runtime_storage_inst, record_processor_inst):
    """Process every member list configured in the runtime storage.

    The list of URIs is read from the ``member_lists`` storage key; a
    missing or empty value means there is nothing to do.
    """
    for member_list_uri in runtime_storage_inst.get_by_key('member_lists') or []:
        _process_member_list(member_list_uri, runtime_storage_inst,
                             record_processor_inst)
def _post_process_records(record_processor_inst, repos):
LOG.debug('Build release index')
release_index = {}
@ -308,6 +326,9 @@ def main():
apply_corrections(CONF.corrections_uri, runtime_storage_inst)
# long operation should be the last
update_members(runtime_storage_inst, record_processor_inst)
runtime_storage_inst.set_by_key('runtime_storage_update_time',
utils.date_to_timestamp('now'))
LOG.info('stackalytics-processor succeeded.')

@ -0,0 +1,111 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import re
import time
from oslo_log import log as logging
import requests
import six
from stackalytics.processor import utils
LOG = logging.getLogger(__name__)
# Captures the member name (text following the first <h3>) and the join
# date (text following the next <div class="span-N last">).
NAME_AND_DATE_PATTERN = (r'<h3>(?P<member_name>[^<]*)[\s\S]*?'
                         r'<div class="span-\d last">(?P<date_joined>[^<]*)')
# Captures the company shown in bold after the "Date Joined" label for an
# affiliation marked "(Current)".
COMPANY_PATTERN = (r'<strong>Date\sJoined[\s\S]*?<b>(?P<company_draft>[^<]*)'
                   r'[\s\S]*?From\s(?P<date_from>[\s\S]*?)\(Current\)')
# Characters that are dropped from scraped names altogether.
GARBAGE_PATTERN = r'[/\\~%^\*_]+'


def strip_garbage(s):
    """Remove garbage characters from *s* and collapse whitespace runs.

    Characters matched by ``GARBAGE_PATTERN`` are deleted first; afterwards
    every run of whitespace is replaced by a single space.
    """
    without_garbage = re.sub(GARBAGE_PATTERN, '', s)
    return re.sub(r'\s+', ' ', without_garbage)
def _retrieve_member(requests_session, uri, member_id, html_parser):
    """Download one member profile page and extract the member details.

    :param requests_session: session handed to ``utils.read_uri``
    :param uri: address of the profile page
    :param member_id: identifier stored in the result as ``member_id``
    :param html_parser: object whose ``unescape`` method, when present, is
        used to decode HTML entities in the company name
    :returns: an empty dict when the page has no content. Otherwise a dict
        that always holds ``company_draft`` (``'*independent'`` when no
        current affiliation is found) and, when the name/date pattern
        matches, also ``member_id``, ``member_name``, ``date_joined`` and
        ``member_uri``.
    """
    content = utils.read_uri(uri, session=requests_session)
    if not content:
        return {}

    # HTMLParser.unescape() was removed in Python 3.9; fall back to the
    # standard html.unescape() when the supplied parser lacks the method.
    unescape = getattr(html_parser, 'unescape', None)
    if unescape is None:
        from html import unescape

    member = {}

    # Only the first name/date occurrence on the page is used.
    name_and_date = re.search(NAME_AND_DATE_PATTERN, content)
    if name_and_date:
        member['member_id'] = member_id
        member['member_name'] = strip_garbage(
            name_and_date.group('member_name'))
        member['date_joined'] = name_and_date.group('date_joined')
        member['member_uri'] = uri

    member['company_draft'] = '*independent'
    # Every match overwrites the previous one, so the last match wins.
    for company_match in re.finditer(COMPANY_PATTERN, content):
        member['company_draft'] = strip_garbage(
            unescape(company_match.group('company_draft')))

    return member
def log(uri, runtime_storage_inst, days_to_update_members, members_look_ahead):
    """Yield member profiles scraped from consecutively numbered pages.

    Scanning starts right after the ``last_member_index`` kept in the
    runtime storage. When the stored ``last_update_members_date`` is at
    least ``days_to_update_members`` days old, the scan restarts from the
    first profile and the stored date is refreshed.

    :param uri: profile URI prefix; the profile index is appended to it
    :param runtime_storage_inst: storage providing ``get_by_key`` and
        ``set_by_key``
    :param days_to_update_members: age in days after which a full rescan
        is triggered
    :param members_look_ahead: number of consecutive profiles without a
        member name after which scanning stops
    :returns: generator of member dicts as built by ``_retrieve_member``

    ``last_member_index`` is written back only when the generator runs to
    completion. The HTTP session is closed in all cases, including when
    the generator is closed early or a retrieval raises.
    """
    LOG.debug('Retrieving new openstack.org members')

    last_update_members_date = runtime_storage_inst.get_by_key(
        'last_update_members_date') or 0
    last_member_index = runtime_storage_inst.get_by_key(
        'last_member_index') or 0

    end_update_date = int(time.time()) - days_to_update_members * 24 * 60 * 60

    if last_update_members_date <= end_update_date:
        # The previous full scan is too old: start over from the beginning.
        last_member_index = 0
        last_update_members_date = int(time.time())
        runtime_storage_inst.set_by_key('last_update_members_date',
                                        last_update_members_date)

    cnt_empty = 0
    cur_index = last_member_index + 1
    html_parser = six.moves.html_parser.HTMLParser()
    requests_session = requests.Session()

    try:
        while cnt_empty < members_look_ahead:
            profile_uri = uri + str(cur_index)
            member = _retrieve_member(requests_session, profile_uri,
                                      str(cur_index), html_parser)

            if 'member_name' not in member:
                cnt_empty += 1
                cur_index += 1
                continue

            cnt_empty = 0
            last_member_index = cur_index
            cur_index += 1
            LOG.debug('New member: %s', member['member_id'])
            yield member

            # Random pause of up to five seconds between found profiles.
            time.sleep(random.random() * 5)
    finally:
        requests_session.close()

    LOG.debug('Last_member_index: %s', last_member_index)
    runtime_storage_inst.set_by_key('last_member_index', last_member_index)

@ -425,6 +425,39 @@ class RecordProcessor(object):
yield bug_fixed
def _process_member(self, record):
    """Convert a raw member profile into a record and sync its user.

    The record receives ``primary_key``, ``date``, ``author_name``,
    ``module``, ``author_email`` and ``company_name``. The matching user
    profile is then loaded, given the member's name and a single
    open-ended affiliation with the resolved company, and stored back.
    Yields the updated record.
    """
    member_user_id = user_processor.make_user_id(
        member_id=record['member_id'])

    record['primary_key'] = member_user_id
    record['date'] = utils.member_date_to_timestamp(record['date_joined'])
    record['author_name'] = record['member_name']
    record['module'] = 'unknown'

    # Prefer the company registered in the domains index; otherwise fall
    # back to the normalized draft taken from the profile.
    draft = record['company_draft']
    indexed_company = self.domains_index.get(
        utils.normalize_company_name(draft))
    company_name = indexed_company or utils.normalize_company_draft(draft)

    # author_email is a key to create new user
    record['author_email'] = member_user_id
    record['company_name'] = company_name

    # _update_record_and_user function will create new user if needed
    self._update_record_and_user(record)
    # Re-apply the resolved company after the call above
    # (presumably it may overwrite company_name - verify against callee).
    record['company_name'] = company_name

    user = user_processor.load_user(self.runtime_storage_inst,
                                    user_id=member_user_id)
    user['user_name'] = record['author_name']
    user['companies'] = [{'company_name': company_name, 'end_date': 0}]
    user['company_name'] = company_name
    user_processor.store_user(self.runtime_storage_inst, user)

    yield record
def _process_translation(self, record):
# todo split translation and approval
translation = record.copy()
@ -455,6 +488,7 @@ class RecordProcessor(object):
'email': self._process_email,
'bp': self._process_blueprint,
'bug': self._process_bug,
'member': self._process_member,
'i18n': self._process_translation,
}
@ -681,6 +715,39 @@ class RecordProcessor(object):
self.runtime_storage_inst.set_records(
self._close_patch(cores, marks_patch['marks']))
def _update_members_company_name(self):
    """Yield a handler that refreshes company names of member records.

    The handler ignores records that are not of type ``member`` and
    records whose company name already equals the freshly resolved one.
    For the remaining records it updates ``company_name``, yields the
    record, and afterwards rewrites the affiliation list of the
    corresponding user.
    """
    LOG.info('Update members with company names')

    def record_handler(record):
        if record['record_type'] != 'member':
            return

        draft = record['company_draft']
        resolved_company = (
            self.domains_index.get(utils.normalize_company_name(draft)) or
            utils.normalize_company_draft(draft))

        if resolved_company == record['company_name']:
            return

        LOG.debug('Update record %s, company name changed to %s',
                  record, resolved_company)
        record['company_name'] = resolved_company

        yield record

        # The user profile is updated only after the record was handed
        # over to the consumer.
        member_user = user_processor.load_user(self.runtime_storage_inst,
                                               user_id=record['user_id'])
        LOG.debug('Update user %s, company name changed to %s',
                  member_user, resolved_company)
        member_user['companies'] = [{
            'company_name': resolved_company,
            'end_date': 0,
        }]
        user_processor.store_user(self.runtime_storage_inst, member_user)

    yield record_handler
def _update_commits_with_module_alias(self):
LOG.info('Update record with aliases')
@ -706,6 +773,7 @@ class RecordProcessor(object):
self._update_commits_with_module_alias,
self._update_blueprints_with_mention_info,
self._determine_core_contributors,
self._update_members_company_name,
self._update_marks_with_disagreement,
]

@ -225,6 +225,12 @@ default_data = {
"type": "string"
}
},
"member_lists": {
"type": "array",
"items": {
"type": "string"
}
},
"project_types": {
"type": "array",
"items": {

@ -22,11 +22,13 @@ ROBOTS = '*robots'
def make_user_id(emails=None, launchpad_id=None, gerrit_id=None,
github_id=None, zanata_id=None):
member_id=None, github_id=None, zanata_id=None):
if launchpad_id or emails:
return launchpad_id or emails[0]
if gerrit_id:
return 'gerrit:%s' % gerrit_id
if member_id:
return 'member:%s' % member_id
if github_id:
return 'github:%s' % github_id
if zanata_id:
@ -58,11 +60,11 @@ def store_user(runtime_storage_inst, user):
def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
launchpad_id=None, gerrit_id=None, github_id=None,
zanata_id=None):
launchpad_id=None, gerrit_id=None, member_id=None,
github_id=None, zanata_id=None):
key = make_user_id(gerrit_id=gerrit_id, github_id=github_id,
zanata_id=zanata_id)
key = make_user_id(gerrit_id=gerrit_id, member_id=member_id,
github_id=github_id, zanata_id=zanata_id)
if not key:
key = seq or user_id or launchpad_id or email
if key:

@ -63,6 +63,13 @@ def date_to_timestamp_ext(d):
return int(d)
def member_date_to_timestamp(d):
    """Convert a member join date such as ``'June 25, 2013 '`` to a timestamp.

    Surrounding whitespace is ignored, so the date parses with or without
    the trailing space found on the member profile page.

    :param d: date text in ``<month name> <day>, <year>`` form
    :returns: 0 when *d* is empty, ``None`` or whitespace only; otherwise
        the timestamp of midnight on that date, computed with
        ``time.mktime`` and therefore in the local time zone
    :raises ValueError: when the text does not match the expected format
    """
    date_text = d.strip() if d else ''
    if not date_text:
        return 0
    parsed = datetime.datetime.strptime(date_text, '%B %d, %Y')
    return int(time.mktime(parsed.timetuple()))
def iso8601_to_timestamp(s):
return calendar.timegm(iso8601.parse_date(s).utctimetuple())

@ -0,0 +1,58 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import testtools
from stackalytics.processor import mps
class TestMps(testtools.TestCase):
    """Checks the regular expressions used to parse member profile pages."""

    def test_member_parse_regex(self):
        """Name, join date and company are extracted from a sample page."""
        content = '''<h1>Individual Member Profile</h1>
<div class="candidate span-14">
<div class="span-4">
<img src="/themes/openstack/images/generic-profile-photo.png"><p>&nbsp;</p>
</div>
<a name="profile-10501"></a>
<div class="details span-10 last">
<div class="last name-and-title">
<h3>Jim Battenberg</h3>
</div>
<hr><div class="span-4"><strong>Date Joined</strong></div>
<div class="span-6 last">June 25, 2013 <br><br></div>
<div class="span-4"><strong>Affiliations</strong></div>
<div class="span-6 last">
<div>
<b>Rackspace</b> From (Current)
</div>
</div>
<div class="span-4"><strong>Statement of Interest </strong></div>
<div class="span-6 last">
<p>contribute logic and evangelize openstack</p>
</div>
<p>&nbsp;</p>'''

        name_and_date = re.search(mps.NAME_AND_DATE_PATTERN, content)
        self.assertIsNotNone(name_and_date)
        self.assertEqual('Jim Battenberg',
                         name_and_date.group('member_name'))
        # The trailing space is part of the captured date text.
        self.assertEqual('June 25, 2013 ',
                         name_and_date.group('date_joined'))

        company = re.search(mps.COMPANY_PATTERN, content)
        self.assertIsNotNone(company)
        self.assertEqual('Rackspace', company.group('company_draft'))

@ -799,6 +799,60 @@ class TestRecordProcessor(testtools.TestCase):
record_processor_inst.runtime_storage_inst,
email='john_doe@gmail.com'))
def test_create_member(self):
    """Processing a member profile creates the record and the user."""
    profile = {
        'member_id': '123456789',
        'member_name': 'John Doe',
        'member_uri': 'http://www.openstack.org/community'
                      '/members/profile/123456789',
        'date_joined': 'August 01, 2012 ',
        'company_draft': 'Mirantis',
    }
    expected_date = utils.member_date_to_timestamp('August 01, 2012 ')

    processor = self.make_record_processor()
    processed = next(processor._process_member(profile))

    self.assertEqual(processed['primary_key'], 'member:123456789')
    self.assertEqual(processed['date'], expected_date)
    self.assertEqual(processed['author_name'], 'John Doe')
    self.assertEqual(processed['company_name'], 'Mirantis')

    stored_user = user_processor.load_user(
        processor.runtime_storage_inst, member_id='123456789')

    self.assertEqual(stored_user['user_name'], 'John Doe')
    self.assertEqual(stored_user['company_name'], 'Mirantis')
    self.assertEqual(stored_user['companies'],
                     [{'company_name': 'Mirantis', 'end_date': 0}])
def test_update_member(self):
    """A profile with changed name and company yields matching data.

    NOTE(review): the profile is modified before it is processed and is
    processed only once, so no previously stored member is updated here;
    consider processing the original profile first - TODO confirm intent.
    """
    profile = {
        'member_id': '123456789',
        'member_name': 'John Doe',
        'member_uri': 'http://www.openstack.org/community'
                      '/members/profile/123456789',
        'date_joined': 'August 01, 2012 ',
        'company_draft': 'Mirantis',
    }
    processor = self.make_record_processor()

    profile['member_name'] = 'Bill Smith'
    profile['company_draft'] = 'Rackspace'

    processed = next(processor._process_member(profile))
    self.assertEqual(processed['author_name'], 'Bill Smith')
    self.assertEqual(processed['company_name'], 'Rackspace')

    stored_user = user_processor.load_user(
        processor.runtime_storage_inst, member_id='123456789')

    self.assertEqual(stored_user['user_name'], 'Bill Smith')
    self.assertEqual(stored_user['companies'],
                     [{'company_name': 'Rackspace', 'end_date': 0}])
def test_process_email_then_review(self):
# it is expected that the user profile will contain email and
# gerrit id, while LP id will be None