Changed record filtering by date to use a per-day record index

implements bp optimize-date-filtering

Change-Id: I6d4157992613a8188a94e3ad041ebd36768b0077
This commit is contained in:
pkholkin 2014-05-08 18:50:32 +04:00
parent 2314a454df
commit 6f4e031fb6
4 changed files with 39 additions and 14 deletions

View File

@ -32,24 +32,21 @@ from stackalytics import version as stackalytics_version
LOG = logging.getLogger(__name__)
def _get_time_filter(kwargs, ignore):
start_date = parameters.get_single_parameter(kwargs, 'start_date')
def _filter_records_by_days(ignore, start_date, end_date, memory_storage_inst):
if start_date and 'start_date' not in ignore:
start_date = utils.date_to_timestamp_ext(start_date)
else:
start_date = 0
end_date = parameters.get_single_parameter(kwargs, 'end_date')
start_date = memory_storage_inst.get_first_record_day()
if end_date and 'end_date' not in ignore:
end_date = utils.date_to_timestamp_ext(end_date)
else:
end_date = utils.date_to_timestamp_ext('now')
def time_filter(records):
for record in records:
if start_date <= record['date'] <= end_date:
yield record
start_day = utils.timestamp_to_day(start_date)
end_day = utils.timestamp_to_day(end_date)
return time_filter
return memory_storage_inst.get_record_ids_by_days(
six.moves.range(start_day, end_day + 1))
def record_filter(ignore=None, use_default=True):
@ -144,11 +141,17 @@ def record_filter(ignore=None, use_default=True):
memory_storage_inst.get_record_ids_by_blueprint_ids(
param))
time_filter = _get_time_filter(kwargs, ignore)
start_date = parameters.get_single_parameter(kwargs, 'start_date')
end_date = parameters.get_single_parameter(kwargs, 'end_date')
if (start_date and 'start_date' not in ignore) or (
end_date and 'end_date' not in ignore):
record_ids &= _filter_records_by_days(ignore,
start_date, end_date,
memory_storage_inst)
kwargs['record_ids'] = record_ids
kwargs['records'] = time_filter(
memory_storage_inst.get_records(record_ids))
kwargs['records'] = memory_storage_inst.get_records(record_ids)
return f(*args, **kwargs)
return record_filter_decorated_function
@ -217,7 +220,7 @@ def man_days_filter(result, record, param_id):
# effort of the author (author's effort is represented in patches)
return
day = record['date'] // (24 * 3600)
day = utils.timestamp_to_day(record['date'])
result_by_param = result[record[param_id]]
if 'days' not in result_by_param:

View File

@ -15,6 +15,8 @@
import six
from stackalytics.processor import utils
MEMORY_STORAGE_CACHED = 0
@ -38,6 +40,7 @@ class CachedMemoryStorage(MemoryStorage):
self.release_index = {}
self.blueprint_id_index = {}
self.company_name_mapping = {}
self.day_index = {}
self.indexes = {
'primary_key': self.primary_key_index,
@ -60,6 +63,12 @@ class CachedMemoryStorage(MemoryStorage):
else:
self.blueprint_id_index[bp_id] = set([record['record_id']])
record_day = utils.timestamp_to_day(record['date'])
if record_day in self.day_index:
self.day_index[record_day].add(record['record_id'])
else:
self.day_index[record_day] = set([record['record_id']])
def update(self, records):
have_updates = False
@ -81,6 +90,9 @@ class CachedMemoryStorage(MemoryStorage):
for key, index in six.iteritems(self.indexes):
index[record[key]].remove(record['record_id'])
record_day = utils.timestamp_to_day(record['date'])
self.day_index[record_day].remove(record['record_id'])
def _add_to_index(self, record_index, record, key):
record_key = record[key]
if record_key in record_index:
@ -114,6 +126,9 @@ class CachedMemoryStorage(MemoryStorage):
return self._get_record_ids_from_index(blueprint_ids,
self.blueprint_id_index)
def get_record_ids_by_days(self, days):
    """Look up record ids for the given days.

    :param days: iterable of day keys to look up in ``self.day_index``
    :returns: whatever ``_get_record_ids_from_index`` produces for those
        keys against the day index
    """
    index_by_day = self.day_index
    return self._get_record_ids_from_index(days, index_by_day)
def get_index_keys_by_record_ids(self, index_name, record_ids):
return set([key
for key, value in six.iteritems(self.indexes[index_name])
@ -149,6 +164,9 @@ class CachedMemoryStorage(MemoryStorage):
def get_user_ids(self):
return self.user_id_index.keys()
def get_first_record_day(self):
    """Return the smallest day key present in ``self.day_index``.

    :returns: the minimum key of the day index, or 0 when the index is
        empty (no records indexed yet), so callers get a usable lower
        bound instead of a ``ValueError`` from ``min()`` on an empty
        sequence.
    """
    if not self.day_index:
        # Nothing indexed yet: fall back to day 0 rather than raising.
        return 0
    return min(self.day_index)
def get_memory_storage(memory_storage_type):
if memory_storage_type == MEMORY_STORAGE_CACHED:

View File

@ -541,7 +541,7 @@ def timeline(records, **kwargs):
(record['week'] not in weeks)):
continue
day = record['date'] // (24 * 3600)
day = utils.timestamp_to_day(record['date'])
user = vault.get_user_from_runtime_storage(record['user_id'])
if record['release'] == release_name:
release_stat[day] |= set([user['seq']])

View File

@ -68,6 +68,10 @@ def week_to_date(week):
strftime('%Y-%m-%d %H:%M:%S'))
def timestamp_to_day(timestamp):
    """Convert a timestamp in seconds to a whole-day number.

    The result is the timestamp floor-divided by the number of seconds
    in a day (24 * 3600).
    """
    seconds_per_day = 24 * 3600
    return timestamp // seconds_per_day
def check_email_validity(email):
if email:
return re.match(r'[\w\d_\.-]+@([\w\d_\.-]+\.)+[\w]+', email)