From 6f4e031fb6a13324a7599ca42aa0e22497004ebf Mon Sep 17 00:00:00 2001 From: pkholkin Date: Thu, 8 May 2014 18:50:32 +0400 Subject: [PATCH] Changed record filter by date implements bp optimize-date-filtering Change-Id: I6d4157992613a8188a94e3ad041ebd36768b0077 --- dashboard/decorators.py | 29 ++++++++++++++++------------- dashboard/memory_storage.py | 18 ++++++++++++++++++ dashboard/web.py | 2 +- stackalytics/processor/utils.py | 4 ++++ 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/dashboard/decorators.py b/dashboard/decorators.py index cf60fc707..d48eb9d05 100644 --- a/dashboard/decorators.py +++ b/dashboard/decorators.py @@ -32,24 +32,21 @@ from stackalytics import version as stackalytics_version LOG = logging.getLogger(__name__) -def _get_time_filter(kwargs, ignore): - start_date = parameters.get_single_parameter(kwargs, 'start_date') +def _filter_records_by_days(ignore, start_date, end_date, memory_storage_inst): if start_date and 'start_date' not in ignore: start_date = utils.date_to_timestamp_ext(start_date) else: - start_date = 0 - end_date = parameters.get_single_parameter(kwargs, 'end_date') + start_date = memory_storage_inst.get_first_record_day() if end_date and 'end_date' not in ignore: end_date = utils.date_to_timestamp_ext(end_date) else: end_date = utils.date_to_timestamp_ext('now') - def time_filter(records): - for record in records: - if start_date <= record['date'] <= end_date: - yield record + start_day = utils.timestamp_to_day(start_date) + end_day = utils.timestamp_to_day(end_date) - return time_filter + return memory_storage_inst.get_record_ids_by_days( + six.moves.range(start_day, end_day + 1)) def record_filter(ignore=None, use_default=True): @@ -144,11 +141,17 @@ def record_filter(ignore=None, use_default=True): memory_storage_inst.get_record_ids_by_blueprint_ids( param)) - time_filter = _get_time_filter(kwargs, ignore) + start_date = parameters.get_single_parameter(kwargs, 'start_date') + end_date = parameters.get_single_parameter(kwargs, 'end_date') + + if (start_date and 'start_date' not in ignore) or ( + end_date and 'end_date' not in ignore): + record_ids &= _filter_records_by_days(ignore, + start_date, end_date, + memory_storage_inst) kwargs['record_ids'] = record_ids - kwargs['records'] = time_filter( - memory_storage_inst.get_records(record_ids)) + kwargs['records'] = memory_storage_inst.get_records(record_ids) return f(*args, **kwargs) return record_filter_decorated_function @@ -217,7 +220,7 @@ def man_days_filter(result, record, param_id): # effort of the author (author's effort is represented in patches) return - day = record['date'] // (24 * 3600) + day = utils.timestamp_to_day(record['date']) result_by_param = result[record[param_id]] if 'days' not in result_by_param: diff --git a/dashboard/memory_storage.py b/dashboard/memory_storage.py index b96dc5822..5911244dd 100644 --- a/dashboard/memory_storage.py +++ b/dashboard/memory_storage.py @@ -15,6 +15,8 @@ import six +from stackalytics.processor import utils + MEMORY_STORAGE_CACHED = 0 @@ -38,6 +40,7 @@ class CachedMemoryStorage(MemoryStorage): self.release_index = {} self.blueprint_id_index = {} self.company_name_mapping = {} + self.day_index = {} self.indexes = { 'primary_key': self.primary_key_index, @@ -60,6 +63,12 @@ class CachedMemoryStorage(MemoryStorage): else: self.blueprint_id_index[bp_id] = set([record['record_id']]) + record_day = utils.timestamp_to_day(record['date']) + if record_day in self.day_index: + self.day_index[record_day].add(record['record_id']) + else: + self.day_index[record_day] = set([record['record_id']]) + def update(self, records): have_updates = False @@ -81,6 +90,9 @@ class CachedMemoryStorage(MemoryStorage): for key, index in six.iteritems(self.indexes): index[record[key]].remove(record['record_id']) + record_day = utils.timestamp_to_day(record['date']) + self.day_index[record_day].remove(record['record_id']) + def _add_to_index(self, record_index, record, key): record_key = record[key] if record_key in record_index: @@ -114,6 +126,9 @@ class CachedMemoryStorage(MemoryStorage): return self._get_record_ids_from_index(blueprint_ids, self.blueprint_id_index) + def get_record_ids_by_days(self, days): + return self._get_record_ids_from_index(days, self.day_index) + def get_index_keys_by_record_ids(self, index_name, record_ids): return set([key for key, value in six.iteritems(self.indexes[index_name]) @@ -149,6 +164,9 @@ class CachedMemoryStorage(MemoryStorage): def get_user_ids(self): return self.user_id_index.keys() + def get_first_record_day(self): + return min(self.day_index.keys()) + def get_memory_storage(memory_storage_type): if memory_storage_type == MEMORY_STORAGE_CACHED: diff --git a/dashboard/web.py b/dashboard/web.py index e81c9c61b..a91d4acb2 100644 --- a/dashboard/web.py +++ b/dashboard/web.py @@ -541,7 +541,7 @@ def timeline(records, **kwargs): (record['week'] not in weeks)): continue - day = record['date'] // (24 * 3600) + day = utils.timestamp_to_day(record['date']) user = vault.get_user_from_runtime_storage(record['user_id']) if record['release'] == release_name: release_stat[day] |= set([user['seq']]) diff --git a/stackalytics/processor/utils.py b/stackalytics/processor/utils.py index ddc0e1709..5600afea6 100644 --- a/stackalytics/processor/utils.py +++ b/stackalytics/processor/utils.py @@ -68,6 +68,10 @@ def week_to_date(week): strftime('%Y-%m-%d %H:%M:%S')) +def timestamp_to_day(timestamp): + return timestamp // (24 * 3600) + + def check_email_validity(email): if email: return re.match(r'[\w\d_\.-]+@([\w\d_\.-]+\.)+[\w]+', email)