From 00d3f6dbfd1d765fabefb3c97767575881e9a054 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Sat, 31 May 2014 07:30:24 -0700 Subject: [PATCH] Add project updated column This should greatly reduce the number of unnecessary sync calls by storing the last time gertty queried for changes to a project. Previously, we used the updated time of the latest change in a project; however, subsequent queries with that value would typically return the same change even though it needn't be synced. Adjusting that value by a small amount is unlikely to work reliably because the query is for a relative time and it takes some time to process. Adjusting for a larger amount (e.g., a few seconds) might miss data. Clock skew is also a concern in this system because we are subtracting the server time from the client's time. By storing the last sync time locally, we can continue to update it past the highest value that gerrit has, so that we eventually get queries which return no results. Clock skew is not an issue because the delta arithmetic only involves client-generated times. We can also increase the window slightly to account for query processing time without continuously syncing already-synced changes. 
Change-Id: I8cd0af9bd4d3669f436f169059e4b602d4d3036c --- ...104b4c1b84_added_project_updated_column.py | 33 +++++++++++++++++++ gertty/db.py | 5 +-- gertty/sync.py | 27 +++++++++++---- 3 files changed, 54 insertions(+), 11 deletions(-) create mode 100644 gertty/alembic/versions/38104b4c1b84_added_project_updated_column.py diff --git a/gertty/alembic/versions/38104b4c1b84_added_project_updated_column.py b/gertty/alembic/versions/38104b4c1b84_added_project_updated_column.py new file mode 100644 index 0000000..2451e77 --- /dev/null +++ b/gertty/alembic/versions/38104b4c1b84_added_project_updated_column.py @@ -0,0 +1,33 @@ +"""Added project updated column + +Revision ID: 38104b4c1b84 +Revises: 56e48a4a064a +Create Date: 2014-05-31 06:52:12.452205 + +""" + +# revision identifiers, used by Alembic. +revision = '38104b4c1b84' +down_revision = '56e48a4a064a' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + op.add_column('project', sa.Column('updated', sa.DateTime)) + + conn = op.get_bind() + res = conn.execute("select key, name from project") + for (key, name) in res.fetchall(): + q = sa.text("select max(updated) from change where project_key=:key") + res = conn.execute(q, key=key) + for (updated,) in res.fetchall(): + q = sa.text("update project set updated=:updated where key=:key") + conn.execute(q, key=key, updated=updated) + + op.create_index(op.f('ix_project_updated'), 'project', ['updated'], unique=False) + +def downgrade(): + op.drop_index(op.f('ix_project_updated'), table_name='project') + op.drop_column('project', 'updated') diff --git a/gertty/db.py b/gertty/db.py index 6ceea0a..f129fae 100644 --- a/gertty/db.py +++ b/gertty/db.py @@ -32,6 +32,7 @@ project_table = Table( Column('name', String(255), index=True, unique=True, nullable=False), Column('subscribed', Boolean, index=True, default=False), Column('description', Text, nullable=False, default=''), + Column('updated', DateTime, index=True), ) change_table = Table( 'change', metadata, 
@@ -296,10 +297,6 @@ mapper(Project, project_table, properties=dict( change_table.c.status!='ABANDONED'), order_by=change_table.c.number, ), - updated = column_property( - select([func.max(change_table.c.updated)]).where( - change_table.c.project_key==project_table.c.key) - ), )) mapper(Change, change_table, properties=dict( revisions=relationship(Revision, backref='change', diff --git a/gertty/sync.py b/gertty/sync.py index 9f02dd6..171a817 100644 --- a/gertty/sync.py +++ b/gertty/sync.py @@ -130,26 +130,39 @@ class SyncProjectTask(Task): def run(self, sync): app = sync.app + now = datetime.datetime.utcnow() with app.db.getSession() as session: project = session.getProject(self.project_key) query = 'project:%s' % project.name if project.updated: - query += ' -age:%ss' % (int(math.ceil((datetime.datetime.utcnow()-project.updated).total_seconds())) + 0,) + # Allow 4 seconds for request time, etc. + query += ' -age:%ss' % (int(math.ceil((now-project.updated).total_seconds())) + 4,) changes = sync.get('changes/?q=%s' % query) self.log.debug('Query: %s ' % (query,)) with app.db.getSession() as session: - for c in reversed(changes): - # The list we get is newest to oldest; if we are - # interrupted, we will have already synced the newest - # change and a subsequent sync will not catch up the - # old ones. So reverse the list before we process it - # so that the updated time is accurate. 
+ for c in changes: # For now, just sync open changes or changes already # in the db optionally we could sync all changes ever change = session.getChangeByID(c['id']) if change or (c['status'] not in self._closed_statuses): sync.submitTask(SyncChangeTask(c['id'], priority=self.priority)) self.log.debug("Change %s update %s" % (c['id'], c['updated'])) + sync.submitTask(SetProjectUpdatedTask(self.project_key, now, priority=self.priority)) + +class SetProjectUpdatedTask(Task): + def __init__(self, project_key, updated, priority=NORMAL_PRIORITY): + super(SetProjectUpdatedTask, self).__init__(priority) + self.project_key = project_key + self.updated = updated + + def __repr__(self): + return '<SetProjectUpdatedTask %s %s>' % (self.project_key, self.updated) + + def run(self, sync): + app = sync.app + with app.db.getSession() as session: + project = session.getProject(self.project_key) + project.updated = self.updated class SyncChangeByCommitTask(Task): def __init__(self, commit, priority=NORMAL_PRIORITY):