Add project updated column
This should greatly reduce the number of unnecessary sync calls by storing the last time Gertty queried for changes to a project. Previously, we used the updated time of the latest change in a project; however, subsequent queries with that value would typically return the same change even though it needn't be synced. Adjusting that value by a small amount is unlikely to work reliably because the query is for a relative time and it takes some time to process. Adjusting by a larger amount (e.g., a few seconds) might miss data. Clock skew is also a concern in this system because we are subtracting the server's time from the client's time. By storing the last sync time locally, we can continue to update it past the highest value that Gerrit has, so that we eventually get queries which return no results. Clock skew is not an issue because the delta arithmetic only involves client-generated times. We can also increase the window slightly to account for query processing time without continuously syncing already-synced changes. Change-Id: I8cd0af9bd4d3669f436f169059e4b602d4d3036c
This commit is contained in:
parent
77ffdfb550
commit
00d3f6dbfd
@ -0,0 +1,33 @@
|
||||
"""Added project updated column
|
||||
|
||||
Revision ID: 38104b4c1b84
|
||||
Revises: 56e48a4a064a
|
||||
Create Date: 2014-05-31 06:52:12.452205
|
||||
|
||||
"""
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '38104b4c1b84'
|
||||
down_revision = '56e48a4a064a'
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
def upgrade():
    """Add the ``project.updated`` column, seed it, and index it.

    Every existing project's ``updated`` value is seeded with the newest
    ``updated`` timestamp found among its changes.  A project without
    any changes gets NULL, because ``max()`` over no rows yields NULL.
    """
    op.add_column('project', sa.Column('updated', sa.DateTime))

    conn = op.get_bind()

    # Every statement is wrapped in text() and bind parameters are passed
    # as a dict.  SQLAlchemy 2.0 no longer accepts raw SQL strings or
    # keyword bind parameters, while this form also works on older
    # releases.
    select_keys = sa.text("select key from project")
    select_latest = sa.text(
        "select max(updated) from change where project_key=:key")
    set_updated = sa.text(
        "update project set updated=:updated where key=:key")

    # Fetch all keys up front so no result set is still open while the
    # per-project statements run on the same connection.
    project_keys = [row[0] for row in conn.execute(select_keys).fetchall()]
    for key in project_keys:
        # An aggregate without GROUP BY always returns exactly one row,
        # so scalar() is enough to read it.
        latest = conn.execute(select_latest, {'key': key}).scalar()
        conn.execute(set_updated, {'key': key, 'updated': latest})

    op.create_index(op.f('ix_project_updated'), 'project', ['updated'], unique=False)
|
||||
|
||||
def downgrade():
    """Undo the migration: drop the index on ``project.updated``, then the column."""
    index_name = op.f('ix_project_updated')
    op.drop_index(index_name, table_name='project')
    op.drop_column('project', 'updated')
|
@ -32,6 +32,7 @@ project_table = Table(
|
||||
Column('name', String(255), index=True, unique=True, nullable=False),
|
||||
Column('subscribed', Boolean, index=True, default=False),
|
||||
Column('description', Text, nullable=False, default=''),
|
||||
Column('updated', DateTime, index=True),
|
||||
)
|
||||
change_table = Table(
|
||||
'change', metadata,
|
||||
@ -296,10 +297,6 @@ mapper(Project, project_table, properties=dict(
|
||||
change_table.c.status!='ABANDONED'),
|
||||
order_by=change_table.c.number,
|
||||
),
|
||||
updated = column_property(
|
||||
select([func.max(change_table.c.updated)]).where(
|
||||
change_table.c.project_key==project_table.c.key)
|
||||
),
|
||||
))
|
||||
mapper(Change, change_table, properties=dict(
|
||||
revisions=relationship(Revision, backref='change',
|
||||
|
@ -130,26 +130,39 @@ class SyncProjectTask(Task):
|
||||
|
||||
def run(self, sync):
|
||||
app = sync.app
|
||||
now = datetime.datetime.utcnow()
|
||||
with app.db.getSession() as session:
|
||||
project = session.getProject(self.project_key)
|
||||
query = 'project:%s' % project.name
|
||||
if project.updated:
|
||||
query += ' -age:%ss' % (int(math.ceil((datetime.datetime.utcnow()-project.updated).total_seconds())) + 0,)
|
||||
# Allow 4 seconds for request time, etc.
|
||||
query += ' -age:%ss' % (int(math.ceil((now-project.updated).total_seconds())) + 4,)
|
||||
changes = sync.get('changes/?q=%s' % query)
|
||||
self.log.debug('Query: %s ' % (query,))
|
||||
with app.db.getSession() as session:
|
||||
for c in reversed(changes):
|
||||
# The list we get is newest to oldest; if we are
|
||||
# interrupted, we will have already synced the newest
|
||||
# change and a subsequent sync will not catch up the
|
||||
# old ones. So reverse the list before we process it
|
||||
# so that the updated time is accurate.
|
||||
for c in changes:
|
||||
# For now, just sync open changes or changes already
|
||||
# in the db optionally we could sync all changes ever
|
||||
change = session.getChangeByID(c['id'])
|
||||
if change or (c['status'] not in self._closed_statuses):
|
||||
sync.submitTask(SyncChangeTask(c['id'], priority=self.priority))
|
||||
self.log.debug("Change %s update %s" % (c['id'], c['updated']))
|
||||
sync.submitTask(SetProjectUpdatedTask(self.project_key, now, priority=self.priority))
|
||||
|
||||
class SetProjectUpdatedTask(Task):
    """Task that stores a given timestamp as a project's ``updated`` value."""

    def __init__(self, project_key, updated, priority=NORMAL_PRIORITY):
        """Remember which project to touch and the timestamp to store."""
        super(SetProjectUpdatedTask, self).__init__(priority)
        self.updated = updated
        self.project_key = project_key

    def __repr__(self):
        details = (self.project_key, self.updated)
        return '<SetProjectUpdatedTask %s %s>' % details

    def run(self, sync):
        """Assign ``self.updated`` to the project fetched by ``self.project_key``."""
        db = sync.app.db
        with db.getSession() as session:
            target = session.getProject(self.project_key)
            target.updated = self.updated
|
||||
|
||||
class SyncChangeByCommitTask(Task):
|
||||
def __init__(self, commit, priority=NORMAL_PRIORITY):
|
||||
|
Loading…
x
Reference in New Issue
Block a user