boartty/gertty/gitrepo.py
James E. Blair 98f94be285 Fix crash on diff of new empty file
When adding (or presumably removing) an empty file, there is no
diff header, but that's where Gertty was getting the filenames.
Instead, get them from the git blob itself.

Change-Id: Ib2908aaf1d7554df53b5fb262017d85f3a560066
Story: 161
2014-08-18 15:38:08 -07:00

463 lines
16 KiB
Python

# Copyright 2014 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import logging
import difflib
import itertools
import os
import re
import git
import gitdb
OLD = 0
NEW = 1
START = 0
END = 1
LINENO = 0
LINE = 1
class GitTimeZone(datetime.tzinfo):
"""Because we can't have nice things."""
def __init__(self, offset_seconds):
self._offset = offset_seconds
def utcoffset(self, dt):
return datetime.timedelta(seconds=self._offset)
def dst(self, dt):
return datetime.timedelta(0)
def tzname(self, dt):
return None
class CommitBlob(object):
def __init__(self):
self.path = '/COMMIT_MSG'
class CommitContext(object):
"""A git.diff.Diff for commit messages."""
def decorateGitTime(self, seconds, tz):
dt = datetime.datetime.fromtimestamp(seconds, GitTimeZone(-tz))
return dt.strftime('%Y-%m-%d %H:%M:%S %Z%z')
def decorateMessage(self, commit):
"""Put the Gerrit commit metadata at the front of the message.
e.g.:
Parent: cc8a51ca (Initial commit) 1
Author: Robert Collins <rbtcollins@hp.com> 2
AuthorDate: 2014-05-27 14:05:47 +1200 3
Commit: Robert Collins <rbtcollins@hp.com> 4
CommitDate: 2014-05-27 14:07:57 +1200 5
6
"""
# NB: If folk report that commits have comments at the wrong place
# Then this function, which reproduces gerrit behaviour, will need
# to be fixed (e.g. by making the behaviour match more closely.
if not commit:
return []
if commit.parents:
parentsha = commit.parents[0].hexsha[:8]
else:
parentsha = None
author = commit.author
committer = commit.committer
author_date = self.decorateGitTime(
commit.authored_date, commit.author_tz_offset)
commit_date = self.decorateGitTime(
commit.committed_date, commit.committer_tz_offset)
return ["Parent: %s\n" % parentsha,
"Author: %s <%s>\n" % (author.name, author.email),
"AuthorDate: %s\n" % author_date,
"Commit: %s <%s>\n" % (committer.name, committer.email),
"CommitDate: %s\n" % commit_date,
"\n"] + commit.message.splitlines(True)
def __init__(self, old, new):
"""Create a CommitContext.
:param old: A git.objects.commit object or None.
:param new: A git.objects.commit object.
"""
self.rename_from = self.rename_to = None
if old is None:
self.new_file = True
else:
self.new_file = False
self.deleted_file = False
self.a_blob = CommitBlob()
self.b_blob = CommitBlob()
self.diff = ''.join(difflib.unified_diff(
self.decorateMessage(old), self.decorateMessage(new),
fromfile="/a/COMMIT_MSG", tofile="/b/COMMIT_MSG"))
class DiffChunk(object):
def __init__(self):
self.oldlines = []
self.newlines = []
self.first = False
self.last = False
self.lines = []
self.calcRange()
def __repr__(self):
return '<%s old lines %s-%s / new lines %s-%s>' % (
self.__class__.__name__,
self.range[OLD][START], self.range[OLD][END],
self.range[NEW][START], self.range[NEW][END])
def calcRange(self):
self.range = [[0, 0],
[0, 0]]
for l in self.lines:
if self.range[OLD][START] == 0 and l[OLD][LINENO] is not None:
self.range[OLD][START] = l[OLD][LINENO]
if self.range[NEW][START] == 0 and l[NEW][LINENO] is not None:
self.range[NEW][START] = l[NEW][LINENO]
if (self.range[OLD][START] != 0 and
self.range[NEW][START] != 0):
break
for l in self.lines[::-1]:
if self.range[OLD][END] == 0 and l[OLD][LINENO] is not None:
self.range[OLD][END] = l[OLD][LINENO]
if self.range[NEW][END] == 0 and l[NEW][LINENO] is not None:
self.range[NEW][END] = l[NEW][LINENO]
if (self.range[OLD][END] != 0 and
self.range[NEW][END] != 0):
break
def indexOfLine(self, oldnew, lineno):
for i, l in enumerate(self.lines):
if l[oldnew][LINENO] == lineno:
return i
class DiffContextChunk(DiffChunk):
context = True
class DiffChangedChunk(DiffChunk):
context = False
class DiffFile(object):
def __init__(self):
self.newname = None
self.oldname = None
self.chunks = []
self.current_chunk = None
self.old_lineno = 0
self.new_lineno = 0
self.offset = 0
def finalize(self):
if not self.current_chunk:
return
self.current_chunk.lines = zip(self.current_chunk.oldlines,
self.current_chunk.newlines)
if not self.chunks:
self.current_chunk.first = True
else:
self.chunks[-1].last = False
self.current_chunk.last = True
self.current_chunk.calcRange()
self.chunks.append(self.current_chunk)
self.current_chunk = None
def addDiffLines(self, old, new):
if (self.current_chunk and
not isinstance(self.current_chunk, DiffChangedChunk)):
self.finalize()
if not self.current_chunk:
self.current_chunk = DiffChangedChunk()
for l in old:
self.current_chunk.oldlines.append((self.old_lineno, '-', l))
self.old_lineno += 1
self.offset -= 1
for l in new:
self.current_chunk.newlines.append((self.new_lineno, '+', l))
self.new_lineno += 1
self.offset += 1
while self.offset > 0:
self.current_chunk.oldlines.append((None, '', ''))
self.offset -= 1
while self.offset < 0:
self.current_chunk.newlines.append((None, '', ''))
self.offset += 1
def addNewLine(self, line):
if (self.current_chunk and
not isinstance(self.current_chunk, DiffChangedChunk)):
self.finalize()
if not self.current_chunk:
self.current_chunk = DiffChangedChunk()
def addContextLine(self, line):
if (self.current_chunk and
not isinstance(self.current_chunk, DiffContextChunk)):
self.finalize()
if not self.current_chunk:
self.current_chunk = DiffContextChunk()
self.current_chunk.oldlines.append((self.old_lineno, ' ', line))
self.current_chunk.newlines.append((self.new_lineno, ' ', line))
self.old_lineno += 1
self.new_lineno += 1
class GitCheckoutError(Exception):
def __init__(self, msg):
super(GitCheckoutError, self).__init__(msg)
self.msg = msg
class Repo(object):
def __init__(self, url, path):
self.log = logging.getLogger('gertty.gitrepo')
self.url = url
self.path = path
self.differ = difflib.Differ()
if not os.path.exists(path):
git.Repo.clone_from(self.url, self.path)
def hasCommit(self, sha):
repo = git.Repo(self.path)
try:
repo.commit(sha)
except gitdb.exc.BadObject:
return False
return True
def fetch(self, url, refspec):
repo = git.Repo(self.path)
try:
repo.git.fetch(url, refspec)
except AssertionError:
repo.git.fetch(url, refspec)
def checkout(self, ref):
repo = git.Repo(self.path)
try:
repo.git.checkout(ref)
except git.exc.GitCommandError as e:
raise GitCheckoutError(e.stderr.replace('\t', ' '))
def cherryPick(self, ref):
repo = git.Repo(self.path)
try:
repo.git.cherry_pick(ref)
except git.exc.GitCommandError as e:
raise GitCheckoutError(e.stderr.replace('\t', ' '))
def diffstat(self, old, new):
repo = git.Repo(self.path)
diff = repo.git.diff('-M', '--numstat', old, new)
ret = []
for x in diff.split('\n'):
# Added, removed, filename
ret.append(x.split('\t'))
return ret
def intralineDiff(self, old, new):
# takes a list of old lines and a list of new lines
prevline = None
prevstyle = None
output_old = []
output_new = []
#self.log.debug('startold' + repr(old))
#self.log.debug('startnew' + repr(new))
for line in self.differ.compare(old, new):
#self.log.debug('diff output: ' + line)
key = line[0]
rest = line[2:]
if key == '?':
result = []
accumulator = ''
emphasis = False
rest = rest[:-1] # It has a newline.
for i, c in enumerate(prevline):
if i >= len(rest):
indicator = ' '
else:
indicator = rest[i]
#self.log.debug('%s %s %s %s %s' % (i, c, indicator, emphasis, accumulator))
if indicator != ' ' and not emphasis:
# changing from not emph to emph
if accumulator:
result.append((prevstyle+'-line', accumulator))
accumulator = ''
emphasis = True
elif indicator == ' ' and emphasis:
# changing from emph to not emph
if accumulator:
result.append((prevstyle+'-word', accumulator))
accumulator = ''
emphasis = False
accumulator += c
if accumulator:
if emphasis:
result.append((prevstyle+'-word', accumulator))
else:
result.append((prevstyle+'-line', accumulator))
if prevstyle == 'added':
output_new.append(result)
elif prevstyle == 'removed':
output_old.append(result)
prevline = None
continue
if prevline is not None:
if prevstyle == 'added' or prevstyle == 'context':
output_new.append((prevstyle+'-line', prevline))
if prevstyle == 'removed' or prevstyle == 'context':
output_old.append((prevstyle+'-line', prevline))
if key == '+':
prevstyle = 'added'
elif key == '-':
prevstyle = 'removed'
elif key == ' ':
prevstyle = 'context'
prevline = rest
#self.log.debug('prev'+repr(prevline))
if prevline is not None:
if prevstyle == 'added':
output_new.append((prevstyle+'-line', prevline))
elif prevstyle == 'removed':
output_old.append((prevstyle+'-line', prevline))
#self.log.debug(repr(output_old))
#self.log.debug(repr(output_new))
return output_old, output_new
header_re = re.compile('@@ -(\d+)(,\d+)? \+(\d+)(,\d+)? @@')
def diff(self, old, new, context=10000, show_old_commit=False):
"""Create a diff from old to new.
Note that the commit message is also diffed, and listed as /COMMIT_MSG.
"""
repo = git.Repo(self.path)
#'-y', '-x', 'diff -C10', old, new, path).split('\n'):
oldc = repo.commit(old)
newc = repo.commit(new)
files = []
extra_contexts = []
if show_old_commit:
extra_contexts.append(CommitContext(oldc, newc))
else:
extra_contexts.append(CommitContext(None, newc))
contexts = itertools.chain(
extra_contexts, oldc.diff(newc, create_patch=True, U=context))
for diff_context in contexts:
# Each iteration of this is a file
f = DiffFile()
if diff_context.a_blob:
f.oldname = diff_context.a_blob.path
if diff_context.b_blob:
f.newname = diff_context.b_blob.path
if diff_context.new_file:
f.oldname = 'Empty file'
if diff_context.deleted_file:
f.newname = 'Empty file'
files.append(f)
if diff_context.rename_from:
f.oldname = diff_context.rename_from
if diff_context.rename_to:
f.newname = diff_context.rename_to
oldchunk = []
newchunk = []
prev_key = ''
diff_lines = diff_context.diff.split('\n')
for i, line in enumerate(diff_lines):
last_line = (i == len(diff_lines)-1)
if line.startswith('---'):
continue
if line.startswith('+++'):
continue
if line.startswith('@@'):
#socket.sendall(line)
m = self.header_re.match(line)
#socket.sendall(str(m.groups()))
f.old_lineno = int(m.group(1))
f.new_lineno = int(m.group(3))
continue
if not line:
if prev_key != '\\':
# Strangely, we get an extra newline in the
# diff in the case that the last line is "\ No
# newline at end of file". This is a
# workaround for that.
prev_key = ''
line = 'X '
else:
line = ' '
key = line[0]
rest = line[1:]
if key == '\\':
# This is for "\ No newline at end of file" which
# follows either a -, + or ' ' line to indicate
# which file it's talking about (or both). For
# now, treat it like normal text and let the user
# infer from context that it's not actually in the
# file. Potential TODO: highlight it to make that
# more clear.
if prev_key:
key = prev_key
else:
key = ' '
prev_key = '\\'
if key == '-':
prev_key = '-'
oldchunk.append(rest)
if not last_line:
continue
if key == '+':
prev_key = '+'
newchunk.append(rest)
if not last_line:
continue
prev_key = ''
# end of chunk
if oldchunk or newchunk:
oldchunk, newchunk = self.intralineDiff(oldchunk, newchunk)
f.addDiffLines(oldchunk, newchunk)
oldchunk = []
newchunk = []
if key == ' ':
f.addContextLine(rest)
continue
if line.startswith("similarity index"):
continue
if line.startswith("rename"):
continue
if line.startswith("index"):
continue
if not last_line:
raise Exception("Unhandled line: %s" % line)
f.finalize()
return files
def getFile(self, old, new, path):
f = DiffFile()
f.oldname = path
f.newname = path
f.old_lineno = 1
f.new_lineno = 1
repo = git.Repo(self.path)
newc = repo.commit(new)
blob = newc.tree[path]
for line in blob.data_stream.read().splitlines():
f.addContextLine(line)
f.finalize()
return f