boartty/gertty/gitrepo.py
James E. Blair e5666d006b Fix handling no newline at EOF in both files
If both files have no newline at EOF, diff will notify us in the
context, but that case wasn't handled by the parser.

https://storyboard.openstack.org/#!/story/84

Change-Id: Ib2f1b4b4c9d8f38a983300e7026331754592705f
2014-05-17 16:00:26 -04:00

331 lines
12 KiB
Python

# Copyright 2014 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import difflib
import os
import re
import git
OLD = 0
NEW = 1
START = 0
END = 1
LINENO = 0
LINE = 1
class DiffChunk(object):
def __init__(self):
self.oldlines = []
self.newlines = []
self.first = False
self.last = False
self.lines = []
self.calcRange()
def __repr__(self):
return '<%s old lines %s-%s / new lines %s-%s>' % (
self.__class__.__name__,
self.range[OLD][START], self.range[OLD][END],
self.range[NEW][START], self.range[NEW][END])
def calcRange(self):
self.range = [[0, 0],
[0, 0]]
for l in self.lines:
if self.range[OLD][START] == 0 and l[OLD][LINENO] is not None:
self.range[OLD][START] = l[OLD][LINENO]
if self.range[NEW][START] == 0 and l[NEW][LINENO] is not None:
self.range[NEW][START] = l[NEW][LINENO]
if (self.range[OLD][START] != 0 and
self.range[NEW][START] != 0):
break
for l in self.lines[::-1]:
if self.range[OLD][END] == 0 and l[OLD][LINENO] is not None:
self.range[OLD][END] = l[OLD][LINENO]
if self.range[NEW][END] == 0 and l[NEW][LINENO] is not None:
self.range[NEW][END] = l[NEW][LINENO]
if (self.range[OLD][END] != 0 and
self.range[NEW][END] != 0):
break
def indexOfLine(self, oldnew, lineno):
for i, l in enumerate(self.lines):
if l[oldnew][LINENO] == lineno:
return i
class DiffContextChunk(DiffChunk):
context = True
class DiffChangedChunk(DiffChunk):
context = False
class DiffFile(object):
def __init__(self):
self.newname = None
self.oldname = None
self.chunks = []
self.current_chunk = None
self.old_lineno = 0
self.new_lineno = 0
self.offset = 0
def finalize(self):
if not self.current_chunk:
return
self.current_chunk.lines = zip(self.current_chunk.oldlines,
self.current_chunk.newlines)
if not self.chunks:
self.current_chunk.first = True
else:
self.chunks[-1].last = False
self.current_chunk.last = True
self.current_chunk.calcRange()
self.chunks.append(self.current_chunk)
self.current_chunk = None
def addDiffLines(self, old, new):
if (self.current_chunk and
not isinstance(self.current_chunk, DiffChangedChunk)):
self.finalize()
if not self.current_chunk:
self.current_chunk = DiffChangedChunk()
for l in old:
self.current_chunk.oldlines.append((self.old_lineno, '-', l))
self.old_lineno += 1
self.offset -= 1
for l in new:
self.current_chunk.newlines.append((self.new_lineno, '+', l))
self.new_lineno += 1
self.offset += 1
while self.offset > 0:
self.current_chunk.oldlines.append((None, '', ''))
self.offset -= 1
while self.offset < 0:
self.current_chunk.newlines.append((None, '', ''))
self.offset += 1
def addNewLine(self, line):
if (self.current_chunk and
not isinstance(self.current_chunk, DiffChangedChunk)):
self.finalize()
if not self.current_chunk:
self.current_chunk = DiffChangedChunk()
def addContextLine(self, line):
if (self.current_chunk and
not isinstance(self.current_chunk, DiffContextChunk)):
self.finalize()
if not self.current_chunk:
self.current_chunk = DiffContextChunk()
self.current_chunk.oldlines.append((self.old_lineno, ' ', line))
self.current_chunk.newlines.append((self.new_lineno, ' ', line))
self.old_lineno += 1
self.new_lineno += 1
class GitCheckoutError(Exception):
def __init__(self, msg):
super(GitCheckoutError, self).__init__(msg)
self.msg = msg
class Repo(object):
def __init__(self, url, path):
self.log = logging.getLogger('gertty.gitrepo')
self.url = url
self.path = path
self.differ = difflib.Differ()
if not os.path.exists(path):
git.Repo.clone_from(self.url, self.path)
def fetch(self, url, refspec):
repo = git.Repo(self.path)
try:
repo.git.fetch(url, refspec)
except AssertionError:
repo.git.fetch(url, refspec)
def checkout(self, ref):
repo = git.Repo(self.path)
try:
repo.git.checkout(ref)
except git.exc.GitCommandError as e:
raise GitCheckoutError(e.stderr.replace('\t', ' '))
def diffstat(self, old, new):
repo = git.Repo(self.path)
diff = repo.git.diff('-M', '--numstat', old, new)
ret = []
for x in diff.split('\n'):
# Added, removed, filename
ret.append(x.split('\t'))
return ret
def intralineDiff(self, old, new):
# takes a list of old lines and a list of new lines
prevline = None
prevstyle = None
output_old = []
output_new = []
#socket.send('startold' + repr(old)+'\n')
#socket.send('startnew' + repr(new)+'\n')
for line in self.differ.compare(old, new):
#socket.sendall('diff output: ' + line+'\n')
key = line[0]
rest = line[2:]
if key == '?':
result = []
accumulator = ''
emphasis = False
rest = rest[:-1] # It has a newline.
for i, c in enumerate(prevline):
if i >= len(rest):
indicator = ' '
else:
indicator = rest[i]
#socket.sendall('%s %s %s %s %s\n' % (i, c, indicator, emphasis, accumulator))
if indicator != ' ' and not emphasis:
# changing from not emph to emph
if accumulator:
result.append((prevstyle+'-line', accumulator))
accumulator = ''
emphasis = True
elif indicator == ' ' and emphasis:
# changing from emph to not emph
if accumulator:
result.append((prevstyle+'-word', accumulator))
accumulator = ''
emphasis = False
accumulator += c
if accumulator:
if emphasis:
result.append((prevstyle+'-word', accumulator))
else:
result.append((prevstyle+'-line', accumulator))
if prevstyle == 'added':
output_new.append(result)
elif prevstyle == 'removed':
output_old.append(result)
prevline = None
continue
if prevline is not None:
if prevstyle == 'added':
output_new.append((prevstyle+'-line', prevline))
elif prevstyle == 'removed':
output_old.append((prevstyle+'-line', prevline))
if key == '+':
prevstyle = 'added'
elif key == '-':
prevstyle = 'removed'
prevline = rest
#socket.sendall('prev'+repr(prevline)+'\n')
if prevline is not None:
if prevstyle == 'added':
output_new.append((prevstyle+'-line', prevline))
elif prevstyle == 'removed':
output_old.append((prevstyle+'-line', prevline))
#socket.sendall(repr(output_old)+'\n')
#socket.sendall(repr(output_new)+'\n')
#socket.sendall('\n')
return output_old, output_new
header_re = re.compile('@@ -(\d+)(,\d+)? \+(\d+)(,\d+)? @@')
def diff(self, old, new, context=10000):
repo = git.Repo(self.path)
#'-y', '-x', 'diff -C10', old, new, path).split('\n'):
oldc = repo.commit(old)
newc = repo.commit(new)
files = []
for diff_context in oldc.diff(newc, create_patch=True, U=context):
# Each iteration of this is a file
f = DiffFile()
files.append(f)
if diff_context.rename_from:
f.oldname = diff_context.rename_from
if diff_context.rename_to:
f.newname = diff_context.rename_to
oldchunk = []
newchunk = []
prev_key = ''
diff_lines = diff_context.diff.split('\n')
for i, line in enumerate(diff_lines):
last_line = (i == len(diff_lines)-1)
if line.startswith('---'):
f.oldname = line[6:]
if line[4:] == '/dev/null':
f.oldname = 'Empty file'
continue
if line.startswith('+++'):
f.newname = line[6:]
if line[4:] == '/dev/null':
f.newname = 'Empty file'
continue
if line.startswith('@@'):
#socket.sendall(line)
m = self.header_re.match(line)
#socket.sendall(str(m.groups()))
f.old_lineno = int(m.group(1))
f.new_lineno = int(m.group(3))
continue
if not line:
if prev_key != '\\':
# Strangely, we get an extra newline in the
# diff in the case that the last line is "\ No
# newline at end of file". This is a
# workaround for that.
prev_key = ''
line = 'X '
else:
line = ' '
key = line[0]
rest = line[1:]
if key == '\\':
# This is for "\ No newline at end of file" which
# follows either a -, + or ' ' line to indicate
# which file it's talking about (or both). For
# now, treat it like normal text and let the user
# infer from context that it's not actually in the
# file. Potential TODO: highlight it to make that
# more clear.
if prev_key:
key = prev_key
else:
key = ' '
prev_key = '\\'
if key == '-':
prev_key = '-'
oldchunk.append(rest)
if not last_line:
continue
if key == '+':
prev_key = '+'
newchunk.append(rest)
if not last_line:
continue
prev_key = ''
# end of chunk
if oldchunk or newchunk:
oldchunk, newchunk = self.intralineDiff(oldchunk, newchunk)
f.addDiffLines(oldchunk, newchunk)
oldchunk = []
newchunk = []
if key == ' ':
f.addContextLine(rest)
continue
if not last_line:
raise Exception("Unhandled line: %s" % line)
f.finalize()
return files