
If both files have no newline at EOF, diff will notify us in the context, but that case wasn't handled by the parser. https://storyboard.openstack.org/#!/story/84 Change-Id: Ib2f1b4b4c9d8f38a983300e7026331754592705f
331 lines
12 KiB
Python
331 lines
12 KiB
Python
# Copyright 2014 OpenStack Foundation
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import logging
|
|
import difflib
|
|
import os
|
|
import re
|
|
|
|
import git
|
|
|
|
# Index constants for the nested list/tuple structures built by the diff
# classes in this module.

# Side of the diff, e.g. ``chunk.range[OLD]`` or ``line[NEW]``.
OLD = 0
NEW = 1

# Position inside a ``[start, end]`` line-number range, e.g.
# ``chunk.range[OLD][START]``.
START = 0
END = 1

# Position of the line number inside a per-side line tuple, e.g.
# ``line[OLD][LINENO]``.
LINENO = 0
# NOTE(review): LINE is not referenced by the code visible in this file;
# presumably it indexes another field of the per-side line tuple -- confirm
# against callers before relying on it.
LINE = 1
|
|
|
|
class DiffChunk(object):
    """A run of consecutive diff lines with the old and new side paired up.

    Attributes (all initialised in ``__init__``):
      oldlines, newlines: per-side lists that callers append to.
      lines: the paired ``(old, new)`` entries read by ``calcRange`` and
        ``indexOfLine``.
      first, last: flags, ``False`` until a caller sets them.
      range: ``[[old_start, old_end], [new_start, new_end]]`` as computed by
        ``calcRange``; a value of 0 means "no numbered line found".
    """

    def __init__(self):
        self.oldlines = []
        self.newlines = []
        self.first = False
        self.last = False
        self.lines = []
        self.calcRange()

    def __repr__(self):
        return '<%s old lines %s-%s / new lines %s-%s>' % (
            self.__class__.__name__,
            self.range[OLD][START], self.range[OLD][END],
            self.range[NEW][START], self.range[NEW][END])

    def calcRange(self):
        """Recompute ``self.range`` from the line numbers in ``self.lines``.

        For each side, the start is the first non-``None`` line number found
        scanning forwards and the end is the first one found scanning
        backwards.  A side with no numbered lines keeps 0 for both.
        """
        # ``lines`` may have been assigned a lazy iterable (for example the
        # result of zip() on Python 3).  Materialise it once so that it can
        # be reverse-sliced below and iterated again by indexOfLine().
        if not isinstance(self.lines, (list, tuple)):
            self.lines = list(self.lines)

        self.range = [[0, 0],
                      [0, 0]]
        for entry in self.lines:
            if self.range[OLD][START] == 0 and entry[OLD][LINENO] is not None:
                self.range[OLD][START] = entry[OLD][LINENO]
            if self.range[NEW][START] == 0 and entry[NEW][LINENO] is not None:
                self.range[NEW][START] = entry[NEW][LINENO]
            # Stop as soon as both sides have a start.
            if (self.range[OLD][START] != 0 and
                self.range[NEW][START] != 0):
                break

        for entry in self.lines[::-1]:
            if self.range[OLD][END] == 0 and entry[OLD][LINENO] is not None:
                self.range[OLD][END] = entry[OLD][LINENO]
            if self.range[NEW][END] == 0 and entry[NEW][LINENO] is not None:
                self.range[NEW][END] = entry[NEW][LINENO]
            # Stop as soon as both sides have an end.
            if (self.range[OLD][END] != 0 and
                self.range[NEW][END] != 0):
                break

    def indexOfLine(self, oldnew, lineno):
        """Return the index in ``self.lines`` of the given line number.

        :param oldnew: which side to search (``OLD`` or ``NEW``).
        :param lineno: the line number to look for on that side.
        :returns: the index of the first matching entry, or ``None`` when no
          entry on that side carries ``lineno``.
        """
        for i, entry in enumerate(self.lines):
            if entry[oldnew][LINENO] == lineno:
                return i
        return None
|
|
|
|
class DiffContextChunk(DiffChunk):
    """Chunk type used for context lines (see ``DiffFile.addContextLine``)."""

    # Class-level flag distinguishing this chunk type from DiffChangedChunk.
    context = True
|
|
|
|
class DiffChangedChunk(DiffChunk):
    """Chunk type used for removed/added lines (see ``DiffFile.addDiffLines``)."""

    # Class-level flag distinguishing this chunk type from DiffContextChunk.
    context = False
|
|
|
|
class DiffFile(object):
    """Accumulates the parsed diff of a single file as a list of chunks.

    Attributes:
      oldname, newname: file names on each side (``None`` until set).
      chunks: finalized chunks, in order.
      current_chunk: the chunk currently being filled, or ``None``.
      old_lineno, new_lineno: the line number the next appended line gets on
        each side; callers set these when a hunk header is parsed.
      offset: running count of new lines minus old lines added by
        ``addDiffLines``, used to pad the shorter side.
    """

    def __init__(self):
        self.newname = None
        self.oldname = None
        self.chunks = []
        self.current_chunk = None
        self.old_lineno = 0
        self.new_lineno = 0
        self.offset = 0

    def finalize(self):
        """Close the chunk being built and append it to ``self.chunks``.

        Does nothing when there is no current chunk.
        """
        if not self.current_chunk:
            return
        # Build a real list: on Python 3 zip() returns a one-shot iterator,
        # which cannot be reverse-sliced by calcRange() nor iterated again
        # later by indexOfLine().
        self.current_chunk.lines = list(zip(self.current_chunk.oldlines,
                                            self.current_chunk.newlines))
        if not self.chunks:
            self.current_chunk.first = True
        else:
            # The previously appended chunk is no longer the last one.
            self.chunks[-1].last = False
        self.current_chunk.last = True
        self.current_chunk.calcRange()
        self.chunks.append(self.current_chunk)
        self.current_chunk = None

    def addDiffLines(self, old, new):
        """Append removed (``old``) and added (``new``) lines to a changed chunk.

        A current chunk of a different type is finalized first.  After the
        lines are appended, the shorter side is padded with
        ``(None, '', '')`` placeholders so both sides have equal length.

        :param old: iterable of removed-line contents.
        :param new: iterable of added-line contents.
        """
        if (self.current_chunk and
            not isinstance(self.current_chunk, DiffChangedChunk)):
            self.finalize()
        if not self.current_chunk:
            self.current_chunk = DiffChangedChunk()
        for text in old:
            self.current_chunk.oldlines.append((self.old_lineno, '-', text))
            self.old_lineno += 1
            self.offset -= 1
        for text in new:
            self.current_chunk.newlines.append((self.new_lineno, '+', text))
            self.new_lineno += 1
            self.offset += 1
        # More new lines than old ones: pad the old side.
        while self.offset > 0:
            self.current_chunk.oldlines.append((None, '', ''))
            self.offset -= 1
        # More old lines than new ones: pad the new side.
        while self.offset < 0:
            self.current_chunk.newlines.append((None, '', ''))
            self.offset += 1

    def addNewLine(self, line):
        """Ensure the current chunk is a changed chunk.

        NOTE(review): ``line`` is accepted but never stored and no line
        counter is advanced; this looks unfinished -- confirm whether any
        caller relies on it.
        """
        if (self.current_chunk and
            not isinstance(self.current_chunk, DiffChangedChunk)):
            self.finalize()
        if not self.current_chunk:
            self.current_chunk = DiffChangedChunk()

    def addContextLine(self, line):
        """Append an unchanged line to both sides of a context chunk.

        A current chunk of a different type is finalized first.

        :param line: the line content, stored identically on both sides.
        """
        if (self.current_chunk and
            not isinstance(self.current_chunk, DiffContextChunk)):
            self.finalize()
        if not self.current_chunk:
            self.current_chunk = DiffContextChunk()
        self.current_chunk.oldlines.append((self.old_lineno, ' ', line))
        self.current_chunk.newlines.append((self.new_lineno, ' ', line))
        self.old_lineno += 1
        self.new_lineno += 1
|
|
|
|
class GitCheckoutError(Exception):
    """Raised by ``Repo.checkout`` when the git checkout command fails.

    The message is also kept on the ``msg`` attribute.
    """

    def __init__(self, msg):
        super(GitCheckoutError, self).__init__(msg)
        self.msg = msg
|
|
|
|
class Repo(object):
    """Wrapper around a local git checkout, used to fetch and diff changes."""

    # Matches a unified-diff hunk header such as ``@@ -12,7 +12,9 @@``.
    # Groups 1 and 3 are the old and new starting line numbers.  Raw string
    # so that ``\d`` and ``\+`` are not treated as (invalid) string escapes.
    header_re = re.compile(r'@@ -(\d+)(,\d+)? \+(\d+)(,\d+)? @@')

    def __init__(self, url, path):
        """Remember the repository location, cloning it if ``path`` is absent.

        :param url: remote URL to clone from.
        :param path: local filesystem path of the working copy.
        """
        self.log = logging.getLogger('gertty.gitrepo')
        self.url = url
        self.path = path
        self.differ = difflib.Differ()
        if not os.path.exists(path):
            git.Repo.clone_from(self.url, self.path)

    def fetch(self, url, refspec):
        """Fetch ``refspec`` from ``url`` into the local repository."""
        repo = git.Repo(self.path)
        try:
            repo.git.fetch(url, refspec)
        except AssertionError:
            # Retry once.  NOTE(review): presumably works around a
            # transient AssertionError raised by GitPython -- confirm.
            repo.git.fetch(url, refspec)

    def checkout(self, ref):
        """Check out ``ref`` in the working copy.

        :raises GitCheckoutError: when git reports a command error; the
          message is git's stderr with tab characters replaced.
        """
        repo = git.Repo(self.path)
        try:
            repo.git.checkout(ref)
        except git.exc.GitCommandError as e:
            raise GitCheckoutError(e.stderr.replace('\t', ' '))

    def diffstat(self, old, new):
        """Return ``git diff -M --numstat`` output split into fields.

        :returns: one list per output line, split on tabs (added, removed,
          filename).  Empty git output yields ``[['']]``.
        """
        repo = git.Repo(self.path)
        diff = repo.git.diff('-M', '--numstat', old, new)
        # Added, removed, filename
        return [x.split('\t') for x in diff.split('\n')]

    @staticmethod
    def _markIntraline(style, text, hints):
        """Split ``text`` into ``(attribute, substring)`` segments.

        ``hints`` is the body of a difflib '?' line: a non-space character at
        position *i* marks ``text[i]`` as changed.  Changed runs get the
        attribute ``style + '-word'``, unchanged runs ``style + '-line'``.
        """
        segments = []
        accumulator = ''
        emphasis = False
        for i, c in enumerate(text):
            # Positions past the end of the hint line are unmarked.
            indicator = hints[i] if i < len(hints) else ' '
            marked = indicator != ' '
            if marked != emphasis:
                # Emphasis toggles here: flush what was collected so far.
                if accumulator:
                    suffix = '-word' if emphasis else '-line'
                    segments.append((style + suffix, accumulator))
                accumulator = ''
                emphasis = marked
            accumulator += c
        if accumulator:
            suffix = '-word' if emphasis else '-line'
            segments.append((style + suffix, accumulator))
        return segments

    @staticmethod
    def _appendWholeLine(style, text, output_old, output_new):
        """Emit a line that had no '?' hint as a single-attribute tuple.

        Lines that difflib considers common to both inputs (``'context'``)
        are emitted on both sides so that each output keeps exactly one
        entry per input line.
        """
        if style in ('removed', 'context'):
            output_old.append(('removed-line', text))
        if style in ('added', 'context'):
            output_new.append(('added-line', text))

    def intralineDiff(self, old, new):
        """Compute per-line display markup for a block of changed lines.

        :param old: list of removed lines.
        :param new: list of added lines.
        :returns: ``(output_old, output_new)``.  Each entry is either a
          ``(attribute, text)`` tuple for a line without intraline hints, or
          a list of such tuples when difflib supplied a '?' hint line.
          ``output_old`` has one entry per line of ``old`` and
          ``output_new`` one per line of ``new``.
        """
        prevline = None
        prevstyle = None
        output_old = []
        output_new = []
        for line in self.differ.compare(old, new):
            key = line[0]
            rest = line[2:]
            if key == '?':
                # Hint line for the line just seen; it ends with a newline.
                segments = self._markIntraline(prevstyle, prevline, rest[:-1])
                if prevstyle == 'added':
                    output_new.append(segments)
                elif prevstyle == 'removed':
                    output_old.append(segments)
                prevline = None
                continue
            # The previous line got no hint line: emit it unmarked.
            if prevline is not None:
                self._appendWholeLine(prevstyle, prevline,
                                      output_old, output_new)
            if key == '+':
                prevstyle = 'added'
            elif key == '-':
                prevstyle = 'removed'
            elif key == ' ':
                # difflib found this line in both inputs.  Track it
                # separately so it is not filed under the preceding line's
                # style (which dropped it from one side and broke the line
                # numbering done by DiffFile.addDiffLines).
                prevstyle = 'context'
            prevline = rest
        if prevline is not None:
            self._appendWholeLine(prevstyle, prevline, output_old, output_new)
        return output_old, output_new

    def diff(self, old, new, context=10000):
        """Return a list of ``DiffFile`` objects describing ``old`` -> ``new``.

        :param old: ref of the old commit.
        :param new: ref of the new commit.
        :param context: number of context lines requested from git.
        :raises Exception: on a diff line with an unrecognised prefix that is
          not the final line.
        """
        repo = git.Repo(self.path)
        oldc = repo.commit(old)
        newc = repo.commit(new)
        files = []
        for diff_context in oldc.diff(newc, create_patch=True, U=context):
            # Each iteration of this is a file
            f = DiffFile()
            files.append(f)
            if diff_context.rename_from:
                f.oldname = diff_context.rename_from
            if diff_context.rename_to:
                f.newname = diff_context.rename_to
            oldchunk = []
            newchunk = []
            prev_key = ''
            # File headers ('---' / '+++') only occur before the first hunk.
            # Inside a hunk, a line starting with '---' or '+++' is a
            # removed/added line whose content begins with '--' or '++'.
            in_hunk = False
            diff_lines = diff_context.diff.split('\n')
            for i, line in enumerate(diff_lines):
                last_line = (i == len(diff_lines)-1)
                if not in_hunk and line.startswith('---'):
                    # Strip the '--- a/' prefix.
                    f.oldname = line[6:]
                    if line[4:] == '/dev/null':
                        f.oldname = 'Empty file'
                    continue
                if not in_hunk and line.startswith('+++'):
                    # Strip the '+++ b/' prefix.
                    f.newname = line[6:]
                    if line[4:] == '/dev/null':
                        f.newname = 'Empty file'
                    continue
                if line.startswith('@@'):
                    in_hunk = True
                    m = self.header_re.match(line)
                    f.old_lineno = int(m.group(1))
                    f.new_lineno = int(m.group(3))
                    continue
                if not line:
                    if prev_key != '\\':
                        # Strangely, we get an extra newline in the
                        # diff in the case that the last line is "\ No
                        # newline at end of file".  This is a
                        # workaround for that.
                        prev_key = ''
                        line = 'X '
                    else:
                        line = ' '
                key = line[0]
                rest = line[1:]
                if key == '\\':
                    # This is for "\ No newline at end of file" which
                    # follows either a -, + or ' ' line to indicate
                    # which file it's talking about (or both).  For
                    # now, treat it like normal text and let the user
                    # infer from context that it's not actually in the
                    # file.  Potential TODO: highlight it to make that
                    # more clear.
                    if prev_key:
                        key = prev_key
                    else:
                        key = ' '
                    prev_key = '\\'
                if key == '-':
                    prev_key = '-'
                    oldchunk.append(rest)
                    if not last_line:
                        continue
                if key == '+':
                    prev_key = '+'
                    newchunk.append(rest)
                    if not last_line:
                        continue
                prev_key = ''
                # end of chunk
                if oldchunk or newchunk:
                    oldchunk, newchunk = self.intralineDiff(oldchunk, newchunk)
                    f.addDiffLines(oldchunk, newchunk)
                oldchunk = []
                newchunk = []
                if key == ' ':
                    f.addContextLine(rest)
                    continue
                if not last_line:
                    raise Exception("Unhandled line: %s" % line)
            f.finalize()
        return files
|