Fix whitespace ignore feature

When we added full file content syntax highlighting on the client we broke the ignore whitespace feature, because the client was trying to recreate the complete file content for the B side using an edit list that came from the ignored whitespace difference, but was building from a sparse file content of B which was computed from the actual difference (that is, including whitespace). Although it seems like the edit list used shouldn't matter, the line indexes for the A side were thrown out of whack because different line ranges of B matched against A when whitespace was ignored. This caused us to be unable to locate a line of context in A that we expected to be present while trying to complete the sparse file content for B. The entire whitespace ignore logic has been vastly simplified. When packing content for B we now pack any context lines that differ from their matching line in A, thus ensuring we get the right variant for the side-by-side viewer to display. When pretty formatting a file, we no longer try to apply the edit list and hunk processing to the file contents, but instead do a direct walk over the line ranges available in the SparseFileContent. This lets us completely bypass the index lookups, eliminating the possibility for the bug we saw above. It also simplifies the logic quite a bit here, instead of trying to guess about what to format, we just format everything available. Change-Id: I24e0a8fab7fcbb85b6db6d35a9d04fe05d1b830d Signed-off-by: Shawn O. Pearce <sop@google.com>
2010-02-23 16:34:06 -08:00 · 2010-02-23 16:34:06 -08:00 · de16efbd60
parent b5ac61d82c
commit de16efbd60
6 changed files with 197 additions and 179 deletions
--- a/gerrit-common/src/main/java/com/google/gerrit/common/data/PatchScript.java
+++ b/gerrit-common/src/main/java/com/google/gerrit/common/data/PatchScript.java
@ -98,7 +98,7 @@ public class PatchScript {
    PrettyFormatter f = ClientSideFormatter.FACTORY.get();
    f.setPrettySettings(s);
    f.setEditFilter(PrettyFormatter.A);
-    f.setEditList(getEditList());
+    f.setEditList(edits);
    f.format(a);
    return f;
  }
@ -110,10 +110,10 @@ public class PatchScript {
    PrettyFormatter f = ClientSideFormatter.FACTORY.get();
    f.setPrettySettings(s);
    f.setEditFilter(PrettyFormatter.B);
-    f.setEditList(getEditList());
+    f.setEditList(edits);

    if (s.isSyntaxHighlighting() && a.isWholeFile() && !b.isWholeFile()) {
-      f.format(b.completeWithContext(a, getEditList()));
+      f.format(b.apply(a, edits));
    } else {
      f.format(b);
    }
@ -125,10 +125,6 @@ public class PatchScript {
  }

  public Iterable<EditList.Hunk> getHunks() {
-    return getEditList().getHunks();
-  }
-
-  private EditList getEditList() {
-    return new EditList(edits, getContext(), a.size(), b.size());
+    return new EditList(edits, getContext(), a.size(), b.size()).getHunks();
  }
 }
--- a/gerrit-httpd/src/main/java/com/google/gerrit/httpd/rpc/patch/PatchScriptBuilder.java
+++ b/gerrit-httpd/src/main/java/com/google/gerrit/httpd/rpc/patch/PatchScriptBuilder.java
@ -18,6 +18,7 @@ import com.google.gerrit.common.data.CommentDetail;
 import com.google.gerrit.common.data.PatchScript;
 import com.google.gerrit.common.data.PatchScriptSettings;
 import com.google.gerrit.common.data.PatchScript.DisplayMethod;
+import com.google.gerrit.common.data.PatchScriptSettings.Whitespace;
 import com.google.gerrit.prettify.common.EditList;
 import com.google.gerrit.prettify.common.SparseFileContent;
 import com.google.gerrit.reviewdb.Change;
@ -101,33 +102,31 @@ class PatchScriptBuilder {
    bId = b;
  }

-  PatchScript toPatchScript(final PatchListEntry contentWS,
-      final CommentDetail comments, final PatchListEntry contentAct)
-      throws IOException {
-    if (contentAct.getPatchType() == PatchType.N_WAY) {
+  PatchScript toPatchScript(final PatchListEntry content,
+      final CommentDetail comments) throws IOException {
+    if (content.getPatchType() == PatchType.N_WAY) {
      // For a diff --cc format we don't support converting it into
      // a patch script. Instead treat everything as a file header.
      //
-      return new PatchScript(change.getKey(), contentAct.getHeaderLines(),
+      return new PatchScript(change.getKey(), content.getHeaderLines(),
          settings, a.dst, b.dst, Collections.<Edit> emptyList(),
          a.displayMethod, b.displayMethod);
    }

-    a.path = oldName(contentAct);
-    b.path = newName(contentAct);
-
-    edits = new ArrayList<Edit>(contentAct.getEdits());
+    a.path = oldName(content);
+    b.path = newName(content);

    a.resolve(null, aId);
    b.resolve(a, bId);

+    edits = new ArrayList<Edit>(content.getEdits());
    ensureCommentsVisible(comments);
-    header.addAll(contentAct.getHeaderLines());
+    header.addAll(content.getHeaderLines());

    if (a.mode == FileMode.GITLINK || b.mode == FileMode.GITLINK) {

    } else if (a.src == b.src && a.size() <= context
-        && contentAct.getEdits().isEmpty()) {
+        && content.getEdits().isEmpty()) {
      // Odd special case; the files are identical (100% rename or copy)
      // and the user has asked for context that is larger than the file.
      // Send them the entire file, with an empty edit after the last line.
@ -154,16 +153,7 @@ class PatchScriptBuilder {
        //
        context = MAX_CONTEXT;
      }
-      packContent();
-    }
-
-    if (contentWS != contentAct) {
-      // The edit list we used to pack the file contents doesn't honor the
-      // whitespace settings requested. Instead we must rebuild our edit
-      // list around the whitespace edit list.
-      //
-      edits = new ArrayList<Edit>(contentWS.getEdits());
-      ensureCommentsVisible(comments);
+      packContent(settings.getWhitespace() != Whitespace.IGNORE_NONE);
    }

    return new PatchScript(change.getKey(), header, settings, a.dst, b.dst,
@ -311,19 +301,33 @@ class PatchScriptBuilder {
    return last.getBeginA() + (b - last.getEndB());
  }

-  private void packContent() {
+  private void packContent(boolean ignoredWhitespace) {
    EditList list = new EditList(edits, context, a.size(), b.size());
    for (final EditList.Hunk hunk : list.getHunks()) {
      while (hunk.next()) {
        if (hunk.isContextLine()) {
-          a.addLine(hunk.getCurA());
-          hunk.incBoth();
+          final String lineA = a.src.getLine(hunk.getCurA());
+          a.dst.addLine(hunk.getCurA(), lineA);

-        } else if (hunk.isDeletedA()) {
+          if (ignoredWhitespace) {
+            // If we ignored whitespace in some form, also get the line
+            // from b when it does not exactly match the line from a.
+            //
+            final String lineB = b.src.getLine(hunk.getCurB());
+            if (!lineA.equals(lineB)) {
+              b.dst.addLine(hunk.getCurB(), lineB);
+            }
+          }
+          hunk.incBoth();
+          continue;
+        }
+
+        if (hunk.isDeletedA()) {
          a.addLine(hunk.getCurA());
          hunk.incA();
+        }

-        } else if (hunk.isInsertedB()) {
+        if (hunk.isInsertedB()) {
          b.addLine(hunk.getCurB());
          hunk.incB();
        }
--- a/gerrit-httpd/src/main/java/com/google/gerrit/httpd/rpc/patch/PatchScriptFactory.java
+++ b/gerrit-httpd/src/main/java/com/google/gerrit/httpd/rpc/patch/PatchScriptFactory.java
@ -131,21 +131,9 @@ class PatchScriptFactory extends Handler<PatchScript> {
    try {
      final PatchList list = listFor(keyFor(settings.getWhitespace()));
      final PatchScriptBuilder b = newBuilder(list);
-      final PatchListEntry contentWS = list.get(fileName);
-
-      final PatchListEntry contentActual;
-      if (settings.getWhitespace() == Whitespace.IGNORE_NONE) {
-        contentActual = contentWS;
-      } else {
-        // If we are ignoring whitespace in some form, we still need to know
-        // where the post-image differs so we can ensure the post-image lines
-        // are still packed for the client to display.
-        //
-        contentActual = listFor(keyFor(Whitespace.IGNORE_NONE)).get(fileName);
-      }
-
+      final PatchListEntry content = list.get(fileName);
      try {
-        return b.toPatchScript(contentWS, comments, contentActual);
+        return b.toPatchScript(content, comments);
      } catch (IOException e) {
        log.error("File content unavailable", e);
        throw new NoSuchChangeException(changeId, e);
--- a/gerrit-prettify/src/main/java/com/google/gerrit/prettify/common/EditList.java
+++ b/gerrit-prettify/src/main/java/com/google/gerrit/prettify/common/EditList.java
@ -37,10 +37,6 @@ public class EditList {
    return edits;
  }

-  public EditList getFullContext() {
-    return new EditList(edits, 5000000, aSize, bSize);
-  }
-
  public Iterable<Hunk> getHunks() {
    return new Iterable<Hunk>() {
      public Iterator<Hunk> iterator() {
--- a/gerrit-prettify/src/main/java/com/google/gerrit/prettify/common/PrettyFormatter.java
+++ b/gerrit-prettify/src/main/java/com/google/gerrit/prettify/common/PrettyFormatter.java
@ -20,27 +20,16 @@ import com.google.gwtexpui.safehtml.client.SafeHtmlBuilder;
 import org.eclipse.jgit.diff.Edit;
 import org.eclipse.jgit.diff.ReplaceEdit;

+import java.util.ArrayList;
 import java.util.List;

 public abstract class PrettyFormatter implements SparseHtmlFile {
  public static abstract class EditFilter {
-    final String get(SparseFileContent src, EditList.Hunk hunk) {
-      return src.get(getCur(hunk));
-    }
-
    abstract String getStyleName();

-    abstract int getCur(EditList.Hunk hunk);
-
    abstract int getBegin(Edit edit);

    abstract int getEnd(Edit edit);
-
-    abstract boolean isModified(EditList.Hunk hunk);
-
-    abstract void incSelf(EditList.Hunk hunk);
-
-    abstract void incOther(EditList.Hunk hunk);
  }

  public static final EditFilter A = new EditFilter() {
@ -49,11 +38,6 @@ public abstract class PrettyFormatter implements SparseHtmlFile {
      return "wdd";
    }

-    @Override
-    int getCur(EditList.Hunk hunk) {
-      return hunk.getCurA();
-    }
-
    @Override
    int getBegin(Edit edit) {
      return edit.getBeginA();
@ -63,21 +47,6 @@ public abstract class PrettyFormatter implements SparseHtmlFile {
    int getEnd(Edit edit) {
      return edit.getEndA();
    }
-
-    @Override
-    boolean isModified(EditList.Hunk hunk) {
-      return hunk.isDeletedA();
-    }
-
-    @Override
-    void incSelf(EditList.Hunk hunk) {
-      hunk.incA();
-    }
-
-    @Override
-    void incOther(EditList.Hunk hunk) {
-      hunk.incB();
-    }
  };

  public static final EditFilter B = new EditFilter() {
@ -86,11 +55,6 @@ public abstract class PrettyFormatter implements SparseHtmlFile {
      return "wdi";
    }

-    @Override
-    int getCur(EditList.Hunk hunk) {
-      return hunk.getCurB();
-    }
-
    @Override
    int getBegin(Edit edit) {
      return edit.getBeginB();
@ -100,26 +64,11 @@ public abstract class PrettyFormatter implements SparseHtmlFile {
    int getEnd(Edit edit) {
      return edit.getEndB();
    }
-
-    @Override
-    boolean isModified(EditList.Hunk hunk) {
-      return hunk.isInsertedB();
-    }
-
-    @Override
-    void incSelf(EditList.Hunk hunk) {
-      hunk.incB();
-    }
-
-    @Override
-    void incOther(EditList.Hunk hunk) {
-      hunk.incA();
-    }
  };

  protected SparseFileContent content;
  protected EditFilter side;
-  protected EditList edits;
+  protected List<Edit> edits;
  protected PrettySettings settings;

  private int col;
@ -144,7 +93,7 @@ public abstract class PrettyFormatter implements SparseHtmlFile {
    side = f;
  }

-  public void setEditList(EditList all) {
+  public void setEditList(List<Edit> all) {
    edits = all;
  }

@ -364,88 +313,106 @@ public abstract class PrettyFormatter implements SparseHtmlFile {
  }

  private SafeHtml colorLineEdits(SparseFileContent src) {
+    // Make a copy of the edits with a sentinel that is after all lines
+    // in the source. That simplifies our loop below because we'll never
+    // run off the end of the edit list.
+    //
+    List<Edit> edits = new ArrayList<Edit>(this.edits.size() + 1);
+    edits.addAll(this.edits);
+    edits.add(new Edit(src.size(), src.size()));
+
    SafeHtmlBuilder buf = new SafeHtmlBuilder();

+    int curIdx = 0;
+    Edit curEdit = edits.get(curIdx);
+
    ReplaceEdit lastReplace = null;
    List<Edit> charEdits = null;
    int lastPos = 0;
    int lastIdx = 0;

-    EditList hunkGenerator = edits;
-    if (src.isWholeFile()) {
-      hunkGenerator = hunkGenerator.getFullContext();
-    }
+    for (int index = src.first(); index < src.size(); index = src.next(index)) {
+      int cmp = compare(index, curEdit);
+      while (0 < cmp) {
+        // The index is after the edit. Skip to the next edit.
+        //
+        curEdit = edits.get(curIdx++);
+        cmp = compare(index, curEdit);
+      }

-    for (final EditList.Hunk hunk : hunkGenerator.getHunks()) {
-      while (hunk.next()) {
-        if (hunk.isContextLine()) {
-          if (src.contains(side.getCur(hunk))) {
-            // If side is B and src isn't the complete file we can't
-            // add it to the buffer here. This can happen if the file
-            // was really large and we chose not to syntax highlight.
-            //
-            buf.append(side.get(src, hunk));
-            buf.append('\n');
-          }
-          hunk.incBoth();
+      if (cmp < 0) {
+        // index occurs before the edit. This is a line of context.
+        //
+        buf.append(src.get(index));
+        buf.append('\n');
+        continue;
+      }

-        } else if (!side.isModified(hunk)) {
-          side.incOther(hunk);
-
-        } else if (hunk.getCurEdit() instanceof ReplaceEdit) {
-          if (lastReplace != hunk.getCurEdit()) {
-            lastReplace = (ReplaceEdit) hunk.getCurEdit();
-            charEdits = lastReplace.getInternalEdits();
-            lastPos = 0;
-            lastIdx = 0;
-          }
-
-          final String line = side.get(src, hunk) + "\n";
-          for (int c = 0; c < line.length();) {
-            if (charEdits.size() <= lastIdx) {
-              buf.append(line.substring(c));
-              break;
-            }
-
-            final Edit edit = charEdits.get(lastIdx);
-            final int b = side.getBegin(edit) - lastPos;
-            final int e = side.getEnd(edit) - lastPos;
-
-            if (c < b) {
-              // There is text at the start of this line that is common
-              // with the other side. Copy it with no style around it.
-              //
-              final int n = Math.min(b, line.length());
-              buf.append(line.substring(c, n));
-              c = n;
-            }
-
-            if (c < e) {
-              final int n = Math.min(e, line.length());
-              buf.openSpan();
-              buf.setStyleName(side.getStyleName());
-              buf.append(line.substring(c, n));
-              buf.closeSpan();
-              c = n;
-            }
-
-            if (e <= c) {
-              lastIdx++;
-            }
-          }
-          lastPos += line.length();
-          side.incSelf(hunk);
-
-        } else {
-          buf.append(side.get(src, hunk));
-          buf.append('\n');
-          side.incSelf(hunk);
+      // index occurs within the edit. The line is a modification.
+      //
+      if (curEdit instanceof ReplaceEdit) {
+        if (lastReplace != curEdit) {
+          lastReplace = (ReplaceEdit) curEdit;
+          charEdits = lastReplace.getInternalEdits();
+          lastPos = 0;
+          lastIdx = 0;
        }
+
+        final String line = src.get(index) + "\n";
+        for (int c = 0; c < line.length();) {
+          if (charEdits.size() <= lastIdx) {
+            buf.append(line.substring(c));
+            break;
+          }
+
+          final Edit edit = charEdits.get(lastIdx);
+          final int b = side.getBegin(edit) - lastPos;
+          final int e = side.getEnd(edit) - lastPos;
+
+          if (c < b) {
+            // There is text at the start of this line that is common
+            // with the other side. Copy it with no style around it.
+            //
+            final int n = Math.min(b, line.length());
+            buf.append(line.substring(c, n));
+            c = n;
+          }
+
+          if (c < e) {
+            final int n = Math.min(e, line.length());
+            buf.openSpan();
+            buf.setStyleName(side.getStyleName());
+            buf.append(line.substring(c, n));
+            buf.closeSpan();
+            c = n;
+          }
+
+          if (e <= c) {
+            lastIdx++;
+          }
+        }
+        lastPos += line.length();
+
+      } else {
+        buf.append(src.get(index));
+        buf.append('\n');
      }
    }
    return buf;
  }

+  private int compare(int index, Edit edit) {
+    if (index < side.getBegin(edit)) {
+      return -1; // index occurs before the edit.
+
+    } else if (index < side.getEnd(edit)) {
+      return 0; // index occurs within the edit.
+
+    } else {
+      return 1; // index occurs after the edit.
+    }
+  }
+
  private SafeHtml showTabAfterSpace(SafeHtml src) {
    final String m = "( ( |<span[^>]*>|</span>)*\t)";
    final String r = "<span class=\"wse\">$1</span>";
--- a/gerrit-prettify/src/main/java/com/google/gerrit/prettify/common/SparseFileContent.java
+++ b/gerrit-prettify/src/main/java/com/google/gerrit/prettify/common/SparseFileContent.java
@ -14,6 +14,8 @@

 package com.google.gerrit.prettify.common;

+import org.eclipse.jgit.diff.Edit;
+
 import java.util.ArrayList;
 import java.util.List;

@ -78,6 +80,64 @@ public class SparseFileContent {
    return getLine(idx) != null;
  }

+  public int first() {
+    return ranges.isEmpty() ? size() : ranges.get(0).base;
+  }
+
+  public int next(final int idx) {
+    // Most requests are sequential in nature, fetching the next
+    // line from the current range, or the immediate next range.
+    //
+    int high = ranges.size();
+    if (currentRangeIdx < high) {
+      Range cur = ranges.get(currentRangeIdx);
+      if (cur.contains(idx + 1)) {
+        return idx + 1;
+      }
+
+      if (++currentRangeIdx < high) {
+        // Its not plus one, its the base of the next range.
+        //
+        return ranges.get(currentRangeIdx).base;
+      }
+    }
+
+    // Binary search for the current value, since we know its a sorted list.
+    //
+    int low = 0;
+    do {
+      final int mid = (low + high) / 2;
+      final Range cur = ranges.get(mid);
+
+      if (cur.contains(idx)) {
+        if (cur.contains(idx + 1)) {
+          // Trivial plus one case above failed due to wrong currentRangeIdx.
+          // Reset the cache so we don't miss in the future.
+          //
+          currentRangeIdx = mid;
+          return idx + 1;
+        }
+
+        if (mid + 1 < ranges.size()) {
+          // Its the base of the next range.
+          currentRangeIdx = mid + 1;
+          return ranges.get(currentRangeIdx).base;
+        }
+
+        // No more lines in the file.
+        //
+        return size();
+      }
+
+      if (idx < cur.base)
+        high = mid;
+      else
+        low = mid + 1;
+    } while (low < high);
+
+    return size();
+  }
+
  public int mapIndexToLine(int arrayIndex) {
    final int origIndex = arrayIndex;
    for (Range r : ranges) {
@ -155,19 +215,26 @@ public class SparseFileContent {
    return b.toString();
  }

-  public SparseFileContent completeWithContext(SparseFileContent a,
-      EditList editList) {
+  public SparseFileContent apply(SparseFileContent a, List<Edit> edits) {
+    EditList list = new EditList(edits, size, a.size(), size);
    ArrayList<String> lines = new ArrayList<String>(size);
-    for (final EditList.Hunk hunk : editList.getFullContext().getHunks()) {
+    for (final EditList.Hunk hunk : list.getHunks()) {
      while (hunk.next()) {
        if (hunk.isContextLine()) {
-          lines.add(a.get(hunk.getCurA()));
+          if (contains(hunk.getCurB())) {
+            lines.add(get(hunk.getCurB()));
+          } else {
+            lines.add(a.get(hunk.getCurA()));
+          }
          hunk.incBoth();
+          continue;
+        }

-        } else if (hunk.isDeletedA()) {
+        if (hunk.isDeletedA()) {
          hunk.incA();
+        }

-        } else if (hunk.isInsertedB()) {
+        if (hunk.isInsertedB()) {
          lines.add(get(hunk.getCurB()));
          hunk.incB();
        }