wiclear-2007-07-19/inc/classes/diff.class.php

<?php
# ***** BEGIN LICENSE BLOCK *****
# This file is part of WiClear.
# Copyright (c) 2004-2007 David Jobet. All rights
# reserved.
#
# WiClear is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# WiClear is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DotClear; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA
#
# ***** END LICENSE BLOCK *****

/**
 * \brief represents a line of text plus a checksum, so that comparisons are faster
 *
 */
class Line
{
  var $line;     //!< line of text (always stored as a string)
  var $checkSum; //!< checksum of line, used as a cheap pre-check in match()

  /**
   * \brief builds a Line from a piece of text and computes its checksum
   *
   * \param line the text of the line
   */
  function __construct($line)
  {
    // force a string so that strlen()/string offsets and the strict
    // comparison done in match() always operate on the same type
    $this->line = (string) $line;
    $this->computeCheckSum();
  }

  /**
   * \brief PHP 4 style constructor, kept for backward compatibility
   *
   * PHP 8 no longer treats a method named after its class as a constructor,
   * so the real initialisation lives in __construct(); this method only
   * forwards to it for code that still calls Line() explicitly.
   *
   * \param line the text of the line
   */
  function Line($line)
  {
    $this->__construct($line);
  }

  /**
   * \brief (re)computes the checksum of the current line of text
   *
   * Every byte is folded into an accumulator (sum = sum + sum * 256 + byte).
   * The accumulator is reset to 0 if it ever becomes infinite. The result is
   * stored in $checkSum.
   */
  function computeCheckSum()
  {
    $text   = (string) $this->line;
    $length = strlen($text); // computed once instead of on every iteration
    $sum    = 0;

    for ($i = 0; $i < $length; ++$i)
    {
      $sum += $sum * 256 + ord($text[$i]);
      if (is_infinite($sum))
      {
        $sum = 0;
      }
    }

    $this->checkSum = $sum;
  }

  /**
   * \brief compares this line against another line
   *
   * \param line the line to compare this instance to
   * \return true if lines are identical, false otherwise
   */
  function match($line)
  {
    // different checksums: the texts cannot be identical
    if ($this->checkSum != $line->checkSum)
      return false;

    // strict comparison: with '==' two different numeric strings
    // (e.g. "1e3" and "1000") would be reported as identical
    return $this->line === $line->line;
  }
}

// Kinds of change stored in BlockMatch::$change.
// ADDED and REMOVED use distinct bits so that CHANGED can be expressed
// as their bitwise OR (2|4 = 6).
define('UNKNOWN',   0); // initial value assigned by the BlockMatch constructor
define('UNCHANGED', 1); // block is present in both texts
define('ADDED',     2); // block exists only in text2
define('REMOVED',   4); // block exists only in text1
define('CHANGED',   ADDED|REMOVED); // block of text1 replaced by a block of text2

/**
 * \brief represents a block of lines in text1 and/or text2
 *
 * A BlockMatch is a contiguous block of lines that can be found
 * at index1 in text1, and at index2 in text2.
 * - if both texts include this block, then length1 == length2 and change == UNCHANGED
 * - if only text1 includes this block, then change == REMOVED and only index1 and length1 have meaning
 * - if only text2 includes this block, then change == ADDED and only index2 and length2 have meaning
 */
class BlockMatch
{
  var $index1;  //!< start of block in text1 (not meaningful if 'change' is ADDED)
  var $length1; //!< length of block in text1

  var $index2;  //!< start of block in text2 (not meaningful if 'change' is REMOVED)
  var $length2; //!< length of block in text2

  var $change;  //!< kind of change: UNKNOWN, UNCHANGED, ADDED, REMOVED or CHANGED

  /**
   * \brief creates a block whose kind of change is not known yet (UNKNOWN)
   *
   * \param index1  start of the block in text1
   * \param length1 number of lines of the block in text1
   * \param index2  start of the block in text2
   * \param length2 number of lines of the block in text2
   */
  function __construct($index1, $length1, $index2, $length2)
  {
    $this->index1  = $index1;
    $this->length1 = $length1;

    $this->index2  = $index2;
    $this->length2 = $length2;

    $this->change  = UNKNOWN;
  }

  /**
   * \brief PHP 4 style constructor, kept for backward compatibility
   *
   * PHP 8 no longer treats a method named after its class as a constructor,
   * so the real initialisation lives in __construct(); this method only
   * forwards to it.
   */
  function BlockMatch($index1, $length1, $index2, $length2)
  {
    $this->__construct($index1, $length1, $index2, $length2);
  }

  /**
   * \brief sets the kind of change carried by this block
   *
   * \param change one of UNKNOWN, UNCHANGED, ADDED, REMOVED, CHANGED
   */
  function setChange($change)
  {
    $this->change = $change;
  }
}

/**
 * \brief performs a simplistic diff against text1 and text2 in this order
 *
 * order is important, because diff(text1, text2) != diff(text2, text1)
 */
class diff
{
  var $list1; //!< array<Line> created from $text1
  var $list2; //!< array<Line> created from $text2

  var $diff;  //!< array<BlockMatch> as the diff

  /**
   * \brief creates a diff between text1 and text2
   *
   * \param text1 the reference text
   * \param text2 the text compared against text1
   */
  function __construct($text1, $text2)
  {
    $this->list1 = $this->createList($text1);
    $this->list2 = $this->createList($text2);

    // create the list of common lines from list1 and list2
    $trace = $this->identifyCommonLines();
    // reconstruct missing blocks (ADDED/REMOVED/CHANGED) from trace
    $this->diff = $this->createDiff($trace);
  }

  /**
   * \brief PHP 4 style constructor, kept for backward compatibility
   *
   * PHP 8 no longer treats a method named after its class as a constructor,
   * so the real work lives in __construct(); this method only forwards to it.
   *
   * \param text1 the reference text
   * \param text2 the text compared against text1
   */
  function diff($text1, $text2)
  {
    $this->__construct($text1, $text2);
  }

  /**
   * \brief explodes a text into a list of Line
   *
   * the text is exploded using the '\n' character; each piece is trim()'ed
   * before being wrapped, so leading/trailing whitespace (including a
   * trailing '\r') is neither compared nor displayed.
   *
   * \param text the text to explode
   *
   * \returns array<Line>
   */
  function createList($text)
  {
    $list = array();

    foreach (explode("\n", (string) $text) as $rawLine)
    {
      $list[] = new Line(trim($rawLine));
    }

    return $list;
  }

  /**
   * \brief identify common groups of lines between list1 and list2
   *
   * The algorithm is greedy and not optimal: for the current line of list1
   * it scans list2 forward, starting right after the previous common block,
   * until it finds a matching line, then extends the match for as long as
   * consecutive lines are identical.
   *
   * The returned list always ends with an empty block located at
   * (size1, size2) so that createDiff() also handles what follows the last
   * common block.
   *
   * \returns array<BlockMatch>
   */
  function identifyCommonLines()
  {
    $size1 = count($this->list1);
    $size2 = count($this->list2);

    $trace     = array();
    $index1    = 0;
    $lastMatch = 0; // first line of list2 not yet covered by a common block

    while ($index1 < $size1)
    {
      $index2 = $lastMatch;
      $begin1 = 0;
      $begin2 = 0;
      $foundMatchingLine = false;

      while ($index1 < $size1 && $index2 < $size2)
      {
        if ($this->list1[$index1]->match($this->list2[$index2]))
        {
          if (!$foundMatchingLine)
          {
            // first line of a new common block
            $begin1 = $index1;
            $begin2 = $index2;
            $foundMatchingLine = true;
          }

          ++$index1;
        }
        elseif ($foundMatchingLine)
        {
          // end of the current common block
          break;
        }

        ++$index2;
      }

      if ($foundMatchingLine)
      {
        $length = $index1 - $begin1;
        $trace[] = new BlockMatch($begin1, $length, $begin2, $length);
        $lastMatch = $begin2 + $length;
        // $index1 already points to the first line after the block: it must
        // NOT be skipped, otherwise that line is never searched in list2
      }
      else
      {
        // list1[$index1] does not exist in the remaining part of list2
        ++$index1;
      }
    }

    // add a last trace to force matching with the remaining (if any) blocks after last match
    $trace[] = new BlockMatch($size1, 0, $size2, 0);

    return $trace;
  }

  /**
   * \brief create the set of changes from the list of common lines
   *
   * in fact the list returned contains all necessary blocks to reconstruct
   * both $text1 and $text2.<br>
   * the list indicates if the block was
   * <ul>
   * <li>ADDED     : the block exists only in text2</li>
   * <li>REMOVED   : the block exists only in text1, it was removed in text2</li>
   * <li>CHANGED   : the block was changed between text1 and text2</li>
   * <li>UNCHANGED : the block is the same between the two</li>
   * </ul>
   *
   * The blocks of $trace are flagged UNCHANGED and reused as-is in the
   * result (including the empty block that terminates the trace).
   *
   * \param trace array<BlockMatch> as returned by identifyCommonLines()
   * \return array<BlockMatch>
   */
  function createDiff($trace)
  {
    $index1 = 0;
    $index2 = 0;

    $diff = array();

    foreach ($trace as $block)
    {
      // number of lines of each text located between the previous common
      // block and this one
      $gap1 = $block->index1 - $index1;
      $gap2 = $block->index2 - $index2;

      if ($gap1 >= 0 && $gap2 >= 0 && ($gap1 > 0 || $gap2 > 0))
      {
        // only text1 has lines -> REMOVED, only text2 -> ADDED,
        // both -> REMOVED|ADDED which is CHANGED
        $change = ($gap1 > 0 ? REMOVED : 0) | ($gap2 > 0 ? ADDED : 0);

        $newBlock = new BlockMatch($index1, $gap1, $index2, $gap2);
        $newBlock->setChange($change);
        $diff[] = $newBlock;
      }

      $block->setChange(UNCHANGED);
      $diff[] = $block;

      $index1 = $block->index1 + $block->length1;
      $index2 = $block->index2 + $block->length2;
    }

    return $diff;
  }

  /**
   * \brief helper method for toHtml() function
   *
   * \param $change a constant in either (UNCHANGED/CHANGED/ADDED/REMOVED)
   * \return the css class name for this change, 'unknown' for any other value
   */
  function getClass($change)
  {
    switch ($change)
    {
      case UNCHANGED:
        return 'unchanged';
      case ADDED:
        return 'added';
      case REMOVED:
        return 'removed';
      case CHANGED:
        return 'changed';
    }

    return 'unknown';
  }

  /**
   * \brief display the diff in a two column table
   *
   * Each block produces max(length1, length2) rows; the row's css class is
   * given by getClass().
   *
   * NOTE(review): titles and line texts are concatenated into the markup
   * without any escaping; the caller has to escape them beforehand if they
   * may contain untrusted data - confirm against the callers.
   *
   * \param title1 title in table header
   * \param title2 title in table header
   * \return html blob
   */
  function toHtml($title1, $title2)
  {
    $blob = '<table class="diff">'
          . '<tr><th>'.$title1.'</th><th>'.$title2.'</th></tr>';

    foreach ($this->diff as $block)
    {
      // both values are constant for the whole block
      $class = $this->getClass($block->change);
      $rows  = max($block->length1, $block->length2);

      for ($i = 0; $i < $rows; ++$i)
      {
        $left  = '';
        $right = '';

        if ($block->change != ADDED && $i < $block->length1)
        {
          $left = $this->list1[$block->index1 + $i]->line;
        }

        if ($block->change != REMOVED && $i < $block->length2)
        {
          $right = $this->list2[$block->index2 + $i]->line;
        }

        $blob .= '<tr class="'.$class.'"><td>'.$left.'</td><td>'.$right.'</td></tr>';
      }
    }

    return $blob.'</table>';
  }

  /**
   * \brief display the diff as text, using merge-conflict like markers
   *
   * Unchanged lines are emitted as-is. Every other block is emitted as
   * '<<<', the lines of text1, '===', the lines of text2, '>>>'
   * (each marker and each line followed by "\n").
   *
   * \return text
   */
  function toText()
  {
    $blob = '';

    foreach ($this->diff as $block)
    {
      $modified = ($block->change != UNCHANGED);

      if ($modified)
      {
        $blob .= '<<<'."\n";
      }

      if ($block->change != ADDED)
      {
        for ($i = 0; $i < $block->length1; ++$i)
        {
          $blob .= $this->list1[$block->index1 + $i]->line."\n";
        }
      }

      if ($modified)
      {
        $blob .= '==='."\n";

        if ($block->change != REMOVED)
        {
          for ($i = 0; $i < $block->length2; ++$i)
          {
            $blob .= $this->list2[$block->index2 + $i]->line."\n";
          }
        }

        $blob .= '>>>'."\n";
      }
    }

    return $blob;
  }

  /**
   * \brief display the blocks (debug function)
   *
   * One <pre> entry is produced per line of the block in text1, so blocks
   * whose length1 is 0 produce no output.
   *
   * \param $diff array<BlockMatch>
   * \return html blob
   */
  function displayBlocks($diff)
  {
    $blob = '';

    foreach ($diff as $block)
    {
      for ($i = 0; $i < $block->length1; ++$i)
      {
        $blob .= '<pre>('.($block->index1 + $i + 1).','.($block->index2 + $i + 1).')('.$block->length1.'/'.$block->length2.')</pre>';
      }
    }

    return $blob;
  }
}
?>