wiclear-2007-07-19/inc/classes/diff.class.php
<?php
# ***** BEGIN LICENSE BLOCK *****
# This file is part of WiClear.
# Copyright (c) 2004-2007 David Jobet. All rights
# reserved.
#
# WiClear is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# WiClear is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DotClear; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
# ***** END LICENSE BLOCK *****
/**
* \brief a single line of text paired with a checksum, so that most
*        non-matching lines are rejected without a full string comparison
*/
class Line
{
    var $line;     //!< line of text
    var $checkSum; //!< checksum of line (CRC-32 of the text)

    /**
    * \brief stores the text and computes its checksum
    *
    * Declared before the legacy Line() method on purpose: old-style
    * (class-named) constructors are no longer recognised by PHP 8, and
    * declaring __construct first keeps PHP 5 from reporting a redefined
    * constructor.
    *
    * \param line the text of the line
    */
    function __construct($line)
    {
        $this->line = $line;
        $this->computeCheckSum();
    }

    /**
    * \brief legacy (PHP 4 style) constructor, kept for backward compatibility
    *
    * \param line the text of the line
    */
    function Line($line)
    {
        $this->__construct($line);
    }

    /**
    * \brief (re)computes the checksum of the current text
    *
    * crc32() replaces the former hand-rolled polynomial, which overflowed
    * into floating point after a few characters and therefore stopped
    * reflecting the trailing characters of longer lines.
    */
    function computeCheckSum()
    {
        $this->checkSum = crc32((string) $this->line);
    }

    /**
    * \brief compares this line against another line
    *
    * \param line the line to compare this instance to
    * \return true if lines are identical, false otherwise
    */
    function match($line)
    {
        // cheap rejection first: different checksums imply different texts
        if ($this->checkSum !== $line->checkSum)
        {
            return false;
        }

        // strict comparison: a loose == treats numeric-looking strings such
        // as "1e3" and "1000" as equal although the texts differ
        return (string) $this->line === (string) $line->line;
    }
}
// Kinds of change stored in BlockMatch::$change.
// ADDED and REMOVED occupy distinct bits so that CHANGED can be expressed
// as their bitwise OR (2|4 = 6).
define('UNKNOWN', 0); // initial value assigned by the BlockMatch constructor
define('UNCHANGED', 1); // block present in both texts
define('ADDED', 2); // block present only in text2
define('REMOVED', 4); // block present only in text1
define('CHANGED', ADDED|REMOVED); // block replaced: old lines in text1, new lines in text2
/**
* \brief represents a block of lines in text1 and/or text2
*
* A BlockMatch is a contiguous block of lines that can be found
* at index1 in text1, and at index2 in text2.
* - if both texts include this block, then length1 == length2 and change == UNCHANGED
* - if only text1 includes this block then change == REMOVED and only index1 and length1 have meaning
* - if only text2 includes this block then change == ADDED and only index2 and length2 have meaning
* - if the block was replaced then change == CHANGED, index1/length1 locate the
*   old lines in text1 and index2/length2 the new lines in text2
*/
class BlockMatch
{
    var $index1;  //!< start of block in text1 (not meaningful if 'change' is ADDED)
    var $length1; //!< length of block in text1
    var $index2;  //!< start of block in text2 (not meaningful if 'change' is REMOVED)
    var $length2; //!< length of block in text2
    var $change;  //!< kind of change: UNKNOWN, UNCHANGED, ADDED, REMOVED or CHANGED

    /**
    * \brief creates a block whose kind of change is still UNKNOWN
    *
    * Declared before the legacy BlockMatch() method on purpose: old-style
    * (class-named) constructors are no longer recognised by PHP 8, and
    * declaring __construct first keeps PHP 5 from reporting a redefined
    * constructor.
    *
    * \param index1 start of the block in text1
    * \param length1 number of lines of the block in text1
    * \param index2 start of the block in text2
    * \param length2 number of lines of the block in text2
    */
    function __construct($index1, $length1, $index2, $length2)
    {
        $this->index1 = $index1;
        $this->length1 = $length1;
        $this->index2 = $index2;
        $this->length2 = $length2;
        $this->change = UNKNOWN;
    }

    /**
    * \brief legacy (PHP 4 style) constructor, kept for backward compatibility
    */
    function BlockMatch($index1, $length1, $index2, $length2)
    {
        $this->__construct($index1, $length1, $index2, $length2);
    }

    /**
    * \brief sets the kind of change of this block
    *
    * \param change one of UNKNOWN, UNCHANGED, ADDED, REMOVED, CHANGED
    */
    function setChange($change)
    {
        $this->change = $change;
    }
}
/**
* \brief performs a simplistic line-based diff of text1 against text2, in this order
*
* order is important, because diff(text1, text2) != diff(text2, text1)
*/
class diff
{
    var $list1; //!< array<Line> created from $text1
    var $list2; //!< array<Line> created from $text2
    var $diff;  //!< array<BlockMatch> as the diff

    /**
    * \brief creates a diff between text1 and text2
    *
    * Declared before the legacy diff() method on purpose: old-style
    * (class-named) constructors are no longer recognised by PHP 8, and
    * declaring __construct first keeps PHP 5 from reporting a redefined
    * constructor.
    *
    * \param text1 the original text
    * \param text2 the modified text
    */
    function __construct($text1, $text2)
    {
        $this->list1 = $this->createList($text1);
        $this->list2 = $this->createList($text2);

        // create the list of common lines from list1 and list2
        $trace = $this->identifyCommonLines();

        // reconstruct missing blocks (ADDED/REMOVED/CHANGED) from trace
        $this->diff = $this->createDiff($trace);
    }

    /**
    * \brief legacy (PHP 4 style) constructor, kept for backward compatibility
    *
    * \param text1 the original text
    * \param text2 the modified text
    */
    function diff($text1, $text2)
    {
        $this->__construct($text1, $text2);
    }

    /**
    * \brief explodes a text into a list of Line
    *
    * the text is exploded using the '\n' character; every line is trimmed,
    * so leading/trailing whitespace (including a trailing '\r') neither
    * takes part in the comparison nor appears in the rendered output
    *
    * \param text the text to explode
    *
    * \returns array<Line>
    */
    function createList($text)
    {
        $list = array();
        foreach (explode("\n", $text) as $line)
        {
            $list[] = new Line(trim($line));
        }
        return $list;
    }

    /**
    * \brief identify common groups of lines between list1 and list2
    *
    * Greedy scan: for the current line of list1, look for its first
    * occurrence in list2 at or after the end of the previous match, then
    * extend the match while consecutive lines are equal.
    * the current algorithm is not optimal but is ok for simple examples
    *
    * \returns array<BlockMatch> matched blocks in increasing order, followed
    *          by a zero-length sentinel block located at the end of both lists
    */
    function identifyCommonLines()
    {
        $size1 = count($this->list1);
        $size2 = count($this->list2);
        $trace = array();
        $index1 = 0;
        $lastMatch = 0; // first index of list2 not covered by a previous match

        while ($index1 < $size1)
        {
            $index2 = $lastMatch;
            $begin1 = $index1;
            $begin2 = $index2;
            $foundMatchingLine = false;

            while ($index1 < $size1 && $index2 < $size2)
            {
                if ($this->list1[$index1]->match($this->list2[$index2]))
                {
                    if (!$foundMatchingLine)
                    {
                        $begin1 = $index1;
                        $begin2 = $index2;
                        $foundMatchingLine = true;
                    }
                    ++$index1;
                }
                else if ($foundMatchingLine)
                {
                    // end of the run of consecutive equal lines
                    break;
                }
                ++$index2;
            }

            if ($foundMatchingLine)
            {
                $length = $index1 - $begin1;
                $trace[] = new BlockMatch($begin1, $length, $begin2, $length);
                $lastMatch = $begin2 + $length;
                // $index1 already designates the first line after the block:
                // it must still be searched for in the rest of list2, so it
                // is NOT skipped here (skipping it turned a plain insertion
                // in text2 into a "changed" block)
            }
            else
            {
                // this line of list1 occurs nowhere in the rest of list2
                ++$index1;
            }
        }

        // add a last trace to force matching with the remaining (if any) blocks after last match
        $trace[] = new BlockMatch($size1, 0, $size2, 0);

        return $trace;
    }

    /**
    * \brief create the set of changes from the list of common lines
    *
    * in fact the list returned contains all necessary blocks to reconstruct
    * both $text1 and $text2.<br>
    * the list indicates if the block was
    * <ul>
    * <li>ADDED : the block exists only in text2</li>
    * <li>REMOVED : the block exists only in text1, it was removed in text2</li>
    * <li>CHANGED : the block was changed between text1 and text2</li>
    * <li>UNCHANGED : the block is the same between the two</li>
    * </ul>
    *
    * \param trace array<BlockMatch> common blocks, as returned by identifyCommonLines()
    * \return array<BlockMatch>
    */
    function createDiff($trace)
    {
        $index1 = 0; // first line of text1 not yet covered by the diff
        $index2 = 0; // first line of text2 not yet covered by the diff
        $diff = array();

        foreach ($trace as $block)
        {
            // number of lines lying between the previous common block and this one
            $gap1 = $block->index1 - $index1;
            $gap2 = $block->index2 - $index2;

            $change = UNKNOWN;
            if ($gap1 > 0 && $gap2 > 0)
            {
                // lines were changed
                $change = CHANGED;
            }
            else if ($gap1 > 0 && $gap2 == 0)
            {
                // lines were removed in text2
                $change = REMOVED;
            }
            else if ($gap1 == 0 && $gap2 > 0)
            {
                // lines were added in text2
                $change = ADDED;
            }

            if ($change != UNKNOWN)
            {
                $newBlock = new BlockMatch($index1, $gap1, $index2, $gap2);
                $newBlock->setChange($change);
                $diff[] = $newBlock;
            }

            $block->setChange(UNCHANGED);
            $diff[] = $block;

            $index1 = $block->index1 + $block->length1;
            $index2 = $block->index2 + $block->length2;
        }

        return $diff;
    }

    /**
    * \brief helper method for toHtml() function
    *
    * \param $change a constant in either (UNCHANGED/CHANGED/ADDED/REMOVED)
    * \return the matching css class name, 'unknown' for any other value
    */
    function getClass($change)
    {
        switch ($change)
        {
            case UNCHANGED:
                return 'unchanged';
            case ADDED:
                return 'added';
            case REMOVED:
                return 'removed';
            case CHANGED:
                return 'changed';
        }
        return 'unknown';
    }

    /**
    * \brief display the diff in a two-column table
    *
    * one row per line, the css class of the row tells the kind of change.
    *
    * NOTE(review): titles and line texts are inserted as-is, without any
    * html escaping; presumably callers pass already-escaped text -- confirm.
    *
    * \param title1 title in table header
    * \param title2 title in table header
    * \return html blob
    */
    function toHtml($title1, $title2)
    {
        $blob =
            '<table class="diff">'.
            '<tr><th>'.$title1.'</th><th>'.$title2.'</th></tr>';

        foreach ($this->diff as $block)
        {
            // both values are the same for every row of the block
            $class = $this->getClass($block->change);
            $rows = max($block->length1, $block->length2);

            for ($i = 0; $i < $rows; ++$i)
            {
                $blob .= '<tr class="'.$class.'"><td>';
                if ($block->change != ADDED && $i < $block->length1)
                {
                    $blob .= $this->list1[$block->index1 + $i]->line;
                }
                $blob .= '</td><td>';
                if ($block->change != REMOVED && $i < $block->length2)
                {
                    $blob .= $this->list2[$block->index2 + $i]->line;
                }
                $blob .= '</td></tr>';
            }
        }

        $blob .= '</table>';
        return $blob;
    }

    /**
    * \brief display the diff as text with merge-style markers
    *
    * unchanged lines are copied verbatim; every other block is rendered as
    * '<<<', the lines of text1, '===', the lines of text2, '>>>'
    *
    * \return text
    */
    function toText()
    {
        $blob = '';
        foreach ($this->diff as $block)
        {
            $differs = ($block->change != UNCHANGED);

            if ($differs)
            {
                $blob .= '<<<'."\n";
            }

            if ($block->change != ADDED)
            {
                for ($i = 0; $i < $block->length1; ++$i)
                {
                    $blob .= $this->list1[$block->index1 + $i]->line."\n";
                }
            }

            if ($differs)
            {
                $blob .= '==='."\n";
                if ($block->change != REMOVED)
                {
                    for ($i = 0; $i < $block->length2; ++$i)
                    {
                        $blob .= $this->list2[$block->index2 + $i]->line."\n";
                    }
                }
                $blob .= '>>>'."\n";
            }
        }
        return $blob;
    }

    /**
    * \brief display the blocks (debug function)
    *
    * emits one entry per line of text1 covered by a block, so blocks whose
    * length1 is 0 (ADDED) produce no output
    *
    * \param $diff array<BlockMatch>
    * \return html blob
    */
    function displayBlocks($diff)
    {
        $blob = '';
        foreach ($diff as $block)
        {
            for ($i = 0; $i < $block->length1; ++$i)
            {
                $blob .= '<pre>('.($block->index1 + $i + 1).','.($block->index2 + $i + 1).')('.$block->length1.'/'.$block->length2.')</pre>';
            }
        }
        return $blob;
    }
}
?>