Diff.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. <?php
  2. /**
  3. * General API for generating and formatting diffs - the differences between
  4. * two sequences of strings.
  5. *
  6. * The original PHP version of this code was written by Geoffrey T. Dairiki
  7. * <dairiki@dairiki.org>, and is used/adapted with his permission.
  8. *
  9. * $Horde: framework/Text_Diff/Diff.php,v 1.11.2.11 2008/02/24 10:57:46 jan Exp $
  10. *
  11. * Copyright 2004 Geoffrey T. Dairiki <dairiki@dairiki.org>
  12. * Copyright 2004-2008 The Horde Project (http://www.horde.org/)
  13. *
  14. * See the enclosed file COPYING for license information (LGPL). If you did
  15. * not receive this file, see http://opensource.org/licenses/lgpl-license.php.
  16. *
  17. * @package Text_Diff
  18. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  19. */
  20. class Text_Diff {
  21. /**
  22. * Array of changes.
  23. *
  24. * @var array
  25. */
  26. var $_edits;
  27. /**
  28. * Computes diffs between sequences of strings.
  29. *
  30. * @param string $engine Name of the diffing engine to use. 'auto'
  31. * will automatically select the best.
  32. * @param array $params Parameters to pass to the diffing engine.
  33. * Normally an array of two arrays, each
  34. * containing the lines from a file.
  35. */
  36. function Text_Diff($engine, $params)
  37. {
  38. // Backward compatibility workaround.
  39. if (!is_string($engine)) {
  40. $params = array($engine, $params);
  41. $engine = 'auto';
  42. }
  43. if ($engine == 'auto') {
  44. $engine = extension_loaded('xdiff') ? 'xdiff' : 'native';
  45. } else {
  46. $engine = basename($engine);
  47. }
  48. require_once 'Text/Diff/Engine/' . $engine . '.php';
  49. $class = 'Text_Diff_Engine_' . $engine;
  50. $diff_engine = new $class();
  51. $this->_edits = call_user_func_array(array($diff_engine, 'diff'), $params);
  52. }
  53. /**
  54. * Returns the array of differences.
  55. */
  56. function getDiff()
  57. {
  58. return $this->_edits;
  59. }
  60. /**
  61. * returns the number of new (added) lines in a given diff.
  62. *
  63. * @since Text_Diff 1.1.0
  64. * @since Horde 3.2
  65. *
  66. * @return integer The number of new lines
  67. */
  68. function countAddedLines()
  69. {
  70. $count = 0;
  71. foreach ($this->_edits as $edit) {
  72. if (is_a($edit, 'Text_Diff_Op_add') ||
  73. is_a($edit, 'Text_Diff_Op_change')) {
  74. $count += $edit->nfinal();
  75. }
  76. }
  77. return $count;
  78. }
  79. /**
  80. * Returns the number of deleted (removed) lines in a given diff.
  81. *
  82. * @since Text_Diff 1.1.0
  83. * @since Horde 3.2
  84. *
  85. * @return integer The number of deleted lines
  86. */
  87. function countDeletedLines()
  88. {
  89. $count = 0;
  90. foreach ($this->_edits as $edit) {
  91. if (is_a($edit, 'Text_Diff_Op_delete') ||
  92. is_a($edit, 'Text_Diff_Op_change')) {
  93. $count += $edit->norig();
  94. }
  95. }
  96. return $count;
  97. }
  98. /**
  99. * Computes a reversed diff.
  100. *
  101. * Example:
  102. * <code>
  103. * $diff = new Text_Diff($lines1, $lines2);
  104. * $rev = $diff->reverse();
  105. * </code>
  106. *
  107. * @return Text_Diff A Diff object representing the inverse of the
  108. * original diff. Note that we purposely don't return a
  109. * reference here, since this essentially is a clone()
  110. * method.
  111. */
  112. function reverse()
  113. {
  114. if (version_compare(zend_version(), '2', '>')) {
  115. $rev = clone($this);
  116. } else {
  117. $rev = $this;
  118. }
  119. $rev->_edits = array();
  120. foreach ($this->_edits as $edit) {
  121. $rev->_edits[] = $edit->reverse();
  122. }
  123. return $rev;
  124. }
  125. /**
  126. * Checks for an empty diff.
  127. *
  128. * @return boolean True if two sequences were identical.
  129. */
  130. function isEmpty()
  131. {
  132. foreach ($this->_edits as $edit) {
  133. if (!is_a($edit, 'Text_Diff_Op_copy')) {
  134. return false;
  135. }
  136. }
  137. return true;
  138. }
  139. /**
  140. * Computes the length of the Longest Common Subsequence (LCS).
  141. *
  142. * This is mostly for diagnostic purposes.
  143. *
  144. * @return integer The length of the LCS.
  145. */
  146. function lcs()
  147. {
  148. $lcs = 0;
  149. foreach ($this->_edits as $edit) {
  150. if (is_a($edit, 'Text_Diff_Op_copy')) {
  151. $lcs += count($edit->orig);
  152. }
  153. }
  154. return $lcs;
  155. }
  156. /**
  157. * Gets the original set of lines.
  158. *
  159. * This reconstructs the $from_lines parameter passed to the constructor.
  160. *
  161. * @return array The original sequence of strings.
  162. */
  163. function getOriginal()
  164. {
  165. $lines = array();
  166. foreach ($this->_edits as $edit) {
  167. if ($edit->orig) {
  168. array_splice($lines, count($lines), 0, $edit->orig);
  169. }
  170. }
  171. return $lines;
  172. }
  173. /**
  174. * Gets the final set of lines.
  175. *
  176. * This reconstructs the $to_lines parameter passed to the constructor.
  177. *
  178. * @return array The sequence of strings.
  179. */
  180. function getFinal()
  181. {
  182. $lines = array();
  183. foreach ($this->_edits as $edit) {
  184. if ($edit->final) {
  185. array_splice($lines, count($lines), 0, $edit->final);
  186. }
  187. }
  188. return $lines;
  189. }
  190. /**
  191. * Removes trailing newlines from a line of text. This is meant to be used
  192. * with array_walk().
  193. *
  194. * @param string $line The line to trim.
  195. * @param integer $key The index of the line in the array. Not used.
  196. */
  197. static function trimNewlines(&$line, $key)
  198. {
  199. $line = str_replace(array("\n", "\r"), '', $line);
  200. }
  201. /**
  202. * Determines the location of the system temporary directory.
  203. *
  204. * @static
  205. *
  206. * @access protected
  207. *
  208. * @return string A directory name which can be used for temp files.
  209. * Returns false if one could not be found.
  210. */
  211. function _getTempDir()
  212. {
  213. $tmp_locations = array('/tmp', '/var/tmp', 'c:\WUTemp', 'c:\temp',
  214. 'c:\windows\temp', 'c:\winnt\temp');
  215. /* Try PHP's upload_tmp_dir directive. */
  216. $tmp = ini_get('upload_tmp_dir');
  217. /* Otherwise, try to determine the TMPDIR environment variable. */
  218. if (!strlen($tmp)) {
  219. $tmp = getenv('TMPDIR');
  220. }
  221. /* If we still cannot determine a value, then cycle through a list of
  222. * preset possibilities. */
  223. while (!strlen($tmp) && count($tmp_locations)) {
  224. $tmp_check = array_shift($tmp_locations);
  225. if (@is_dir($tmp_check)) {
  226. $tmp = $tmp_check;
  227. }
  228. }
  229. /* If it is still empty, we have failed, so return false; otherwise
  230. * return the directory determined. */
  231. return strlen($tmp) ? $tmp : false;
  232. }
  233. /**
  234. * Checks a diff for validity.
  235. *
  236. * This is here only for debugging purposes.
  237. */
  238. function _check($from_lines, $to_lines)
  239. {
  240. if (serialize($from_lines) != serialize($this->getOriginal())) {
  241. trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
  242. }
  243. if (serialize($to_lines) != serialize($this->getFinal())) {
  244. trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
  245. }
  246. $rev = $this->reverse();
  247. if (serialize($to_lines) != serialize($rev->getOriginal())) {
  248. trigger_error("Reversed original doesn't match", E_USER_ERROR);
  249. }
  250. if (serialize($from_lines) != serialize($rev->getFinal())) {
  251. trigger_error("Reversed final doesn't match", E_USER_ERROR);
  252. }
  253. $prevtype = null;
  254. foreach ($this->_edits as $edit) {
  255. if ($prevtype == get_class($edit)) {
  256. trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
  257. }
  258. $prevtype = get_class($edit);
  259. }
  260. return true;
  261. }
  262. }
  263. /**
  264. * @package Text_Diff
  265. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  266. */
  267. class Text_MappedDiff extends Text_Diff {
  268. /**
  269. * Computes a diff between sequences of strings.
  270. *
  271. * This can be used to compute things like case-insensitive diffs, or diffs
  272. * which ignore changes in white-space.
  273. *
  274. * @param array $from_lines An array of strings.
  275. * @param array $to_lines An array of strings.
  276. * @param array $mapped_from_lines This array should have the same size
  277. * number of elements as $from_lines. The
  278. * elements in $mapped_from_lines and
  279. * $mapped_to_lines are what is actually
  280. * compared when computing the diff.
  281. * @param array $mapped_to_lines This array should have the same number
  282. * of elements as $to_lines.
  283. */
  284. function Text_MappedDiff($from_lines, $to_lines,
  285. $mapped_from_lines, $mapped_to_lines)
  286. {
  287. assert(count($from_lines) == count($mapped_from_lines));
  288. assert(count($to_lines) == count($mapped_to_lines));
  289. parent::Text_Diff($mapped_from_lines, $mapped_to_lines);
  290. $xi = $yi = 0;
  291. for ($i = 0; $i < count($this->_edits); $i++) {
  292. $orig = &$this->_edits[$i]->orig;
  293. if (is_array($orig)) {
  294. $orig = array_slice($from_lines, $xi, count($orig));
  295. $xi += count($orig);
  296. }
  297. $final = &$this->_edits[$i]->final;
  298. if (is_array($final)) {
  299. $final = array_slice($to_lines, $yi, count($final));
  300. $yi += count($final);
  301. }
  302. }
  303. }
  304. }
  305. /**
  306. * @package Text_Diff
  307. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  308. *
  309. * @access private
  310. */
  311. class Text_Diff_Op {
  312. var $orig;
  313. var $final;
  314. function &reverse()
  315. {
  316. trigger_error('Abstract method', E_USER_ERROR);
  317. }
  318. function norig()
  319. {
  320. return $this->orig ? count($this->orig) : 0;
  321. }
  322. function nfinal()
  323. {
  324. return $this->final ? count($this->final) : 0;
  325. }
  326. }
  327. /**
  328. * @package Text_Diff
  329. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  330. *
  331. * @access private
  332. */
  333. class Text_Diff_Op_copy extends Text_Diff_Op {
  334. function Text_Diff_Op_copy($orig, $final = false)
  335. {
  336. if (!is_array($final)) {
  337. $final = $orig;
  338. }
  339. $this->orig = $orig;
  340. $this->final = $final;
  341. }
  342. function &reverse()
  343. {
  344. $reverse = new Text_Diff_Op_copy($this->final, $this->orig);
  345. return $reverse;
  346. }
  347. }
  348. /**
  349. * @package Text_Diff
  350. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  351. *
  352. * @access private
  353. */
  354. class Text_Diff_Op_delete extends Text_Diff_Op {
  355. function Text_Diff_Op_delete($lines)
  356. {
  357. $this->orig = $lines;
  358. $this->final = false;
  359. }
  360. function &reverse()
  361. {
  362. $reverse = new Text_Diff_Op_add($this->orig);
  363. return $reverse;
  364. }
  365. }
  366. /**
  367. * @package Text_Diff
  368. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  369. *
  370. * @access private
  371. */
  372. class Text_Diff_Op_add extends Text_Diff_Op {
  373. function Text_Diff_Op_add($lines)
  374. {
  375. $this->final = $lines;
  376. $this->orig = false;
  377. }
  378. function &reverse()
  379. {
  380. $reverse = new Text_Diff_Op_delete($this->final);
  381. return $reverse;
  382. }
  383. }
  384. /**
  385. * @package Text_Diff
  386. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  387. *
  388. * @access private
  389. */
  390. class Text_Diff_Op_change extends Text_Diff_Op {
  391. function Text_Diff_Op_change($orig, $final)
  392. {
  393. $this->orig = $orig;
  394. $this->final = $final;
  395. }
  396. function &reverse()
  397. {
  398. $reverse = new Text_Diff_Op_change($this->final, $this->orig);
  399. return $reverse;
  400. }
  401. }