<?php

/*---------------------------------------------------------------*/
/*
 Title          : Truncates a character string that contains XHTML
 URL            : https://phpsources.net/code_s.php?id=391
 Author         : forty
 Author website : http://www.toplien.fr/
 Created        : 21 May 2008
 Last updated   : 24 August 2019
 Update report  :
   - checked that the code works
*/
/*---------------------------------------------------------------*/

/*
 * Script based on the HTML parser available here:
 *   http://php-html.sourceforge.net/
 */

// Node kinds reported by HtmlParser through its $iNodeType field.
define('NODE_TYPE_START', 0);        // not assigned anywhere in the visible code
define('NODE_TYPE_ELEMENT', 1);      // opening tag, e.g. <p>
define('NODE_TYPE_ENDELEMENT', 2);   // closing tag, e.g. </p>
define('NODE_TYPE_TEXT', 3);         // run of text between tags
define('NODE_TYPE_COMMENT', 4);      // <!-- ... -->
define('NODE_TYPE_DONE', 5);         // end of input reached
define('NODE_TYPE_ELEMENT_END', 6);  // element that needs no closing tag (<br>, <x />)
/**
 * Class HtmlParser.
 *
 * Minimal forward-only HTML tokenizer. Create an instance with the HTML
 * text, then call parse() repeatedly until it returns false. Each time
 * parse() returns true, $iNodeType, $iNodeName, $iNodeValue,
 * $iNodeAttributes, $iNodeStart and $iNodeEnd describe the node just read.
 *
 * The convenience functions HtmlParser_ForFile and HtmlParser_ForURL
 * build an instance from a file or URL.
 */
class HtmlParser
{
    /** Kind of the current node: one of the NODE_TYPE_* constants. */
    public $iNodeType;

    /** Element name for tags; "Text" for text nodes; "Comment" for comments. */
    public $iNodeName = "";

    /** Text of a text node, or full source of a comment; "" for tags. */
    public $iNodeValue = "";

    /** Attributes of the current element: lowercase name => value. */
    public $iNodeAttributes;

    /** Offset in the HTML text at which the current node starts. */
    public $iNodeStart;

    /** Offset just past the current node (parser index after consuming it). */
    public $iNodeEnd;

    // The following fields should be considered private.

    public $iHtmlText;
    public $iHtmlTextLength;
    public $iHtmlTextIndex = 0;

    /** Declared by the original code but never used by this class; kept for compatibility. */
    public $iHtmlCurrentChar;

    /** Character at $iHtmlTextIndex, or the integer -1 once the end of the text is reached. */
    public $iCurrentChar;

    public $BOE_ARRAY;
    public $B_ARRAY;
    public $BOS_ARRAY;

    /** True while inside a <script> element: "<!--" is then not treated as a comment. */
    public $no_comment = false;

    /** Elements that never take a closing tag (lowercase names). */
    public $BalisesSimples = array('hr', 'br', 'input', 'meta', 'link', 'img',
        'area', 'param');

    /**
     * Constructs a parser over the given HTML text.
     *
     * Uses __construct: a method named after the class is no longer treated
     * as a constructor by PHP 8, which left the parser uninitialised.
     *
     * @param string $aHtmlText HTML source to tokenize
     */
    public function __construct($aHtmlText)
    {
        $this->iHtmlText = (string) $aHtmlText;
        $this->iHtmlTextLength = strlen($this->iHtmlText);
        $this->iNodeAttributes = array();
        $this->setTextIndex(0);

        $this->BOE_ARRAY = array(" ", "\t", "\r", "\n", "=");
        $this->B_ARRAY = array(" ", "\t", "\r", "\n");
        $this->BOS_ARRAY = array(" ", "\t", "\r", "\n", "/");
    }

    /**
     * Parses the next node and updates the iNode* fields.
     *
     * @return bool false only when the end of the HTML text has been reached
     */
    public function parse()
    {
        $this->iNodeStart = $this->iHtmlTextIndex;
        $text = $this->skipToElement();
        if ($text !== "") {
            $this->iNodeType = NODE_TYPE_TEXT;
            $this->iNodeName = "Text";
            $this->iNodeValue = $text;
            $this->iNodeEnd = $this->iHtmlTextIndex;
            return true;
        }
        return $this->readTag();
    }

    /** Resets the attribute map of the current node. */
    public function clearAttributes()
    {
        $this->iNodeAttributes = array();
    }

    /**
     * Reads the tag (or comment) starting at the current "<".
     *
     * @return bool false when the current character is not "<" (end of input)
     */
    public function readTag()
    {
        if ($this->iCurrentChar !== "<") {
            $this->iNodeType = NODE_TYPE_DONE;
            return false;
        }
        $this->clearAttributes();
        $this->skipMaxInTag("<", 1);

        // Closing tag: </name ...>
        if ($this->iCurrentChar === '/') {
            $this->moveNext();
            $name = $this->skipToBlanksInTag();
            if (strtolower($name) === 'script') {
                $this->no_comment = false;
            }
            $this->iNodeType = NODE_TYPE_ENDELEMENT;
            $this->iNodeName = $name;
            $this->iNodeValue = "";
            $this->skipEndOfTag();
            $this->iNodeEnd = $this->iHtmlTextIndex;
            return true;
        }

        $name = $this->skipToBlanksOrSlashInTag();
        if (!$this->isValidTagIdentifier($name)) {
            $comment = false;
            if ((strpos($name, "!--") === 0) && (!$this->no_comment)) {
                if (strpos($name, "--", 3) === (strlen($name) - 2)) {
                    // Whole comment fits in the "name", e.g. <!--x-->
                    $this->iNodeType = NODE_TYPE_COMMENT;
                    $this->iNodeName = "Comment";
                    $this->iNodeValue = "<" . $name . ">";
                    $comment = true;
                } else {
                    $rest = $this->skipToStringInTag("-->");
                    if ($rest !== "") {
                        $this->iNodeType = NODE_TYPE_COMMENT;
                        $this->iNodeName = "Comment";
                        $this->iNodeValue = "<" . $name . $rest;
                        // Already skipped end of tag
                        $this->iNodeEnd = $this->iHtmlTextIndex;
                        return true;
                    }
                }
            }
            if (!$comment) {
                // Not a tag after all: hand the "<..." back as plain text.
                $this->iNodeType = NODE_TYPE_TEXT;
                $this->iNodeName = "Text";
                $this->iNodeValue = "<" . $name;
                $this->iNodeEnd = $this->iHtmlTextIndex;
                return true;
            }
        } else {
            if (strtolower($name) === 'script') {
                $this->no_comment = true;
            }
            $this->iNodeType = NODE_TYPE_ELEMENT;
            $this->iNodeValue = "";
            $this->iNodeName = $name;
            while ($this->skipBlanksInTag()) {
                $attrName = $this->skipToBlanksOrEqualsInTag();
                if ($attrName !== "" && $attrName !== "/") {
                    $this->skipBlanksInTag();
                    if ($this->iCurrentChar === "=") {
                        $this->skipEqualsInTag();
                        $this->skipBlanksInTag();
                        $value = $this->readValueInTag();
                        $this->iNodeAttributes[strtolower($attrName)] = $value;
                    } else {
                        // Attribute without a value; step back one character
                        // as the original parser does before re-scanning.
                        $this->iNodeAttributes[strtolower($attrName)] = "";
                        $this->setTextIndex($this->iHtmlTextIndex - 1);
                    }
                }
            }
        }

        if ($this->iNodeType === NODE_TYPE_ELEMENT) {
            // Self-closing when a "/" sits right before ">": either already
            // consumed (<x />) or still the current character (<x/>, <x a="b"/>).
            $selfClosed = $this->iCurrentChar === '/'
                || $this->iHtmlText[$this->iHtmlTextIndex - 1] === '/';
            // Void elements are matched case-insensitively so <BR> counts too.
            $void = in_array(strtolower($this->iNodeName), $this->BalisesSimples, true);
            if ($selfClosed || $void) {
                $this->iNodeType = NODE_TYPE_ELEMENT_END;
            }
        }
        $this->skipEndOfTag();
        $this->iNodeEnd = $this->iHtmlTextIndex;
        return true;
    }

    /**
     * Tells whether $name only contains letters, digits, "_" or "-".
     *
     * @return int|false result of preg_match(): 1 on match, 0 otherwise
     */
    public function isValidTagIdentifier($name)
    {
        return preg_match("/^[A-Za-z0-9_\\-]+$/", $name);
    }

    /** Skips blanks; returns true when at least one blank was consumed. */
    public function skipBlanksInTag()
    {
        return "" !== $this->skipInTag($this->B_ARRAY);
    }

    /** Reads up to the next blank, "=" or ">". */
    public function skipToBlanksOrEqualsInTag()
    {
        return $this->skipToInTag($this->BOE_ARRAY);
    }

    /** Reads up to the next blank or ">". */
    public function skipToBlanksInTag()
    {
        return $this->skipToInTag($this->B_ARRAY);
    }

    /** Reads up to the next blank, "/" or ">". */
    public function skipToBlanksOrSlashInTag()
    {
        return $this->skipToInTag($this->BOS_ARRAY);
    }

    /** Consumes a single "=" if it is the current character. */
    public function skipEqualsInTag()
    {
        return $this->skipMaxInTag("=", 1);
    }

    /**
     * Reads an attribute value: quoted with " or ', or bare up to the next blank.
     *
     * @return string the value without its quotes
     */
    public function readValueInTag()
    {
        $ch = $this->iCurrentChar;
        if ($ch === "\"" || $ch === "'") {
            $this->skipMaxInTag($ch, 1);
            $value = $this->skipToInTag($ch);
            $this->skipMaxInTag($ch, 1);
            return $value;
        }
        return $this->skipToBlanksInTag();
    }

    /** Moves the cursor to $index and refreshes $iCurrentChar (-1 past the end). */
    public function setTextIndex($index)
    {
        $this->iHtmlTextIndex = $index;
        if ($index >= $this->iHtmlTextLength) {
            $this->iCurrentChar = -1;
        } else {
            $this->iCurrentChar = $this->iHtmlText[$index];
        }
    }

    /**
     * Advances the cursor by one character.
     *
     * @return bool false when the cursor was already at the end of the text
     */
    public function moveNext()
    {
        if ($this->iHtmlTextIndex < $this->iHtmlTextLength) {
            $this->setTextIndex($this->iHtmlTextIndex + 1);
            return true;
        }
        return false;
    }

    /** Consumes everything up to and including the next ">" (or to the end of the text). */
    public function skipEndOfTag()
    {
        while (($ch = $this->iCurrentChar) !== -1) {
            $this->moveNext();
            if ($ch === ">") {
                return;
            }
        }
    }

    /**
     * Consumes consecutive characters that belong to $chars, stopping at ">".
     *
     * @param array|string $chars accepted characters (array of chars, or a string)
     * @return string the characters consumed
     */
    public function skipInTag($chars)
    {
        $set = $this->toCharList($chars);
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === ">" || !in_array($ch, $set, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Same as skipInTag() but consumes at most $maxChars characters.
     *
     * @param array|string $chars    accepted characters
     * @param int          $maxChars upper bound on characters consumed
     * @return string the characters consumed
     */
    public function skipMaxInTag($chars, $maxChars)
    {
        $set = $this->toCharList($chars);
        $sb = "";
        $count = 0;
        while (($ch = $this->iCurrentChar) !== -1 && $count++ < $maxChars) {
            if ($ch === ">" || !in_array($ch, $set, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Consumes characters until one of $chars, a ">", or the end of the text.
     *
     * @param array|string $chars stop characters
     * @return string the characters consumed (stop character excluded)
     */
    public function skipToInTag($chars)
    {
        $set = $this->toCharList($chars);
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === ">" || in_array($ch, $set, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /** Consumes and returns the text up to the next "<" or the end of the text. */
    public function skipToElement()
    {
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === "<") {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Returns text between current position and $needle,
     * inclusive, or "" if not found. The current index is moved to a point
     * after the location of $needle, or not moved at all
     * if nothing is found.
     */
    public function skipToStringInTag($needle)
    {
        $pos = strpos($this->iHtmlText, $needle, $this->iHtmlTextIndex);
        if ($pos === false) {
            return "";
        }
        $top = $pos + strlen($needle);
        $retvalue = substr($this->iHtmlText, $this->iHtmlTextIndex,
            $top - $this->iHtmlTextIndex);
        $this->setTextIndex($top);
        return $retvalue;
    }

    /**
     * Normalises a character set given as an array or as a string.
     *
     * Callers pass both forms; the original code ran count() on the string
     * form, which is a TypeError on PHP 8.
     */
    private function toCharList($chars)
    {
        return is_array($chars) ? $chars : str_split((string) $chars);
    }
}
/**
 * Builds an HtmlParser over the contents of a file.
 *
 * Simply delegates to HtmlParser_ForURL(), which opens its argument
 * with fopen().
 *
 * @param string $fileName path of the file to read
 * @return HtmlParser parser positioned at the start of the file contents
 */
function HtmlParser_ForFile($fileName)
{
    $parser = HtmlParser_ForURL($fileName);

    return $parser;
}
/**
 * Builds an HtmlParser over the document found at $url.
 *
 * The source is opened with fopen() and read in 8 KiB chunks until no
 * more data comes back. If the source cannot be opened, a parser over an
 * empty document is returned (fopen() has already raised its warning),
 * instead of passing false to fread()/fclose(), which is a TypeError on
 * PHP 8.
 *
 * @param string $url URL or path accepted by fopen()
 * @return HtmlParser parser positioned at the start of the content read
 */
function HtmlParser_ForURL($url)
{
    $fp = fopen($url, "r");
    if ($fp === false) {
        return new HtmlParser("");
    }

    $content = "";
    while (true) {
        $data = fread($fp, 8192);
        // fread() yields false on error and "" once nothing is left to read.
        if ($data === false || $data === "") {
            break;
        }
        $content .= $data;
    }
    fclose($fp);

    return new HtmlParser($content);
}
/**
 * Truncates a string containing (X)HTML without breaking its markup.
 *
 * The visible text is limited to about $max bytes. The cut is made at the
 * last occurrence of $separateur that fits in the limit; when the text node
 * being cut holds no separator in range, that whole node is kept. $suffix is
 * appended at the cut and every element still open is closed afterwards.
 *
 * Lengths are measured with strlen(), i.e. in bytes, not characters.
 *
 * A closing tag with no matching opening tag echoes an error message once
 * and is otherwise ignored. The original code popped an empty stack in an
 * endless loop in that situation.
 *
 * @param string $chaine     HTML to truncate
 * @param int    $max        maximum length of the visible text, in bytes
 * @param string $separateur cut only just before this string (default: a space)
 * @param string $suffix     text appended at the cut
 * @return string $chaine unchanged when short enough, else the truncated HTML
 */
function TronqueHtml($chaine, $max, $separateur = ' ', $suffix = ' ...')
{
    if (strlen(strip_tags($chaine)) <= $max) {
        return $chaine;
    }

    $tabElements = array(); // stack of the names of the elements currently open
    $cur_len = 0;           // visible text length accumulated so far
    $parser = new HtmlParser($chaine);

    while ($parser->parse()) {
        if ($parser->iNodeType === NODE_TYPE_ELEMENT) {
            $tabElements[] = $parser->iNodeName;
            continue;
        }

        if ($parser->iNodeType === NODE_TYPE_ENDELEMENT) {
            // Look for the innermost open element with that name.
            $niveau = count($tabElements) - 1;
            while ($niveau >= 0
                && strcasecmp($tabElements[$niveau], $parser->iNodeName) !== 0) {
                $niveau--;
            }
            if ($niveau < 0) {
                // Stray closing tag: report it once and leave the stack intact.
                echo 'Erreur : pas de balise ouvrante pour ' . $parser->iNodeName;
            } else {
                // Closes the element and anything left open inside it.
                array_splice($tabElements, $niveau);
            }
            continue;
        }

        if ($parser->iNodeType !== NODE_TYPE_TEXT) {
            continue;
        }

        $cur_max = $cur_len + $parser->iNodeEnd - $parser->iNodeStart;
        if ($cur_max < $max) {
            $cur_len = $cur_max;
            continue;
        }

        // The limit is reached inside (or exactly at the end of) this node.
        $coupe = $parser->iNodeEnd;
        if ($cur_max > $max) {
            $fenetre = substr($parser->iNodeValue, 0,
                $max - $cur_len + strlen($separateur));
            $pos = strrpos($fenetre, $separateur);
            if ($pos !== false) {
                $coupe = $parser->iNodeStart + $pos;
            }
        }

        $resultat = substr($chaine, 0, $coupe) . $suffix;
        while (($balise = array_pop($tabElements)) !== null) {
            $resultat .= '</' . $balise . '>';
        }
        return $resultat;
    }

    return $chaine;
}
?>

Invité
12 Jan 2011 à 09:28
Merci ! J'utilisais une autre fonction qui ne fonctionnait pas dans tous les cas ; avec la tienne j'ai de bien meilleurs résultats, merci.
Invité
08 Nov 2010 à 18:49
Un grand merci pour ce script qui m'enlève une épine du pied.
Bravo ;)
Forty
26 Avril 2009 à 10:33
J'ai fait l'essai en entourant dans ton exemple "essai" de strong et ça marche bien. Ça affiche :
essai de ...
et si je mets la balise fermante à la fin c'est bon aussi :
essai de ...
Invité
25 Avril 2009 à 22:48
Désolé, les balises html ont été supprimées dans mon message. Bon bref, un texte qui possède une balise STRONG est bien tronqué, mais la balise fermante n'est plus STRONG, mais S.
Sinon, ce code me serait très utile. Très bonne idée.
Invité
25 Avril 2009 à 22:45
Il y a un problème avec la balise on dirait...
Je soupçonne que ce soit lié au fait que "strong" possède 6 lettres.
TronqueHtml('essai de texte pour montrer qu\'il y a un probleme avec la balise strong', 10, ' ', ' ...');
-> essai de ...