Skip to content

Commit

Permalink
Iterate node lists with foreach
Browse files Browse the repository at this point in the history
`DOMNodeList` implements `Traversable`.

There are some `for` loops left but we cannot simply replace those:
PHP follows the DOM specification, which requires that `NodeList`
objects in the DOM are live. As a result, any operation that removes
a node list member node from its parent (such as `removeChild`,
`replaceChild` or `appendChild`) will cause the next node
in the iterator to be skipped.

We could work around that by converting those node lists to static arrays
using `iterator_to_array` but not sure if it is worth it.
  • Loading branch information
jtojnar committed Oct 9, 2024
1 parent 006ac42 commit 0238436
Showing 1 changed file with 14 additions and 14 deletions.
28 changes: 14 additions & 14 deletions src/Readability.php
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,7 @@ public function addFootnotes(\DOMElement $articleContent): void
$articleLinks = $articleContent->getElementsByTagName('a');
$linkCount = 0;

for ($i = 0; $i < $articleLinks->length; ++$i) {
$articleLink = $articleLinks->item($i);
foreach ($articleLinks as $articleLink) {
$footnoteLink = $articleLink->cloneNode(true);
$refLink = $this->dom->createElement('a');
$footnote = $this->dom->createElement('li');
Expand Down Expand Up @@ -383,8 +382,8 @@ public function prepArticle(\DOMNode $articleContent): void

// Remove service data-candidate attribute.
$elems = $xpath->query('.//*[@data-candidate]', $articleContent);
for ($i = $elems->length - 1; $i >= 0; --$i) {
$elems->item($i)->removeAttribute('data-candidate');
foreach ($elems as $elem) {
$elem->removeAttribute('data-candidate');
}

// Clean out junk from the article content.
Expand Down Expand Up @@ -520,11 +519,12 @@ public function getLinkDensity(\DOMElement $e, bool $excludeExternal = false): f
$textLength = mb_strlen($this->getInnerText($e, true, true));
$linkLength = 0;

for ($dRe = $this->domainRegExp, $i = 0, $il = $links->length; $i < $il; ++$i) {
if ($excludeExternal && $dRe && !preg_match($dRe, $links->item($i)->getAttribute('href'))) {
$dRe = $this->domainRegExp;
foreach ($links as $link) {
if ($excludeExternal && $dRe && !preg_match($dRe, $link->getAttribute('href'))) {
continue;
}
$linkLength += mb_strlen($this->getInnerText($links->item($i)));
$linkLength += mb_strlen($this->getInnerText($link));
}

if ($textLength > 0 && $linkLength > 0) {
Expand Down Expand Up @@ -640,15 +640,15 @@ public function cleanConditionally(\DOMElement $e, string $tag): void
$embedCount = 0;
$embeds = $node->getElementsByTagName('embed');

for ($ei = 0, $il = $embeds->length; $ei < $il; ++$ei) {
if (preg_match($this->regexps['media'], $embeds->item($ei)->getAttribute('src'))) {
foreach ($embeds as $embed) {
if (preg_match($this->regexps['media'], $embed->getAttribute('src'))) {
++$embedCount;
}
}

$embeds = $node->getElementsByTagName('iframe');
for ($ei = 0, $il = $embeds->length; $ei < $il; ++$ei) {
if (preg_match($this->regexps['media'], $embeds->item($ei)->getAttribute('src'))) {
foreach ($embeds as $embed) {
if (preg_match($this->regexps['media'], $embed->getAttribute('src'))) {
++$embedCount;
}
}
Expand Down Expand Up @@ -1018,15 +1018,15 @@ protected function grabArticle(?\DOMElement $page = null)
* A score is determined by things like number of commas, class names, etc.
* Maybe eventually link density.
*/
for ($pt = 0, $scored = \count($nodesToScore); $pt < $scored; ++$pt) {
$ancestors = $this->getAncestors($nodesToScore[$pt], 5);
foreach ($nodesToScore as $nodeToScore) {
$ancestors = $this->getAncestors($nodeToScore, 5);

// No parent node? Move on...
if (0 === \count($ancestors)) {
continue;
}

$innerText = $this->getInnerText($nodesToScore[$pt]);
$innerText = $this->getInnerText($nodeToScore);

// If this paragraph is less than MIN_PARAGRAPH_LENGTH (default:20) characters, don't even count it.
if (mb_strlen($innerText) < self::MIN_PARAGRAPH_LENGTH) {
Expand Down

0 comments on commit 0238436

Please sign in to comment.