From 66b021ba2073b4954ebbbea9b9c213ee2a46b4ef Mon Sep 17 00:00:00 2001 From: Alex Grintsvayg Date: Mon, 7 Oct 2019 17:02:36 -0400 Subject: [PATCH] replaced html5 parser --- composer.json | 2 +- composer.lock | 165 ++++++++++++++++++-------------------------- view/View.class.php | 15 ++-- 3 files changed, 77 insertions(+), 105 deletions(-) diff --git a/composer.json b/composer.json index 7261341b..249f1f23 100644 --- a/composer.json +++ b/composer.json @@ -13,6 +13,6 @@ "erusev/parsedown-extra": "^0.7.1", "pelago/emogrifier": "^2.0", "mustangostang/spyc": "^0.6.2", - "paquettg/php-html-parser": "^2.1" + "masterminds/html5": "^2.7" } } diff --git a/composer.lock b/composer.lock index bd7cd8d0..06517cbc 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "f15420a8ce3f7350d05c6ac92f3e54b0", + "content-hash": "f19fda681ea968799f6370f24cf03aa9", "packages": [ { "name": "erusev/parsedown", @@ -148,6 +148,73 @@ ], "time": "2018-05-24T02:18:53+00:00" }, + { + "name": "masterminds/html5", + "version": "2.7.0", + "source": { + "type": "git", + "url": "https://github.com/Masterminds/html5-php.git", + "reference": "104443ad663d15981225f99532ba73c2f1d6b6f2" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Masterminds/html5-php/zipball/104443ad663d15981225f99532ba73c2f1d6b6f2", + "reference": "104443ad663d15981225f99532ba73c2f1d6b6f2", + "shasum": "" + }, + "require": { + "ext-ctype": "*", + "ext-dom": "*", + "ext-libxml": "*", + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.35", + "sami/sami": "~2.0", + "satooshi/php-coveralls": "1.0.*" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.7-dev" + } + }, + "autoload": { + "psr-4": { + "Masterminds\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Matt Butcher", + "email": "technosophos@gmail.com" + }, + { + "name": "Matt Farina", + "email": "matt@mattfarina.com" + }, + { + "name": "Asmir Mustafic", + "email": "goetas@gmail.com" + } + ], + "description": "An HTML5 parser and serializer.", + "homepage": "http://masterminds.github.io/html5-php", + "keywords": [ + "HTML5", + "dom", + "html", + "parser", + "querypath", + "serializer", + "xml" + ], + "time": "2019-07-25T07:03:26+00:00" + }, { "name": "mustangostang/spyc", "version": "0.6.2", @@ -198,102 +265,6 @@ ], "time": "2017-02-24T16:06:33+00:00" }, - { - "name": "paquettg/php-html-parser", - "version": "2.1.0", - "source": { - "type": "git", - "url": "https://github.com/paquettg/php-html-parser.git", - "reference": "d1000936350fed2cb6c54058890d2d19c5ccba4f" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/paquettg/php-html-parser/zipball/d1000936350fed2cb6c54058890d2d19c5ccba4f", - "reference": "d1000936350fed2cb6c54058890d2d19c5ccba4f", - "shasum": "" - }, - "require": { - "ext-mbstring": "*", - "paquettg/string-encode": "~1.0.0", - "php": ">=7.1" - }, - "require-dev": { - "mockery/mockery": "^1.2", - "php-coveralls/php-coveralls": "^2.1", - "phpunit/phpunit": "^7.5.1" - }, - "type": "library", - "autoload": { - "psr-4": { - "PHPHtmlParser\\": "src/PHPHtmlParser" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Gilles Paquette", - "email": "paquettg@gmail.com", - "homepage": "http://gillespaquette.ca" - } - ], - "description": "An HTML DOM parser. It allows you to manipulate HTML. Find tags on an HTML page with selectors just like jQuery.", - "homepage": "https://github.com/paquettg/php-html-parser", - "keywords": [ - "dom", - "html", - "parser" - ], - "time": "2019-08-18T18:27:45+00:00" - }, - { - "name": "paquettg/string-encode", - "version": "1.0.1", - "source": { - "type": "git", - "url": "https://github.com/paquettg/string-encoder.git", - "reference": "a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/paquettg/string-encoder/zipball/a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee", - "reference": "a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee", - "shasum": "" - }, - "require": { - "php": ">=7.1" - }, - "require-dev": { - "phpunit/phpunit": "^7.5.1" - }, - "type": "library", - "autoload": { - "psr-0": { - "stringEncode": "src/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Gilles Paquette", - "email": "paquettg@gmail.com", - "homepage": "http://gillespaquette.ca" - } - ], - "description": "Facilitating the process of altering string encoding in PHP.", - "homepage": "https://github.com/paquettg/string-encoder", - "keywords": [ - "charset", - "encoding", - "string" - ], - "time": "2018-12-21T02:25:09+00:00" - }, { "name": "pelago/emogrifier", "version": "v2.2.0", diff --git a/view/View.class.php b/view/View.class.php index 1fa6f29f..44c5be04 100644 --- a/view/View.class.php +++ b/view/View.class.php @@ -179,18 +179,19 @@ class View public static function safeExternalLinks(string $html, string $domain): string { - //temporarily disable this as it is breaking JS - return $html; + $parser = new Masterminds\HTML5(); + $dom = $parser->loadHTML($html); + $links = $dom->getElementsByTagName('body') ? + $dom->getElementsByTagName('body')[0]->getElementsByTagName('a') : + $dom->getElementsByTagName('a'); - $dom = new PHPHtmlParser\Dom(); - $dom->load($html, ['cleanupInput' => false, 'removeDoubleSpace' => false, 'removeSmartyScripts' => false]); - - foreach ($dom->find('body a') as $link) { + foreach ($links as $link) { if ($link->getAttribute('href') && static::isLinkExternal($link->getAttribute('href'), $domain)) { $link->setAttribute('rel', "noopener noreferrer"); } } - return $dom->root->outerHtml(); + + return $parser->saveHTML($dom); } public static function isLinkExternal(string $url, string $domain): bool