replaced html5 parser

This commit is contained in:
Alex Grintsvayg 2019-10-07 17:02:36 -04:00
parent 6546f91854
commit 66b021ba20
No known key found for this signature in database
GPG key ID: AEB3F089F86A22B5
3 changed files with 77 additions and 105 deletions

View file

@ -13,6 +13,6 @@
"erusev/parsedown-extra": "^0.7.1",
"pelago/emogrifier": "^2.0",
"mustangostang/spyc": "^0.6.2",
"paquettg/php-html-parser": "^2.1"
"masterminds/html5": "^2.7"
}
}

165
composer.lock generated
View file

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "f15420a8ce3f7350d05c6ac92f3e54b0",
"content-hash": "f19fda681ea968799f6370f24cf03aa9",
"packages": [
{
"name": "erusev/parsedown",
@ -148,6 +148,73 @@
],
"time": "2018-05-24T02:18:53+00:00"
},
{
"name": "masterminds/html5",
"version": "2.7.0",
"source": {
"type": "git",
"url": "https://github.com/Masterminds/html5-php.git",
"reference": "104443ad663d15981225f99532ba73c2f1d6b6f2"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/Masterminds/html5-php/zipball/104443ad663d15981225f99532ba73c2f1d6b6f2",
"reference": "104443ad663d15981225f99532ba73c2f1d6b6f2",
"shasum": ""
},
"require": {
"ext-ctype": "*",
"ext-dom": "*",
"ext-libxml": "*",
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "^4.8.35",
"sami/sami": "~2.0",
"satooshi/php-coveralls": "1.0.*"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.7-dev"
}
},
"autoload": {
"psr-4": {
"Masterminds\\": "src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Matt Butcher",
"email": "technosophos@gmail.com"
},
{
"name": "Matt Farina",
"email": "matt@mattfarina.com"
},
{
"name": "Asmir Mustafic",
"email": "goetas@gmail.com"
}
],
"description": "An HTML5 parser and serializer.",
"homepage": "http://masterminds.github.io/html5-php",
"keywords": [
"HTML5",
"dom",
"html",
"parser",
"querypath",
"serializer",
"xml"
],
"time": "2019-07-25T07:03:26+00:00"
},
{
"name": "mustangostang/spyc",
"version": "0.6.2",
@ -198,102 +265,6 @@
],
"time": "2017-02-24T16:06:33+00:00"
},
{
"name": "paquettg/php-html-parser",
"version": "2.1.0",
"source": {
"type": "git",
"url": "https://github.com/paquettg/php-html-parser.git",
"reference": "d1000936350fed2cb6c54058890d2d19c5ccba4f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/paquettg/php-html-parser/zipball/d1000936350fed2cb6c54058890d2d19c5ccba4f",
"reference": "d1000936350fed2cb6c54058890d2d19c5ccba4f",
"shasum": ""
},
"require": {
"ext-mbstring": "*",
"paquettg/string-encode": "~1.0.0",
"php": ">=7.1"
},
"require-dev": {
"mockery/mockery": "^1.2",
"php-coveralls/php-coveralls": "^2.1",
"phpunit/phpunit": "^7.5.1"
},
"type": "library",
"autoload": {
"psr-4": {
"PHPHtmlParser\\": "src/PHPHtmlParser"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Gilles Paquette",
"email": "paquettg@gmail.com",
"homepage": "http://gillespaquette.ca"
}
],
"description": "An HTML DOM parser. It allows you to manipulate HTML. Find tags on an HTML page with selectors just like jQuery.",
"homepage": "https://github.com/paquettg/php-html-parser",
"keywords": [
"dom",
"html",
"parser"
],
"time": "2019-08-18T18:27:45+00:00"
},
{
"name": "paquettg/string-encode",
"version": "1.0.1",
"source": {
"type": "git",
"url": "https://github.com/paquettg/string-encoder.git",
"reference": "a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/paquettg/string-encoder/zipball/a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee",
"reference": "a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee",
"shasum": ""
},
"require": {
"php": ">=7.1"
},
"require-dev": {
"phpunit/phpunit": "^7.5.1"
},
"type": "library",
"autoload": {
"psr-0": {
"stringEncode": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Gilles Paquette",
"email": "paquettg@gmail.com",
"homepage": "http://gillespaquette.ca"
}
],
"description": "Facilitating the process of altering string encoding in PHP.",
"homepage": "https://github.com/paquettg/string-encoder",
"keywords": [
"charset",
"encoding",
"string"
],
"time": "2018-12-21T02:25:09+00:00"
},
{
"name": "pelago/emogrifier",
"version": "v2.2.0",

View file

@ -179,18 +179,19 @@ class View
/**
 * Marks external links in an HTML fragment with rel="noopener noreferrer".
 *
 * Every anchor that has a non-empty href and for which static::isLinkExternal()
 * returns true for the given $domain gets its rel attribute overwritten; the
 * document is then serialized back to a string.
 *
 * NOTE(review): this listing appears to interleave the removed (PHPHtmlParser)
 * and added (Masterminds\HTML5) lines of a single change without +/- markers:
 * there are two `foreach` headers for one closing brace and two consecutive
 * `return` statements. Presumably only the Masterminds\HTML5 lines are live
 * after the change -- confirm against the repository before relying on it.
 *
 * @param string $html   HTML markup to scan for anchors.
 * @param string $domain Domain passed through to isLinkExternal() for the external check.
 *
 * @return string The HTML after processing.
 */
public static function safeExternalLinks(string $html, string $domain): string
{
//temporarily disable this as it is breaking JS
// NOTE(review): read literally, this early return makes everything below
// unreachable; presumably it belongs to the removed side of the change.
return $html;
$parser = new Masterminds\HTML5();
$dom = $parser->loadHTML($html);
// Prefer anchors inside <body>, falling back to every anchor in the document.
// NOTE(review): assuming $dom is a DOMDocument, getElementsByTagName() returns
// a DOMNodeList object, which is truthy even when empty -- so the fallback
// branch looks unreachable and `[0]` may yield null when no <body> element
// exists. Checking the list length is likely what was intended; TODO confirm.
$links = $dom->getElementsByTagName('body') ?
$dom->getElementsByTagName('body')[0]->getElementsByTagName('a') :
$dom->getElementsByTagName('a');
// NOTE(review): the next three statements use the PHPHtmlParser API and look
// like the pre-change implementation (they would also overwrite $dom above).
$dom = new PHPHtmlParser\Dom();
$dom->load($html, ['cleanupInput' => false, 'removeDoubleSpace' => false, 'removeSmartyScripts' => false]);
foreach ($dom->find('body a') as $link) {
foreach ($links as $link) {
// Only touch anchors that carry an href and point outside $domain.
if ($link->getAttribute('href') && static::isLinkExternal($link->getAttribute('href'), $domain)) {
// Overwrites any existing rel value rather than appending to it.
$link->setAttribute('rel', "noopener noreferrer");
}
}
// NOTE(review): two returns in a row -- the first matches the PHPHtmlParser
// object created above, the second the Masterminds\HTML5 parser.
return $dom->root->outerHtml();
return $parser->saveHTML($dom);
}
public static function isLinkExternal(string $url, string $domain): bool