- Added multi-level depth parsing.
- Updated usage of HTML doc in page content to be more efficient.
- Removed now redundant PageContentTest cases.
- Made some include system fixes based upon testing.
*/
public function render(bool $blankIncludes = false): string
{
- $content = $this->page->html ?? '';
+ $html = $this->page->html ?? '';
- if (!config('app.allow_content_scripts')) {
- $content = HtmlContentFilter::removeScripts($content);
+ if (empty($html)) {
+ return $html;
}
- if ($blankIncludes) {
- $content = $this->blankPageIncludes($content);
- } else {
- for ($includeDepth = 0; $includeDepth < 3; $includeDepth++) {
- $content = $this->parsePageIncludes($content);
+ $doc = new HtmlDocument($html);
+
+ $contentProvider = function (int $id) use ($blankIncludes) {
+ if ($blankIncludes) {
+ return '';
}
+ return Page::visible()->find($id)->html ?? '';
+ };
+
+ $parser = new PageIncludeParser($doc, $contentProvider);
+ $nodesAdded = 1;
+
+ for ($includeDepth = 0; $includeDepth < 3 && $nodesAdded !== 0; $includeDepth++) {
+ $nodesAdded = $parser->parse();
+ }
+
+ if (!config('app.allow_content_scripts')) {
+ HtmlContentFilter::removeScriptsFromDocument($doc);
}
- return $content;
+ return $doc->getBodyInnerHtml();
}
/**
return $tree->toArray();
}
-
- /**
- * Remove any page include tags within the given HTML.
- */
- protected function blankPageIncludes(string $html): string
- {
- return preg_replace("/{{@\s?([0-9].*?)}}/", '', $html);
- }
-
- /**
- * Parse any include tags "{{@<page_id>#section}}" to be part of the page.
- */
- protected function parsePageIncludes(string $html): string
- {
- $matches = [];
- preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches);
-
- foreach ($matches[1] as $index => $includeId) {
- $fullMatch = $matches[0][$index];
- $splitInclude = explode('#', $includeId, 2);
-
- // Get page id from reference
- $pageId = intval($splitInclude[0]);
- if (is_nan($pageId)) {
- continue;
- }
-
- // Find page to use, and default replacement to empty string for non-matches.
- /** @var ?Page $matchedPage */
- $matchedPage = Page::visible()->find($pageId);
- $replacement = '';
-
- if ($matchedPage && count($splitInclude) === 1) {
- // If we only have page id, just insert all page html and continue.
- $replacement = $matchedPage->html;
- } elseif ($matchedPage && count($splitInclude) > 1) {
- // Otherwise, if our include tag defines a section, load that specific content
- $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]);
- $replacement = trim($innerContent);
- }
-
- $themeReplacement = Theme::dispatch(
- ThemeEvents::PAGE_INCLUDE_PARSE,
- $includeId,
- $replacement,
- clone $this->page,
- $matchedPage ? (clone $matchedPage) : null,
- );
-
- // Perform the content replacement
- $html = str_replace($fullMatch, $themeReplacement ?? $replacement, $html);
- }
-
- return $html;
- }
-
- /**
- * Fetch the content from a specific section of the given page.
- */
- protected function fetchSectionOfPage(Page $page, string $sectionId): string
- {
- $topLevelTags = ['table', 'ul', 'ol', 'pre'];
- $doc = new HtmlDocument($page->html);
-
- // Search included content for the id given and blank out if not exists.
- $matchingElem = $doc->getElementById($sectionId);
- if ($matchingElem === null) {
- return '';
- }
-
- // Otherwise replace the content with the found content
- // Checks if the top-level wrapper should be included by matching on tag types
- $isTopLevel = in_array(strtolower($matchingElem->nodeName), $topLevelTags);
- if ($isTopLevel) {
- return $doc->getNodeOuterHtml($matchingElem);
- }
-
- return $doc->getNodeInnerHtml($matchingElem);
- }
}
*/
protected array $contents = [];
- protected bool $isTopLevel;
+ protected bool $isTopLevel = false;
public function __construct(
string $html,
protected array $toCleanup = [];
public function __construct(
- protected string $pageHtml,
+ protected HtmlDocument $doc,
protected Closure $pageContentForId,
) {
}
/**
* Parse out the include tags.
+ * Returns the count of new content DOM nodes added to the document.
*/
- public function parse(): string
+ public function parse(): int
{
- $doc = new HtmlDocument($this->pageHtml);
-
- $tags = $this->locateAndIsolateIncludeTags($doc);
+ $nodesAdded = 0;
+ $tags = $this->locateAndIsolateIncludeTags();
foreach ($tags as $tag) {
$htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
}
}
- $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
+ $replacementNodes = $content->toDomNodes();
+ $nodesAdded += count($replacementNodes);
+ $this->replaceNodeWithNodes($tag->domNode, $replacementNodes);
}
$this->cleanup();
- return $doc->getBodyInnerHtml();
+ return $nodesAdded;
}
/**
* own nodes in the DOM for future targeted manipulation.
* @return PageIncludeTag[]
*/
- protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
+ protected function locateAndIsolateIncludeTags(): array
{
- $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
+ $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");
$includeTags = [];
/** @var DOMNode $node */
foreach ($replacements as $replacement) {
if ($replacement->ownerDocument !== $targetDoc) {
- $replacement = $targetDoc->adoptNode($replacement);
+ $replacement = $targetDoc->importNode($replacement, true);
}
$toReplace->parentNode->insertBefore($replacement, $toReplace);
return $parent;
}
- $parent = $parent->parentElement;
+ $parent = $parent->parentNode;
} while ($parent !== null);
return null;
$hash = md5($content);
return $this->cache->remember('custom-head-export:' . $hash, 86400, function () use ($content) {
- return HtmlContentFilter::removeScripts($content);
+ return HtmlContentFilter::removeScriptsFromHtmlString($content);
});
}
class HtmlContentFilter
{
/**
- * Remove all the script elements from the given HTML.
+ * Remove all the script elements from the given HTML document.
*/
- public static function removeScripts(string $html): string
+ public static function removeScriptsFromDocument(HtmlDocument $doc)
{
- if (empty($html)) {
- return $html;
- }
-
- $doc = new HtmlDocument($html);
-
// Remove standard script tags
$scriptElems = $doc->queryXPath('//script');
static::removeNodes($scriptElems);
// Remove 'on*' attributes
$onAttributes = $doc->queryXPath('//@*[starts-with(name(), \'on\')]');
static::removeAttributes($onAttributes);
+ }
+
+ /**
+ * Remove scripts from the given HTML string.
+ */
+ public static function removeScriptsFromHtmlString(string $html): string
+ {
+ if (empty($html)) {
+ return $html;
+ }
+
+ $doc = new HtmlDocument($html);
+ static::removeScriptsFromDocument($doc);
return $doc->getBodyInnerHtml();
}
class PageContentTest extends TestCase
{
- protected $base64Jpeg = '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k=';
+ protected string $base64Jpeg = '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k=';
public function test_page_includes()
{
$this->assertEquals('', $page->text);
}
- public function test_page_includes_do_not_break_tables()
- {
- $page = $this->entities->page();
- $secondPage = $this->entities->page();
-
- $content = '<table id="table"><tbody><tr><td>test</td></tr></tbody></table>';
- $secondPage->html = $content;
- $secondPage->save();
-
- $page->html = "{{@{$secondPage->id}#table}}";
- $page->save();
-
- $pageResp = $this->asEditor()->get($page->getUrl());
- $pageResp->assertSee($content, false);
- }
-
- public function test_page_includes_do_not_break_code()
- {
- $page = $this->entities->page();
- $secondPage = $this->entities->page();
-
- $content = '<pre id="bkmrk-code"><code>var cat = null;</code></pre>';
- $secondPage->html = $content;
- $secondPage->save();
-
- $page->html = "{{@{$secondPage->id}#bkmrk-code}}";
- $page->save();
-
- $pageResp = $this->asEditor()->get($page->getUrl());
- $pageResp->assertSee($content, false);
- }
-
public function test_page_includes_rendered_on_book_export()
{
$page = $this->entities->page();
namespace Tests\Unit;
use BookStack\Entities\Tools\PageIncludeParser;
+use BookStack\Util\HtmlDocument;
use Tests\TestCase;
class PageIncludeParserTest extends TestCase
);
}
- protected function runParserTest(string $html, array $contentById, string $expected)
+ protected function runParserTest(string $html, array $contentById, string $expected): void
{
- $parser = new PageIncludeParser($html, function (int $id) use ($contentById) {
+ $doc = new HtmlDocument($html);
+ $parser = new PageIncludeParser($doc, function (int $id) use ($contentById) {
return $contentById[strval($id)] ?? '';
});
- $result = $parser->parse();
- $this->assertEquals($expected, $result);
+ $parser->parse();
+ $this->assertEquals($expected, $doc->getBodyInnerHtml());
}
}