summaryrefslogtreecommitdiff
blob: 93efa99de4a77415eb6d0c5ac045b7865759e6f1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
<?php

namespace Flow\Parsoid;

use DOMDocument;
use DOMXPath;
use Flow\Exception\FlowException;
use Flow\Model\AbstractRevision;
use Flow\Model\PostRevision;
use Title;

class ContentFixer {
	/**
	 * @var Fixer[] The fixers run against content, in the order they were
	 *  handed to the constructor.
	 */
	protected $contentFixers = array();

	/**
	 * Builds a ContentFixer out of one or more Fixer instances.
	 *
	 * Only the first argument is covered by the type hint; any additional
	 * arguments are collected through func_get_args() and checked by hand.
	 *
	 * @param Fixer $contentFixer...
	 * @throws FlowException When any provided argument is not a Fixer
	 */
	public function __construct( Fixer $contentFixer /* [, Fixer $contentFixer2 [, ...]] */ ) {
		$this->contentFixers = func_get_args();

		// The signature cannot type-check the extra arguments, so verify
		// every collected argument explicitly.
		foreach ( $this->contentFixers as $candidate ) {
			if ( $candidate instanceof Fixer ) {
				continue;
			}

			throw new FlowException( 'Invalid content fixer', 'default' );
		}
	}

	/**
	 * Fetches the html content of a revision and runs it through all
	 * registered fixers, using the title of the revision's collection.
	 *
	 * @param AbstractRevision $revision
	 * @return string
	 */
	public function getContent( AbstractRevision $revision ) {
		$html = $revision->getContent( 'html' );
		$title = $revision->getCollection()->getTitle();

		return $this->apply( $html, $title );
	}

	/**
	 * Runs every registered content fixer over the provided HTML content.
	 * The resulting content is then suitable for display to the end user.
	 *
	 * A fixer whose XPath query fails is logged and dropped from this
	 * instance, so it is not tried again on later calls.
	 *
	 * @param string $content Html
	 * @param Title $title
	 * @return string Html
	 */
	public function apply( $content, Title $title ) {
		$document = self::createDOM( $content );
		$finder = new DOMXPath( $document );

		foreach ( $this->contentFixers as $index => $fixer ) {
			$matches = $finder->query( $fixer->getXPath() );

			if ( $matches ) {
				// Valid query: hand each matched node to the fixer.
				foreach ( $matches as $match ) {
					$fixer->apply( $match, $title );
				}
				continue;
			}

			// The query failed: report it and forget about this fixer.
			$message = __METHOD__ . ': Invalid XPath from ' . get_class( $fixer ) . ' of: ' . $fixer->getXPath();
			wfDebugLog( 'Flow', $message );
			unset( $this->contentFixers[$index] );
		}

		// Only the inner html of the first <body> element is returned.
		$body = $document->getElementsByTagName( 'body' )->item( 0 );

		return Utils::getInnerHtml( $body );
	}

	/**
	 * Builds a DOM from parsoid HTML, taking care of BC with previously
	 * stored parsoid content and of encoding issues.
	 *
	 * @param string $content HTML from parsoid
	 * @return DOMDocument
	 */
	static public function createDOM( $content ) {
		/*
		 * BC: content currently comes from parsoid and is stored wrapped
		 * in <body> tags, but prior to I0d9659f we were storing only the
		 * contents and not the body tag itself.
		 *
		 * The body tag is required, otherwise <meta> tags at the top are
		 * magic'd into <head> rather than kept with the content.
		 */
		$startsWithBody = substr( $content, 0, 5 ) === '<body';
		if ( !$startsWithBody ) {
			$content = '<body>' . $content . '</body>';
		}

		/*
		 * Workaround because DOMDocument can't guess the charset, while
		 * the content should be utf-8. The xml encoding declaration is
		 * prepended here; alternative "workarounds" would be:
		 * * <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
		 * * mb_convert_encoding( $content, 'HTML-ENTITIES', 'UTF-8' );
		 */
		$prefixed = '<?xml encoding="utf-8"?>' . $content;

		return Utils::createDOM( $prefixed );
	}
}