You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
71 lines
2.2 KiB
71 lines
2.2 KiB
<?php |
|
|
|
/**
 * Parses a URI into the components and fragment identifier as specified
 * by RFC 3986.
 *
 * The parser only splits the string into scheme, userinfo, host, port,
 * path, query and fragment; it performs no validation of those parts.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    /**
     * Creates the parser together with the percent-encoder that parse()
     * runs every URI through before splitting it.
     */
    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI representation of URI. This representation has
     *         not been validated yet and may not conform to RFC. Boolean
     *         false is returned instead when the URI pattern could not be
     *         applied to the string at all.
     */
    public function parse($uri)
    {
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        // The numbers below are the capture groups holding each component.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 9. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        // Every part of the pattern is optional, so a zero result is not
        // expected; this guard chiefly covers preg_match() itself failing.
        if (!$result) return false; // *really* invalid URI

        // separate out parts
        // The outer groups (1, 3, 6, 8) include their delimiter (":", "//",
        // "?", "#"), so they are non-empty exactly when the component is
        // present; this keeps "absent" (null) distinct from "present but
        // empty" ('').
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            // [userinfo "@"] host [":" port]; a bracketed host is matched
            // as a unit so the colons inside it are not taken for the port
            // separator.
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // isset() rather than empty(): "0" is a legal host, but
            // empty("0") is true and would silently discard it.
            $host       = isset($matches[3]) ? $matches[3] : '';
            // A bare ":" with no digits is an empty port, which RFC 3986
            // treats the same as no port at all; do not cast it to 0.
            $port       = (isset($matches[5]) && $matches[5] !== '')
                ? (int) $matches[5]
                : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
|
|
|
// vim: et sw=4 sts=4
|
|
|