|$)) # - Normative HTML comments.
|
[^a-zA-Z][^>]*> # - Closing tags with invalid tag names.
|
]*> # - Invalid markup declaration nodes. Not all invalid nodes
# are matched so as to avoid breaking legacy behaviors.
)
|
(<[^>]*(>|$)|>) # Tag-like spans of text.
~x
REGEX;
return preg_replace_callback( $token_pattern, '_wp_kses_split_callback', $content );
}
/**
 * Retrieves the names of HTML attributes whose values hold a URL.
 *
 * The returned list covers every HTML attribute that, per the HTML
 * specification, must contain a URL.
 *
 * URI attributes are listed here whether or not KSES itself allows them.
 *
 * @link https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
 *
 * @since 5.0.1
 *
 * @return string[] HTML attribute names whose value contains a URL.
 */
function wp_kses_uri_attributes() {
	// Built-in list; it is handed to the filter below before being returned.
	$default_uri_attributes = array(
		'action',
		'archive',
		'background',
		'cite',
		'classid',
		'codebase',
		'data',
		'formaction',
		'href',
		'icon',
		'longdesc',
		'manifest',
		'poster',
		'profile',
		'src',
		'usemap',
		'xmlns',
	);

	/**
	 * Filters the list of attributes that are required to contain a URL.
	 *
	 * Hook in here to register any `data-` attributes whose values must be
	 * validated as a URL.
	 *
	 * @since 5.0.1
	 *
	 * @param string[] $uri_attributes HTML attribute names whose value contains a URL.
	 */
	return apply_filters( 'wp_kses_uri_attributes', $default_uri_attributes );
}
/**
 * Callback for `wp_kses_split()`.
 *
 * Forwards the full regular-expression match to `wp_kses_split2()`, together
 * with the allowed HTML and allowed protocols read from global variables.
 *
 * @since 3.1.0
 * @access private
 * @ignore
 *
 * @global array[]|string $pass_allowed_html      An array of allowed HTML elements and attributes,
 *                                                or a context name such as 'post'.
 * @global string[]       $pass_allowed_protocols Array of allowed URL protocols.
 *
 * @param array $matches Regular-expression matches supplied by `preg_replace_callback()`;
 *                       index 0 holds the full match.
 * @return string The value returned by `wp_kses_split2()` for the matched text.
 */
function _wp_kses_split_callback( $matches ) {
	global $pass_allowed_html, $pass_allowed_protocols;

	// Only the complete match is processed; any capture groups are ignored.
	$matched_token = $matches[0];

	return wp_kses_split2( $matched_token, $pass_allowed_html, $pass_allowed_protocols );
}
/**
* Callback for `wp_kses_split()` for fixing malformed HTML tags.
*
* This function does a lot of work. It rejects some very malformed things like
* `<:::>`. It returns an empty string, if the element isn't allowed (look ma, no
* `strip_tags()`!). Otherwise it splits the tag into an element and an attribute
* list.
*
* After the tag is split into an element and an attribute list, it is run
* through another filter which will remove illegal attributes and once that is
* completed, will be returned.
*
* @access private
* @ignore
* @since 1.0.0
* @since 6.6.0 Recognize additional forms of invalid HTML which convert into comments.
*
* @param string $content Content to filter.
* @param array[]|string $allowed_html An array of allowed HTML elements and attributes,
* or a context name such as 'post'. See wp_kses_allowed_html()
* for the list of accepted context names.
* @param string[] $allowed_protocols Array of allowed URL protocols.
*
* @return string Fixed HTML element
*/
function wp_kses_split2( $content, $allowed_html, $allowed_protocols ) {
$content = wp_kses_stripslashes( $content );
/*
* The regex pattern used to split HTML into chunks attempts
* to split on HTML token boundaries. This function should
* thus receive chunks that _either_ start with meaningful
* syntax tokens, like a tag `