WordPress Version: 6.3
/**
 * Percent-encodes the non-ASCII bytes of a string for use in a URI.
 *
 * The input is walked one byte at a time. Bytes below 128 are copied through
 * unchanged, or passed through rawurlencode() when `$encode_ascii_characters`
 * is true. Bytes of 128 and above are buffered into a sequence of 2, 3 or 4
 * bytes (the size is chosen from the value of the first buffered byte) and,
 * once the sequence is complete, written out as `%xx` triplets using
 * lowercase hexadecimal digits.
 *
 * When `$length` is non-zero, encoding stops before appending any piece that
 * would make the result longer than `$length`. A multi-byte sequence that is
 * still incomplete when the input ends is not written to the result.
 *
 * @since 1.5.0
 * @since 5.8.3 Added the `encode_ascii_characters` parameter.
 *
 * @param string $utf8_string             String to encode.
 * @param int    $length                  Maximum length of the encoded result. 0 means no limit.
 * @param bool   $encode_ascii_characters Whether to also encode ASCII characters such as < " '
 * @return string String with its non-ASCII bytes percent-encoded for a URI.
 */
function utf8_uri_encode($utf8_string, $length = 0, $encode_ascii_characters = false)
{
    $encoded        = '';
    $encoded_length = 0;
    $pending_octets = array();
    $expected_count = 1;

    // A falsy $length disables every length check below.
    $is_limited = (bool) $length;

    // NOTE(review): these helpers are defined elsewhere; presumably they make
    // strlen() count bytes while mbstring overloading is active - confirm.
    mbstring_binary_safe_encoding();
    $byte_count = strlen($utf8_string);
    reset_mbstring_encoding();

    for ($offset = 0; $offset < $byte_count; ++$offset) {
        $octet = ord($utf8_string[$offset]);

        // Single-byte (ASCII) path.
        if ($octet < 128) {
            $piece = chr($octet);
            if ($encode_ascii_characters) {
                $piece = rawurlencode($piece);
            }
            $piece_length = strlen($piece);

            if ($is_limited && $encoded_length + $piece_length > $length) {
                break;
            }

            $encoded        .= $piece;
            $encoded_length += $piece_length;
            continue;
        }

        // Multi-byte path: the first buffered byte decides the sequence size.
        if (!$pending_octets) {
            $expected_count = 4;
            if ($octet < 224) {
                $expected_count = 2;
            } elseif ($octet < 240) {
                $expected_count = 3;
            }
        }

        $pending_octets[] = $octet;

        // Each byte of the sequence costs three characters ("%xx") once written.
        $sequence_cost = $expected_count * 3;
        if ($is_limited && $encoded_length + $sequence_cost > $length) {
            break;
        }

        if (count($pending_octets) !== $expected_count) {
            continue;
        }

        // Sequence complete: flush it and start over.
        foreach ($pending_octets as $buffered) {
            $encoded .= '%' . dechex($buffered);
        }
        $encoded_length += $sequence_cost;
        $pending_octets  = array();
        $expected_count  = 1;
    }

    return $encoded;
}