[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php
/**
 * Locate a byte index given a UTF-8 character index
 * @package utf8
 */

//--------------------------------------------------------------------
/**
 * Counts the UTF-8 characters contained in a run of bytes.
 *
 * Every byte that is not a continuation byte (10xxxxxx) starts a
 * character, so the character count is the number of bytes left after
 * the continuation bytes are removed. This is the same boundary rule
 * used by utf8_locate_current_chr() and utf8_locate_next_chr(), and it
 * assumes well formed UTF-8 that starts on a character boundary.
 *
 * Used instead of strlen(utf8_decode()), since utf8_decode() is
 * deprecated as of PHP 8.2.
 *
 * @param string bytes to count characters in
 * @return int number of UTF-8 characters
 * @package utf8
 */
function utf8_position_count_chars($bytes) {
    // No "u" modifier: the pattern must be applied byte by byte
    return strlen((string)preg_replace('/[\x80-\xBF]+/', '', (string)$bytes));
}

//--------------------------------------------------------------------
/**
 * Given a string and a character index in the string, in
 * terms of the UTF-8 character position, returns the byte
 * index of that character. Can be useful when you want to use
 * PHP's native string functions but be warned, locating
 * the byte can be expensive.
 * Takes variable number of parameters - first must be
 * the search string then 1 to n UTF-8 character positions
 * to obtain byte indexes for - it is more efficient to search
 * the string for multiple characters at once, than make
 * repeated calls to this function.
 *
 * The character positions are sorted before they are processed, so
 * when several are supplied the returned byte indexes are in ascending
 * order of character position, not in the order they were passed.
 * A position beyond the end of the string yields strlen() of the string.
 *
 * @author Chris Smith
 * @param string string to locate index in
 * @param int (n times) UTF-8 character position
 * @return mixed int if only one character position was given, array of
 *               int otherwise; boolean FALSE if the first argument is
 *               not a string
 * @package utf8
 */
function utf8_byte_position() {

    $args = func_get_args();

    // Plain assignment: binding a reference to a function result
    // raises "Only variables should be assigned by reference"
    $str = array_shift($args);
    if (!is_string($str)) return false;

    $result = array();

    // trivial byte index, character offset pair
    $prev = array(0,0);

    // use a short piece of str to estimate bytes per character
    // $i (& $j) -> byte indexes into $str
    $i = utf8_locate_next_chr($str, 300);

    // $c -> character offset into $str
    $c = utf8_position_count_chars(substr($str,0,$i));

    // deal with arguments from lowest to highest
    sort($args);

    foreach ($args as $offset) {
        // sanity checks FIXME

        // 0 is an easy check
        if ($offset == 0) { $result[] = 0; continue; }

        // ensure no endless looping
        $safety_valve = 50;

        do {

            if ( ($c - $prev[1]) == 0 ) {
                // Hack: gone past end of string
                // (also avoids dividing by zero in the estimate below)
                $error = 0;
                $i = strlen($str);
                break;
            }

            // linear interpolation from the last two known
            // (byte index, character offset) pairs
            $j = $i + (int)(($offset-$c) * ($i - $prev[0]) / ($c - $prev[1]));

            // correct to utf8 character boundary
            $j = utf8_locate_next_chr($str, $j);

            // save the index, offset for use next iteration
            $prev = array($i,$c);

            if ($j > $i) {
                // determine new character offset
                $c += utf8_position_count_chars(substr($str,$i,$j-$i));
            } else {
                // ditto
                $c -= utf8_position_count_chars(substr($str,$j,$i-$j));
            }

            $error = abs($c-$offset);

            // ready for next time around
            $i = $j;

        // from 7 it is faster to iterate over the string
        } while ( ($error > 7) && --$safety_valve) ;

        if ($error && $error <= 7) {

            // close enough: step one character at a time
            if ($c < $offset) {
                // move up
                while ($error--) { $i = utf8_locate_next_chr($str,++$i); }
            } else {
                // move down
                while ($error--) { $i = utf8_locate_current_chr($str,--$i); }
            }

            // ready for next arg
            $c = $offset;
        }
        $result[] = $i;
    }

    if ( count($result) == 1 ) {
        return $result[0];
    }

    return $result;
}

//--------------------------------------------------------------------
/**
 * Given a string and any byte index, returns the byte index
 * of the start of the current UTF-8 character, relative to supplied
 * position. If the current character begins at the same place as the
 * supplied byte index, that byte index will be returned. Otherwise
 * this function will step backwards, looking for the index where
 * current UTF-8 character begins.
 *
 * An index of zero or less returns 0; an index at or beyond the end
 * of the string returns strlen() of the string.
 *
 * @author Chris Smith
 * @param string
 * @param int byte index in the string
 * @return int byte index of start of current UTF-8 character
 * @package utf8
 */
function utf8_locate_current_chr( &$str, $idx ) {

    if ($idx <= 0) return 0;

    $limit = strlen($str);
    if ($idx >= $limit) return $limit;

    // Binary value for any byte after the first in a multi-byte UTF-8 character
    // will be like 10xxxxxx so & 0xC0 can be used to detect this kind
    // of byte - assuming well formed UTF-8
    while ($idx && ((ord($str[$idx]) & 0xC0) == 0x80)) $idx--;

    return $idx;
}

//--------------------------------------------------------------------
/**
 * Given a string and any byte index, returns the byte index
 * of the start of the next UTF-8 character, relative to supplied
 * position. If the next character begins at the same place as the
 * supplied byte index, that byte index will be returned.
 *
 * An index of zero or less returns 0; an index at or beyond the end
 * of the string returns strlen() of the string.
 *
 * @author Chris Smith
 * @param string
 * @param int byte index in the string
 * @return int byte index of start of next UTF-8 character
 * @package utf8
 */
function utf8_locate_next_chr( &$str, $idx ) {

    if ($idx <= 0) return 0;

    $limit = strlen($str);
    if ($idx >= $limit) return $limit;

    // Binary value for any byte after the first in a multi-byte UTF-8 character
    // will be like 10xxxxxx so & 0xC0 can be used to detect this kind
    // of byte - assuming well formed UTF-8
    while (($idx < $limit) && ((ord($str[$idx]) & 0xC0) == 0x80)) $idx++;

    return $idx;
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |