PHPXRef 0.7.1 : Joomla 4.2.2 documentation : /libraries/vendor/joomla/string/src/phputf8/utils/position.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  /**
   3  * Locate a byte index given a UTF-8 character index
   4  * @package utf8
   5  */
   6  
   7  //--------------------------------------------------------------------
   8  /**
   9  * Given a string and a character index in the string, in
  10  * terms of the UTF-8 character position, returns the byte
  11  * index of that character. Can be useful when you want to
  12  * PHP's native string functions but we warned, locating
  13  * the byte can be expensive
  14  * Takes variable number of parameters - first must be
  15  * the search string then 1 to n UTF-8 character positions
  16  * to obtain byte indexes for - it is more efficient to search
  17  * the string for multiple characters at once, than make
  18  * repeated calls to this function
  19  *
  20  * @author Chris Smith<[email protected]>
  21  * @param string string to locate index in
  22  * @param int (n times)
  23  * @return mixed - int if only one input int, array if more
  24  * @return boolean TRUE if it's all ASCII
  25  * @package utf8
  26  */
  27  function utf8_byte_position() {
  28  
  29      $args = func_get_args();
  30      $str =& array_shift($args);
  31      if (!is_string($str)) return false;
  32  
  33      $result = array();
  34  
  35      // trivial byte index, character offset pair
  36      $prev = array(0,0);
  37  
  38      // use a short piece of str to estimate bytes per character
  39      // $i (& $j) -> byte indexes into $str
  40      $i = utf8_locate_next_chr($str, 300);
  41  
  42      // $c -> character offset into $str
  43      $c = strlen(utf8_decode(substr($str,0,$i)));
  44  
  45      // deal with arguments from lowest to highest
  46      sort($args);
  47  
  48      foreach ($args as $offset) {
  49          // sanity checks FIXME
  50  
  51          // 0 is an easy check
  52          if ($offset == 0) { $result[] = 0; continue; }
  53  
  54          // ensure no endless looping
  55          $safety_valve = 50;
  56  
  57          do {
  58  
  59              if ( ($c - $prev[1]) == 0 ) {
  60                  // Hack: gone past end of string
  61                  $error = 0;
  62                  $i = strlen($str);
  63                  break;
  64              }
  65  
  66              $j = $i + (int)(($offset-$c) * ($i - $prev[0]) / ($c - $prev[1]));
  67  
  68              // correct to utf8 character boundary
  69              $j = utf8_locate_next_chr($str, $j);
  70  
  71              // save the index, offset for use next iteration
  72              $prev = array($i,$c);
  73  
  74              if ($j > $i) {
  75                  // determine new character offset
  76                  $c += strlen(utf8_decode(substr($str,$i,$j-$i)));
  77              } else {
  78                  // ditto
  79                  $c -= strlen(utf8_decode(substr($str,$j,$i-$j)));
  80              }
  81  
  82              $error = abs($c-$offset);
  83  
  84              // ready for next time around
  85              $i = $j;
  86  
  87          // from 7 it is faster to iterate over the string
  88          } while ( ($error > 7) && --$safety_valve) ;
  89  
  90          if ($error && $error <= 7) {
  91  
  92              if ($c < $offset) {
  93                  // move up
  94                  while ($error--) { $i = utf8_locate_next_chr($str,++$i); }
  95              } else {
  96                  // move down
  97                  while ($error--) { $i = utf8_locate_current_chr($str,--$i); }
  98              }
  99  
 100              // ready for next arg
 101              $c = $offset;
 102          }
 103          $result[] = $i;
 104      }
 105  
 106      if ( count($result) == 1 ) {
 107          return $result[0];
 108      }
 109  
 110      return $result;
 111  }
 112  
 113  //--------------------------------------------------------------------
 114  /**
 115  * Given a string and any byte index, returns the byte index
 116  * of the start of the current UTF-8 character, relative to supplied
 117  * position. If the current character begins at the same place as the
 118  * supplied byte index, that byte index will be returned. Otherwise
 119  * this function will step backwards, looking for the index where
 120  * current UTF-8 character begins
 121  * @author Chris Smith<[email protected]>
 122  * @param string
 123  * @param int byte index in the string
 124  * @return int byte index of start of next UTF-8 character
 125  * @package utf8
 126  */
 127  function utf8_locate_current_chr( &$str, $idx ) {
 128  
 129      if ($idx <= 0) return 0;
 130  
 131      $limit = strlen($str);
 132      if ($idx >= $limit) return $limit;
 133  
 134      // Binary value for any byte after the first in a multi-byte UTF-8 character
 135      // will be like 10xxxxxx so & 0xC0 can be used to detect this kind
 136      // of byte - assuming well formed UTF-8
 137      while ($idx && ((ord($str[$idx]) & 0xC0) == 0x80)) $idx--;
 138  
 139      return $idx;
 140  }
 141  
 142  //--------------------------------------------------------------------
 143  /**
 144  * Given a string and any byte index, returns the byte index
 145  * of the start of the next UTF-8 character, relative to supplied
 146  * position. If the next character begins at the same place as the
 147  * supplied byte index, that byte index will be returned.
 148  * @author Chris Smith<[email protected]>
 149  * @param string
 150  * @param int byte index in the string
 151  * @return int byte index of start of next UTF-8 character
 152  * @package utf8
 153  */
 154  function utf8_locate_next_chr( &$str, $idx ) {
 155  
 156      if ($idx <= 0) return 0;
 157  
 158      $limit = strlen($str);
 159      if ($idx >= $limit) return $limit;
 160  
 161      // Binary value for any byte after the first in a multi-byte UTF-8 character
 162      // will be like 10xxxxxx so & 0xC0 can be used to detect this kind
 163      // of byte - assuming well formed UTF-8
 164      while (($idx < $limit) && ((ord($str[$idx]) & 0xC0) == 0x80)) $idx++;
 165  
 166      return $idx;
 167  }
 168
PHP Cross Reference of Joomla 4.2.2 documentation

/libraries/vendor/joomla/string/src/phputf8/utils/ -> position.php (source)