[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/libraries/vendor/joomla/string/src/phputf8/utils/ -> patterns.php (source)

   1  <?php
   2  /**
   3  * PCRE regular expressions for UTF-8. Note: this file is not actually used by
   4  * the rest of the library, but these regular expressions can be useful to have
   5  * available.
   6  * @see http://www.w3.org/International/questions/qa-forms-utf-8
   7  * @package utf8
   8  */
   9  
  10  //--------------------------------------------------------------------
  11  /**
  12  * PCRE pattern that matches only when the whole subject is valid UTF-8 (anchored with ^ and $; an empty subject also matches)
  13  * Comes from the W3C FAQ "Multilingual Forms"; holds the pattern body only, without enclosing delimiters or modifiers
  14  * Note: modified to include full ASCII range including control chars. NOTE(review): the \xNN ranges look byte-oriented, presumably for use without the /u modifier - confirm
  15  * @see http://www.w3.org/International/questions/qa-forms-utf-8
  16  * @package utf8
  17  */
  18  $UTF8_VALID = '^('.
  19      '[\x00-\x7F]'.                          # 1-byte: ASCII (including control chars)
  20      '|[\xC2-\xDF][\x80-\xBF]'.              # 2-byte: lead bytes start at C2, so no overlong forms
  21      '|\xE0[\xA0-\xBF][\x80-\xBF]'.          # 3-byte, lead E0: second byte from A0 up, excluding overlongs
  22      '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.   # 3-byte, other leads (ED handled separately): straight continuation bytes
  23      '|\xED[\x80-\x9F][\x80-\xBF]'.          # 3-byte, lead ED: second byte up to 9F, excluding surrogates
  24      '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.       # 4-byte, lead F0: planes 1-3
  25      '|[\xF1-\xF3][\x80-\xBF]{3}'.           # 4-byte, leads F1-F3: planes 4-15
  26      '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.       # 4-byte, lead F4: plane 16
  27      ')*$'; # zero or more of the sequences above, then end of subject
  28  
  29  //--------------------------------------------------------------------
  30  /**
  31  * PCRE pattern to match single UTF-8 characters; unanchored, with each of the eight alternatives in its own capturing group
  32  * Comes from the W3C FAQ "Multilingual Forms"; holds the pattern body only, without enclosing delimiters or modifiers
  33  * Note: modified to include full ASCII range including control chars. NOTE(review): the \xNN ranges look byte-oriented, presumably for use without the /u modifier - confirm
  34  * @see http://www.w3.org/International/questions/qa-forms-utf-8
  35  * @package utf8
  36  */
  37  $UTF8_MATCH =
  38      '([\x00-\x7F])'.                          # group 1: ASCII (including control chars)
  39      '|([\xC2-\xDF][\x80-\xBF])'.              # group 2: non-overlong 2-byte
  40      '|(\xE0[\xA0-\xBF][\x80-\xBF])'.          # group 3: 3-byte with lead E0, excluding overlongs
  41      '|([\xE1-\xEC\xEE\xEF][\x80-\xBF]{2})'.   # group 4: straight 3-byte
  42      '|(\xED[\x80-\x9F][\x80-\xBF])'.          # group 5: 3-byte with lead ED, excluding surrogates
  43      '|(\xF0[\x90-\xBF][\x80-\xBF]{2})'.       # group 6: 4-byte, planes 1-3
  44      '|([\xF1-\xF3][\x80-\xBF]{3})'.           # group 7: 4-byte, planes 4-15
  45      '|(\xF4[\x80-\x8F][\x80-\xBF]{2})';       # group 8: 4-byte, plane 16
  46  
  47  //--------------------------------------------------------------------
  48  /**
  49  * PCRE pattern to locate bad bytes in a UTF-8 string: valid sequences are tried first (outer group 1), anything left falls into the inner group 2
  50  * Comes from the W3C FAQ "Multilingual Forms"; holds the pattern body only, without enclosing delimiters or modifiers
  51  * Note: modified to include full ASCII range including control chars. NOTE(review): the \xNN ranges look byte-oriented, presumably for use without the /u modifier - confirm
  52  * @see http://www.w3.org/International/questions/qa-forms-utf-8
  53  * @package utf8
  54  */
  55  $UTF8_BAD =
  56      '([\x00-\x7F]'.                          # opens group 1; ASCII (including control chars)
  57      '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
  58      '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # 3-byte with lead E0, excluding overlongs
  59      '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
  60      '|\xED[\x80-\x9F][\x80-\xBF]'.           # 3-byte with lead ED, excluding surrogates
  61      '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # 4-byte, planes 1-3
  62      '|[\xF1-\xF3][\x80-\xBF]{3}'.            # 4-byte, planes 4-15
  63      '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # 4-byte, plane 16
  64      '|(.{1}))';                              # fallback tried last: one invalid byte, captured alone as group 2; closes group 1


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer