// unicode.h

  1. #pragma once
  2. #include <cstdint>
  3. #include <string>
  4. #include <vector>
  5. #define CODEPOINT_TYPE_UNIDENTIFIED 0
  6. #define CODEPOINT_TYPE_DIGIT 1
  7. #define CODEPOINT_TYPE_LETTER 2
  8. #define CODEPOINT_TYPE_WHITESPACE 3
  9. #define CODEPOINT_TYPE_ACCENT_MARK 4
  10. #define CODEPOINT_TYPE_PUNCTUATION 5
  11. #define CODEPOINT_TYPE_SYMBOL 6
  12. #define CODEPOINT_TYPE_CONTROL 7
  13. std::string unicode_cpt_to_utf8(uint32_t cp);
  14. std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
  15. std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts);
  16. int unicode_cpt_type(uint32_t cp);
  17. int unicode_cpt_type(const std::string & utf8);
  18. std::string unicode_byte_to_utf8(uint8_t byte);
  19. uint8_t unicode_utf8_to_byte(const std::string & utf8);
  20. char32_t unicode_tolower(char32_t cp);
  21. std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);