Stdlib.Uchar
Unicode characters.
type t;
The type for Unicode characters.
A value of this type represents a Unicode scalar value, which is an integer in the ranges 0x0000...0xD7FF or 0xE000...0x10FFFF.
let min: t;
`min` is U+0000.
let max: t;
`max` is U+10FFFF.
let bom: t;
`bom` is U+FEFF, the byte order mark (BOM) character.
let rep: t;
`rep` is U+FFFD, the replacement character.
let is_valid: int => bool;
`is_valid n` is `true` if and only if `n` is a Unicode scalar value (i.e. in the ranges 0x0000...0xD7FF or 0xE000...0x10FFFF).
let of_int: int => t;
`of_int i` is `i` as a Unicode character. Raises `Invalid_argument` if `i` does not satisfy `is_valid`.
let to_int: t => int;
`to_int u` is `u` as an integer.
let is_char: t => bool;
`is_char u` is `true` if and only if `u` is a Latin-1 OCaml character.
let of_char: char => t;
`of_char c` is `c` as a Unicode character.
let to_char: t => char;
`to_char u` is `u` as an OCaml Latin-1 character. Raises `Invalid_argument` if `u` does not satisfy `is_char`.
let hash: t => int;
`hash u` associates a non-negative integer with `u`.
type utf_decode;
The type for UTF decode results. Values of this type represent the result of a Unicode Transformation Format decoding attempt.
let utf_decode_is_valid: utf_decode => bool;
`utf_decode_is_valid d` is `true` if and only if `d` holds a valid decode.
let utf_decode_uchar: utf_decode => t;
`utf_decode_uchar d` is the Unicode character decoded by `d` if `utf_decode_is_valid d` is `true`, and `Uchar.rep` otherwise.
let utf_decode_length: utf_decode => int;
`utf_decode_length d` is the number of elements from the source that were consumed by the decode `d`. This is always strictly positive and less than or equal to 4. The kind of source elements depends on the actual decoder; for the decoders of the standard library this function always returns a length in bytes.
let utf_decode: int => t => utf_decode;
`utf_decode n u` is a valid UTF decode for `u` that consumed `n` elements from the source for decoding. `n` must be positive and less than or equal to 4 (this is not checked by the module).
let utf_decode_invalid: int => utf_decode;
`utf_decode_invalid n` is an invalid UTF decode that consumed `n` elements from the source before producing the error. `n` must be positive and less than or equal to 4 (this is not checked by the module). The resulting decode has `rep` as the decoded Unicode character.
let utf_8_byte_length: t => int;
`utf_8_byte_length u` is the number of bytes needed to encode `u` in UTF-8.
let utf_16_byte_length: t => int;
`utf_16_byte_length u` is the number of bytes needed to encode `u` in UTF-16.