9 #ifndef EAGINE_MULTI_BYTE_SEQ_HPP
10 #define EAGINE_MULTI_BYTE_SEQ_HPP
/// Raw code point storage: an unsigned 32-bit integer.
28 using code_point_t = std::uint32_t;
/// Raw value 0x7FFFFFFF, named as the invalid code point marker.
29 static constexpr code_point_t invalid_code_point = 0x7FFFFFFFU;
/// Code point wrapper built on valid_if_less_than with the bound 0x7FFFFFFF
/// (the same value as invalid_code_point).
/// NOTE(review): presumably values at or above the bound report as invalid;
/// confirm against the valid_if_less_than definition.
30 using code_point = valid_if_less_than<code_point_t, 0x7FFFFFFFU>;
/// Sequence length wrapper built on valid_if_between with the bounds 1 and 6.
/// NOTE(review): whether the bounds are inclusive is decided by
/// valid_if_between; the helpers in this file use lengths 1 through 6.
32 using valid_sequence_length = valid_if_between<span_size_t, 1, 6>;
/// Mutable byte span wrapped in valid_if_size_gt, parameterized by span_size_t.
/// NOTE(review): presumably the span is considered valid when its size is
/// greater than a size supplied at the point of use; confirm against
/// valid_if_size_gt.
33 using valid_byte_span = valid_if_size_gt<span<byte>,
span_size_t>;
35 static inline auto make_byte_span(span<byte> s) noexcept -> valid_byte_span {
/// Overload of make_byte_span for a span of char.
/// Passes `s` through memory::accomodate<byte> and brace-initializes the
/// returned valid_byte_span from the result.
/// NOTE(review): presumably accomodate yields a view over the same storage
/// rather than a copy; confirm against memory::accomodate.
39 static inline auto make_byte_span(span<char> s) noexcept -> valid_byte_span {
40 return {memory::accomodate<byte>(s)};
/// Read-only (const byte) counterpart of the byte span wrapper: a
/// span<const byte> wrapped in valid_if_size_gt, parameterized by span_size_t.
/// NOTE(review): validity rule is defined by valid_if_size_gt; confirm there.
43 using valid_cbyte_span = valid_if_size_gt<span<const byte>,
span_size_t>;
45 static inline auto make_cbyte_span(span<const byte> s) noexcept
/// Overload of make_cbyte_span for a span of const char.
/// Passes `s` through memory::accomodate<const byte> and brace-initializes
/// the return value from the result.
/// NOTE(review): the return type line is not visible in this view;
/// presumably valid_cbyte_span. Confirm.
50 static inline auto make_cbyte_span(span<const char> s) noexcept
52 return {memory::accomodate<const byte>(s)};
/// Returns the code point limit associated with a sequence length.
/// Visible mapping: 1 -> 0x7F, 2 -> 0x7FF, 3 -> 0xFFFF, 4 -> 0x1FFFFF,
/// 5 -> 0x3FFFFFF, 6 -> 0x7FFFFFFF (7, 11, 16, 21, 26 and 31 value bits).
/// The result is wrapped in valid_if_not_zero<code_point_t>.
/// NOTE(review): the fallback for any other length is not visible here;
/// presumably zero, which the return wrapper would treat as invalid. Confirm.
55 static constexpr
auto max_code_point(
const valid_sequence_length& len) noexcept
56 -> valid_if_not_zero<code_point_t> {
57 return len == 1 ? 0x0000007F :
58 len == 2 ? 0x000007FF :
59 len == 3 ? 0x0000FFFF :
60 len == 4 ? 0x001FFFFF :
61 len == 5 ? 0x03FFFFFF :
62 len == 6 ? 0x7FFFFFFF :
/// Computes a bit shift from the sequence length `len`:
/// (extract(len) - 1) * 6 when `len` is valid, otherwise -1.
/// NOTE(review): the parameter and return type line is not visible in this
/// view. The factor 6 presumably reflects six data bits per trailing byte,
/// and "head" presumably means the first byte of a sequence. Confirm.
66 static constexpr
auto head_data_bitshift(
68 return {len.is_valid() ? (
extract(len) - 1) * 6 : -1};
71 static constexpr
auto tail_data_bitshift(
72 const valid_sequence_length& idx,
75 (idx.is_valid() && len.is_valid()) ? (
extract(len) -
extract(idx) - 1) * 6
79 static constexpr
auto head_code_mask(
const valid_sequence_length& len) noexcept
80 -> valid_if_not_zero<byte> {
88 : len == 5 ? 0xFC : len == 6 ? 0xFE : 0x00;
/// Returns the bitwise complement of the byte held by `b`.
/// The complement is taken from b.value_anyway(), so it is computed even
/// when `b` is not valid; b.is_valid() is passed as the second initializer
/// of the optionally_valid<byte> result (presumably its validity flag).
92 static constexpr
auto inverted_byte(
const valid_if<byte, P> b) noexcept
93 -> optionally_valid<byte> {
94 return {
byte(~b.value_anyway()), b.is_valid()};
/// Returns the complement of head_code_mask(len), computed by inverted_byte.
/// NOTE(review): presumably this selects the data bits of the first byte of
/// a sequence of length `len`; confirm against the encoder and decoder.
97 static constexpr
auto head_data_mask(
const valid_sequence_length& len) noexcept
98 -> optionally_valid<byte> {
99 return inverted_byte(head_code_mask(len));
102 static constexpr
auto tail_code_mask() noexcept ->
always_valid<
byte> {
106 static constexpr
auto tail_data_mask() noexcept ->
always_valid<
byte> {
/// Derives a byte from `mask` by shifting it left one bit and keeping the
/// low eight bits, e.g. 0xFC becomes 0xF8.
/// The shift uses mask.value_anyway(), so it is computed even for an invalid
/// mask; mask.is_valid() is passed as the second initializer of the
/// optionally_valid<byte> result (presumably its validity flag).
110 template <
typename P>
111 static constexpr
auto head_code_from_mask(
const valid_if<byte, P> mask) noexcept
112 -> optionally_valid<byte> {
114 return {
byte((mask.value_anyway() << 1) & 0xFF), mask.is_valid()};
/// Returns head_code_from_mask(head_code_mask(len)): the byte derived from
/// the head code mask for the sequence length `len`.
/// NOTE(review): presumably the marker bit pattern expected in the first
/// byte of a sequence of that length; confirm.
117 static constexpr
auto head_code(
const valid_sequence_length& len) noexcept
118 -> optionally_valid<byte> {
119 return head_code_from_mask(head_code_mask(len));
122 static constexpr
auto tail_code() noexcept ->
always_valid<
byte> {
126 template <
typename P1,
typename P2>
127 static constexpr
auto is_valid_masked_code(
129 const valid_if<byte, P1> mask,
130 const valid_if<byte, P2> code) noexcept ->
bool {
131 return (mask.is_valid() && code.is_valid())
/// Checks byte `b` against the head code mask and head code for the
/// sequence length `l` by delegating to is_valid_masked_code.
/// NOTE(review): the return type line is not visible in this view;
/// presumably bool, given how the single-argument overload combines results.
136 static constexpr
auto
137 is_valid_head_byte(
const byte b,
const valid_sequence_length& l) noexcept
139 return is_valid_masked_code(b, head_code_mask(l), head_code(l));
/// Returns true when `b` passes the two-argument is_valid_head_byte check
/// for at least one of the lengths 1 through 6. Lengths are tried in
/// ascending order and evaluation stops at the first match.
142 static constexpr
auto is_valid_head_byte(
const byte b) noexcept ->
bool {
143 return is_valid_head_byte(b, 1) || is_valid_head_byte(b, 2) ||
144 is_valid_head_byte(b, 3) || is_valid_head_byte(b, 4) ||
145 is_valid_head_byte(b, 5) || is_valid_head_byte(b, 6);
/// Checks `b` against tail_code_mask() and tail_code() by delegating to
/// is_valid_masked_code. The two valid_sequence_length parameters are
/// unnamed and do not take part in the check.
/// NOTE(review): the declaration of `b` is not visible in this view;
/// presumably `const byte b` as in is_valid_head_byte.
148 static constexpr
auto is_valid_tail_byte(
150 const valid_sequence_length&,
151 const valid_sequence_length&) noexcept ->
bool {
152 return is_valid_masked_code(b, tail_code_mask(), tail_code());
/// Chooses a sequence length for the raw code point `cp` by comparing it
/// with max_code_point(1) through max_code_point(6) in ascending order.
/// When none of the comparisons holds the result is 0.
/// NOTE(review): the branch results for lengths 1 to 5 are not visible in
/// this view; presumably 1, 2, 3, 4, 5 to match the final `? 6`.
/// NOTE(review): the comparisons are strict (`>`), so a code point equal to
/// a limit (e.g. 0x7F for length 1, or 0x7FFFFFFF for length 6) would not
/// select that length. Verify this is intended and not an off-by-one; it
/// also depends on how valid_if_not_zero compares with a raw value.
155 static constexpr
auto required_sequence_length(
const code_point_t cp) noexcept
156 -> valid_sequence_length {
157 return (max_code_point(1) > cp)
159 : (max_code_point(2) > cp)
161 : (max_code_point(3) > cp)
163 : (max_code_point(4) > cp)
165 : (max_code_point(5) > cp)
167 : (max_code_point(6) > cp) ? 6 : 0;
/// Declaration only; no definition is visible in this view.
/// Takes a single byte and returns a valid_sequence_length.
/// NOTE(review): presumably derives the sequence length from the first byte
/// of a sequence; confirm against the definition.
170 auto do_decode_sequence_length(
const byte b) noexcept -> valid_sequence_length;
/// Reads the first element of `seq` via seq.value(0)[0], converts it to
/// byte and forwards it to do_decode_sequence_length.
/// NOTE(review): value(0) presumably requires the span to hold more than
/// zero elements; what happens for an empty span depends on
/// valid_if_size_gt. Confirm.
172 static inline auto decode_sequence_length(
const valid_cbyte_span& seq) noexcept
173 -> valid_sequence_length {
174 return do_decode_sequence_length(
byte(seq.value(0)[0]));
/// Declaration only; no definition is visible in this view.
/// Takes a read-only byte span and returns a bool.
/// NOTE(review): presumably reports whether the bytes form well-formed
/// multi-byte sequences; confirm against the definition.
177 auto is_valid_encoding(
const valid_cbyte_span& vseq) noexcept -> bool;
/// Declaration only; no definition is visible in this view.
/// Takes a read-only byte span and a sequence length, returns a code_point.
/// Called by decode_code_point with the length obtained from
/// decode_sequence_length.
179 auto do_decode_code_point(
180 const valid_cbyte_span& vsrc,
181 const valid_sequence_length& vl) noexcept -> code_point;
/// Determines the sequence length of `src` with decode_sequence_length and
/// forwards both to do_decode_code_point.
/// NOTE(review): the return type line is not visible in this view;
/// presumably code_point, matching do_decode_code_point.
183 static inline auto decode_code_point(
const valid_cbyte_span& src) noexcept
185 return do_decode_code_point(src, decode_sequence_length(src));
/// Declaration only; no definition is visible in this view.
/// Takes a code point, a mutable destination byte span and a sequence
/// length, and returns a bool.
/// NOTE(review): presumably writes the encoded bytes into `vdest` and
/// returns whether that succeeded; confirm against the definition.
188 auto do_encode_code_point(
189 const code_point& cp,
190 const valid_byte_span& vdest,
191 const valid_sequence_length& vl) noexcept -> bool;
/// Declaration only; no definition is visible in this view.
/// Takes a code point and a mutable destination byte span, and returns a
/// valid_sequence_length.
/// NOTE(review): presumably the returned length is the number of bytes
/// written, and an invalid length signals failure; confirm.
193 auto encode_code_point(
194 const code_point& cp,
195 const valid_byte_span& dest) noexcept -> valid_sequence_length;
/// Declaration only; no definition is visible in this view.
/// Overload returning the result as valid_if_not_empty<std::string>.
/// Unlike the span-based overload it is not declared noexcept.
/// NOTE(review): presumably an empty (invalid) string signals failure; confirm.
197 auto encode_code_point(
const code_point& cp) -> valid_if_not_empty<std::string>;
/// Declaration only; no definition is visible in this view.
/// Takes a span of raw code point values and returns an optionally valid
/// span_size_t.
/// NOTE(review): presumably the total number of bytes needed to encode all
/// of `cps`; confirm against the definition.
199 auto encoding_bytes_required(span<const code_point_t> cps) noexcept
200 -> optionally_valid<span_size_t>;
/// Declaration only; no definition is visible in this view.
/// Overload taking a span of code_point wrappers instead of raw values;
/// returns an optionally valid span_size_t.
202 auto encoding_bytes_required(span<const code_point> cps) noexcept
203 -> optionally_valid<span_size_t>;
/// Declaration only; no definition is visible in this view.
/// Takes a read-only byte span and returns an optionally valid span_size_t.
/// NOTE(review): presumably the number of code points encoded in `bytes`;
/// confirm against the definition.
205 auto decoding_code_points_required(
const valid_cbyte_span& bytes) noexcept
206 -> optionally_valid<span_size_t>;
/// Declaration only; no definition is visible in this view.
/// Takes a span of code points and a mutable destination byte span, and
/// returns a bool.
/// NOTE(review): presumably encodes every element of `cps` into `bytes`
/// and returns whether that succeeded; confirm against the definition.
208 auto encode_code_points(
209 span<const code_point> cps,
210 const valid_byte_span& bytes) noexcept -> bool;
212 auto decode_code_points(
const valid_cbyte_span& bytes, span<code_point> cps)
218 #include <eagine/multi_byte_seq.inl>
220 #endif // EAGINE_MULTI_BYTE_SEQ_HPP