From f32d374fd5092555d20f8c38a508f7caed3735d8 Mon Sep 17 00:00:00 2001 From: Mateusz Pusz Date: Tue, 23 Jan 2024 21:16:47 +0100 Subject: [PATCH] feat: formatting grammar improved and units formatting support added --- src/core/include/mp-units/bits/fmt.h | 457 +++++++---------- src/core/include/mp-units/format.h | 716 ++++++++++++--------------- 2 files changed, 498 insertions(+), 675 deletions(-) diff --git a/src/core/include/mp-units/bits/fmt.h b/src/core/include/mp-units/bits/fmt.h index 1b56e7ef..1ec0ff22 100644 --- a/src/core/include/mp-units/bits/fmt.h +++ b/src/core/include/mp-units/bits/fmt.h @@ -35,15 +35,36 @@ #include #include -// most of the below code is based on/copied from libfmt +// most of the below code is based on/copied from fmtlib namespace mp_units::detail { -struct auto_id {}; +enum class fmt_align { none, left, right, center, numeric }; +enum class fmt_arg_id_kind { none, index, name }; -enum class fmt_align { none, left, right, center }; -enum class fmt_sign { none, minus, plus, space }; -enum class arg_id_kind { none, index, name }; +template +struct fmt_arg_ref { + fmt_arg_id_kind kind = fmt_arg_id_kind::none; + union value { + int index = 0; + std::basic_string_view name; + + value() = default; + constexpr value(int idx) : index(idx) {} + constexpr value(std::basic_string_view n) : name(n) {} + } val{}; + + fmt_arg_ref() = default; + constexpr explicit fmt_arg_ref(int index) : kind(fmt_arg_id_kind::index), val(index) {} + constexpr explicit fmt_arg_ref(std::basic_string_view name) : kind(fmt_arg_id_kind::name), val(name) {} + + [[nodiscard]] constexpr fmt_arg_ref& operator=(int idx) + { + kind = fmt_arg_id_kind::index; + val.index = idx; + return *this; + } +}; template struct fill_t { @@ -74,12 +95,6 @@ template inline constexpr bool is_integer = std::is_integral::value && !std::is_same::value && !std::is_same::value && !std::is_same::value; -template -[[nodiscard]] constexpr bool is_ascii_letter(Char c) -{ - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); -} - // Converts a character to ASCII. Returns a number > 127 on conversion failure. template [[nodiscard]] constexpr Char to_ascii(Char value) @@ -89,232 +104,198 @@ template template requires std::is_enum_v -[[nodiscard]] constexpr auto to_ascii(Char value) -> std::underlying_type_t +[[nodiscard]] constexpr std::underlying_type_t to_ascii(Char value) { return value; } +// Casts a nonnegative integer to unsigned. +template +[[nodiscard]] constexpr std::make_unsigned_t to_unsigned(Int value) +{ + gsl_Expects(std::is_unsigned_v || value >= 0); + return static_cast>(value); +} + struct width_checker { template [[nodiscard]] constexpr unsigned long long operator()(T value) const { if constexpr (is_integer) { - if constexpr (std::numeric_limits::is_signed) { + if constexpr (std::numeric_limits::is_signed) if (value < 0) MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("negative width")); - } return static_cast(value); } MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("width is not integer")); - return 0; // should never happen + return 0; } }; -struct precision_checker { - template - [[nodiscard]] constexpr unsigned long long operator()(T value) const - { - if constexpr (is_integer) { - if constexpr (std::numeric_limits::is_signed) { - if (value < 0) MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("negative precision")); - } - return static_cast(value); - } - MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("precision is not integer")); - return 0; // should never happen - } -}; - -// Format specifiers for built-in and string types. -template -struct basic_format_specs { - int width = 0; - int precision = -1; - char type = '\0'; - fmt_align align : 4 = fmt_align::none; - fmt_sign sign : 3 = fmt_sign::none; - bool alt : 1 = false; // Alternate form ('#'). - bool localized : 1 = false; - fill_t fill; -}; - -// Format specifiers with width and precision resolved at formatting rather -// than parsing time to allow re-using the same parsed specifiers with -// different sets of arguments (precompilation of format strings). -template -struct dynamic_format_specs : basic_format_specs { - int dynamic_width_index = -1; - int dynamic_precision_index = -1; -}; - -[[nodiscard]] constexpr int verify_dynamic_arg_index_in_range(size_t idx) +template +[[nodiscard]] constexpr int get_dynamic_spec(FormatArg arg) { - if (idx > static_cast(std::numeric_limits::max())) { - MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("Dynamic width or precision index too large.")); - } - return static_cast(idx); -} - -template -[[nodiscard]] constexpr int on_dynamic_arg(size_t arg_id, MP_UNITS_STD_FMT::basic_format_parse_context& context) -{ - context.check_arg_id(MP_UNITS_FMT_TO_ARG_ID(arg_id)); - return verify_dynamic_arg_index_in_range(arg_id); -} - -template -[[nodiscard]] constexpr int on_dynamic_arg(auto_id, MP_UNITS_STD_FMT::basic_format_parse_context& context) -{ - return verify_dynamic_arg_index_in_range(MP_UNITS_FMT_FROM_ARG_ID(context.next_arg_id())); -} - -template -[[nodiscard]] constexpr int get_dynamic_spec(int index, FormatContext& ctx) -{ - const unsigned long long value = - MP_UNITS_STD_FMT::visit_format_arg(Handler{}, ctx.arg(MP_UNITS_FMT_TO_ARG_ID(static_cast(index)))); - if (value > static_cast(std::numeric_limits::max())) { + const unsigned long long value = MP_UNITS_STD_FMT::visit_format_arg(Handler{}, arg); + if (value > ::mp_units::detail::to_unsigned(std::numeric_limits::max())) { MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("number is too big")); } return static_cast(value); } +template +[[nodiscard]] constexpr auto get_arg(Context& ctx, ID id) -> decltype(ctx.arg(id)) +{ + auto arg = ctx.arg(id); + if (!arg) ctx.on_error("argument not found"); + return arg; +} + +template +constexpr void handle_dynamic_spec(int& value, fmt_arg_ref ref, Context& ctx) +{ + switch (ref.kind) { + case fmt_arg_id_kind::none: + break; + case fmt_arg_id_kind::index: + value = ::mp_units::detail::get_dynamic_spec(get_arg(ctx, ref.val.index)); + break; + case fmt_arg_id_kind::name: + value = ::mp_units::detail::get_dynamic_spec(get_arg(ctx, ref.val.name)); + break; + } +} + // Parses the range [begin, end) as an unsigned integer. This function assumes // that the range is non-empty and the first character is a digit. -template S> -[[nodiscard]] constexpr It parse_nonnegative_int(It begin, S end, size_t& value) +template +[[nodiscard]] constexpr int parse_nonnegative_int(const Char*& begin, const Char* end, int error_value) { gsl_Expects(begin != end && '0' <= *begin && *begin <= '9'); - constexpr auto max_int = static_cast(std::numeric_limits::max()); - constexpr auto big_int = max_int / 10u; - value = 0; - + unsigned value = 0, prev = 0; + auto p = begin; do { - if (value > big_int) { - value = max_int + 1; - break; - } - value = value * 10 + static_cast(*begin - '0'); - ++begin; - } while (begin != end && '0' <= *begin && *begin <= '9'); - - if (value > max_int) MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("Number is too big")); - - return begin; + prev = value; + value = value * 10 + unsigned(*p - '0'); + ++p; + } while (p != end && '0' <= *p && *p <= '9'); + auto num_digits = p - begin; + begin = p; + if (num_digits <= std::numeric_limits::digits10) return static_cast(value); + // Check for overflow. + const unsigned max = ::mp_units::detail::to_unsigned((std::numeric_limits::max)()); + return num_digits == std::numeric_limits::digits10 + 1 && prev * 10ull + unsigned(p[-1] - '0') <= max + ? static_cast(value) + : error_value; } -template S> -[[nodiscard]] constexpr It parse_nonnegative_int(It begin, S end, int& value) +template +[[nodiscard]] constexpr bool is_name_start(Char c) { - size_t val_unsigned = 0; - begin = parse_nonnegative_int(begin, end, val_unsigned); - // Never invalid because parse_nonnegative_integer throws an error for values that don't fit in signed integers - value = static_cast(val_unsigned); - return begin; + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_'; } -template S, typename IDHandler> -[[nodiscard]] constexpr It do_parse_arg_id(It begin, S end, IDHandler&& handler) +template +[[nodiscard]] constexpr const Char* do_parse_arg_id(const Char* begin, const Char* end, Handler&& handler) { - gsl_Expects(begin != end); - auto c = *begin; + Char c = *begin; if (c >= '0' && c <= '9') { - size_t index = 0; + int index = 0; + constexpr int max = (std::numeric_limits::max)(); if (c != '0') - begin = parse_nonnegative_int(begin, end, index); + index = ::mp_units::detail::parse_nonnegative_int(begin, end, max); else ++begin; if (begin == end || (*begin != '}' && *begin != ':')) MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format string")); else - handler(index); + handler.on_index(index); return begin; } - MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format string")); - return begin; // should never happen + if (c == '%') return begin; // mp-units extension + if (!::mp_units::detail::is_name_start(c)) { + MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format string")); + return begin; + } + auto it = begin; + do { + ++it; + } while (it != end && (::mp_units::detail::is_name_start(*it) || ('0' <= *it && *it <= '9'))); + handler.on_name({begin, ::mp_units::detail::to_unsigned(it - begin)}); + return it; } -template S, typename IDHandler> -[[nodiscard]] constexpr It parse_arg_id(It begin, S end, IDHandler&& handler) -{ - auto c = *begin; - if (c != '}' && c != ':') return do_parse_arg_id(begin, end, handler); - handler(); - return begin; -} - -template S, typename Handler> -[[nodiscard]] constexpr It parse_sign(It begin, S end, Handler&& handler) +template +[[nodiscard]] constexpr const Char* parse_arg_id(const Char* begin, const Char* end, Handler&& handler) { gsl_Expects(begin != end); - switch (to_ascii(*begin)) { - case '+': - handler.on_sign(fmt_sign::plus); - ++begin; - break; - case '-': - handler.on_sign(fmt_sign::minus); - ++begin; - break; - case ' ': - handler.on_sign(fmt_sign::space); - ++begin; - break; - default: - break; - } + Char c = *begin; + if (c != '}' && c != ':') return ::mp_units::detail::do_parse_arg_id(begin, end, handler); + handler.on_auto(); return begin; } -template S, typename Handler> -[[nodiscard]] constexpr It parse_width(It begin, S end, Handler&& handler) +template +[[nodiscard]] constexpr const Char* parse_subentity_replacement_field(const Char* begin, const Char* end, + Handler&& handler) { - struct width_adapter { - Handler& handler; - constexpr void operator()() { handler.on_dynamic_width(auto_id{}); } - constexpr void operator()(size_t id) { handler.on_dynamic_width(id); } - }; + if (end - begin++ < 4) return MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format string")), end; + if (*begin++ != '%') MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format")); + if (*begin == '}') MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format")); + auto it = begin; + for (; it != end; ++it) { + if (*it == '{' || *it == '%') MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format")); + if (*it == '}' || *it == ':') break; + } + if (it == end) MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format")); + std::string_view id{begin, it}; + if (*it == ':') ++it; + it = handler.on_replacement_field(id, it); + if (it == end || *it != '}') MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format")); + return ++it; +} +template +struct dynamic_spec_id_handler { + MP_UNITS_STD_FMT::basic_format_parse_context& ctx; + fmt_arg_ref& ref; + + constexpr void on_auto() + { + int id = ctx.next_arg_id(); + ref = fmt_arg_ref(id); + ctx.check_dynamic_spec(id); + } + constexpr void on_index(int id) + { + ref = fmt_arg_ref(id); + ctx.check_arg_id(id); + ctx.check_dynamic_spec(id); + } + constexpr void on_name(std::basic_string_view id) + { + ref = fmt_arg_ref(id); + ctx.check_arg_id(id); + } +}; + +template +[[nodiscard]] constexpr const Char* parse_dynamic_spec(const Char* begin, const Char* end, int& value, + fmt_arg_ref& ref, + MP_UNITS_STD_FMT::basic_format_parse_context& ctx) +{ gsl_Expects(begin != end); if ('0' <= *begin && *begin <= '9') { - int width = 0; - begin = parse_nonnegative_int(begin, end, width); - if (width != -1) - handler.on_width(width); + int val = ::mp_units::detail::parse_nonnegative_int(begin, end, -1); + if (val != -1) + value = val; else MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("number is too big")); } else if (*begin == '{') { ++begin; - if (begin != end) begin = parse_arg_id(begin, end, width_adapter{handler}); - if (begin == end || *begin != '}') MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format string")); - ++begin; - } - return begin; -} - -template S, typename Handler> -[[nodiscard]] constexpr It parse_precision(It begin, S end, Handler&& handler) -{ - struct precision_adapter { - Handler& handler; - constexpr void operator()() { handler.on_dynamic_precision(auto_id{}); } - constexpr void operator()(size_t id) { handler.on_dynamic_precision(id); } - }; - - ++begin; - auto c = begin != end ? *begin : std::iter_value_t(); - if ('0' <= c && c <= '9') { - auto precision = 0; - begin = parse_nonnegative_int(begin, end, precision); - if (precision != -1) - handler.on_precision(precision); - else - MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("number is too big")); - } else if (c == '{') { - ++begin; - if (begin != end) begin = parse_arg_id(begin, end, precision_adapter{handler}); - if (begin == end || *begin++ != '}') MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format string")); - } else { - MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("missing precision specifier")); + if (*begin == '%') return begin - 1; // mp-units extension + auto handler = dynamic_spec_id_handler{ctx, ref}; + if (begin != end) begin = ::mp_units::detail::parse_arg_id(begin, end, handler); + if (begin != end && *begin == '}') return ++begin; + MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid format string")); } return begin; } @@ -334,13 +315,13 @@ constexpr int code_point_length(It begin) } // Parses fill and alignment. -template S, typename Handler> -[[nodiscard]] constexpr It parse_align(It begin, S end, Handler&& handler) +template +[[nodiscard]] constexpr const Char* parse_align(const Char* begin, const Char* end, Specs& specs) { gsl_Expects(begin != end); auto align = fmt_align::none; auto p = begin + code_point_length(begin); - if (p >= end) p = begin; + if (end - p <= 0) p = begin; for (;;) { switch (to_ascii(*p)) { case '<': @@ -352,120 +333,28 @@ template S, typename Handler> case '^': align = fmt_align::center; break; - default: - break; } if (align != fmt_align::none) { if (p != begin) { auto c = *begin; - if (c == '{') MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid fill character '{'")); - handler.on_fill(std::basic_string_view>(&*begin, static_cast(p - begin))); + if (c == '}') return begin; + if (c == '{') { + MP_UNITS_THROW(MP_UNITS_STD_FMT::format_error("invalid fill character '{'")); + return begin; + } + specs.fill = {begin, to_unsigned(p - begin)}; begin = p + 1; - } else + } else { ++begin; - handler.on_align(align); + } break; } else if (p == begin) { break; } p = begin; } + specs.align = align; return begin; } -// Parses standard format specifiers and sends notifications about parsed -// components to handler. -template S, typename SpecHandler> -[[nodiscard]] constexpr It parse_format_specs(It begin, S end, SpecHandler&& handler) -{ - if (begin + 1 < end && begin[1] == '}' && is_ascii_letter(*begin) && *begin != 'L') { - handler.on_type(*begin++); - return begin; - } - - if (begin == end) return begin; - - begin = ::mp_units::detail::parse_align(begin, end, handler); - if (begin == end) return begin; - - // Parse sign. - begin = ::mp_units::detail::parse_sign(begin, end, handler); - if (begin == end) return begin; - - if (*begin == '#') { - handler.on_hash(); - if (++begin == end) return begin; - } - - // Parse zero flag. - if (*begin == '0') { - handler.on_zero(); - if (++begin == end) return begin; - } - - begin = ::mp_units::detail::parse_width(begin, end, handler); - if (begin == end) return begin; - - // Parse precision. - if (*begin == '.') { - begin = ::mp_units::detail::parse_precision(begin, end, handler); - if (begin == end) return begin; - } - - if (*begin == 'L') { - handler.on_localized(); - ++begin; - } - - // Parse type. - if (begin != end && *begin != '}') handler.on_type(*begin++); - return begin; -} - -// A format specifier handler that sets fields in basic_format_specs. -template -class specs_setter { -protected: - basic_format_specs& specs_; -public: - constexpr explicit specs_setter(basic_format_specs& specs) : specs_(specs) {} - constexpr void on_align(fmt_align align) { specs_.align = align; } - constexpr void on_fill(std::basic_string_view fill) { specs_.fill = fill; } - constexpr void on_sign(fmt_sign s) { specs_.sign = s; } - constexpr void on_hash() { specs_.alt = true; } - constexpr void on_localized() { specs_.localized = true; } - constexpr void on_zero() { specs_.fill[0] = Char('0'); } - constexpr void on_width(int width) { specs_.width = width; } - constexpr void on_precision(int precision) { specs_.precision = precision; } - constexpr void on_type(Char type) { specs_.type = static_cast(type); } -}; - -// Format spec handler that saves references to arguments representing dynamic -// width and precision to be resolved at formatting time. -template -class dynamic_specs_handler : public specs_setter { -public: - using char_type = MP_UNITS_TYPENAME ParseContext::char_type; - - constexpr dynamic_specs_handler(dynamic_format_specs& specs, ParseContext& ctx) : - specs_setter(specs), specs_(specs), context_(ctx) - { - } - - template - constexpr void on_dynamic_width(T t) - { - specs_.dynamic_width_index = on_dynamic_arg(t, context_); - } - - template - constexpr void on_dynamic_precision(T t) - { - specs_.dynamic_precision_index = on_dynamic_arg(t, context_); - } -private: - dynamic_format_specs& specs_; - ParseContext& context_; -}; - } // namespace mp_units::detail diff --git a/src/core/include/mp-units/format.h b/src/core/include/mp-units/format.h index eafba9b2..02aaf1f3 100644 --- a/src/core/include/mp-units/format.h +++ b/src/core/include/mp-units/format.h @@ -29,241 +29,19 @@ #include #include -// Grammar -// -// quantity-format-spec ::= [fill-and-align] [width] [quantity-specs] -// quantity-specs ::= conversion-spec -// quantity-specs conversion-spec -// quantity-specs literal-char -// literal-char ::= any character other than '{' or '}' -// conversion-spec ::= '%' type -// type ::= [rep-modifier] 'Q' -// [unit-modifier] 'q' -// rep-modifier ::= [sign] [#] [precision] [L] [rep-type] -// rep-type ::= one of -// a A b B d e E f F g G o x X -// unit-modifier ::= [text-encoding] [unit-symbol-solidus] [unit-symbol-separator] -// [text-encoding] [unit-symbol-separator] [unit-symbol-solidus] -// [unit-symbol-solidus] [text-encoding] [unit-symbol-separator] -// [unit-symbol-solidus] [unit-symbol-separator] [text-encoding] -// [unit-symbol-separator] [text-encoding] [unit-symbol-solidus] -// [unit-symbol-separator] [unit-symbol-solidus] [text-encoding] -// text-encoding ::= one of -// U A -// unit-symbol-solidus ::= one of -// o a n -// unit-symbol-separator ::= one of -// s d - - -// TODO Should the below be allowed? Is it even possible to implement with `format()` being const? -// std::cout << std::format("{:%Q %q %Q %q}\n", 123s); namespace mp_units::detail { -// Holds specs about the whole object -template -struct quantity_global_format_specs { - fill_t fill; - fmt_align align = fmt_align::none; +template +struct fill_align_width_format_specs { + fill_t fill; + fmt_align align : 4 = fmt_align::none; int width = 0; - int dynamic_width_index = -1; + fmt_arg_ref width_ref; }; -// Holds specs about the representation (%[specs]Q) -struct quantity_rep_format_specs { - fmt_sign sign = fmt_sign::none; - int precision = -1; - int dynamic_precision_index = -1; - char type = '\0'; - bool alt = false; - bool localized = false; -}; - -// Holds specs about the unit (%[specs]q) -struct quantity_unit_format_specs : unit_symbol_formatting {}; - -template -struct quantity_format_specs { - quantity_global_format_specs global; - quantity_rep_format_specs rep; - quantity_unit_format_specs unit; -}; - -// Parse a `units-rep-modifier` -template S, typename Handler> -constexpr It parse_units_rep(It begin, S end, Handler&& handler, bool treat_as_floating_point) -{ - // parse sign - begin = parse_sign(begin, end, handler); - if (begin == end) return begin; - - // parse # - if (*begin == '#') { - handler.on_hash(); - if (++begin == end) return begin; - } - - // parse precision if a floating point - if (*begin == '.') { - if (treat_as_floating_point) { - begin = parse_precision(begin, end, handler); - } else - throw MP_UNITS_STD_FMT::format_error("precision not allowed for integral quantity representation"); - if (begin == end) return begin; - } - - // parse L to enable the locale-specific form - if (*begin == 'L') { - handler.on_localized(); - ++begin; - } - - if (begin != end && *begin != '}' && *begin != '%') { - handler.on_type(*begin++); - } - return begin; -} - -// parse units-specs -template S, typename Handler> -constexpr It parse_units_format(It begin, S end, Handler&& handler) -{ - auto ptr = begin; - while (ptr != end) { - auto c = *ptr; - if (c == '}') break; - if (c != '%') { - ++ptr; - continue; - } - if (begin != ptr) handler.on_text(begin, ptr); - begin = ++ptr; // consume '%' - if (ptr == end) throw MP_UNITS_STD_FMT::format_error("invalid format"); - c = *ptr++; - - constexpr auto units_types = std::string_view{"Qq"}; - const auto new_end = find_first_of(begin, end, units_types.begin(), units_types.end()); - if (new_end == end) throw MP_UNITS_STD_FMT::format_error("invalid format"); - if (*new_end == 'Q') { - handler.on_quantity_value(begin, new_end); // Edit `on_quantity_value` to add rep modifiers - } else { - handler.on_quantity_unit(begin, new_end); // Edit `on_quantity_unit` to add an unit modifier - } - ptr = new_end + 1; - begin = ptr; - } - if (begin != ptr) handler.on_text(begin, ptr); - return ptr; -} - -// build the 'representation' as requested in the format string, applying only units-rep-modifiers -template -[[nodiscard]] OutputIt format_units_quantity_value(OutputIt out, const Rep& val, - const quantity_rep_format_specs& rep_specs, const Locale& loc) -{ - std::basic_string buffer; - auto to_buffer = std::back_inserter(buffer); - - MP_UNITS_STD_FMT::format_to(to_buffer, "{{:"); - switch (rep_specs.sign) { - case fmt_sign::none: - break; - case fmt_sign::plus: - MP_UNITS_STD_FMT::format_to(to_buffer, "+"); - break; - case fmt_sign::minus: - MP_UNITS_STD_FMT::format_to(to_buffer, "-"); - break; - case fmt_sign::space: - MP_UNITS_STD_FMT::format_to(to_buffer, " "); - break; - } - - if (rep_specs.alt) { - MP_UNITS_STD_FMT::format_to(to_buffer, "#"); - } - auto type = rep_specs.type; - if (auto precision = rep_specs.precision; precision >= 0) { - MP_UNITS_STD_FMT::format_to(to_buffer, ".{}{}", precision, type == '\0' ? 'f' : type); - } else if constexpr (treat_as_floating_point) { - MP_UNITS_STD_FMT::format_to(to_buffer, "{}", type == '\0' ? 'g' : type); - } else { - if (type != '\0') { - MP_UNITS_STD_FMT::format_to(to_buffer, "{}", type); - } - } - if (rep_specs.localized) { - MP_UNITS_STD_FMT::format_to(to_buffer, "L"); - } - - MP_UNITS_STD_FMT::format_to(to_buffer, "}}"); - if (rep_specs.localized) { - return MP_UNITS_STD_FMT::vformat_to(out, MP_UNITS_FMT_LOCALE(loc), buffer, MP_UNITS_STD_FMT::make_format_args(val)); - } - return MP_UNITS_STD_FMT::vformat_to(out, buffer, MP_UNITS_STD_FMT::make_format_args(val)); -} - -// Creates a global format string -// e.g. "{:*^10%.1Q_%q}, 1.23_q_m" => "{:*^10}" -template -OutputIt format_global_buffer(OutputIt out, const quantity_global_format_specs& specs) -{ - MP_UNITS_STD_FMT::format_to(out, "{{:"); - if (specs.fill.size() != 1 || specs.fill[0] != ' ') { - MP_UNITS_STD_FMT::format_to(out, "{}", specs.fill.data()); - } - switch (specs.align) { - case fmt_align::left: - MP_UNITS_STD_FMT::format_to(out, "<"); - break; - case fmt_align::right: - MP_UNITS_STD_FMT::format_to(out, ">"); - break; - case fmt_align::center: - MP_UNITS_STD_FMT::format_to(out, "^"); - break; - default: - break; - } - if (specs.width >= 1) MP_UNITS_STD_FMT::format_to(out, "{}", specs.width); - return MP_UNITS_STD_FMT::format_to(out, "}}"); -} - -template -struct quantity_formatter { - OutputIt out; - Rep val; - const quantity_format_specs& specs; - Locale loc; - - explicit quantity_formatter(OutputIt o, const quantity& q, const quantity_format_specs& fspecs, - Locale lc) : - out(o), val(q.numerical_value_ref_in(q.unit)), specs(fspecs), loc(std::move(lc)) - { - } - - template S> - void on_text(It begin, S end) - { - std::copy(begin, end, out); - } - - template S> - void on_quantity_value([[maybe_unused]] It, [[maybe_unused]] S) - { - out = format_units_quantity_value(out, val, specs.rep, loc); - } - - template S> - void on_quantity_unit(It, S) - { - out = unit_symbol_to(out, get_unit(Reference), specs.unit); - } -}; - -template S> -[[nodiscard]] constexpr It at_most_one_of(It begin, S end, std::string_view modifiers) +template +[[nodiscard]] constexpr const Char* at_most_one_of(const Char* begin, const Char* end, std::string_view modifiers) { auto it = find_first_of(begin, end, modifiers.begin(), modifiers.end()); if (it != end && find_first_of(it + 1, end, modifiers.begin(), modifiers.end()) != end) @@ -274,194 +52,350 @@ template S> } // namespace mp_units::detail -template -struct MP_UNITS_STD_FMT::formatter, CharT> { -private: - using quantity = mp_units::quantity; - using iterator = MP_UNITS_TYPENAME MP_UNITS_STD_FMT::basic_format_parse_context::iterator; +// +// Grammar +// +// dimension-format-spec ::= [fill-and-align] [width] [dimension-spec] +// dimension-spec ::= [text-encoding] - bool quantity_value = false; - bool quantity_unit = false; - mp_units::detail::quantity_format_specs specs; - std::basic_string_view format_str; +// template +// struct dimension_format_specs : fill_align_width_format_specs, dimension_symbol_formatting {}; - struct spec_handler { - formatter& f; - MP_UNITS_STD_FMT::basic_format_parse_context& context; - constexpr void on_fill(std::basic_string_view fill) { f.specs.global.fill = fill; } - constexpr void on_align(mp_units::detail::fmt_align align) { f.specs.global.align = align; } - constexpr void on_width(int width) { f.specs.global.width = width; } - constexpr void on_sign(mp_units::detail::fmt_sign sign) { f.specs.rep.sign = sign; } - constexpr void on_hash() { f.specs.rep.alt = true; } - constexpr void on_precision(int precision) { f.specs.rep.precision = precision; } - constexpr void on_localized() { f.specs.rep.localized = true; } +// +// Grammar +// +// unit-format-spec ::= [fill-and-align] [width] [unit-spec] +// unit-spec ::= [text-encoding] [unit-symbol-solidus] [unit-symbol-separator] [L] +// [text-encoding] [unit-symbol-separator] [unit-symbol-solidus] [L] +// [unit-symbol-solidus] [text-encoding] [unit-symbol-separator] [L] +// [unit-symbol-solidus] [unit-symbol-separator] [text-encoding] [L] +// [unit-symbol-separator] [text-encoding] [unit-symbol-solidus] [L] +// [unit-symbol-separator] [unit-symbol-solidus] [text-encoding] [L] +// text-encoding ::= one of +// U A +// unit-symbol-solidus ::= one of +// o a n +// unit-symbol-separator ::= one of +// s d +template +class MP_UNITS_STD_FMT::formatter { + struct format_specs : mp_units::detail::fill_align_width_format_specs, mp_units::unit_symbol_formatting {}; - constexpr void on_type(char type) + std::basic_string_view fill_align_width_format_str_; + std::basic_string_view modifiers_format_str_; + format_specs specs_{}; + + struct format_checker { + using enum mp_units::text_encoding; + + mp_units::text_encoding encoding = unicode; + + constexpr void on_text_encoding(Char val) { encoding = (val == 'U') ? unicode : ascii; } + constexpr void on_unit_symbol_solidus(Char) const {} + constexpr void on_unit_symbol_separator(Char val) const { - constexpr auto valid_rep_types = std::string_view{"aAbBdeEfFgGoxX"}; - if (valid_rep_types.find(type) != std::string_view::npos) { - f.specs.rep.type = type; - } else { - throw MP_UNITS_STD_FMT::format_error("invalid quantity type specifier"); - } - } - - template - constexpr void on_dynamic_width(T t) - { - f.specs.global.dynamic_width_index = mp_units::detail::on_dynamic_arg(t, context); - } - - template - constexpr void on_dynamic_precision(T t) - { - f.specs.rep.dynamic_precision_index = mp_units::detail::on_dynamic_arg(t, context); - } - - template S> - constexpr void on_text(It, S) - { - } - - template S> - constexpr void on_quantity_value(It begin, S end) - { - if (begin != end) mp_units::detail::parse_units_rep(begin, end, *this, mp_units::treat_as_floating_point); - f.quantity_value = true; - } - - template S> - constexpr void on_quantity_unit(It begin, S end) - { - if (begin == end) return; - - constexpr auto valid_modifiers = std::string_view{"UAoansd"}; - for (auto it = begin; it != end; ++it) { - if (valid_modifiers.find(*it) == std::string_view::npos) - throw MP_UNITS_STD_FMT::format_error("invalid unit modifier specified"); - } - - if (auto it = mp_units::detail::at_most_one_of(begin, end, "UA"); it != end) { - if (*it == 'U') - f.specs.unit.encoding = mp_units::text_encoding::unicode; - else - f.specs.unit.encoding = mp_units::text_encoding::ascii; - } - - if (auto it = mp_units::detail::at_most_one_of(begin, end, "oan"); it != end) { - if (*it == 'o') - f.specs.unit.solidus = mp_units::unit_symbol_solidus::one_denominator; - else if (*it == 'a') - f.specs.unit.solidus = mp_units::unit_symbol_solidus::always; - else - f.specs.unit.solidus = mp_units::unit_symbol_solidus::never; - } - - if (auto it = mp_units::detail::at_most_one_of(begin, end, "sd"); it != end) { - if (*it == 's') - f.specs.unit.separator = mp_units::unit_symbol_separator::space; - else { - if (f.specs.unit.encoding == mp_units::text_encoding::ascii) - throw MP_UNITS_STD_FMT::format_error("half_high_dot unit separator allowed only for Unicode encoding"); - f.specs.unit.separator = mp_units::unit_symbol_separator::half_high_dot; - } - } - - f.quantity_unit = true; + if (val == 'd' && encoding == ascii) + throw MP_UNITS_STD_FMT::format_error("half_high_dot unit separator allowed only for Unicode encoding"); } }; - [[nodiscard]] constexpr std::pair do_parse( - MP_UNITS_STD_FMT::basic_format_parse_context& ctx) - { - auto begin = ctx.begin(); - auto end = ctx.end(); + struct unit_formatter { + format_specs specs; - if (begin == end || *begin == '}') return {begin, begin}; + using enum mp_units::text_encoding; + using enum mp_units::unit_symbol_solidus; + using enum mp_units::unit_symbol_separator; - // handler to assign parsed data to formatter data members - spec_handler handler{*this, ctx}; - - // parse alignment - begin = mp_units::detail::parse_align(begin, end, handler); - if (begin == end) return {begin, begin}; - - // parse width - begin = mp_units::detail::parse_width(begin, end, handler); - if (begin == end) return {begin, begin}; - - // parse units-specific specification - end = mp_units::detail::parse_units_format(begin, end, handler); - - if (specs.global.align == mp_units::detail::fmt_align::none && (!quantity_unit || quantity_value)) - // quantity values should behave like numbers (by default aligned to right) - specs.global.align = mp_units::detail::fmt_align::right; - - return {begin, end}; - } - - template - OutputIt format_quantity_content(OutputIt out, const quantity& q, FormatContext& ctx) - { - auto begin = format_str.begin(); - auto end = format_str.end(); - - if (begin == end || *begin == '}') { - // default format should print value followed by the unit separated with 1 space - out = mp_units::detail::format_units_quantity_value(out, q.numerical_value_ref_in(q.unit), specs.rep, - ctx.locale()); - if constexpr (mp_units::detail::has_unit_symbol(get_unit(Reference))) { - if constexpr (mp_units::space_before_unit_symbol) *out++ = CharT(' '); - out = unit_symbol_to(out, get_unit(Reference)); + constexpr void on_text_encoding(Char val) { specs.encoding = (val == 'U') ? unicode : ascii; } + constexpr void on_unit_symbol_solidus(Char val) + { + switch (val) { + case 'o': + specs.solidus = one_denominator; + break; + case 'a': + specs.solidus = always; + break; + case 'n': + specs.solidus = never; + break; } - } else { - // user provided format - mp_units::detail::quantity_formatter f(out, q, specs, ctx.locale()); - mp_units::detail::parse_units_format(begin, end, f); } - return out; + constexpr void on_unit_symbol_separator(Char val) { specs.separator = (val == 's') ? space : half_high_dot; } + }; + + template + constexpr const Char* parse_unit_specs(const Char* begin, const Char* end, Handler&& handler) const + { + auto it = begin; + if (it == end || *it == '}') return begin; + + constexpr auto valid_modifiers = std::string_view{"UAoansd"}; + for (; it != end && *it != '}'; ++it) { + if (valid_modifiers.find(*it) == std::string_view::npos) + throw MP_UNITS_STD_FMT::format_error("invalid unit modifier specified"); + } + end = it; + + if (it = mp_units::detail::at_most_one_of(begin, end, "UA"); it != end) handler.on_text_encoding(*it); + if (it = mp_units::detail::at_most_one_of(begin, end, "oan"); it != end) handler.on_unit_symbol_solidus(*it); + if (it = mp_units::detail::at_most_one_of(begin, end, "sd"); it != end) handler.on_unit_symbol_separator(*it); + return end; } public: - [[nodiscard]] constexpr auto parse(MP_UNITS_STD_FMT::basic_format_parse_context& ctx) + constexpr auto parse(MP_UNITS_STD_FMT::basic_format_parse_context& ctx) -> decltype(ctx.begin()) { - auto range = do_parse(ctx); - if (range.first != range.second) - format_str = std::basic_string_view(&*range.first, static_cast(range.second - range.first)); - return range.second; + const auto begin = ctx.begin(); + auto it = begin, end = ctx.end(); + if (it == end || *it == '}') return it; + + it = mp_units::detail::parse_align(it, end, specs_); + if (it == end) return it; + + it = mp_units::detail::parse_dynamic_spec(it, end, specs_.width, specs_.width_ref, ctx); + if (it == end) return it; + + fill_align_width_format_str_ = {begin, it}; + + format_checker checker; + end = parse_unit_specs(it, end, checker); + modifiers_format_str_ = {it, end}; + return end; } template - auto format(const quantity& q, FormatContext& ctx) + auto format(const U& u, FormatContext& ctx) const -> decltype(ctx.out()) { - // process dynamic width and precision - if (specs.global.dynamic_width_index >= 0) - specs.global.width = - mp_units::detail::get_dynamic_spec(specs.global.dynamic_width_index, ctx); - if (specs.rep.dynamic_precision_index >= 0) - specs.rep.precision = - mp_units::detail::get_dynamic_spec(specs.rep.dynamic_precision_index, ctx); + auto specs = specs_; + mp_units::detail::handle_dynamic_spec(specs.width, specs.width_ref, ctx); - if (specs.global.width == 0) { + if (specs.width == 0) { // Avoid extra copying if width is not specified - return format_quantity_content(ctx.out(), q, ctx); + return mp_units::unit_symbol_to(ctx.out(), u, specs); } else { - // In `quantity_buffer` we will have the representation and the unit formatted according to their - // specification, ignoring global specifiers - // e.g. "{:*^10%.1Q_%q}, 1.23_q_m" => "1.2_m" - std::basic_string quantity_buffer; + std::basic_string unit_buffer; + mp_units::unit_symbol_to(std::back_inserter(unit_buffer), u, specs); - // deal with quantity content - format_quantity_content(std::back_inserter(quantity_buffer), q, ctx); + std::basic_string global_format_buffer = "{:" + std::basic_string{fill_align_width_format_str_} + "}"; + return MP_UNITS_STD_FMT::vformat_to(ctx.out(), global_format_buffer, + MP_UNITS_STD_FMT::make_format_args(unit_buffer)); + } + } +}; - // In `global_format_buffer` we will create a global format string - // e.g. "{:*^10%.1Q_%q}, 1.23_q_m" => "{:*^10}" - std::basic_string global_format_buffer; - mp_units::detail::format_global_buffer(std::back_inserter(global_format_buffer), specs.global); - // Format the `quantity buffer` using specs from `global_format_buffer` - // In the example, equivalent to MP_UNITS_STD_FMT::format("{:*^10}", "1.2_m") +// +// Grammar +// +// quantity-format-spec ::= [fill-and-align] [width] [quantity-specs] +// quantity-specs ::= conversion-spec +// quantity-specs conversion-spec +// quantity-specs literal-char +// literal-char ::= any character other than '{', '}', or '%' +// conversion-spec ::= placement-spec +// subentity-replacement-field +// placement-spec ::= '%' placement-type +// placement-type ::= one of +// N U D ? % +// subentity-replacement-field ::= { % subentity-id [format-specifier] } +// subentity-id ::= any character other than {, }, or % +// format-specifier ::= format-spec +// format-spec ::= as specified by the formatter for the argument type; cannot start with } +// +template +class MP_UNITS_STD_FMT::formatter, Char> { + static constexpr auto unit = get_unit(Reference); + static constexpr auto dimension = get_quantity_spec(Reference).dimension; + + using quantity_t = mp_units::quantity; + using unit_t = std::remove_const_t; + using dimension_t = std::remove_const_t; + + using format_specs = mp_units::detail::fill_align_width_format_specs; + + std::basic_string_view fill_align_width_format_str_; + std::basic_string_view modifiers_format_str_; + std::vector format_str_lengths_; + format_specs specs_{}; + + struct format_checker { + MP_UNITS_STD_FMT::basic_format_parse_context& ctx; + std::vector& format_str_lengths; + + constexpr void on_number(std::basic_string_view) const {} + constexpr void on_maybe_space() const {} + constexpr void on_unit(std::basic_string_view) const {} + constexpr void on_dimension(std::basic_string_view) const {} + constexpr void on_text(const Char*, const Char*) const {} + + constexpr const Char* on_replacement_field(std::basic_string_view id, const Char* begin) + { + if (id == "N") + return on_replacement_field(begin); + else if (id == "U") + return on_replacement_field(begin); + else if (id == "D") { + return begin; + // on_replacement_field(begin); + } else + throw MP_UNITS_STD_FMT::format_error("invalid format"); + } + + private: + template + constexpr const Char* on_replacement_field(const Char* begin) const + { + MP_UNITS_STD_FMT::formatter sf; + ctx.advance_to(begin); + auto ptr = sf.parse(ctx); + if (*ptr != '}') throw MP_UNITS_STD_FMT::format_error("invalid format"); + format_str_lengths.push_back(mp_units::detail::to_unsigned(ptr - begin)); + return ptr; + } + }; + + template + struct quantity_formatter { + OutputIt out; + const quantity_t& q; + std::vector::const_iterator format_str_lengths_it; + std::locale locale; + + void on_number(std::basic_string_view format_str) + { + out = MP_UNITS_STD_FMT::vformat_to(out, locale, format_str, + MP_UNITS_STD_FMT::make_format_args(q.numerical_value_ref_in(q.unit))); + } + void on_maybe_space() + { + if constexpr (mp_units::space_before_unit_symbol) *out++ = ' '; + } + void on_unit(std::basic_string_view format_str) + { + out = MP_UNITS_STD_FMT::vformat_to(out, locale, format_str, MP_UNITS_STD_FMT::make_format_args(q.unit)); + } + void on_dimension(std::basic_string_view) {} + void on_text(const Char* begin, const Char* end) const { std::copy(begin, end, out); } + + constexpr const Char* on_replacement_field(std::basic_string_view id, const Char* begin) + { + auto format_str = [&] { return "{:" + std::string(begin, *format_str_lengths_it + 1); }; + if (id == "N") + on_number(format_str()); + else if (id == "U") + on_unit(format_str()); + else if (id == "D") + on_dimension(format_str()); + else + throw MP_UNITS_STD_FMT::format_error("invalid format"); + return begin + *format_str_lengths_it++; + } + }; + + template + constexpr const Char* parse_quantity_specs(const Char* begin, const Char* end, Handler&& handler) const + { + if (begin == end || *begin == '}') return begin; + if (*begin != '%' && *begin != '{') throw MP_UNITS_STD_FMT::format_error("invalid format"); + auto ptr = begin; + while (ptr != end) { + auto c = *ptr; + if (c == '}') break; + if (c == '{') { + if (begin != ptr) handler.on_text(begin, ptr); + begin = ptr = mp_units::detail::parse_subentity_replacement_field(ptr, end, handler); + continue; + } + if (c != '%') { + ++ptr; + continue; + } + if (begin != ptr) handler.on_text(begin, ptr); + ++ptr; // consume '%' + if (ptr == end) throw MP_UNITS_STD_FMT::format_error("invalid format"); + + c = *ptr++; + switch (c) { + case 'N': + handler.on_number("{}"); + break; + case 'U': + handler.on_unit("{}"); + break; + case 'D': + handler.on_dimension("{}"); + break; + case '?': + handler.on_maybe_space(); + break; + case '%': + handler.on_text(ptr - 1, ptr); + break; + default: + throw MP_UNITS_STD_FMT::format_error("invalid format"); + } + begin = ptr; + } + if (begin != ptr) handler.on_text(begin, ptr); + return ptr; + } + + template + OutputIt format_quantity(OutputIt out, const quantity_t& q, FormatContext& ctx) const + { + std::locale locale = ctx.locale().template get(); + if (modifiers_format_str_.empty()) { + // default format should print value followed by the unit separated with 1 space + out = MP_UNITS_STD_FMT::vformat_to(out, locale, "{}", + MP_UNITS_STD_FMT::make_format_args(q.numerical_value_ref_in(q.unit))); + if constexpr (mp_units::space_before_unit_symbol) *out++ = ' '; + return MP_UNITS_STD_FMT::vformat_to(out, locale, "{}", MP_UNITS_STD_FMT::make_format_args(q.unit)); + } else { + // user provided format + quantity_formatter f{out, q, format_str_lengths_.cbegin(), locale}; + parse_quantity_specs(modifiers_format_str_.begin(), modifiers_format_str_.end(), f); + return f.out; + } + } + +public: + // parse quantity-format-specs + constexpr auto parse(MP_UNITS_STD_FMT::basic_format_parse_context& ctx) -> decltype(ctx.begin()) + { + const auto begin = ctx.begin(); + auto it = begin, end = ctx.end(); + if (it == end || *it == '}') return it; + + it = mp_units::detail::parse_align(it, end, specs_); + if (it == end) return it; + + it = mp_units::detail::parse_dynamic_spec(it, end, specs_.width, specs_.width_ref, ctx); + if (it == end) return it; + + fill_align_width_format_str_ = {begin, it}; + + format_checker checker(ctx, format_str_lengths_); + end = parse_quantity_specs(it, end, checker); + modifiers_format_str_ = {it, end}; + return end; + } + + template + auto format(const quantity_t& q, FormatContext& ctx) const -> decltype(ctx.out()) + { + auto specs = specs_; + mp_units::detail::handle_dynamic_spec(specs.width, specs.width_ref, ctx); + + if (specs.width == 0) { + // Avoid extra copying if width is not specified + return format_quantity(ctx.out(), q, ctx); + } else { + std::basic_string quantity_buffer; + format_quantity(std::back_inserter(quantity_buffer), q, ctx); + + std::basic_string global_format_buffer = "{:" + std::basic_string{fill_align_width_format_str_} + "}"; return MP_UNITS_STD_FMT::vformat_to(ctx.out(), global_format_buffer, MP_UNITS_STD_FMT::make_format_args(quantity_buffer)); }