diff --git a/CMakeLists.txt b/CMakeLists.txt index 4373048de53..2a1eaf8cada 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -471,6 +471,9 @@ endif() if (HERMES_ENABLE_INTL) add_definitions(-DHERMES_ENABLE_INTL) + if (NOT (HERMES_IS_ANDROID OR APPLE)) + add_definitions(-DHERMES_INTL_FORMAT_RANGE) + endif() endif() if (HERMES_ENABLE_UNICODE_REGEXP_PROPERTY_ESCAPES) diff --git a/include/hermes/Platform/Intl/PlatformIntl.h b/include/hermes/Platform/Intl/PlatformIntl.h index 8a0b8eba71d..9433f4b57e3 100644 --- a/include/hermes/Platform/Intl/PlatformIntl.h +++ b/include/hermes/Platform/Intl/PlatformIntl.h @@ -148,6 +148,17 @@ class DateTimeFormat : public vm::DecoratedObject::Decoration { std::u16string format(double jsTimeValue) noexcept; std::vector formatToParts(double jsTimeValue) noexcept; +#ifdef HERMES_INTL_FORMAT_RANGE + vm::CallResult formatRange( + vm::Runtime &runtime, + double startUtcMs, + double endUtcMs) noexcept; + + vm::CallResult> formatRangeToParts( + vm::Runtime &runtime, + double startUtcMs, + double endUtcMs) noexcept; +#endif }; class NumberFormat : public vm::DecoratedObject::Decoration { diff --git a/include/hermes/Platform/Intl/PlatformIntlShared.h b/include/hermes/Platform/Intl/PlatformIntlShared.h deleted file mode 100644 index ce141c335dc..00000000000 --- a/include/hermes/Platform/Intl/PlatformIntlShared.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#ifndef HERMES_PLATFORMINTL_PLATFORMINTLSHARED_H -#define HERMES_PLATFORMINTL_PLATFORMINTLSHARED_H - -#ifdef HERMES_ENABLE_INTL -#include "hermes/Platform/Intl/PlatformIntl.h" - -namespace hermes { -namespace platform_intl { - -/// https://402.ecma-international.org/8.0/#sec-todatetimeoptions -vm::CallResult toDateTimeOptions( - vm::Runtime &runtime, - Options options, - std::u16string_view required, - std::u16string_view defaults); - -/// https://402.ecma-international.org/8.0/#sec-case-sensitivity-and-case-mapping -std::u16string toASCIIUppercase(std::u16string_view tz); - -} // namespace platform_intl -} // namespace hermes - -#endif - -#endif diff --git a/include/hermes/VM/NativeFunctions.def b/include/hermes/VM/NativeFunctions.def index 3e605b6a5df..892d3228c0a 100644 --- a/include/hermes/VM/NativeFunctions.def +++ b/include/hermes/VM/NativeFunctions.def @@ -459,6 +459,10 @@ NATIVE_FUNCTION(intlDateTimeFormatFormat) NATIVE_FUNCTION(intlDateTimeFormatSupportedLocalesOf) NATIVE_FUNCTION(intlDateTimeFormatPrototypeFormatGetter) NATIVE_FUNCTION(intlDateTimeFormatPrototypeFormatToParts) +#ifdef HERMES_INTL_FORMAT_RANGE +NATIVE_FUNCTION(intlDateTimeFormatPrototypeFormatRange) +NATIVE_FUNCTION(intlDateTimeFormatPrototypeFormatRangeToParts) +#endif NATIVE_FUNCTION(intlDateTimeFormatPrototypeResolvedOptions) NATIVE_FUNCTION(intlNumberFormatConstructor) diff --git a/include/hermes/VM/PredefinedStrings.def b/include/hermes/VM/PredefinedStrings.def index 6242697af14..980be3c5ea5 100644 --- a/include/hermes/VM/PredefinedStrings.def +++ b/include/hermes/VM/PredefinedStrings.def @@ -505,6 +505,10 @@ STR(Intl, "Intl") STR(compare, "compare") STR(format, "format") STR(formatToParts, "formatToParts") +#ifdef HERMES_INTL_FORMAT_RANGE +STR(formatRange, "formatRange") +STR(formatRangeToParts, "formatRangeToParts") +#endif STR(getCanonicalLocales, "getCanonicalLocales") STR(resolvedOptions, "resolvedOptions") STR(supportedLocalesOf, "supportedLocalesOf") diff --git 
a/lib/Platform/Intl/BCP47Parser.cpp b/lib/Platform/Intl/BCP47Parser.cpp index ca7bb22f66d..6ef2af8d82c 100644 --- a/lib/Platform/Intl/BCP47Parser.cpp +++ b/lib/Platform/Intl/BCP47Parser.cpp @@ -46,7 +46,8 @@ bool isUnicodeRegionSubtag(const std::u16string &subtag) { bool isUnicodeVariantSubtag(const std::u16string &subtag) { // = (alphanum{5,8} | digit alphanum{3}); return isCharType(subtag, 5, 8, &isASCIILetterOrDigit) || - isCharType(subtag, 3, 3, &isASCIILetterOrDigit); + (subtag.size() == 4 && isASCIIDigit(subtag.front()) && + isCharType(subtag.substr(1), 3, 3, &isASCIILetterOrDigit)); } bool isUnicodeExtensionAttribute(const std::u16string &subtag) { // = alphanum{3,8}; diff --git a/lib/Platform/Intl/CMakeLists.txt b/lib/Platform/Intl/CMakeLists.txt index 01c69b5fdb4..f667885c201 100644 --- a/lib/Platform/Intl/CMakeLists.txt +++ b/lib/Platform/Intl/CMakeLists.txt @@ -24,13 +24,14 @@ if(HERMES_ENABLE_INTL) else() add_hermes_library(hermesPlatformIntl STATIC PlatformIntlICU.cpp - PlatformIntlShared.cpp impl_icu/Collator.cpp + impl_icu/DateTimeFormat.cpp impl_icu/IntlUtils.cpp impl_icu/LocaleConverter.cpp impl_icu/LocaleBCP47Object.cpp impl_icu/LocaleResolver.cpp impl_icu/OptionHelpers.cpp + impl_icu/NumberingSystem.cpp LINK_LIBS hermesBCP47Parser hermesPublic diff --git a/lib/Platform/Intl/PlatformIntlApple.mm b/lib/Platform/Intl/PlatformIntlApple.mm index 090bb952350..93698a2006e 100644 --- a/lib/Platform/Intl/PlatformIntlApple.mm +++ b/lib/Platform/Intl/PlatformIntlApple.mm @@ -1484,6 +1484,16 @@ uint8_t getCurrencyDigits(std::u16string_view code) { // 30. Let dataLocaleData be localeData.[[]]. // 31. Let matcher be ? GetOption(options, "formatMatcher", "string", « // "basic", "best fit" », "best fit"). + // NOTE: Only best fit format matcher is implemented through use of NSDateFormatter. + // The formatMatcher option is read and checked for valid values. 
+ auto formatMatcherRes = getOptionString( + runtime, + options, + u"formatMatcher", + {u"basic", u"best fit"}, + u"best fit"); + if (LLVM_UNLIKELY(formatMatcherRes == vm::ExecutionStatus::EXCEPTION)) + return vm::ExecutionStatus::EXCEPTION; // 32. Let dateStyle be ? GetOption(options, "dateStyle", "string", « "full", // "long", "medium", "short" », undefined). static constexpr std::u16string_view dateStyles[] = { diff --git a/lib/Platform/Intl/PlatformIntlICU.cpp b/lib/Platform/Intl/PlatformIntlICU.cpp index ccdc133ead0..5523a97a8f5 100644 --- a/lib/Platform/Intl/PlatformIntlICU.cpp +++ b/lib/Platform/Intl/PlatformIntlICU.cpp @@ -5,132 +5,20 @@ * LICENSE file in the root directory of this source tree. */ -#include "hermes/Platform/Intl/BCP47Parser.h" #include "hermes/Platform/Intl/PlatformIntl.h" -#include "hermes/Platform/Intl/PlatformIntlShared.h" #include "impl_icu/Collator.h" -#include "impl_icu/IntlUtils.h" +#include "impl_icu/DateTimeFormat.h" #include "impl_icu/LocaleBCP47Object.h" -#include "impl_icu/LocaleResolver.h" -#include "impl_icu/OptionHelpers.h" - -#include -#include -#include // ICU changed the default UChar type on version 59, but we still need to // support 52+ However, ICU allows us to manually set a type for UChar using // UCHAR_TYPE so UChar isn't platform dependent. #define UCHAR_TYPE char16_t -#include "unicode/dtptngen.h" -#include "unicode/strenum.h" -#include "unicode/timezone.h" -#include "unicode/udat.h" -#include "unicode/unistr.h" - -using namespace U_ICU_NAMESPACE; - namespace hermes { namespace platform_intl { namespace { -/// Thread safe management of time zone names map. 
-class TimeZoneNames { - public: - /// Initializing the underlying map with all known time zone names in - /// ICU::TimeZone - TimeZoneNames() { - std::unique_ptr icuTimeZones( - TimeZone::createEnumeration()); - UErrorCode status = U_ZERO_ERROR; - auto *zoneId = icuTimeZones->unext(nullptr, status); - - while (zoneId != nullptr && status == U_ZERO_ERROR) { - auto upper = toASCIIUppercase(zoneId); - timeZoneNamesMap_.emplace(std::move(upper), std::move(zoneId)); - zoneId = icuTimeZones->unext(nullptr, status); - } - } - - /// Check if \p tz is a valid time zone name. - bool contains(std::u16string_view tz) const { - std::shared_lock lock(mutex_); - return timeZoneNamesMap_.find(toASCIIUppercase(tz)) != - timeZoneNamesMap_.end(); - } - - /// Get canonical time zone name for \p tz. Note that \p tz must - /// be a valid key in the map. - std::u16string getCanonical(std::u16string_view tz) const { - std::shared_lock lock(mutex_); - auto ianaTimeZoneIt = timeZoneNamesMap_.find(toASCIIUppercase(tz)); - assert( - ianaTimeZoneIt != timeZoneNamesMap_.end() && - "getCanonical() must be called on valid time zone name."); - return ianaTimeZoneIt->second; - } - - /// Update the time zone name map with \p tz if it does not exist yet. - void update(std::u16string_view tz) { - auto upper = toASCIIUppercase(tz); - // Read lock and check if tz is already in the map. - { - std::shared_lock lock(mutex_); - if (timeZoneNamesMap_.find(upper) != timeZoneNamesMap_.end()) { - return; - } - } - // If not, write lock and insert it into the map. - { - std::unique_lock lock(mutex_); - timeZoneNamesMap_.emplace(upper, tz); - } - } - - private: - /// Map from upper case time zone name to canonical time zone name. 
- std::unordered_map timeZoneNamesMap_; - mutable std::shared_mutex mutex_; -}; - -static TimeZoneNames &validTimeZoneNames() { - static TimeZoneNames validTimeZoneNames; - return validTimeZoneNames; -} - -/// https://402.ecma-international.org/8.0/#sec-isvalidtimezonename -static bool isValidTimeZoneName(std::u16string_view tz) { - return validTimeZoneNames().contains(tz); -} - -/// https://402.ecma-international.org/8.0/#sec-defaulttimezone -std::u16string getDefaultTimeZone(vm::Runtime &runtime) { - std::unique_ptr timeZone(TimeZone::createDefault()); - UnicodeString unicodeTz; - timeZone->getID(unicodeTz); - std::u16string tz(unicodeTz.getBuffer(), unicodeTz.length()); - validTimeZoneNames().update(tz); - return tz; -} - -/// https://402.ecma-international.org/8.0/#sec-canonicalizetimezonename -std::u16string canonicalizeTimeZoneName(std::u16string_view tz) { - // 1. Let ianaTimeZone be the Zone or Link name of the IANA Time Zone Database - // such that timeZone, converted to upper case as described in 6.1, is equal - // to ianaTimeZone, converted to upper case as described in 6.1. - auto ianaTimeZone = validTimeZoneNames().getCanonical(tz); - // NOTE: We don't use actual IANA database, so we leave (2) unimplemented. - // 2. If ianaTimeZone is a Link name, let ianaTimeZone be the corresponding - // Zone name as specified in the "backward" file of the IANA Time Zone - // Database. - // 3. If ianaTimeZone is "Etc/UTC" or "Etc/GMT", return "UTC". - if (ianaTimeZone == u"Etc/UTC" || ianaTimeZone == u"Etc/GMT") - ianaTimeZone = u"UTC"; - // 4. Return ianaTimeZone. - return ianaTimeZone; -} - // T is a type of the intl service implementation classes. // BaseT is the base platform_intl class that the intl service // implementation class inherits from. 
@@ -153,6 +41,24 @@ vm::CallResult> createInstance( } // namespace +// https://tc39.es/ecma402/#sec-intl.getcanonicallocales +vm::CallResult> getCanonicalLocales( + vm::Runtime &runtime, + const std::vector &locales) { + // 1. Let ll be ? CanonicalizeLocaleList(locales). + auto localeBcp47ObjectsRes = + impl_icu::LocaleBCP47Object::canonicalizeLocaleList(runtime, locales); + if (LLVM_UNLIKELY(localeBcp47ObjectsRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + // 2. Return CreateArrayFromList(ll). + std::vector canonicalLocales; + for (const auto &localeBcp47Object : *localeBcp47ObjectsRes) { + canonicalLocales.push_back(localeBcp47Object.getCanonicalizedLocaleId()); + } + return canonicalLocales; +} + // Not yet implemented. vm::CallResult toLocaleLowerCase( vm::Runtime &runtime, @@ -196,82 +102,6 @@ double Collator::compare( return static_cast(this)->compare(x, y); } -namespace { -/// Implementation of -/// https://402.ecma-international.org/8.0/#datetimeformat-objects -struct DateTimeFormatICU : DateTimeFormat { - public: - DateTimeFormatICU() = default; - ~DateTimeFormatICU() { - udat_close(dateTimeFormatter_); - }; - - vm::ExecutionStatus initialize( - vm::Runtime &runtime, - const std::vector &locales, - const Options &inputOptions) noexcept; - Options resolvedOptions() noexcept; - - std::u16string format(double jsTimeValue) noexcept; - - private: - UDateFormat *getUDateFormatter(vm::Runtime &runtime); - std::u16string getDefaultHourCycle(); - - /// https://402.ecma-international.org/8.0/#sec-properties-of-intl-datetimeformat-instances - /// Intl.DateTimeFormat instances have an [[InitializedDateTimeFormat]] - /// internal slot. - /// NOTE: InitializedDateTimeFormat is not implemented. - /// Intl.DateTimeFormat instances also have several internal - /// slots that are computed by the constructor: - /// [[Locale]] is a String value with the language tag of the locale whose - /// localization is used for formatting. 
- std::u16string locale_; - /// [[Calendar]] is a String value with the "type" given in Unicode Technical - /// Standard 35 for the calendar used for formatting. - std::optional calendar_; - /// [[NumberingSystem]] is a String value with the "type" given in Unicode - /// Technical Standard 35 for the numbering system used for formatting. - /// [[TimeZone]] is a String value with the IANA time zone name of the time - /// zone used for formatting. - std::u16string timeZone_; - /// [[Weekday]], [[Era]], [[Year]], [[Month]], [[Day]], [[DayPeriod]], - /// [[Hour]], [[Minute]], [[Second]], [[TimeZoneName]] are each either - /// undefined, indicating that the component is not used for formatting, or - /// one of the String values given in Table 4, indicating how the component - /// should be presented in the formatted output. - std::optional weekday_; - std::optional era_; - std::optional year_; - std::optional month_; - std::optional day_; - std::optional dayPeriod_; - std::optional hour_; - std::optional minute_; - std::optional second_; - std::optional timeZoneName_; - /// [[FractionalSecondDigits]] is either undefined or a positive, non-zero - /// integer Number value indicating the fraction digits to be used for - /// fractional seconds. Numbers will be rounded or padded with trailing zeroes - /// if necessary. - std::optional fractionalSecondDigits_; - /// [[HourCycle]] is a String value indicating whether the 12-hour format - /// ("h11", "h12") or the 24-hour format ("h23", "h24") should be used. "h11" - /// and "h23" start with hour 0 and go up to 11 and 23 respectively. "h12" and - /// "h24" start with hour 1 and go up to 12 and 24. [[HourCycle]] is only used - /// when [[Hour]] is not undefined. - std::optional hourCycle_; - /// [[DateStyle]], [[TimeStyle]] are each either undefined, or a String value - /// with values "full", "long", "medium", or "short". - std::optional dateStyle_; - std::optional timeStyle_; - /// UTF-8 version of locale_. Used for ICU calls. 
- std::string locale8_; - /// Internal initialized ICU date formatter. - UDateFormat *dateTimeFormatter_; -}; -} // namespace - DateTimeFormat::DateTimeFormat() = default; DateTimeFormat::~DateTimeFormat() = default; @@ -280,701 +110,46 @@ vm::CallResult> DateTimeFormat::supportedLocalesOf( vm::Runtime &runtime, const std::vector &locales, const Options &options) noexcept { - return impl_icu::supportedLocales(runtime, locales, options); -} - -vm::ExecutionStatus DateTimeFormatICU::initialize( - vm::Runtime &runtime, - const std::vector &locales, - const Options &inputOptions) noexcept { - // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales). - auto requestedLocalesRes = - impl_icu::LocaleBCP47Object::canonicalizeLocaleList(runtime, locales); - if (LLVM_UNLIKELY(requestedLocalesRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - // 2. Let options be ? ToDateTimeOptions(options, "any", "date"). - auto optionsRes = toDateTimeOptions(runtime, inputOptions, u"any", u"date"); - if (LLVM_UNLIKELY(optionsRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - auto options = *optionsRes; - // 3. Let opt be a new Record. - Options opt; - // 4. Let matcher be ? GetOption(options, "localeMatcher", "string", - // «"lookup", "best fit" », "best fit"). - auto matcherRes = impl_icu::getStringOption( - runtime, - options, - u"localeMatcher", - {u"lookup", u"best fit"}, - u"best fit"); - if (LLVM_UNLIKELY(matcherRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - // 5. Set opt.[[localeMatcher]] to matcher. - auto matcherOpt = *matcherRes; - opt.emplace(u"localeMatcher", *matcherOpt); - // 6. Let calendar be ? GetOption(options, "calendar", "string", - // undefined, undefined). - auto calendarRes = - impl_icu::getStringOption(runtime, options, u"calendar", {}, {}); - // 7. 
If calendar is not undefined, then - if (LLVM_UNLIKELY(calendarRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - // 8. Set opt.[[ca]] to calendar. - if (auto calendarOpt = *calendarRes) { - // a. If calendar does not match the Unicode Locale Identifier type - // nonterminal, throw a RangeError exception. - if (!isUnicodeExtensionType(*calendarOpt)) - return runtime.raiseRangeError( - vm::TwineChar16("Invalid calendar: ") + - vm::TwineChar16(calendarOpt->c_str())); - opt.emplace(u"ca", *calendarOpt); - } - // 9. Let numberingSystem be ? GetOption(options, "numberingSystem", - // "string", undefined, undefined). - // 10. If numberingSystem is not undefined, then - // a. If numberingSystem does not match the Unicode Locale Identifier - // type nonterminal, throw a RangeError exception. - // 11. Set opt.[[nu]] to numberingSystem. - opt.emplace(u"nu", u""); - // 12. Let hour12 be ? GetOption(options, "hour12", "boolean", - // undefined, undefined). - auto hour12 = impl_icu::getBoolOption(options, u"hour12", {}); - // 13. Let hourCycle be ? GetOption(options, "hourCycle", "string", « - // "h11", "h12", "h23", "h24" », undefined). - auto hourCycleRes = impl_icu::getStringOption( - runtime, options, u"hourCycle", {u"h11", u"h12", u"h23", u"h24"}, {}); - if (LLVM_UNLIKELY(hourCycleRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - auto hourCycleOpt = *hourCycleRes; - // 14. If hour12 is not undefined, then - if (hour12.has_value()) - // a. Let hourCycle be null. - // NOTE: We would normally just don't add this to the "opt" map, but - // resolveLocale actually checks for presence of keys, even if values are - // null or undefined. - hourCycleOpt = u""; - if (hourCycleOpt.has_value()) - // 15. Set opt.[[hc]] to hourCycle. - opt.emplace(u"hc", *hourCycleOpt); - // 16. Let localeData be %DateTimeFormat%.[[LocaleData]]. 
- // NOTE: We don't actually have access to the underlying locale data, so we - // will use ICU's default locale as a substitute - // 17. Let r be ResolveLocale(%DateTimeFormat%.[[AvailableLocales]], - // requestedLocales, opt, %DateTimeFormat%.[[RelevantExtensionKeys]], - // localeData). - static constexpr std::u16string_view relevantExtensionKeys[] = { - u"ca", u"nu", u"hc"}; - auto r = impl_icu::resolveLocale( - *requestedLocalesRes, - opt, - relevantExtensionKeys, - [](auto key, auto type, auto locale) { return true; }); - // 18. Set dateTimeFormat.[[Locale]] to r.[[locale]]. - locale_ = r.localeBcp47Object.getCanonicalizedLocaleId(); - - // store the UTF8 version of locale since it is used in almost all other - // functions - locale8_ = impl_icu::toUTF8ASCII(locale_); - - // 19. Let calendar be r.[[ca]]. - auto caIt = r.resolvedOpts.find(u"ca"); - // 20. Set dateTimeFormat.[[Calendar]] to calendar. - if (caIt != r.resolvedOpts.end()) - calendar_ = caIt->second.getString(); - // 21. Set dateTimeFormat.[[HourCycle]] to r.[[hc]]. - auto hcIt = r.resolvedOpts.find(u"hc"); - if (hcIt != r.resolvedOpts.end()) - hourCycle_ = hcIt->second.getString(); - // 22. Set dateTimeFormat.[[NumberingSystem]] to r.[[nu]]. - // 23. Let dataLocale be r.[[dataLocale]]. - // 24. Let timeZone be ? Get(options, "timeZone"). - auto timeZoneIt = options.find(u"timeZone"); - std::u16string timeZone; - // 25. If timeZone is undefined, then - if (timeZoneIt == options.end()) { - // a. Let timeZone be DefaultTimeZone(). - timeZone = getDefaultTimeZone(runtime); - // 26. Else, - } else { - // a. Let timeZone be ? ToString(timeZone). - timeZone = timeZoneIt->second.getString(); - // b. If the result of IsValidTimeZoneName(timeZone) is false, then - if (!isValidTimeZoneName(timeZone)) { - // i. Throw a RangeError exception. - return runtime.raiseRangeError("Incorrect timeZone information provided"); - } - // c. Let timeZone be CanonicalizeTimeZoneName(timeZone). 
- timeZone = canonicalizeTimeZoneName(timeZone); - } - // 27. Set dateTimeFormat.[[TimeZone]] to timeZone. - timeZone_ = timeZone; - // 28. Let opt be a new Record. - // 29. For each row of Table 4, except the header row, in table order, do - // a. Let prop be the name given in the Property column of the row. - // b. If prop is "fractionalSecondDigits", then - // i. Let value be ? GetNumberOption(options, "fractionalSecondDigits", 1, - // 3, undefined). - // d. Set opt.[[]] to value. - // c. Else, - // i. Let value be ? GetOption(options, prop, "string", « the strings - // given in the Values column of the row », undefined). - // d. Set opt.[[]] to value. - // 30. Let dataLocaleData be localeData.[[]]. - // 31. Let matcher be ? GetOption(options, "formatMatcher", "string", « - // "basic", "best fit" », "best fit"). - // 32. Let dateStyle be ? GetOption(options, "dateStyle", "string", « "full", - // "long", "medium", "short" », undefined). - auto dateStyleRes = impl_icu::getStringOption( - runtime, - options, - u"dateStyle", - {u"full", u"long", u"medium", u"short"}, - {}); - if (LLVM_UNLIKELY(dateStyleRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - // 33. Set dateTimeFormat.[[DateStyle]] to dateStyle. - dateStyle_ = *dateStyleRes; - // 34. Let timeStyle be ? GetOption(options, "timeStyle", "string", « "full", - // "long", "medium", "short" », undefined). - auto timeStyleRes = impl_icu::getStringOption( - runtime, - options, - u"timeStyle", - {u"full", u"long", u"medium", u"short"}, - {}); - if (LLVM_UNLIKELY(timeStyleRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - // 35. Set dateTimeFormat.[[TimeStyle]] to timeStyle. - timeStyle_ = *timeStyleRes; - - // Initialize properties using values from the input options. 
- auto weekdayRes = impl_icu::getStringOption( - runtime, options, u"weekday", {u"narrow", u"short", u"long"}, {}); - if (LLVM_UNLIKELY(weekdayRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - weekday_ = *weekdayRes; - - auto eraRes = impl_icu::getStringOption( - runtime, options, u"era", {u"narrow", u"short", u"long"}, {}); - if (LLVM_UNLIKELY(eraRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - era_ = *eraRes; - - auto yearRes = impl_icu::getStringOption( - runtime, options, u"year", {u"2-digit", u"numeric"}, {}); - if (LLVM_UNLIKELY(yearRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - year_ = *yearRes; - - auto monthRes = impl_icu::getStringOption( - runtime, - options, - u"month", - {u"2-digit", u"numeric", u"narrow", u"short", u"long"}, - {}); - if (LLVM_UNLIKELY(monthRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - month_ = *monthRes; - - auto dayRes = impl_icu::getStringOption( - runtime, options, u"day", {u"2-digit", u"numeric"}, {}); - if (LLVM_UNLIKELY(dayRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - day_ = *dayRes; - - auto dayPeriodRes = impl_icu::getStringOption( - runtime, options, u"dayPeriod", {u"narrow", u"short", u"long"}, {}); - if (LLVM_UNLIKELY(dayPeriodRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - dayPeriod_ = *dayPeriodRes; - - auto hourRes = impl_icu::getStringOption( - runtime, options, u"hour", {u"2-digit", u"numeric"}, {}); - if (LLVM_UNLIKELY(hourRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - hour_ = *hourRes; - - auto minuteRes = impl_icu::getStringOption( - runtime, options, u"minute", {u"2-digit", u"numeric"}, {}); - if (LLVM_UNLIKELY(minuteRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - minute_ = *minuteRes; - - auto secondRes = impl_icu::getStringOption( - 
runtime, options, u"second", {u"2-digit", u"numeric"}, {}); - if (LLVM_UNLIKELY(secondRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - second_ = *secondRes; - - auto fractionalSecondDigitsRes = impl_icu::getNumberOption( - runtime, options, u"fractionalSecondDigits", 1, 3, {}); - if (LLVM_UNLIKELY( - fractionalSecondDigitsRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - fractionalSecondDigits_ = *fractionalSecondDigitsRes; - - // NOTE: "shortOffset", "longOffset", "shortGeneric", "longGeneric" - // are specified here: - // https://tc39.es/proposal-intl-extend-timezonename - // they are not in ecma402 spec, but there is a test for them: - // "test262/test/intl402/DateTimeFormat/constructor-options-timeZoneName-valid.js" - auto timeZoneNameRes = impl_icu::getStringOption( - runtime, - options, - u"timeZoneName", - {u"short", - u"long", - u"shortOffset", - u"longOffset", - u"shortGeneric", - u"longGeneric"}, - {}); - if (LLVM_UNLIKELY(timeZoneNameRes == vm::ExecutionStatus::EXCEPTION)) - return vm::ExecutionStatus::EXCEPTION; - timeZoneName_ = *timeZoneNameRes; - // NOTE: We don't have access to localeData, instead we'll defer to - // ICU::Locale - // 36. If dateStyle is not undefined or timeStyle is not undefined, then - // a. For each row in Table 4, except the header row, do - // i. Let prop be the name given in the Property column of the row. - // ii. Let p be opt.[[]]. - // iii. If p is not undefined, then - // 1. Throw a TypeError exception. - // b. Let styles be dataLocaleData.[[styles]].[[]]. - // c. Let bestFormat be DateTimeStyleFormat(dateStyle, timeStyle, styles). - // 37. Else, - // a. Let formats be dataLocaleData.[[formats]].[[]]. - // b. If matcher is "basic", then - // i. Let bestFormat be BasicFormatMatcher(opt, formats). - // c. Else, - // i. Let bestFormat be BestFitFormatMatcher(opt, formats). - // 38. 
For each row in Table 4, except the header row, in table order, do - // for (auto const &row : table4) { - // a. Let prop be the name given in the Property column of the row. - // auto prop = row.first; - // b. If bestFormat has a field [[]], then - // i. Let p be bestFormat.[[]]. - // ii. Set dateTimeFormat's internal slot whose name is the Internal - // Slot column of the row to p. - // 39. If dateTimeFormat.[[Hour]] is undefined, then - if (!hour_.has_value()) { - // a. Set dateTimeFormat.[[HourCycle]] to undefined. - hourCycle_ = std::nullopt; - // b. Let pattern be bestFormat.[[pattern]]. - // c. Let rangePatterns be bestFormat.[[rangePatterns]]. - // 40. Else, - } else { - // a. Let hcDefault be dataLocaleData.[[hourCycle]]. - auto hcDefault = getDefaultHourCycle(); - // b. Let hc be dateTimeFormat.[[HourCycle]]. - auto hc = hourCycle_; - // c. If hc is null, then - if (!hc.has_value()) - // i. Set hc to hcDefault. - hc = hcDefault; - // d. If hour12 is not undefined, then - if (hour12.has_value()) { - // i. If hour12 is true, then - if (*hour12 == true) { - // 1. If hcDefault is "h11" or "h23", then - if (hcDefault == u"h11" || hcDefault == u"h23") { - // a. Set hc to "h11". - hc = u"h11"; - // 2. Else, - } else { - // a. Set hc to "h12". - hc = u"h12"; - } - // ii. Else, - } else { - // 1. Assert: hour12 is false. - // 2. If hcDefault is "h11" or "h23", then - if (hcDefault == u"h11" || hcDefault == u"h23") { - // a. Set hc to "h23". - hc = u"h23"; - // 3. Else, - } else { - // a. Set hc to "h24". - hc = u"h24"; - } - } - } - // e. Set dateTimeFormat.[[HourCycle]] to hc. - hourCycle_ = hc; - // f. If dateTimeformat.[[HourCycle]] is "h11" or "h12", then - // i. Let pattern be bestFormat.[[pattern12]]. - // ii. Let rangePatterns be bestFormat.[[rangePatterns12]]. - // g. Else, - // i. Let pattern be bestFormat.[[pattern]]. - // ii. Let rangePatterns be bestFormat.[[rangePatterns]]. - } - // 41. Set dateTimeFormat.[[Pattern]] to pattern. - // 42. 
Set dateTimeFormat.[[RangePatterns]] to rangePatterns. - // 43. Return dateTimeFormat. - dateTimeFormatter_ = getUDateFormatter(runtime); - return vm::ExecutionStatus::RETURNED; -} - -/// Gets the UDateFormat with options set in initialize -UDateFormat *DateTimeFormatICU::getUDateFormatter(vm::Runtime &runtime) { - static constexpr std::u16string_view kLong = u"long", kShort = u"short", - kNarrow = u"narrow", - keMedium = u"medium", kFull = u"full", - kNumeric = u"numeric", - kTwoDigit = u"2-digit", - kShortOffset = u"shortOffset", - kLongOffset = u"longOffset", - kShortGeneric = u"shortGeneric", - kLongGeneric = u"longGeneric"; - - // timeStyle and dateStyle cannot be used in conjunction with the other - // options. - if (timeStyle_.has_value() || dateStyle_.has_value()) { - UDateFormatStyle dateStyleRes = UDAT_NONE; - UDateFormatStyle timeStyleRes = UDAT_NONE; - - if (dateStyle_.has_value()) { - if (dateStyle_ == kFull) - dateStyleRes = UDAT_FULL; - else if (dateStyle_ == kLong) - dateStyleRes = UDAT_LONG; - else if (dateStyle_ == keMedium) - dateStyleRes = UDAT_MEDIUM; - else if (dateStyle_ == kShort) - dateStyleRes = UDAT_SHORT; - } - - if (timeStyle_.has_value()) { - if (timeStyle_ == kFull) - timeStyleRes = UDAT_FULL; - else if (timeStyle_ == kLong) - timeStyleRes = UDAT_LONG; - else if (timeStyle_ == keMedium) - timeStyleRes = UDAT_MEDIUM; - else if (timeStyle_ == kShort) - timeStyleRes = UDAT_SHORT; - } - - UErrorCode status = U_ZERO_ERROR; - UDateFormat *dtf; - // if timezone is specified, use that instead, else use default - if (!timeZone_.empty()) { - const UChar *timeZoneRes = - reinterpret_cast(timeZone_.c_str()); - int32_t timeZoneLength = timeZone_.length(); - dtf = udat_open( - timeStyleRes, - dateStyleRes, - &locale8_[0], - timeZoneRes, - timeZoneLength, - nullptr, - -1, - &status); - } else { - dtf = udat_open( - timeStyleRes, - dateStyleRes, - &locale8_[0], - nullptr, - -1, - nullptr, - -1, - &status); - } - assert(status == U_ZERO_ERROR); - 
return dtf; - } - - // Else: lets create the skeleton - std::u16string skeleton = u""; - if (weekday_.has_value()) { - if (weekday_ == kNarrow) - skeleton += u"EEEEE"; - else if (weekday_ == kLong) - skeleton += u"EEEE"; - else if (weekday_ == kShort) - skeleton += u"EEE"; - } - - if (timeZoneName_.has_value()) { - if (timeZoneName_ == kShort) - skeleton += u"z"; - else if (timeZoneName_ == kLong) - skeleton += u"zzzz"; - else if (timeZoneName_ == kShortOffset) - skeleton += u"O"; - else if (timeZoneName_ == kLongOffset) - skeleton += u"OOOO"; - else if (timeZoneName_ == kShortGeneric) - skeleton += u"v"; - else if (timeZoneName_ == kLongGeneric) - skeleton += u"vvvv"; - } - - if (era_.has_value()) { - if (era_ == kNarrow) - skeleton += u"GGGGG"; - else if (era_ == kShort) - skeleton += u"GG"; - else if (era_ == kLong) - skeleton += u"GGGG"; - } - - if (year_.has_value()) { - if (year_ == kNumeric) - skeleton += u"y"; - else if (year_ == kTwoDigit) - skeleton += u"yy"; - } - - if (month_.has_value()) { - if (month_ == kTwoDigit) - skeleton += u"MM"; - else if (month_ == kNumeric) - skeleton += u'M'; - else if (month_ == kNarrow) - skeleton += u"MMMMM"; - else if (month_ == kShort) - skeleton += u"MMM"; - else if (month_ == kLong) - skeleton += u"MMMM"; - } - - if (day_.has_value()) { - if (day_ == kNumeric) - skeleton += u"d"; - else if (day_ == kTwoDigit) - skeleton += u"dd"; - } - - if (hour_.has_value()) { - if (hourCycle_ == u"h12") { - if (hour_ == kNumeric) - skeleton += u"h"; - else if (hour_ == kTwoDigit) - skeleton += u"hh"; - } else if (hourCycle_ == u"h24") { - if (hour_ == kNumeric) - skeleton += u"k"; - else if (hour_ == kTwoDigit) - skeleton += u"kk"; - } else if (hourCycle_ == u"h23") { - if (hour_ == kNumeric) - skeleton += u"H"; - else if (hour_ == kTwoDigit) - skeleton += u"HH"; - } else { - if (hour_ == kNumeric) - skeleton += u"h"; - else if (hour_ == kTwoDigit) - skeleton += u"HH"; - } - } - - if (minute_.has_value()) { - if (minute_ == 
kNumeric) - skeleton += u"m"; - else if (minute_ == kTwoDigit) - skeleton += u"mm"; - } - - if (second_.has_value()) { - if (second_ == kNumeric) - skeleton += u"s"; - else if (second_ == kTwoDigit) - skeleton += u"ss"; - } - - UErrorCode status = U_ZERO_ERROR; - std::u16string bestpattern; - int32_t patternLength; - - std::unique_ptr - dtpGenerator(udatpg_open(&locale8_[0], &status), &udatpg_close); - patternLength = udatpg_getBestPatternWithOptions( - dtpGenerator.get(), - &skeleton[0], - -1, - UDATPG_MATCH_ALL_FIELDS_LENGTH, - nullptr, - 0, - &status); - - if (status == U_BUFFER_OVERFLOW_ERROR) { - status = U_ZERO_ERROR; - bestpattern.resize(patternLength); - udatpg_getBestPatternWithOptions( - dtpGenerator.get(), - &skeleton[0], - skeleton.length(), - UDATPG_MATCH_ALL_FIELDS_LENGTH, - &bestpattern[0], - patternLength, - &status); - } - - // if timezone is specified, use that instead, else use default - if (!timeZone_.empty()) { - const UChar *timeZoneRes = - reinterpret_cast(timeZone_.c_str()); - int32_t timeZoneLength = timeZone_.length(); - return udat_open( - UDAT_PATTERN, - UDAT_PATTERN, - &locale8_[0], - timeZoneRes, - timeZoneLength, - &bestpattern[0], - patternLength, - &status); - } else { - return udat_open( - UDAT_PATTERN, - UDAT_PATTERN, - &locale8_[0], - nullptr, - -1, - &bestpattern[0], - patternLength, - &status); - } -} - -std::u16string DateTimeFormatICU::getDefaultHourCycle() { - UErrorCode status = U_ZERO_ERROR; - std::u16string myString; - // open the default UDateFormat and Pattern of locale - UDateFormat *defaultDTF = udat_open( - UDAT_DEFAULT, - UDAT_DEFAULT, - &locale8_[0], - nullptr, - -1, - nullptr, - -1, - &status); - int32_t size = udat_toPattern(defaultDTF, true, nullptr, 0, &status); - if (status == U_BUFFER_OVERFLOW_ERROR) { - status = U_ZERO_ERROR; - myString.resize(size + 1); - udat_toPattern(defaultDTF, true, &myString[0], 40, &status); - assert(status <= 0); // Check for errors - udat_close(defaultDTF); - // find the default 
hour cycle and return it - for (int32_t i = 0; i < size; i++) { - char16_t ch = myString[i]; - switch (ch) { - case 'K': - return u"h11"; - break; - case 'h': - return u"h12"; - break; - case 'H': - return u"h23"; - break; - case 'k': - return u"h24"; - break; - } - } - } - - return u"h24"; + return impl_icu::DateTimeFormat::supportedLocalesOf( + runtime, locales, options); } vm::CallResult> DateTimeFormat::create( vm::Runtime &runtime, const std::vector &locales, - const Options &inputOptions) noexcept { - auto instance = std::make_unique(); - if (LLVM_UNLIKELY( - instance->initialize(runtime, locales, inputOptions) == - vm::ExecutionStatus::EXCEPTION)) { - return vm::ExecutionStatus::EXCEPTION; - } - return instance; -} - -/// Implementer note: This method corresponds roughly to -/// https://402.ecma-international.org/8.0/#sec-intl.datetimeformat.prototype.resolvedoptions -Options DateTimeFormatICU::resolvedOptions() noexcept { - Options options; - options.emplace(u"locale", Option(locale_)); - options.emplace(u"timeZone", Option(timeZone_)); - if (calendar_) - options.emplace(u"calendar", Option(*calendar_)); - if (hourCycle_.has_value()) { - options.emplace(u"hourCycle", *hourCycle_); - options.emplace(u"hour12", hourCycle_ == u"h11" || hourCycle_ == u"h12"); - } - if (weekday_.has_value()) - options.emplace(u"weekday", *weekday_); - if (era_.has_value()) - options.emplace(u"era", *era_); - if (year_.has_value()) - options.emplace(u"year", *year_); - if (month_.has_value()) - options.emplace(u"month", *month_); - if (day_.has_value()) - options.emplace(u"day", *day_); - if (hour_.has_value()) - options.emplace(u"hour", *hour_); - if (minute_.has_value()) - options.emplace(u"minute", *minute_); - if (second_.has_value()) - options.emplace(u"second", *second_); - if (timeZoneName_.has_value()) - options.emplace(u"timeZoneName", *timeZoneName_); - if (dateStyle_.has_value()) - options.emplace(u"dateStyle", *dateStyle_); - if (timeStyle_.has_value()) - 
options.emplace(u"timeStyle", *timeStyle_); - return options; + const Options &options) noexcept { + return createInstance( + runtime, locales, options); } Options DateTimeFormat::resolvedOptions() noexcept { - return static_cast(this)->resolvedOptions(); -} - -std::u16string DateTimeFormatICU::format(double jsTimeValue) noexcept { - auto timeInSeconds = jsTimeValue; - UDate date = UDate(timeInSeconds); - UErrorCode status = U_ZERO_ERROR; - std::u16string formattedDate; - int32_t myStrlen = 0; - - myStrlen = udat_format( - dateTimeFormatter_, date, nullptr, myStrlen, nullptr, &status); - if (status == U_BUFFER_OVERFLOW_ERROR) { - status = U_ZERO_ERROR; - formattedDate.resize(myStrlen); - udat_format( - dateTimeFormatter_, - date, - &formattedDate[0], - myStrlen, - nullptr, - &status); - } - - assert(status <= 0); // Check for errors - return formattedDate; + return static_cast(this)->resolvedOptions(); } std::u16string DateTimeFormat::format(double jsTimeValue) noexcept { - return static_cast(this)->format(jsTimeValue); + return static_cast(this)->format(jsTimeValue); } std::vector> DateTimeFormat::formatToParts(double jsTimeValue) noexcept { - std::unordered_map part; - part[u"type"] = u"integer"; - // This isn't right, but I didn't want to do more work for a stub. 
- std::string s = std::to_string(jsTimeValue); - part[u"value"] = {s.begin(), s.end()}; - return std::vector>{part}; + return static_cast(this)->formatToParts( + jsTimeValue); +} + +vm::CallResult DateTimeFormat::formatRange( + vm::Runtime &runtime, + double startUtcMs, + double endUtcMs) noexcept { + return static_cast(this)->formatRange( + runtime, startUtcMs, endUtcMs); +} + +vm::CallResult> DateTimeFormat::formatRangeToParts( + vm::Runtime &runtime, + double startUtcMs, + double endUtcMs) noexcept { + return static_cast(this)->formatRangeToParts( + runtime, startUtcMs, endUtcMs); } namespace { diff --git a/lib/Platform/Intl/PlatformIntlShared.cpp b/lib/Platform/Intl/PlatformIntlShared.cpp deleted file mode 100644 index ea1a933aedf..00000000000 --- a/lib/Platform/Intl/PlatformIntlShared.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// This file includes shared code between Apple and ICU implementation of -// Intl APIs -#include "hermes/Platform/Intl/PlatformIntlShared.h" -#include "hermes/Platform/Intl/PlatformIntl.h" -#include "impl_icu/LocaleBCP47Object.h" - -using namespace ::hermes; - -namespace hermes { -namespace platform_intl { - -// https://tc39.es/ecma402/#sec-intl.getcanonicallocales -vm::CallResult> getCanonicalLocales( - vm::Runtime &runtime, - const std::vector &locales) { - // 1. Let ll be ? CanonicalizeLocaleList(locales). - auto localeBcp47ObjectsRes = - impl_icu::LocaleBCP47Object::canonicalizeLocaleList(runtime, locales); - if (LLVM_UNLIKELY(localeBcp47ObjectsRes == vm::ExecutionStatus::EXCEPTION)) { - return vm::ExecutionStatus::EXCEPTION; - } - // 2. Return CreateArrayFromList(ll). 
- std::vector canonicalLocales; - for (const auto &localeBcp47Object : *localeBcp47ObjectsRes) { - canonicalLocales.push_back(localeBcp47Object.getCanonicalizedLocaleId()); - } - return canonicalLocales; -} - -// Implementation of -/// https://402.ecma-international.org/8.0/#sec-todatetimeoptions -vm::CallResult toDateTimeOptions( - vm::Runtime &runtime, - Options options, - std::u16string_view required, - std::u16string_view defaults) { - // 1. If options is undefined, let options be null; otherwise let options be ? - // ToObject(options). - // 2. Let options be OrdinaryObjectCreate(options). - // 3. Let needDefaults be true. - bool needDefaults = true; - // 4. If required is "date" or "any", then - if (required == u"date" || required == u"any") { - // a. For each property name prop of « "weekday", "year", "month", "day" », - // do - // TODO(T116352920): Make this a std::u16string props[] once we have - // constexpr std::u16string. - static constexpr std::u16string_view props[] = { - u"weekday", u"year", u"month", u"day"}; - for (const auto &prop : props) { - // i. Let value be ? Get(options, prop). - if (options.find(std::u16string(prop)) != options.end()) { - // ii. If value is not undefined, let needDefaults be false. - needDefaults = false; - } - } - } - // 5. If required is "time" or "any", then - if (required == u"time" || required == u"any") { - // a. For each property name prop of « "dayPeriod", "hour", "minute", - // "second", "fractionalSecondDigits" », do - static constexpr std::u16string_view props[] = { - u"dayPeriod", u"hour", u"minute", u"second", u"fractionalSecondDigits"}; - for (const auto &prop : props) { - // i. Let value be ? Get(options, prop). - if (options.find(std::u16string(prop)) != options.end()) { - // ii. If value is not undefined, let needDefaults be false. - needDefaults = false; - } - } - } - // 6. Let dateStyle be ? Get(options, "dateStyle"). - auto dateStyle = options.find(u"dateStyle"); - // 7. Let timeStyle be ? 
Get(options, "timeStyle"). - auto timeStyle = options.find(u"timeStyle"); - // 8. If dateStyle is not undefined or timeStyle is not undefined, let - // needDefaults be false. - if (dateStyle != options.end() || timeStyle != options.end()) { - needDefaults = false; - } - // 9. If required is "date" and timeStyle is not undefined, then - if (required == u"date" && timeStyle != options.end()) { - // a. Throw a TypeError exception. - return runtime.raiseTypeError( - "Unexpectedly found timeStyle option for \"date\" property"); - } - // 10. If required is "time" and dateStyle is not undefined, then - if (required == u"time" && dateStyle != options.end()) { - // a. Throw a TypeError exception. - return runtime.raiseTypeError( - "Unexpectedly found dateStyle option for \"time\" property"); - } - // 11. If needDefaults is true and defaults is either "date" or "all", then - if (needDefaults && (defaults == u"date" || defaults == u"all")) { - // a. For each property name prop of « "year", "month", "day" », do - static constexpr std::u16string_view props[] = {u"year", u"month", u"day"}; - for (const auto &prop : props) { - // i. Perform ? CreateDataPropertyOrThrow(options, prop, "numeric"). - options.emplace(prop, Option(std::u16string(u"numeric"))); - } - } - // 12. If needDefaults is true and defaults is either "time" or "all", then - if (needDefaults && (defaults == u"time" || defaults == u"all")) { - // a. For each property name prop of « "hour", "minute", "second" », do - static constexpr std::u16string_view props[] = { - u"hour", u"minute", u"second"}; - for (const auto &prop : props) { - // i. Perform ? CreateDataPropertyOrThrow(options, prop, "numeric"). - options.emplace(prop, Option(std::u16string(u"numeric"))); - } - } - // 13. 
return options - return options; -} - -/// https://402.ecma-international.org/8.0/#sec-case-sensitivity-and-case-mapping -std::u16string toASCIIUppercase(std::u16string_view tz) { - std::u16string result; - std::uint8_t offset = 'a' - 'A'; - for (char16_t c16 : tz) { - if (c16 >= 'a' && c16 <= 'z') { - result.push_back((char)c16 - offset); - } else { - result.push_back(c16); - } - } - return result; -} - -} // namespace platform_intl -} // namespace hermes diff --git a/lib/Platform/Intl/impl_icu/Collator.cpp b/lib/Platform/Intl/impl_icu/Collator.cpp index 01378e6410a..758fd621593 100644 --- a/lib/Platform/Intl/impl_icu/Collator.cpp +++ b/lib/Platform/Intl/impl_icu/Collator.cpp @@ -284,7 +284,7 @@ vm::ExecutionStatus Collator::initializeCollator( options, constants::opt_name::localeMatcher, constants::opt_value::locale_matcher::validLocaleMatchers, - constants::opt_value::locale_matcher::best_fit); + constants::opt_value::locale_matcher::bestFit); if (LLVM_UNLIKELY(matcherRes == vm::ExecutionStatus::EXCEPTION)) { return vm::ExecutionStatus::EXCEPTION; } diff --git a/lib/Platform/Intl/impl_icu/Constants.h b/lib/Platform/Intl/impl_icu/Constants.h index 464f9e9133e..f39ec45e476 100644 --- a/lib/Platform/Intl/impl_icu/Constants.h +++ b/lib/Platform/Intl/impl_icu/Constants.h @@ -62,8 +62,8 @@ inline constexpr char16_t falseStr[] = u"false"; namespace locale_matcher { inline constexpr char16_t lookup[] = u"lookup"; -inline constexpr char16_t best_fit[] = u"best fit"; -inline constexpr const char16_t *validLocaleMatchers[] = {lookup, best_fit}; +inline constexpr char16_t bestFit[] = u"best fit"; +inline constexpr const char16_t *validLocaleMatchers[] = {lookup, bestFit}; } // namespace locale_matcher namespace case_first { @@ -96,8 +96,8 @@ inline constexpr const char16_t *validNumerics[] = {trueStr, falseStr}; namespace format_matcher { inline constexpr char16_t basic[] = u"basic"; -inline constexpr char16_t best_fit[] = u"best fit"; -inline constexpr const char16_t 
*validFormatMatchers[] = {basic, best_fit}; +inline constexpr char16_t bestFit[] = u"best fit"; +inline constexpr const char16_t *validFormatMatchers[] = {basic, bestFit}; } // namespace format_matcher namespace hour_cycle { @@ -171,6 +171,7 @@ namespace part_key { inline constexpr char16_t type[] = u"type"; inline constexpr char16_t value[] = u"value"; inline constexpr char16_t unit[] = u"unit"; +inline constexpr char16_t source[] = u"source"; } // namespace part_key namespace part_type { diff --git a/lib/Platform/Intl/impl_icu/DateTimeFormat.cpp b/lib/Platform/Intl/impl_icu/DateTimeFormat.cpp new file mode 100644 index 00000000000..576a6eded8c --- /dev/null +++ b/lib/Platform/Intl/impl_icu/DateTimeFormat.cpp @@ -0,0 +1,1588 @@ +/** + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "DateTimeFormat.h" + +#include "Constants.h" +#include "IntlUtils.h" +#include "LocaleConverter.h" +#include "LocaleResolver.h" +#include "NumberingSystem.h" +#include "OptionHelpers.h" + +#include + +namespace hermes { +namespace platform_intl { +namespace impl_icu { + +DateTimeFormat::DateTimeFormat() + : icuDateFormat_(nullptr, &udat_close), + icuDateIntervalFormat_(nullptr, &udtitvfmt_close), + // Corresponds to the table + // https://tc39.es/ecma402/#table-datetimeformat-components. 
+ dateTimeFormatComponents_{ + {{constants::opt_name::weekday, + constants::opt_value::style::validNameOnlyStyles, + std::nullopt, + u'E'}, + {constants::opt_name::era, + constants::opt_value::style::validNameOnlyStyles, + std::nullopt, + u'G'}, + {constants::opt_name::year, + constants::opt_value::style::validNumericOnlyStyles, + std::nullopt, + u'y'}, + {constants::opt_name::month, + constants::opt_value::style::validNameAndNumericStyles, + std::nullopt, + u'M'}, + {constants::opt_name::day, + constants::opt_value::style::validNumericOnlyStyles, + std::nullopt, + u'd'}, + {constants::opt_name::dayPeriod, + constants::opt_value::style::validNameOnlyStyles, + std::nullopt, + u'B'}, + {constants::opt_name::hour, + constants::opt_value::style::validNumericOnlyStyles, + std::nullopt, + // hour's icu skeleton symbol depends on the resolved hour cycle. + // 'j' is populated here as a default, which is the hour skeleton + // symbol to specify the use of locale default hour cycle symbol. + // It's not used in buildSkeleton(), which gets the symbol based + // on the resolved hour cycle. + u'j'}, + {constants::opt_name::minute, + constants::opt_value::style::validNumericOnlyStyles, + std::nullopt, + u'm'}, + {constants::opt_name::second, + constants::opt_value::style::validNumericOnlyStyles, + std::nullopt, + u's'}, + {constants::opt_name::fractionalSecondDigits, + // fractionalSecondDigits values isn't populated here. When calling + // getNumberOption() for it, 1 as the minimum valid value and 3 as + // the maximum valid value are used. + {}, + std::nullopt, + u'S'}, + {constants::opt_name::timeZoneName, + constants::opt_value::style::validTimeZoneNameStyles, + std::nullopt, + // time zone name's icu skeleton symbol differs if the style is + // offset or generic type. That will be handled in buildSkeleton(). 
+ u'z'}}} {} + +DateTimeFormat::~DateTimeFormat() = default; + +vm::ExecutionStatus DateTimeFormat::initialize( + vm::Runtime &runtime, + const std::vector &locales, + const Options &options) noexcept { + return createDateTimeFormat( + runtime, + locales, + options, + DateTimeComponent::ANY, + DateTimeComponent::DATE); +} + +vm::ExecutionStatus DateTimeFormat::createDateTimeFormat( + vm::Runtime &runtime, + const std::vector &locales, + const Options &options, + const DateTimeComponent requiredComponent, + const DateTimeComponent defaultComponent) noexcept { + // 1. Let dateTimeFormat be ? OrdinaryCreateFromConstructor(newTarget, + // "%DateTimeFormat.prototype%", « [[InitializedDateTimeFormat]], + // [[Locale]], [[Calendar]], [[NumberingSystem]], [[TimeZone]], + // [[Weekday]], [[Era]], [[Year]], [[Month]], [[Day]], [[DayPeriod]], + // [[Hour]], [[Minute]], [[Second]], [[FractionalSecondDigits]], + // [[TimeZoneName]], [[HourCycle]], [[DateStyle]], [[TimeStyle]], + // [[Pattern]], [[RangePatterns]], [[BoundFormat]] »). + // 2. Let requestedLocales be ? CanonicalizeLocaleList(locales). + auto requestedLocalesRes = + LocaleBCP47Object::canonicalizeLocaleList(runtime, locales); + if (LLVM_UNLIKELY(requestedLocalesRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + + // 3. Set options to ? CoerceOptionsToObject(options). + // 4. Let opt be a new Record. + Options opt; + + // 5. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", + // "best fit" », "best fit"). + // 6. Set opt.[[localeMatcher]] to matcher. + auto matcherRes = getStringOption( + runtime, + options, + constants::opt_name::localeMatcher, + constants::opt_value::locale_matcher::validLocaleMatchers, + constants::opt_value::locale_matcher::bestFit); + if (LLVM_UNLIKELY(matcherRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + opt.emplace(constants::opt_name::localeMatcher, **matcherRes); + + // 7. 
Let calendar be ? GetOption(options, "calendar", string, empty, + // undefined). + // 8. If calendar is not undefined, then + // a. If calendar cannot be matched by the type Unicode locale nonterminal, + // throw a RangeError exception. + // 9. Set opt.[[ca]] to calendar. + auto calendarRes = getStringOption( + runtime, options, constants::opt_name::calendar, {}, std::nullopt); + if (LLVM_UNLIKELY(calendarRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + if (calendarRes->has_value()) { + if (!isUnicodeExtensionType(**calendarRes)) { + return runtime.raiseRangeError( + vm::TwineChar16("Invalid calendar: ") + + vm::TwineChar16((*calendarRes)->c_str())); + } + opt.emplace(constants::extension_key::ca, **calendarRes); + } + + // 10. Let numberingSystem be ? GetOption(options, "numberingSystem", string, + // empty, undefined). + // 11. If numberingSystem is not undefined, then + // a. If numberingSystem cannot be matched by the type Unicode locale + // nonterminal, throw a RangeError exception. + // 12. Set opt.[[nu]] to numberingSystem. + auto numberingSystemRes = getStringOption( + runtime, options, constants::opt_name::numberingSystem, {}, std::nullopt); + if (LLVM_UNLIKELY(numberingSystemRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + if (numberingSystemRes->has_value()) { + if (!isUnicodeExtensionType(**numberingSystemRes)) { + return runtime.raiseRangeError( + vm::TwineChar16("Invalid numbering system: ") + + vm::TwineChar16((*numberingSystemRes)->c_str())); + } + opt.emplace(constants::extension_key::nu, **numberingSystemRes); + } + + // 13. Let hour12 be ? GetOption(options, "hour12", boolean, empty, + // undefined). + auto hour12 = + getBoolOption(options, constants::opt_name::hour12, std::nullopt); + + // 14. Let hourCycle be ? GetOption(options, "hourCycle", string, « "h11", + // "h12", "h23", "h24" », undefined). 
+ auto hourCycleRes = getStringOption( + runtime, + options, + constants::opt_name::hourCycle, + constants::opt_value::hour_cycle::validHourCycles, + std::nullopt); + if (LLVM_UNLIKELY(hourCycleRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + + // 15. If hour12 is not undefined, then + // a. Set hourCycle to null. + // 16. Set opt.[[hc]] to hourCycle. + // Implementation Note: + // The spec is setting hourCycle to null here in order to get the effect of + // overriding any -u-hc extension included in given locales in ResolveLocale() + // We will implement that logic instead by checking for hour12 after + // ResolveLocale() and delete the resolved hourCycle. Here we are only going + // to pass valid hourCycle option to ResolveLocale if no hour12 option is set. + if (!hour12.has_value() && hourCycleRes->has_value()) { + opt.emplace(constants::extension_key::hc, **hourCycleRes); + } + + // 17. Let localeData be %DateTimeFormat%.[[LocaleData]]. + // 18. Let r be ResolveLocale(%DateTimeFormat%.[[AvailableLocales]], + // requestedLocales, opt, %DateTimeFormat%.[[RelevantExtensionKeys]], + // localeData). + static constexpr std::u16string_view relevantExtensionKeys[] = { + constants::extension_key::ca, + constants::extension_key::hc, + constants::extension_key::nu}; + + ResolvedResult result = resolveLocale( + *requestedLocalesRes, + opt, + relevantExtensionKeys, + isExtensionTypeSupported); + + LocaleBCP47Object resolvedBCP47Locale = result.localeBcp47Object; + Options resolvedOpt = result.resolvedOpts; + auto extensionMap = resolvedBCP47Locale.getExtensionMap(); + auto localeNoExtICU = + convertBCP47toICULocale(resolvedBCP47Locale.getLocaleNoExt()); + + // Implementation Note: + // If hour12 option is set, hour12 overrides any resolved hc unicode + // extension, so remove hc unicode extension from resolvedBCP47Locale and + // resolvedOpt. 
+ if (hour12.has_value()) { + extensionMap.erase(constants::extension_key::hc); + resolvedBCP47Locale.updateExtensionMap(extensionMap); + resolvedOpt.erase(constants::extension_key::hc); + } + + // 19. Set dateTimeFormat.[[Locale]] to r.[[locale]]. + resolvedLocale_ = resolvedBCP47Locale.getCanonicalizedLocaleId(); + std::u16string internalLocale = resolvedLocale_; + + // 20. Let resolvedCalendar be r.[[ca]]. + // 21. Set dateTimeFormat.[[Calendar]] to resolvedCalendar. + auto calendarEntry = resolvedOpt.find(constants::extension_key::ca); + if (calendarEntry != resolvedOpt.end()) { + resolvedCalendar_ = calendarEntry->second.getString(); + // The way to pass calendar option to ICU4C DateTimeFormat + // is through setting 'ca' unicode extension in the locale used + // when creating the instance. + // resolvedLocale_ may not yet include calendar extension + // if calendar is specified through options parameter. + // Add the extension to internalLocale if that's the case. + if (extensionMap + .try_emplace(constants::extension_key::ca, resolvedCalendar_) + .second) { + resolvedBCP47Locale.updateExtensionMap(extensionMap); + internalLocale = resolvedBCP47Locale.getCanonicalizedLocaleId(); + } + } else { + resolvedCalendar_ = getDefaultCalendar(localeNoExtICU); + } + + // 22. Set dateTimeFormat.[[NumberingSystem]] to r.[[nu]]. + auto numberingSystemEntry = resolvedOpt.find(constants::extension_key::nu); + if (numberingSystemEntry != resolvedOpt.end()) { + resolvedNumberingSystem_ = numberingSystemEntry->second.getString(); + // The way to pass numbering system option to ICU4C DateTimeFormat + // is through setting 'nu' unicode extension in the locale used + // when creating the instance. + // resolvedLocale_ may not yet include numbering system extension + // if numbering system is specified through options parameter. + // Add the extension to internalLocale if that's the case. 
+ if (extensionMap + .try_emplace(constants::extension_key::nu, resolvedNumberingSystem_) + .second) { + resolvedBCP47Locale.updateExtensionMap(extensionMap); + internalLocale = resolvedBCP47Locale.getCanonicalizedLocaleId(); + } + } else { + resolvedNumberingSystem_ = getDefaultNumberingSystem(localeNoExtICU); + } + + // 23. Let dataLocale be r.[[localeData]]. + // 24. Let dataLocaleData be localeData.[[]]. + // 25. If hour12 is true, then + // a. Let hc be dataLocaleData.[[hourCycle12]]. + // 26. Else if hour12 is false, then + // a. Let hc be dataLocaleData.[[hourCycle24]]. + // 27. Else, + // a. Assert: hour12 is undefined. + // b. Let hc be r.[[hc]]. + // c. If hc is null, set hc to dataLocaleData.[[hourCycle]]. + std::u16string hc; + if (hour12.has_value()) { + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr + generator(udatpg_open(localeNoExtICU.c_str(), &status), &udatpg_close); + std::u16string pattern(8, char16_t()); + if (*hour12) { + // Default for 12-hour clock is h12. + hc = constants::opt_value::hour_cycle::h12; + if (U_SUCCESS(status) && generator) { + if (getBestPattern( + generator.get(), u"h", UDATPG_MATCH_NO_OPTIONS, pattern) && + findDateFieldSymbol(pattern, u'K').has_value()) { + hc = constants::opt_value::hour_cycle::h11; + } + } + } else { + // Default for 24-hour clock is h23. + hc = constants::opt_value::hour_cycle::h23; + if (U_SUCCESS(status) && generator) { + if (getBestPattern( + generator.get(), u"H", UDATPG_MATCH_NO_OPTIONS, pattern) && + findDateFieldSymbol(pattern, u'k').has_value()) { + hc = constants::opt_value::hour_cycle::h24; + } + } + } + } else { + auto ptr = resolvedOpt.find(constants::extension_key::hc); + if (ptr != resolvedOpt.end()) { + hc = ptr->second.getString(); + } else { + hc = getDefaultHourCycle(localeNoExtICU); + } + } + + // 28. Set dateTimeFormat.[[HourCycle]] to hc. 
+ resolvedHourCycle_ = hc; + + // For UDateIntervalFormat, there isn't a way to specify a date-time pattern + // when constructing, rather only with date-time skeleton. The problem is + // then the resolved hour cycle cannot be enforced through the pattern, + // unlike with UDateFormat. + // + // ICU 67.1 has added support for the 'hc' unicode extension in locale, so + // the hour cycle of UDateIntervalFormat can be controlled via the 'hc' + // extension. See https://unicode-org.atlassian.net/browse/ICU-20887. + // + // Therefore, add 'hc' extension to internalLocale. + if (resolvedHourCycle_.has_value() && + extensionMap + .try_emplace(constants::extension_key::hc, *resolvedHourCycle_) + .second) { + resolvedBCP47Locale.updateExtensionMap(extensionMap); + internalLocale = resolvedBCP47Locale.getCanonicalizedLocaleId(); + } + + // 29. Let timeZone be ? Get(options, "timeZone"). + // 30. If timeZone is undefined, then + // a. Set timeZone to systemTimeZoneIdentifier(). + // 31. Else, + // a. Set timeZone to ? ToString(timeZone). + auto timeZoneRes = getStringOption( + runtime, options, constants::opt_name::timeZone, {}, std::nullopt); + if (LLVM_UNLIKELY(timeZoneRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + std::u16string internalTimeZone; + if (!timeZoneRes->has_value()) { + internalTimeZone = systemTimeZoneIdentifier(); + if (startsWith(internalTimeZone, u"GMT+") || + startsWith(internalTimeZone, u"GMT-")) { + vm::CallResult offsetRes = + formatOffsetTimeZoneIdentifier(runtime, internalTimeZone); + if (LLVM_UNLIKELY(offsetRes == vm::ExecutionStatus::EXCEPTION)) { + // Set time zone to UTC in case system time zone is an offset time zone + // id that is not allowed by the spec. 
resolvedTimeZone_ = u"UTC"; + internalTimeZone = u"UTC"; + } else { + resolvedTimeZone_ = *offsetRes; + } + } else { + resolvedTimeZone_ = internalTimeZone; + } + } else { + // Implementation Note: We are keeping implementation of steps 32-35 inside + // else block as the check done in isTimeZoneOffsetString() and + // isValidTimeZoneName() does not need to apply to system time zone. + + // 32. If IsTimeZoneOffsetString(timeZone) is true, then + // a. Let parseResult be ParseText(StringToCodePoints(timeZone), UTCOffset). + // b. Assert: parseResult is a Parse Node. + // c. If parseResult contains more than one MinuteSecond Parse Node, throw a + // RangeError exception. + // d. Let offsetNanoseconds be ParseTimeZoneOffsetString(timeZone). + // e. Let offsetMinutes be offsetNanoseconds / (6 × 10**10). + // f. Assert: offsetMinutes is an integer. + // g. Set timeZone to FormatOffsetTimeZoneIdentifier(offsetMinutes). + // 33. Else if IsValidTimeZoneName(timeZone) is true, then + // a. Set timeZone to CanonicalizeTimeZoneName(timeZone). + // 34. Else, + // a. Throw a RangeError exception. + // 35. Set dateTimeFormat.[[TimeZone]] to timeZone. + if (isTimeZoneOffsetString(**timeZoneRes, internalTimeZone) && + startsWith(internalTimeZone, u"GMT")) { + vm::CallResult offsetRes = + formatOffsetTimeZoneIdentifier(runtime, internalTimeZone); + if (LLVM_UNLIKELY(offsetRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + resolvedTimeZone_ = *offsetRes; + } else if (isValidTimeZoneName(**timeZoneRes, internalTimeZone)) { + resolvedTimeZone_ = internalTimeZone; + } else { + return runtime.raiseRangeError( + vm::TwineChar16("Invalid time zone: ") + + vm::TwineChar16((*timeZoneRes)->c_str())); + } + } + + // 36. Let formatOptions be a new Record. + // 37. Set formatOptions.[[hourCycle]] to hc. + // 38. Let hasExplicitFormatComponents be false. + bool hasExplicitFormatComponents = false; + // 39. 
For each row of Table 7, except the header row, in table order, do + // a. Let prop be the name given in the Property column of the row. + // b. If prop is "fractionalSecondDigits", then + // i. Let value be ? GetNumberOption(options, "fractionalSecondDigits", 1, + // 3, undefined). + // c. Else, + // i. Let values be a List whose elements are the strings given in the + // Values column of the row. + // ii. Let value be ? GetOption(options, prop, STRING, values, undefined). + // d. Set formatOptions.[[]] to value. + // e. If value is not undefined, then + // i. Set hasExplicitFormatComponents to true. + for (auto &formatComponent : dateTimeFormatComponents_) { + std::u16string prop(formatComponent.property_); + if (prop == constants::opt_name::fractionalSecondDigits) { + auto fractionalSecondDigitsRes = + getNumberOption(runtime, options, prop, 1, 3, std::nullopt); + if (LLVM_UNLIKELY( + fractionalSecondDigitsRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + if (fractionalSecondDigitsRes->has_value()) { + formatComponent.resolvedValue_ = **fractionalSecondDigitsRes; + hasExplicitFormatComponents = true; + } + } else { + auto valueRes = getStringOption( + runtime, options, prop, formatComponent.values_, std::nullopt); + if (LLVM_UNLIKELY(valueRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + if (valueRes->has_value()) { + formatComponent.resolvedValue_ = **valueRes; + hasExplicitFormatComponents = true; + } + } + } + + // 40. Let formatMatcher be ? GetOption(options, "formatMatcher", STRING, « + // "basic", "best fit" », "best fit"). + auto formatMatcherRes = getStringOption( + runtime, + options, + constants::opt_name::formatMatcher, + constants::opt_value::format_matcher::validFormatMatchers, + constants::opt_value::format_matcher::bestFit); + if (LLVM_UNLIKELY(formatMatcherRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + + // 41. 
Let dateStyle be ? GetOption(options, "dateStyle", STRING, « "full", + // "long", "medium", "short" », undefined). + // 42. Set dateTimeFormat.[[DateStyle]] to dateStyle. + auto dateStyleRes = getStringOption( + runtime, + options, + constants::opt_name::dateStyle, + constants::opt_value::style::validDateTimeStyles, + std::nullopt); + if (LLVM_UNLIKELY(dateStyleRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + resolvedDateStyle_ = *dateStyleRes; + + // 43. Let timeStyle be ? GetOption(options, "timeStyle", STRING, « "full", + // "long", "medium", "short" », undefined). + // 44. Set dateTimeFormat.[[TimeStyle]] to timeStyle. + auto timeStyleRes = getStringOption( + runtime, + options, + constants::opt_name::timeStyle, + constants::opt_value::style::validDateTimeStyles, + std::nullopt); + if (LLVM_UNLIKELY(timeStyleRes == vm::ExecutionStatus::EXCEPTION)) { + return vm::ExecutionStatus::EXCEPTION; + } + resolvedTimeStyle_ = *timeStyleRes; + + std::string localeICU = convertBCP47toICULocale(internalLocale); + std::u16string bestFormatPattern; + std::u16string bestFormatSkeleton; + // 45. If dateStyle is not undefined or timeStyle is not undefined, then + if (resolvedDateStyle_.has_value() || resolvedTimeStyle_.has_value()) { + // a. If hasExplicitFormatComponents is true, then + // i. Throw a TypeError exception. + if (hasExplicitFormatComponents == true) { + return runtime.raiseTypeError( + "dateStyle and timeStyle cannot be specified with other date-time component options"); + } + // b. If required is DATE and timeStyle is not undefined, then + // i. Throw a TypeError exception. + if (requiredComponent == DateTimeComponent::DATE && + resolvedTimeStyle_.has_value()) { + return runtime.raiseTypeError( + "timeStyle cannot be specified for date only format"); + } + // c. If required is TIME and dateStyle is not undefined, then + // i. Throw a TypeError exception. 
+ if (requiredComponent == DateTimeComponent::TIME && + resolvedDateStyle_.has_value()) { + return runtime.raiseTypeError( + "dateStyle cannot be specified for time only format"); + } + // d. Let styles be dataLocaleData.[[styles]].[[]]. + // e. Let bestFormat be DateTimeStyleFormat(dateStyle, timeStyle, styles). + // + // Implementation Note: implementation relies on ICU DateFormat instance + // constructed according to specified dateStyle and timeStyle in place of + // DateTimeStyleFormat(). + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr dateFormat( + udat_open( + dateTimeStyleToICUDateFormatStyle(resolvedTimeStyle_), + dateTimeStyleToICUDateFormatStyle(resolvedDateStyle_), + localeICU.c_str(), + nullptr, + 0, + nullptr, + 0, + &status), + &udat_close); + if (U_SUCCESS(status) && dateFormat) { + bestFormatPattern.resize(64); + int32_t bufferSize = bestFormatPattern.size(); + int32_t resultLength = 0; + while ((resultLength = udat_toPattern( + dateFormat.get(), + false, + bestFormatPattern.data(), + bufferSize, + &status)) > bufferSize) { + status = U_ZERO_ERROR; + bestFormatPattern.resize(resultLength); + bufferSize = bestFormatPattern.size(); + } + bestFormatPattern.resize(resultLength); + } + + bestFormatSkeleton.resize(bestFormatPattern.size()); + int32_t bufferSize = bestFormatSkeleton.size(); + int32_t resultLength = 0; + while ((resultLength = udatpg_getBaseSkeleton( + nullptr, + bestFormatPattern.data(), + bestFormatPattern.size(), + bestFormatSkeleton.data(), + bufferSize, + &status)) > bufferSize) { + status = U_ZERO_ERROR; + bestFormatSkeleton.resize(resultLength); + bufferSize = bestFormatSkeleton.size(); + } + bestFormatSkeleton.resize(resultLength); + // remove symbols 'a','b'and 'B' + cleanUpBaseSkeleton(bestFormatSkeleton); + + // If the generated pattern's hour cycle does not match against + // resolvedHourCycle_, replace hour cycle symbol in skeleton with + // resolvedHourCycle_'s symbol, and re-generate the best pattern + // from the 
modified skeleton. + if ((resolvedTimeStyle_.has_value()) && + ((resolvedHourCycle_.has_value()))) { + auto resultOpt = findHourSymbol(bestFormatPattern); + char16_t desiredHourSymbol = getHourSymbol(*resolvedHourCycle_); + if (resultOpt.has_value() && resultOpt->symbol != desiredHourSymbol) { + auto findResultOpt = findHourSymbol(bestFormatSkeleton); + if (findResultOpt.has_value()) { + bestFormatSkeleton.replace( + findResultOpt->startPos, + findResultOpt->count, + findResultOpt->count, + desiredHourSymbol); + std::unique_ptr + generator(udatpg_open(localeICU.c_str(), &status), &udatpg_close); + std::u16string pattern(bestFormatPattern.size() + 8, char16_t()); + if (U_SUCCESS(status) && generator && + getBestPattern( + generator.get(), + bestFormatSkeleton, + UDATPG_MATCH_NO_OPTIONS, + pattern)) { + bestFormatPattern = pattern; + } + } + } + } + } else { + // 46. Else, + // a. Let needDefaults be true. + bool needDefaults = true; + // b. If required is DATE or ANY, then + // i. For each property name prop of « "weekday", "year", "month", "day" », + // do + // 1. Let value be formatOptions.[[]]. + // 2. If value is not undefined, set needDefaults to false. + if (requiredComponent == DateTimeComponent::DATE || + requiredComponent == DateTimeComponent::ANY) { + for (const auto &formatComponent : dateTimeFormatComponents_) { + if ((formatComponent.property_ == constants::opt_name::weekday || + formatComponent.property_ == constants::opt_name::year || + formatComponent.property_ == constants::opt_name::month || + formatComponent.property_ == constants::opt_name::day) && + formatComponent.resolvedValue_.has_value()) { + needDefaults = false; + break; + } + } + } + + // c. If required is TIME or ANY, then + // i. For each property name prop of « "dayPeriod", "hour", "minute", + // "second", "fractionalSecondDigits" », do + // 1. Let value be formatOptions.[[]]. + // 2. If value is not undefined, set needDefaults to false. 
+ if (requiredComponent == DateTimeComponent::TIME || + requiredComponent == DateTimeComponent::ANY) { + for (const auto &formatComponent : dateTimeFormatComponents_) { + if ((formatComponent.property_ == constants::opt_name::dayPeriod || + formatComponent.property_ == constants::opt_name::hour || + formatComponent.property_ == constants::opt_name::minute || + formatComponent.property_ == constants::opt_name::second || + formatComponent.property_ == + constants::opt_name::fractionalSecondDigits) && + formatComponent.resolvedValue_.has_value()) { + needDefaults = false; + break; + } + } + } + + // d. If needDefaults is true and defaults is either DATE or ALL, then + // i. For each property name prop of « "year", "month", "day" », do + // 1. Set formatOptions.[[]] to "numeric". + if (needDefaults && + (defaultComponent == DateTimeComponent::DATE || + defaultComponent == DateTimeComponent::ALL)) { + for (auto &formatComponent : dateTimeFormatComponents_) { + if (formatComponent.property_ == constants::opt_name::year || + formatComponent.property_ == constants::opt_name::month || + formatComponent.property_ == constants::opt_name::day) { + formatComponent.resolvedValue_ = + std::u16string(constants::opt_value::style::numeric); + } + } + } + + // e. If needDefaults is true and defaults is either TIME or ALL, then + // i. For each property name prop of « "hour", "minute", "second" », do + // 1. Set formatOptions.[[]] to "numeric". + if (needDefaults && + (defaultComponent == DateTimeComponent::TIME || + defaultComponent == DateTimeComponent::ALL)) { + for (auto &formatComponent : dateTimeFormatComponents_) { + if (formatComponent.property_ == constants::opt_name::hour || + formatComponent.property_ == constants::opt_name::minute || + formatComponent.property_ == constants::opt_name::second) { + formatComponent.resolvedValue_ = + std::u16string(constants::opt_value::style::numeric); + } + } + } + + // f. Let formats be dataLocaleData.[[formats]].[[]]. + // g. 
If formatMatcher is "basic", then + // i. Let bestFormat be BasicFormatMatcher(formatOptions, formats). + // h. Else, + // i. Let bestFormat be BestFitFormatMatcher(formatOptions, formats). + // + // Implementation Note: Only best fit format matcher is implemented and it + // relies on creating a skeleton based on the specified date-time components + // and then getting the best pattern from the skeleton using ICU. + bestFormatSkeleton = buildSkeleton(); + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr + generator(udatpg_open(localeICU.c_str(), &status), &udatpg_close); + std::u16string pattern(bestFormatSkeleton.size() + 32, char16_t()); + if (U_SUCCESS(status) && generator && + getBestPattern( + generator.get(), + bestFormatSkeleton, + UDATPG_MATCH_ALL_FIELDS_LENGTH, + pattern)) { + bestFormatPattern = pattern; + } + } + + // 47. For each row of Table 7, except the header row, in table order, do + // a. Let prop be the name given in the Property column of the current row. + // b. If bestFormat has a field [[]], then + // i. Let p be bestFormat.[[]]. + // ii. Set dateTimeFormat's internal slot whose name is the Internal Slot + // column of the current row to p. + // + // Implementation Note : Step 47 can be skipped because the best pattern + // generated by ICU respects all the styles from the skeleton as specified + // from date-time component input options. + + // Replace hour cycle symbol in the resolvedPattern if it doesn't match + // resolvedHourCycle_'s symbol. + if (resolvedHourCycle_.has_value()) { + char16_t desiredHourSymbol = getHourSymbol(*resolvedHourCycle_); + auto resultOpt = findHourSymbol(bestFormatPattern); + + // 48. If dateTimeFormat.[[Hour]] is undefined, then + // a. Set dateTimeFormat.[[HourCycle]] to undefined. 
+ if (!resultOpt.has_value()) { + resolvedHourCycle_.reset(); + } else if (resultOpt->symbol != desiredHourSymbol) { + bestFormatPattern.replace( + resultOpt->startPos, + resultOpt->count, + resultOpt->count, + desiredHourSymbol); + } + } + + // 49. If dateTimeFormat.[[HourCycle]] is "h11" or "h12", then + // a. Let pattern be bestFormat.[[pattern12]]. + // b. Let rangePatterns be bestFormat.[[rangePatterns12]]. + // 50. Else, + // a. Let pattern be bestFormat.[[pattern]]. + // b. Let rangePatterns be bestFormat.[[rangePatterns]]. + // 51. Set dateTimeFormat.[[Pattern]] to pattern. + // 52. Set dateTimeFormat.[[RangePatterns]] to rangePatterns. + // 53. Return dateTimeFormat. + UErrorCode status = U_ZERO_ERROR; + icuDateFormat_.reset(udat_open( + UDAT_PATTERN, + UDAT_PATTERN, + localeICU.c_str(), + internalTimeZone.data(), + internalTimeZone.size(), + bestFormatPattern.data(), + bestFormatPattern.size(), + &status)); + if (U_FAILURE(status) || !icuDateFormat_) { + return runtime.raiseError( + "Internal error: unable to create DateTimeFormat instance"); + } + + icuDateIntervalFormat_.reset(udtitvfmt_open( + localeICU.c_str(), + bestFormatSkeleton.data(), + bestFormatSkeleton.size(), + internalTimeZone.data(), + internalTimeZone.size(), + &status)); + if (U_FAILURE(status) || !icuDateIntervalFormat_) { + return runtime.raiseError( + "Internal error: unable to create DateTimeFormat instance for range formatting"); + } + + return vm::ExecutionStatus::RETURNED; +} + +std::u16string DateTimeFormat::systemTimeZoneIdentifier() { + UErrorCode status = U_ZERO_ERROR; + std::u16string defaultTimeZone(32, char16_t()); + int32_t bufferSize = defaultTimeZone.size(); + int32_t resultLength = 0; + while ((resultLength = ucal_getDefaultTimeZone( + defaultTimeZone.data(), bufferSize, &status)) > bufferSize) { + status = U_ZERO_ERROR; + defaultTimeZone.resize(resultLength); + bufferSize = defaultTimeZone.size(); + } + defaultTimeZone.resize(resultLength); + if (U_FAILURE(status) || 
defaultTimeZone == u"Etc/Unknown") { + return u"UTC"; + } + return canonicalizeTimeZoneName(defaultTimeZone); +} + +const std::unordered_map & +DateTimeFormat::getValidTimeZones() { + // Intentionally leaked to avoid destruction order problems. + static const auto *validTimeZones = [] { + auto *validZones = new std::unordered_map(); + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr timeZones( + ucal_openTimeZones(&status), &uenum_close); + if (U_FAILURE(status) || !timeZones) { + return validZones; + } + // Only IANA time zone identifiers are allowed by the spec. + // See test262/test/intl402/DateTimeFormat/timezone-legacy-non-iana.js + // Below is the list of non-IANA link names that ICU accepts. + // Exclude adding the time zones in this list to validZones. + constexpr std::u16string_view namesToExclude[] = { + u"ACT", u"AET", u"AGT", u"ART", u"AST", u"BET", u"BST", u"CAT", u"CNT", + u"CST", u"CTT", u"EAT", u"ECT", u"IET", u"IST", u"JST", u"MIT", u"NET", + u"NST", u"PLT", u"PNT", u"PRT", u"PST", u"SST", u"VST"}; + const auto *namesToExcludeBegin = std::begin(namesToExclude); + const auto *namesToExcludeEnd = std::end(namesToExclude); + int32_t length; + const UChar *timeZoneId; + while ((timeZoneId = uenum_unext(timeZones.get(), &length, &status)) != + nullptr && + U_SUCCESS(status)) { + std::u16string timeZoneIdStr(timeZoneId, length); + if (std::find(namesToExcludeBegin, namesToExcludeEnd, timeZoneIdStr) == + namesToExcludeEnd) { + validZones->emplace( + toUpperASCII(timeZoneIdStr), + canonicalizeTimeZoneName(timeZoneIdStr)); + } + } + validZones->emplace(u"UTC", u"UTC"); + return validZones; + }(); + return *validTimeZones; +} + +bool DateTimeFormat::isValidTimeZoneName( + const std::u16string &timeZone, + std::u16string &canonicalizedTimeZoneResult) { + const std::unordered_map &validTimeZones = + getValidTimeZones(); + auto it = validTimeZones.find(toUpperASCII(timeZone)); + if (it != validTimeZones.end()) { + canonicalizedTimeZoneResult = it->second; + 
return true; + } + return false; +} + +bool DateTimeFormat::isTimeZoneOffsetString( + const std::u16string &timeZone, + std::u16string &canonicalizedTimeZoneResult) { + // Check if timeZone starts with "+", "-", or '\u2212' + if (timeZone.empty() || + (timeZone[0] != u'+' && timeZone[0] != u'-' && + timeZone[0] != u'\u2212')) { + return false; + } + if (timeZone.find(u':') == std::u16string::npos) { + if (timeZone.size() != 3 && timeZone.size() != 5) { + return false; + } + } else if (timeZone.size() != 6) { + return false; + } + std::u16string offsetTimeZoneId = timeZone; + // If offsetTimeZoneId starts with "\u2212", replace it with "-" + if (offsetTimeZoneId[0] == u'\u2212') { + offsetTimeZoneId[0] = u'-'; + } + // Prepend "GMT" to offsetTimeZoneId + offsetTimeZoneId.insert(0, u"GMT"); + auto resultOpt = getCanonicalTimeZoneId(offsetTimeZoneId); + if (!resultOpt.has_value()) { + return false; + } + canonicalizedTimeZoneResult = *resultOpt; + if (canonicalizedTimeZoneResult == u"GMT") { + canonicalizedTimeZoneResult.append(u"+00:00"); + } + return true; +} + +std::u16string DateTimeFormat::canonicalizeTimeZoneName( + const std::u16string &timeZoneName) { + std::u16string result; + // ECMA-402 requires the canonical time zone name to be the ones defined by + // IANA. Call ucal_getIanaTimeZoneID(), but it's only available since + // ICU 74. 
+#if U_ICU_VERSION_MAJOR_NUM >= 74 + UErrorCode status = U_ZERO_ERROR; + std::u16string ianaId(32, char16_t()); + int32_t bufferSize = ianaId.size(); + int32_t resultLength = 0; + while ((resultLength = ucal_getIanaTimeZoneID( + timeZoneName.data(), + timeZoneName.size(), + ianaId.data(), + bufferSize, + &status)) > bufferSize) { + status = U_ZERO_ERROR; + ianaId.resize(resultLength); + bufferSize = ianaId.size(); + } + ianaId.resize(resultLength); + if (U_FAILURE(status)) { + return u"UTC"; + } + result = ianaId; +#else + auto resultOpt = getCanonicalTimeZoneId(timeZoneName); + if (!resultOpt.has_value()) { + return u"UTC"; + } + result = *resultOpt; +#endif + // If the result is "Etc/UTC", "Etc/GMT", or "GMT", return "UTC" + if (result == u"Etc/UTC" || result == u"Etc/GMT" || result == u"GMT") { + return u"UTC"; + } + return result; +} + +std::optional DateTimeFormat::getCanonicalTimeZoneId( + const std::u16string &timeZoneId) { + UErrorCode status = U_ZERO_ERROR; + std::u16string canonicalId(32, char16_t()); + int32_t bufferSize = canonicalId.size(); + int32_t resultLength = 0; + while ((resultLength = ucal_getCanonicalTimeZoneID( + timeZoneId.data(), + timeZoneId.size(), + canonicalId.data(), + bufferSize, + nullptr, + &status)) > bufferSize) { + status = U_ZERO_ERROR; + canonicalId.resize(resultLength); + bufferSize = canonicalId.size(); + } + canonicalId.resize(resultLength); + if (U_FAILURE(status)) { + return std::nullopt; + } + return canonicalId; +} + +/** + * Converts ICU offset time zone id in the form GMT+/-hh:mm[:ss] to ECMA-402 + * offset time zone id format +/-hh:mm. + */ +vm::CallResult DateTimeFormat::formatOffsetTimeZoneIdentifier( + vm::Runtime &runtime, + const std::u16string &icuOffsetTimeZone) noexcept { + // Seconds field in the offset is not allowed in the spec. + // Check for seconds field in the offset time zone id. + // Throw a RangeError exception if seconds field is present. 
+ if (icuOffsetTimeZone.length() > 9) { + return runtime.raiseRangeError( + "Offset time zone id cannot contain seconds field."); + } + // Remove "GMT" from the beginning + if (icuOffsetTimeZone.size() >= 3) { + return icuOffsetTimeZone.substr(3); + } + return icuOffsetTimeZone; +} + +bool DateTimeFormat::isExtensionTypeSupported( + std::u16string_view extensionKey, + std::u16string_view extensionType, + const LocaleBCP47Object &localeBCP47Object) { + if (extensionKey == constants::extension_key::nu) { + const std::unordered_set &numberingSystems = + getAvailableNumberingSystems(); + return numberingSystems.find(std::u16string(extensionType)) != + numberingSystems.end(); + } + + if (extensionKey == constants::extension_key::hc) { + return std::find( + std::begin(constants::opt_value::hour_cycle::validHourCycles), + std::end(constants::opt_value::hour_cycle::validHourCycles), + extensionType) != + std::end(constants::opt_value::hour_cycle::validHourCycles); + } + + if (extensionKey == constants::extension_key::ca) { + std::unordered_set calendars = getAvailableCalendars( + convertBCP47toICULocale(localeBCP47Object.getLocaleNoExt())); + return calendars.find(std::u16string(extensionType)) != calendars.end(); + } + + return false; +} + +std::u16string DateTimeFormat::getDefaultCalendar( + const std::string &localeICU) { + UErrorCode status = U_ZERO_ERROR; + std::u16string defaultCalendar{u"gregory"}; + std::unique_ptr calendar( + ucal_open(nullptr, -1, localeICU.c_str(), UCAL_DEFAULT, &status), + ucal_close); + if (U_FAILURE(status) || !calendar) { + return defaultCalendar; + } + std::string_view calType(ucal_getType(calendar.get(), &status)); + if (U_FAILURE(status) || calType.empty() || calType == "unknown") { + return defaultCalendar; + } + return convertLegacyCalendar(calType); +} + +std::unordered_set DateTimeFormat::getAvailableCalendars( + const std::string &localeICU) { + std::unordered_set availableCalendars; + UErrorCode status = U_ZERO_ERROR; + 
std::unique_ptr names( + ucal_getKeywordValuesForLocale( + "calendar", localeICU.c_str(), false, &status), + &uenum_close); + if (U_FAILURE(status) || !names) { + return availableCalendars; + } + int32_t length; + const char *name; + while ((name = uenum_next(names.get(), &length, &status)) != nullptr && + U_SUCCESS(status)) { + std::string_view nameStr(name, length); + availableCalendars.emplace(convertLegacyCalendar(nameStr)); + } + return availableCalendars; +} + +std::u16string DateTimeFormat::convertLegacyCalendar(std::string_view cal) { + if (cal == "gregorian") { + return u"gregory"; + } else if (cal == "ethiopic-amete-alem") { + return u"ethioaa"; + } else { + return toUTF16ASCII(cal); + } +} + +char16_t DateTimeFormat::getHourSymbol(const std::u16string &hourCycle) { + if (hourCycle == constants::opt_value::hour_cycle::h11) { + return u'K'; + } else if (hourCycle == constants::opt_value::hour_cycle::h12) { + return u'h'; + } else if (hourCycle == constants::opt_value::hour_cycle::h23) { + return u'H'; + } else if (hourCycle == constants::opt_value::hour_cycle::h24) { + return u'k'; + } else { + return u'H'; + } +} + +std::u16string DateTimeFormat::getDefaultHourCycle( + const std::string &localeICU) { + // Default to 24-hour cycle in case of an error + auto defaultHourCycle = constants::opt_value::hour_cycle::h23; + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr generator( + udatpg_open(localeICU.c_str(), &status), &udatpg_close); + if (U_FAILURE(status) || !generator) { + return defaultHourCycle; + } + std::u16string pattern(8, char16_t()); + if (!getBestPattern( + generator.get(), u"j", UDATPG_MATCH_NO_OPTIONS, pattern)) { + return defaultHourCycle; + } + auto resultOpt = findHourSymbol(pattern); + if (!resultOpt.has_value()) { + return defaultHourCycle; + } + switch (resultOpt->symbol) { + case u'K': + return constants::opt_value::hour_cycle::h11; + case u'h': + return constants::opt_value::hour_cycle::h12; + case u'H': + return 
constants::opt_value::hour_cycle::h23; + case u'k': + return constants::opt_value::hour_cycle::h24; + default: + return defaultHourCycle; + } +} + +std::optional DateTimeFormat::findHourSymbol( + const std::u16string &pattern) { + for (char16_t symbol : {u'k', u'h', u'H', u'K', u'j'}) { + auto resultOpt = findDateFieldSymbol(pattern, symbol); + if (resultOpt.has_value()) { + return resultOpt; + } + } + return std::nullopt; +}; + +std::optional +DateTimeFormat::findDateFieldSymbol( + const std::u16string &pattern, + char16_t symbol) { + // Single quote. + const char16_t quote = u'\u0027'; + SymbolFindResult result; + bool found = false; + bool inQuote = false; + int32_t length = pattern.length(); + for (int32_t i = 0; i < length; i++) { + char16_t ch = pattern[i]; + + if (ch != symbol && result.count > 0) { + break; + } + + if (ch == quote) { + // Consecutive single quotes are a single quote literal, + // either outside of quotes or between quotes. + if ((i + 1) < length && pattern[i + 1] == quote) { + i += 1; + } else { + inQuote = !inQuote; + } + } else if (!inQuote && ch == symbol) { + if (result.startPos == -1) { + found = true; + result.symbol = symbol; + result.startPos = i; + } + result.count++; + } + } + return found ? 
std::make_optional(result) : std::nullopt; +} + +UDateFormatStyle DateTimeFormat::dateTimeStyleToICUDateFormatStyle( + const std::optional &styleOpt) { + if (!styleOpt.has_value()) { + return UDAT_NONE; + } + auto style = *styleOpt; + if (style == constants::opt_value::style::full) { + return UDAT_FULL; + } + if (style == constants::opt_value::style::longStr) { + return UDAT_LONG; + } + if (style == constants::opt_value::style::medium) { + return UDAT_MEDIUM; + } + if (style == constants::opt_value::style::shortStr) { + return UDAT_SHORT; + } + return UDAT_DEFAULT; +} + +// According to https://unicode-org.atlassian.net/browse/ICU-20437, there is a +// ICU bug with getBaseSkeleton that the symbols 'a', 'b', and 'B', are still +// present in the skeleton result. This function is to remove these symbols from +// the base skeleton. +void DateTimeFormat::cleanUpBaseSkeleton(std::u16string &skeleton) { + for (char16_t symbol : {u'a', u'b', u'B'}) { + // Remove 'a', 'b', and 'B' + auto resultOpt = findDateFieldSymbol(skeleton, symbol); + if (resultOpt.has_value()) { + skeleton.erase(resultOpt->startPos, resultOpt->count); + } + } +} + +std::u16string DateTimeFormat::buildSkeleton() { + std::u16string skeleton; + for (const auto &formatComponent : dateTimeFormatComponents_) { + if (!formatComponent.resolvedValue_.has_value()) { + continue; + } + Option value = *(formatComponent.resolvedValue_); + char16_t symbol = formatComponent.icuSkeletonSymbol; + if (formatComponent.property_ == + constants::opt_name::fractionalSecondDigits) { + skeleton.append(value.getNumber(), symbol); + continue; + } + // All other values are strings. 
+ auto valueStr = value.getString(); + if (formatComponent.property_ == constants::opt_name::timeZoneName) { + if (valueStr == constants::opt_value::style::shortStr) { + skeleton.append(u"z"); + } else if (valueStr == constants::opt_value::style::longStr) { + skeleton.append(u"zzzz"); + } else if (valueStr == constants::opt_value::style::shortOffset) { + skeleton.append(u"O"); + } else if (valueStr == constants::opt_value::style::longOffset) { + skeleton.append(u"OOOO"); + } else if (valueStr == constants::opt_value::style::shortGeneric) { + skeleton.append(u"v"); + } else if (valueStr == constants::opt_value::style::longGeneric) { + skeleton.append(u"vvvv"); + } + continue; + } + if (formatComponent.property_ == constants::opt_name::hour) { + symbol = getHourSymbol(resolvedHourCycle_.value_or(u"")); + } + int numSymbols = 0; + if (valueStr == constants::opt_value::style::numeric) { + numSymbols = 1; + } else if (valueStr == constants::opt_value::style::twoDigit) { + numSymbols = 2; + } else if (valueStr == constants::opt_value::style::shortStr) { + numSymbols = 3; + } else if (valueStr == constants::opt_value::style::longStr) { + numSymbols = 4; + } else if (valueStr == constants::opt_value::style::narrow) { + numSymbols = 5; + } + skeleton.append(numSymbols, symbol); + } + return skeleton; +} + +bool DateTimeFormat::getBestPattern( + UDateTimePatternGenerator *generator, + std::u16string_view skeleton, + UDateTimePatternMatchOptions options, + std::u16string &bestPatternResult) { + UErrorCode status = U_ZERO_ERROR; + int32_t bufferSize = bestPatternResult.size(); + int32_t resultLength = 0; + while ((resultLength = udatpg_getBestPatternWithOptions( + generator, + skeleton.data(), + skeleton.size(), + options, + bestPatternResult.data(), + bufferSize, + &status)) > bufferSize) { + status = U_ZERO_ERROR; + bestPatternResult.resize(resultLength); + bufferSize = bestPatternResult.size(); + } + bestPatternResult.resize(resultLength); + return U_SUCCESS(status); +} + 
+// As mentioned in +// https://tc39.es/ecma402/#table-datetimeformat-resolvedoptions-properties +Options DateTimeFormat::resolvedOptions() noexcept { + Options finalResolvedOptions; + + finalResolvedOptions.emplace( + constants::opt_name::locale, Option(resolvedLocale_)); + finalResolvedOptions.emplace( + constants::opt_name::calendar, Option(resolvedCalendar_)); + finalResolvedOptions.emplace( + constants::opt_name::numberingSystem, Option(resolvedNumberingSystem_)); + finalResolvedOptions.emplace( + constants::opt_name::timeZone, Option(resolvedTimeZone_)); + if (resolvedHourCycle_.has_value()) { + finalResolvedOptions.emplace( + constants::opt_name::hourCycle, Option(*resolvedHourCycle_)); + finalResolvedOptions.emplace( + constants::opt_name::hour12, + Option( + (*resolvedHourCycle_ == constants::opt_value::hour_cycle::h11 || + *resolvedHourCycle_ == constants::opt_value::hour_cycle::h12))); + } + if (!resolvedDateStyle_.has_value() && !resolvedTimeStyle_.has_value()) { + for (const auto &formatComponent : dateTimeFormatComponents_) { + if (formatComponent.resolvedValue_.has_value()) { + finalResolvedOptions.emplace( + formatComponent.property_, + Option(*(formatComponent.resolvedValue_))); + } + } + } else { + if (resolvedDateStyle_.has_value()) { + finalResolvedOptions.emplace( + constants::opt_name::dateStyle, Option(*resolvedDateStyle_)); + } + if (resolvedTimeStyle_.has_value()) { + finalResolvedOptions.emplace( + constants::opt_name::timeStyle, Option(*resolvedTimeStyle_)); + } + } + + return finalResolvedOptions; +} + +vm::CallResult> DateTimeFormat::supportedLocalesOf( + vm::Runtime &runtime, + const std::vector &locales, + const Options &options) noexcept { + return supportedLocales(runtime, locales, options); +} + +std::u16string DateTimeFormat::format(double value) noexcept { + UErrorCode status = U_ZERO_ERROR; + std::u16string output(128, char16_t()); + int32_t bufferSize = output.size(); + int32_t resultLength = 0; + while ((resultLength = 
udat_format( + icuDateFormat_.get(), + value, + output.data(), + bufferSize, + nullptr, + &status)) > bufferSize) { + status = U_ZERO_ERROR; + output.resize(resultLength); + bufferSize = output.size(); + } + output.resize(resultLength); + if (U_FAILURE(status)) { + return u""; // TODO : Throw error if failure + } + return output; +} + +// Implementer note: This method corresponds roughly to +// https://402.ecma-international.org/8.0/#sec-formatdatetimetoparts +std::vector DateTimeFormat::formatToParts(double value) noexcept { + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr + posIter(ufieldpositer_open(&status), &ufieldpositer_close); + if (U_FAILURE(status) || !posIter) { + return {}; + } + std::u16string formattedStr(128, char16_t()); + int32_t bufferSize = formattedStr.size(); + int32_t resultLength = 0; + while ((resultLength = udat_formatForFields( + icuDateFormat_.get(), + value, + formattedStr.data(), + bufferSize, + posIter.get(), + &status)) > bufferSize) { + status = U_ZERO_ERROR; + formattedStr.resize(resultLength); + bufferSize = formattedStr.size(); + } + formattedStr.resize(resultLength); + if (U_FAILURE(status)) { + return {}; + } + return partitionPattern(posIter.get(), formattedStr); +} + +std::vector DateTimeFormat::partitionPattern( + UFieldPositionIterator *posIter, + const std::u16string &formattedStr) { + int32_t begin = 0; + int32_t end = 0; + int32_t previousEnd = 0; + int32_t fieldType = -1; + std::vector result{}; + + while ((fieldType = ufieldpositer_next(posIter, &begin, &end)) >= 0) { + if (previousEnd < begin) { + result.push_back( + {{constants::part_key::type, constants::part_type::literal}, + {constants::part_key::value, + formattedStr.substr(previousEnd, begin - previousEnd)}}); + } + result.push_back( + {{constants::part_key::type, icuDateFieldTypeToPartType(fieldType)}, + {constants::part_key::value, + formattedStr.substr(begin, end - begin)}}); + previousEnd = end; + } + int32_t formattedStrLength = formattedStr.length(); + 
if (previousEnd < formattedStrLength) { + result.push_back( + {{constants::part_key::type, constants::part_type::literal}, + {constants::part_key::value, formattedStr.substr(previousEnd)}}); + } + return result; +} + +std::u16string DateTimeFormat::icuDateFieldTypeToPartType(int32_t fieldType) { + // Mapping ICU DateFormat field type to DateTimeFormat part types. + switch (fieldType) { + case UDAT_DAY_OF_WEEK_FIELD: + case UDAT_DOW_LOCAL_FIELD: + case UDAT_STANDALONE_DAY_FIELD: + return u"weekday"; + case UDAT_ERA_FIELD: + return u"era"; + case UDAT_YEAR_FIELD: + case UDAT_EXTENDED_YEAR_FIELD: + return u"year"; + case UDAT_YEAR_NAME_FIELD: + return u"yearName"; + case UDAT_MONTH_FIELD: + case UDAT_STANDALONE_MONTH_FIELD: + return u"month"; + case UDAT_DATE_FIELD: + return u"day"; + case UDAT_AM_PM_FIELD: + case UDAT_AM_PM_MIDNIGHT_NOON_FIELD: + case UDAT_FLEXIBLE_DAY_PERIOD_FIELD: + return u"dayPeriod"; + case UDAT_HOUR_OF_DAY1_FIELD: + case UDAT_HOUR_OF_DAY0_FIELD: + case UDAT_HOUR1_FIELD: + case UDAT_HOUR0_FIELD: + return u"hour"; + case UDAT_MINUTE_FIELD: + return u"minute"; + case UDAT_SECOND_FIELD: + return u"second"; + case UDAT_FRACTIONAL_SECOND_FIELD: + return u"fractionalSecond"; + case UDAT_TIMEZONE_FIELD: + case UDAT_TIMEZONE_RFC_FIELD: + case UDAT_TIMEZONE_GENERIC_FIELD: + case UDAT_TIMEZONE_SPECIAL_FIELD: + case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: + case UDAT_TIMEZONE_ISO_FIELD: + case UDAT_TIMEZONE_ISO_LOCAL_FIELD: + return u"timeZoneName"; + // Since ICU 53, UDAT_RELATED_YEAR_FIELD is marked internal only, + // but it is needed ECMA-402 spec. 
test262 test case: + // intl402/DateTimeFormat/prototype/formatToParts/related-year.js + case UDAT_RELATED_YEAR_FIELD: + return u"relatedYear"; + default: + // Handle unsupported or unknown fields + return constants::part_type::literal; + } +} + +vm::CallResult DateTimeFormat::formatRange( + vm::Runtime &runtime, + double startUtcMs, + double endUtcMs) noexcept { + UErrorCode status = U_ZERO_ERROR; + std::u16string output(256, char16_t()); + int32_t bufferSize = output.size(); + int32_t resultLength = 0; + while ((resultLength = udtitvfmt_format( + icuDateIntervalFormat_.get(), + startUtcMs, + endUtcMs, + output.data(), + bufferSize, + nullptr, + &status)) > bufferSize) { + status = U_ZERO_ERROR; + output.resize(resultLength); + bufferSize = output.size(); + } + output.resize(resultLength); + if (U_FAILURE(status)) { + return runtime.raiseError( + "Internal error: unable to format the date range."); + } + return output; +} + +vm::CallResult> DateTimeFormat::formatRangeToParts( + vm::Runtime &runtime, + double startUtcMs, + double endUtcMs) noexcept { + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr + formattedDateInterval( + udtitvfmt_openResult(&status), &udtitvfmt_closeResult); +// The argument order changed when udtitvfmt_formatToResult() became stable in +// ICU 67 +#if U_ICU_VERSION_MAJOR_NUM >= 67 + udtitvfmt_formatToResult( + icuDateIntervalFormat_.get(), + startUtcMs, + endUtcMs, + formattedDateInterval.get(), + &status); +#else + udtitvfmt_formatToResult( + icuDateIntervalFormat_.get(), + formattedDateInterval.get(), + startUtcMs, + endUtcMs, + &status); +#endif + if (U_FAILURE(status)) { + return runtime.raiseError( + "Internal error: unable to format the date range"); + } + return partitionDateTimeRangePattern(runtime, formattedDateInterval.get()); +} + +vm::CallResult> DateTimeFormat::partitionDateTimeRangePattern( + vm::Runtime &runtime, + const UFormattedDateInterval *formattedDateInterval) { + std::vector result{}; + int32_t previousEnd = 0; + 
std::pair startDateRange{-1, -1}; + std::pair endDateRange{-1, -1}; + + UErrorCode status = U_ZERO_ERROR; + const UFormattedValue *formattedValue = + udtitvfmt_resultAsValue(formattedDateInterval, &status); + int32_t formattedStrLength; + const char16_t *formattedStr = + ufmtval_getString(formattedValue, &formattedStrLength, &status); + std::unique_ptr cfp( + ucfpos_open(&status), &ucfpos_close); + UConstrainedFieldPosition *constrainedFieldPosition = cfp.get(); + while ( + ufmtval_nextPosition(formattedValue, constrainedFieldPosition, &status) && + U_SUCCESS(status)) { + int32_t begin; + int32_t end; + ucfpos_getIndexes(constrainedFieldPosition, &begin, &end, &status); + int32_t fieldType = ucfpos_getField(constrainedFieldPosition, &status); + int32_t category = ucfpos_getCategory(constrainedFieldPosition, &status); + if (category == UFIELD_CATEGORY_DATE_INTERVAL_SPAN) { + // According to udtitvfmt_resultAsValue() documentation, the field value + // for category UFIELD_CATEGORY_DATE_INTERVAL_SPAN is either 0 or 1. + // 0 means the span is attributed to the start date. 1 means the span is + // attributed to the end date. Thus, we use this field value to set + // startDateRange and endDateRange accordingly. 
+ if (fieldType == 0) { + startDateRange.first = begin; + startDateRange.second = end; + } else if (fieldType == 1) { + endDateRange.first = begin; + endDateRange.second = end; + } + } else { + if (begin > previousEnd) { + result.push_back( + {{constants::part_key::type, constants::part_type::literal}, + {constants::part_key::value, + std::u16string(formattedStr + previousEnd, begin - previousEnd)}, + {constants::part_key::source, + getPartSource( + {previousEnd, begin}, startDateRange, endDateRange)}}); + } + result.push_back( + {{constants::part_key::type, icuDateFieldTypeToPartType(fieldType)}, + {constants::part_key::value, + std::u16string(formattedStr + begin, end - begin)}, + {constants::part_key::source, + getPartSource({begin, end}, startDateRange, endDateRange)}}); + previousEnd = end; + } + } + if (U_FAILURE(status)) { + return runtime.raiseError( + "Internal error: unable to format the date range"); + } + if (previousEnd < formattedStrLength) { + result.push_back( + {{constants::part_key::type, constants::part_type::literal}, + {constants::part_key::value, + std::u16string( + formattedStr + previousEnd, formattedStrLength - previousEnd)}, + {constants::part_key::source, + getPartSource( + {previousEnd, formattedStrLength}, + startDateRange, + endDateRange)}}); + } + return result; +} + +bool DateTimeFormat::isSpanInRange( + std::pair span, + std::pair range) { + return (span.first >= range.first) && (span.first <= range.second) && + (span.second >= range.first) && (span.second <= range.second); +} + +std::u16string DateTimeFormat::getPartSource( + std::pair span, + std::pair startDateRange, + std::pair endDateRange) { + if (isSpanInRange(span, startDateRange)) { + return u"startRange"; + } else if (isSpanInRange(span, endDateRange)) { + return u"endRange"; + } + return u"shared"; +} + +} // namespace impl_icu +} // namespace platform_intl +} // namespace hermes diff --git a/lib/Platform/Intl/impl_icu/DateTimeFormat.h 
b/lib/Platform/Intl/impl_icu/DateTimeFormat.h new file mode 100644 index 00000000000..4bbe16c6cf9 --- /dev/null +++ b/lib/Platform/Intl/impl_icu/DateTimeFormat.h @@ -0,0 +1,293 @@ +/** + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#ifndef HERMES_PLATFORMINTL_IMPLICU_DATETIMEFORMAT_H +#define HERMES_PLATFORMINTL_IMPLICU_DATETIMEFORMAT_H + +#include "LocaleBCP47Object.h" +#include "hermes/Platform/Intl/PlatformIntl.h" +#include "llvh/ADT/ArrayRef.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace hermes { +namespace platform_intl { +namespace impl_icu { + +class DateTimeFormat : public platform_intl::DateTimeFormat { + public: + /** + * @brief Creates a DateTimeFormat. + */ + DateTimeFormat(); + + /** + * @brief Destructs the DateTimeFormat. + */ + ~DateTimeFormat() override; + + /** + * Initializes the DateTimeFormat. + * + * See https://tc39.es/ecma402/#sec-intl-datetimeformat-constructor. + * + * @param runtime runtime object + * @param locales locales passed from JS + * @param options options passed from JS + * @return ExecutionStatus.RETURNED on success, ExecutionStatus.EXCEPTION + * on failure. + */ + vm::ExecutionStatus initialize( + vm::Runtime &runtime, + const std::vector &locales, + const Options &options) noexcept; + + /** + * Returns the resolved options. + * + * See + * https://tc39.es/ecma402/#sec-intl.datetimeformat.prototype.resolvedoptions. + * + * @return a new options object with properties reflecting the locale and + * date-time-format options computed during initialization of this + * DateTimeFormat instance. + */ + Options resolvedOptions() noexcept; + + /** + * Returns provided locales that DateTimeFormat supports. + * + * See https://tc39.es/ecma402/#sec-intl.datetimeformat.supportedlocalesof. 
+ * + * @param runtime runtime object + * @param locales locales passed from JS + * @param options options passed from JS + * @return CallResult with a vector of provided locales that are supported on + * success, with ExecutionStatus.EXCEPTION on failure. + */ + static vm::CallResult> supportedLocalesOf( + vm::Runtime &runtime, + const std::vector &locales, + const Options &options) noexcept; + + /** + * Formats an epoch date value to a string according to locale and options of + * this DateTimeFormat instance. + * + * See https://tc39.es/ecma402/#sec-formatdatetime. + * + * @param value epoch date value + * @return a formatted date-time string + */ + std::u16string format(double value) noexcept; + + /** + * Formats an epoch date value to a vector of objects containing the formatted + * date-time in parts according to locale and options of this DateTimeFormat + * instance. + * + * See https://tc39.es/ecma402/#sec-formatdatetimetoparts. + * + * @param value epoch date value + * @return a vector of formatted date-time string parts + */ + std::vector formatToParts(double value) noexcept; + + /** + * Formats a date range to a string according to locale and + * options of this DateTimeFormat instance. + * + * See https://tc39.es/ecma402/#sec-formatdatetimerange. + * + * @param runtime runtime object + * @param startUtcMs start of date range, an epoch date value + * @param endUtcMs end of date range, an epoch date value + * @return a formatted date-time range string + */ + vm::CallResult formatRange( + vm::Runtime &runtime, + double startUtcMs, + double endUtcMs) noexcept; + + /** + * Formats a date range to a vector of objects containing the formatted + * date-time range in parts according to locale and options of this + * DateTimeFormat instance. + * + * See https://tc39.es/ecma402/#sec-formatdatetimerangetoparts. 
+ * + * @param runtime runtime object + * @param startUtcMs start of date range, an epoch date value + * @param endUtcMs end of date range, an epoch date value + * @return a vector of formatted date-time range string parts + */ + vm::CallResult> formatRangeToParts( + vm::Runtime &runtime, + double startUtcMs, + double endUtcMs) noexcept; + + private: + enum class DateTimeComponent : int { DATE, TIME, ALL, ANY }; + + /** + * Corresponds to table headers of + * https://tc39.es/ecma402/#table-datetimeformat-components. + * Additionally, include the date time format component's corresponding + * icu skeleton symbol. + */ + struct DateTimeFormatComponent { + const std::u16string_view property_; + const llvh::ArrayRef values_; + std::optional