From 09ed179e8b80acfa5219cea9a72d0105fd1fb385 Mon Sep 17 00:00:00 2001 From: Jakub Duchniewicz Date: Thu, 20 Feb 2020 22:54:48 +0100 Subject: [PATCH 1/4] Add normalization and collation of UTF-8 strings. Add basic tests. --- PolyEngine/Core/CMakeLists.txt | 3 + PolyEngine/Core/Src/pe/core/CorePCH.hpp | 7 ++- .../Core/Src/pe/core/storage/String.cpp | 57 ++++++++++++++++++- .../Core/Src/pe/core/storage/String.hpp | 13 ++++- .../Tests/CoreTests/Src/StringTests.cpp | 20 +++++++ 5 files changed, 94 insertions(+), 6 deletions(-) diff --git a/PolyEngine/Core/CMakeLists.txt b/PolyEngine/Core/CMakeLists.txt index 70c11654..7e6b1e58 100644 --- a/PolyEngine/Core/CMakeLists.txt +++ b/PolyEngine/Core/CMakeLists.txt @@ -6,7 +6,10 @@ file(GLOB_RECURSE POLYCORE_SRCS RELATIVE ${CMAKE_CURRENT_LIST_DIR} ${POLYCORE_INCLUDE}/*.h) GenerateSourceGoups("${POLYCORE_SRCS}") +find_package(ICU 65.1 COMPONENTS uc i18n REQUIRED) + add_library(${CORE_TARGET} SHARED ${POLYCORE_SRCS}) +target_link_libraries(${CORE_TARGET} PUBLIC ICU::uc ICU::i18n) target_compile_options(${CORE_TARGET} PRIVATE $<$:${SIMD_FLAGS}>) target_compile_definitions(${CORE_TARGET} PRIVATE _CORE DISABLE_SIMD=$>) target_include_directories(${CORE_TARGET} PUBLIC ${POLYCORE_INCLUDE} PRIVATE ${RapidJSON_INCLUDE_DIRS}) diff --git a/PolyEngine/Core/Src/pe/core/CorePCH.hpp b/PolyEngine/Core/Src/pe/core/CorePCH.hpp index fe97973f..e9cb8261 100644 --- a/PolyEngine/Core/Src/pe/core/CorePCH.hpp +++ b/PolyEngine/Core/Src/pe/core/CorePCH.hpp @@ -7,4 +7,9 @@ SILENCE_GCC_WARNING(-Wclass-memaccess, "Rapidjson has no release containing fix #include #include #include -UNSILENCE_GCC_WARNING() \ No newline at end of file +UNSILENCE_GCC_WARNING() + +// ICU +#include +#include +#include \ No newline at end of file diff --git a/PolyEngine/Core/Src/pe/core/storage/String.cpp b/PolyEngine/Core/Src/pe/core/storage/String.cpp index 4085aee8..3d6832d5 100644 --- a/PolyEngine/Core/Src/pe/core/storage/String.cpp +++ 
b/PolyEngine/Core/Src/pe/core/storage/String.cpp @@ -12,13 +12,48 @@ static const std::vector WHITESPACES { ' ', '\t', '\r', '\n', '\0' }; namespace pe::core::storage { -size_t StrLen(const char* str) { +size_t StrLen(const char* str) +{ size_t len = 0; while (str[len] != 0) ++len; return len; } +bool isValidASCIIString(const char* str) +{ + size_t pos = 0; + unsigned char c = 0; + while (c = str[pos], c != 0) + if (c > 0x7f) + return false; + else + ++pos; + return true; +} + +} + +String String::fromASCII(const char* data) // can still be invalid but better than nothing +{ + ASSERTE(isValidASCIIString(data), "Passed string is not valid ASCII, please use fromUTF8 factory method instead!"); + return String(data); +} + +String String::fromUTF8(const char* data) +{ + String str{}; + size_t length = StrLen(data); + str.Data.resize(length); + UErrorCode errorCode = UErrorCode::U_ZERO_ERROR; + auto unormalizer = unorm2_getNFCInstance(&errorCode); + unorm2_normalize(unormalizer, reinterpret_cast(data), -1, reinterpret_cast(str.Data.data()), length, &errorCode); + return str; +} + +String String::fromCodePoint(const char* data) +{ + return String(); } String::String(const char* data) { @@ -202,7 +237,8 @@ String& String::operator=(String&& rhs) { return *this; } -bool String::operator==(const char* str) const { +bool String::CmpBytes(const char* str) const +{ if (GetLength() != StrLen(str)) return false; for (size_t k = 0; k < GetLength(); ++k) @@ -211,10 +247,25 @@ bool String::operator==(const char* str) const { return true; } -bool String::operator==(const String& str) const { +bool String::CmpBytes(const String& str) const +{ return Data == str.Data; } +bool String::operator==(const char* str) const +{ + UErrorCode success = U_ZERO_ERROR; + icu::Collator* coll = icu::Collator::createInstance(success); + return coll->compareUTF8(Data.data(), str, success) == UCOL_EQUAL; +} + +bool String::operator==(const String& str) const +{ + UErrorCode success = U_ZERO_ERROR; + 
icu::Collator* coll = icu::Collator::createInstance(success); + return coll->compareUTF8(Data.data(), str.Data.data(), success) == UCOL_EQUAL; // there is implicit construction of StringPiece involved, do we want it? (alternative: use C-api) OR NOT? read docs +} + bool String::operator<(const String& rhs) const { if (GetLength() < rhs.GetLength()) return true; diff --git a/PolyEngine/Core/Src/pe/core/storage/String.hpp b/PolyEngine/Core/Src/pe/core/storage/String.hpp index 4f83bee4..0c1ba831 100644 --- a/PolyEngine/Core/Src/pe/core/storage/String.hpp +++ b/PolyEngine/Core/Src/pe/core/storage/String.hpp @@ -12,6 +12,12 @@ namespace pe::core::storage public: static const String EMPTY; + static String fromASCII(const char* data); + + static String fromUTF8(const char* data); + + static String fromCodePoint(const char* data); + /// Basic String costructor that creates empty String String() : String("") {}; @@ -27,7 +33,6 @@ namespace pe::core::storage /// Reference to String instance which state should be moved String(String&& rhs); - /// Casts int to String /// Integer value which should be used to make String instance /// String containing integer value @@ -72,7 +77,6 @@ namespace pe::core::storage /// String containing given std::string static String From(const std::string& var); - /// Checks if String instance contains another String instance /// String reference which should be contained in another String instance bool Contains(const String& var) const; @@ -164,6 +168,10 @@ namespace pe::core::storage /// Moved String reference String& operator=(String&& rhs); + bool CmpBytes(const char* str) const; + + bool CmpBytes(const String& str) const; + /// Compares String with Cstring /// Cstring to be compared with bool operator==(const char* str) const; @@ -193,6 +201,7 @@ namespace pe::core::storage char operator[](size_t idx) const; size_t GetLength() const; + const char* GetCStr() const { return Data.data(); } friend std::ostream& operator<< (std::ostream& stream, 
const String& rhs) { return stream << rhs.GetCStr(); } diff --git a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp index 1c166197..17e34c90 100644 --- a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp +++ b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp @@ -104,4 +104,24 @@ TEST_CASE("String operations", "[String]") { ::pe::core::storage::String notContainsTest = ::pe::core::storage::String("Z[allz'/"); REQUIRE(test.Contains(notContainsTest) == false); +} + +TEST_CASE("UTF-8 string normalization and collation tests", "[String]") +{ + ::pe::core::storage::String utf8Literal("śląsk"); + ::pe::core::storage::String uft8Escaped("\xC5\x9B\x6C\xC4\x85\x73\x6B"); + REQUIRE(utf8Literal == uft8Escaped); + + auto normalizedUTF8One = ::pe::core::storage::String::fromUTF8("\xC3\xA4\x00"); + auto normalizedUTF8Two = ::pe::core::storage::String::fromUTF8("\x61\xCC\x88\x00"); + REQUIRE(normalizedUTF8One == normalizedUTF8Two); + REQUIRE(normalizedUTF8One.CmpBytes(normalizedUTF8Two) == false); + + + ::pe::core::storage::String regularStringOne("\xC3\xA4\x00"); + ::pe::core::storage::String regularStringTwo("\x61\xCC\x88\x00"); + REQUIRE(regularStringTwo == regularStringTwo); + REQUIRE(regularStringOne.CmpBytes(regularStringTwo) == false); + + //auto invalidASCII = ::pe::core::storage::String::fromASCII("\xC3\xA4\x00"); } \ No newline at end of file From aada678679e30eef809906a6cca84e853af876d2 Mon Sep 17 00:00:00 2001 From: Jakub Duchniewicz Date: Tue, 25 Feb 2020 22:51:07 +0100 Subject: [PATCH 2/4] Fix normalization. Add toASCII transliteration method. Add tests. 
--- PolyEngine/Core/Src/pe/core/CorePCH.hpp | 6 +++- .../Core/Src/pe/core/storage/String.cpp | 29 ++++++++++++---- .../Core/Src/pe/core/storage/String.hpp | 2 ++ .../Tests/CoreTests/Src/StringTests.cpp | 33 +++++++++++++++++-- 4 files changed, 59 insertions(+), 11 deletions(-) diff --git a/PolyEngine/Core/Src/pe/core/CorePCH.hpp b/PolyEngine/Core/Src/pe/core/CorePCH.hpp index e9cb8261..b40dc95b 100644 --- a/PolyEngine/Core/Src/pe/core/CorePCH.hpp +++ b/PolyEngine/Core/Src/pe/core/CorePCH.hpp @@ -10,6 +10,10 @@ SILENCE_GCC_WARNING(-Wclass-memaccess, "Rapidjson has no release containing fix UNSILENCE_GCC_WARNING() // ICU -#include +//#include +#include +#include +//#include +//#include #include #include \ No newline at end of file diff --git a/PolyEngine/Core/Src/pe/core/storage/String.cpp b/PolyEngine/Core/Src/pe/core/storage/String.cpp index 3d6832d5..74823e7c 100644 --- a/PolyEngine/Core/Src/pe/core/storage/String.cpp +++ b/PolyEngine/Core/Src/pe/core/storage/String.cpp @@ -42,13 +42,15 @@ String String::fromASCII(const char* data) // can still be invalid but better th String String::fromUTF8(const char* data) { - String str{}; - size_t length = StrLen(data); - str.Data.resize(length); + String ret{}; + const size_t len = StrLen(data); UErrorCode errorCode = UErrorCode::U_ZERO_ERROR; - auto unormalizer = unorm2_getNFCInstance(&errorCode); - unorm2_normalize(unormalizer, reinterpret_cast(data), -1, reinterpret_cast(str.Data.data()), length, &errorCode); - return str; + icu::UnicodeString dst, src(data, len); + auto normalizer = icu::Normalizer2::getNFCInstance(errorCode); + normalizer->normalize(src, dst, errorCode); + ret.Data.reserve(dst.length()); + dst.extract(0, dst.length(), ret.Data.data(), dst.length()); + return ret; } String String::fromCodePoint(const char* data) @@ -121,6 +123,19 @@ String String::ToUpper() const return ret; } +String String::toASCII() const // C-api is very unwieldy for this one, copying and duplicating is unavoidable +{ + String 
ret{}; + ret.Data.reserve(Data.size()); + icu::UnicodeString str(GetCStr(), Data.size()); + UErrorCode errorCode = UErrorCode::U_ZERO_ERROR; + UParseError parseError; + auto trans = icu::Transliterator::createInstance("Any-Latin; Latin-ASCII", UTRANS_FORWARD, parseError, errorCode); + trans->transliterate(str); + str.extract(0, str.length(), ret.Data.data(), Data.size()); + return ret; +} + bool String::IsEmpty() const { return GetLength() == 0; } @@ -263,7 +278,7 @@ bool String::operator==(const String& str) const { UErrorCode success = U_ZERO_ERROR; icu::Collator* coll = icu::Collator::createInstance(success); - return coll->compareUTF8(Data.data(), str.Data.data(), success) == UCOL_EQUAL; // there is implicit construction of StringPiece involved, do we want it? (alternative: use C-api) OR NOT? read docs + return coll->compareUTF8(Data.data(), str.Data.data(), success) == UCOL_EQUAL; } bool String::operator<(const String& rhs) const { diff --git a/PolyEngine/Core/Src/pe/core/storage/String.hpp b/PolyEngine/Core/Src/pe/core/storage/String.hpp index 0c1ba831..f67d00f9 100644 --- a/PolyEngine/Core/Src/pe/core/storage/String.hpp +++ b/PolyEngine/Core/Src/pe/core/storage/String.hpp @@ -93,6 +93,8 @@ namespace pe::core::storage /// Upper-case String instance String ToUpper() const; + String toASCII() const; + /// Checks if String is empty bool IsEmpty() const; diff --git a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp index 17e34c90..e8622274 100644 --- a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp +++ b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp @@ -112,16 +112,43 @@ TEST_CASE("UTF-8 string normalization and collation tests", "[String]") ::pe::core::storage::String uft8Escaped("\xC5\x9B\x6C\xC4\x85\x73\x6B"); REQUIRE(utf8Literal == uft8Escaped); + auto codepointUTF8One = ::pe::core::storage::String::fromUTF8("\u4eba\u53e3\u3058\u3093\u3053\u3046\u306b\u81be\u7099\u304b\u3044\u3057\u3083\u3059\u308b"); + auto 
codepointUTF8Two = ::pe::core::storage::String::fromUTF8("\u4eba\u53e3\u3058\u3093\u3053\u3046\u306b\u81be\uf9fb\u304b\u3044\u3057\u3083\u3059\u308b"); + REQUIRE(codepointUTF8One == codepointUTF8Two); + REQUIRE(codepointUTF8One.CmpBytes(codepointUTF8Two) == true); + auto normalizedUTF8One = ::pe::core::storage::String::fromUTF8("\xC3\xA4\x00"); auto normalizedUTF8Two = ::pe::core::storage::String::fromUTF8("\x61\xCC\x88\x00"); REQUIRE(normalizedUTF8One == normalizedUTF8Two); - REQUIRE(normalizedUTF8One.CmpBytes(normalizedUTF8Two) == false); - + REQUIRE(normalizedUTF8One.CmpBytes(normalizedUTF8Two) == true); - ::pe::core::storage::String regularStringOne("\xC3\xA4\x00"); + ::pe::core::storage::String regularStringOne("\xC3\xA4\x00"); // from regular string they will not have proper representation as bytes, no normalization ::pe::core::storage::String regularStringTwo("\x61\xCC\x88\x00"); - REQUIRE(regularStringTwo == regularStringTwo); + REQUIRE(regularStringOne == regularStringTwo); /* compare the precomposed and decomposed spellings; the previous line compared a string with itself */ REQUIRE(regularStringOne.CmpBytes(regularStringTwo) == false); //auto invalidASCII = ::pe::core::storage::String::fromASCII("\xC3\xA4\x00"); +} + +TEST_CASE("UTF-8 string conversion tests", "[String]") +{ + ::pe::core::storage::String utf8polish("śląsk"); + auto asciiConverted = utf8polish.toASCII(); + REQUIRE(asciiConverted == "slask"); + + ::pe::core::storage::String utf8capital("ŚPIĄC"); + asciiConverted = utf8capital.toASCII(); + REQUIRE(asciiConverted == "SPIAC"); + + ::pe::core::storage::String hangulTest("김, 국삼"); + asciiConverted = hangulTest.toASCII(); + REQUIRE(asciiConverted == "gim, gugsam"); // does not preserve capital letters + + ::pe::core::storage::String kanjiTest("たけだ, まさゆき"); + asciiConverted = kanjiTest.toASCII(); + REQUIRE(asciiConverted == "takeda, masayuki"); // does not preserve capital letters + + ::pe::core::storage::String cyrilicTest("Θεοδωράτου, Ελένη"); + asciiConverted = cyrilicTest.toASCII(); + REQUIRE(asciiConverted == "Theodoratou, Elene"); // this preserves it somehow } \ No newline at end of file
From 3fb34bf06cf3b4cf7431c776d155c798fbd4d3b2 Mon Sep 17 00:00:00 2001 From: Jakub Duchniewicz Date: Wed, 26 Feb 2020 19:40:50 +0100 Subject: [PATCH 3/4] Minor fixes. Use StringBuilder for Join operation --- .../Core/Src/pe/core/storage/String.cpp | 54 +++++++++---------- .../Tests/CoreTests/Src/StringTests.cpp | 2 + 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/PolyEngine/Core/Src/pe/core/storage/String.cpp b/PolyEngine/Core/Src/pe/core/storage/String.cpp index 74823e7c..195ffe72 100644 --- a/PolyEngine/Core/Src/pe/core/storage/String.cpp +++ b/PolyEngine/Core/Src/pe/core/storage/String.cpp @@ -44,10 +44,10 @@ String String::fromUTF8(const char* data) { String ret{}; const size_t len = StrLen(data); - UErrorCode errorCode = UErrorCode::U_ZERO_ERROR; + UErrorCode success = UErrorCode::U_ZERO_ERROR; - icu::UnicodeString dst, src(data, len); + icu::UnicodeString dst, src = icu::UnicodeString::fromUTF8(icu::StringPiece(data, static_cast<int32_t>(len))); /* decode explicitly as UTF-8 instead of through the platform default codepage */ - auto normalizer = icu::Normalizer2::getNFCInstance(errorCode); - normalizer->normalize(src, dst, errorCode); + auto normalizer = icu::Normalizer2::getNFCInstance(success); + if (U_SUCCESS(success)) normalizer->normalize(src, dst, success); else dst = src; /* never dereference a failed instance; fall back to the un-normalized text */ - ret.Data.reserve(dst.length()); - dst.extract(0, dst.length(), ret.Data.data(), dst.length()); + std::string utf8; dst.toUTF8String(utf8); /* re-encode to UTF-8; dst.length() counts UTF-16 units, not output bytes */ + ret.Data.assign(utf8.c_str(), utf8.c_str() + utf8.size() + 1); /* copy the bytes plus the terminating NUL that Data always ends with; the old code wrote past Data's size after reserve() only */ return ret; @@ -128,9 +128,9 @@ String String::toASCII() const // C-api is very unwieldy for this one, copying a String ret{}; - ret.Data.reserve(Data.size()); + std::string ascii; /* staging buffer for the transliterated UTF-8 bytes */ - icu::UnicodeString str(GetCStr(), Data.size()); + icu::UnicodeString str = icu::UnicodeString::fromUTF8(GetCStr()); /* decode as UTF-8 and stop at the NUL rather than including it in the text */ - UErrorCode errorCode = UErrorCode::U_ZERO_ERROR; + UErrorCode success = UErrorCode::U_ZERO_ERROR; UParseError parseError; - auto trans = icu::Transliterator::createInstance("Any-Latin; Latin-ASCII", UTRANS_FORWARD, parseError, errorCode); + icu::LocalPointer<icu::Transliterator> trans(icu::Transliterator::createInstance("Any-Latin; Latin-ASCII", UTRANS_FORWARD, parseError, success)); /* createInstance returns a caller-owned object; LocalPointer deletes it on scope exit */ - trans->transliterate(str); + if (U_SUCCESS(success)) trans->transliterate(str); /* leave the text untouched if the transliterator could not be created */ - str.extract(0, str.length(), ret.Data.data(), Data.size()); + str.toUTF8String(ascii); ret.Data.assign(ascii.c_str(), ascii.c_str() + ascii.size() + 1); /* copy bytes plus terminating NUL; the old extract() wrote into reserved-but-unsized storage */ return ret; @@ -150,8 +150,8 @@ String String::Replace(char what, char with) const return ret; } -String String::Replace(const String& what, const String& with) const { -
+String String::Replace(const String& what, const String& with) const +{ std::vector splitted = Split(what); return Join(splitted.data(), splitted.size(), with); } @@ -172,28 +172,28 @@ std::vector String::Split(const String& delimiter) const { return elements; } -String String::Join(const String* vars, size_t size, const String& separator) { - //TODO replace using stringbuilder - String s = String(""); - for (size_t i = 0; i < size; i++) { - s = s + vars[i]; - if (i != size - 1) { - s = s + separator; - } +String String::Join(const String* vars, size_t size, const String& separator) +{ + StringBuilder sb; + for (size_t i = 0; i < size; ++i) + { + sb.Append(vars[i]); + if (i != size - 1) + sb.Append(separator); } - return s; + return sb.StealString(); } -String String::Join(const String* vars, size_t size, char separator) { - //TODO replace using stringbuilder - String s = String(""); - for (size_t i = 0; i < size; i++) { - s = s + vars[i]; - if (i != size - 1) { - s = s + separator; - } +String String::Join(const String* vars, size_t size, char separator) +{ + StringBuilder sb; + for (size_t i = 0; i < size; ++i) + { + sb.Append(vars[i]); + if (i != size - 1) + sb.Append(separator); } - return s; + return sb.StealString(); } bool String::StartsWith(char var) const { @@ -270,14 +270,14 @@ bool String::CmpBytes(const String& str) const bool String::operator==(const char* str) const { UErrorCode success = U_ZERO_ERROR; - icu::Collator* coll = icu::Collator::createInstance(success); + icu::LocalPointer<icu::Collator> coll(icu::Collator::createInstance(success)); /* createInstance returns a caller-owned collator; the raw pointer leaked one per comparison */ - return coll->compareUTF8(Data.data(), str, success) == UCOL_EQUAL; + return U_FAILURE(success) ? CmpBytes(str) : coll->compareUTF8(Data.data(), str, success) == UCOL_EQUAL; /* fall back to a byte comparison instead of dereferencing a collator that failed to load */ } bool String::operator==(const String& str) const { UErrorCode success = U_ZERO_ERROR; - icu::Collator* coll = icu::Collator::createInstance(success); + icu::LocalPointer<icu::Collator> coll(icu::Collator::createInstance(success)); /* owned here and deleted on scope exit */ - return coll->compareUTF8(Data.data(), str.Data.data(), success) == UCOL_EQUAL; + return U_FAILURE(success) ? CmpBytes(str) : coll->compareUTF8(Data.data(), str.Data.data(), success) == UCOL_EQUAL; /* same byte-comparison fallback as the C-string overload */ } diff --git a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp index e8622274..a1a60b11 100644 --- a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp +++ b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp @@ -54,6 +54,8 @@ TEST_CASE("String operations", "[String]") { ::pe::core::storage::String empty = ::pe::core::storage::String(""); REQUIRE(!test.IsEmpty()); REQUIRE(empty.IsEmpty()); + REQUIRE(!empty.StartsWith('?')); + REQUIRE(!empty.EndsWith('?')); ::pe::core::storage::String replace = ::pe::core::storage::String("@ALZ[aWWzD{"); ::pe::core::storage::String replaced = test.Replace('l', 'W').Replace('\'', 'D'); From 8b02e163054ec76d0ed6a24b2e0aa2143874ec2b Mon Sep 17 00:00:00 2001 From: Jakub Duchniewicz Date: Mon, 2 Mar 2020 20:55:20 +0100 Subject: [PATCH 4/4] Add Memory iterator. Add tests. --- .../Core/Src/pe/core/storage/String.cpp | 5 ++ .../Core/Src/pe/core/storage/String.hpp | 75 +++++++++++++++++++ .../Tests/CoreTests/Src/StringTests.cpp | 70 +++++++++++++++++ 3 files changed, 150 insertions(+) diff --git a/PolyEngine/Core/Src/pe/core/storage/String.cpp b/PolyEngine/Core/Src/pe/core/storage/String.cpp index 195ffe72..7dd6fbf4 100644 --- a/PolyEngine/Core/Src/pe/core/storage/String.cpp +++ b/PolyEngine/Core/Src/pe/core/storage/String.cpp @@ -327,6 +327,11 @@ size_t String::GetLength() const return Data.size() - 1; } +size_t String::GetLogicalLength() const +{ + return 0; // TODO: count it on demand? 
+} + size_t String::FindSubstrFromPoint(size_t startPoint, const String& str) const { for (size_t idx = startPoint; idx < GetLength(); ++idx) diff --git a/PolyEngine/Core/Src/pe/core/storage/String.hpp b/PolyEngine/Core/Src/pe/core/storage/String.hpp index f67d00f9..f9b300b1 100644 --- a/PolyEngine/Core/Src/pe/core/storage/String.hpp +++ b/PolyEngine/Core/Src/pe/core/storage/String.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include namespace pe::core::storage { @@ -204,10 +205,84 @@ namespace pe::core::storage size_t GetLength() const; + size_t GetLogicalLength() const; + const char* GetCStr() const { return Data.data(); } friend std::ostream& operator<< (std::ostream& stream, const String& rhs) { return stream << rhs.GetCStr(); } + class StringIteratorMemory final : public BaseObjectLiteralType<> + { + friend class String; + public: + using iterator_category = std::bidirectional_iterator_tag; + using value_type = char; + using difference_type = std::ptrdiff_t; + using pointer = const char*; /* read-only iterator: must agree with operator* returning const char& */ + using reference = const char&; /* std::iterator_traits users (e.g. std::reverse_iterator) return this type from operator* */ + + bool operator==(const StringIteratorMemory& rhs) const { return idx == rhs.idx; } + bool operator!=(const StringIteratorMemory& rhs) const { return idx != rhs.idx; } + + const char& operator*() const { return s->Data.at(idx); } + //const char* operator->() const { return s->Data.data() + idx * sizeof(char); } //are they even useful?
+ + StringIteratorMemory& operator++() { ++idx; return *this; } + StringIteratorMemory operator++(int) { StringIteratorMemory ret(s, idx); ++idx; return ret; } + StringIteratorMemory& operator--() { ASSERTE(idx > 0, "Index cannot be negative"); --idx; return *this; } + StringIteratorMemory operator--(int) { ASSERTE(idx > 0, "Index cannot be negative"); StringIteratorMemory ret(s, idx); --idx; return ret; } + private: + StringIteratorMemory(const String* string, size_t index) : s(string), idx(index) {}; + const String* s; + size_t idx; + }; + + class StringIteratorGlyph final : public BaseObjectLiteralType<>// add implementation from numeria + { + friend class String; + public: + using iterator_category = std::bidirectional_iterator_tag; + using value_type = char; + using difference_type = std::ptrdiff_t; + using pointer = const char*; /* read-only iterator: must agree with operator* returning const char& */ + using reference = const char&; /* std::iterator_traits users (e.g. std::reverse_iterator) return this type from operator* */ + + bool operator==(const StringIteratorGlyph& rhs) const { return idx == rhs.idx; } + bool operator!=(const StringIteratorGlyph& rhs) const { return idx != rhs.idx; } + + const char& operator*() const { return s->Data.at(idx); } + //const char* operator->() const { return s->Data.data() + idx * sizeof(char); } + + StringIteratorGlyph& operator++() { ++idx; return *this; } + StringIteratorGlyph operator++(int) { StringIteratorGlyph ret(s, idx); ++idx; return ret; } + StringIteratorGlyph& operator--() { ASSERTE(idx > 0, "Index cannot be negative"); --idx; return *this; } + StringIteratorGlyph operator--(int) { ASSERTE(idx > 0, "Index cannot be negative"); StringIteratorGlyph ret(s, idx); --idx; return ret; } + private: + StringIteratorGlyph(const String* string, size_t index) : s(string), idx(index) {}; + const String* s; + size_t idx; + }; + + // FIXME: default begin and end are memory for now, please scrutinize this, const added as well, they are const anyway, should user only use const ones? disallow mutating?
+ StringIteratorMemory begin() const { return StringIteratorMemory(this, 0); } + StringIteratorMemory end() const { return StringIteratorMemory(this, Data.size()); } + StringIteratorMemory cbegin() const { return StringIteratorMemory(this, 0); } + StringIteratorMemory cend() const { return StringIteratorMemory(this, Data.size()); } + StringIteratorGlyph beginGlyph() const { return StringIteratorGlyph(this, 0); } + StringIteratorGlyph endGlyph() const { return StringIteratorGlyph(this, Data.size()); } + + /* Range over every stored byte; end() sits at Data.size(), so the terminating NUL is visited too. Const so that const Strings can be iterated - the iterators are read-only anyway. */ + ::pe::core::utils::Range IterateMemory() const + { + return ::pe::core::utils::Range(begin(), end()); + } + + /* Range built from the glyph iterators; StringIteratorGlyph currently advances one byte per step, exactly like the memory iterator. */ + ::pe::core::utils::Range IterateGlyphs() const + { + return ::pe::core::utils::Range(beginGlyph(), endGlyph()); + } + private: String(std::vector rawData) : Data(std::move(rawData)) { Data.push_back('\0'); } diff --git a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp index a1a60b11..047d4eb2 100644 --- a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp +++ b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp @@ -153,4 +153,74 @@ TEST_CASE("UTF-8 string conversion tests", "[String]") ::pe::core::storage::String cyrilicTest("Θεοδωράτου, Ελένη"); asciiConverted = cyrilicTest.toASCII(); REQUIRE(asciiConverted == "Theodoratou, Elene"); // this preserves it somehow +} + +TEST_CASE("STL iterator tests", "[String]") +{ + ::pe::core::storage::String s1("regular string"); + ::pe::core::storage::String s2; + + size_t i = 0; + for(auto c : s1) + { + UNUSED(c); + ++i; + } + REQUIRE(i == s1.GetLength() + 1);// +1 for the \0 + + i = 0; + for(auto c : s2) + { + UNUSED(c); + ++i; + } + REQUIRE(i == s2.GetLength() + 1); +} + +TEST_CASE("Memory iterator tests", "[String]") +{ + ::pe::core::storage::String s1("regular string"); + ::pe::core::storage::String s2; + + size_t i = 0; + for(auto c : s1.IterateMemory()) + { + UNUSED(c); + ++i; + } + REQUIRE(i == s1.GetLength() + 1); + + i = 0; + for(auto c : s2.IterateMemory()) + { +
UNUSED(c); + ++i; + } + REQUIRE(i == s2.GetLength() + 1); + + auto it = s1.begin(); + auto it2 = s1.end(); + //--it; + ++it; + --it2; + REQUIRE(*it == 'e'); + REQUIRE(*it2 == '\0'); + it++; + REQUIRE(*it == 'g'); + --it; + REQUIRE(*it == 'e'); + it--; + REQUIRE(*it == 'r'); + it2 = s1.begin(); + REQUIRE(it == it2); + ++it2; + REQUIRE(it != it2); +} + +TEST_CASE("Glyph iterator tests", "[String]") +{ + //::pe::core::storage::String s1("regular string"); +// ::pe::core::storage::String s2(""); + + } \ No newline at end of file