From 09ed179e8b80acfa5219cea9a72d0105fd1fb385 Mon Sep 17 00:00:00 2001
From: Jakub Duchniewicz <j.duchniewicz@gmail.com>
Date: Thu, 20 Feb 2020 22:54:48 +0100
Subject: [PATCH 1/4] Add normalization and collation of UTF-8 strings. Add
 basic tests.

---
 PolyEngine/Core/CMakeLists.txt                |  3 +
 PolyEngine/Core/Src/pe/core/CorePCH.hpp       |  7 ++-
 .../Core/Src/pe/core/storage/String.cpp       | 57 ++++++++++++++++++-
 .../Core/Src/pe/core/storage/String.hpp       | 13 ++++-
 .../Tests/CoreTests/Src/StringTests.cpp       | 20 +++++++
 5 files changed, 94 insertions(+), 6 deletions(-)
diff --git a/PolyEngine/Core/CMakeLists.txt b/PolyEngine/Core/CMakeLists.txt
index 70c11654..7e6b1e58 100644
--- a/PolyEngine/Core/CMakeLists.txt
+++ b/PolyEngine/Core/CMakeLists.txt
@@ -6,7 +6,10 @@ file(GLOB_RECURSE POLYCORE_SRCS RELATIVE ${CMAKE_CURRENT_LIST_DIR}
 	${POLYCORE_INCLUDE}/*.h)
 GenerateSourceGoups("${POLYCORE_SRCS}")
 
+find_package(ICU 65.1 COMPONENTS uc i18n REQUIRED)
+
 add_library(${CORE_TARGET} SHARED ${POLYCORE_SRCS})
+target_link_libraries(${CORE_TARGET} PUBLIC ICU::uc ICU::i18n)
 target_compile_options(${CORE_TARGET} PRIVATE $<$<BOOL:${SIMD}>:${SIMD_FLAGS}>)
 target_compile_definitions(${CORE_TARGET} PRIVATE _CORE DISABLE_SIMD=$<NOT:$<BOOL:${SIMD}>>)
 target_include_directories(${CORE_TARGET} PUBLIC ${POLYCORE_INCLUDE} PRIVATE ${RapidJSON_INCLUDE_DIRS})
diff --git a/PolyEngine/Core/Src/pe/core/CorePCH.hpp b/PolyEngine/Core/Src/pe/core/CorePCH.hpp
index fe97973f..e9cb8261 100644
--- a/PolyEngine/Core/Src/pe/core/CorePCH.hpp
+++ b/PolyEngine/Core/Src/pe/core/CorePCH.hpp
@@ -7,4 +7,9 @@ SILENCE_GCC_WARNING(-Wclass-memaccess, "Rapidjson has no release containing fix
 #include <rapidjson/stringbuffer.h>
 #include <rapidjson/prettywriter.h>
 #include <rapidjson/document.h>
-UNSILENCE_GCC_WARNING()
\ No newline at end of file
+UNSILENCE_GCC_WARNING()
+
+// ICU
+#include <unicode/unorm2.h>
+#include <unicode/ucnv.h>
+#include <unicode/coll.h>
\ No newline at end of file
diff --git a/PolyEngine/Core/Src/pe/core/storage/String.cpp b/PolyEngine/Core/Src/pe/core/storage/String.cpp
index 4085aee8..3d6832d5 100644
--- a/PolyEngine/Core/Src/pe/core/storage/String.cpp
+++ b/PolyEngine/Core/Src/pe/core/storage/String.cpp
@@ -12,13 +12,48 @@ static const std::vector<char> WHITESPACES { ' ', '\t', '\r', '\n', '\0' };
 namespace pe::core::storage
 {
 
-size_t StrLen(const char* str) {
+size_t StrLen(const char* str)
+{
 	size_t len = 0;
 	while (str[len] != 0)
 		++len;
 	return len;
 }
 
+// Returns true when every byte before the terminating '\0' is 7-bit ASCII (<= 0x7f).
+bool isValidASCIIString(const char* str)
+{
+	for (const char* cursor = str; *cursor != 0; ++cursor)
+	{
+		// Compare as unsigned so bytes >= 0x80 are not seen as negative chars.
+		if (static_cast<unsigned char>(*cursor) > 0x7f)
+			return false;
+	}
+	return true;
+}
+
+}
+
+String String::fromASCII(const char* data) // can still be invalid but better than nothing
+{
+	ASSERTE(isValidASCIIString(data), "Passed string is not valid ASCII, please use fromUTF8 factory method instead!");
+	return String(data); // handed to the plain constructor as-is; this factory only adds the ASSERTE check above
+}
+
+String String::fromUTF8(const char* data)
+{
+	String str{};
+	size_t length = StrLen(data);
+	str.Data.resize(length);
+	UErrorCode errorCode = UErrorCode::U_ZERO_ERROR;
+	auto unormalizer = unorm2_getNFCInstance(&errorCode);
+	unorm2_normalize(unormalizer, reinterpret_cast<const UChar*>(data), -1, reinterpret_cast<UChar*>(str.Data.data()), length, &errorCode);
+	return str;
+}
+
+String String::fromCodePoint(const char* data)
+{
+	return String(); // TODO: not implemented yet - 'data' is ignored and a default-constructed String is returned
 }
 
 String::String(const char* data) {
@@ -202,7 +237,8 @@ String& String::operator=(String&& rhs) {
 	return *this;
 }
 
-bool String::operator==(const char* str) const {
+bool String::CmpBytes(const char* str) const
+{
 	if (GetLength() != StrLen(str))
 		return false;
 	for (size_t k = 0; k < GetLength(); ++k)
@@ -211,10 +247,25 @@ bool String::operator==(const char* str) const {
 	return true;
 }
 
-bool String::operator==(const String& str) const {
+bool String::CmpBytes(const String& str) const
+{
 	return Data == str.Data;
 }
 
+bool String::operator==(const char* str) const
+{
+	UErrorCode success = U_ZERO_ERROR;
+	icu::Collator* coll = icu::Collator::createInstance(success);
+	return coll->compareUTF8(Data.data(), str, success) == UCOL_EQUAL;
+}
+
+bool String::operator==(const String& str) const
+{
+	UErrorCode success = U_ZERO_ERROR;
+	icu::Collator* coll = icu::Collator::createInstance(success);
+	return coll->compareUTF8(Data.data(), str.Data.data(), success) == UCOL_EQUAL; // there is implicit construction of StringPiece involved, do we want it? (alternative: use C-api) OR NOT? read docs
+}
+
 bool String::operator<(const String& rhs) const {
 	if (GetLength() < rhs.GetLength())
 		return true;
diff --git a/PolyEngine/Core/Src/pe/core/storage/String.hpp b/PolyEngine/Core/Src/pe/core/storage/String.hpp
index 4f83bee4..0c1ba831 100644
--- a/PolyEngine/Core/Src/pe/core/storage/String.hpp
+++ b/PolyEngine/Core/Src/pe/core/storage/String.hpp
@@ -12,6 +12,12 @@ namespace pe::core::storage
 	public:
 		static const String EMPTY;
 
+		static String fromASCII(const char* data);
+
+		static String fromUTF8(const char* data);
+
+		static String fromCodePoint(const char* data);
+
 		/// <summary>Basic String costructor that creates empty String</summary>
 		String() : String("") {};
 
@@ -27,7 +33,6 @@ namespace pe::core::storage
 		/// <param name="rhs">Reference to String instance which state should be moved</param>
 		String(String&& rhs);
 
-
 		/// <summary>Casts int to String</summary>
 		/// <param name="var">Integer value which should be used to make String instance</param>
 		/// <returns>String containing integer value</returns>
@@ -72,7 +77,6 @@ namespace pe::core::storage
 		/// <returns>String containing given std::string</returns>
 		static String From(const std::string& var);
 
-
 		/// <summary>Checks if String instance contains another String instance</summary>
 		/// <param name="var">String reference which should be contained in another String instance</param>
 		bool Contains(const String& var) const;
@@ -164,6 +168,10 @@ namespace pe::core::storage
 		/// <returns>Moved String reference</returns>
 		String& operator=(String&& rhs);
 
+		bool CmpBytes(const char* str) const;
+
+		bool CmpBytes(const String& str) const;
+
 		/// <summary>Compares String with Cstring</summary>
 		/// <param name="str">Cstring to be compared with</param>
 		bool operator==(const char* str) const;
@@ -193,6 +201,7 @@ namespace pe::core::storage
 		char operator[](size_t idx) const;
 
 		size_t GetLength() const;
+
 		const char* GetCStr() const { return Data.data(); }
 
 		friend std::ostream& operator<< (std::ostream& stream, const String& rhs) { return stream << rhs.GetCStr(); }
diff --git a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
index 1c166197..17e34c90 100644
--- a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
+++ b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
@@ -104,4 +104,24 @@ TEST_CASE("String operations", "[String]") {
 
 	::pe::core::storage::String notContainsTest = ::pe::core::storage::String("Z[allz'/");
 	REQUIRE(test.Contains(notContainsTest) == false);
+}
+
+TEST_CASE("UTF-8 string normalization and collation tests", "[String]")
+{
+	::pe::core::storage::String utf8Literal("śląsk");
+	::pe::core::storage::String uft8Escaped("\xC5\x9B\x6C\xC4\x85\x73\x6B");
+	REQUIRE(utf8Literal == uft8Escaped);
+
+	auto normalizedUTF8One = ::pe::core::storage::String::fromUTF8("\xC3\xA4\x00");
+	auto normalizedUTF8Two = ::pe::core::storage::String::fromUTF8("\x61\xCC\x88\x00");
+	REQUIRE(normalizedUTF8One == normalizedUTF8Two);
+	REQUIRE(normalizedUTF8One.CmpBytes(normalizedUTF8Two) == false);
+
+
+	::pe::core::storage::String regularStringOne("\xC3\xA4\x00");
+	::pe::core::storage::String regularStringTwo("\x61\xCC\x88\x00");
+	REQUIRE(regularStringTwo == regularStringTwo);
+	REQUIRE(regularStringOne.CmpBytes(regularStringTwo) == false);
+
+	//auto invalidASCII = ::pe::core::storage::String::fromASCII("\xC3\xA4\x00");
 }
\ No newline at end of file

From aada678679e30eef809906a6cca84e853af876d2 Mon Sep 17 00:00:00 2001
From: Jakub Duchniewicz <j.duchniewicz@gmail.com>
Date: Tue, 25 Feb 2020 22:51:07 +0100
Subject: [PATCH 2/4] Fix normalization. Add toASCII transliteration method.
 Add tests.

---
 PolyEngine/Core/Src/pe/core/CorePCH.hpp       |  6 +++-
 .../Core/Src/pe/core/storage/String.cpp       | 29 ++++++++++++----
 .../Core/Src/pe/core/storage/String.hpp       |  2 ++
 .../Tests/CoreTests/Src/StringTests.cpp       | 33 +++++++++++++++++--
 4 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/PolyEngine/Core/Src/pe/core/CorePCH.hpp b/PolyEngine/Core/Src/pe/core/CorePCH.hpp
index e9cb8261..b40dc95b 100644
--- a/PolyEngine/Core/Src/pe/core/CorePCH.hpp
+++ b/PolyEngine/Core/Src/pe/core/CorePCH.hpp
@@ -10,6 +10,10 @@ SILENCE_GCC_WARNING(-Wclass-memaccess, "Rapidjson has no release containing fix
 UNSILENCE_GCC_WARNING()
 
 // ICU
-#include <unicode/unorm2.h>
+//#include <unicode/unorm2.h>
+#include <unicode/normalizer2.h>
+#include <unicode/translit.h>
+//#include <unicode/ustring.h>
+//#include <unicode/utrans.h>
 #include <unicode/ucnv.h>
 #include <unicode/coll.h>
\ No newline at end of file
diff --git a/PolyEngine/Core/Src/pe/core/storage/String.cpp b/PolyEngine/Core/Src/pe/core/storage/String.cpp
index 3d6832d5..74823e7c 100644
--- a/PolyEngine/Core/Src/pe/core/storage/String.cpp
+++ b/PolyEngine/Core/Src/pe/core/storage/String.cpp
@@ -42,13 +42,15 @@ String String::fromASCII(const char* data) // can still be invalid but better th
 
 String String::fromUTF8(const char* data)
 {
-	String str{};
-	size_t length = StrLen(data);
-	str.Data.resize(length);
+	String ret{};
+	const size_t len = StrLen(data);
 	UErrorCode errorCode = UErrorCode::U_ZERO_ERROR;
-	auto unormalizer = unorm2_getNFCInstance(&errorCode);
-	unorm2_normalize(unormalizer, reinterpret_cast<const UChar*>(data), -1, reinterpret_cast<UChar*>(str.Data.data()), length, &errorCode);
-	return str;
+	icu::UnicodeString dst, src(data, len);
+	auto normalizer = icu::Normalizer2::getNFCInstance(errorCode);
+	normalizer->normalize(src, dst, errorCode);
+	ret.Data.reserve(dst.length());
+	dst.extract(0, dst.length(), ret.Data.data(), dst.length());
+	return ret;
 }
 
 String String::fromCodePoint(const char* data)
@@ -121,6 +123,19 @@ String String::ToUpper() const
 	return ret;
 }
 
+String String::toASCII() const // C-api is very unwieldy for this one, copying and duplicating is unavoidable
+{
+	String ret{};
+	ret.Data.reserve(Data.size());
+	icu::UnicodeString str(GetCStr(), Data.size());
+	UErrorCode errorCode = UErrorCode::U_ZERO_ERROR;
+	UParseError parseError;
+	auto trans = icu::Transliterator::createInstance("Any-Latin; Latin-ASCII", UTRANS_FORWARD, parseError, errorCode);
+	trans->transliterate(str);
+	str.extract(0, str.length(), ret.Data.data(), Data.size());
+	return ret;
+}
+
 bool String::IsEmpty() const {
 	return GetLength() == 0;
 }
@@ -263,7 +278,7 @@ bool String::operator==(const String& str) const
 {
 	UErrorCode success = U_ZERO_ERROR;
 	icu::Collator* coll = icu::Collator::createInstance(success);
-	return coll->compareUTF8(Data.data(), str.Data.data(), success) == UCOL_EQUAL; // there is implicit construction of StringPiece involved, do we want it? (alternative: use C-api) OR NOT? read docs
+	return coll->compareUTF8(Data.data(), str.Data.data(), success) == UCOL_EQUAL;
 }
 
 bool String::operator<(const String& rhs) const {
diff --git a/PolyEngine/Core/Src/pe/core/storage/String.hpp b/PolyEngine/Core/Src/pe/core/storage/String.hpp
index 0c1ba831..f67d00f9 100644
--- a/PolyEngine/Core/Src/pe/core/storage/String.hpp
+++ b/PolyEngine/Core/Src/pe/core/storage/String.hpp
@@ -93,6 +93,8 @@ namespace pe::core::storage
 		/// <returns>Upper-case String instance</returns>
 		String ToUpper() const;
 
+		String toASCII() const;
+
 		/// <summary>Checks if String is empty</summary>
 		bool IsEmpty() const;
 
diff --git a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
index 17e34c90..e8622274 100644
--- a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
+++ b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
@@ -112,16 +112,43 @@ TEST_CASE("UTF-8 string normalization and collation tests", "[String]")
 	::pe::core::storage::String uft8Escaped("\xC5\x9B\x6C\xC4\x85\x73\x6B");
 	REQUIRE(utf8Literal == uft8Escaped);
 
+	auto codepointUTF8One = ::pe::core::storage::String::fromUTF8("\u4eba\u53e3\u3058\u3093\u3053\u3046\u306b\u81be\u7099\u304b\u3044\u3057\u3083\u3059\u308b");
+	auto codepointUTF8Two = ::pe::core::storage::String::fromUTF8("\u4eba\u53e3\u3058\u3093\u3053\u3046\u306b\u81be\uf9fb\u304b\u3044\u3057\u3083\u3059\u308b");
+	REQUIRE(codepointUTF8One == codepointUTF8Two);
+	REQUIRE(codepointUTF8One.CmpBytes(codepointUTF8Two) == true);
+
 	auto normalizedUTF8One = ::pe::core::storage::String::fromUTF8("\xC3\xA4\x00");
 	auto normalizedUTF8Two = ::pe::core::storage::String::fromUTF8("\x61\xCC\x88\x00");
 	REQUIRE(normalizedUTF8One == normalizedUTF8Two);
-	REQUIRE(normalizedUTF8One.CmpBytes(normalizedUTF8Two) == false);
-
+	REQUIRE(normalizedUTF8One.CmpBytes(normalizedUTF8Two) == true);
 
-	::pe::core::storage::String regularStringOne("\xC3\xA4\x00");
+	::pe::core::storage::String regularStringOne("\xC3\xA4\x00"); // the plain constructor is not one of the normalizing factories, so the two byte sequences stay different
 	::pe::core::storage::String regularStringTwo("\x61\xCC\x88\x00");
-	REQUIRE(regularStringTwo == regularStringTwo);
+	REQUIRE(regularStringOne == regularStringTwo); // collation-equal although the bytes differ (was a self-comparison that could never fail)
 	REQUIRE(regularStringOne.CmpBytes(regularStringTwo) == false);
 
 	//auto invalidASCII = ::pe::core::storage::String::fromASCII("\xC3\xA4\x00");
+}
+
+TEST_CASE("UTF-8 string conversion tests", "[String]")
+{
+	::pe::core::storage::String utf8polish("śląsk");
+	auto asciiConverted = utf8polish.toASCII();
+	REQUIRE(asciiConverted == "slask");
+
+	::pe::core::storage::String utf8capital("ŚPIĄC");
+	asciiConverted = utf8capital.toASCII();
+	REQUIRE(asciiConverted == "SPIAC");
+
+	::pe::core::storage::String hangulTest("김, 국삼");
+	asciiConverted = hangulTest.toASCII();
+	REQUIRE(asciiConverted == "gim, gugsam"); // does not preserve capital letters
+	
+	::pe::core::storage::String kanjiTest("たけだ, まさゆき");
+	asciiConverted = kanjiTest.toASCII();
+	REQUIRE(asciiConverted == "takeda, masayuki"); // does not preserve capital letters
+
+	::pe::core::storage::String cyrilicTest("Θεοδωράτου, Ελένη");
+	asciiConverted = cyrilicTest.toASCII();
+	REQUIRE(asciiConverted == "Theodoratou, Elene"); // this preserves it somehow
 }
\ No newline at end of file

From 3fb34bf06cf3b4cf7431c776d155c798fbd4d3b2 Mon Sep 17 00:00:00 2001
From: Jakub Duchniewicz <j.duchniewicz@gmail.com>
Date: Wed, 26 Feb 2020 19:40:50 +0100
Subject: [PATCH 3/4] Minor fixes. Use StringBuilder for Join operation

---
 .../Core/Src/pe/core/storage/String.cpp       | 54 +++++++++----------
 .../Tests/CoreTests/Src/StringTests.cpp       |  2 +
 2 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/PolyEngine/Core/Src/pe/core/storage/String.cpp b/PolyEngine/Core/Src/pe/core/storage/String.cpp
index 74823e7c..195ffe72 100644
--- a/PolyEngine/Core/Src/pe/core/storage/String.cpp
+++ b/PolyEngine/Core/Src/pe/core/storage/String.cpp
@@ -44,10 +44,10 @@ String String::fromUTF8(const char* data)
 {
 	String ret{};
-	const size_t len = StrLen(data);
-	UErrorCode errorCode = UErrorCode::U_ZERO_ERROR;
-	icu::UnicodeString dst, src(data, len);
-	auto normalizer = icu::Normalizer2::getNFCInstance(errorCode);
-	normalizer->normalize(src, dst, errorCode);
-	ret.Data.reserve(dst.length());
-	dst.extract(0, dst.length(), ret.Data.data(), dst.length());
+	UErrorCode success = UErrorCode::U_ZERO_ERROR;
+	const icu::UnicodeString src = icu::UnicodeString::fromUTF8(data); // decode explicitly as UTF-8 instead of via the default codepage
+	const icu::Normalizer2* normalizer = icu::Normalizer2::getNFCInstance(success); // singleton owned by ICU, must not be deleted
+	const icu::UnicodeString dst = U_SUCCESS(success) ? normalizer->normalize(src, success) : src; // never dereference a null normalizer
+	std::string utf8;
+	(U_SUCCESS(success) ? dst : src).toUTF8String(utf8); // keep the un-normalized text if ICU reported an error
+	ret.Data.assign(utf8.c_str(), utf8.c_str() + utf8.size() + 1); // bytes plus terminating '\0'; reserve()+extract() wrote past size() and left the String empty
 	return ret;
@@ -128,9 +128,9 @@ String String::toASCII() const // C-api is very unwieldy for this one, copying a
 	String ret{};
-	ret.Data.reserve(Data.size());
-	icu::UnicodeString str(GetCStr(), Data.size());
-	UErrorCode errorCode = UErrorCode::U_ZERO_ERROR;
-	UParseError parseError;
-	auto trans = icu::Transliterator::createInstance("Any-Latin; Latin-ASCII", UTRANS_FORWARD, parseError, errorCode);
-	trans->transliterate(str);
-	str.extract(0, str.length(), ret.Data.data(), Data.size());
+	icu::UnicodeString str = icu::UnicodeString::fromUTF8(GetCStr()); // decode as UTF-8 and stop at the terminator instead of passing Data.size() (which counts the '\0')
+	UErrorCode success = UErrorCode::U_ZERO_ERROR;
+	const icu::LocalPointer<icu::Transliterator> trans(icu::Transliterator::createInstance("Any-Latin; Latin-ASCII", UTRANS_FORWARD, success)); // createInstance() transfers ownership; LocalPointer deletes it
+	if (U_SUCCESS(success)) trans->transliterate(str); // on failure trans is null and the text is returned untransliterated
+	std::string ascii;
+	str.toUTF8String(ascii);
+	ret.Data.assign(ascii.c_str(), ascii.c_str() + ascii.size() + 1); // bytes plus terminating '\0' so Data.size() (and GetLength()) match the content
 	return ret;
@@ -150,8 +150,8 @@ String String::Replace(char what, char with) const
 	return ret;
 }
 
-String String::Replace(const String& what, const String& with) const {
-	
+String String::Replace(const String& what, const String& with) const
+{
 	std::vector<String> splitted = Split(what);
 	return Join(splitted.data(), splitted.size(), with);
 }
@@ -172,28 +172,28 @@ std::vector<String> String::Split(const String& delimiter) const {
 	return elements;
 }
 
-String String::Join(const String* vars, size_t size, const String& separator) {
-	//TODO replace using stringbuilder
-	String s = String("");
-	for (size_t i = 0; i < size; i++) {
-		s = s + vars[i];
-		if (i != size - 1) {
-			s = s + separator;
-		}
+String String::Join(const String* vars, size_t size, const String& separator)
+{
+	StringBuilder builder;
+	for (size_t idx = 0; idx < size; ++idx)
+	{
+		if (idx > 0) // the separator goes between elements only, never after the last one
+			builder.Append(separator);
+		builder.Append(vars[idx]);
 	}
-	return s;
+	return builder.StealString();
 }
 
-String String::Join(const String* vars, size_t size, char separator) {
-	//TODO replace using stringbuilder
-	String s = String("");
-	for (size_t i = 0; i < size; i++) {
-		s = s + vars[i];
-		if (i != size - 1) {
-			s = s + separator;
-		}
+String String::Join(const String* vars, size_t size, char separator)
+{
+	StringBuilder builder;
+	for (size_t idx = 0; idx < size; ++idx)
+	{
+		if (idx > 0) // the separator goes between elements only, never after the last one
+			builder.Append(separator);
+		builder.Append(vars[idx]);
 	}
-	return s;
+	return builder.StealString();
 }
 
 bool String::StartsWith(char var) const {
@@ -270,14 +270,14 @@ bool String::CmpBytes(const String& str) const
 bool String::operator==(const char* str) const
 {
 	UErrorCode success = U_ZERO_ERROR;
-	icu::Collator* coll = icu::Collator::createInstance(success);
-	return coll->compareUTF8(Data.data(), str, success) == UCOL_EQUAL;
+	const icu::LocalPointer<icu::Collator> coll(icu::Collator::createInstance(success)); // createInstance() transfers ownership; LocalPointer deletes it (was leaked on every comparison)
+	return U_SUCCESS(success) ? coll->compareUTF8(Data.data(), str, success) == UCOL_EQUAL : CmpBytes(str); // byte-wise fallback instead of dereferencing a null collator
 }
 
 bool String::operator==(const String& str) const
 {
 	UErrorCode success = U_ZERO_ERROR;
-	icu::Collator* coll = icu::Collator::createInstance(success);
-	return coll->compareUTF8(Data.data(), str.Data.data(), success) == UCOL_EQUAL;
+	const icu::LocalPointer<icu::Collator> coll(icu::Collator::createInstance(success)); // createInstance() transfers ownership; LocalPointer deletes it (was leaked on every comparison)
+	return U_SUCCESS(success) ? coll->compareUTF8(Data.data(), str.Data.data(), success) == UCOL_EQUAL : CmpBytes(str); // byte-wise fallback instead of dereferencing a null collator
 }
 
diff --git a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
index e8622274..a1a60b11 100644
--- a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
+++ b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
@@ -54,6 +54,8 @@ TEST_CASE("String operations", "[String]") {
 	::pe::core::storage::String empty = ::pe::core::storage::String("");
 	REQUIRE(!test.IsEmpty());
 	REQUIRE(empty.IsEmpty());
+	REQUIRE(!empty.StartsWith('?'));
+	REQUIRE(!empty.EndsWith('?'));
 
 	::pe::core::storage::String replace = ::pe::core::storage::String("@ALZ[aWWzD{");
 	::pe::core::storage::String replaced = test.Replace('l', 'W').Replace('\'', 'D');

From 8b02e163054ec76d0ed6a24b2e0aa2143874ec2b Mon Sep 17 00:00:00 2001
From: Jakub Duchniewicz <j.duchniewicz@gmail.com>
Date: Mon, 2 Mar 2020 20:55:20 +0100
Subject: [PATCH 4/4] Add Memory iterator. Add tests.

---
 .../Core/Src/pe/core/storage/String.cpp       |  5 ++
 .../Core/Src/pe/core/storage/String.hpp       | 75 +++++++++++++++++++
 .../Tests/CoreTests/Src/StringTests.cpp       | 70 +++++++++++++++++
 3 files changed, 150 insertions(+)

diff --git a/PolyEngine/Core/Src/pe/core/storage/String.cpp b/PolyEngine/Core/Src/pe/core/storage/String.cpp
index 195ffe72..7dd6fbf4 100644
--- a/PolyEngine/Core/Src/pe/core/storage/String.cpp
+++ b/PolyEngine/Core/Src/pe/core/storage/String.cpp
@@ -327,6 +327,11 @@ size_t String::GetLength() const
 	return Data.size() - 1;
 }
 
+size_t String::GetLogicalLength() const
+{
+	return 0; // TODO: placeholder - always reports 0 until logical-length counting is implemented (count it on demand?)
+}
+
 size_t String::FindSubstrFromPoint(size_t startPoint, const String& str) const
 {
 	for (size_t idx = startPoint; idx < GetLength(); ++idx)
diff --git a/PolyEngine/Core/Src/pe/core/storage/String.hpp b/PolyEngine/Core/Src/pe/core/storage/String.hpp
index f67d00f9..f9b300b1 100644
--- a/PolyEngine/Core/Src/pe/core/storage/String.hpp
+++ b/PolyEngine/Core/Src/pe/core/storage/String.hpp
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <pe/Defines.hpp>
+#include <pe/core/utils/Range.hpp>
 
 namespace pe::core::storage
 {
@@ -204,10 +205,84 @@ namespace pe::core::storage
 
 		size_t GetLength() const;
 
+		size_t GetLogicalLength() const;
+
 		const char* GetCStr() const { return Data.data(); }
 
 		friend std::ostream& operator<< (std::ostream& stream, const String& rhs) { return stream << rhs.GetCStr(); }
 
+		class StringIteratorMemory final : public BaseObjectLiteralType<> // read-only iterator over the bytes stored in String::Data
+		{
+			friend class String;
+		public:
+			using iterator_category = std::bidirectional_iterator_tag;
+			using value_type = char;
+			using difference_type = std::ptrdiff_t;
+			using pointer = const char*; // const: must agree with operator*, which only hands out const access
+			using reference = const char&;
+
+			bool operator==(const StringIteratorMemory& rhs) const { return idx == rhs.idx; }
+			bool operator!=(const StringIteratorMemory& rhs) const { return idx != rhs.idx; }
+			// Only idx is compared above, so iterators taken from different Strings must not be compared.
+			reference operator*() const { return s->Data.at(idx); }
+			//const char* operator->() const { return s->Data.data() + idx * sizeof(char); } //are they even useful?
+
+			StringIteratorMemory& operator++() { ++idx; return *this; }
+			StringIteratorMemory operator++(int) { StringIteratorMemory ret(s, idx); ++idx; return ret; }
+			StringIteratorMemory& operator--() { ASSERTE(idx > 0, "Index cannot be negative"); --idx; return *this; }
+			StringIteratorMemory operator--(int) { ASSERTE(idx > 0, "Index cannot be negative"); StringIteratorMemory ret(s, idx); --idx; return ret; }
+		private:
+			StringIteratorMemory(const String* string, size_t index) : s(string), idx(index) {}
+			const String* s;
+			size_t idx;
+		};
+
+		class StringIteratorGlyph final : public BaseObjectLiteralType<> // TODO: placeholder - steps byte by byte exactly like StringIteratorMemory; add implementation from numeria
+		{
+			friend class String;
+		public:
+			using iterator_category = std::bidirectional_iterator_tag;
+			using value_type = char;
+			using difference_type = std::ptrdiff_t;
+			using pointer = const char*; // const: must agree with operator*, which only hands out const access
+			using reference = const char&;
+
+			bool operator==(const StringIteratorGlyph& rhs) const { return idx == rhs.idx; }
+			bool operator!=(const StringIteratorGlyph& rhs) const { return idx != rhs.idx; }
+			// Only idx is compared above, so iterators taken from different Strings must not be compared.
+			reference operator*() const { return s->Data.at(idx); }
+			//const char* operator->() const { return s->Data.data() + idx * sizeof(char); }
+
+			StringIteratorGlyph& operator++() { ++idx; return *this; }
+			StringIteratorGlyph operator++(int) { StringIteratorGlyph ret(s, idx); ++idx; return ret; }
+			StringIteratorGlyph& operator--() { ASSERTE(idx > 0, "Index cannot be negative"); --idx; return *this; }
+			StringIteratorGlyph operator--(int) { ASSERTE(idx > 0, "Index cannot be negative"); StringIteratorGlyph ret(s, idx); --idx; return ret; }
+		private:
+			StringIteratorGlyph(const String* string, size_t index) : s(string), idx(index) {}
+			const String* s;
+			size_t idx;
+		};
+
+		// Default begin()/end() iterate memory (bytes) for now; the range ends at Data.size(), so it includes the terminating '\0'. Both iterator types are read-only, hence every accessor is const and also works on const Strings.
+		StringIteratorMemory begin() const { return StringIteratorMemory(this, 0); }
+		StringIteratorMemory end() const { return StringIteratorMemory(this, Data.size()); }
+		StringIteratorMemory cbegin() const { return StringIteratorMemory(this, 0); }
+		StringIteratorMemory cend() const { return StringIteratorMemory(this, Data.size()); }
+		StringIteratorGlyph beginGlyph() const { return StringIteratorGlyph(this, 0); }
+		StringIteratorGlyph endGlyph() const { return StringIteratorGlyph(this, Data.size()); }
+
+		// Range over the stored bytes, from begin() to end().
+		::pe::core::utils::Range<StringIteratorMemory> IterateMemory() const
+		{
+			return ::pe::core::utils::Range<StringIteratorMemory>(begin(), end());
+		}
+
+		// Range from beginGlyph() to endGlyph().
+		::pe::core::utils::Range<StringIteratorGlyph> IterateGlyphs() const
+		{
+			return ::pe::core::utils::Range<StringIteratorGlyph>(beginGlyph(), endGlyph());
+		}
+
 	private:
 
 		String(std::vector<char> rawData) : Data(std::move(rawData)) { Data.push_back('\0'); }
diff --git a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
index a1a60b11..047d4eb2 100644
--- a/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
+++ b/PolyEngine/Tests/CoreTests/Src/StringTests.cpp
@@ -153,4 +153,74 @@ TEST_CASE("UTF-8 string conversion tests", "[String]")
 	::pe::core::storage::String cyrilicTest("Θεοδωράτου, Ελένη");
 	asciiConverted = cyrilicTest.toASCII();
 	REQUIRE(asciiConverted == "Theodoratou, Elene"); // this preserves it somehow
+}
+
+TEST_CASE("STL iterator tests", "[String]")
+{
+	::pe::core::storage::String s1("regular string");
+	::pe::core::storage::String s2;
+
+	size_t i = 0;
+	for(auto c : s1) // range-based for goes through String::begin()/end()
+	{
+		UNUSED(c);
+		++i;
+	}
+	REQUIRE(i == s1.GetLength() + 1);// +1 because end() is placed at Data.size(), i.e. after the terminating \0
+
+	i = 0;
+	for(auto c : s2) // default-constructed String
+	{
+		UNUSED(c);
+		++i;
+	}
+	REQUIRE(i == s2.GetLength() + 1);
+}
+
+TEST_CASE("Memory iterator tests", "[String]")
+{
+	::pe::core::storage::String s1("regular string");
+	::pe::core::storage::String s2;
+
+	size_t i = 0;
+	for(auto c : s1.IterateMemory()) // same begin()/end() pair, wrapped in a Range
+	{
+		UNUSED(c);
+		++i;
+	}
+	REQUIRE(i == s1.GetLength() + 1); // +1: the range includes the terminating \0
+
+	i = 0;
+	for(auto c : s2.IterateMemory())
+	{
+		UNUSED(c);
+		++i;
+	}
+	REQUIRE(i == s2.GetLength() + 1);
+
+	auto it = s1.begin();
+	auto it2 = s1.end();
+	//--it; // left disabled: operator-- asserts idx > 0 and begin() is at index 0
+	++it;
+	--it2; // one before end(): the terminator
+	REQUIRE(*it == 'e');
+	REQUIRE(*it2 == '\0');
+	it++;
+	REQUIRE(*it == 'g');
+	--it;
+	REQUIRE(*it == 'e');
+	it--;
+	REQUIRE(*it == 'r');
+	it2 = s1.begin();
+	REQUIRE(it == it2); // both at index 0 again
+	++it2;
+	REQUIRE(it != it2);
+}
+
+TEST_CASE("Glyph iterator tests", "[String]")
+{
+	//::pe::core::storage::String s1("regular string");
+//	::pe::core::storage::String s2("");
+	// TODO: empty placeholder - this case asserts nothing yet.
+	// NOTE(review): StringIteratorGlyph::operator++ only does ++idx (one byte per step), so glyph-level expectations cannot be written yet.
+}
\ No newline at end of file