Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 - 2023 Apple Inc. and the Swift project authors
// Copyright (c) 2021 - 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
Expand All @@ -15,7 +15,7 @@
#if _runtime(_ObjC)
import Foundation

// Cache of opened files
// Cache of opened files
var cachedFiles: [String: String] = [:]

func readInputFile(_ filename: String) -> String {
Expand Down Expand Up @@ -128,10 +128,10 @@ func parseBinaryProperties(

let info = line.split(separator: "#")
let components = info[0].split(separator: ";")

// Get the property first because we may not care about it.
let filteredProperty = components[1].filter { !$0.isWhitespace }

guard availableBinaryProperties.contains(filteredProperty) else {
continue
}
Expand Down Expand Up @@ -187,12 +187,12 @@ func parseNumericTypes(
guard !line.hasPrefix("#") else {
continue
}

let info = line.split(separator: "#")
let components = info[0].split(separator: ";")

let filteredProperty = components[1].filter { !$0.isWhitespace }

let numericType: Unicode.NumericType

switch filteredProperty {
Expand All @@ -205,9 +205,9 @@ func parseNumericTypes(
default:
continue
}

let filteredScalars = components[0].filter { !$0.isWhitespace }

let scalars = parseScalars(String(filteredScalars))

for scalar in scalars {
Expand All @@ -225,12 +225,12 @@ func parseNumericValues(
guard !line.hasPrefix("#") else {
continue
}

let info = line.split(separator: "#")
let components = info[0].split(separator: ";")

let filteredProperty = components[3].filter { !$0.isWhitespace }

let value: Double

// If we have a division, split the numerator and denominator and perform
Expand All @@ -247,7 +247,7 @@ func parseNumericValues(
}

let filteredScalars = components[0].filter { !$0.isWhitespace }

let scalars = parseScalars(String(filteredScalars))

for scalar in scalars {
Expand Down Expand Up @@ -286,7 +286,7 @@ func parseMappings(
) {
for line in data.split(separator: "\n") {
let components = line.split(separator: ";", omittingEmptySubsequences: false)

let scalarStr = components[0]
guard let scalar = Unicode.Scalar(UInt32(scalarStr, radix: 16)!) else {
continue
Expand All @@ -303,7 +303,7 @@ func parseMappings(

result[scalar, default: [:]]["lower"] = mapping
}

if let title = UInt32(components[14], radix: 16) {
let mapping = String(Unicode.Scalar(title)!)

Expand All @@ -320,27 +320,27 @@ func parseSpecialMappings(
guard !line.hasPrefix("#") else {
continue
}

let components = line.split(separator: ";", omittingEmptySubsequences: false)

// Conditional mappings have an extra component with the conditional name.
// Ignore those.
guard components.count == 5 else {
continue
}

guard let scalar = Unicode.Scalar(UInt32(components[0], radix: 16)!) else {
continue
}

let lower = components[1].split(separator: " ").map {
Character(Unicode.Scalar(UInt32($0, radix: 16)!)!)
}

let title = components[2].split(separator: " ").map {
Character(Unicode.Scalar(UInt32($0, radix: 16)!)!)
}

let upper = components[3].split(separator: " ").map {
Character(Unicode.Scalar(UInt32($0, radix: 16)!)!)
}
Expand Down Expand Up @@ -369,7 +369,7 @@ public let mappings: [Unicode.Scalar: [String: String]] = {
#else
let unicodeData = readInputFile("UnicodeData.txt")
#endif

let specialCasing = readInputFile("SpecialCasing.txt")

parseMappings(unicodeData, into: &result)
Expand Down Expand Up @@ -651,22 +651,22 @@ func parseCaseFoldings(
guard !line.hasPrefix("#") else {
continue
}

let components = line.split(separator: ";")

let status = components[1].filter { !$0.isWhitespace }

// We only care about Common and Full case mappings.
guard status == "C" || status == "F" else {
continue
}

let scalar = Unicode.Scalar(parseScalars(String(components[0])).lowerBound)!

let mapping = components[2].split(separator: " ").map {
Unicode.Scalar(UInt32($0, radix: 16)!)!
}

var mappingString = ""

for scalar in mapping {
Expand Down Expand Up @@ -710,6 +710,7 @@ extension Unicode {
case bassaVah = "Bassa_Vah"
case batak = "Batak"
case bengali = "Bengali"
case beriaErfe = "Beria_Erfe"
case bhaiksuki = "Bhaiksuki"
case bopomofo = "Bopomofo"
case brahmi = "Brahmi"
Expand Down Expand Up @@ -835,6 +836,7 @@ extension Unicode {
case sharada = "Sharada"
case shavian = "Shavian"
case siddham = "Siddham"
case sidetic = "Sidetic"
case signWriting = "SignWriting"
case sinhala = "Sinhala"
case sogdian = "Sogdian"
Expand All @@ -849,6 +851,7 @@ extension Unicode {
case taiLe = "Tai_Le"
case taiTham = "Tai_Tham"
case taiViet = "Tai_Viet"
case taiYo = "Tai_Yo"
case takri = "Takri"
case tamil = "Tamil"
case tangsa = "Tangsa"
Expand All @@ -860,6 +863,7 @@ extension Unicode {
case tifinagh = "Tifinagh"
case tirhuta = "Tirhuta"
case todhri = "Todhri"
case tolongSiki = "Tolong_Siki"
case toto = "Toto"
case tuluTigalari = "Tulu_Tigalari"
case ugaritic = "Ugaritic"
Expand Down Expand Up @@ -922,6 +926,7 @@ func classifyScriptProperty(
case "bass", "bassavah": return .bassaVah
case "batk", "batak": return .batak
case "beng", "bengali": return .bengali
case "berf", "beriaerfe": return .beriaErfe
case "bhks", "bhaiksuki": return .bhaiksuki
case "bopo", "bopomofo": return .bopomofo
case "brah", "brahmi": return .brahmi
Expand Down Expand Up @@ -1041,6 +1046,7 @@ func classifyScriptProperty(
case "shaw", "shavian": return .shavian
case "shrd", "sharada": return .sharada
case "sidd", "siddham": return .siddham
case "sidt", "sidetic": return .sidetic
case "sind", "khudawadi": return .khudawadi
case "sinh", "sinhala": return .sinhala
case "sogd", "sogdian": return .sogdian
Expand All @@ -1058,6 +1064,7 @@ func classifyScriptProperty(
case "taml", "tamil": return .tamil
case "tang", "tangut": return .tangut
case "tavt", "taiviet": return .taiViet
case "tayo", "taiyo": return .taiYo
case "telu", "telugu": return .telugu
case "tfng", "tifinagh": return .tifinagh
case "tglg", "tagalog": return .tagalog
Expand All @@ -1067,6 +1074,7 @@ func classifyScriptProperty(
case "tirh", "tirhuta": return .tirhuta
case "tnsa", "tangsa": return .tangsa
case "todr", "todhri": return .todhri
case "tols", "tolongsiki": return .tolongSiki
case "toto": return .toto
case "tutg", "tulutigalari": return .tuluTigalari
case "ugar", "ugaritic": return .ugaritic
Expand Down
4 changes: 2 additions & 2 deletions stdlib/private/StdlibUnicodeUnittest/WordBreaking.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Copyright (c) 2022 - 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
Expand Down Expand Up @@ -42,7 +42,7 @@ func parseWordBreakTests(

// If this is a break, record the +1 count. Otherwise it is × which is
// not a break.
if components[i] == "÷" {
if components[i].hasPrefix("÷") {
words.append("")
}
}
Expand Down
25 changes: 11 additions & 14 deletions stdlib/public/core/StringGraphemeBreaking.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
// Copyright (c) 2014 - 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
Expand Down Expand Up @@ -221,7 +221,7 @@ extension _StringGuts {
return 1
}
}

return _opaqueComplexCharacterStride(startingAt: i)
}

Expand Down Expand Up @@ -733,12 +733,12 @@ extension _GraphemeBreakingState {
}

let x = Unicode._GraphemeBreakProperty(from: scalar1)

// GB4 handled here because we don't need to know `y` for this case
if x == .control {
return true
}

// This variable and the defer statement help toggle the isInEmojiSequence
// state variable to false after every decision of 'shouldBreak'. If we
// happen to see a rhs .extend or .zwj, then it's a signal that we should
Expand All @@ -752,7 +752,7 @@ extension _GraphemeBreakingState {
isInEmojiSequence = enterEmojiSequence
isInIndicSequence = enterIndicSequence
}

let y = Unicode._GraphemeBreakProperty(from: scalar2)

switch (x, y) {
Expand Down Expand Up @@ -800,7 +800,7 @@ extension _GraphemeBreakingState {
// sequence; the sequence continues through subsequent extend/extend and
// extend/zwj pairs.
if (
x == .extendedPictographic || (isInEmojiSequence && x == .extend)
scalar1._isExtendedPictographic || (isInEmojiSequence && x == .extend)
) {
enterEmojiSequence = true
}
Expand Down Expand Up @@ -859,7 +859,7 @@ extension _GraphemeBreakingState {
return false

// GB11
case (.zwj, .extendedPictographic):
case (.zwj, _) where scalar2._isExtendedPictographic:
return !isInEmojiSequence

// GB12 & GB13
Expand Down Expand Up @@ -952,7 +952,7 @@ fileprivate func _shouldBreakWithLookback(
return false

// GB11
case (.zwj, .extendedPictographic):
case (.zwj, _) where scalar2._isExtendedPictographic:
return !_checkIfInEmojiSequence(at: index, with: previousScalar)

// GB12 & GB13
Expand Down Expand Up @@ -1030,14 +1030,11 @@ fileprivate func _checkIfInEmojiSequence(
i = prev.start
let gbp = Unicode._GraphemeBreakProperty(from: prev.scalar)

switch gbp {
case .extend:
if gbp == .extend {
continue
case .extendedPictographic:
return true
default:
return false
}

return prev.scalar._isExtendedPictographic
}
return false
}
Expand Down
Loading