`TextFormatter`, `RegexReplaceGenerator` & `RegexPlaceholderGenerator` added

This commit is contained in:
Vladimir Makarov 2023-04-17 16:25:45 +03:00
parent f70fec348c
commit 69418fb24c
8 changed files with 573 additions and 1 deletions

View File

@ -85,7 +85,7 @@ let package = Package(
.target(name: "TIPagination", dependencies: ["Cursors", "TISwiftUtils"], path: "TIPagination/Sources"),
.target(name: "TIAuth", dependencies: ["TIFoundationUtils", "TIUIKitCore", "KeychainAccess"], path: "TIAuth/Sources"),
.target(name: "TIEcommerce", dependencies: ["TIFoundationUtils", "TISwiftUtils", "TINetworking", "TIUIKitCore", "TIUIElements"], path: "TIEcommerce/Sources"),
.target(name: "TITextProcessing", dependencies: ["Antlr4"], path: "TITextProcessing/Sources"),
.target(name: "TITextProcessing", dependencies: [.product(name: "Antlr4", package: "antlr4")], path: "TITextProcessing/Sources"),
// MARK: - Tests
@ -93,5 +93,9 @@ let package = Package(
name: "TITimerTests",
dependencies: ["TIFoundationUtils"],
path: "Tests/TITimerTests"),
.testTarget(
name: "TITextProcessingTests",
dependencies: ["TITextProcessing"],
path: "Tests/TITextProcessingTests")
]
)

102
TITextProcessing/README.md Normal file
View File

@ -0,0 +1,102 @@
# `TITextProcessing`
### Библиотека для работы с регулярными выражениями
## - `TextFormatter`
Класс `TextFormatter` представляет собой сервис, принимающий регулярное выражение на вход и предоставляющий возможность генерации следующих объектов:<br>
- `Replacement template` из `getRegexReplacement()`;<br>
- `Placeholder` из `getRegexPlaceholder()`;<br>
- `Formatted text` из `getFormattedText(_ text: String)`
-
#### `func getRegexReplacement()`
Метод, преобразующий входящее регулярное выражение в шаблон подстановки, например:
**Input**: `(\\d{4}) ?(\\d{4}) ?(\\d{4}) ?(\\d{4})`<br>
**Output**: `$1 $2 $3 $4`
-
#### `func getRegexPlaceholder()`
Метод, преобразующий входящее регулярное выражение в текст-заполнитель a.k.a placeholder, например:
**Input**: `(\\d{4}) ?(\\d{4}) ?(\\d{4}) ?(\\d{4})`<br>
**Output**: `1234 5678 9012 3456`
-
#### `func getFormattedText(_ text: String) -> String`
Метод, преобразующий входящий текст к нужному формату, заранее определенному посредством указания регулярного выражения, например:
**Input**: `2200111555550080`<br>
**Output**: `2200 1115 5555 0080`
> P.S. Учитываем, что `TextFormatter` был проинициализирован со следующим регулярным выражением: `(\\d{4}) ?(\\d{4}) ?(\\d{4}) ?(\\d{4})`
## - `RegexReplaceGenerator`
Класс, отвечающий за генерацию `PCREGeneratorItem` из входящего регулярного выражения. Использует библиотеку `Antlr4` и `PCRE` для работы.
-
#### `static func generateReplacement(for regex: String) -> PCREGeneratorItem`
Функция, преобразующая входящее регулярное выражение в структуру, содержащую шаблон подстановки и матрицу символов, например:
```swift
let item = RegexReplaceGenerator.generateReplacement(for: "(\\d{2})\\/?(\\d{2})")
print(item.regexReplaceString)
/*
Выведет в консоль:
"$1\\/$2"
*/
print(item.matrixOfSymbols)
/*
Выведет в консоль:
[
["1", "2", "3", "4", "5", "6", "7", "8", "9", "0"],
["1", "2", "3", "4", "5", "6", "7", "8", "9", "0"],
["/"],
["1", "2", "3", "4", "5", "6", "7", "8", "9", "0"],
["1", "2", "3", "4", "5", "6", "7", "8", "9", "0"]
]
*/
```
Итоговый `PCREGeneratorItem` содержит следующие данные:
`regexReplaceString` - итоговый шаблон подстановки для изначального регулярного выражения;<br>
`matrixOfSymbols` - матрица символов, содержащая все возможные символы для каждого элемента в изначальном регулярном выражении
## - `RegexPlaceholderGenerator`
Класс, отвечающий за генерацию текста-заполнителя a.k.a placeholder.
-
#### `static func generatePlaceholder(matrixOfSymbols: [[Character]]) -> String`
Функция, преобразующая входящую матрицу символов в текст-заполнитель, например:
```swift
let matrix: [[Character]] = [
["1", "2", "3", "4", "5", "6", "7", "8", "9", "0"],
["1", "2", "3", "4", "5", "6", "7", "8", "9", "0"],
["/"],
["1", "2", "3", "4", "5", "6", "7", "8", "9", "0"],
["1", "2", "3", "4", "5", "6", "7", "8", "9", "0"]
]
let placeholder = RegexPlaceholderGenerator.generatePlaceholder(matrixOfSymbols: matrix)
print(placeholder)
/*
Выведет в консоль:
"12/34"
*/
```

View File

@ -0,0 +1,56 @@
//
// Copyright (c) 2023 Touch Instinct
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the Software), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
import Foundation
/// Produces a placeholder string from a matrix of allowed symbols.
public final class RegexPlaceholderGenerator {

    /// Builds a placeholder by taking one symbol from every non-empty row of the matrix.
    ///
    /// Rows that contain a single symbol contribute that symbol as is. Rows with several
    /// symbols share a cursor with every other row that has identical contents: each use
    /// takes the symbol under the cursor and advances it, wrapping to the first symbol
    /// once the row is exhausted (ten-digit rows therefore yield `1`, `2`, … `0`, `1`, …).
    ///
    /// - Parameter matrixOfSymbols: rows of candidate symbols, one row per output position.
    /// - Returns: the assembled placeholder; empty rows add nothing to it.
    public static func generatePlaceholder(matrixOfSymbols: [[Character]]) -> String {
        var placeholder = ""
        // Cursor per distinct row content; identical rows advance the same cursor.
        var cursors = [[Character]: Int]()

        for row in matrixOfSymbols where !row.isEmpty {
            guard row.count > 1 else {
                placeholder.append(row[0])
                continue
            }

            let storedCursor = cursors[row, default: 0]
            // Start over from the first symbol when the cursor ran past the row end.
            let position = storedCursor < row.count ? storedCursor : 0

            placeholder.append(row[position])
            cursors[row] = position + 1
        }

        return placeholder
    }
}

View File

@ -0,0 +1,28 @@
//
// Copyright (c) 2023 Touch Instinct
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the Software), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
import Foundation
/// Result of processing a regular expression: a replacement template plus the
/// symbols that may appear at each position of a matching string.
public struct PCREGeneratorItem {

    /// Replacement template built for the source regular expression, e.g. `$1 $2 $3 $4`.
    public let regexReplaceString: String

    /// One row per output position; each row lists the symbols allowed at that position.
    public let matrixOfSymbols: [[Character]]

    /// Creates an item from its parts.
    ///
    /// Declared explicitly because the synthesized memberwise initializer of a public
    /// struct is internal, which would make the type impossible to construct from
    /// outside the module. Labels match the synthesized initializer, so existing
    /// call sites keep compiling.
    ///
    /// - Parameters:
    ///   - regexReplaceString: replacement template.
    ///   - matrixOfSymbols: rows of allowed symbols, one row per output position.
    public init(regexReplaceString: String, matrixOfSymbols: [[Character]]) {
        self.regexReplaceString = regexReplaceString
        self.matrixOfSymbols = matrixOfSymbols
    }
}

View File

@ -0,0 +1,172 @@
//
// Copyright (c) 2023 Touch Instinct
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the Software), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
import Foundation
import Antlr4
/// Parse-tree listener that collects, while a PCRE expression is being walked,
/// a replacement template and a matrix of symbols allowed at each position.
///
/// The collected data is exposed through `toPCREGeneratorItem()`.
public final class PCREGeneratorListener: PCREBaseListener {

    // MARK: - Properties

    /// Matrix of available symbols for the placeholder:
    /// the index is the symbol position, the value is every symbol available there.
    private var matrixOfSymbols = [[Character]]()

    /// Regex group index counter, 1 by default.
    private var currentGroupIndex = 1

    /// The final output replacement of the entered regex.
    private var regexReplaceString = ""

    /// Symbols of the most recently visited regex element.
    /// Could contain possible elements of a regex, e.g.:
    /// [1-2], \\d, [A-B] and elements not related to regex or escaped.
    private var listOfSymbols = [Character]()

    // MARK: - Overrides

    /// Called when a new capture group is found; appends `$<group index>` to the template.
    public override func enterCapture(_ ctx: PCREParser.CaptureContext) {
        super.enterCapture(ctx)

        regexReplaceString += "$\(currentGroupIndex)"
        currentGroupIndex += 1
    }

    /// Called when there is a digit symbol found, e.g.:
    /// \d{2} where \d is an indication of a digit symbol.
    ///
    /// NOTE(review): every shared atom is treated as a digit here; if the grammar also
    /// routes `\w`, `\s` and similar through this rule they get digits too — confirm.
    public override func enterShared_atom(_ ctx: PCREParser.Shared_atomContext) {
        super.enterShared_atom(ctx)

        listOfSymbols = Array("1234567890")
        matrixOfSymbols.append(listOfSymbols)
    }

    /// Called when there is a range found, e.g.:
    /// [А-дD-f] or [А-д].
    public override func enterCharacter_class(_ ctx: PCREParser.Character_classContext) {
        super.enterCharacter_class(ctx)

        let atoms = ctx.cc_atom()

        // Several atoms (e.g. [А-дD-f]) are expanded one by one, each wrapped into the
        // class delimiters. A single atom, or a class whose delimiters are unavailable,
        // is expanded from the whole class text.
        if atoms.count > 1,
           let classStart = ctx.CharacterClassStart()?.getText(),
           let classEnd = ctx.CharacterClassEnd().first?.getText() {
            var symbols = [Character]()

            for atom in atoms {
                symbols += getAvailableSymbols(for: classStart + atom.getText() + classEnd)
            }

            listOfSymbols = symbols
        } else {
            listOfSymbols = getAvailableSymbols(for: ctx.getText())
        }

        // Every path records the class in the matrix so positions stay aligned.
        matrixOfSymbols.append(listOfSymbols)
    }

    /// Called when there is a number of element duplication found, e.g.:
    /// [A-B]{6} where {6} is a number of required element duplication.
    public override func enterDigits(_ ctx: PCREParser.DigitsContext) {
        super.enterDigits(ctx)

        // The element itself was already recorded once when it was entered, so only
        // `count - 1` extra rows are needed. Counts of 0 and 1 add nothing
        // (building the range `1 ..< 0` would trap).
        guard let count = Int(ctx.getText()), count > 1 else {
            return
        }

        matrixOfSymbols.append(contentsOf: repeatElement(listOfSymbols, count: count - 1))
    }

    /// Called when there is a single non-group literal found, e.g.:
    /// (?:\\+7 ) where "+", "7" and " " are single non-group literals.
    public override func enterLiteral(_ ctx: PCREParser.LiteralContext) {
        super.enterLiteral(ctx)

        guard let text = ctx.shared_literal()?.getText() else {
            return
        }

        regexReplaceString += text
        listOfSymbols = Array(ctx.getText())
        matrixOfSymbols.append(listOfSymbols)
    }

    // MARK: - Public methods

    /// Packs the collected data into a `PCREGeneratorItem`.
    ///
    /// Backslashes are removed from every matrix row; the replacement template is
    /// returned as collected.
    public func toPCREGeneratorItem() -> PCREGeneratorItem {
        return PCREGeneratorItem(regexReplaceString: regexReplaceString,
                                 matrixOfSymbols: matrixOfSymbols.map { $0.filter { $0 != "\\" } })
    }

    // MARK: - Private methods

    /// Expands a delimited class such as `[A-Z]` into the symbols it covers.
    ///
    /// Only the second and the second-to-last characters of `ctxText` are inspected.
    /// When both are letters or numbers and form an ascending range, every letter or
    /// number between them (by Unicode scalar value) is returned; otherwise the two
    /// characters themselves are returned.
    ///
    /// - Parameter ctxText: class text including its delimiters.
    /// - Returns: the available symbols, or an empty array when the text is too short.
    private func getAvailableSymbols(for ctxText: String) -> [Character] {
        // `dropFirst`/`dropLast` never trap, unlike raw index arithmetic on short text.
        guard let rangeStart = ctxText.dropFirst().first,
              let rangeEnd = ctxText.dropLast().last else {
            return []
        }

        guard (rangeStart.isLetter || rangeStart.isNumber) && (rangeEnd.isLetter || rangeEnd.isNumber) else {
            return [rangeStart, rangeEnd]
        }

        // A reversed range (e.g. [z-a]) cannot form a ClosedRange; fall back to the bounds.
        guard let lowerScalar = rangeStart.unicodeScalars.first?.value,
              let upperScalar = rangeEnd.unicodeScalars.first?.value,
              lowerScalar <= upperScalar else {
            return [rangeStart, rangeEnd]
        }

        return (lowerScalar...upperScalar)
            .compactMap(UnicodeScalar.init)
            .map(Character.init)
            .filter { $0.isLetter || $0.isNumber }
    }
}

View File

@ -0,0 +1,45 @@
//
// Copyright (c) 2023 Touch Instinct
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the Software), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
import Foundation
import Antlr4
/// Turns a regular expression into a `PCREGeneratorItem` by parsing it with the
/// PCRE lexer/parser and walking the resulting tree with `PCREGeneratorListener`.
public final class RegexReplaceGenerator {

    /// Parses `regex` and collects its replacement template and symbol matrix.
    ///
    /// - Parameter regex: source regular expression.
    /// - Returns: the item assembled by the listener during the tree walk.
    /// - Note: Stops the program with `fatalError` when the parser cannot be created
    ///   or does not produce a parse tree. Errors thrown while walking are ignored.
    public static func generateReplacement(for regex: String) -> PCREGeneratorItem {
        let pcreLexer = PCRELexer(ANTLRInputStream(regex))
        let tokenStream = CommonTokenStream(pcreLexer)
        let pcreParser = try? PCREParser(tokenStream)

        guard let tree = try? pcreParser?.parse() as? ParseTree else {
            fatalError("Cannot parse input regex")
        }

        let listener = PCREGeneratorListener()
        let treeWalker = ParseTreeWalker()

        try? treeWalker.walk(listener, tree)

        return listener.toPCREGeneratorItem()
    }
}

View File

@ -0,0 +1,53 @@
//
// Copyright (c) 2023 Touch Instinct
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the Software), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
import Foundation
/// Formats text according to a regular expression and exposes the replacement
/// template and placeholder generated from that expression.
public final class TextFormatter {

    /// Regular expression this formatter was created with.
    private let regex: String

    /// - Parameter regex: regular expression describing the expected text format.
    public init(regex: String) {
        self.regex = regex
    }

    /// Returns the replacement template generated from the regular expression.
    public func getRegexReplacement() -> String {
        RegexReplaceGenerator.generateReplacement(for: regex).regexReplaceString
    }

    /// Returns the placeholder generated from the regular expression's symbol matrix.
    public func getRegexPlaceholder() -> String {
        let matrixOfSymbols = RegexReplaceGenerator.generateReplacement(for: regex).matrixOfSymbols

        return RegexPlaceholderGenerator.generatePlaceholder(matrixOfSymbols: matrixOfSymbols)
    }

    /// Replaces every match of the regular expression in `text` with the generated
    /// replacement template. Matching is case-insensitive.
    ///
    /// - Parameter text: text to format.
    /// - Returns: `text` with all matches rewritten by the template.
    /// - Note: Stops the program with `fatalError` when the regular expression
    ///   cannot be compiled by `NSRegularExpression`.
    public func getFormattedText(_ text: String) -> String {
        guard let expression = try? NSRegularExpression(pattern: regex, options: .caseInsensitive) else {
            fatalError("Cannot create NSRegularExpression from input regex")
        }

        // NSRange is measured in UTF-16 code units. `text.count` counts Characters and
        // is shorter for text containing emoji or other multi-unit characters, which
        // would leave the tail of the string unprocessed.
        let fullRange = NSRange(location: 0, length: text.utf16.count)

        return expression.stringByReplacingMatches(in: text,
                                                   options: [],
                                                   range: fullRange,
                                                   withTemplate: getRegexReplacement())
    }
}

View File

@ -0,0 +1,112 @@
//
// Copyright (c) 2023 Touch Instinct
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the Software), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
import XCTest
@testable import TITextProcessing
/// Checks the replacement template, placeholder and formatted text produced by
/// `TextFormatter` for several typical regular expressions.
final class TITextProcessingTests: XCTestCase {

    /// Expiry-date style expression: two digits, optional slash, two digits.
    func testDateRegex() {
        // given
        let formatter = TextFormatter(regex: "(\\d{2})\\/?(\\d{2})")

        // when / then
        XCTAssertEqual(formatter.getRegexReplacement(), "$1\\/$2")
        XCTAssertEqual(formatter.getRegexPlaceholder(), "12/34")
        XCTAssertEqual(formatter.getFormattedText("1525"), "15/25")
    }

    /// Card number: four groups of four digits separated by optional spaces.
    func testCardNumberRegex() {
        // given
        let formatter = TextFormatter(regex: "(\\d{4}) ?(\\d{4}) ?(\\d{4}) ?(\\d{4})")

        // when / then
        XCTAssertEqual(formatter.getRegexReplacement(), "$1 $2 $3 $4")
        XCTAssertEqual(formatter.getRegexPlaceholder(), "1234 5678 9012 3456")
        XCTAssertEqual(formatter.getFormattedText("2200111555550080"), "2200 1115 5555 0080")
    }

    /// Phone number with an optional non-capturing "+7 " prefix and optional parentheses.
    func testPhoneNumberRegex() {
        // given
        let formatter = TextFormatter(regex: "(?:\\+7 )?\\(?(\\d{3})\\)? ?(\\d{3}) ?(\\d{2}) ?(\\d{2})")

        // when / then
        XCTAssertEqual(formatter.getRegexReplacement(), "\\+7 \\($1\\) $2 $3 $4")
        XCTAssertEqual(formatter.getRegexPlaceholder(), "+7 (123) 456 78 90")
        XCTAssertEqual(formatter.getFormattedText("9995534820"), "+7 (999) 553 48 20")
    }

    /// Birth certificate: Latin pair, Cyrillic pair, optional "№" and six digits.
    func testBirthdayCertificateRegex() {
        // given
        let formatter = TextFormatter(regex: "([A-Z]{2})-?([А-Я]{2}) ?№? ?(\\d{6})")

        // when / then
        XCTAssertEqual(formatter.getRegexReplacement(), "$1-$2 № $3")
        XCTAssertEqual(formatter.getRegexPlaceholder(), "AB-АБ № 123456")
        XCTAssertEqual(formatter.getFormattedText("ABЮЯ689323"), "AB-ЮЯ № 689323")
    }

    /// Rouble amount: integer part, optional fractional part and optional "₽" sign.
    func testRoubleSumRegex() {
        // given
        let formatter = TextFormatter(regex: "(\\d+)([.,]\\d+)? ?₽?")

        // when / then
        XCTAssertEqual(formatter.getRegexReplacement(), "$1$2 ₽")
        XCTAssertEqual(formatter.getRegexPlaceholder(), "1.2 ₽")
        XCTAssertEqual(formatter.getFormattedText("1234.56"), "1234.56 ₽")
    }
}