Merge pull request #278 from TouchInstinct/INTERNAL-377_Placeholder_generation_from_regex_source_expression

INTERNAL-377: Placeholder generation from regex source expression
This commit is contained in:
airatmeister 2022-12-21 12:17:18 +03:00 committed by GitHub
commit 1cef096d45
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 231 additions and 34 deletions

View File

@ -3,5 +3,6 @@ apply from: "../android-configs/lib-config.gradle"
dependencies {
// ANTLR tool and runtime: the regex generator code imports org.antlr.v4.runtime classes.
implementation 'org.antlr:antlr4:4.9.2'
implementation 'org.antlr:antlr4-runtime:4.9.2'
// Decoro: provides the mask, slot and format-watcher classes (ru.tinkoff.decoro.*).
implementation 'ru.tinkoff.decoro:decoro:1.5.1'
// JUnit 4 for the unit tests.
testImplementation 'junit:junit:4.13.2'
}

View File

@ -1,47 +1,30 @@
package ru.touchin.roboswag.textprocessing
import org.antlr.v4.runtime.CharStreams
import org.antlr.v4.runtime.CommonTokenStream
import org.antlr.v4.runtime.tree.ParseTreeWalker
import ru.touchin.roboswag.textprocessing.pcre.parser.PCREBaseListener
import ru.touchin.roboswag.textprocessing.pcre.parser.PCRELexer
import ru.touchin.roboswag.textprocessing.pcre.parser.PCREParser
import android.widget.TextView
import ru.touchin.roboswag.textprocessing.generators.DecoroMaskGenerator
import ru.touchin.roboswag.textprocessing.generators.PlaceholderGenerator
import ru.touchin.roboswag.textprocessing.generators.regexgenerator.RegexReplaceGenerator
/**
 * Formats text with the regular expression [regex] and a replacement template
 * derived from that same expression, exposes a generated placeholder, and can
 * install an input mask on a TextView.
 *
 * NOTE(review): this span looks like a rendered diff in which removed and added
 * lines are interleaved (two `regexReplaceString` declarations, two
 * `getFormatText` definitions, an unterminated `walker.walk(` call) — confirm
 * against the real file before relying on it.
 */
class TextFormatter(private val regex: String) {
private var currentGroupIndex = 1
private var regexReplaceString = ""
private val regexReplaceGenerator = RegexReplaceGenerator()
private val decoroMaskGenerator = DecoroMaskGenerator()
// Result of walking the regex: replacement template plus allowed symbols per position.
private val pcreGeneratorItem = regexReplaceGenerator.regexToRegexReplace(regex)
private val regexReplaceString = pcreGeneratorItem.regexReplaceString
private val matrixOfSymbols = pcreGeneratorItem.matrixOfSymbols
private val placeholderGenerator = PlaceholderGenerator(matrixOfSymbols)
init {
regexToRegexReplace(regex)
}
fun getFormatText(inputText: String) = inputText.replace(Regex(regex), regexReplaceString)
/** Replaces every match of [regex] in [inputText] with the generated replacement template. */
fun getFormatText(inputText: String): String {
return inputText.replace(Regex(regex), regexReplaceString)
}
/** Returns the placeholder built by the placeholder generator. */
fun getPlaceholder() = placeholderGenerator.getPlaceholder()
/** Returns the generated replacement template. */
fun getRegexReplace() = regexReplaceString
// Walks the parsed regex: each capture appends "$<group index>", each literal appends its text.
private fun regexToRegexReplace(regex: String) {
val stringStream = CharStreams.fromString(regex)
val lexer = PCRELexer(stringStream)
val parser = PCREParser(CommonTokenStream(lexer))
val parseContext = parser.parse()
val walker = ParseTreeWalker()
walker.walk(
object : PCREBaseListener() {
override fun enterCapture(ctx: PCREParser.CaptureContext) {
super.enterCapture(ctx)
regexReplaceString += "\$$currentGroupIndex"
currentGroupIndex++
}
override fun enterLiteral(ctx: PCREParser.LiteralContext) {
super.enterLiteral(ctx)
regexReplaceString += ctx.shared_literal().text
}
},
parseContext
/** Builds a format watcher from the placeholder and allowed symbols and installs it on [textView]. */
fun mask(textView: TextView) {
val formatWatcher = decoroMaskGenerator.mask(
placeholderGenerator.getPlaceholder(),
matrixOfSymbols
)
formatWatcher.installOn(textView)
}
}

View File

@ -0,0 +1,27 @@
package ru.touchin.roboswag.textprocessing.generators
import ru.tinkoff.decoro.MaskImpl
import ru.tinkoff.decoro.slots.PredefinedSlots
import ru.tinkoff.decoro.slots.Slot
import ru.tinkoff.decoro.watchers.MaskFormatWatcher
import ru.touchin.roboswag.textprocessing.validators.CustomValidator
class DecoroMaskGenerator {

    /**
     * Builds a format watcher whose mask has one slot per placeholder character.
     *
     * A position with exactly one allowed character becomes a hardcoded slot
     * holding the corresponding [placeholder] character; any other position
     * becomes a custom slot that accepts only its allowed characters.
     *
     * Empty rows of [matrixOfSymbols] are skipped before pairing rows with
     * placeholder characters. The placeholder generator emits no character for
     * an empty row, so indexing the raw matrix by placeholder position would
     * shift every slot that follows an empty row onto the wrong character set.
     *
     * @param placeholder placeholder text, one character per mask position.
     * @param matrixOfSymbols allowed characters for each position.
     * @return a watcher over a terminated mask built from the generated slots;
     * slots are produced only for positions present in both arguments.
     */
    fun mask(placeholder: String, matrixOfSymbols: Matrix<Char>): MaskFormatWatcher {
        val allowedPerPosition = matrixOfSymbols.filter { it.isNotEmpty() }
        val slots: List<Slot> = placeholder.toList().zip(allowedPerPosition) { symbol, allowed ->
            if (allowed.size == 1) {
                PredefinedSlots.hardcodedSlot(symbol)
            } else {
                CustomValidator.customSlot(allowed)
            }
        }
        return MaskFormatWatcher(MaskImpl.createTerminated(slots.toTypedArray()))
    }
}

View File

@ -0,0 +1,3 @@
package ru.touchin.roboswag.textprocessing.generators
/** A two-dimensional list: a list of rows, where every row is a list of [T]. */
typealias Matrix<T> = List<List<T>>

View File

@ -0,0 +1,30 @@
package ru.touchin.roboswag.textprocessing.generators
/**
 * Builds a placeholder string from the symbols allowed at each position.
 *
 * Each non-empty row of the matrix contributes exactly one character:
 * - a single-symbol row contributes that symbol;
 * - a multi-symbol row contributes the next symbol in turn. Rows that are equal
 *   share one cursor, and the cursor wraps to the first symbol after the last.
 *
 * Empty rows contribute nothing.
 */
class PlaceholderGenerator(matrixOfSymbols: Matrix<Char>) {

    private val placeholder: String = buildString {
        // Next symbol index per distinct row; lists compare by content, so equal rows share it.
        val cursors = HashMap<List<Char>, Int>()
        for (row in matrixOfSymbols) {
            when (row.size) {
                0 -> Unit
                1 -> append(row[0])
                else -> {
                    val stored = cursors[row] ?: 0
                    // Wrap around once every symbol of this row has been used.
                    val position = if (stored >= row.size) 0 else stored
                    append(row[position])
                    cursors[row] = position + 1
                }
            }
        }
    }

    /** Returns the placeholder computed at construction time. */
    fun getPlaceholder(): String = placeholder
}

View File

@ -0,0 +1,8 @@
package ru.touchin.roboswag.textprocessing.generators.regexgenerator
import ru.touchin.roboswag.textprocessing.generators.Matrix
/**
 * Holds the two values produced from one regular expression.
 *
 * @property regexReplaceString replacement template generated from the expression.
 * @property matrixOfSymbols rows of characters, one row per placeholder position.
 */
class PCREGeneratorItem(
val regexReplaceString: String,
val matrixOfSymbols: Matrix<Char>
)

View File

@ -0,0 +1,103 @@
package ru.touchin.roboswag.textprocessing.generators.regexgenerator
import ru.touchin.roboswag.textprocessing.pcre.parser.PCREBaseListener
import ru.touchin.roboswag.textprocessing.pcre.parser.PCREParser
/**
 * Parse-tree listener that collects, while a PCRE expression is walked:
 * - the replacement template: `$<n>` for each capture group, interleaved with
 *   the text of each literal;
 * - for every placeholder position, the characters that may appear there.
 *
 * Call [toPCREGeneratorItem] after the walk to obtain the result.
 */
class PCREGeneratorListener : PCREBaseListener() {

    /** Row `i` holds the characters allowed at placeholder position `i`. */
    private val matrixOfSymbols = mutableListOf<List<Char>>()

    private var currentGroupIndex = 1

    private var regexReplaceString = ""

    /**
     * Characters produced by the most recently visited element: a shared atom,
     * a character class such as `[1-2]` or `[A-B]`, or a literal (possibly
     * escaped). [enterDigits] re-adds this row once per extra repetition.
     */
    private var listOfSymbols: List<Char> = emptyList()

    /** Appends the next `$<group index>` reference to the replacement template. */
    override fun enterCapture(ctx: PCREParser.CaptureContext) {
        super.enterCapture(ctx)
        regexReplaceString += "\$$currentGroupIndex"
        currentGroupIndex++
    }

    /**
     * Records a digit position: the allowed characters are `1`..`9` followed by `0`.
     *
     * NOTE(review): every shared atom is handled as a digit class here; if the
     * grammar routes other atoms through this callback they also become digit
     * rows — confirm against the grammar.
     */
    override fun enterShared_atom(ctx: PCREParser.Shared_atomContext) {
        super.enterShared_atom(ctx)
        listOfSymbols = ('1'..'9') + '0'
        matrixOfSymbols.add(listOfSymbols)
    }

    /**
     * Records the characters allowed by a character class.
     *
     * A class with several atoms (for example two ranges in one pair of brackets)
     * is expanded atom by atom, each atom re-wrapped in the class delimiters; a
     * class with a single atom is expanded from its full text.
     */
    override fun enterCharacter_class(ctx: PCREParser.Character_classContext) {
        super.enterCharacter_class(ctx)
        val atoms = ctx.cc_atom()
        listOfSymbols = if (atoms.size > 1) {
            val opening = ctx.CharacterClassStart().text
            val closing = ctx.CharacterClassEnd()[0].text
            atoms.flatMap { atom -> availableSymbolsToList(opening + atom.text + closing) }
        } else {
            availableSymbolsToList(ctx.text)
        }
        matrixOfSymbols.add(listOfSymbols)
    }

    /**
     * Duplicates the latest row for a repetition count, e.g. `[A-B]{6}` needs
     * five more copies of the row already added for `[A-B]`.
     *
     * NOTE(review): this runs for every digits node, so a range quantifier such
     * as `{2,4}` presumably triggers it once per number — confirm against the grammar.
     */
    override fun enterDigits(ctx: PCREParser.DigitsContext) {
        super.enterDigits(ctx)
        // A count of 1 or less adds nothing: repeat() is a no-op for non-positive values.
        repeat(ctx.text.toInt() - 1) {
            matrixOfSymbols.add(listOfSymbols)
        }
    }

    /** Appends the literal to the replacement template and records its characters as one row. */
    override fun enterLiteral(ctx: PCREParser.LiteralContext) {
        super.enterLiteral(ctx)
        regexReplaceString += ctx.shared_literal().text
        listOfSymbols = ctx.text.toList()
        matrixOfSymbols.add(listOfSymbols)
    }

    /**
     * Returns the collected replacement template together with the symbol rows,
     * with backslash characters removed from every row.
     */
    fun toPCREGeneratorItem() = PCREGeneratorItem(
        regexReplaceString,
        matrixOfSymbols.map { row -> row.filter { symbol -> symbol != '\\' } }
    )

    /**
     * Expands a bracketed atom such as `[A-B]` into its allowed characters.
     *
     * The character right after the opening bracket and the one right before the
     * closing bracket are taken as the bounds. When both are letters or digits,
     * the result is every letter or digit in that range; otherwise the result is
     * just the two bound characters.
     */
    private fun availableSymbolsToList(ctxText: String): List<Char> {
        val lowerBound = ctxText[1]
        val upperBound = ctxText[ctxText.length - 2]
        return if (lowerBound.isLetterOrDigit() && upperBound.isLetterOrDigit()) {
            (lowerBound..upperBound).filter { it.isLetterOrDigit() }
        } else {
            listOf(lowerBound, upperBound)
        }
    }
}

View File

@ -0,0 +1,21 @@
package ru.touchin.roboswag.textprocessing.generators.regexgenerator
import org.antlr.v4.runtime.CharStreams
import org.antlr.v4.runtime.CommonTokenStream
import org.antlr.v4.runtime.tree.ParseTreeWalker
import ru.touchin.roboswag.textprocessing.pcre.parser.PCRELexer
import ru.touchin.roboswag.textprocessing.pcre.parser.PCREParser
/** Turns a regular expression into its replacement template and per-position symbol rows. */
class RegexReplaceGenerator {

    /**
     * Lexes and parses [regex] with the PCRE grammar, walks the resulting tree
     * with a [PCREGeneratorListener], and returns what the listener collected.
     */
    fun regexToRegexReplace(regex: String): PCREGeneratorItem {
        val tokens = CommonTokenStream(PCRELexer(CharStreams.fromString(regex)))
        val tree = PCREParser(tokens).parse()
        return PCREGeneratorListener()
            .also { listener -> ParseTreeWalker().walk(listener, tree) }
            .toPCREGeneratorItem()
    }
}

View File

@ -0,0 +1,16 @@
package ru.touchin.roboswag.textprocessing.validators
import ru.tinkoff.decoro.slots.Slot
/**
 * Slot validator that accepts only characters contained in a fixed list.
 * Instances are created through [customSlot].
 */
class CustomValidator private constructor(
    private val slotSymbols: List<Char>
) : Slot.SlotValidator {

    /** Returns `true` when [value] is one of the allowed characters. */
    override fun validate(value: Char): Boolean = value in slotSymbols

    companion object {
        /** Creates a slot with no preset value that accepts only [slotSymbols]. */
        fun customSlot(slotSymbols: List<Char>): Slot = Slot(null, CustomValidator(slotSymbols))
    }
}

View File

@ -12,6 +12,7 @@ class TextFormatterTest {
val item = TextFormatter(regex)
Assert.assertEquals("$1\\/$2", item.getRegexReplace())
Assert.assertEquals("06/22", item.getFormatText(inputText))
Assert.assertEquals("12/34", item.getPlaceholder())
}
@Test
@ -21,6 +22,7 @@ class TextFormatterTest {
val item = TextFormatter(regex)
Assert.assertEquals("\$1 \$2 \$3 \$4", item.getRegexReplace())
Assert.assertEquals("1234 3456 1235 3534", item.getFormatText(inputText))
Assert.assertEquals("1234 5678 9012 3456", item.getPlaceholder())
}
@Test
@ -30,6 +32,7 @@ class TextFormatterTest {
val item = TextFormatter(regex)
Assert.assertEquals("\\+7 \\($1\\) $2 $3 $4", item.getRegexReplace())
Assert.assertEquals("+7 (909) 134 44 22", item.getFormatText(inputText))
Assert.assertEquals("+7 (123) 456 78 90", item.getPlaceholder())
}
@Test
@ -39,6 +42,7 @@ class TextFormatterTest {
val item = TextFormatter(regex)
Assert.assertEquals("\$1-\$2 № \$3", item.getRegexReplace())
Assert.assertEquals("IV-БЮ № 349823", item.getFormatText(inputText))
Assert.assertEquals("AB-АБ № 123456", item.getPlaceholder())
}
@Test
@ -48,5 +52,6 @@ class TextFormatterTest {
val item = TextFormatter(regex)
Assert.assertEquals("\$1\$2 ₽", item.getRegexReplace())
Assert.assertEquals("5332.4 ₽", item.getFormatText(inputText))
Assert.assertEquals("1.2 ₽", item.getPlaceholder())
}
}