package ai.passio.passiosdk.passiofood.voice.local.tokenization

import java.util.Arrays

/** Bit (0x20) that differs between an ASCII upper-case letter and its lower-case counterpart. */
private const val CASE_MASK = '\u0020'

/**
 * Basic tokenization: text cleanup, splitting on spaces, optional ASCII lower-casing and
 * punctuation splitting.
 *
 * @param doLowerCase when `true`, the ASCII letters `A`-`Z` of every token are converted to
 * lower case before punctuation is split off; every other character is left untouched.
 */
internal class BasicTokenizer(private val doLowerCase: Boolean) {

    /**
     * Tokenizes [text]: cleans it with [cleanText], splits it on spaces, lower-cases each token
     * when [doLowerCase] is set, and finally splits every token around punctuation characters.
     *
     * @param text the raw input text.
     * @return the resulting tokens in order of appearance; never contains empty strings.
     * @throws NullPointerException if [text] is `null`.
     */
    fun tokenize(text: String?): List<String> =
        // Sub-tokens produced by runSplitOnPunc are non-empty and cannot contain a space (the
        // tokens they come from were split on spaces), so they are collected directly instead
        // of being joined into a string and split again.
        whitespaceTokenize(cleanText(text)).flatMap { token ->
            runSplitOnPunc(if (doLowerCase) toLowerCase(token) else token)
        }

    /**
     * Lower-cases the ASCII letters `A`-`Z` of [string]. Non-ASCII characters are not changed.
     * Returns [string] itself when it contains no ASCII upper-case letter.
     */
    private fun toLowerCase(string: String): String {
        if (string.none { isUpperCase(it) }) {
            return string
        }
        val chars = string.toCharArray()
        for (i in chars.indices) {
            val c = chars[i]
            if (isUpperCase(c)) {
                // Flipping the case bit turns 'A'..'Z' into 'a'..'z'.
                chars[i] = (c.code xor CASE_MASK.code).toChar()
            }
        }
        return String(chars)
    }

    /** Returns `true` when [c] is one of the ASCII letters `A`-`Z`. */
    private fun isUpperCase(c: Char): Boolean = c in 'A'..'Z'

    companion object {
        /**
         * Removes unusable characters from [text] and normalizes its whitespace.
         *
         * Characters reported by `CharChecker.isInvalid` or `CharChecker.isControl` are dropped,
         * characters reported by `CharChecker.isWhitespace` are replaced by a single space, and
         * all remaining characters are copied unchanged.
         *
         * @throws NullPointerException if [text] is `null`.
         */
        fun cleanText(text: String?): String {
            if (text == null) {
                throw NullPointerException("The input String is null.")
            }
            return buildString(text.length) {
                for (ch in text) {
                    when {
                        // Skip the characters that cannot be used.
                        CharChecker.isInvalid(ch) || CharChecker.isControl(ch) -> Unit
                        CharChecker.isWhitespace(ch) -> append(' ')
                        else -> append(ch)
                    }
                }
            }
        }

        /**
         * Splits [text] on the space character.
         *
         * Leading, trailing and consecutive spaces never produce empty tokens, so blank or
         * empty input yields an empty list.
         *
         * @throws NullPointerException if [text] is `null`.
         */
        fun whitespaceTokenize(text: String?): List<String> {
            if (text == null) {
                throw NullPointerException("The input String is null.")
            }
            return text.split(' ').filter { it.isNotEmpty() }
        }

        /**
         * Splits [text] around punctuation: every character reported by
         * `CharChecker.isPunctuation` becomes a token of its own, and every maximal run of
         * other characters becomes one token. Order is preserved; empty input yields an empty
         * list.
         *
         * @throws NullPointerException if [text] is `null`.
         */
        fun runSplitOnPunc(text: String?): List<String> {
            if (text == null) {
                throw NullPointerException("The input String is null.")
            }
            val tokens = ArrayList<String>()
            // Accumulates the current run of non-punctuation characters.
            val word = StringBuilder()
            for (ch in text) {
                if (CharChecker.isPunctuation(ch)) {
                    // A punctuation character ends the word collected so far, if any.
                    if (word.isNotEmpty()) {
                        tokens.add(word.toString())
                        word.setLength(0)
                    }
                    tokens.add(ch.toString())
                } else {
                    word.append(ch)
                }
            }
            if (word.isNotEmpty()) {
                tokens.add(word.toString())
            }
            return tokens
        }
    }
}