package com.instabug.library.networkinterception.delegate

/**
 * Incremental evaluator that assembles a single UTF-8 code point from bytes supplied one at a time.
 *
 * Feed bytes through [add]; once it returns `true` the current sequence has ended and
 * [getCodePoint] yields the decoded value, or `null` when the bytes were not well-formed UTF-8.
 * The next call to [add] then starts a new sequence.
 *
 * The implementation follows [okio.Buffer.readUtf8CodePoint].
 * For more information on UTF-8 code points see
 * [this UTF-8 tutorial](https://jenkov.com/tutorials/unicode/utf-8.html).
 *
 * Instances hold mutable decoding state and perform no synchronization.
 */
class Utf8CodePointEvaluator {
    /** Number of continuation bytes still expected before the current sequence is complete. */
    private var remainingBytes = 0

    /** Bits accumulated so far; `null` when there is no sequence in progress or the last one was rejected. */
    private var codePoint: Int? = null

    /** Smallest value the current sequence length may encode; anything lower is an overlong encoding. */
    private var minimum = 0

    /**
     * Total length in bytes (1..4) announced by the lead byte of the current sequence.
     * It is `0` before any byte is added, after [reset], and after a sequence was rejected as invalid.
     */
    var byteCount = 0
        private set

    private val isFinished: Boolean
        get() = remainingBytes == 0

    /**
     * @return the decoded code point when the last sequence is complete and valid; `null` while
     * more bytes are expected, after an invalid sequence, or when nothing has been added yet
     */
    fun getCodePoint(): Int? = if (isFinished) codePoint else null

    /**
     * Add a byte to be evaluated.
     *
     * @param byte next byte of the stream
     * @return `true` if the evaluation is finished (either a complete code point or a rejected
     * sequence, distinguishable through [getCodePoint]), `false` if another byte is expected
     */
    fun add(byte: UByte): Boolean {
        val value = byte.toInt()
        if (isFinished) processFirstByte(value) else processContinuationByte(value)
        return isFinished
    }

    /** Clears all decoding state so the next byte passed to [add] is treated as a lead byte. */
    fun reset() {
        minimum = 0
        byteCount = 0
        remainingBytes = 0
        codePoint = null
    }

    // The lead byte has one of the following formats, x denotes data:
    // 0B0xxx_xxxx -> 1 byte char,  7 bits of data
    // 0B110x_xxxx -> 2 byte char, 11 bits of data (5 + 6)
    // 0B1110_xxxx -> 3 byte char, 16 bits of data (4 + 6 + 6)
    // 0B1111_0xxx -> 4 byte char, 21 bits of data (3 + 6 + 6 + 6)
    // Anything else (a stray continuation byte or 0B1111_1xxx) is rejected.
    private fun processFirstByte(byte: Int) {
        when {
            (byte and ONE_BYTE_FORMAT_MASK) == ONE_BYTE_FORMAT ->
                startSequence(length = 1, leadBits = byte and ONE_BYTE_DATA_MASK, lowerBound = ONE_BYTE_MINIMUM_VALUE)

            (byte and TWO_BYTE_FORMAT_MASK) == TWO_BYTE_FORMAT ->
                startSequence(length = 2, leadBits = byte and TWO_BYTE_DATA_MASK, lowerBound = TWO_BYTE_MINIMUM_VALUE)

            (byte and THREE_BYTE_FORMAT_MASK) == THREE_BYTE_FORMAT ->
                startSequence(length = 3, leadBits = byte and THREE_BYTE_DATA_MASK, lowerBound = THREE_BYTE_MINIMUM_VALUE)

            (byte and FOUR_BYTE_FORMAT_MASK) == FOUR_BYTE_FORMAT ->
                startSequence(length = 4, leadBits = byte and FOUR_BYTE_DATA_MASK, lowerBound = FOUR_BYTE_MINIMUM_VALUE)

            else -> reset()
        }
    }

    /** Records the state announced by a valid lead byte: total length, initial data bits and overlong bound. */
    private fun startSequence(length: Int, leadBits: Int, lowerBound: Int) {
        byteCount = length
        remainingBytes = length - 1
        codePoint = leadBits
        minimum = lowerBound
    }

    // Each continuation byte has the format 0B10xx_xxxx and contributes 6 bits of data.
    private fun processContinuationByte(byte: Int) {
        val partial = codePoint
        val isContinuation = (byte and CONTINUATION_BYTE_FORMAT_MASK) == CONTINUATION_BYTE_FORMAT
        if (partial == null || !isContinuation) {
            // The offending byte is dropped together with the partial sequence.
            reset()
            return
        }

        val combined = (partial shl CONTINUATION_BYTE_DATA_BITS_COUNT) or
            (byte and CONTINUATION_BYTE_DATA_MASK)
        remainingBytes--
        codePoint = combined

        // Range checks only make sense once every byte of the sequence has been merged in.
        if (isFinished && !isWellFormed(combined)) {
            reset()
        }
    }

    /** A completed value is rejected when it is overlong, above U+10FFFF, or a surrogate code point. */
    private fun isWellFormed(value: Int): Boolean =
        value >= minimum &&
            value <= MAX_UNICODE_CODE_POINT &&
            value !in MIN_SURROGATE_CODE_POINT..MAX_SURROGATE_CODE_POINT

    companion object {

        // 0B0xxx_xxxx
        private const val ONE_BYTE_DATA_MASK = 0x7f //                        0B0111_1111
        private const val ONE_BYTE_FORMAT_MASK = ONE_BYTE_DATA_MASK.inv() //  0B1000_0000
        private const val ONE_BYTE_FORMAT = 0x0 //                            0B0000_0000
        private const val ONE_BYTE_MINIMUM_VALUE = 0x0

        // 0B110x_xxxx
        private const val TWO_BYTE_DATA_MASK = 0x1f //                        0B0001_1111
        private const val TWO_BYTE_FORMAT_MASK = TWO_BYTE_DATA_MASK.inv() //  0B1110_0000
        private const val TWO_BYTE_FORMAT = 0xc0 //                           0B1100_0000
        // max value that fits in a one byte sequence (7 bits) + 1 = 2 pow 7
        private const val TWO_BYTE_MINIMUM_VALUE = 0x80

        // 0B1110_xxxx
        private const val THREE_BYTE_DATA_MASK = 0x0f //                          0B0000_1111
        private const val THREE_BYTE_FORMAT_MASK = THREE_BYTE_DATA_MASK.inv() //  0B1111_0000
        private const val THREE_BYTE_FORMAT = 0xe0 //                             0B1110_0000
        // max value that fits in a two byte sequence (11 bits) + 1 = 2 pow 11
        private const val THREE_BYTE_MINIMUM_VALUE = 0x800

        // 0B1111_0xxx
        private const val FOUR_BYTE_DATA_MASK = 0x07 //                         0B0000_0111
        private const val FOUR_BYTE_FORMAT_MASK = FOUR_BYTE_DATA_MASK.inv() //  0B1111_1000
        private const val FOUR_BYTE_FORMAT = 0xf0 //                            0B1111_0000
        // max value that fits in a three byte sequence (16 bits) + 1 = 2 pow 16
        private const val FOUR_BYTE_MINIMUM_VALUE = 0x10000

        // 0B10xx_xxxx
        private const val CONTINUATION_BYTE_DATA_MASK = 0x3f //                                 0B0011_1111
        private const val CONTINUATION_BYTE_FORMAT_MASK = CONTINUATION_BYTE_DATA_MASK.inv() //  0B1100_0000
        private const val CONTINUATION_BYTE_FORMAT = 0x80 //                                    0B1000_0000
        private const val CONTINUATION_BYTE_DATA_BITS_COUNT = 6

        private const val MAX_UNICODE_CODE_POINT = 0x10FFFF
        private const val MIN_SURROGATE_CODE_POINT = 0xd800
        private const val MAX_SURROGATE_CODE_POINT = 0xdfff
    }
}
