// Copyright 2025 by Carnegie Mellon University
// See license information in LICENSE.txt

package org.cert.netsa.io.silk

import java.io.{EOFException, InputStream}
import java.nio.{ByteBuffer, ByteOrder}
import scala.collection.mutable.ListBuffer

import io.{
  BufferReader, LzoInputStreamBuffer, RawInputStreamBuffer, SnappyInputStreamBuffer,
  ZlibInputStreamBuffer
}
import io.BufferUtil.*
import io.prefixmap.{PrefixMapReaderIPv4, PrefixMapReaderIPv6, PrefixMapReaderProtoPort}

/** A reader of binary SiLK PrefixMap files. This is usable as an
  * [[scala.collection.Iterator Iterator]] over a pair representing a key and value. Depending on
  * the contents of the PrefixMap file, the key is either an
  * [[org.cert.netsa.data.net.IPBlock IPBlock]] or a [[PrefixMapProtocolPortPair]]. The value is
  * always a String.
  *
  * Because the PrefixMap may contain different key types, the PrefixMapReader factory methods
  * return a [[PrefixMapResult$ PrefixMapResult]] wrapper over PrefixMapReader.
  *
  * The mapName method returns the map name that was specified in the PrefixMap file's header, if
  * any.
  *
  * @example
  *   This example uses the single argument form of the [[PrefixMapReader$ companion object's]]
  *   `ofInputStream()` method to read the PrefixMap file "example.pmap". The code may be used
  *   outside of Hadoop when it is known that the key is an IPBlock.
  *   {{{
  * val stream = new java.io.FileInputStream("example.pmap")
  * val pmapresult = PrefixMapReader.ofInputStream(stream)
  * val pmap = pmapresult match {
  *   case PrefixMapResult.IPBlockPrefixMap(iter) => iter
  *   case _ => null
  * }
  * pmap.hasNext
  * val (ipblock, name) = pmap.next()
  *   }}}
  *
  * @tparam T the type of the keys produced by this reader
  * @param reader the source of (decompressed) data buffers that follow the file header
  * @param header the SiLK file header that was read from the start of the stream
  *
  * @see [[PrefixMapReader$ the companion object]] for more details
  */
abstract class PrefixMapReader[T] protected (
  protected val reader: BufferReader,
  protected val header: Header
) extends Iterator[(T, String)] {

  /** Returns the map name stored in the header of the file from which this prefix map is being
    * read.
    *
    * @return the map name, or `None` when the header has no PrefixMap entry or the entry has no
    *   name.
    */
  def mapName: Option[String] =
    header
      .headerEntries
      .collectFirst {
        case entry: HeaderEntry.PrefixMap => entry
      }
      // Option(...) maps a null name to None
      .flatMap(entry => Option(entry.mapName))

  /** Tests whether this iterator can provide another (key, value) pair.
    *
    * @return `true` if a subsequent call to `next()` will yield an element, `false` otherwise.
    */
  override def hasNext: Boolean

  /** Produces the next (key, value) pair of this iterator.
    *
    * @return the next pair of this iterator if `hasNext` is true
    *
    * @throws java.util.NoSuchElementException when the iterator is depleted
    */
  override def next(): (T, String)

  /** Constant value that is used to determine whether a value in the tree is a leaf. Leaves have
    * this bit set high.
    */
  protected val leafBit: Int = 0x80000000

  /** Constant value that may be masked with a leaf to get its value. */
  protected val leafValueMask: Int = 0x7fffffff

  /** Whether this prefix map uses a dictionary.
    *
    * Evaluated at construction time, so constructing a reader for a file whose record version is
    * not in the range 1 to 5 throws a [[SilkDataFormatException]].
    */
  protected val hasDictionary: Boolean = header.recordVersion match {
    case 2 | 3 | 4 => true
    case 1 | 5     => false
    case _         => throw new SilkDataFormatException(
        s"PrefixMap file has unexpected record version: ${header.recordVersion}"
      )
  }

  /** Returns true if the integer parameter is even; false otherwise. */
  protected def isEven(x: Int): Boolean = (x & 0x1) == 0x0

  /** Returns true if the integer parameter is a leaf (has `leafBit` set); false otherwise. */
  protected def isLeaf(x: Int): Boolean = (x & leafBit) == leafBit

  /** Takes a leaf value and creates a String containing the two letter country code that the leaf
    * represents. The first letter is taken from bits 8-15 of the value and the second letter from
    * bits 0-7.
    */
  protected def getCountryCode(x: Int): String = {
    new String(Array[Byte](((x >>> 8) & 0xff).toByte, (x & 0xff).toByte), "US-ASCII")
  }

  /** Reads the dictionary from a prefix map file and creates an array of dictionary entries.
    *
    * The dictionary is stored as a 4-byte length followed by that many bytes holding the entries,
    * each terminated by a NUL byte.
    *
    * @return the dictionary entries in file order; an empty array if the file does not contain a
    *   dictionary or the dictionary has length zero.
    *
    * @throws SilkDataFormatException if the stream ends before the dictionary has been read or the
    *   dictionary length is negative.
    */
  protected def readDictionary(): Array[String] = {
    if (!hasDictionary) {
      Array.empty[String]
    } else {
      // Ensure there are four bytes that represent the length of the
      // dictionary.
      if (!checkAvailable(4)) {
        throw new SilkDataFormatException(
          "PrefixMap file is invalid or truncated; 4 bytes required"
        )
      }
      val dictLength = buffer.getInt(bufOffset)
      bufOffset += 4

      if (dictLength < 0) {
        throw new SilkDataFormatException(
          "PrefixMap dictionary is too large (signed 32-bit overflow)"
        )
      } else if (0 == dictLength) {
        // weird but allow it
        Array.empty[String]
      } else {
        // read the dictionary from the input source
        if (!checkAvailable(dictLength)) {
          throw new SilkDataFormatException(
            s"PrefixMap file is invalid or truncated; $dictLength bytes required"
          )
        }
        val bytes: Array[Byte] = buffer.getBytes(bufOffset, dictLength)
        bufOffset += dictLength
        splitWords(bytes)
      }
    }
  }

  /** Splits `bytes` into US-ASCII strings at each NUL byte. A NUL that is the final byte only
    * terminates the last word; it does not produce a trailing empty entry. Empty words between
    * consecutive NULs are kept so that dictionary indexes are preserved.
    */
  private def splitWords(bytes: Array[Byte]): Array[String] = {
    val words: ListBuffer[String] = new ListBuffer()
    var start = 0
    // scan by index rather than repeatedly copying the remainder of the array
    while (start < bytes.length) {
      val nul = bytes.indexOf(0.toByte, start)
      val end = if (nul < 0) bytes.length else nul
      words += new String(bytes, start, end - start, "US-ASCII")
      start = end + 1
    }
    words.toArray
  }

  /** Reads the tree data-structure into an array of 32-bit integers. Every two integers represent
    * a node in the tree, with the first integer being the left branch and the second integer being
    * the right branch. Values without the leaf bit are node indexes in the file; they are doubled
    * here so that they index directly into the returned array.
    *
    * When reading a Protocol-Port prefix map (record version 3), this function removes the first
    * 16 integers from the tree since those values are always zero. The indexes of the remaining
    * entries are adjusted appropriately.
    *
    * @return the tree as a flat array of integers, two per node.
    *
    * @throws SilkDataFormatException if the record count is invalid or the stream ends before the
    *   entire tree has been read.
    */
  protected def readTree(): Array[Int] = {
    // Ensure the prefix map file contains the number of entries and
    // at least one node (for a total of three 4-byte integers)
    if (!checkAvailable(12)) {
      throw new SilkDataFormatException("PrefixMap file is invalid or truncated; 12 bytes required")
    }

    // a record (node) in the tree is composed of two integers; since
    // this code uses Array[Int] for the tree, double the record count.
    // The doubling is done as a Long so that a corrupt count cannot wrap
    // around to a plausible positive Int.
    val recordCount: Long = 2L * buffer.getInt(bufOffset)
    bufOffset += 4

    if (recordCount < 1 || recordCount > Int.MaxValue) {
      throw new SilkDataFormatException(s"PrefixMap file has an invalid recordCount: $recordCount")
    }
    var remaining = recordCount.toInt

    // The protocol-port prefix map is 8 levels deeper than it needs
    // to be.  The values are encoded as ((proto<<16)|port) making
    // the first 8 bits always 0.  This code ignores the first 8 nodes of
    // the tree (16 integers or 64 bytes) and reduces all remaining
    // node indexes by 16.
    val skippedInts = 16
    val adjust: Int =
      if (3 != header.recordVersion) {
        0
      } else {
        if (remaining <= skippedInts) {
          // nothing would remain after dropping the leading nodes
          throw new SilkDataFormatException(
            s"PrefixMap file has an invalid recordCount: $recordCount"
          )
        }
        if (!checkAvailable(4 * skippedInts)) {
          throw new SilkDataFormatException(
            "PrefixMap file is invalid or truncated; 64 bytes required"
          )
        }
        remaining -= skippedInts
        bufOffset += 4 * skippedInts
        -skippedInts
      }

    val tree = Array.newBuilder[Int]

    // Availability is verified before every read.  The count is tested
    // first so that no attempt is made to read beyond the final record.
    while (remaining > 0 && checkAvailable(4)) {
      val n = buffer.getInt(bufOffset)
      // if 'n' has the leaf bit set, add it to the tree unchanged; if the
      // leaf bit is not set, double it before adding (since 'n' is a node
      // index and each node is two ints)
      tree += (if (isLeaf(n)) n else 2 * n + adjust)
      bufOffset += 4
      remaining -= 1
    }

    if (remaining > 0) {
      throw new SilkDataFormatException(
        s"PrefixMap file is invalid or truncated; $remaining records were not read"
      )
    }

    tree.result()
  }

  /** Current buffer of bytes being processed */
  private var buffer = ByteBuffer.allocate(0)

  /** Current position (octet offset) in the current buffer. */
  private var bufOffset = 0

  /** Length of the current buffer in octets. */
  private var bufLength = 0

  /** Whether the end of the input stream has been reached. */
  private var endOfFile = false

  /** The byte order declared by the file header. */
  private def byteOrder: ByteOrder =
    if (header.isBigEndian) ByteOrder.BIG_ENDIAN
    else ByteOrder.LITTLE_ENDIAN

  /** Return true if 'wanted' octets are available in the buffer, reading more data from the stream
    * if necessary.
    *
    * When the end of the stream is reached before 'wanted' octets are available, the reader is
    * closed, any unread octets are discarded, and this and every later call returns false.
    */
  private def checkAvailable(wanted: Int): Boolean = {
    // compare against the unread count (never negative) so that a huge
    // 'wanted' cannot overflow the addition and appear satisfied
    while (!endOfFile && wanted > bufLength - bufOffset) {
      try {
        val (newBuffer, newLength) = reader.getNextBuffer()
        val unread = bufLength - bufOffset
        val bytes =
          if (0 == unread) {
            // nothing in old buffer; replace with new
            newBuffer
          } else {
            // create new buffer with tail of old + newly read, copying only
            // the valid octets of each array
            buffer.array.slice(bufOffset, bufLength) ++ newBuffer.take(newLength)
          }
        buffer = ByteBuffer.wrap(bytes).order(byteOrder)
        bufLength = unread + newLength
        bufOffset = 0
      } catch {
        case _: EOFException =>
          reader.close()
          buffer = ByteBuffer.allocate(0)
          // keep the bookkeeping consistent with the now-empty buffer
          bufOffset = 0
          bufLength = 0
          endOfFile = true
      }
    }
    !endOfFile
  }

}

/** The PrefixMapReader object provides support for creating a [[PrefixMapReader]]. */
object PrefixMapReader {

  /** Helper function for the `ofInputStream()` method below. Ensures the file's header is
    * reasonable for a PrefixMap file. Creates the correct type of PrefixMapReader to read the
    * contents of the file depending on the record version in the file's header: versions 1 and 2
    * use the IPv4 reader, version 3 the protocol-port reader, and versions 4 and 5 the IPv6
    * reader.
    *
    * @throws SilkDataFormatException if the record version is not in the range 1 to 5 or the
    *   record size is not 1.
    */
  private def createReader(bufferReader: BufferReader, header: Header): PrefixMapResult = {
    // Verify the record version and select the reader to build.  The
    // reader itself is created only after the record size has also
    // been verified.
    val construct: () => PrefixMapResult = header.recordVersion match {
      case 1 | 2 =>
        () => PrefixMapResult.IPBlockPrefixMap(new PrefixMapReaderIPv4(bufferReader, header))
      case 3 =>
        () =>
          PrefixMapResult.ProtocolPortPrefixMap(new PrefixMapReaderProtoPort(bufferReader, header))
      case 4 | 5 =>
        () => PrefixMapResult.IPBlockPrefixMap(new PrefixMapReaderIPv6(bufferReader, header))
      case _ => throw new SilkDataFormatException(
          s"PrefixMap file has unexpected record version: ${header.recordVersion}"
        )
    }

    // verify record size
    if (1 != header.recordSize) {
      throw new SilkDataFormatException(
        s"PrefixMap file has unexpected recordLength: ${header.recordSize}"
      )
    }

    construct()
  }

  /** Creates and returns a reader from the provided input stream. The file header is read from the
    * stream first; the remaining data may be uncompressed or compressed with zlib, LZO1X, or
    * Snappy.
    *
    * Since the key of a SiLK Prefix Map file may contain either an IP block or a protocol-port
    * pair, this function returns a [[PrefixMapResult$ PrefixMapResult]].
    *
    * @throws SilkDataFormatException if the header's record version or record size is not valid
    *   for a PrefixMap, or the file uses an unsupported compression method.
    */
  def ofInputStream(s: InputStream): PrefixMapResult = {
    val header = Header.readFrom(s)
    // size of the chunks requested when reading uncompressed data
    val bufferSize = 65536
    val bufferReader = header.compressionMethod match {
      case CompressionMethod.NONE   => RawInputStreamBuffer(s, bufferSize)
      case CompressionMethod.ZLIB   => ZlibInputStreamBuffer(s)
      case CompressionMethod.LZO1X  => LzoInputStreamBuffer(s)
      case CompressionMethod.SNAPPY => SnappyInputStreamBuffer(s)
      case _ => throw new SilkDataFormatException("Unsupported compression method (without Hadoop)")
    }

    createReader(bufferReader, header)
  }

}

// @LICENSE_FOOTER@
//
// Mothra 1.7
//
// Copyright 2025 Carnegie Mellon University.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS
// FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND,
// EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS
// FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED FROM USE OF THE MATERIAL.
// CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM
// PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Licensed under a GNU GPL 2.0-style license, please see LICENSE.txt or contact
// permission@sei.cmu.edu for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited
// distribution.  Please see Copyright notice for non-US Government use and distribution.
//
// This Software includes and/or makes use of Third-Party Software each subject to its own license.
//
// DM24-1649
