// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt

package org.cert.netsa.io.silk

import java.io.{EOFException, InputStream}

import io.{BufferReader, LzoInputStreamBuffer, RawInputStreamBuffer,
  SnappyInputStreamBuffer, ZlibInputStreamBuffer}
//import io.BufferUtil.{getInt32, getInt64, getIPv6Address, getIPv4Address}

import io.bag.{BagReaderInt, BagReaderIPv4Address, BagReaderIPv6Address}


/**
  * A reader of binary SiLK Bag files. This is usable as an
  * [[scala.collection.Iterator Iterator]] over a pair representing
  * the key and counter in the Bag.  Depending on the contents of the
  * Bag file, the key is either an [[org.cert.netsa.data.net.IPAddress
  * IPAddress]] or an [[scala.Int Int]].  The counter is always a
  * [[scala.Long Long]].
  *
  * Since a Bag file may contain different key types, the BagReader
  * factory methods return a [[BagResult$ BagResult]] wrapper over
  * BagReader.
  *
  * The `keyType` and `counterType` methods return a [[BagDataType]]
  * that specifies the type of the key and the counter that were
  * specified in the Bag file's header.
  *
  * @example This example uses the single argument form of the
  * [[BagReader$ companion object's]] `ofInputStream()` method to read
  * the Bag file "example.bag".  This code may be used outside of
  * Hadoop when it is known that the Bag's key is an IPAddress.
  * {{{
  * val stream = new java.io.FileInputStream("example.bag")
  * val bagresult = BagReader.ofInputStream(stream)
  * val bag = bagresult match {
  *   case BagResult.IPAddressBag(iter) => iter
  *   case _ => null
  * }
  * bag.hasNext
  * val (ipaddr, counter) = bag.next()
  * }}}
  *
  * @see [[BagReader$ the companion object]] for more details.
  *
  * @note While the SiLK command-line tools display an unspecified
  * key type as an IPv4 address, the BagReader class treats an
  * unspecified key type as an integer.
  */
abstract class BagReader[T] protected (
  protected[this] val reader: BufferReader,
  protected[this] val header: Header
) extends Iterator[(T, Long)]
{
  // The Bag header entry (if the file has one); it carries the types
  // and lengths of the key and counter.
  private[this] val bagEntry = header.headerEntries.collectFirst {
    case entry: HeaderEntry.Bag => entry
  }

  /**
    * Whether to byte-swap the bytes as they are read from the buffer
    */
  protected[this] val swap = !header.isBigEndian
  /**
    * Record length
    */
  protected[this] val recordLength = header.recordSize

  /**
    * Current buffer of bytes being processed
    */
  protected[this] var buffer = Array.empty[Byte]
  /**
    * Current position (octet offset) in the current buffer.
    */
  protected[this] var bufOffset = 0
  /**
    * Length of the current buffer in octets.
    */
  private[this] var bufLength = 0
  /**
    * Whether the end of the input stream has been reached.
    */
  protected[this] var endOfFile = false

  /**
    * Read a new buffer into memory if needed. Does no work if the
    * current buffer still has at least one complete unread record, or
    * if the reader is at end of file. Sets endOfFile to true (and
    * closes the underlying reader) if the EOF is reached.
    *
    * Buffers are requested repeatedly until one holding at least one
    * complete record arrives, so that a buffer shorter than a single
    * record (for example, a truncated trailing record) is never
    * presented to `next()` as a readable record.
    */
  protected[this] def fillBuffer(): Unit = {
    while ( ! endOfFile && bufOffset + recordLength > bufLength ) {
      try {
        val (newBuffer, newLength) = reader.getNextBuffer()
        buffer = newBuffer
        bufLength = newLength
        bufOffset = 0
      } catch {
        // the reader signals exhaustion of its input with EOFException
        case _: EOFException =>
          reader.close()
          buffer = Array.empty
          endOfFile = true
      }
    }
  }

  /** Tests whether this iterator can provide another (key, counter)
    * pair.  May read the next buffer from the underlying reader.
    *
    * @return `true` if a subsequent call to `next()` will yield an
    *     element, `false` otherwise.
    */
  override def hasNext: Boolean = {
    fillBuffer()
    !endOfFile
  }

  /** Produces the next (key, counter) pair of this iterator.
    *
    * @return the next pair of this iterator if `hasNext` is true
    *
    * @throws java.util.NoSuchElementException when the iterator is
    *     depleted
    */
  override def next(): (T, Long)

  /**
    * Provides the data type of the key that was specified in the Bag
    * file's header, if any.
    *
    * @return the type of data in the Key field of the Bag or `None`
    * if the file's header has no Bag header entry
    */
  def keyType: Option[BagDataType] =
    bagEntry.flatMap(entry => Option(BagDataType(entry.keyType)))

  /**
    * Provides the data type of the counter that was specified in the
    * Bag file's header, if any.
    *
    * @return the type of data in the Counter field of the Bag or `None`
    * if the file's header has no Bag header entry
    */
  def counterType: Option[BagDataType] =
    bagEntry.flatMap(entry => Option(BagDataType(entry.counterType)))

}


/**
  * The BagReader object provides support for creating a
  * [[BagReader]].
  */
object BagReader {

  /**
    * Helper function for `ofInputStream()`.  Ensures the file's
    * header is reasonable for a Bag file and returns the Bag header
    * entry describing the key and counter.  When the header contains
    * no Bag entry, an entry holding the default values (a 4-octet
    * custom key and an 8-octet custom counter) is returned instead.
    *
    * @throws SilkDataFormatException if the header does not describe
    *     a Bag file this library is able to read.
    */
  private[this] def checkedBagEntry(header: Header): HeaderEntry.Bag = {
    // Default values of key length and counter length
    val defaultLengthKey: Short = 4
    val defaultLengthCounter: Short = 8
    val bagTypeCustom = BagDataType.SKBAG_FIELD_CUSTOM.value

    if (FileFormat.FT_RWBAG != header.fileFormat) {
      throw new SilkDataFormatException("File is not a Bag file")
    }

    // verify the record version
    if (header.recordVersion < 3 || header.recordVersion > 4) {
      throw new SilkDataFormatException(
        "Bag file has unexpected record version: " + header.recordVersion)
    }

    // get the Bag header entry; if one does not exist, create one
    // that can be queried
    val hentry = header.headerEntries.collectFirst {
      case entry: HeaderEntry.Bag => entry
    }.getOrElse(
      new HeaderEntry.Bag(bagTypeCustom, defaultLengthKey,
                          bagTypeCustom, defaultLengthCounter))

    // a non-positive record size can never be valid and would make
    // the buffer-size computation in ofInputStream() fail
    if (header.recordSize <= 0
      || hentry.keyLength + hentry.counterLength != header.recordSize)
    {
      throw new SilkDataFormatException(
        "Bag file has unexpected recordLength: " + header.recordSize)
    }

    // although the bag format supports keys of lengths other than 4
    // or 16, none of the SiLK code has ever used that feature
    if (16 != hentry.keyLength && 4 != hentry.keyLength) {
      throw new SilkDataFormatException(
        "Bag file has unexpected keyLength: " + hentry.keyLength)
    }

    hentry
  }

  /**
    * Helper function for `ofInputStream()`.  Creates the correct type
    * of BagReader to read the contents of the Bag depending on the
    * key length and key type given by `hentry`, which must have been
    * produced by `checkedBagEntry()`.
    */
  private[this] def createReader(
    bufferReader: BufferReader,
    header: Header,
    hentry: HeaderEntry.Bag
  ): BagResult = {
    // only consulted for 4-octet keys; 16-octet keys are always
    // read as IPv6 addresses
    def keyHoldsIP: Boolean = BagDataType(hentry.keyType) match {
      case BagDataType.SKBAG_FIELD_SIPv4
         | BagDataType.SKBAG_FIELD_DIPv4
         | BagDataType.SKBAG_FIELD_NHIPv4
         | BagDataType.SKBAG_FIELD_ANY_IPv4
         | BagDataType.SKBAG_FIELD_SIPv6
         | BagDataType.SKBAG_FIELD_DIPv6
         | BagDataType.SKBAG_FIELD_NHIPv6
         | BagDataType.SKBAG_FIELD_ANY_IPv6 => true
      case _ => false
    }

    if (16 == hentry.keyLength) {
      BagResult.IPAddressBag(new BagReaderIPv6Address(bufferReader, header))
    } else if (keyHoldsIP) {
      BagResult.IPAddressBag(new BagReaderIPv4Address(bufferReader, header))
    } else {
      BagResult.IntBag(new BagReaderInt(bufferReader, header))
    }
  }

  /**
    * Creates and returns a reader from the provided input stream.
    * The data may be uncompressed or compressed with zlib, LZO1X, or
    * Snappy.
    *
    * Since the key of a SiLK Bag may contain either an IPAddress or
    * Integers, this function returns a [[BagResult$ BagResult]].
    *
    * @throws SilkDataFormatException if the input stream is
    *     malformed, is not a Bag, or uses an unsupported compression
    *     method.
    */
  def ofInputStream(s: InputStream): BagResult = {
    val header = Header.readFrom(s)
    // validate before using the record size as a divisor below
    val hentry = checkedBagEntry(header)

    val bufferReader = header.compressionMethod match {
      case CompressionMethod.NONE =>
        // largest multiple of the record size that fits in 64 KiB, so
        // that a record never straddles two buffers
        val bufferSize = (65536 / header.recordSize) * header.recordSize
        RawInputStreamBuffer(s, bufferSize)
      case CompressionMethod.ZLIB => ZlibInputStreamBuffer(s)
      case CompressionMethod.LZO1X => LzoInputStreamBuffer(s)
      case CompressionMethod.SNAPPY => SnappyInputStreamBuffer(s)
      case _ => throw new SilkDataFormatException("Unsupported compression method (without Hadoop)")
    }

    createReader(bufferReader, header, hentry)
  }

}

// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact permission@sei.cmu.edu for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.tx
//
// DM20-1143
