// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt

package org.cert.netsa.io.silk

import java.io.{OutputStream, DataOutputStream}
import org.apache.hadoop.conf.Configuration

import io.{BufferWriter, LzoOutputStreamBuffer, RawOutputStreamBuffer,
  SnappyOutputStreamBuffer, ZlibOutputStreamBuffer}
import io.BufferUtil.{putBytes, putInt8}

import org.cert.netsa.data.net.{IPBlock, IPAddress}


/**
  * A writer of binary SiLK IPset files.
  *
  * @example This example reads the contents of "example.set" and
  * writes it to "copy.set":
  * {{{
  * val in = new java.io.FileInputStream("example.set")
  * val out = new java.io.FileOutputStream("copy.set")
  * val ipset = IPSetReader.ofInputStream(in)
  * val writer = IPSetWriter.toOutputStream(out)
  * writer.append(ipset)
  * writer.close()
  * }}}
  *
  * @see [[IPSetWriter$ the companion object]] for more details
  */
class IPSetWriter private (
  val out: DataOutputStream,
  val compressionMethod: CompressionMethod,
  val hadoopConfig: Option[Configuration])
{
  /**
    * Set to true once the file's header has been written
    */
  private[this] var headerWritten = false

  /**
    * Length in bytes of a single IP Address.  Determined by the first
    * IPBlock handed to append(); only assigned once that block has
    * been validated and the header has been written.
    */
  private[this] var ipLength: Int = _

  /**
    * Length of a single record.  One more than the ipLength to hold
    * the netblock (prefix) length.
    */
  private[this] var recordLength: Int = _

  /**
    * The size of the buffer to hold data prior to compressing.
    */
  private[this] val bufferSize = 65536

  /**
    * The buffer to hold data prior to compressing.  The append()
    * method fills this buffer.
    */
  private[this] val buffer = Array.ofDim[Byte](bufferSize)

  /**
    * The current offset into the output buffer.
    */
  private[this] var offset = 0

  /**
    * The maximum IP address of the previous IPBlock written to the
    * stream.  Used to ensure blocks are sorted and do not overlap.
    */
  private[this] var prevBlockMax: IPAddress = _

  /**
    * Returns the hadoop configuration, failing with a descriptive
    * message when none was supplied.  The exception type matches what
    * `Option.get` would throw, so callers catching
    * NoSuchElementException are unaffected.
    */
  private[this] def requiredHadoopConfig: Configuration =
    hadoopConfig.getOrElse(
      throw new NoSuchElementException(
        s"Compression method $compressionMethod requires a hadoop configuration"))

  /**
    * Object used to write/compress the output
    */
  private[this] val writer: BufferWriter =
    compressionMethod match {
      case CompressionMethod.NONE => RawOutputStreamBuffer(out)
      case CompressionMethod.ZLIB => ZlibOutputStreamBuffer(out)
      case CompressionMethod.LZO1X => LzoOutputStreamBuffer(requiredHadoopConfig, out)
      case CompressionMethod.SNAPPY => SnappyOutputStreamBuffer(requiredHadoopConfig, out)
      case _ => throw new SilkDataFormatException("Unrecognized compression method")
    }


  /**
    * Writes the SiLK file header to the output stream, flushes the
    * stream, and records that the header has been written.
    *
    * @param isIPv6 selects the IP length stored in the IPset header
    *     entry: 16 when true, 4 when false
    */
  private[this] def writeHeader(isIPv6: Boolean): Unit = {
    // first create the IPset header entry
    val h_entries: Vector[HeaderEntry] =
      Vector(
        HeaderEntry.IPSet(0, 0, if (isIPv6) { 16 } else { 4 }, 0, 0, 0),
        HeaderEntry.EndOfHeaders)

    // then create and write the header object
    val header = new Header(Header.BigEndian, FileFormat.FT_IPSET,
      Header.FileVersion, compressionMethod, SilkVersion(0), 1, 4,
      h_entries)
    header.writeTo(out)
    out.flush()
    headerWritten = true
  }

  /**
    * Hands any buffered bytes to the (compressing) writer and resets
    * the buffer offset.  Does nothing when the buffer is empty.
    */
  private[this] def flushBuffer(): Unit = {
    if ( offset > 0 ) {
      writer.putBuffer(buffer, offset)
      offset = 0
    }
  }

  /**
    * Appends one record (address bytes followed by the prefix length)
    * to the buffer, first flushing the buffer when the record would
    * not fit.
    */
  private[this] def putRecord(addr: Array[Byte], prefix: Byte): Unit = {
    if (bufferSize - offset < recordLength) {
      flushBuffer()
    }
    putBytes(buffer, offset, addr, ipLength)
    putInt8(buffer, offset + ipLength, prefix)
    offset = offset + recordLength
  }

  /**
    * Initializes the IPSet file from the first IPBlock: determines
    * the address length, writes the file header, and buffers the
    * block as the first record.
    *
    * @throws java.lang.IllegalArgumentException if the block's
    *     address is neither 4 nor 16 bytes long
    */
  private[this] def startFile(first: IPBlock): Unit = {
    val addr = first.min.toBytes
    val length = addr.length

    val isIPv6: Boolean = length match {
      case 4  => false
      case 16 => true
      case _  => throw new IllegalArgumentException(
        s"Unexpected IP Address length $length")
    }

    // write the file's header
    writeHeader(isIPv6)

    // commit the record geometry only after validation and a
    // successful header write
    ipLength = length
    recordLength = length + 1

    // append the first IPBlock to the buffer
    assert(0 == offset)
    assert(bufferSize > recordLength)
    putRecord(addr, first.prefixLength.toByte)

    prevBlockMax = first.max
  }

  /**
    * Whether any IPBlocks have been written to the stream--that is,
    * whether the `append()` method has been called with a non-empty
    * Iterator.
    *
    * @return `true` once the file's header has been written, which
    *     happens on the first `append()` call with a non-empty
    *     Iterator or when `close()` is called
    */
  def wasHeaderWritten: Boolean = headerWritten

  /** Iterates over the [[org.cert.netsa.data.net.IPBlock IPBlocks]] and
    * appends them to the destination stream.
    *
    * Expects the [[org.cert.netsa.data.net.IPBlock IPBlocks]] in the
    * [[scala.collection.Iterator Iterator]] to be in sorted order
    * (numerically ascending).
    *
    * Expects all [[org.cert.netsa.data.net.IPBlock IPBlocks]] in the
    * [[scala.collection.Iterator Iterator]] to be of the same size;
    * that is, either all are [[org.cert.netsa.data.net.IPv4Block
    * IPv4Block]] or all are [[org.cert.netsa.data.net.IPv6Block
    * IPv6Block]].
    *
    * This function may be called successfully multiple times as long
    * as all IPBlocks have the same size and the IPBlocks across the
    * various calls are in sorted order.
    *
    * Calling this function with an empty Iterator before anything
    * has been written leaves the output stream untouched.
    *
    * @throws java.lang.IllegalArgumentException if the IPBlock Iterator
    *     contains a mix of IPv4 addresses and IPv6 addresses or the
    *     IPBlocks are not in sorted order.
    */
  def append[T <: IPBlock](iter: Iterator[T]): Unit = {
    // initialize the IPSet file by the first IPBlock in the Iterator
    if ( !headerWritten && iter.hasNext ) {
      startFile(iter.next())
    }

    // the header is still unwritten only when no IPBlock has ever
    // been seen; in that case there is nothing to write or flush
    if ( headerWritten ) {
      // process the IP blocks
      for ( block <- iter ) {
        val arr = block.min.toBytes
        // check the address family first so that addresses of
        // different sizes are never compared and a rejected block
        // does not disturb prevBlockMax
        require(arr.length == ipLength,
          "Cannot mix IPv4Blocks and IPv6Blocks in IPSetWriter")
        require(prevBlockMax < block.min, "IPBlocks are unsorted or overlap")
        prevBlockMax = block.max

        putRecord(arr, block.prefixLength.toByte)
      }

      flushBuffer()
      out.flush()
    }
  }

  /**
    * Closes the output stream.
    *
    * Writes the SiLK file header to the output stream if it has not
    * been written, writes any buffered records, closes the output
    * stream, and releases resources.  The output stream is closed
    * even when writing the header or the remaining data fails; the
    * failure is still propagated to the caller.
    */
  def close(): Unit = {
    try {
      if ( !headerWritten ) {
        // no IPBlock was ever appended; the header is written with
        // the IPv4 address length
        writeHeader(false)
      }
      flushBuffer()
      writer.end()
    } finally {
      out.close()
    }
  }

}


/**
  * The IPSetWriter companion object provides support for creating an
  * [[IPSetWriter]].
  */
object IPSetWriter {

  /**
    * Returns `stream` itself when it already is a DataOutputStream;
    * otherwise wraps it in a new DataOutputStream.
    */
  private[this] def asDataOutputStream(stream: OutputStream): DataOutputStream =
    stream match {
      case alreadyData: DataOutputStream => alreadyData
      case plain: OutputStream => new DataOutputStream(plain)
    }

  /**
    * Builds an [[IPSetWriter]] that emits
    * [[org.cert.netsa.data.net.IPBlock IPBlocks]] to the output
    * stream `s` as a binary SiLK IPset stream (compatible with SiLK
    * 3.7.0 and later), compressing the output with
    * `compressionMethod`.  A hadoop configuration must be supplied
    * for the compression methods that need one.
    *
    * @param s the destination stream; used directly when it is
    *     already a DataOutputStream, wrapped in one otherwise
    * @param compressionMethod how to compress the output; defaults to
    *     no compression
    * @param hadoopConfig the hadoop configuration, when available
    * @return the newly created writer
    * @throws java.util.NoSuchElementException when a hadoop
    *     configuration is required and none is provided.
    */
  def toOutputStream(
    s: OutputStream,
    compressionMethod: CompressionMethod = CompressionMethod.NONE,
    hadoopConfig: Option[Configuration] = None)
      : IPSetWriter =
    new IPSetWriter(asDataOutputStream(s), compressionMethod, hadoopConfig)

}

// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact permission@sei.cmu.edu for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.tx
//
// DM20-1143
