// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt

package org.cert.netsa.io.silk
package config

// DEVELOPER NOTE:
//
// This uses the Scala Standard Parser Combinator Library, which you
// can find details about at
// https://github.com/scala/scala-parser-combinators, including a
// getting started guide.
//
// In practice, you probably want to look at the following to
// understand what's going on:
//
// http://www.scala-lang.org/api/2.11.9/scala-parser-combinators/#scala.util.parsing.combinator.RegexParsers
// http://www.scala-lang.org/api/2.11.9/scala-parser-combinators/#scala.util.parsing.combinator.Parsers$Parser
//
// The first is what SilkConfigParser extends. This provides the
// various "function-name" like operators, like "rep" for repetition,
// and "err" and "success" for producing parse errors or successful
// results. It also brings in some automatic conversions which will
// automatically convert a string or regex into a parser when it's
// used where a parser is expected. (This is why all of the various
// literal strings and regexes below are just sort of used as parsers
// without being converted explicitly somehow.)
//
// Some functions of note:
//
//      rep(p) - Parser that matches zero or more repetitions of
//               parser p, and returns the result as a list.
//
//  success(c) - Parser that always succeeds with the value c.
//
//      err(c) - Parser that always fails with the error message c.
//
// The second is the type of the Parser object itself, and this
// provides the various infix operators that work on parsers. Some
// operators of note:
//
//   (p1 | p2) - Parser that tries parser p1 and if it succeeds
//               returns the result, otherwise it tries parser
//               p2. Note that this means p1 and p2 have to have
//               compatible result types. (i.e. both return String or
//               both return Int or the like.)
//
//   (p ^^^ c) - Parser that matches parser p and if it succeeds
//               discards the result of p and returns the constant c
//               instead.
//
//    (p ^^ f) - Parser that matches parser p and if it succeeds with
//               value x, returns the value of f(x) instead. (This is
//               "map".)
//
//    (p >> f) - Parser that matches parser p and if it succeeds with
//               value x, calls f(x) to get a new parser and tries
//               that parser. (This is "bind".)
//
//   (p1 ~ p2) - Parser that parses p1 and then p2 and keeps the
//               results of both for later use.
//
//  (p1 ~> p2) - Parser that parses p1 and then p2 and discards the
//               result of p1 returning the result of p2.
//
//  (p1 <~ p2) - Parser that parses p1 and then p2 and discards the
//               result of p2 returning the result of p1.
//
// The final thing to note is that most of the parsers here are taking
// a SilkConfig object as an argument, and returning a SilkConfig
// object as their result. Basically, each parser is parsing a part of
// the input file and applying whatever changes are needed to the
// SilkConfig before passing it on to the next parser that matches.
//
// Hopefully all of this will provide enough to get started
// understanding how this works and to follow the code. Don't forget
// to look through the documentation if you need to.

import java.io.{FileInputStream, InputStreamReader}
import scala.util.parsing.combinator.RegexParsers
import scala.util.{Failure => TFailure, Success => TSuccess}

private[silk] object SilkConfigParser extends RegexParsers {

  // Automatic whitespace skipping is switched off: newlines end a
  // directive in this grammar, so whitespace is consumed explicitly
  // by the `ws` and `eol` parsers below instead.
  override val skipWhitespace: Boolean = false

  // Some constants
  //
  // The two *_NUM_* values act as exclusive upper bounds on the
  // numeric ids accepted by the `sensor` and `type` directives
  // (an id equal to or above the constant is rejected). The two
  // *_STRLEN_* values are the longest accepted sensor and
  // type/flowtype names.
  private val SK_MAX_NUM_SENSORS: Int = 0xFFFF
  private val SK_MAX_NUM_FLOWTYPES: Int = 0xFF
  private val SK_MAX_STRLEN_SENSOR: Int = 64
  private val SK_MAX_STRLEN_FLOWTYPE: Int = 32

  // Basic "lex-like" parsers for tokens

  // A run of horizontal whitespace (space, tab, carriage return).
  // Newlines are not included; they are matched by eol.
  def ws: Parser[Unit] = regex("""[ \t\r]+""".r) ^^ { _ => () }
  // A '#' and everything after it, up to but excluding the newline.
  def comment: Parser[Unit] = regex("""#[^\n]*""".r) ^^ { _ => () }
  // End of line: optional whitespace, optional comment, newline.
  def eol: Parser[Unit] =
    (opt(ws) ~ opt(comment) ~ literal("\n")) ^^ { _ => () }
  // A line holding only whitespace and/or a comment.
  def blankLine: Parser[Unit] = opt(ws) ~> eol
  // Zero or more blank lines in a row.
  def blankLines: Parser[Unit] = rep(blankLine) ^^ { _ => () }

  // Bare (unquoted) token made of ASCII letters, digits and the
  // punctuation characters - . _ / @
  //
  // The letter ranges are written as A-Z and a-z separately. A
  // single A-z range also covers the ASCII characters that sit
  // between 'Z' and 'a' (left/right square bracket, backslash,
  // caret and backtick), which would then be accepted inside atoms.
  def atom: Parser[String] = """[-._/@A-Za-z0-9]+""".r

  // Double-quoted string. The pieces matched by stringContent
  // between the two quotes are concatenated to form the value.
  def string: Parser[String] = {
    val quote = literal("\"")
    (quote ~> rep(stringContent) <~ quote) ^^ { pieces => pieces.mkString }
  }
  // One piece of the body of a quoted string. The alternatives are
  // tried in order:
  //   - a raw newline is an error (unterminated string)
  //   - backslash + 1-3 octal digits yields the character with that
  //     code; the digits are interpreted in base 8
  //   - backslash + any other run of digits is an error
  //   - \n \t \r \b \f yield the corresponding control characters
  //   - backslash + newline yields a newline
  //   - backslash + any other character yields that character
  //   - anything else, up to the next backslash, newline or quote,
  //     is taken literally
  def stringContent: Parser[String] = {
    "\n" ~> err("Unterminated string") |
    """\\[0-7]{1,3}""".r ^^ { s =>
      // Drop the leading backslash and read the rest as octal.
      Integer.parseInt(s.substring(1), 8).toChar.toString } |
    """\\[0-9]+""".r ~> err("Invalid octal escape") |
    "\\n" ^^^ "\n" |
    "\\t" ^^^ "\t" |
    "\\r" ^^^ "\r" |
    "\\b" ^^^ "\b" |
    "\\f" ^^^ "\f" |
    "\\\n" ^^^ "\n" |
    """\\.""".r ^^ { _.substring(1) } |
    """[^\\\n\"]+""".r
  }
  // A run of one or more ASCII digits, returned as text.
  def integer: Parser[String] = regex("""[0-9]+""".r)

  // Semantic "lex-like" parsers for getting arguments to directives

  // A string-valued argument: optional leading whitespace followed
  // by an atom, a quoted string, or an integer (tried in that order).
  def str: Parser[String] =
    (opt(ws) ~ (atom | string | integer)) ^^ { case _ ~ token => token }
  // An integer-valued argument: optional leading whitespace followed
  // by a run of digits, converted to Int.
  //
  // The digit run is unbounded, so the conversion can overflow. That
  // case is reported as a parse error instead of letting the
  // NumberFormatException escape from the parser.
  def int: Parser[Int] = opt(ws) ~> integer >> { digits =>
    try {
      success(digits.toInt)
    } catch {
      case _: NumberFormatException =>
        err(s"Integer value '$digits' is out of range")
    }
  }

  // Each parser past this point is a family of parsers
  // parameterized by the current configuration. (SilkConfig =>
  // Parser[SilkConfig])
  //
  // Basically, you call foo(conf) and it returns a parser that
  // accepts some input and returns a result with a new
  // configuration modified by that input. An easy example is
  // defaultClass(conf), which reads "default-class <className>" and
  // returns a copy of conf with its defaultClassName set to the new
  // name.

  // Folds a per-line parser over consecutive lines of input.
  //
  // Starting from the value v, blank lines are skipped and then p(v)
  // is tried (after optional leading whitespace). On success the
  // value it produced becomes the starting value for the remaining
  // lines; otherwise parsing stops here and v is the result.
  def lines[T](p: T => Parser[T])(v: T): Parser[T] = {
    // One directive parsed against the current value, followed by
    // the rest of the lines parsed against the updated value.
    def directiveThenRest: Parser[T] =
      (opt(ws) ~> p(v)).into(next => lines(p)(next))

    // No (further) directive: keep the value unchanged.
    def finished: Parser[T] = success(v)

    blankLines ~> (directiveThenRest | finished)
  }

  // Top-level directives ////////////////////////////////////////////

  // class <name>
  //   ...
  // end class
  //
  // Adds the class to the configuration, then applies the
  // class-level directives found up to the closing "end class"
  // line. The closing line may be indented, just like the directive
  // lines inside the section (whose leading whitespace is skipped
  // by `lines`).
  def classSection(conf: SilkConfig): Parser[SilkConfig] =
    ("class" ~> str <~ eol) >> { className =>
      lines(classDef(className))(conf.withClass(className))
    } <~ (opt(ws) ~> "end" ~> ws ~> "class" ~> eol)

  // default-class <className>
  //
  // Yields the configuration with its default class name replaced
  // by the given name.
  def defaultClass(conf: SilkConfig): Parser[SilkConfig] = {
    val directive = "default-class" ~> str <~ eol
    directive.map(name => conf.withDefaultClassName(name))
  }

  // group <name>
  //   ...
  // end group
  //
  // Adds the group to the configuration, then applies the
  // group-level directives found up to the closing "end group"
  // line. The closing line may be indented, just like the directive
  // lines inside the section (whose leading whitespace is skipped
  // by `lines`).
  def groupSection(conf: SilkConfig): Parser[SilkConfig] =
    ("group" ~> str <~ eol) >> { groupName =>
      lines(groupDef(groupName))(conf.withGroup(groupName))
    } <~ (opt(ws) ~> "end" ~> ws ~> "group" ~> eol)

  // include <path>
  //
  // Parses the named file as a complete sequence of top-level
  // directives, starting from the current configuration, and yields
  // the configuration that results.
  //
  // The file is closed once it has been parsed. A syntax error in
  // the included file is reported as a parse error that names the
  // file and carries the original message. I/O failures (for
  // example a missing file) still propagate as exceptions.
  def include(conf: SilkConfig): Parser[SilkConfig] =
    "include" ~> str <~ eol >> { filename =>
      // NOTE(review): the reader uses the platform default charset.
      val reader = new InputStreamReader(new FileInputStream(filename))
      try {
        parseAll(top(conf), reader) match {
          case Success(included, _) =>
            success(included)
          case failed: NoSuccess =>
            err(s"Error in included file '$filename': ${failed.msg}")
        }
      } finally {
        reader.close()
      }
    }

  // packing-logic <path>
  //
  // Yields the configuration with its packing-logic path set to the
  // given path.
  def packingLogic(conf: SilkConfig): Parser[SilkConfig] = {
    val directive = "packing-logic" ~> str <~ eol
    directive.map(logicPath => conf.withPackingLogicPath(logicPath))
  }

  // path-format <format>
  //
  // Yields the configuration with its path format set to the given
  // format string.
  def pathFormat(conf: SilkConfig): Parser[SilkConfig] = {
    val directive = "path-format" ~> str <~ eol
    directive.map(fmt => conf.withPathFormat(fmt))
  }

  // sensor <id> <name> [<description>]
  //
  // Registers a sensor. The directive is rejected with a parse
  // error when the id is not below SK_MAX_NUM_SENSORS, the name is
  // longer than SK_MAX_STRLEN_SENSOR, the id or the name is already
  // in use, or a description is given although the configuration
  // does not support sensor descriptions.
  def sensor(conf: SilkConfig): Parser[SilkConfig] =
    ("sensor" ~> int ~ str ~ opt(str) <~ eol).into {
      case id ~ name ~ description =>
        // The first rule that is violated, if any. The checks are
        // evaluated in this order and stop at the first hit.
        val problem: Option[String] =
          if ( id >= SK_MAX_NUM_SENSORS ) {
            Some(s"Sensor id '${id}' is greater than maximum of ${SK_MAX_NUM_SENSORS - 1}")
          } else if ( name.length > SK_MAX_STRLEN_SENSOR ) {
            Some(s"Sensor name '$name' is longer than maximum of ${SK_MAX_STRLEN_SENSOR}")
          } else if ( conf.sensors.get(Sensor(id)).isDefined ) {
            Some(s"A sensor with id '$id' already exists")
          } else if ( conf.sensors.byName.get(name).isDefined ) {
            Some(s"A sensor with name '$name' already exists")
          } else if ( description.isDefined &&
                      !conf.supportsSensorDescriptions ) {
            Some("Sensor descriptions only allowed when file's version is 2 or greater")
          } else {
            None
          }
        problem match {
          case Some(message) =>
            err(message)
          case None =>
            success(conf.withSensor(Sensor(id), name, description))
        }
    }

  // version <num>
  //
  // Records the file version. Only one version directive is
  // allowed, and only versions 1 and 2 are accepted.
  def version(conf: SilkConfig): Parser[SilkConfig] =
    ("version" ~> int <~ eol).into { requested =>
      requested match {
        case _ if conf.version.isDefined =>
          err("Multiple version commands specified")
        case n if n < 1 || n > 2 =>
          err(s"Unsupported version '${n}'")
        case n =>
          success(conf.withVersion(n))
      }
    }

  // Any one of the top-level directives. The alternatives are tried
  // in the order listed.
  def confDef(conf: SilkConfig): Parser[SilkConfig] =
    classSection(conf) |
      defaultClass(conf) |
      groupSection(conf) |
      include(conf) |
      packingLogic(conf) |
      pathFormat(conf) |
      sensor(conf) |
      version(conf)

  // Class-level directives //////////////////////////////////////////

  // default-types <name> ...
  //
  // Yields the configuration with the listed type names recorded as
  // the default types of the class being defined.
  def classDefaultTypes(className: String)(conf: SilkConfig):
      Parser[SilkConfig] = {
    val directive = "default-types" ~> rep(str) <~ eol
    directive.map(names => conf.withClassDefaultTypes(className, names))
  }

  // sensors <name-or-group> ...
  //
  // Expands the listed sensor specs via the configuration and adds
  // the resulting sensors to the class being defined. A failed
  // expansion becomes a parse error naming the class.
  def classSensors(className: String)(conf: SilkConfig): Parser[SilkConfig] = {
    val directive = "sensors" ~> rep(str) <~ eol
    directive.into { specs =>
      conf.expandSensorSpecs(specs) match {
        case TSuccess(expanded) =>
          success(conf.withClassSensors(className, expanded))
        case TFailure(cause) =>
          err(s"Cannot add to class '${className}': ${cause.getMessage}")
      }
    }
  }

  // type <id> <name> [<flowTypeName>]
  //
  // Registers a flowtype for the class being defined. When no
  // flowtype name is given, one is generated by appending the type
  // name to the class name.
  //
  // The directive is rejected with a parse error when the type name
  // or the flowtype name is longer than SK_MAX_STRLEN_FLOWTYPE, the
  // id is not below SK_MAX_NUM_FLOWTYPES, or the id, the flowtype
  // name, or the (class, type) pair is already in use.
  def classType(className: String)(conf: SilkConfig): Parser[SilkConfig] =
    "type" ~> int ~ str ~ opt(str) <~ eol >> {
      case id ~ typeName ~ flowTypeOpt =>
        val flowTypeName = flowTypeOpt.getOrElse(className + typeName)
        if ( typeName.length > SK_MAX_STRLEN_FLOWTYPE ) {
          err(s"The type-name '${typeName}' is longer than the maximum of $SK_MAX_STRLEN_FLOWTYPE")
        } else if ( flowTypeOpt.isDefined && flowTypeName.length > SK_MAX_STRLEN_FLOWTYPE ) {
          err(s"The flowtype-name '${flowTypeName}' is longer than the maximum of $SK_MAX_STRLEN_FLOWTYPE")
        } else if ( flowTypeName.length > SK_MAX_STRLEN_FLOWTYPE ) {
          // Only reached when the name was generated rather than given.
          err(s"The generated flowtype-name '${flowTypeName}' is longer than the maximum of $SK_MAX_STRLEN_FLOWTYPE")
        } else if ( id >= SK_MAX_NUM_FLOWTYPES ) {
          // Report the id limit (not the name-length limit) here.
          err(s"Type id '${id}' is greater than maximum of ${SK_MAX_NUM_FLOWTYPES - 1}")
        } else if ( conf.flowTypes.contains(FlowType(id)) ) {
          err(s"A type with id '${id}' already exists")
        } else if ( conf.flowTypes.byName.contains(flowTypeName) ) {
          err(s"A type with prefix '${flowTypeName}' already exists")
        } else if ( conf.flowTypes.byClassAndType.contains((className, typeName)) ) {
          err(s"The type '${typeName}' for class '${className}' already exists")
        } else {
          success(conf.withFlowType(
            FlowType(id), className, typeName, flowTypeName))
        }
    }

  // Any one of the class-level directives, for the class with the
  // given name. The alternatives are tried in the order listed.
  def classDef(className: String)(conf: SilkConfig): Parser[SilkConfig] =
    classDefaultTypes(className)(conf) |
      classSensors(className)(conf) |
      classType(className)(conf)

  // Group-level directives //////////////////////////////////////////

  // sensors <name-or-group> ...
  //
  // Expands the listed sensor specs via the configuration and adds
  // the resulting sensors to the group being defined. A failed
  // expansion becomes a parse error naming the group.
  def groupSensors(groupName: String)(conf: SilkConfig): Parser[SilkConfig] = {
    val directive = "sensors" ~> rep(str) <~ eol
    directive.into { specs =>
      conf.expandSensorSpecs(specs) match {
        case TSuccess(expanded) =>
          success(conf.withGroupSensors(groupName, expanded))
        case TFailure(cause) =>
          err(s"Cannot add to group '${groupName}': ${cause.getMessage}")
      }
    }
  }

  // Any one of the group-level directives, for the group with the
  // given name. "sensors" is currently the only one.
  def groupDef(groupName: String)(conf: SilkConfig): Parser[SilkConfig] = {
    groupSensors(groupName)(conf)
  }

  // Entrypoints /////////////////////////////////////////////////////

  // Parser for a whole configuration file: any number of top-level
  // directives, folded over the given starting configuration.
  def top(conf: SilkConfig): Parser[SilkConfig] =
    lines((current: SilkConfig) => confDef(current))(conf)

}

// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact permission@sei.cmu.edu for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.tx
//
// DM20-1143
