// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt

package org.cert.netsa.mothra.tools

import java.nio.file.{Path => JavaPath}
import org.apache.hadoop.fs.{Path => HadoopPath}
import org.cert.netsa.mothra.packer.CorePacker

/** Command-line option definitions for the `mothra-repacker` tool.
  *
  * Every option is registered through the parser DSL inherited from
  * `ToolOptions`, and its parsed value is written into the mutable field
  * declared directly above the registration. Fields typed as `Option`
  * remain `None` when the matching option is not supplied; the plain
  * reference fields belong to required options and stay `null` until the
  * parser assigns them.
  *
  * NOTE(review): the declaration order is kept exactly as-is because it
  * presumably determines the layout of the usage message -- confirm
  * against `ToolOptions` before reordering anything.
  */
class RepackerOptions extends ToolOptions("mothra-repacker") {

  note("Required options:\n")

  /** Directories named by the positional SOURCE arguments, in the order given. */
  var sourceDirs: Seq[HadoopPath] = Seq.empty
  arg[HadoopPath]("SOURCE-1 .. SOURCE-N")
    .required()
    .unbounded()
    .foreach { dir => sourceDirs = sourceDirs :+ dir }
    .text("HDFS directories from which to read files to repack.")
    .validate(checkHadoopDirExists("Argument SOURCE"))

  /** Value of the required `--outgoing-dir` option. */
  var outgoingDir: HadoopPath = _
  opt[HadoopPath]("outgoing-dir")
    .valueName("DIR")
    .required()
    .foreach { dir => outgoingDir = dir }
    .text("HDFS directory to store packed file repository.")
    .validate(checkHadoopDirExists("Option --outgoing-dir"))

  /** Value of the required `--work-dir` option (a local, non-HDFS path). */
  var workDir: JavaPath = _
  opt[JavaPath]("work-dir")
    .valueName("DIR")
    .required()
    .foreach { dir => workDir = dir }
    .text("Local directory to store files while packing.")
    .validate(checkJavaDirExists("Option --work-dir"))

  /** Value of the required `--packing-logic` option. */
  var packingLogicFile: HadoopPath = _
  opt[HadoopPath]("packing-logic")
    .valueName("FILE")
    .required()
    .foreach { file => packingLogicFile = file }
    .text("HDFS file containing packing configuration.")
    .validate(checkHadoopFileExists("Option --packing-logic"))

  note("\nOptions:\n")

  help("help")
    .text("Print this message and exit.")

  version("version")
    .text(s"Print the version number of ${toolName} and exit.")

  note("")

  /** Value of `--archive-dir`, if given. */
  var archiveDir: Option[HadoopPath] = None
  opt[HadoopPath]("archive-dir")
    .valueName("DIR")
    .foreach { dir => archiveDir = Some(dir) }
    .text("Archive working files to HDFS after use.")
    .validate(checkHadoopDirExists("Option --archive-dir"))

  /** Value of `--max-scan-jobs`, if given. */
  var maxScanJobs: Option[Int] = None
  opt[Int]("max-scan-jobs")
    .valueName("N")
    .foreach { count => maxScanJobs = Some(count) }
    .text("Scan only N input directories simultaneously.")
    .validate(checkPos("Option --max-scan-jobs"))

  /** Value of `--readers-per-scanner`, if given. */
  var readersPerScanner: Option[Int] = None
  opt[Int]("readers-per-scanner")
    .valueName("N")
    .foreach { count => readersPerScanner = Some(count) }
    .text("Use N reader threads for each source directory. (Def. 1)")
    .validate(checkPos("Option --readers-per-scanner"))

  /** Value of `--max-threads`, if given. */
  var maxThreads: Option[Int] = None
  opt[Int]("max-threads")
    .valueName("N")
    .foreach { count => maxThreads = Some(count) }
    .text("Process no more than N directories or files at once.")
    .validate(checkPos("Option --max-threads"))

  note("""
    The default value of --max-threads is computed from --readers-per-scanner
    and the number of scan jobs.""")

  note("\nOptions that control the format of packed files:\n")

  /** Result of `parseCompression` applied to the `--compression` value, if given. */
  var compression: Option[String] = None
  opt[String]("compression")
    .valueName("CODEC")
    .foreach { codec => compression = Some(parseCompression(codec)) }
    .text {
      // Mention the default codec only when one is actually configured.
      val defaultCodec = CorePacker.DEFAULT_COMPRESSION
      val defaultNote = if (defaultCodec == "") "" else s" (Def. ${defaultCodec})"
      s"Compress repacked files using this method.${defaultNote}"
    }
    // NOTE(review): every other validator here is labelled "Option --<name>";
    // this one passes the bare name "compression" -- confirm that is what
    // checkCompression expects.
    .validate(checkCompression("compression"))

  /** Value of `--hours-per-file`, if given; validated to lie in 1..24. */
  var hoursPerFile: Option[Int] = None
  opt[Int]("hours-per-file")
    .valueName("N")
    .foreach { count => hoursPerFile = Some(count) }
    .text(
      s"Store N hours (1-24) in each output file. (Def. ${CorePacker.DEFAULT_HOURS_PER_FILE})"
    )
    .validate(checkRange("Option --hours-per-file", 1, 24))

  /** Value of `--maximum-size` in bytes, if given. */
  var maximumSize: Option[Long] = None
  opt[Long]("maximum-size")
    .valueName("N")
    .foreach { bytes => maximumSize = Some(bytes) }
    .text("Close output files after their size reaches N bytes.")
    .validate(checkPos("Option --maximum-size"))

  /** Value of `--file-cache-size`, if given. */
  var fileCacheSize: Option[Int] = None
  opt[Int]("file-cache-size")
    .valueName("N")
    .foreach { count => fileCacheSize = Some(count) }
    .text(
      s"Keep up to N >= ${CorePacker.MINIMUM_FILE_CACHE_SIZE} files open for writing. (Def. ${CorePacker.DEFAULT_FILE_CACHE_SIZE})"
    )
    .validate(checkGE("Option --file-cache-size", CorePacker.MINIMUM_FILE_CACHE_SIZE))

  note("""
    If --maximum-size is given and repacking into the same repository that is
    being read, duplicate records may temporarily appear in the repository.""")

}

// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact permission@sei.cmu.edu for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.txt
//
// DM20-1143
