#!/usr/bin/env bash
#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Figure out where ADAM is installed: the parent of the directory that holds
# this script. Both "$0" and the dirname result are quoted so an installation
# path containing spaces or glob characters still resolves, and a failed `cd`
# aborts instead of silently reporting the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")/.." && pwd)" || {
  echo "Failed to resolve the ADAM install directory from '$0'" >&2
  exit 1
}

# Get list of required jars for ADAM
# NOTE(review): the exit status of compute-adam-jars.sh is not checked; if the
# helper fails, ADAM_JARS is simply left empty.
ADAM_JARS=$("$SCRIPT_DIR"/bin/compute-adam-jars.sh)

# Join any extra jars supplied through ADDL_JARS onto the list with a comma
if [ -n "$ADDL_JARS" ]; then
  ADAM_JARS="$ADAM_JARS,$ADDL_JARS"
fi

# append ADAM_JARS to the --jars option, if any
# NOTE(review): the helper's output is captured as one flat string, so the
# original argument boundaries of "$@" are not preserved in NEW_OPTIONS.
NEW_OPTIONS=$("$SCRIPT_DIR"/bin/append_to_option.py , --jars "$ADAM_JARS" "$@")

# Locate the Maven-style repository that holds the ADAM jars.
# Binary distribution
REPO_DIR="$SCRIPT_DIR/repo"
if [ ! -d "$REPO_DIR" ]; then
  # Fallback to source directory
  REPO_DIR="$SCRIPT_DIR/adam-cli/target/appassembler/repo/"
fi

# Find the ADAM CLI jar
ADAM_DIR="$REPO_DIR/org/bdgenomics/adam/"

# List the directory once (quoted, so paths with spaces work) and keep only the
# entries whose name contains "cli"; exactly one such entry must exist.
versions_list=$(ls "$ADAM_DIR" | grep cli)
num_versions=$(printf '%s' "$versions_list" | grep -c cli)
if [ "$num_versions" -eq 0 ]; then
  echo "Failed to find adam-cli jar in $ADAM_DIR" >&2
  echo "You need to build ADAM before running this program." >&2
  exit 1
fi
if [ "$num_versions" -gt 1 ]; then
  echo "Found multiple ADAM CLI versions in $ADAM_DIR:" >&2
  echo "$versions_list" >&2
  echo "Please remove all but one." >&2
  exit 1
fi
CLI="$versions_list"
CLI_DIR="${ADAM_DIR}/${CLI}"

# Expand the jar pattern directly into an array instead of parsing `ls`, so
# that zero or multiple matches are detected here rather than surfacing later
# as a confusing spark-submit failure. Only the directory part is quoted; the
# pattern itself must stay unquoted to be expanded.
cli_jars=("$CLI_DIR"/*/adam-cli_2.1[01]-*.jar)
if [ "${#cli_jars[@]}" -gt 1 ]; then
  echo "Found multiple adam-cli jars in $CLI_DIR:" >&2
  printf '%s\n' "${cli_jars[@]}" >&2
  echo "Please remove all but one." >&2
  exit 1
fi
# With no match the pattern is left unexpanded (or the array is empty), so the
# first element is not an existing file.
if [ ! -f "${cli_jars[0]}" ]; then
  echo "Failed to find adam-cli jar in $CLI_DIR" >&2
  echo "You need to build ADAM before running this program." >&2
  exit 1
fi
ADAM_CLI_JAR="${cli_jars[0]}"

# Find spark-submit script
# SPARK_HOME is mandatory; the diagnostic goes to stderr so it cannot be
# mistaken for job output by anything capturing stdout.
if [ -z "$SPARK_HOME" ]; then
  echo "SPARK_HOME must be set for 'adam-submit'" >&2
  exit 1
fi
SPARK_SUBMIT="$SPARK_HOME"/bin/spark-submit

# Split args into Spark args and ADAM args
# NOTE: if Spark uses gatherSparkSubmitOpts in spark-submit, this is unnecessary
# Print the adam-submit command-line synopsis on stdout and terminate the
# shell with status 0.
usage() {
  printf '%s\n' "adam-submit <spark-args> <adam-args>"
  exit 0
}
# Load Spark's shell helpers, preferring the libexec layout when it exists.
if [ -f "$SPARK_HOME"/libexec/bin/utils.sh ]; then
  source "$SPARK_HOME"/libexec/bin/utils.sh
else
  source "$SPARK_HOME"/bin/utils.sh
fi

# gatherSparkSubmitOpts is expected to come from utils.sh (not visible here).
# Without it SUBMISSION_OPTS/APPLICATION_OPTS are never populated by this
# script and the job would be submitted without the user's arguments, so stop.
if ! command -v gatherSparkSubmitOpts > /dev/null 2>&1; then
  echo "Failed to load gatherSparkSubmitOpts from utils.sh under $SPARK_HOME" >&2
  exit 1
fi

# Name of the function the helper should call to print usage.
SUBMIT_USAGE_FUNCTION=usage

# NEW_OPTIONS is a flat string, so it has to be word-split here. Pathname
# expansion is switched off while splitting so that arguments containing
# *, ? or [ (e.g. input path globs) are passed through literally instead of
# being expanded against the current directory.
set -f
new_options=($NEW_OPTIONS)
set +f
gatherSparkSubmitOpts "${new_options[@]}"

# submit the job to Spark
# ADAM_OPTS may carry several whitespace-separated options, so it is split
# into words on purpose; pathname expansion is disabled during the split so a
# value containing *, ? or [ is not expanded against the current directory.
# An unset or empty ADAM_OPTS contributes no arguments at all.
set -f
adam_opts=(${ADAM_OPTS:-})
set +f

# Executor and driver memory default to 4g unless overridden through
# ADAM_EXECUTOR_MEMORY / ADAM_DRIVER_MEMORY. The script's exit status is that
# of spark-submit, since it is the last command executed.
"$SPARK_SUBMIT" \
  --class org.bdgenomics.adam.cli.ADAMMain \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --conf spark.kryo.registrator=org.bdgenomics.adam.serialization.ADAMKryoRegistrator \
  --conf spark.kryoserializer.buffer.mb=4 \
  --conf spark.kryo.referenceTracking=true \
  "${adam_opts[@]}" \
  --conf "spark.executor.memory=${ADAM_EXECUTOR_MEMORY:-4g}" \
  --driver-memory "${ADAM_DRIVER_MEMORY:-4g}" \
  "${SUBMISSION_OPTS[@]}" \
  "$ADAM_CLI_JAR" \
  "${APPLICATION_OPTS[@]}"
