#!/usr/bin/env bash
#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# usage: cannoli-submit [<spark-args> --] <cannoli-args>

set -e

# Split args into Spark and Cannoli args
DD=False  # DD is "double dash"
PRE_DD=()
POST_DD=()
for ARG in "$@"; do
  shift
  if [[ $ARG == "--" ]]; then
    DD=True
    POST_DD=( "$@" )
    break
  fi
  PRE_DD+=("$ARG")
done

if [[ $DD == True ]]; then
  SPARK_ARGS=("${PRE_DD[@]}")
  CANNOLI_ARGS=("${POST_DD[@]}")
else
  SPARK_ARGS=()
  CANNOLI_ARGS=("${PRE_DD[@]}")
fi

# Find original directory of this script, resolving symlinks
# http://stackoverflow.com/questions/59895/can-a-bash-script-tell-what-directory-its-stored-in/246128#246128
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
    SCRIPT_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
    SOURCE="$(readlink "$SOURCE")"
    [[ $SOURCE != /* ]] && SOURCE="$SCRIPT_DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located
done
SCRIPT_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
INSTALL_DIR="$SCRIPT_DIR/.."

# Find Cannoli cli assembly jar
CANNOLI_CLI_JAR=
if [ -d "$INSTALL_DIR/repo" ]; then
  ASSEMBLY_DIR="$INSTALL_DIR/repo"
else
  ASSEMBLY_DIR="$INSTALL_DIR/assembly/target"
fi

num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^cannoli[0-9A-Za-z\.\_\-]*\.jar$" | grep -v javadoc | grep -v sources | wc -l)"
if [ "$num_jars" -eq "0" ]; then
  echo "Failed to find Cannoli cli assembly in $ASSEMBLY_DIR." 1>&2
  echo "You need to build Cannoli before running this program." 1>&2
  exit 1
fi

ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^cannoli[0-9A-Za-z\.\_\-]*\.jar$" | grep -v javadoc | grep -v sources || true)"
if [ "$num_jars" -gt "1" ]; then
  echo "Found multiple Cannoli cli assembly jars in $ASSEMBLY_DIR:" 1>&2
  echo "$ASSEMBLY_JARS" 1>&2
  echo "Please remove all but one jar." 1>&2
  exit 1
fi

CANNOLI_CLI_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"

# Find spark-submit script
if [ -z "$SPARK_HOME" ]; then
  SPARK_SUBMIT=$(which spark-submit || echo)
else
  SPARK_SUBMIT="$SPARK_HOME"/bin/spark-submit
fi
if [ -z "$SPARK_SUBMIT" ]; then
  echo "SPARK_HOME not set and spark-submit not on PATH; Aborting."
  exit 1
fi
echo "Using SPARK_SUBMIT=$SPARK_SUBMIT"

# submit the job to Spark
"$SPARK_SUBMIT" \
  --class org.bdgenomics.cannoli.cli.Cannoli \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --conf spark.kryo.registrator=org.bdgenomics.adam.serialization.ADAMKryoRegistrator \
  "${SPARK_ARGS[@]}" \
  "$CANNOLI_CLI_JAR" \
  "${CANNOLI_ARGS[@]}"
