#!/usr/bin/env bash
#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Abort on the first unhandled command failure.
set -e

# Figure out where ADAM is installed: the parent of the directory that holds
# this script, as an absolute path.
#  - Every expansion is quoted so install paths containing spaces work.
#  - `&&` (not `;`) makes a failed `cd` abort the script instead of silently
#    recording the caller's working directory.
#  - CDPATH is cleared and cd's stdout discarded so a CDPATH hit cannot leak
#    an extra line into the captured value.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")/.." >/dev/null && pwd)"

#######################################
# Warn when the user still appears to rely on ADAM_OPTS: the variable is
# non-empty and no command-line arguments were supplied.
# Globals:   ADAM_OPTS (read)
# Arguments: the arguments this script was invoked with
# Outputs:   a two-line warning on stderr when the condition holds
# Returns:   0 always
#######################################
warn_on_legacy_adam_opts() {
  # Test the argument count rather than `-z $@`, which depends on bash
  # implicitly joining the positional parameters inside [[ ]].
  if [[ $# -eq 0 && -n "${ADAM_OPTS:-}" ]]; then
    echo "WARNING: Passing Spark arguments via ADAM_OPTS was recently removed." 1>&2
    echo "Run adam-shell instead as adam-shell <spark-args>" 1>&2
  fi
}

warn_on_legacy_adam_opts "$@"

# Find ADAM cli assembly jar.
# Look in "$SCRIPT_DIR/repo" when that directory exists; otherwise fall back
# to "$SCRIPT_DIR/adam-assembly/target".
ADAM_CLI_JAR=
if [ -d "$SCRIPT_DIR/repo" ]; then
  ASSEMBLY_DIR="$SCRIPT_DIR/repo"
else
  ASSEMBLY_DIR="$SCRIPT_DIR/adam-assembly/target"
fi

# Collect candidate jars in a single pass using a glob instead of parsing the
# output of `ls` twice. A candidate must match the assembly naming pattern and
# must not be a javadoc jar.
jar_name_re='^adam_[0-9A-Za-z\.-]*\.jar$'
assembly_jars=()
for jar_path in "$ASSEMBLY_DIR"/adam_*.jar; do
  # An unmatched glob is left as the literal pattern; skip it.
  [[ -e "$jar_path" ]] || continue
  jar_name="${jar_path##*/}"
  if [[ "$jar_name" =~ $jar_name_re && "$jar_name" != *javadoc* ]]; then
    assembly_jars+=("$jar_name")
  fi
done
num_jars="${#assembly_jars[@]}"

if [ "$num_jars" -eq 0 ]; then
  echo "Failed to find ADAM assembly in $ASSEMBLY_DIR." 1>&2
  echo "You need to build ADAM before running this program." 1>&2
  exit 1
fi

# More than one match is ambiguous: list them and let the user decide.
if [ "$num_jars" -gt 1 ]; then
  echo "Found multiple ADAM cli assembly jars in $ASSEMBLY_DIR:" 1>&2
  printf '%s\n' "${assembly_jars[@]}" 1>&2
  echo "Please remove all but one jar." 1>&2
  exit 1
fi

ADAM_CLI_JAR="${ASSEMBLY_DIR}/${assembly_jars[0]}"

# Resolve the spark-shell launcher: use $SPARK_HOME/bin/spark-shell when
# SPARK_HOME is set, otherwise look spark-shell up on PATH.
if [ -z "${SPARK_HOME:-}" ]; then
  # `command -v` is the POSIX-specified lookup (unlike `which`); `|| true`
  # keeps a failed lookup from aborting under `set -e` so the friendlier
  # message below is shown instead.
  SPARK_SHELL="$(command -v spark-shell || true)"
else
  SPARK_SHELL="$SPARK_HOME"/bin/spark-shell
fi
if [ -z "$SPARK_SHELL" ]; then
  # Errors go to stderr, like the assembly-jar diagnostics in this script.
  echo "SPARK_HOME not set and spark-shell not on PATH; Aborting." 1>&2
  exit 1
fi
echo "Using SPARK_SHELL=$SPARK_SHELL"

# Start spark-shell with the Kryo serializer and ADAM's Kryo registrator
# configured, the ADAM assembly jar passed via --jars, and every argument
# given to this script forwarded unchanged.
# The jar path is quoted so an install location containing whitespace or glob
# characters is passed to --jars as a single word.
"$SPARK_SHELL" \
    --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
    --conf spark.kryo.registrator=org.bdgenomics.adam.serialization.ADAMKryoRegistrator \
    --jars "${ADAM_CLI_JAR}" \
    "$@"
