#!/usr/bin/env bash
#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Figure out where ADAM is installed: the parent of the directory that holds
# this script. Every expansion is quoted so an install path containing
# whitespace or glob characters is passed to dirname/cd as a single word, and
# `&&` ensures a failed `cd` yields an empty value rather than silently
# recording the caller's current working directory.
ADAM_REPO="$(cd "$(dirname "$0")/.." && pwd)"

# Capture the stdout of the two helper scripts that live under bin/.
CLASSPATH=$("${ADAM_REPO}/bin/compute-adam-classpath.sh")
ADAM_JARS=$("${ADAM_REPO}/bin/compute-adam-jars.sh")

# When ADDL_JARS is set and non-empty, extend ADAM_JARS with a comma followed
# by its value; otherwise the `:+` expansion is empty and ADAM_JARS is
# reassigned to itself unchanged.
ADAM_JARS="${ADAM_JARS}${ADDL_JARS:+,${ADDL_JARS}}"

# Both option rewrites below go through the same helper script.
append_to_option="${ADAM_REPO}/bin/append_to_option.py"

# append ADAM_JARS to the --jars option, if any; the caller's arguments are
# forwarded one word each via "$@"
NEW_OPTIONS=$("$append_to_option" , --jars "$ADAM_JARS" "$@")

# append CLASSPATH to the --driver-class-path
# NOTE(review): the previous result is handed over here as ONE quoted
# argument, whereas the call above passes each option separately -- confirm
# the helper copes with both forms.
NEW_OPTIONS=$("$append_to_option" ":" --driver-class-path "$CLASSPATH" "$NEW_OPTIONS")

# Pick the pyspark launcher: the one under $SPARK_HOME/bin when SPARK_HOME is
# set and non-empty, otherwise whatever `pyspark` resolves to on PATH (after
# printing a hint, followed by a blank line, on stdout).
if [[ -n "$SPARK_HOME" ]]; then
  SPARK_SHELL="${SPARK_HOME}/bin/pyspark"
else
  printf '%s\n\n' "Attempting to use 'pyspark' on default path; you might need to set SPARK_HOME"
  SPARK_SHELL=pyspark
fi

# Fixed Spark settings passed on every launch, kept as one array element per
# command-line word.
kryo_conf=(
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer
  --conf spark.kryo.registrator=org.bdgenomics.adam.serialization.ADAMKryoRegistrator
  --conf spark.kryoserializer.buffer.mb=4
  --conf spark.kryo.referenceTracking=true
)

# submit the job to Spark
# NEW_OPTIONS is a flat string, so it is left unquoted on purpose: the shell
# must split it back into individual arguments.
# shellcheck disable=SC2086
"$SPARK_SHELL" "${kryo_conf[@]}" $NEW_OPTIONS
