-
Notifications
You must be signed in to change notification settings - Fork 0
/
start_spark.sh
executable file
·49 lines (43 loc) · 1.04 KB
/
start_spark.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env bash
#
# Submit the "multivac-pubmed" Spark application (class "Main") via
# spark-submit.
#
# Usage: start_spark.sh [-m master] [-i input_path] [-o output_path] [-j jar]
#   -m  Spark master URL                  (default: local[4])
#   -i  input path passed to the job      (default: PubMed update XML files)
#   -o  output path passed to the job     (default: data/output/parquet)
#   -j  application jar, may be a glob    (default: target/scala-2.11/*.jar)
#   -h  print usage and exit
#
# The deploy mode is derived from the master: "yarn*" runs in cluster mode,
# everything else runs in client mode.
#
# Functions below report failure with `return` instead of `exit`, and the
# script ends with `main "$@"`, so the script's exit status is the status of
# main (normally that of spark-submit).

# Default values for Spark local mode; each one can be overridden by a flag.
MASTER="local[4]"
INPUTPATH="data/ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/*.xml"
OUTPUTPATH="data/output/parquet"
JAR="target/scala-2.11/*.jar"
MODE="client"
# Filled by resolve_jars: the expansion of $JAR, one element per file.
JAR_FILES=()

#######################################
# Print a short usage message.
# Outputs: usage text on stderr
#######################################
usage() {
  printf 'Usage: %s [-m master] [-i input_path] [-o output_path] [-j jar]\n' \
    "${0##*/}" >&2
}

#######################################
# Map a Spark master URL to a deploy mode.
# Arguments: $1 - master URL
# Outputs:   "cluster" for masters starting with "yarn", otherwise "client"
#######################################
deploy_mode_for() {
  case "$1" in
    yarn*) printf 'cluster\n' ;;
    *) printf 'client\n' ;;
  esac
}

#######################################
# Expand a jar path or glob pattern into the JAR_FILES array.
# Globals:   JAR_FILES (written)
# Arguments: $1 - jar path or glob pattern
# Outputs:   diagnostics on stderr
# Returns:   0 on success, 1 if the pattern is empty or matches nothing
#######################################
resolve_jars() {
  local pattern=$1
  local has_glob=0
  # An empty IFS disables word splitting, so the unquoted expansion below is
  # subject to filename expansion only and paths with spaces stay intact.
  local IFS=

  # shellcheck disable=SC2206  # globbing is intended here
  JAR_FILES=( $pattern )

  if (( ${#JAR_FILES[@]} == 0 )); then
    printf 'error: no application jar given\n' >&2
    return 1
  fi

  case "$pattern" in
    *'*'* | *'?'* | *'['*) has_glob=1 ;;
  esac
  # An unmatched glob is left as the literal pattern; fail early instead of
  # handing that literal to spark-submit. Paths without glob characters are
  # passed through unchecked, since they may not refer to a local file.
  if (( has_glob )) && [[ ! -e "${JAR_FILES[0]}" ]]; then
    printf 'error: no jar matches pattern: %s\n' "$pattern" >&2
    return 1
  fi

  if (( ${#JAR_FILES[@]} > 1 )); then
    # spark-submit treats the first path as the application jar and every
    # following word as an argument to the application.
    printf 'warning: %d jars match %s; using %s as the application jar\n' \
      "${#JAR_FILES[@]}" "$pattern" "${JAR_FILES[0]}" >&2
  fi
  return 0
}

#######################################
# Parse the command line, report the effective settings and submit the job.
# Globals:   MASTER, INPUTPATH, OUTPUTPATH, JAR, MODE (written), JAR_FILES (read)
# Arguments: the script's command-line arguments
# Returns:   2 on a usage error, 1 if no jar can be resolved, otherwise the
#            exit status of spark-submit
#######################################
main() {
  local option now
  local OPTIND=1

  now="$(date +'%d-%m-%Y')"
  echo "today is $now"

  # The leading ':' puts getopts in silent mode so bad options are reported
  # here and abort the run instead of falling through to a submission.
  while getopts ':m:i:o:j:h' option; do
    case "$option" in
      m) MASTER=$OPTARG ;;
      i) INPUTPATH=$OPTARG ;;
      o) OUTPUTPATH=$OPTARG ;;
      j) JAR=$OPTARG ;;
      h)
        usage
        return 0
        ;;
      :)
        printf 'error: option -%s requires an argument\n' "$OPTARG" >&2
        usage
        return 2
        ;;
      *)
        printf 'error: unknown option -%s\n' "$OPTARG" >&2
        usage
        return 2
        ;;
    esac
  done

  MODE=$(deploy_mode_for "$MASTER")

  echo "MASTER $MASTER"
  echo "INPUT PATH $INPUTPATH"
  echo "OUTPUT PATH $OUTPUTPATH"
  echo "JAR $JAR"
  echo "MODE $MODE"

  resolve_jars "$JAR" || return 1

  # The spark.conf.*.value entries carry the settings as Spark configuration
  # properties (presumably read back by Main; confirm against the job code).
  spark-submit \
    --class "Main" \
    --name "multivac-pubmed" \
    --master "$MASTER" \
    --deploy-mode "$MODE" \
    --driver-memory 4g \
    --executor-cores 5 \
    --executor-memory 12g \
    --conf spark.dynamicAllocation.enabled=true \
    --conf spark.dynamicAllocation.maxExecutors=20 \
    --conf spark.conf.master.value="$MASTER" \
    --conf spark.conf.inputPath.value="$INPUTPATH" \
    --conf spark.conf.outputPath.value="$OUTPUTPATH" \
    "${JAR_FILES[@]}"
}

main "$@"