73 changes: 45 additions & 28 deletions bin/cloud-local.sh
@@ -84,9 +84,11 @@ function download_packages {
local maven=${pkg_src_maven}

declare -a urls=("${apache_archive_url}/hadoop/common/hadoop-${pkg_hadoop_ver}/hadoop-${pkg_hadoop_ver}.tar.gz"
"${apache_archive_url}/zookeeper/zookeeper-${pkg_zookeeper_ver}/zookeeper-${pkg_zookeeper_ver}.tar.gz"
"${apache_archive_url}/spark/spark-${pkg_spark_ver}/spark-${pkg_spark_ver}-bin-${pkg_spark_hadoop_ver}.tgz")
"${apache_archive_url}/zookeeper/zookeeper-${pkg_zookeeper_ver}/zookeeper-${pkg_zookeeper_ver}.tar.gz")

if [[ "$spark_enabled" -eq 1 ]]; then
urls=("${urls[@]}" "${apache_archive_url}/spark/spark-${pkg_spark_ver}/spark-${pkg_spark_ver}-bin-${pkg_spark_hadoop_ver}.tgz")
fi

if [[ "$kafka_enabled" -eq 1 ]]; then
urls=("${urls[@]}" "${apache_archive_url}/kafka/${pkg_kafka_ver}/kafka_${pkg_kafka_scala_ver}-${pkg_kafka_ver}.tgz")
@@ -134,28 +136,27 @@ function unpackage {
[[ "$acc_enabled" -eq 1 ]] && (cd -P "${CLOUD_HOME}" && tar $targs "${CLOUD_HOME}/pkg/accumulo-${pkg_accumulo_ver}-bin.tar.gz") && echo "Unpacked accumulo"
[[ "$hbase_enabled" -eq 1 ]] && (cd -P "${CLOUD_HOME}" && tar $targs "${CLOUD_HOME}/pkg/hbase-${pkg_hbase_ver}-bin.tar.gz") && echo "Unpacked hbase"
[[ "$zeppelin_enabled" -eq 1 ]] && (cd -P "${CLOUD_HOME}" && tar $targs "${CLOUD_HOME}/pkg/zeppelin-${pkg_zeppelin_ver}-bin-all.tgz") && echo "Unpacked zeppelin"
(cd -P "${CLOUD_HOME}" && tar $targs "${CLOUD_HOME}/pkg/hadoop-${pkg_hadoop_ver}.tar.gz") && echo "Unpacked hadoop"
[[ "$kafka_enabled" -eq 1 ]] && (cd -P "${CLOUD_HOME}" && tar $targs "${CLOUD_HOME}/pkg/kafka_${pkg_kafka_scala_ver}-${pkg_kafka_ver}.tgz") && echo "Unpacked kafka"
(cd -P "${CLOUD_HOME}" && tar $targs "${CLOUD_HOME}/pkg/spark-${pkg_spark_ver}-bin-${pkg_spark_hadoop_ver}.tgz") && echo "Unpacked spark"
[[ "$spark_enabled" -eq 1 ]] \
&& (cd -P "${CLOUD_HOME}" && tar $targs "${CLOUD_HOME}/pkg/spark-${pkg_spark_ver}-bin-${pkg_spark_hadoop_ver}.tgz") && echo "Unpacked spark"
(cd -P "${CLOUD_HOME}" && tar $targs "${CLOUD_HOME}/pkg/hadoop-${pkg_hadoop_ver}.tar.gz") && echo "Unpacked hadoop"
}

function configure {
mkdir -p "${CLOUD_HOME}/tmp/staging"
cp -r ${CLOUD_HOME}/templates/* ${CLOUD_HOME}/tmp/staging/

# accumulo config before substitutions
[[ "$acc_enabled" -eq 1 ]] && cp $ACCUMULO_HOME/conf/examples/3GB/standalone/* $ACCUMULO_HOME/conf/
##[[ "$acc_enabled" -eq 1 ]] && cp $ACCUMULO_HOME/conf/examples/3GB/standalone/* $ACCUMULO_HOME/conf/

## Substitute env vars
sed -i~orig "s#LOCAL_CLOUD_PREFIX#${CLOUD_HOME}#;s#CLOUD_LOCAL_HOSTNAME#${CL_HOSTNAME}#;s#CLOUD_LOCAL_BIND_ADDRESS#${CL_BIND_ADDRESS}#" ${CLOUD_HOME}/tmp/staging/*/*
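To illustrate the substitution (the template line and CLOUD_HOME value here are hypothetical), a staged template line such as

    <value>LOCAL_CLOUD_PREFIX/data/hadoop/dfs/name</value>

would be rewritten in place to

    <value>/home/user/cloud-local/data/hadoop/dfs/name</value>

with CLOUD_LOCAL_HOSTNAME and CLOUD_LOCAL_BIND_ADDRESS replaced the same way.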

if [[ "$acc_enabled" -eq 1 ]]; then
# accumulo config
# make accumulo bind to all network interfaces (so you can see the monitor from other boxes)
sed -i~orig "s/\# export ACCUMULO_MONITOR_BIND_ALL=\"true\"/export ACCUMULO_MONITOR_BIND_ALL=\"true\"/" "${ACCUMULO_HOME}/conf/accumulo-env.sh"
echo "${CL_HOSTNAME}" > ${ACCUMULO_HOME}/conf/gc
echo "${CL_HOSTNAME}" > ${ACCUMULO_HOME}/conf/masters
echo "${CL_HOSTNAME}" > ${ACCUMULO_HOME}/conf/slaves
echo "${CL_HOSTNAME}" > ${ACCUMULO_HOME}/conf/tservers
echo "${CL_HOSTNAME}" > ${ACCUMULO_HOME}/conf/monitor
echo "${CL_HOSTNAME}" > ${ACCUMULO_HOME}/conf/tracers
fi
@@ -187,11 +188,27 @@ function configure {
[[ "$zeppelin_enabled" -eq 1 ]] && cp ${CLOUD_HOME}/tmp/staging/zeppelin/* ${ZEPPELIN_HOME}/conf/

# If Spark doesn't have log4j settings, use the Spark defaults
test -f $SPARK_HOME/conf/log4j.properties || cp $SPARK_HOME/conf/log4j.properties.template $SPARK_HOME/conf/log4j.properties
[[ "$spark_enabled" -eq 1 ]] && { test -f $SPARK_HOME/conf/log4j.properties || cp $SPARK_HOME/conf/log4j.properties.template $SPARK_HOME/conf/log4j.properties; }

# configure port offsets
configure_port_offset

# As of Accumulo 2, accumulo-site.xml is no longer supported. To avoid rewriting the ports script, we just run Accumulo's built-in converter.
if [ -f "$ACCUMULO_HOME/conf/accumulo-site.xml" ]; then
rm -f "$ACCUMULO_HOME/conf/accumulo.properties"
"$ACCUMULO_HOME/bin/accumulo" convert-config \
-x "$ACCUMULO_HOME/conf/accumulo-site.xml" \
-p "$ACCUMULO_HOME/conf/accumulo.properties"
rm -f "$ACCUMULO_HOME/conf/accumulo-site.xml"
fi
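For context, a rough sketch of the conversion (instance.zookeeper.host is a standard Accumulo property; the value is illustrative): an accumulo-site.xml entry like

    <property>
      <name>instance.zookeeper.host</name>
      <value>localhost:2181</value>
    </property>

comes out of convert-config as a single accumulo.properties line:

    instance.zookeeper.host=localhost:2181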

# Configure accumulo-client.properties
if [ -f "$ACCUMULO_HOME/conf/accumulo-client.properties" ]; then
sed -i "s/.*instance.name=.*$/instance.name=$cl_acc_inst_name/" "$ACCUMULO_HOME/conf/accumulo-client.properties"
sed -i "s/.*auth.principal=.*$/auth.principal=root/" "$ACCUMULO_HOME/conf/accumulo-client.properties"
sed -i "s/.*auth.token=.*$/auth.token=$cl_acc_inst_pass/" "$ACCUMULO_HOME/conf/accumulo-client.properties"

fi
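Assuming the illustrative values cl_acc_inst_name=local and cl_acc_inst_pass=secret, the rewritten accumulo-client.properties would contain:

    instance.name=local
    auth.principal=root
    auth.token=secret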
rm -rf ${CLOUD_HOME}/tmp/staging
}

@@ -213,15 +230,15 @@ function start_first_time {

# format namenode
echo "Formatting namenode..."
$HADOOP_HOME/bin/hadoop namenode -format
$HADOOP_HOME/bin/hdfs namenode -format

# start hadoop
echo "Starting hadoop..."
$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR start namenode
$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR start secondarynamenode
$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR start datanode
$HADOOP_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager
$HADOOP_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start nodemanager
$HADOOP_HOME/bin/hdfs --config $HADOOP_CONF_DIR --daemon start namenode
$HADOOP_HOME/bin/hdfs --config $HADOOP_CONF_DIR --daemon start secondarynamenode
$HADOOP_HOME/bin/hdfs --config $HADOOP_CONF_DIR --daemon start datanode
$HADOOP_HOME/bin/yarn --config $HADOOP_CONF_DIR --daemon start resourcemanager
$HADOOP_HOME/bin/yarn --config $HADOOP_CONF_DIR --daemon start nodemanager
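The Hadoop 3 launchers used above also accept --daemon status and --daemon stop, which is useful for a quick sanity check after startup, e.g. (illustrative):

    $HADOOP_HOME/bin/hdfs --config $HADOOP_CONF_DIR --daemon status namenode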

# Wait for HDFS to exit safemode:
echo "Waiting for HDFS to exit safemode..."
@@ -244,7 +261,7 @@ function start_first_time {

# starting accumulo
echo "Starting accumulo..."
$ACCUMULO_HOME/bin/start-all.sh
$ACCUMULO_HOME/bin/accumulo-cluster start
fi

if [[ "$hbase_enabled" -eq 1 ]]; then
@@ -286,9 +303,9 @@ function start_cloud {

# start hadoop
echo "Starting hadoop..."
hadoop-daemon.sh --config $HADOOP_CONF_DIR start namenode
hadoop-daemon.sh --config $HADOOP_CONF_DIR start secondarynamenode
hadoop-daemon.sh --config $HADOOP_CONF_DIR start datanode
hdfs --config $HADOOP_CONF_DIR --daemon start namenode
hdfs --config $HADOOP_CONF_DIR --daemon start secondarynamenode
hdfs --config $HADOOP_CONF_DIR --daemon start datanode
start_yarn

# Wait for HDFS to exit safemode:
@@ -298,7 +315,7 @@ function start_cloud {
if [[ "$acc_enabled" -eq 1 ]]; then
# starting accumulo
echo "starting accumulo..."
$ACCUMULO_HOME/bin/start-all.sh
$ACCUMULO_HOME/bin/accumulo-cluster start
fi

if [[ "$hbase_enabled" -eq 1 ]]; then
@@ -321,8 +338,8 @@ function start_cloud {
}

function start_yarn {
$HADOOP_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager
$HADOOP_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start nodemanager
$HADOOP_HOME/bin/yarn --config $HADOOP_CONF_DIR --daemon start resourcemanager
$HADOOP_HOME/bin/yarn --config $HADOOP_CONF_DIR --daemon start nodemanager
}

function start_geoserver {
@@ -349,7 +366,7 @@ function stop_cloud {

if [[ "$acc_enabled" -eq 1 ]]; then
echo "Stopping accumulo..."
$ACCUMULO_HOME/bin/stop-all.sh
$ACCUMULO_HOME/bin/accumulo-cluster stop
fi

if [[ "$hbase_enabled" -eq 1 ]]; then
@@ -359,9 +376,9 @@ function stop_cloud {

echo "Stopping yarn and dfs..."
stop_yarn
$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop namenode
$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop secondarynamenode
$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop datanode
$HADOOP_HOME/bin/hdfs --config $HADOOP_CONF_DIR --daemon stop namenode
$HADOOP_HOME/bin/hdfs --config $HADOOP_CONF_DIR --daemon stop secondarynamenode
$HADOOP_HOME/bin/hdfs --config $HADOOP_CONF_DIR --daemon stop datanode

echo "Stopping zookeeper..."
$ZOOKEEPER_HOME/bin/zkServer.sh stop
@@ -434,8 +451,8 @@ function verify_stop {
}

function stop_yarn {
$HADOOP_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop resourcemanager
$HADOOP_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop nodemanager
$HADOOP_HOME/bin/yarn --config $HADOOP_CONF_DIR --daemon stop resourcemanager
$HADOOP_HOME/bin/yarn --config $HADOOP_CONF_DIR --daemon stop nodemanager
}

function stop_geoserver {
9 changes: 3 additions & 6 deletions bin/config.sh
@@ -30,15 +30,15 @@ function validate_config {
# allowed versions are
local pkg_error=""
# hadoop 2.[5-9].x or 3.2.1
if [[ -z "$pkg_hadoop_ver" || ! $pkg_hadoop_ver =~ 2[.][56789][.]. ]]; then
if [[ -z "$pkg_hadoop_ver" || ! $pkg_hadoop_ver =~ 2[.][56789][.].|3[.]2[.]1 ]]; then
pkg_error="${pkg_error}Invalid hadoop version: '${pkg_hadoop_ver}' ${NL}"
fi
# zk 3.4.[5-10]
if [[ -z "$pkg_zookeeper_ver" || ! $pkg_zookeeper_ver =~ 3[.]4[.]([56789]|10) ]]; then
pkg_error="${pkg_error}Invalid zookeeper version: '${pkg_zookeeper_ver}' ${NL}"
fi
# acc 1.[6-9].x or 2.0.0
if [[ -z "$pkg_accumulo_ver" || ! $pkg_accumulo_ver =~ 1[.][6789][.]. ]]; then
if [[ -z "$pkg_accumulo_ver" || ! $pkg_accumulo_ver =~ 1[.][6789][.].|2[.]0[.]0 ]]; then
pkg_error="${pkg_error}Invalid accumulo version: '${pkg_accumulo_ver}' ${NL}"
fi
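A quick standalone sanity check of the widened patterns (not part of the script; runs in any bash shell):

    for v in 2.8.4 3.2.1 1.9.3 2.0.0; do
      [[ $v =~ 2[.][56789][.].|3[.]2[.]1 ]] && echo "hadoop ok: $v"
      [[ $v =~ 1[.][6789][.].|2[.]0[.]0 ]] && echo "accumulo ok: $v"
    done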
# kafka 0.9.x, 0.10.x, 0.11.x, 1.0.x
@@ -78,17 +78,14 @@ function set_env_vars {
fi

export HADOOP_HOME="$CLOUD_HOME/hadoop-${pkg_hadoop_ver}"
export HADOOP_PREFIX="${HADOOP_HOME}"
export HADOOP_CONF_DIR="${HADOOP_PREFIX}/etc/hadoop"
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
export HADOOP_COMMON_HOME="${HADOOP_HOME}"
export HADOOP_HDFS_HOME="${HADOOP_HOME}"
export HADOOP_YARN_HOME="${HADOOP_HOME}"
export HADOOP_PID_DIR="${CLOUD_HOME}/data/hadoop/pid"
export HADOOP_IDENT_STRING=$(echo ${CLOUD_HOME} | (md5sum 2>/dev/null || md5) | cut -c1-32)

export YARN_HOME="${HADOOP_HOME}"
export YARN_PID_DIR="${HADOOP_PID_DIR}"
export YARN_IDENT_STRING="${HADOOP_IDENT_STRING}"

export SPARK_HOME="$CLOUD_HOME/spark-${pkg_spark_ver}-bin-${pkg_spark_hadoop_ver}"

8 changes: 5 additions & 3 deletions bin/ports.sh
@@ -96,9 +96,11 @@ function configure_port_offset {
sed -i~orig "s/clientPort=.*/clientPort=$zkPort/" $ZOOKEEPER_HOME/conf/zoo.cfg

# kafka (server.properties)
kafkaPort=$((9092+offset))
sed -i~orig "s/\/\/$CL_HOSTNAME:[0-9].*/\/\/$CL_HOSTNAME:$kafkaPort/" $KAFKA_HOME/config/server.properties
sed -i~orig "s/zookeeper.connect=$CL_HOSTNAME:[0-9].*/zookeeper.connect=$CL_HOSTNAME:$zkPort/" $KAFKA_HOME/config/server.properties
if [[ "kafka_enabled" -eq 1 ]]; then
kafkaPort=$((9092+offset))
sed -i~orig "s/\/\/$CL_HOSTNAME:[0-9].*/\/\/$CL_HOSTNAME:$kafkaPort/" $KAFKA_HOME/config/server.properties
sed -i~orig "s/zookeeper.connect=$CL_HOSTNAME:[0-9].*/zookeeper.connect=$CL_HOSTNAME:$zkPort/" $KAFKA_HOME/config/server.properties
fi
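With a hypothetical offset of 100 and hostname myhost, the substitutions above would leave behind lines such as (key names are the stock ZooKeeper/Kafka settings):

    zoo.cfg:            clientPort=2281
    server.properties:  listeners=PLAINTEXT://myhost:9192
    server.properties:  zookeeper.connect=myhost:2281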

# Zeppelin
if [[ "$zeppelin_enabled" -eq 1 ]]; then
6 changes: 3 additions & 3 deletions conf/cloud-local.conf
@@ -24,10 +24,10 @@ pkg_src_maven="https://repo1.maven.org/maven2"
# VERSION MANAGEMENT - Versions of popular software
################################################################################

pkg_accumulo_ver="1.9.2"
pkg_accumulo_ver="2.0.0"
pkg_hbase_ver="1.3.1"
# Note pkg_spark_hadoop_ver below if modifying
pkg_hadoop_ver="2.8.4"
pkg_hadoop_ver="3.2.1"
# Note, just the major+minor from Hadoop, not patch level
hadoop_base_ver=${pkg_hadoop_ver:0:3}
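For example, with pkg_hadoop_ver=3.2.1 the three-character slice yields hadoop_base_ver=3.2.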

@@ -100,7 +100,7 @@ hbase_enabled=0
kafka_enabled=0

# Download spark distribution
spark_enabled=1
spark_enabled=0

# Enable/Disable installation of GeoMesa
geomesa_enabled=0
124 changes: 124 additions & 0 deletions templates/accumulo/accumulo-env.sh
@@ -0,0 +1,124 @@
#! /usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Before accumulo-env.sh is loaded, these environment variables are set and can be used in this file:

# cmd - Command that is being called such as tserver, master, etc.
# basedir - Root of Accumulo installation
# bin - Directory containing Accumulo scripts
# conf - Directory containing Accumulo configuration
# lib - Directory containing Accumulo libraries

############################
# Variables that must be set
############################

## Accumulo logs directory. Referenced by logger config.
export ACCUMULO_LOG_DIR="${ACCUMULO_LOG_DIR:-${basedir}/logs}"
## Hadoop installation
export HADOOP_HOME="${HADOOP_HOME:-/path/to/hadoop}"
## Hadoop configuration
export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${HADOOP_HOME}/etc/hadoop}"
## Zookeeper installation
export ZOOKEEPER_HOME="${ZOOKEEPER_HOME:-/path/to/zookeeper}"

##########################
# Build CLASSPATH variable
##########################

## Verify that Hadoop & Zookeeper installation directories exist
if [[ ! -d "$ZOOKEEPER_HOME" ]]; then
echo "ZOOKEEPER_HOME=$ZOOKEEPER_HOME is not set to a valid directory in accumulo-env.sh"
exit 1
fi
if [[ ! -d "$HADOOP_HOME" ]]; then
echo "HADOOP_HOME=$HADOOP_HOME is not set to a valid directory in accumulo-env.sh"
exit 1
fi

## Build using existing CLASSPATH, conf/ directory, dependencies in lib/, and external Hadoop & Zookeeper dependencies
if [[ -n "$CLASSPATH" ]]; then
CLASSPATH="${CLASSPATH}:${conf}"
else
CLASSPATH="${conf}"
fi
CLASSPATH="${CLASSPATH}:${lib}/*:${HADOOP_CONF_DIR}:${ZOOKEEPER_HOME}/*:${HADOOP_HOME}/share/hadoop/client/*"
export CLASSPATH

##################################################################
# Build JAVA_OPTS variable. Defaults below work but can be edited.
##################################################################

## JVM options set for all processes. Extra options can be passed in by setting ACCUMULO_JAVA_OPTS to an array of options.
JAVA_OPTS=("${ACCUMULO_JAVA_OPTS[@]}"
'-XX:+UseConcMarkSweepGC'
'-XX:CMSInitiatingOccupancyFraction=75'
'-XX:+CMSClassUnloadingEnabled'
'-XX:OnOutOfMemoryError=kill -9 %p'
'-XX:-OmitStackTraceInFastThrow'
'-Djava.net.preferIPv4Stack=true'
"-Daccumulo.native.lib.path=${lib}/native")

## Make sure Accumulo native libraries are built since they are enabled by default
"${bin}"/accumulo-util build-native &> /dev/null

## JVM options set for individual applications
case "$cmd" in
master) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx512m' '-Xms512m') ;;
monitor) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms256m') ;;
gc) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms256m') ;;
tserver) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx3G' '-Xms3G') ;;
*) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms64m') ;;
esac

## JVM options set for logging. Review log4j properties files to see how they are used.
JAVA_OPTS=("${JAVA_OPTS[@]}"
"-Daccumulo.log.dir=${ACCUMULO_LOG_DIR}"
"-Daccumulo.application=${cmd}${ACCUMULO_SERVICE_INSTANCE}_$(hostname)")

case "$cmd" in
monitor)
JAVA_OPTS=("${JAVA_OPTS[@]}" "-Dlog4j.configuration=log4j-monitor.properties")
;;
gc|master|tserver|tracer)
JAVA_OPTS=("${JAVA_OPTS[@]}" "-Dlog4j.configuration=log4j-service.properties")
;;
*)
# let log4j use its default behavior (log4j.xml, log4j.properties)
true
;;
esac

export JAVA_OPTS

############################
# Variables set to a default
############################

export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1}
## Add Hadoop native libraries to shared library paths given operating system
case "$(uname)" in
Darwin) export DYLD_LIBRARY_PATH="${HADOOP_HOME}/lib/native:${DYLD_LIBRARY_PATH}" ;;
*) export LD_LIBRARY_PATH="${HADOOP_HOME}/lib/native:${LD_LIBRARY_PATH}" ;;
esac

###############################################
# Variables that are optional. Uncomment to set
###############################################

## Specifies command that will be placed before calls to Java in accumulo script
# export ACCUMULO_JAVA_PREFIX=""