Merge "Adding Spark to sahara-image-pack"
This commit is contained in:
commit
7af450e34d
|
@ -24,12 +24,28 @@ For cluster provisioning, prepared images should be used.
|
|||
(build parameter)
|
||||
- Notes
|
||||
|
||||
* - 2.3
|
||||
- Ubuntu 16.04, CentOS 7
|
||||
- sahara-image-pack
|
||||
- 2.3
|
||||
- based on CDH 5.11
|
||||
use --plugin_version to specify the minor version: 2.3.2 (default),
|
||||
2.3.1 or 2.3.0
|
||||
|
||||
* - 2.3
|
||||
- Ubuntu 16.04
|
||||
- sahara-image-create
|
||||
- 2.3.0
|
||||
- based on CDH 5.11
|
||||
|
||||
* - 2.2
|
||||
- Ubuntu 16.04, CentOS 7
|
||||
- sahara-image-pack
|
||||
- 2.2
|
||||
- based on CDH 5.11
|
||||
use --plugin_version to specify the minor version: 2.2.1 (default),
|
||||
or 2.2.0
|
||||
|
||||
* - 2.2
|
||||
- Ubuntu 16.04
|
||||
- sahara-image-create
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
---
|
||||
features:
|
||||
- |
|
||||
Adding the ability to create Spark images using sahara-image-pack.
|
|
@ -0,0 +1,44 @@
|
|||
# Copyright (c) 2019 Red Hat, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from sahara.plugins import images
|
||||
from sahara.plugins import utils as plugin_utils
|
||||
|
||||
|
||||
# Module-level image validator built from the packaged Spark image
# specification (image.yaml); shared by every helper in this module.
_validator = images.SaharaImageValidator.from_yaml(
    'plugins/spark/resources/images/image.yaml',
    resource_roots=['plugins/spark/resources/images'],
    package='sahara_plugin_spark')
|
||||
|
||||
|
||||
def get_image_arguments():
    """Return the image-generation arguments accepted by the validator."""
    argument_list = _validator.get_argument_list()
    return argument_list
|
||||
|
||||
|
||||
def pack_image(remote, test_only=False, image_arguments=None):
    """Run the image validator against *remote*.

    When test_only is False the validator also reconfigures the image
    (packing); when True it only checks the current contents.
    """
    _validator.validate(
        remote, test_only=test_only, image_arguments=image_arguments)
|
||||
|
||||
|
||||
def validate_images(cluster, test_only=False, image_arguments=None):
    """Validate (or prepare) the image on the cluster's instances.

    :param cluster: cluster whose instances are checked.
    :param test_only: when True only one instance is inspected and no
        changes are made; when False every instance is validated.
    :param image_arguments: ignored; the validator's own argument list is
        always used (consistent with the sibling sahara plugins).
    """
    image_arguments = get_image_arguments()
    if not test_only:
        instances = plugin_utils.get_instances(cluster)
    else:
        # One node is enough for a test-only check.  Use a slice so the
        # result stays a list: the original `[0]` returned a single
        # Instance object, which the loop below cannot iterate.
        instances = plugin_utils.get_instances(cluster)[:1]
    for instance in instances:
        with instance.remote() as r:
            _validator.validate(r, test_only=test_only,
                                image_arguments=image_arguments)
|
|
@ -30,6 +30,7 @@ from sahara.plugins import utils
|
|||
from sahara_plugin_spark.i18n import _
|
||||
from sahara_plugin_spark.plugins.spark import config_helper as c_helper
|
||||
from sahara_plugin_spark.plugins.spark import edp_engine
|
||||
from sahara_plugin_spark.plugins.spark import images
|
||||
from sahara_plugin_spark.plugins.spark import run_scripts as run
|
||||
from sahara_plugin_spark.plugins.spark import scaling as sc
|
||||
from sahara_plugin_spark.plugins.spark import shell_engine
|
||||
|
@ -569,3 +570,19 @@ class SparkProvider(p.ProvisioningPluginBase):
|
|||
want_to_configure, self.get_configs(
|
||||
cluster.hadoop_version), cluster, scaling)
|
||||
provider.apply_recommended_configs()
|
||||
|
||||
def get_image_arguments(self, hadoop_version):
    """Return image-gen arguments, or NotImplemented for old versions.

    Versions 1.6.0 and 2.1.0 predate sahara-image-pack support and have
    no image specification.
    """
    versions_without_image_spec = ['1.6.0', '2.1.0']
    if hadoop_version in versions_without_image_spec:
        return NotImplemented
    return images.get_image_arguments()
|
||||
|
||||
def pack_image(self, hadoop_version, remote,
               test_only=False, image_arguments=None):
    """Delegate image packing to the plugin's images helper module."""
    images.pack_image(
        remote, test_only=test_only, image_arguments=image_arguments)
|
||||
|
||||
def validate_images(self, cluster, test_only=False, image_arguments=None):
    """Validate cluster images; a no-op for versions without image specs."""
    # 1.6.0 and 2.1.0 have no image specification, so nothing to check.
    if cluster.hadoop_version in ['1.6.0', '2.1.0']:
        return
    images.validate_images(cluster,
                           test_only=test_only,
                           image_arguments=image_arguments)
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
#!/bin/bash
# Stop the HDFS services so the packed image does not ship with them
# running.  In test-only mode there is nothing to verify here.
[ $test_only -ne 0 ] && exit 0

systemctl stop hadoop-hdfs-datanode
systemctl stop hadoop-hdfs-namenode
|
|
@ -0,0 +1,43 @@
|
|||
#!/bin/bash
# Configure the Cloudera CDH 5.11 yum repositories (CDH, Cloudera Manager,
# navigator-keytrustee KMS and Kafka) on CentOS/RHEL 7.
# Env (provided by sahara-image-pack): test_only.

CDH_VERSION=5.11
CDH_MINOR_VERSION=5.11.0

# Only configure once; the cdh5 repo file is the sentinel.
if [ ! -f /etc/yum.repos.d/cloudera-cdh5.repo ]; then
    if [ $test_only -eq 0 ]; then
        echo '[cloudera-cdh5]' > /etc/yum.repos.d/cloudera-cdh5.repo
        echo "name=Cloudera's Distribution for Hadoop, Version 5" >> /etc/yum.repos.d/cloudera-cdh5.repo
        echo "baseurl=http://archive.cloudera.com/cdh5/redhat/7/x86_64/cdh/$CDH_MINOR_VERSION/" >> /etc/yum.repos.d/cloudera-cdh5.repo
        echo "gpgkey = http://archive.cloudera.com/cdh5/redhat/7/x86_64/cdh/RPM-GPG-KEY-cloudera" >> /etc/yum.repos.d/cloudera-cdh5.repo
        echo 'gpgcheck = 1' >> /etc/yum.repos.d/cloudera-cdh5.repo

        echo '[cloudera-manager]' > /etc/yum.repos.d/cloudera-manager.repo
        echo 'name=Cloudera Manager' >> /etc/yum.repos.d/cloudera-manager.repo
        echo "baseurl=http://archive.cloudera.com/cm5/redhat/7/x86_64/cm/$CDH_MINOR_VERSION/" >> /etc/yum.repos.d/cloudera-manager.repo
        echo "gpgkey = http://archive.cloudera.com/cm5/redhat/7/x86_64/cm/RPM-GPG-KEY-cloudera" >> /etc/yum.repos.d/cloudera-manager.repo
        echo 'gpgcheck = 1' >> /etc/yum.repos.d/cloudera-manager.repo

        echo '[navigator-keytrustee]' > /etc/yum.repos.d/kms.repo
        echo "name=Cloudera's Distribution for navigator-Keytrustee, Version 5" >> /etc/yum.repos.d/kms.repo

        # The keytrustee archive layout varies per release: probe the
        # minor-version directory and fall back to the major version on 404.
        RETURN_CODE="$(curl -s -o /dev/null -w "%{http_code}" http://archive.cloudera.com/navigator-keytrustee5/redhat/7/x86_64/navigator-keytrustee/$CDH_MINOR_VERSION/)"
        if [ "$RETURN_CODE" == "404" ]; then
            echo "baseurl=http://archive.cloudera.com/navigator-keytrustee5/redhat/7/x86_64/navigator-keytrustee/$CDH_VERSION/" >> /etc/yum.repos.d/kms.repo
        else
            echo "baseurl=http://archive.cloudera.com/navigator-keytrustee5/redhat/7/x86_64/navigator-keytrustee/$CDH_MINOR_VERSION/" >> /etc/yum.repos.d/kms.repo
        fi

        echo "gpgkey = http://archive.cloudera.com/navigator-keytrustee5/redhat/7/x86_64/navigator-keytrustee/RPM-GPG-KEY-cloudera" >> /etc/yum.repos.d/kms.repo
        echo 'gpgcheck = 1' >> /etc/yum.repos.d/kms.repo

        echo "[cloudera-kafka]" > /etc/yum.repos.d/cloudera-kafka.repo
        echo "name=Cloudera's Distribution for kafka, Version 2.2.0" >> /etc/yum.repos.d/cloudera-kafka.repo
        echo "baseurl=http://archive.cloudera.com/kafka/redhat/7/x86_64/kafka/2.2.0/" >> /etc/yum.repos.d/cloudera-kafka.repo
        echo "gpgkey = http://archive.cloudera.com/kafka/redhat/7/x86_64/kafka/RPM-GPG-KEY-cloudera" >> /etc/yum.repos.d/cloudera-kafka.repo
        echo "gpgcheck = 1" >> /etc/yum.repos.d/cloudera-kafka.repo

        # Drop stale metadata so the new repos are picked up immediately.
        yum clean all
    else
        exit 0
    fi
fi
|
|
@ -0,0 +1,20 @@
|
|||
#!/bin/bash
# Download the Hadoop Swift filesystem jar into the HDFS lib directory.
# Env (provided by sahara-image-pack): test_only, hdfs_lib_dir, swift_url.
# (An unused leftover variable hadoop="2.6.0" was removed.)

HDFS_LIB_DIR=${hdfs_lib_dir:-"/usr/share/hadoop/lib"}
HADOOP_SWIFT_JAR_NAME="hadoop-openstack.jar"

if [ $test_only -eq 0 ]; then
    mkdir -p $HDFS_LIB_DIR
    # Test the download directly instead of inspecting $? afterwards.
    if ! curl -sS -o $HDFS_LIB_DIR/$HADOOP_SWIFT_JAR_NAME $swift_url; then
        echo -e "Could not download Swift Hadoop FS implementation.\nAborting"
        exit 1
    fi

    # World-readable so any service user can load the jar.
    chmod 0644 $HDFS_LIB_DIR/$HADOOP_SWIFT_JAR_NAME
else
    exit 0
fi
|
|
@ -0,0 +1,30 @@
|
|||
#!/bin/bash
# Install the ext-2.2 JavaScript library required by the Oozie web console.
# Env: EXTJS_NO_UNPACK (optional) - when set, only stage the archive
# instead of unpacking it.

EXTJS_DESTINATION_DIR="/var/lib/oozie"
EXTJS_DOWNLOAD_URL="https://tarballs.openstack.org/sahara-extra/dist/common-artifacts/ext-2.2.zip"

extjs_basepath=$(basename ${EXTJS_DOWNLOAD_URL})
extjs_archive=/tmp/${extjs_basepath}
# Archive name without its extension, e.g. "ext-2.2".
extjs_folder="${extjs_basepath%.*}"

# Fetch the archive and make sure the destination exists.
setup_extjs() {
  curl -sS -o $extjs_archive $EXTJS_DOWNLOAD_URL
  mkdir -p $EXTJS_DESTINATION_DIR
}

if [ -z "${EXTJS_NO_UNPACK:-}" ]; then
  # Normal mode: unpack into the destination unless already present.
  if [ ! -d "${EXTJS_DESTINATION_DIR}/${extjs_folder}" ]; then
    setup_extjs
    unzip -o -d "$EXTJS_DESTINATION_DIR" $extjs_archive
    rm -f $extjs_archive
  else
    exit 0
  fi
else
  # No-unpack mode: just stage the zip in the destination directory.
  if [ ! -f "${EXTJS_DESTINATION_DIR}/${extjs_basepath}" ]; then
    setup_extjs
    mv $extjs_archive $EXTJS_DESTINATION_DIR
  else
    exit 0
  fi
fi
|
|
@ -0,0 +1,41 @@
|
|||
#!/bin/bash
# Download and unpack an Apache Spark distribution into /opt/spark.
# Env (provided by sahara-image-pack): test_only, plugin_version, and
# optionally SPARK_DOWNLOAD_URL / SPARK_HADOOP_DL to override the source.

tmp_dir=/tmp/spark
CDH_VERSION=5.11
mkdir -p $tmp_dir

if [ ! -d /opt/spark ]; then
    if [ $test_only -eq 0 ]; then
        # The user is not providing his own Spark distribution package
        if [ -z "${SPARK_DOWNLOAD_URL:-}" ]; then
            # Check hadoop version
            # INFO on hadoop versions: http://spark.apache.org/docs/latest/hadoop-third-party-distributions.html
            # Now the below is just a sanity check
            if [ -z "${SPARK_HADOOP_DL:-}" ]; then
                SPARK_HADOOP_DL=hadoop2.7
            fi

            SPARK_DOWNLOAD_URL="http://archive.apache.org/dist/spark/spark-$plugin_version/spark-$plugin_version-bin-$SPARK_HADOOP_DL.tgz"
        fi

        echo "Downloading SPARK"
        spark_file=$(basename "$SPARK_DOWNLOAD_URL")
        wget -O $tmp_dir/$spark_file $SPARK_DOWNLOAD_URL
        # Record where this image's Spark came from, for later debugging.
        echo "$SPARK_DOWNLOAD_URL" > $tmp_dir/spark_url.txt

        echo "Extracting SPARK"
        # Top-level directory inside the tarball,
        # e.g. "spark-2.3.2-bin-hadoop2.7".
        extract_folder=$(tar tzf $tmp_dir/$spark_file | sed -e 's@/.*@@' | uniq)
        echo "Decompressing Spark..."
        # NOTE(review): extracts into the current working directory;
        # assumes the packer runs this script from a writable cwd - confirm.
        tar xzf $tmp_dir/$spark_file
        rm $tmp_dir/$spark_file

        echo "Moving SPARK to /opt/"
        # Placing spark in /opt/spark
        mv $extract_folder /opt/spark/
        mv $tmp_dir/spark_url.txt /opt/spark/

        rm -Rf $tmp_dir
    else
        # Test-only mode: a missing /opt/spark means the image is not
        # valid, so fail the validation (unlike the other scripts, which
        # exit 0 because they have nothing to verify).
        exit 1
    fi
fi
|
|
@ -0,0 +1,12 @@
|
|||
#!/bin/bash
# Add the jars needed for S3 (s3a) support to the Spark jars directory.

SPARK_JARS_DIR_PATH="/opt/spark/jars"
# Currently unused; kept for reference to the on-disk Hadoop locations.
HADOOP_TOOLS_DIR_PATH="/opt/hadoop/share/hadoop/tools/lib"
HADOOP_COMMON_DIR_PATH="/opt/hadoop/share/hadoop/common/lib"


# The hadoop-aws and aws-java-sdk libraries are missing here, but we
# cannot copy them from the Hadoop folder on-disk due to
# version/patching issues
curl -sS https://tarballs.openstack.org/sahara-extra/dist/common-artifacts/hadoop-aws-2.7.3.jar -o $SPARK_JARS_DIR_PATH/hadoop-aws.jar
# central.maven.org has been decommissioned; repo1.maven.org (HTTPS) is
# the canonical Maven Central host.
curl -sS https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/1.7.4/aws-java-sdk-1.7.4.jar -o $SPARK_JARS_DIR_PATH/aws-java-sdk.jar
|
|
@ -0,0 +1,61 @@
|
|||
# Image specification consumed by sahara-image-pack (SaharaImageValidator)
# to build Spark images.  Structure reconstructed with conventional
# indentation; nesting of the validators list follows the standard
# sahara image-spec layout.
arguments:
  plugin_version:
    description: The version of Spark to install. Defaults to 2.3.2
    default: 2.3.2
    choices:
      - 2.3.2
      - 2.3.1
      - 2.3.0
      - 2.2.1
      - 2.2.0
  java_distro:
    default: openjdk
    description: The distribution of Java to install. Defaults to openjdk.
    choices:
      - openjdk
      - oracle-java
  hdfs_lib_dir:
    default: /usr/lib/hadoop-mapreduce
    description: The path to HDFS lib. Defaults to /usr/lib/hadoop-mapreduce.
    required: False
  swift_url:
    default: https://tarballs.openstack.org/sahara-extra/dist/hadoop-openstack/master/hadoop-openstack-2.6.0.jar
    description: Location of the swift jar file.
    required: False

validators:
  # Configure the CDH repositories first so later package steps resolve.
  - os_case:
      - redhat:
          - package: wget
          - script: centos/wget_cdh_repo
      - ubuntu:
          - script: ubuntu/wget_cdh_repo
  - argument_case:
      argument_name: java_distro
      cases:
        openjdk:
          - os_case:
              - redhat:
                  - package: java-1.8.0-openjdk-devel
              - ubuntu:
                  - package: openjdk-8-jdk
  - script:
      common/install_spark:
        # NOTE(review): cdh_version is exported here but not declared in
        # the arguments section above - presumably injected by the
        # packing tool; confirm.
        env_vars: [plugin_version, cdh_version]
  - os_case:
      - ubuntu:
          - script: ubuntu/config_spark
  - package: ntp
  - package:
      - hadoop-hdfs-namenode
      - hadoop-hdfs-datanode
  - script: common/install_extjs
  # Services must not auto-start in the packed image.
  - os_case:
      - redhat:
          - script: centos/turn_off_services
      - ubuntu:
          - script: ubuntu/turn_off_services
  - script: common/manipulate_s3
  - script:
      common/add_jar:
        env_vars: [hdfs_lib_dir, swift_url]
|
|
@ -0,0 +1,12 @@
|
|||
#!/bin/bash
# Install a one-shot firstboot script that fixes ownership of the Spark
# and Hadoop config trees on the first boot of the generated image, then
# removes itself.

firstboot_script_name="/opt/spark/firstboot.sh"
# Hook the firstboot script into /etc/rc.local just before its "exit 0".
sed -i -e "s,^exit 0$,[ -f $firstboot_script_name ] \&\& sh $firstboot_script_name; exit 0," /etc/rc.local
# NOTE(review): assumes the cloud image's default login user is "ubuntu".
user_and_group_names="ubuntu:ubuntu"

# The heredoc is unquoted on purpose: $user_and_group_names and
# $firstboot_script_name are expanded now, at image-build time.
cat >> $firstboot_script_name <<EOF
#!/bin/sh
chown -R $user_and_group_names /opt/spark
chown -R $user_and_group_names /etc/hadoop
rm $firstboot_script_name
EOF
|
|
@ -0,0 +1,7 @@
|
|||
#!/bin/bash
# Remove the HDFS services from the boot sequence so the packed image
# does not start them automatically.  Nothing to verify in test mode.
[ $test_only -ne 0 ] && exit 0

update-rc.d -f hadoop-hdfs-datanode remove
update-rc.d -f hadoop-hdfs-namenode remove
|
|
@ -0,0 +1,36 @@
|
|||
#!/bin/bash
# Configure the Cloudera CDH 5.11 apt repositories (CDH, Cloudera Manager,
# navigator-keytrustee KMS and Kafka) on Ubuntu Xenial.
# Env (provided by sahara-image-pack): test_only.

CDH_VERSION=5.11

# Only configure once; the cdh5 list file is the sentinel.
if [ ! -f /etc/apt/sources.list.d/cdh5.list ]; then
    if [ $test_only -eq 0 ]; then
        # Add repository with postgresql package (it's dependency of cloudera packages)
        # Base image doesn't contain this repo
        echo 'deb http://nova.clouds.archive.ubuntu.com/ubuntu/ xenial universe multiverse main' >> /etc/apt/sources.list

        # Cloudera repositories
        echo "deb [arch=amd64] http://archive.cloudera.com/cdh5/ubuntu/xenial/amd64/cdh xenial-cdh$CDH_VERSION contrib" > /etc/apt/sources.list.d/cdh5.list
        echo "deb-src http://archive.cloudera.com/cdh5/ubuntu/xenial/amd64/cdh xenial-cdh$CDH_VERSION contrib" >> /etc/apt/sources.list.d/cdh5.list

        wget -qO - http://archive.cloudera.com/cdh5/ubuntu/xenial/amd64/cdh/archive.key | apt-key add -

        echo "deb [arch=amd64] http://archive.cloudera.com/cm5/ubuntu/xenial/amd64/cm xenial-cm$CDH_VERSION contrib" > /etc/apt/sources.list.d/cm5.list
        echo "deb-src http://archive.cloudera.com/cm5/ubuntu/xenial/amd64/cm xenial-cm$CDH_VERSION contrib" >> /etc/apt/sources.list.d/cm5.list

        wget -qO - http://archive.cloudera.com/cm5/ubuntu/xenial/amd64/cm/archive.key | apt-key add -

        wget -O /etc/apt/sources.list.d/kms.list http://archive.cloudera.com/navigator-keytrustee5/ubuntu/xenial/amd64/navigator-keytrustee/cloudera.list
        wget -qO - http://archive.cloudera.com/navigator-keytrustee5/ubuntu/xenial/amd64/navigator-keytrustee/archive.key | apt-key add -

        # add Kafka repository
        echo 'deb http://archive.cloudera.com/kafka/ubuntu/xenial/amd64/kafka/ xenial-kafka2.2.0 contrib' >> /etc/apt/sources.list
        wget -qO - https://archive.cloudera.com/kafka/ubuntu/xenial/amd64/kafka/archive.key | apt-key add -

        # change repository priority
        # BUG FIX: bash's builtin echo does not expand \n without -e, so
        # the original wrote a literal "\n" and produced a malformed apt
        # preferences file; printf emits the three lines correctly.
        printf 'Package: zookeeper\nPin: origin "archive.cloudera.com"\nPin-Priority: 1001\n' > /etc/apt/preferences.d/cloudera-pin

        apt-get update
    else
        exit 0
    fi
fi
|
Loading…
Reference in New Issue