Merge "Revise s3_hadoop"

This commit is contained in:
Zuul 2018-02-08 18:47:14 +00:00 committed by Gerrit Code Review
commit 8a9185373f
5 changed files with 65 additions and 51 deletions

View File

@ -482,7 +482,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "vanilla" ]; then
export HIVE_VERSION=${HIVE_VERSION:-"0.11.0"}
export HADOOP_V2_7_1_NATIVE_LIBS_DOWNLOAD_URL=${HADOOP_V2_7_1_NATIVE_LIBS_DOWNLOAD_URL:-"https://tarballs.openstack.org/sahara/dist/common-artifacts/hadoop-native-libs-2.7.1.tar.gz"}
export OOZIE_HADOOP_V2_7_1_DOWNLOAD_URL=${OOZIE_HADOOP_V2_7_1_FILE:-"http://sahara-files.mirantis.com/oozie-4.2.0-hadoop-2.7.1.tar.gz"}
export DIB_HDFS_LIB_DIR="/opt/hadoop/share/hadoop/common/lib"
export DIB_HDFS_LIB_DIR="/opt/hadoop/share/hadoop/tools/lib"
export plugin_type="vanilla"
if [ "$DIB_SPARK_VERSION" = "1.3.1" ]; then
@ -630,19 +630,19 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "ambari" ]; then
if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then
ambari_ubuntu_image_name=${ambari_ubuntu_image_name:-ubuntu_sahara_ambari}
ambari_element_sequence="ambari $JAVA_ELEMENT swift_hadoop kdc s3_hadoop"
ambari_element_sequence="ambari $JAVA_ELEMENT swift_hadoop kdc"
export DIB_RELEASE="trusty"
image_create ubuntu $ambari_ubuntu_image_name $ambari_element_sequence
unset DIB_RELEASE
fi
if [ "$BASE_IMAGE_OS" = "centos" ]; then
ambari_centos_image_name=${ambari_centos_image_name:-centos_sahara_ambari}
ambari_element_sequence="ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc s3_hadoop"
ambari_element_sequence="ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc"
image_create centos $ambari_centos_image_name $ambari_element_sequence
fi
if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "centos7" ]; then
ambari_centos7_image_name=${ambari_centos7_image_name:-"centos7-sahara-ambari"}
ambari_element_sequence="disable-selinux ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc s3_hadoop"
ambari_element_sequence="disable-selinux ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc"
image_create centos7 $ambari_centos7_image_name $ambari_element_sequence
fi
@ -668,7 +668,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then
HADOOP_VERSION=${DIB_CDH_MINOR_VERSION%.*}
fi
cloudera_elements_sequence="hadoop-cloudera swift_hadoop kdc s3_hadoop"
cloudera_elements_sequence="hadoop-cloudera swift_hadoop kdc"
if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then
if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then
export DIB_CDH_VERSION="5.5"
@ -709,7 +709,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then
fi
if [ "$BASE_IMAGE_OS" = "centos" ]; then
centos_cloudera_elements_sequence="selinux-permissive disable-firewall nc s3_hadoop"
centos_cloudera_elements_sequence="selinux-permissive disable-firewall nc"
if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then
export DIB_CDH_VERSION="5.5"
@ -721,7 +721,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then
fi
if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "centos7" ]; then
centos7_cloudera_elements_sequence="selinux-permissive disable-firewall nc s3_hadoop"
centos7_cloudera_elements_sequence="selinux-permissive disable-firewall nc"
if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then
export DIB_CDH_VERSION="5.5"
@ -778,8 +778,8 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "mapr" ]; then
# MapR repository requires additional space
export DIB_MIN_TMPFS=10
mapr_ubuntu_elements_sequence="ssh hadoop-mapr $JAVA_ELEMENT s3_hadoop"
mapr_centos_elements_sequence="ssh hadoop-mapr selinux-permissive $JAVA_ELEMENT disable-firewall nc s3_hadoop"
mapr_ubuntu_elements_sequence="ssh hadoop-mapr $JAVA_ELEMENT"
mapr_centos_elements_sequence="ssh hadoop-mapr selinux-permissive $JAVA_ELEMENT disable-firewall nc"
if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then
export DIB_RELEASE=${DIB_RELEASE:-trusty}

View File

@ -61,8 +61,11 @@ EOF
$HADOOP_HOME/etc/hadoop/yarn-env.sh
echo "source $JAVA_RC" >> $HADOOP_HOME/etc/hadoop/yarn-env.sh
# remove apache-built swiftfs
rm ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-openstack-${DIB_HADOOP_VERSION}.jar
# enable swiftfs
# Sahara expects the jar to be in both folders, even though only one is
# the "official" classpath. Until images and Sahara become more strictly
# coupled we must maintain this legacy behavior.
ln -s ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-openstack-${DIB_HADOOP_VERSION}.jar ${HADOOP_HOME}/share/hadoop/common/lib/
}

View File

@ -2,22 +2,9 @@
s3_hadoop
=========
Copy the Hadoop S3 connector jar file into the Hadoop classpath.
Copy the Hadoop S3 connector libraries into the Hadoop and Spark classpaths.
Environment Variables
---------------------
HADOOP_S3_JAR_ORIGIN
:Required: No
:Default: Depends on plugin.
:Description: Path to where the S3 jar is (already) located.
HADOOP_S3_JAR_DOWNLOAD
:Required: No
:Default: None.
:Description: If set, download a specific S3 jar instead of using one already available on the image.
DIB_HDFS_LIB_DIR
:Required: No
:Default: /usr/share/hadoop/lib
:Description: Directory in the guest where to save the S3 jar. Shared with swift_hadoop.
None.

View File

@ -5,32 +5,56 @@ fi
set -eu
set -o pipefail
if [ -z "${HADOOP_S3_JAR_ORIGIN:-}" ]; then
# The jar is not locally available during image-gen on Ambari/MapR plugins: relevant packages are installed later.
# The jar is not appropriate for the Storm plugin: you cannot stream data from an object store.
# For plugins not found in the switch statement below, a user-specified jar can still be downloaded.
case "$plugin_type" in
"vanilla")
HADOOP_S3_JAR_ORIGIN="/opt/hadoop/share/hadoop/tools/lib/hadoop-aws-$DIB_HADOOP_VERSION.jar"
;;
"cloudera" | "spark" )
HADOOP_S3_JAR_ORIGIN="/usr/lib/hadoop/hadoop-aws.jar"
;;
esac
fi
HDFS_LIB_DIR=${DIB_HDFS_LIB_DIR:-"/usr/share/hadoop/lib"} # matches swift_hadoop default
case "$plugin_type" in
"vanilla" )
HADOOP_TOOLS_DIR_PATH="/opt/hadoop/share/hadoop/tools/lib"
HADOOP_ENV_SH_PATH="/opt/hadoop/etc/hadoop/hadoop-env.sh"
SPARK_JARS_DIR_PATH="/opt/spark/jars"
;;
"spark" )
HADOOP_TOOLS_DIR_PATH="/usr/lib/hadoop/client"
SPARK_JARS_DIR_PATH="/opt/spark/jars"
;;
"cloudera" )
echo -n "The s3_hadoop element is not supported on CDH,"
echo " because the relevant libraries are already in the right place."
exit 1
;;
*)
echo "The s3_hadoop element is only supported on Vanilla and Spark."
exit 1
esac
if [ -z "${HADOOP_S3_JAR_DOWNLOAD:-}" ]; then
if [ "${HADOOP_S3_JAR_ORIGIN:-}" ]; then
cp $HADOOP_S3_JAR_ORIGIN $HDFS_LIB_DIR/hadoop-aws.jar
# NOTE: By definition, the Spark standalone plugin does not contain Hadoop in
# its entirety. Therefore, there are no Hadoop-specific environment settings
# available for modification.
if [ "$plugin_type" != "spark" ]; then
if [ -f "$HADOOP_ENV_SH_PATH" ]; then
cat >> $HADOOP_ENV_SH_PATH <<EOF
for f in $HADOOP_TOOLS_DIR_PATH/*.jar; do
if [ "\$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=\$HADOOP_CLASSPATH:\$f
else
export HADOOP_CLASSPATH=\$f
fi
done
EOF
else
echo "Something went wrong: couldn't find Hadoop env settings."
exit 1
fi
fi
if [ -d "$SPARK_JARS_DIR_PATH" ]; then
cp $HADOOP_TOOLS_DIR_PATH/*aws*.jar $SPARK_JARS_DIR_PATH
chmod 0644 $SPARK_JARS_DIR_PATH/*aws*jar
else
wget -O $HDFS_LIB_DIR/hadoop-aws.jar $HADOOP_S3_JAR_DOWNLOAD
fi
path=$HDFS_LIB_DIR/hadoop-aws.jar
if [ -f $path ]; then
chmod 0644 $path
# NOTE: In the case of Vanilla, the user may have disabled the Spark
# element. So, check for the existence of the directory explicitly, but
# crucially do not consider it an error if the folder does not exist.
if [ "$plugin_type" != "vanilla" ]; then
echo "Something went wrong: couldn't find Spark installation."
exit 1
fi
fi

View File

@ -17,7 +17,7 @@ swift_url
DIB_HDFS_LIB_DIR
:Required: No
:Default: /usr/share/hadoop/lib
:Description: Directory in the guest where to save the swift jar. Shared with s3_hadoop.
:Description: Directory in the guest where to save the swift jar.
DIB_HADOOP_SWIFT_JAR_NAME
:Required: No