diff --git a/diskimage-create/diskimage-create.sh b/diskimage-create/diskimage-create.sh
index 01be5cb5..67817065 100755
--- a/diskimage-create/diskimage-create.sh
+++ b/diskimage-create/diskimage-create.sh
@@ -482,7 +482,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "vanilla" ]; then
     export HIVE_VERSION=${HIVE_VERSION:-"0.11.0"}
     export HADOOP_V2_7_1_NATIVE_LIBS_DOWNLOAD_URL=${HADOOP_V2_7_1_NATIVE_LIBS_DOWNLOAD_URL:-"https://tarballs.openstack.org/sahara/dist/common-artifacts/hadoop-native-libs-2.7.1.tar.gz"}
     export OOZIE_HADOOP_V2_7_1_DOWNLOAD_URL=${OOZIE_HADOOP_V2_7_1_FILE:-"http://sahara-files.mirantis.com/oozie-4.2.0-hadoop-2.7.1.tar.gz"}
-    export DIB_HDFS_LIB_DIR="/opt/hadoop/share/hadoop/common/lib"
+    export DIB_HDFS_LIB_DIR="/opt/hadoop/share/hadoop/tools/lib"
     export plugin_type="vanilla"

     if [ "$DIB_SPARK_VERSION" = "1.3.1" ]; then
@@ -624,19 +624,19 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "ambari" ]; then

     if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then
         ambari_ubuntu_image_name=${ambari_ubuntu_image_name:-ubuntu_sahara_ambari}
-        ambari_element_sequence="ambari $JAVA_ELEMENT swift_hadoop kdc s3_hadoop"
+        ambari_element_sequence="ambari $JAVA_ELEMENT swift_hadoop kdc"
         export DIB_RELEASE="trusty"
         image_create ubuntu $ambari_ubuntu_image_name $ambari_element_sequence
         unset DIB_RELEASE
     fi
     if [ "$BASE_IMAGE_OS" = "centos" ]; then
         ambari_centos_image_name=${ambari_centos_image_name:-centos_sahara_ambari}
-        ambari_element_sequence="ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc s3_hadoop"
+        ambari_element_sequence="ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc"
         image_create centos $ambari_centos_image_name $ambari_element_sequence
     fi
     if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "centos7" ]; then
         ambari_centos7_image_name=${ambari_centos7_image_name:-"centos7-sahara-ambari"}
-        ambari_element_sequence="disable-selinux ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc s3_hadoop"
+        ambari_element_sequence="disable-selinux ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc"
         image_create centos7 $ambari_centos7_image_name $ambari_element_sequence
     fi

@@ -662,7 +662,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then
         HADOOP_VERSION=${DIB_CDH_MINOR_VERSION%.*}
     fi

-    cloudera_elements_sequence="hadoop-cloudera swift_hadoop kdc s3_hadoop"
+    cloudera_elements_sequence="hadoop-cloudera swift_hadoop kdc"
     if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then
         if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then
             export DIB_CDH_VERSION="5.5"
@@ -703,7 +703,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then
     fi

     if [ "$BASE_IMAGE_OS" = "centos" ]; then
-        centos_cloudera_elements_sequence="selinux-permissive disable-firewall nc s3_hadoop"
+        centos_cloudera_elements_sequence="selinux-permissive disable-firewall nc"
         if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then
             export DIB_CDH_VERSION="5.5"
@@ -715,7 +715,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then
     fi

     if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "centos7" ]; then
-        centos7_cloudera_elements_sequence="selinux-permissive disable-firewall nc s3_hadoop"
+        centos7_cloudera_elements_sequence="selinux-permissive disable-firewall nc"
         if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then
             export DIB_CDH_VERSION="5.5"
@@ -772,8 +772,8 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "mapr" ]; then
     #MapR repository requires additional space
     export DIB_MIN_TMPFS=10

-    mapr_ubuntu_elements_sequence="ssh hadoop-mapr $JAVA_ELEMENT s3_hadoop"
-    mapr_centos_elements_sequence="ssh hadoop-mapr selinux-permissive $JAVA_ELEMENT disable-firewall nc s3_hadoop"
+    mapr_ubuntu_elements_sequence="ssh hadoop-mapr $JAVA_ELEMENT"
+    mapr_centos_elements_sequence="ssh hadoop-mapr selinux-permissive $JAVA_ELEMENT disable-firewall nc"

     if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then
         export DIB_RELEASE=${DIB_RELEASE:-trusty}
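
(Illustrative usage, not part of the patch: the sequences above are the defaults baked into diskimage-create.sh, so after this change Ambari, Cloudera, and MapR images build without the s3_hadoop element, while the Vanilla and Spark sequences, unchanged here, are the ones the rewritten element still supports. The -p and -i flags are the script's existing options.)

    # Ambari image: its element sequence no longer includes s3_hadoop.
    ./diskimage-create.sh -p ambari -i ubuntu

    # Vanilla image: s3_hadoop still applies, and DIB_HDFS_LIB_DIR now points
    # swift_hadoop's jar at /opt/hadoop/share/hadoop/tools/lib.
    ./diskimage-create.sh -p vanilla -i ubuntu
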
diff --git a/elements/hadoop/install.d/40-setup-hadoop b/elements/hadoop/install.d/40-setup-hadoop
index 7dd995e0..56e109b9 100755
--- a/elements/hadoop/install.d/40-setup-hadoop
+++ b/elements/hadoop/install.d/40-setup-hadoop
@@ -61,8 +61,11 @@ EOF
         $HADOOP_HOME/etc/hadoop/yarn-env.sh
     echo "source $JAVA_RC" >> $HADOOP_HOME/etc/hadoop/yarn-env.sh

-    # remove apache-built swiftfs
-    rm ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-openstack-${DIB_HADOOP_VERSION}.jar
+    # enable swiftfs
+    # Sahara expects the jar to be in both folders, even though only one is
+    # the "official" classpath. Until images and Sahara become more strictly
+    # coupled we must maintain this legacy behavior.
+    ln -s ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-openstack-${DIB_HADOOP_VERSION}.jar ${HADOOP_HOME}/share/hadoop/common/lib/
 }

diff --git a/elements/s3_hadoop/README.rst b/elements/s3_hadoop/README.rst
index 3118dc62..4280b160 100644
--- a/elements/s3_hadoop/README.rst
+++ b/elements/s3_hadoop/README.rst
@@ -2,22 +2,9 @@
 s3_hadoop
 =========

-Copy the Hadoop S3 connector jar file into the Hadoop classpath.
+Copy the Hadoop S3 connector libraries into the Hadoop and Spark classpaths.

 Environment Variables
 ---------------------

-HADOOP_S3_JAR_ORIGIN
-  :Required: No
-  :Default: Depends on plugin.
-  :Description: Path to where the S3 jar is (already) located.
-
-HADOOP_S3_JAR_DOWNLOAD
-  :Required: No
-  :Default: None.
-  :Description: If set, use a download a specific S3 jar instead of one already available on the image.
-
-DIB_HDFS_LIB_DIR
-  :Required: No
-  :Default: /usr/share/hadoop/lib
-  :Description: Directory in the guest where to save the S3 jar. Shared with swift_hadoop.
+None.
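
(Illustrative check, not part of the patch: the 40-setup-hadoop hunk above replaces deletion of the Apache-built swiftfs jar with a symlink, so a single on-disk copy satisfies both directories Sahara probes. On a built Vanilla image, assuming HADOOP_HOME=/opt/hadoop and Hadoop 2.7.1, the result can be verified with:)

    # The common/lib entry should resolve back into tools/lib:
    readlink /opt/hadoop/share/hadoop/common/lib/hadoop-openstack-2.7.1.jar
    # expected: /opt/hadoop/share/hadoop/tools/lib/hadoop-openstack-2.7.1.jar
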
diff --git a/elements/s3_hadoop/post-install.d/89-add-amazon-jar b/elements/s3_hadoop/post-install.d/89-add-amazon-jar
index 30e2c376..70a10eac 100755
--- a/elements/s3_hadoop/post-install.d/89-add-amazon-jar
+++ b/elements/s3_hadoop/post-install.d/89-add-amazon-jar
@@ -5,32 +5,56 @@ fi
 set -eu
 set -o pipefail

-if [ -z "${HADOOP_S3_JAR_ORIGIN:-}" ]; then
-    # The jar is not locally available during image-gen on Ambari/MapR plugins: relevant packages are installed later.
-    # The jar is not appropriate for the Storm plugin: you cannot stream data from an object store.
-    # For plugins not found in the switch statement below, a user-specified jar can still be downloaded.
-    case "$plugin_type" in
-        "vanilla")
-            HADOOP_S3_JAR_ORIGIN="/opt/hadoop/share/hadoop/tools/lib/hadoop-aws-$DIB_HADOOP_VERSION.jar"
-            ;;
-        "cloudera" | "spark" )
-            HADOOP_S3_JAR_ORIGIN="/usr/lib/hadoop/hadoop-aws.jar"
-            ;;
-    esac
-fi
-HDFS_LIB_DIR=${DIB_HDFS_LIB_DIR:-"/usr/share/hadoop/lib"} # matches swift_hadoop default
+case "$plugin_type" in
+    "vanilla" )
+        HADOOP_TOOLS_DIR_PATH="/opt/hadoop/share/hadoop/tools/lib"
+        HADOOP_ENV_SH_PATH="/opt/hadoop/etc/hadoop/hadoop-env.sh"
+        SPARK_JARS_DIR_PATH="/opt/spark/jars"
+        ;;
+    "spark" )
+        HADOOP_TOOLS_DIR_PATH="/usr/lib/hadoop/client"
+        SPARK_JARS_DIR_PATH="/opt/spark/jars"
+        ;;
+    "cloudera" )
+        echo -n "The s3_hadoop element is not supported on CDH,"
+        echo " because the relevant libraries are already in the right place."
+        exit 1
+        ;;
+    *)
+        echo "The s3_hadoop element is only supported on Vanilla and Spark."
+        exit 1
+esac

-if [ -z "${HADOOP_S3_JAR_DOWNLOAD:-}" ]; then
-    if [ "${HADOOP_S3_JAR_ORIGIN:-}" ]; then
-        cp $HADOOP_S3_JAR_ORIGIN $HDFS_LIB_DIR/hadoop-aws.jar
+# NOTE: By definition, the Spark standalone plugin does not contain Hadoop in
+# its entirety. Therefore, there are no Hadoop-specific environment settings
+# available for modification.
+if [ "$plugin_type" != "spark" ]; then
+    if [ -f "$HADOOP_ENV_SH_PATH" ]; then
+        cat >> $HADOOP_ENV_SH_PATH <
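
(The remainder of this hunk is not shown above; it is cut off mid-heredoc. Illustration only, not a reconstruction of the missing patch lines: given the variables set in the case statement, the element's job reduces to wiring of roughly the following shape. The exact heredoc body and the Spark-side copy step are assumptions.)

    # Hadoop side (vanilla only): extend the daemon classpath via
    # hadoop-env.sh so hadoop-aws and its bundled AWS SDK in tools/lib are
    # found; "\$" keeps HADOOP_CLASSPATH a runtime reference.
    echo "export HADOOP_CLASSPATH=\$HADOOP_CLASSPATH:$HADOOP_TOOLS_DIR_PATH/*" >> "$HADOOP_ENV_SH_PATH"

    # Spark side: Spark loads every jar found in its jars/ directory, so
    # copying the connector there is sufficient.
    cp "$HADOOP_TOOLS_DIR_PATH"/hadoop-aws-*.jar "$SPARK_JARS_DIR_PATH/"
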