summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeremy Freudberg <jeremyfreudberg@gmail.com>2017-07-28 12:17:58 +0000
committerJeremy Freudberg <jeremyfreudberg@gmail.com>2017-07-28 14:01:01 +0000
commita77a9a978a655044a0b58a299df965c89391090d (patch)
treed4ec7c2b0abb8066442e1064b6590b59d5de3f72
parent5c64e8e34581619fccbd46a8a76d3e7545d4f01b (diff)
Add S3 jar to Hadoop classpath
As prereq of support for S3 datasource, the hadoop-aws jar needs to be in the Hadoop classpath. The jar is copied into the proper folder when possible on the appropriate plugins, and otherwise can be provided from a download URL by the user. Additionally, set the correct value of DIB_HDFS_LIB_DIR on the Vanilla plugin to avoid any unnecessary symlinking. Partially-Implements: bp sahara-support-s3 Change-Id: I94c5b0055b87f6a4e1382118d0718e588fccfe87
Notes
Notes (review): Code-Review+2: Telles Mota Vidal Nóbrega <tenobreg@redhat.com> Workflow+1: Telles Mota Vidal Nóbrega <tenobreg@redhat.com> Verified+2: Jenkins Submitted-by: Jenkins Submitted-at: Fri, 28 Jul 2017 17:49:18 +0000 Reviewed-on: https://review.openstack.org/488398 Project: openstack/sahara-image-elements Branch: refs/heads/master
-rwxr-xr-xdiskimage-create/diskimage-create.sh30
-rwxr-xr-xelements/hadoop/install.d/40-setup-hadoop5
-rw-r--r--elements/s3_hadoop/README.rst23
-rw-r--r--elements/s3_hadoop/element-deps1
-rw-r--r--elements/s3_hadoop/package-installs.yaml2
-rwxr-xr-xelements/s3_hadoop/post-install.d/89-add-amazon-jar36
-rw-r--r--elements/swift_hadoop/README.rst2
7 files changed, 82 insertions, 17 deletions
diff --git a/diskimage-create/diskimage-create.sh b/diskimage-create/diskimage-create.sh
index a0effdf..a29c26d 100755
--- a/diskimage-create/diskimage-create.sh
+++ b/diskimage-create/diskimage-create.sh
@@ -474,15 +474,15 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "vanilla" ]; then
474 export HIVE_VERSION=${HIVE_VERSION:-"0.11.0"} 474 export HIVE_VERSION=${HIVE_VERSION:-"0.11.0"}
475 export HADOOP_V2_7_1_NATIVE_LIBS_DOWNLOAD_URL=${HADOOP_V2_7_1_NATIVE_LIBS_DOWNLOAD_URL:-"https://tarballs.openstack.org/sahara/dist/common-artifacts/hadoop-native-libs-2.7.1.tar.gz"} 475 export HADOOP_V2_7_1_NATIVE_LIBS_DOWNLOAD_URL=${HADOOP_V2_7_1_NATIVE_LIBS_DOWNLOAD_URL:-"https://tarballs.openstack.org/sahara/dist/common-artifacts/hadoop-native-libs-2.7.1.tar.gz"}
476 export OOZIE_HADOOP_V2_7_1_DOWNLOAD_URL=${OOZIE_HADOOP_V2_7_1_FILE:-"http://sahara-files.mirantis.com/oozie-4.2.0-hadoop-2.7.1.tar.gz"} 476 export OOZIE_HADOOP_V2_7_1_DOWNLOAD_URL=${OOZIE_HADOOP_V2_7_1_FILE:-"http://sahara-files.mirantis.com/oozie-4.2.0-hadoop-2.7.1.tar.gz"}
477 export DIB_HDFS_LIB_DIR="/opt/hadoop/share/hadoop/tools/lib" 477 export DIB_HDFS_LIB_DIR="/opt/hadoop/share/hadoop/common/lib"
478 export plugin_type="vanilla" 478 export plugin_type="vanilla"
479 export DIB_SPARK_VERSION=1.6.0 479 export DIB_SPARK_VERSION=1.6.0
480 export SPARK_HADOOP_DL=hadoop2.6 480 export SPARK_HADOOP_DL=hadoop2.6
481 481
482 ubuntu_elements_sequence="hadoop oozie mysql hive $JAVA_ELEMENT swift_hadoop spark" 482 ubuntu_elements_sequence="hadoop oozie mysql hive $JAVA_ELEMENT swift_hadoop spark s3_hadoop"
483 fedora_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark" 483 fedora_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark s3_hadoop"
484 centos_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark nc" 484 centos_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark nc s3_hadoop"
485 centos7_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark nc" 485 centos7_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark nc s3_hadoop"
486 486
487 # Workaround for https://bugs.launchpad.net/diskimage-builder/+bug/1204824 487 # Workaround for https://bugs.launchpad.net/diskimage-builder/+bug/1204824
488 # https://bugs.launchpad.net/sahara/+bug/1252684 488 # https://bugs.launchpad.net/sahara/+bug/1252684
@@ -557,7 +557,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "spark" ]; then
557 # Tell the cloudera element to install only hdfs 557 # Tell the cloudera element to install only hdfs
558 export DIB_CDH_HDFS_ONLY=1 558 export DIB_CDH_HDFS_ONLY=1
559 559
560 ubuntu_elements_sequence="$JAVA_ELEMENT swift_hadoop spark hadoop-cloudera" 560 ubuntu_elements_sequence="$JAVA_ELEMENT swift_hadoop spark hadoop-cloudera s3_hadoop"
561 export ubuntu_image_name=${ubuntu_spark_image_name:-"ubuntu_sahara_spark_latest"} 561 export ubuntu_image_name=${ubuntu_spark_image_name:-"ubuntu_sahara_spark_latest"}
562 562
563 # Creating Ubuntu cloud image 563 # Creating Ubuntu cloud image
@@ -605,19 +605,19 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "ambari" ]; then
605 605
606 if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then 606 if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then
607 ambari_ubuntu_image_name=${ambari_ubuntu_image_name:-ubuntu_sahara_ambari} 607 ambari_ubuntu_image_name=${ambari_ubuntu_image_name:-ubuntu_sahara_ambari}
608 ambari_element_sequence="ambari $JAVA_ELEMENT swift_hadoop kdc" 608 ambari_element_sequence="ambari $JAVA_ELEMENT swift_hadoop kdc s3_hadoop"
609 export DIB_RELEASE="trusty" 609 export DIB_RELEASE="trusty"
610 image_create ubuntu $ambari_ubuntu_image_name $ambari_element_sequence 610 image_create ubuntu $ambari_ubuntu_image_name $ambari_element_sequence
611 unset DIB_RELEASE 611 unset DIB_RELEASE
612 fi 612 fi
613 if [ "$BASE_IMAGE_OS" = "centos" ]; then 613 if [ "$BASE_IMAGE_OS" = "centos" ]; then
614 ambari_centos_image_name=${ambari_centos_image_name:-centos_sahara_ambari} 614 ambari_centos_image_name=${ambari_centos_image_name:-centos_sahara_ambari}
615 ambari_element_sequence="ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc" 615 ambari_element_sequence="ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc s3_hadoop"
616 image_create centos $ambari_centos_image_name $ambari_element_sequence 616 image_create centos $ambari_centos_image_name $ambari_element_sequence
617 fi 617 fi
618 if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "centos7" ]; then 618 if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "centos7" ]; then
619 ambari_centos7_image_name=${ambari_centos7_image_name:-"centos7-sahara-ambari"} 619 ambari_centos7_image_name=${ambari_centos7_image_name:-"centos7-sahara-ambari"}
620 ambari_element_sequence="disable-selinux ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc" 620 ambari_element_sequence="disable-selinux ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc s3_hadoop"
621 image_create centos7 $ambari_centos7_image_name $ambari_element_sequence 621 image_create centos7 $ambari_centos7_image_name $ambari_element_sequence
622 fi 622 fi
623 623
@@ -643,7 +643,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then
643 HADOOP_VERSION=${DIB_CDH_MINOR_VERSION%.*} 643 HADOOP_VERSION=${DIB_CDH_MINOR_VERSION%.*}
644 fi 644 fi
645 645
646 cloudera_elements_sequence="hadoop-cloudera swift_hadoop kdc" 646 cloudera_elements_sequence="hadoop-cloudera swift_hadoop kdc s3_hadoop"
647 if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then 647 if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then
648 if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then 648 if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then
649 export DIB_CDH_VERSION="5.5" 649 export DIB_CDH_VERSION="5.5"
@@ -684,7 +684,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then
684 fi 684 fi
685 685
686 if [ "$BASE_IMAGE_OS" = "centos" ]; then 686 if [ "$BASE_IMAGE_OS" = "centos" ]; then
687 centos_cloudera_elements_sequence="selinux-permissive disable-firewall nc" 687 centos_cloudera_elements_sequence="selinux-permissive disable-firewall nc s3_hadoop"
688 if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then 688 if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then
689 export DIB_CDH_VERSION="5.5" 689 export DIB_CDH_VERSION="5.5"
690 690
@@ -696,7 +696,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then
696 fi 696 fi
697 697
698 if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "centos7" ]; then 698 if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "centos7" ]; then
699 centos7_cloudera_elements_sequence="selinux-permissive disable-firewall nc" 699 centos7_cloudera_elements_sequence="selinux-permissive disable-firewall nc s3_hadoop"
700 if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then 700 if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then
701 export DIB_CDH_VERSION="5.5" 701 export DIB_CDH_VERSION="5.5"
702 702
@@ -745,6 +745,7 @@ fi
745########################## 745##########################
746if [ -z "$PLUGIN" -o "$PLUGIN" = "mapr" ]; then 746if [ -z "$PLUGIN" -o "$PLUGIN" = "mapr" ]; then
747 export DIB_MAPR_VERSION=${DIB_MAPR_VERSION:-${DIB_DEFAULT_MAPR_VERSION}} 747 export DIB_MAPR_VERSION=${DIB_MAPR_VERSION:-${DIB_DEFAULT_MAPR_VERSION}}
748 export plugin_type="mapr"
748 749
749 export DIB_CLOUD_INIT_DATASOURCES=$CLOUD_INIT_DATASOURCES 750 export DIB_CLOUD_INIT_DATASOURCES=$CLOUD_INIT_DATASOURCES
750 751
@@ -752,8 +753,8 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "mapr" ]; then
752 #MapR repository requires additional space 753 #MapR repository requires additional space
753 export DIB_MIN_TMPFS=10 754 export DIB_MIN_TMPFS=10
754 755
755 mapr_ubuntu_elements_sequence="ssh hadoop-mapr $JAVA_ELEMENT" 756 mapr_ubuntu_elements_sequence="ssh hadoop-mapr $JAVA_ELEMENT s3_hadoop"
756 mapr_centos_elements_sequence="ssh hadoop-mapr selinux-permissive $JAVA_ELEMENT disable-firewall nc" 757 mapr_centos_elements_sequence="ssh hadoop-mapr selinux-permissive $JAVA_ELEMENT disable-firewall nc s3_hadoop"
757 758
758 if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then 759 if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then
759 export DIB_RELEASE=${DIB_RELEASE:-trusty} 760 export DIB_RELEASE=${DIB_RELEASE:-trusty}
@@ -780,6 +781,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "mapr" ]; then
780 781
781 unset DIB_CLOUD_INIT_DATASOURCES 782 unset DIB_CLOUD_INIT_DATASOURCES
782 fi 783 fi
784 unset plugin_type
783 785
784fi 786fi
785 787
diff --git a/elements/hadoop/install.d/40-setup-hadoop b/elements/hadoop/install.d/40-setup-hadoop
index c0d28dd..7dd995e 100755
--- a/elements/hadoop/install.d/40-setup-hadoop
+++ b/elements/hadoop/install.d/40-setup-hadoop
@@ -61,8 +61,9 @@ EOF
61 $HADOOP_HOME/etc/hadoop/yarn-env.sh 61 $HADOOP_HOME/etc/hadoop/yarn-env.sh
62 echo "source $JAVA_RC" >> $HADOOP_HOME/etc/hadoop/yarn-env.sh 62 echo "source $JAVA_RC" >> $HADOOP_HOME/etc/hadoop/yarn-env.sh
63 63
64 # enable swiftfs 64 # remove apache-built swiftfs
65 ln -s ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-openstack-${DIB_HADOOP_VERSION}.jar ${HADOOP_HOME}/share/hadoop/common/lib/ 65 rm ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-openstack-${DIB_HADOOP_VERSION}.jar
66
66} 67}
67 68
68case "$DISTRO_NAME" in 69case "$DISTRO_NAME" in
diff --git a/elements/s3_hadoop/README.rst b/elements/s3_hadoop/README.rst
new file mode 100644
index 0000000..3118dc6
--- /dev/null
+++ b/elements/s3_hadoop/README.rst
@@ -0,0 +1,23 @@
1=========
2s3_hadoop
3=========
4
5Copy the Hadoop S3 connector jar file into the Hadoop classpath.
6
7Environment Variables
8---------------------
9
10HADOOP_S3_JAR_ORIGIN
11 :Required: No
12 :Default: Depends on plugin.
13 :Description: Path to where the S3 jar is (already) located.
14
15HADOOP_S3_JAR_DOWNLOAD
16 :Required: No
17 :Default: None.
18 :Description: If set, download a specific S3 jar from this URL to use instead of one already available on the image.
19
20DIB_HDFS_LIB_DIR
21 :Required: No
22 :Default: /usr/share/hadoop/lib
23 :Description: Directory in the guest where to save the S3 jar. Shared with swift_hadoop.
diff --git a/elements/s3_hadoop/element-deps b/elements/s3_hadoop/element-deps
new file mode 100644
index 0000000..7076aba
--- /dev/null
+++ b/elements/s3_hadoop/element-deps
@@ -0,0 +1 @@
package-installs
diff --git a/elements/s3_hadoop/package-installs.yaml b/elements/s3_hadoop/package-installs.yaml
new file mode 100644
index 0000000..cc77790
--- /dev/null
+++ b/elements/s3_hadoop/package-installs.yaml
@@ -0,0 +1,2 @@
1wget:
2 phase: post-install.d
diff --git a/elements/s3_hadoop/post-install.d/89-add-amazon-jar b/elements/s3_hadoop/post-install.d/89-add-amazon-jar
new file mode 100755
index 0000000..30e2c37
--- /dev/null
+++ b/elements/s3_hadoop/post-install.d/89-add-amazon-jar
@@ -0,0 +1,36 @@
1#!/bin/bash
2if [ "${DIB_DEBUG_TRACE:-0}" -gt 0 ]; then
3 set -x
4fi
5set -eu
6set -o pipefail
7
8if [ -z "${HADOOP_S3_JAR_ORIGIN:-}" ]; then
9 # The jar is not locally available during image-gen on Ambari/MapR plugins: relevant packages are installed later.
10 # The jar is not appropriate for the Storm plugin: you cannot stream data from an object store.
11 # For plugins not found in the switch statement below, a user-specified jar can still be downloaded.
12 case "$plugin_type" in
13 "vanilla")
14 HADOOP_S3_JAR_ORIGIN="/opt/hadoop/share/hadoop/tools/lib/hadoop-aws-$DIB_HADOOP_VERSION.jar"
15 ;;
16 "cloudera" | "spark" )
17 HADOOP_S3_JAR_ORIGIN="/usr/lib/hadoop/hadoop-aws.jar"
18 ;;
19 esac
20fi
21
22HDFS_LIB_DIR=${DIB_HDFS_LIB_DIR:-"/usr/share/hadoop/lib"} # matches swift_hadoop default
23
24if [ -z "${HADOOP_S3_JAR_DOWNLOAD:-}" ]; then
25 if [ "${HADOOP_S3_JAR_ORIGIN:-}" ]; then
26 cp $HADOOP_S3_JAR_ORIGIN $HDFS_LIB_DIR/hadoop-aws.jar
27 fi
28else
29 wget -O $HDFS_LIB_DIR/hadoop-aws.jar $HADOOP_S3_JAR_DOWNLOAD
30fi
31
32path=$HDFS_LIB_DIR/hadoop-aws.jar
33
34if [ -f $path ]; then
35 chmod 0644 $path
36fi
diff --git a/elements/swift_hadoop/README.rst b/elements/swift_hadoop/README.rst
index e447414..dc8b75d 100644
--- a/elements/swift_hadoop/README.rst
+++ b/elements/swift_hadoop/README.rst
@@ -17,7 +17,7 @@ swift_url
17DIB_HDFS_LIB_DIR 17DIB_HDFS_LIB_DIR
18 :Required: No 18 :Required: No
19 :Default: /usr/share/hadoop/lib 19 :Default: /usr/share/hadoop/lib
20 :Description: Directory in the guest where to save the swift jar. 20 :Description: Directory in the guest where to save the swift jar. Shared with s3_hadoop.
21 21
22DIB_HADOOP_SWIFT_JAR_NAME 22DIB_HADOOP_SWIFT_JAR_NAME
23 :Required: No 23 :Required: No