summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Zuul <zuul@review.openstack.org> 2018-08-06 18:03:33 +0000
committer Gerrit Code Review <review@openstack.org> 2018-08-06 18:03:33 +0000
commit ee7964f83049862791f72889b15604e47083b438 (patch)
tree 38d2bd3c531a419955ad6cb5075dbdcab8a1f3e3
parent 8659169c84c9a2198d5aee9e94c6d145a1f8d93c (diff)
parent a37dfac06b5a6a474e8756c6567f832a38a233c6 (diff)
Merge "S3 data source URL format change"
-rw-r--r-- doc/source/user/edp.rst 10
-rw-r--r-- releasenotes/notes/s3-datasource-protocol-d3abd0b22f653b3b.yaml 4
-rw-r--r-- sahara/service/edp/data_sources/s3/implementation.py 8
-rw-r--r-- sahara/tests/unit/service/edp/data_sources/s3/test_s3_type.py 3
4 files changed, 18 insertions, 7 deletions
diff --git a/doc/source/user/edp.rst b/doc/source/user/edp.rst
index 1052654..618d1d8 100644
--- a/doc/source/user/edp.rst
+++ b/doc/source/user/edp.rst
@@ -135,7 +135,7 @@ share will be automatically mounted to your cluster's nodes as needed to
135access the data source. 135access the data source.
136 136
137Finally, Sahara supports data sources referring to S3-like object stores. The 137Finally, Sahara supports data sources referring to S3-like object stores. The
138URL should be of the form ``s3a://{bucket}/{path}``. Also, the following 138URL should be of the form ``s3://{bucket}/{path}``. Also, the following
139credentials/configs are understood: ``accesskey``, ``secretkey``, 139credentials/configs are understood: ``accesskey``, ``secretkey``,
140``endpoint``, ``bucket_in_path``, and ``ssl``. These credentials are specified 140``endpoint``, ``bucket_in_path``, and ``ssl``. These credentials are specified
141through the ``credentials`` attribute of the body of the request when creating 141through the ``credentials`` attribute of the body of the request when creating
@@ -632,13 +632,13 @@ Manila NFS filesystem reference URLS take the form:
632This format should be used when referring to a job binary or a data source 632This format should be used when referring to a job binary or a data source
633stored in a manila NFS share. 633stored in a manila NFS share.
634 634
635For job binaries only, S3 urls take the form: 635For both job binaries and data sources, S3 urls take the form:
636 636
637``s3://bucket/path/to/object`` 637``s3://bucket/path/to/object``
638 638
639For data sources, S3 urls take the standard Hadoop form: 639Despite the above URL format, the current implementation of EDP will still
640 640use the Hadoop ``s3a`` driver to access data sources. Botocore is used to
641``s3a://bucket/path/to/object`` 641access job binaries.
642 642
643EDP Requirements 643EDP Requirements
644================ 644================
diff --git a/releasenotes/notes/s3-datasource-protocol-d3abd0b22f653b3b.yaml b/releasenotes/notes/s3-datasource-protocol-d3abd0b22f653b3b.yaml
new file mode 100644
index 0000000..86baaae
--- /dev/null
+++ b/releasenotes/notes/s3-datasource-protocol-d3abd0b22f653b3b.yaml
@@ -0,0 +1,4 @@
1---
2other:
3 - |
4 The URL of an S3 data source may have `s3://` or `s3a://`, equivalently.
diff --git a/sahara/service/edp/data_sources/s3/implementation.py b/sahara/service/edp/data_sources/s3/implementation.py
index 2a74000..f65242a 100644
--- a/sahara/service/edp/data_sources/s3/implementation.py
+++ b/sahara/service/edp/data_sources/s3/implementation.py
@@ -55,8 +55,9 @@ class S3Type(DataSourceType):
55 raise ex.InvalidDataException(_("S3 url must not be empty")) 55 raise ex.InvalidDataException(_("S3 url must not be empty"))
56 56
57 url = urlparse.urlparse(url) 57 url = urlparse.urlparse(url)
58 if url.scheme != "s3a": 58 if url.scheme not in ["s3", "s3a"]:
59 raise ex.InvalidDataException(_("URL scheme must be 's3a'")) 59 raise ex.InvalidDataException(
60 _("URL scheme must be 's3' or 's3a'"))
60 61
61 if not url.hostname: 62 if not url.hostname:
62 raise ex.InvalidDataException(_("Bucket name must be present")) 63 raise ex.InvalidDataException(_("Bucket name must be present"))
@@ -80,3 +81,6 @@ class S3Type(DataSourceType):
80 if job_conf.get(s3a_cfg_name, None) is None: # no overwrite 81 if job_conf.get(s3a_cfg_name, None) is None: # no overwrite
81 if creds.get(config_name, None) is not None: 82 if creds.get(config_name, None) is not None:
82 job_conf[s3a_cfg_name] = creds[config_name] 83 job_conf[s3a_cfg_name] = creds[config_name]
84
85 def get_runtime_url(self, url, cluster):
86 return url.replace("s3://", "s3a://", 1)
diff --git a/sahara/tests/unit/service/edp/data_sources/s3/test_s3_type.py b/sahara/tests/unit/service/edp/data_sources/s3/test_s3_type.py
index 2da7a12..e396049 100644
--- a/sahara/tests/unit/service/edp/data_sources/s3/test_s3_type.py
+++ b/sahara/tests/unit/service/edp/data_sources/s3/test_s3_type.py
@@ -35,6 +35,9 @@ class TestSwiftType(base.SaharaTestCase):
35 } 35 }
36 self.s_type.validate(data) 36 self.s_type.validate(data)
37 37
38 data["url"] = "s3://mybucket/myobject"
39 self.s_type.validate(data)
40
38 creds = {} 41 creds = {}
39 data["credentials"] = creds 42 data["credentials"] = creds
40 self.s_type.validate(data) 43 self.s_type.validate(data)