From 69313a69bfcd8fe13afc323bdc6944f705de7734 Mon Sep 17 00:00:00 2001 From: "Florent Vennetier (OpenIO)" Date: Tue, 14 Mar 2017 14:33:47 +0100 Subject: [PATCH] Implement "GET Bucket (List Objects) Version 2" Before this commit, V2 listing parameters ('start-after', 'continuation-token' and 'fetch-owner') were just ignored, making some S3 clients return errors, or handle paging badly. V2 listing is selected by passing 'list-type=2' in query string. 'marker' is replaced by either 'start-after' or 'continuation-token'. This commit wraps 'start-after' and 'continuation-token' in 'marker', which is passed to swift. 'NextContinuationToken' is a base64 encoding of the last returned object, so it is opaque to the client. Change-Id: I23bf83cb8bbaf4c4935bf6b56791051c032c688c --- doc/rnc/list_bucket_result.rnc | 13 ++- swift3/controllers/bucket.py | 56 ++++++++-- swift3/schema/list_bucket_result.rng | 40 +++++-- swift3/test/functional/test_bucket.py | 115 +++++++++++++++++++ swift3/test/unit/test_bucket.py | 152 ++++++++++++++++++++++++++ 5 files changed, 354 insertions(+), 22 deletions(-) diff --git a/doc/rnc/list_bucket_result.rnc b/doc/rnc/list_bucket_result.rnc index 1dac2ac8..e7f572b7 100644 --- a/doc/rnc/list_bucket_result.rnc +++ b/doc/rnc/list_bucket_result.rnc @@ -4,8 +4,17 @@ start = element ListBucketResult { element Name { xsd:string }, element Prefix { xsd:string }, - element Marker { xsd:string }, - element NextMarker { xsd:string }?, + ( + ( + element Marker { xsd:string }, + element NextMarker { xsd:string }? + ) | ( + element NextContinuationToken { xsd:string }?, + element ContinuationToken { xsd:string }?, + element StartAfter { xsd:string }?, + element KeyCount { xsd:int } + ) + ), element MaxKeys { xsd:int }, element EncodingType { xsd:string }?, element Delimiter { xsd:string }?, diff --git a/swift3/controllers/bucket.py b/swift3/controllers/bucket.py index ed28bfc0..ab8f457d 100644 --- a/swift3/controllers/bucket.py +++ b/swift3/controllers/bucket.py @@ -14,9 +14,11 @@ # limitations under the License. import sys +from base64 import standard_b64encode as b64encode +from base64 import standard_b64decode as b64decode from swift.common.http import HTTP_OK -from swift.common.utils import json, public +from swift.common.utils import json, public, config_true_value from swift3.controllers.base import Controller from swift3.etree import Element, SubElement, tostring, fromstring, \ @@ -115,6 +117,19 @@ class BucketController(Controller): if 'delimiter' in req.params: query.update({'delimiter': req.params['delimiter']}) + # GET Bucket (List Objects) Version 2 parameters + is_v2 = int(req.params.get('list-type', '1')) == 2 + fetch_owner = False + if is_v2: + if 'start-after' in req.params: + query.update({'marker': req.params['start-after']}) + # continuation-token overrides start-after + if 'continuation-token' in req.params: + decoded = b64decode(req.params['continuation-token']) + query.update({'marker': decoded}) + if 'fetch-owner' in req.params: + fetch_owner = config_true_value(req.params['fetch-owner']) + resp = req.get_response(self.app, query=query) objects = json.loads(resp.body) @@ -122,20 +137,36 @@ class BucketController(Controller): elem = Element('ListBucketResult') SubElement(elem, 'Name').text = req.container_name SubElement(elem, 'Prefix').text = req.params.get('prefix') - SubElement(elem, 'Marker').text = req.params.get('marker') # in order to judge that truncated is valid, check whether # max_keys + 1 th element exists in swift. is_truncated = max_keys > 0 and len(objects) > max_keys objects = objects[:max_keys] - if is_truncated and 'delimiter' in req.params: - if 'name' in objects[-1]: - SubElement(elem, 'NextMarker').text = \ - objects[-1]['name'] - if 'subdir' in objects[-1]: - SubElement(elem, 'NextMarker').text = \ - objects[-1]['subdir'] + if not is_v2: + SubElement(elem, 'Marker').text = req.params.get('marker') + if is_truncated and 'delimiter' in req.params: + if 'name' in objects[-1]: + SubElement(elem, 'NextMarker').text = \ + objects[-1]['name'] + if 'subdir' in objects[-1]: + SubElement(elem, 'NextMarker').text = \ + objects[-1]['subdir'] + else: + if is_truncated: + if 'name' in objects[-1]: + SubElement(elem, 'NextContinuationToken').text = \ + b64encode(objects[-1]['name']) + if 'subdir' in objects[-1]: + SubElement(elem, 'NextContinuationToken').text = \ + b64encode(objects[-1]['subdir']) + if 'continuation-token' in req.params: + SubElement(elem, 'ContinuationToken').text = \ + req.params['continuation-token'] + if 'start-after' in req.params: + SubElement(elem, 'StartAfter').text = \ + req.params['start-after'] + SubElement(elem, 'KeyCount').text = str(len(objects)) SubElement(elem, 'MaxKeys').text = str(tag_max_keys) @@ -156,9 +187,10 @@ class BucketController(Controller): o['last_modified'][:-3] + 'Z' SubElement(contents, 'ETag').text = '"%s"' % o['hash'] SubElement(contents, 'Size').text = str(o['bytes']) - owner = SubElement(contents, 'Owner') - SubElement(owner, 'ID').text = req.user_id - SubElement(owner, 'DisplayName').text = req.user_id + if fetch_owner or not is_v2: + owner = SubElement(contents, 'Owner') + SubElement(owner, 'ID').text = req.user_id + SubElement(owner, 'DisplayName').text = req.user_id SubElement(contents, 'StorageClass').text = 'STANDARD' for o in objects: diff --git a/swift3/schema/list_bucket_result.rng b/swift3/schema/list_bucket_result.rng index f463fe38..9c6640c6 100644 --- a/swift3/schema/list_bucket_result.rng +++ b/swift3/schema/list_bucket_result.rng @@ -9,14 +9,38 @@ - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/swift3/test/functional/test_bucket.py b/swift3/test/functional/test_bucket.py index c0828a31..8ebb82fd 100644 --- a/swift3/test/functional/test_bucket.py +++ b/swift3/test/functional/test_bucket.py @@ -283,6 +283,121 @@ class TestSwift3Bucket(Swift3FunctionalTestCase): self.assertTrue(o.find('Owner/DisplayName').text, self.conn.user_id) + def test_get_bucket_v2_with_start_after(self): + bucket = 'bucket' + put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object', + 'dir/subdir/object') + self._prepare_test_get_bucket(bucket, put_objects) + + marker = 'object' + query = 'list-type=2&start-after=%s' % marker + expect_objects = ('object2', 'subdir/object', 'subdir2/object') + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('StartAfter').text, marker) + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertIsNone(o.find('Owner/ID')) + self.assertIsNone(o.find('Owner/DisplayName')) + + def test_get_bucket_v2_with_fetch_owner(self): + bucket = 'bucket' + put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object', + 'dir/subdir/object') + self._prepare_test_get_bucket(bucket, put_objects) + + query = 'list-type=2&fetch-owner=true' + expect_objects = ('dir/subdir/object', 'object', 'object2', + 'subdir/object', 'subdir2/object') + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('KeyCount').text, '5') + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertTrue(o.find('Owner/ID').text, self.conn.user_id) + self.assertTrue(o.find('Owner/DisplayName').text, + self.conn.user_id) + + def test_get_bucket_v2_with_continuation_token(self): + bucket = 'bucket' + put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object', + 'dir/subdir/object') + self._prepare_test_get_bucket(bucket, put_objects) + + query = 'list-type=2&max-keys=3' + expect_objects = ('dir/subdir/object', 'object', 'object2') + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('MaxKeys').text, '3') + self.assertEqual(elem.find('KeyCount').text, '3') + self.assertEqual(elem.find('IsTruncated').text, 'true') + next_cont_token_elem = elem.find('NextContinuationToken') + self.assertIsNotNone(next_cont_token_elem) + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertIsNone(o.find('Owner/ID')) + self.assertIsNone(o.find('Owner/DisplayName')) + + query = 'list-type=2&max-keys=3&continuation-token=%s' % \ + next_cont_token_elem.text + expect_objects = ('subdir/object', 'subdir2/object') + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('MaxKeys').text, '3') + self.assertEqual(elem.find('KeyCount').text, '2') + self.assertEqual(elem.find('IsTruncated').text, 'false') + self.assertIsNone(elem.find('NextContinuationToken')) + cont_token_elem = elem.find('ContinuationToken') + self.assertEqual(cont_token_elem.text, next_cont_token_elem.text) + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertIsNone(o.find('Owner/ID')) + self.assertIsNone(o.find('Owner/DisplayName')) + def test_head_bucket_error(self): self.conn.make_request('PUT', 'bucket') diff --git a/swift3/test/unit/test_bucket.py b/swift3/test/unit/test_bucket.py index acd01faa..04658ced 100644 --- a/swift3/test/unit/test_bucket.py +++ b/swift3/test/unit/test_bucket.py @@ -69,8 +69,20 @@ class TestSwift3Bucket(Swift3TestCase): {}, None) self.swift.register('GET', '/v1/AUTH_test/junk', swob.HTTPOk, {}, object_list) + self.swift.register( + 'GET', + '/v1/AUTH_test/junk?delimiter=a&format=json&limit=3&marker=viola', + swob.HTTPOk, {}, json.dumps(objects[2:])) self.swift.register('GET', '/v1/AUTH_test/junk-subdir', swob.HTTPOk, {}, json.dumps(object_list_subdir)) + self.swift.register( + 'GET', + '/v1/AUTH_test/subdirs?delimiter=/&format=json&limit=3', + swob.HTTPOk, {}, json.dumps([ + {'subdir': 'nothing/'}, + {'subdir': 'but/'}, + {'subdir': 'subdirs/'}, + ])) def setUp(self): super(TestSwift3Bucket, self).setUp() @@ -183,6 +195,47 @@ class TestSwift3Bucket(Swift3TestCase): elem = fromstring(body, 'ListBucketResult') self.assertEqual(elem.find('./IsTruncated').text, 'true') + req = Request.blank('/subdirs?delimiter=/&max-keys=2', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_swift3(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./IsTruncated').text, 'true') + self.assertEqual(elem.find('./NextMarker').text, 'but/') + + def test_bucket_GET_v2_is_truncated(self): + bucket_name = 'junk' + + req = Request.blank('/%s?list-type=2&max-keys=5' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_swift3(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./KeyCount').text, '5') + self.assertEqual(elem.find('./IsTruncated').text, 'false') + + req = Request.blank('/%s?list-type=2&max-keys=4' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_swift3(req) + elem = fromstring(body, 'ListBucketResult') + self.assertIsNotNone(elem.find('./NextContinuationToken')) + self.assertEqual(elem.find('./KeyCount').text, '4') + self.assertEqual(elem.find('./IsTruncated').text, 'true') + + req = Request.blank('/subdirs?list-type=2&delimiter=/&max-keys=2', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_swift3(req) + elem = fromstring(body, 'ListBucketResult') + self.assertIsNotNone(elem.find('./NextContinuationToken')) + self.assertEqual(elem.find('./KeyCount').text, '2') + self.assertEqual(elem.find('./IsTruncated').text, 'true') + def test_bucket_GET_max_keys(self): bucket_name = 'junk' @@ -259,6 +312,26 @@ class TestSwift3Bucket(Swift3TestCase): self.assertEqual(args['marker'], 'b') self.assertEqual(args['prefix'], 'c') + def test_bucket_GET_v2_passthroughs(self): + bucket_name = 'junk' + req = Request.blank( + '/%s?list-type=2&delimiter=a&start-after=b&prefix=c' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_swift3(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./Prefix').text, 'c') + self.assertEqual(elem.find('./StartAfter').text, 'b') + self.assertEqual(elem.find('./Delimiter').text, 'a') + _, path = self.swift.calls[-1] + _, query_string = path.split('?') + args = dict(cgi.parse_qsl(query_string)) + self.assertEqual(args['delimiter'], 'a') + # "start-after" is converted to "marker" + self.assertEqual(args['marker'], 'b') + self.assertEqual(args['prefix'], 'c') + def test_bucket_GET_with_nonascii_queries(self): bucket_name = 'junk' req = Request.blank( @@ -279,6 +352,26 @@ class TestSwift3Bucket(Swift3TestCase): self.assertEqual(args['marker'], '\xef\xbc\xa2') self.assertEqual(args['prefix'], '\xef\xbc\xa3') + def test_bucket_GET_v2_with_nonascii_queries(self): + bucket_name = 'junk' + req = Request.blank( + '/%s?list-type=2&delimiter=\xef\xbc\xa1&start-after=\xef\xbc\xa2&' + 'prefix=\xef\xbc\xa3' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_swift3(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./Prefix').text, '\xef\xbc\xa3') + self.assertEqual(elem.find('./StartAfter').text, '\xef\xbc\xa2') + self.assertEqual(elem.find('./Delimiter').text, '\xef\xbc\xa1') + _, path = self.swift.calls[-1] + _, query_string = path.split('?') + args = dict(cgi.parse_qsl(query_string)) + self.assertEqual(args['delimiter'], '\xef\xbc\xa1') + self.assertEqual(args['marker'], '\xef\xbc\xa2') + self.assertEqual(args['prefix'], '\xef\xbc\xa3') + def test_bucket_GET_with_delimiter_max_keys(self): bucket_name = 'junk' req = Request.blank('/%s?delimiter=a&max-keys=2' % bucket_name, @@ -292,6 +385,33 @@ class TestSwift3Bucket(Swift3TestCase): self.assertEqual(elem.find('./MaxKeys').text, '2') self.assertEqual(elem.find('./IsTruncated').text, 'true') + def test_bucket_GET_v2_with_delimiter_max_keys(self): + bucket_name = 'junk' + req = Request.blank( + '/%s?list-type=2&delimiter=a&max-keys=2' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_swift3(req) + self.assertEqual(status.split()[0], '200') + elem = fromstring(body, 'ListBucketResult') + next_token = elem.find('./NextContinuationToken') + self.assertIsNotNone(next_token) + self.assertEqual(elem.find('./MaxKeys').text, '2') + self.assertEqual(elem.find('./IsTruncated').text, 'true') + + req = Request.blank( + '/%s?list-type=2&delimiter=a&max-keys=2&continuation-token=%s' % + (bucket_name, next_token.text), + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_swift3(req) + self.assertEqual(status.split()[0], '200') + elem = fromstring(body, 'ListBucketResult') + names = [o.find('./Key').text for o in elem.iterchildren('Contents')] + self.assertEqual(names[0], 'lily') + def test_bucket_GET_subdir_with_delimiter_max_keys(self): bucket_name = 'junk-subdir' req = Request.blank('/%s?delimiter=a&max-keys=1' % bucket_name, @@ -305,6 +425,38 @@ class TestSwift3Bucket(Swift3TestCase): self.assertEqual(elem.find('./MaxKeys').text, '1') self.assertEqual(elem.find('./IsTruncated').text, 'true') + def test_bucket_GET_v2_fetch_owner(self): + bucket_name = 'junk' + req = Request.blank('/%s?list-type=2' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_swift3(req) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body, 'ListBucketResult') + name = elem.find('./Name').text + self.assertEqual(name, bucket_name) + + objects = elem.iterchildren('Contents') + for o in objects: + self.assertIsNone(o.find('./Owner')) + + req = Request.blank('/%s?list-type=2&fetch-owner=true' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_swift3(req) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body, 'ListBucketResult') + name = elem.find('./Name').text + self.assertEqual(name, bucket_name) + + objects = elem.iterchildren('Contents') + for o in objects: + self.assertIsNotNone(o.find('./Owner')) + @s3acl def test_bucket_PUT_error(self): code = self._test_method_error('PUT', '/bucket', swob.HTTPCreated,