diff --git a/swiftonfile/swift/common/exceptions.py b/swiftonfile/swift/common/exceptions.py index 90c8deb..4de24df 100644 --- a/swiftonfile/swift/common/exceptions.py +++ b/swiftonfile/swift/common/exceptions.py @@ -36,3 +36,7 @@ class AlreadyExistsAsDir(SwiftOnFileFsException): class AlreadyExistsAsFile(SwiftOnFileFsException): pass + + +class DiskFileContainerDoesNotExist(SwiftOnFileFsException): + pass diff --git a/swiftonfile/swift/obj/diskfile.py b/swiftonfile/swift/obj/diskfile.py index 334c248..f6eb546 100644 --- a/swiftonfile/swift/obj/diskfile.py +++ b/swiftonfile/swift/obj/diskfile.py @@ -27,7 +27,7 @@ from uuid import uuid4 from eventlet import sleep from contextlib import contextmanager from swiftonfile.swift.common.exceptions import AlreadyExistsAsFile, \ - AlreadyExistsAsDir + AlreadyExistsAsDir, DiskFileContainerDoesNotExist from swift.common.utils import ThreadPool, hash_path, \ normalize_timestamp, fallocate, Timestamp from swift.common.exceptions import DiskFileNotExist, DiskFileError, \ @@ -320,8 +320,13 @@ class DiskFileWriter(object): try: do_rename(self._tmppath, df._data_file) except OSError as err: - if err.errno in (errno.ENOENT, errno.EIO) \ + if err.errno in (errno.ENOENT, errno.EIO, + errno.EBUSY, errno.ESTALE) \ and attempts < MAX_RENAME_ATTEMPTS: + # Some versions of GlusterFS had rename() as non-blocking + # operation. So we check for STALE and EBUSY. This was + # fixed recently: http://review.gluster.org/#/c/13366/ + # The comment that follows is for ENOENT and EIO... # FIXME: Why either of these two error conditions is # happening is unknown at this point. This might be a # FUSE issue of some sort or a possible race @@ -411,7 +416,7 @@ class DiskFileWriter(object): df._threadpool.force_run_in_thread(self._finalize_put, metadata) - # Avoid the unlink() system call as part of the mkstemp context + # Avoid the unlink() system call as part of the create context # cleanup self._tmppath = None @@ -579,6 +584,8 @@ class DiskFile(object): self._is_dir = False self._metadata = None self._fd = None + # This fd attribute is not used in PUT path. fd used in PUT path + # is encapsulated inside DiskFileWriter object. self._stat = None # Don't store a value for data_file until we know it exists. self._data_file = None @@ -884,6 +891,7 @@ class DiskFile(object): # Assume the full directory path exists to the file already, and # construct the proper name for the temporary file. + fd = None attempts = 1 while True: # To know more about why following temp file naming convention is @@ -911,7 +919,7 @@ class DiskFile(object): if attempts >= MAX_OPEN_ATTEMPTS: # We failed after N attempts to create the temporary # file. - raise DiskFileError('DiskFile.mkstemp(): failed to' + raise DiskFileError('DiskFile.create(): failed to' ' successfully create a temporary file' ' without running into a name conflict' ' after %d of %d attempts for: %s' % ( @@ -924,24 +932,41 @@ class DiskFile(object): # FIXME: Possible FUSE issue or race condition, let's # sleep on it and retry the operation. _random_sleep() - logging.warn("DiskFile.mkstemp(): %s ... retrying in" + logging.warn("DiskFile.create(): %s ... retrying in" " 0.1 secs", gerr) attempts += 1 elif not self._obj_path: + # ENOENT # No directory hierarchy and the create failed telling us # the container or volume directory does not exist. This # could be a FUSE issue or some race condition, so let's # sleep a bit and retry. + # Handle race: + # This can be the issue when memcache has cached that the + # container exists. If someone removes the container dir + # from filesystem, it's not reflected in memcache. So + # swift reports that the container exists and this code + # tries to create a file in a directory that does not + # exist. However, it's wrong to create the container here. _random_sleep() - logging.warn("DiskFile.mkstemp(): %s ... retrying in" + logging.warn("DiskFile.create(): %s ... retrying in" " 0.1 secs", gerr) attempts += 1 + if attempts > 2: + # Ideally we would want to return 404 indicating that + # the container itself does not exist. Can't be done + # though as the caller won't catch DiskFileNotExist. + # We raise an exception with a meaningful name for + # correctness. + logging.warn("Container dir %s does not exist", + self._container_path) + raise DiskFileContainerDoesNotExist elif attempts > 1: # Got ENOENT after previously making the path. This could # also be a FUSE issue or some race condition, nap and # retry. _random_sleep() - logging.warn("DiskFile.mkstemp(): %s ... retrying in" + logging.warn("DiskFile.create(): %s ... retrying in" " 0.1 secs" % gerr) attempts += 1 else: @@ -963,6 +988,8 @@ class DiskFile(object): # Ensure it is properly owned before we make it available. do_fchown(fd, self._uid, self._gid) dw = DiskFileWriter(fd, tmppath, self) + # It's now the responsibility of DiskFileWriter to close this fd. + fd = None yield dw finally: if dw: