http://git-wip-us.apache.org/repos/asf/zookeeper/blob/63aaf0a1/zookeeper-contrib/zookeeper-contrib-zkfuse/src/zkfuse.cc ---------------------------------------------------------------------- diff --git a/zookeeper-contrib/zookeeper-contrib-zkfuse/src/zkfuse.cc b/zookeeper-contrib/zookeeper-contrib-zkfuse/src/zkfuse.cc new file mode 100644 index 0000000..6a82168 --- /dev/null +++ b/zookeeper-contrib/zookeeper-contrib-zkfuse/src/zkfuse.cc @@ -0,0 +1,4492 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define FUSE_USE_VERSION 26 + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#undef _GNU_SOURCE +#define _GNU_SOURCE + +extern "C" { +#include <fuse.h> +#include <ulockmgr.h> +} +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <dirent.h> +#include <errno.h> +#include <sys/time.h> +#ifdef HAVE_SETXATTR +#include <sys/xattr.h> +#endif + +#include <getopt.h> + +#include <iostream> +#include <sstream> +#include <map> +#include <string> +#include <boost/utility.hpp> +#include <boost/weak_ptr.hpp> + +#include "log.h" +#include "mutex.h" +#include "zkadapter.h" + +#define ZOOKEEPER_ROOT_CHILDREN_WATCH_BUG + +/** + Typedef for ZooKeeperAdapter::Data. 
+*/
+typedef std::string Data;
+/**
+   Typedef for ZooKeeperAdapter::NodeNames.
+*/
+typedef std::vector<std::string> NodeNames;
+
+/**
+   Value used to initialize ZkFuseFile::maxDataFileSize.
+
+   The macro must expand to a bare integer constant: it has no trailing
+   ';' so that it can be used inside expressions and does not leave a
+   stray empty declaration behind where it is used as an initializer.
+*/
+#define MAX_DATA_SIZE 1024
+
+DEFINE_LOGGER(LOG, "zkfuse");
+
+/**
+   Convert milliseconds to whole seconds (integer division, truncates).
+*/
+inline
+uint64_t millisecsToSecs(uint64_t millisecs)
+{
+    return millisecs / 1000;
+}
+/**
+   Convert seconds to milliseconds.
+*/
+inline
+uint64_t secsToMillisecs(uint64_t secs)
+{
+    return secs * 1000;
+}
+/**
+   Convert nanoseconds to whole milliseconds (integer division, truncates).
+*/
+inline
+uint64_t nanosecsToMillisecs(uint64_t nanosecs)
+{
+    return nanosecs / 1000000;
+}
+/**
+   Convert a timespec to milliseconds.
+   The sub-millisecond part of tv_nsec is discarded.
+*/
+inline
+uint64_t timespecToMillisecs(const struct timespec & ts)
+{
+    return secsToMillisecs(ts.tv_sec) + nanosecsToMillisecs(ts.tv_nsec);
+}
+
+typedef boost::shared_ptr<ZooKeeperAdapter> ZooKeeperAdapterSharedPtr;
+
+/**
+ * ZkFuseCommon - holds immutable configuration objects.
+ *
+ * No locks are required to access these objects.
+ * A ZkFuseCommon instance is considered to be a data object and may be copied.
+ */
+class ZkFuseCommon
+{
+  private:
+    /**
+      References the ZooKeeperAdapter instance to be used.
+     */
+    ZooKeeperAdapterSharedPtr _zkAdapter;
+    /**
+      Path to the ZooKeeper root node.
+     */
+    std::string _rootPathName;
+    /**
+      Name used to access data "file" when the ZK node has
+      children.
+     */
+    std::string _dataFileName;
+    /**
+      Suffix added to path components to force interpretation of
+      path components as directory. This is usually only required
+      for the last component. For example, ZkFuse may consider
+      a leaf node a regular file, e.g. /a/b/c/leaf. The suffix
+      can be used to create child under this node, e.g.
+      mkdir /a/b/c/leaf{forceDirSuffix}/new_leaf.
+     */
+    std::string _forceDirSuffix;
+    /**
+      Prefix common to all metadata nodes created by ZkFuse.
+     */
+    std::string _metadataNamePrefix;
+    /**
+      Path component name that identifies a directory metadata node.
+      A directory metadata node is currently empty. It is used by ZkFuse
+      to create a child when mkdir is used. This prevents ZkFuse
+      from interpreting the new child as a regular file.
+ */ + std::string _dirMetadataName; + /** + Path component name that identifies a regular file metadata node. + A regular metadata node holds metadata required to implement + Posix regular file semantics, such as setting mtime. + */ + std::string _regMetadataName; + /** + Number of not-in-use nodes to cache. + */ + unsigned _cacheSize; + /** + Assume this userid owns all nodes. + */ + const uid_t _uid; + /** + Assume this groupid owns all nodes. + */ + const gid_t _gid; + /** + Blocksize used to calculate number of blocks used for stat. + */ + const unsigned _blkSize; + + public: + /** + Constructor. + */ + ZkFuseCommon() + : _zkAdapter(), + _rootPathName("/"), + _dataFileName(), + _forceDirSuffix(), + _metadataNamePrefix(".zkfuse."), + _dirMetadataName(_metadataNamePrefix + "dir"), + _regMetadataName(_metadataNamePrefix + "file"), + _cacheSize(256), + _uid(geteuid()), + _gid(getegid()), + _blkSize(8192) + { + } + /** + Get root path name. Always "/". + \see _rootPathName + */ + const std::string & getRootPathName() const + { + return _rootPathName; + } + /** + Get dataFileName - the name for synthesized files to access + ZooKeeper node data. + \see _dataFileName + */ + const std::string & getDataFileName() const + { + return _dataFileName; + } + /** + Set dataFileName. + \see getDataFileName + \see _dataFileName + */ + void setDataFileName(const std::string & dataFileName) + { + _dataFileName = dataFileName; + } + /** + Get metadataNamePrefix - the common prefix for all ZkFuse created + metadata ZooKeeper nodes. + \see _metadataNamePrefix + */ + const std::string & getMetadataNamePrefix() const + { + return _metadataNamePrefix; + } + /** + Get forceDirSuffix - the suffix added to a path component to force + the path component to be treated like a directory. + \see _forceDirSuffix + */ + const std::string & getForceDirSuffix() const + { + return _forceDirSuffix; + } + /** + Set forceDirSuffix. 
+      \see getForceDirSuffix
+      \see _forceDirSuffix
+     */
+    void setForceDirSuffix(const std::string & forceDirSuffix)
+    {
+        _forceDirSuffix = forceDirSuffix;
+    }
+    /**
+      Get dirMetadataName - path component name of all directory
+      metadata ZooKeeper nodes.
+      \see _dirMetadataName
+     */
+    const std::string & getDirMetadataName() const
+    {
+        return _dirMetadataName;
+    }
+    /**
+      Get regMetadataName - path component name of all regular file
+      metadata ZooKeeper nodes.
+      \see _regMetadataName
+     */
+    const std::string & getRegMetadataName() const
+    {
+        return _regMetadataName;
+    }
+    /**
+      Get number of not-in-use ZkFuseFile instances to cache.
+      \see _cacheSize
+     */
+    unsigned getCacheSize() const
+    {
+        return _cacheSize;
+    }
+    /**
+      Set cache size.
+      \see getCacheSize
+      \see _cacheSize
+     */
+    void setCacheSize(unsigned v)
+    {
+        _cacheSize = v;
+    }
+    /**
+      Get userid.
+      \see _uid
+     */
+    uid_t getUid() const
+    {
+        return _uid;
+    }
+    /**
+      Get groupid.
+      \see _gid
+     */
+    gid_t getGid() const
+    {
+        return _gid;
+    }
+    /**
+      Get block size.
+      \see _blkSize
+     */
+    unsigned getBlkSize() const
+    {
+        return _blkSize;
+    }
+    /**
+      Get ZooKeeperAdapter.
+      \see _zkAdapter
+     */
+    const ZooKeeperAdapterSharedPtr & getZkAdapter() const
+    {
+        return _zkAdapter;
+    }
+    /**
+      Set ZooKeeperAdapter.
+      \see _zkAdapter
+     */
+    void setZkAdapter(const ZooKeeperAdapterSharedPtr & zkAdapter)
+    {
+        _zkAdapter = zkAdapter;
+    }
+};
+
+/**
+  ZkFuseNameType - identifies the type of the ZkFuse path.
+ */
+enum ZkFuseNameType {
+    /**
+      ZkFuse path is not synthesized.
+      ZkFuse should use its default rules to determine the Posix representation
+      of the path.
+     */
+    ZkFuseNameDefaultType = 0,
+    /**
+      ZkFuse path is synthesized and identifies the data part of a
+      ZooKeeper node, i.e. Posix regular file semantics is expected.
+     */
+    ZkFuseNameRegType = 1,
+    /**
+      ZkFuse path is synthesized and identifies the children part of a
+      ZooKeeper node, i.e. Posix directory semantics is expected.
+ */ + ZkFuseNameDirType = 2 +}; + +class ZkFuseFile; + +typedef ZkFuseFile * ZkFuseFilePtr; + +class ZkFuseHandleManagerFactory; + +/** + ZkFuseHandleManager - keeps track of all the ZkFuseFile instances + allocated by a ZkFuseHandleManager instance and provides them + with a handle that can be used by FUSE. + + It maps a ZooKeeper path to a handle and a handle to a ZkFuse instance. + It also implements the methods that takes path names as arguments, such + as open, mknod, rmdir, and rename. + + Memory management + - References ZkFuseFile instances using regular pointers + Smart pointer is not used because reference counts are needed to + determine how many time a node is opened as a regular file or + directory. This also avoids circular smart pointer references. + - Each ZkFuseFile instance holds a reference to its ZkFuseHandleManager + using a boost::shared_ptr. This ensures that the ZkFuseHandleManager + instance that has the handle for the ZkFuseFile instance does not + get garbage collected while the ZkFuseFile instance exists. + + Concurrency control + - Except for the immutable ZkFuseCommon, all other member variables + are protected by _mutex. + - A method in this class can hold _mutex when it directly or + indirectly invokes ZkFuseFile methods. A ZkFuseFile method that holds + a ZkFuseFile instance _mutex cannot invoke a ZkFuseHandleManager + method that acquires the ZkFuseHandleManager instance's _mutex. + Otherwise, this may cause a dead lock. + - Methods that with names that begin with "_" do not acquire _mutex. + They are usually called by public methods that acquire and hold _mutex. + */ +class ZkFuseHandleManager : boost::noncopyable +{ + private: + /** + Typedef of handle, which is an int. + */ + typedef int Handle; + /** + Typedef of std::map used to map path to handle. + */ + typedef std::map<std::string, Handle> Map; + /** + Typedef of std::vector used to map handle to ZkFuseFile instances. 
+ */ + typedef std::vector<ZkFuseFilePtr> Files; + /** + Typedef of std::vector used to hold unused handles. + */ + typedef std::vector<Handle> FreeList; + /** + Typedef of boost::weak_ptr to the ZkFuseHandleManager instance. + */ + typedef boost::weak_ptr<ZkFuseHandleManager> WeakPtr; + + /* Only ZkFuseHandleManagerFactory can create instances of this class */ + friend class ZkFuseHandleManagerFactory; + + /** + Contains common configuration. + Immutable so that it can be accessed without locks. + */ + const ZkFuseCommon _common; + /** + Maps a path name to a Handle. + */ + Map _map; + /** + Maps a handle to a ZkFuseFile instances. + Also holds pointers to all known ZkFuseFile instances. + An element may point to an allocated ZkFuseFile instance or be NULL. + + An allocated ZkFuseFile instance may be in one of the following states: + - in-use + Currently open, i.e. the ZkFuseFile instance's reference count + greater than 0. + - in-cache + Not currently open, i.e. the ZkFuseFile instances's + reference count is 0. + */ + Files _files; + /** + List of free'ed handles. + */ + FreeList _freeList; + /** + Mutex used to protect this instance. + */ + mutable zkfuse::Mutex _mutex; + /** + Count of number of in-use entries. + It used to calculate number of cached nodes. + Number cached nodes is (_files.size() - _numInUse). + */ + unsigned _numInUse; + /** + WeakPtr to myself. + */ + WeakPtr _thisWeakPtr; + + /** + Obtain a handle for the given path. + - If path is not known, then allocate a new handle and increment + _numInUse, and set newFile to true. The allocated + ZkFuseFile instance's reference count should be 1. + - If path is known, increase the corresponding + ZkFuseFile instance's reference count. + + \return the allocated handle. + \param path the path to lookup. + \param newFile indicates whether a new handle has been allocated. + */ + Handle allocate(const std::string & path, bool & newFile); + + /** + Constructor. 
+
+      \param common the immutable common configuration.
+      \param reserve number of elements to pre-allocate for
+                     _files and _freeList.
+     */
+    ZkFuseHandleManager(
+            const ZkFuseCommon & common,
+            const unsigned reserve)
+      : _common(common),
+        _files(),
+        _freeList(),
+        _mutex(),
+        _numInUse(0)
+    {
+        _files.reserve(reserve);
+        /* Handle 0 is never allocated: slot 0 is a permanent NULL entry.
+           The slot must be created by growing the vector. reserve() only
+           changes capacity, so indexing _files[0] while size() is still 0
+           is undefined behaviour (and writes through a null buffer when
+           reserve is 0). */
+        _files.resize(1, NULL);
+        _freeList.reserve(reserve);
+    }
+
+  public:
+    /**
+      Typedef for boost::shared_ptr for this ZkFuseHandleManager class.
+     */
+    typedef boost::shared_ptr<ZkFuseHandleManager> SharedPtr;
+
+    /**
+      Destructor.
+     */
+    ~ZkFuseHandleManager()
+    {
+    }
+    /**
+      Get the ZkFuseFile instance for a handle.
+
+      The handle is used directly as an index into _files and is not
+      range checked; the lookup is done while holding _mutex.
+
+      \return the ZkFuseFile instance identified by the handle.
+      \param handle get ZkFuseFile instance for this handle.
+     */
+    ZkFuseFilePtr getFile(Handle handle) const
+    {
+        AutoLock lock(_mutex);
+        return _files[handle];
+    }
+    /**
+      Get the immutable common configuration.
+
+      \return the common configuration instance.
+     */
+    const ZkFuseCommon & getCommon() const
+    {
+        return _common;
+    }
+    /**
+      Deallocate a previously allocated handle.
+      This decrements the reference count of the corresponding
+      ZkFuseFile instance. If the reference count becomes zero,
+      decrement _numInUse. It may also cause the ZkFuseFile instance
+      to be reclaimed if there are too many cached ZkFuseFile instances.
+
+      The ZkFuseFile instance should be reclaimed if the number of
+      unused ZkFuseFile instances exceeds the configured cache size, i.e.
+      (_files.size() - _numInUse) > _common.getCacheSize()
+      and the ZkFuseFile instance has a reference count of zero.
+
+      Reclaiming a ZkFuseFile instance involves removing the ZkFuseFile
+      instance's path to handle mapping from _map and the handle to the
+      ZkFuseFile instance mapping from _files, adding the handle to
+      the _freeList, and finally deleting the ZkFuseFile instance.
+
+      \param handle the handle that should be deallocated.
+ */ + void deallocate(Handle handle); + /** + Handles ZooKeeper session events. + It invokes the known ZkFuseFile instances to let them know + that their watches will no longer be valid. + */ + void eventReceived(const ZKWatcherEvent & event); + /** + Get data from the specified the ZooKeeper path. + + \return 0 if successful, otherwise return negative errno. + \param path the path of the ZooKeeper node. + \param data return data read. + */ + int getData(const std::string & path, Data & data); + /** + Set data into the specified ZooKeeper path. + + \return 0 if successful, otherwise return negative errno. + \param path the path of the ZooKeeper node. + \param data the data to be written. + \param exists set to true if this path exists. + \param doFlush set to true if new data should be flushed to ZooKeeper. + */ + int setData(const std::string & path, + const Data & data, + bool exists, + bool doFlush); + /** + Create a ZooKeeper node to represent a ZkFuse file or directory. + + \return handle if successful, otherwise return negative errno. + \param path to create. + \param mode should be either S_IFDIR for directory or + S_IFREG for regular file. + \param mayExist if set and the ZooKeeper node already exist, return + valid handle instead of -EEXIST. + \param created returns whether a new ZooKeeper node had been created. + */ + int mknod(const std::string & path, + mode_t mode, + bool mayExist, + bool & created); + /** + Open a ZooKeeper node. + + The justCreated argument is used to differentiate if the _deleted flag + of the ZkFuseFile instance is to be trusted (i.e. the path + does not exist in ZooKeeper.) The _deleted flag is trusted + if the ZkFuseFile instance is known to exist in ZooKeeper after + invoking ZooKeeper with the path. + + If justCreated is true, then the ZkFuseFile instance was just created. + The ZkFuseFile constructor sets the _deleted flag to true because + path is not known to exist and hence should not be accessed. 
+ The justCreated flag will force the ZkFuseFile instance to invoke + ZooKeeper to determine if the path exists. + + \return handle if successful, otherwise return negative errno. + \param path the path to open. + \param justCreated indicates if this is newly created ZkFuseFile instance. + */ + int open(const std::string & path, bool justCreated); + /** + Remove a ZkFuse directory. + + If force is not set, then the ZooKeeper node will be removed only + if it has no data and no child nodes except ZkFuse metadata nodes. + + \return 0 if successful, otherwise return negative errno. + \param path the path to remove. + \param force force removal, i.e. bypass checks. + */ + int rmdir(const char * path, bool force = false); + /** + Make a ZkFuse directory. + + ZkFuse represents a ZooKeeper node with no data and no children + as a regular file. In order to differentiate a newly created + directory from an empty regular file, mkdir will create a directory + metadata node as a child of the directory. + + \return 0 if successful, otherwise return negative errno. + \param path the path of the directory to create. + \param mode create directory with this mode + (mode currently not implemented). + */ + int mkdir(const char * path, mode_t mode); + /** + Remove a ZkFuse regular file. + + A file is the abstraction for the data part of a ZooKeeper node. + - If ZkFuse represents a ZooKeeper node as a directory, the data part + of the node is represented by synthesizing a name for this file. This + synthesized name is visible through readdir if the ZooKeeper node's + data is not empty. Removing such a file is done by truncating + the ZooKeeper node's data to 0 length. + - If ZkFuse represents a ZooKeeper node as a file, then removing the + is done by removing the ZooKeeper node (and its metadata). + + \return 0 if successful, otherwise return negative errno. + \param path the path of the file to remove. 
+     */
+    int unlink(const char * path);
+    /**
+      Get attributes of a ZkFuse regular file or directory.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path get attributes for this path
+      \param stbuf store attributes here.
+     */
+    int getattr(const char * path, struct stat & stbuf);
+    /**
+      Rename a ZkFuse regular file.
+
+      It creates a new ZooKeeper node at toPath, copies data and file
+      metadata from the ZooKeeper node at fromPath to the new node,
+      and deletes the current ZooKeeper node. The current ZooKeeper
+      node is not deleted if the new ZooKeeper node cannot be created
+      or the data copy fails.
+
+      It cannot be used to rename a directory.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param fromPath the current path.
+      \param toPath rename to this path.
+     */
+    int rename(const char * fromPath, const char * toPath);
+    /**
+      Add a child ZooKeeper path to the children information cache
+      of the ZkFuseFile instance that caches the parent ZooKeeper node.
+
+      This is used to add a child path after a new ZooKeeper node has
+      been created to the children information cache of the parent
+      ZooKeeper node. This is needed because waiting for the children
+      changed event to update the cache may result in inconsistent local
+      views of the changes.
+      \see removeChildFromParent
+
+      \param childPath the path of the child ZooKeeper node.
+     */
+    void addChildToParent(const std::string & childPath) const;
+    /**
+      Remove a child ZooKeeper path from the children information cache
+      of the ZkFuseFile instance that caches the parent ZooKeeper node.
+
+      For example, this should happen whenever a path is deleted.
+      This child information cache of the parent will eventually be
+      invalidated by watches. However, the delivery of the children
+      change event may come after the next access and thus provide
+      the client with an inconsistent view.
One example is that + client deletes the last file in a directory, but the children + changed event is not delivered before the client invokes rmdir. + to remove the parent. In this case, the rmdir fails because + the cached children information of the parent indicates the + "directory" is not empty. + + \param childPath the path of the child ZooKeeper node. + */ + void removeChildFromParent(const std::string & childPath) const; + /** + Return the path for the parent of the specified ZooKeeper path. + + \return the parent path. + \param childPath the child path. + */ + std::string getParentPath(const std::string & childPath) const; + /** + Return the ZooKeeper path from a ZkFuse path. + + The ZkFuse path may be a synthesized path. For example, a synthesized + path is required to access the data part of a ZooKeeper node's + data when ZkFuse represents the ZooKeeper node as directory. + A synthesized path is also required to create a child ZooKeeper node + under a ZooKeeper node that is represented by a regular file. + + \return the ZooKeeper path for path. + \param path the ZkFuse path, which may be a synthesized path. + \param nameType indicate whether the ZkFuse path is synthesized and + whether the synthesized ZkFuse path identifies a + directory or a regular file. + */ + std::string getZkPath(const char * path, ZkFuseNameType & nameType) const; +}; + +/** + ZkFuseHandleManagerFactory - factory for ZkFuseHandleManager. + + This is the only way to create a ZkFuseHandleManager instance. + to make sure that _thisWeakPtr of the instance is intialized + after the instance is created. + */ +class ZkFuseHandleManagerFactory +{ + public: + /** + Create an instance of ZkFuseHandleManager. + + \return the created ZkFuseHandleManager instance. + \param common the common configuration. + \param reserve initially reserve space for this number of handles. 
+ */ + static ZkFuseHandleManager::SharedPtr create( + const ZkFuseCommon & common, + unsigned reserve = 1000) + { + ZkFuseHandleManager::SharedPtr manager + (new ZkFuseHandleManager(common, reserve)); + manager->_thisWeakPtr = manager; + return manager; + } +}; + +/** + ZkFuseAutoHandle - automatically closes handle. + + It holds an opened handle and automatically closes this handle + when it is destroyed. This enables code that open a handle + to be exception safe. + */ +class ZkFuseAutoHandle +{ + private: + /** + Typedef for Handle which is an int. + */ + typedef int Handle; + /** + Holds a reference to the ZkFuseHandlerManager instance that + allocated the handle. + */ + ZkFuseHandleManager::SharedPtr _manager; + /** + The handle that should be closed when this instance is destroyed. + A valid handle has value that is equal or greater than 0. + A negative value indicates an error condition, usually the value + is a negative errno. + */ + Handle _handle; + /** + Caches a reference to the ZkFuseFile instance with this handle. + This is a performance optimization so that _manager.getFile(_handle) + is only called once when the handle is initialized. + */ + ZkFuseFilePtr _file; + + /** + Initialize reference to the ZkFuseFile instance with this handle. + */ + void _initFile() + { + if (_handle >= 0) { + _file = _manager->getFile(_handle); + } else { + _file = NULL; + } + } + + public: + /** + Constructor - takes an previously opened handle. + + \param manager the ZkFuseHandleManager instance who allocated the handle. + \param handle the handle. + */ + ZkFuseAutoHandle( + const ZkFuseHandleManager::SharedPtr & manager, + int handle) + : _manager(manager), + _handle(handle), + _file() + { + _initFile(); + } + /** + Constructor - open path and remember handle. + + \param manager the ZkFuseHandleManager instance who allocated the handle. + \param path open this path and remember its handle in this instance. 
+     */
+    ZkFuseAutoHandle(
+        const ZkFuseHandleManager::SharedPtr & manager,
+        const std::string & path)
+      : _manager(manager),
+        _handle(_manager->open(path, false)),
+        _file()
+    {
+        _initFile();
+    }
+    /**
+      Constructor - create path and remember handle.
+
+      The creation mode indicates whether the path identifies a regular file
+      or a directory.
+
+      \param manager the ZkFuseHandleManager instance who allocated the handle.
+      \param path create this path and remember its handle in this instance.
+      \param mode the creation mode for the path, should be either
+                  S_IFREG or S_IFDIR.
+      \param mayExist if set and the path already exists,
+                      then the ZkFuseAutoHandle will hold the handle
+                      for the path instead of -EEXIST.
+                      If not set and the path already exists, then the
+                      handle will be -EEXIST.
+     */
+    ZkFuseAutoHandle(
+        const ZkFuseHandleManager::SharedPtr & manager,
+        const std::string & path,
+        mode_t mode,
+        bool mayExist)
+      : _manager(manager),
+        _handle(-1),
+        _file()
+    {
+        /* Out-parameter required by mknod; its value is not used here. */
+        bool created;
+        _handle = _manager->mknod(path, mode, mayExist, created);
+        _initFile();
+    }
+    /**
+      Destructor - closes the handle.
+     */
+    ~ZkFuseAutoHandle()
+    {
+        reset();
+    }
+    /**
+      Get the handle.
+      \see _handle
+     */
+    int get() const
+    {
+        return _handle;
+    }
+    /**
+      Get the ZkFuseFile instance of the handle.
+      \see _file
+     */
+    ZkFuseFilePtr getFile() const
+    {
+        return _file;
+    }
+    /**
+      Forget the handle, don't close the handle.
+     */
+    void release()
+    {
+        _handle = -1;
+        _file = NULL;
+    }
+    /**
+      Change the remembered handle.
+
+      It will close the current handle (if valid).
+     */
+    void reset(int handle = -1);
+};
+
+/**
+  ZkFuseStat - C++ wrapper for ZooKeeper Stat.
+
+  This wrapper provides ZooKeeper Stat with a constructor that
+  initializes the instance variables of Stat.
+ */
+class ZkFuseStat : public Stat
+{
+  public:
+    /**
+      Constructor - clear instance variables.
+     */
+    ZkFuseStat()
+    {
+        clear();
+    }
+    /**
+      Destructor - do nothing.
+     */
+    ~ZkFuseStat()
+    {
+    }
+    /**
+      Clear instance variables.
+
+      Resets czxid, mzxid, ctime, mtime, version, cversion and aversion
+      to 0.
+      NOTE(review): any other fields the Stat base may declare are not
+      touched here - confirm against the Stat definition.
+     */
+    void clear()
+    {
+        czxid = 0;
+        mzxid = 0;
+        ctime = 0;
+        mtime = 0;
+        version = 0;
+        cversion = 0;
+        aversion = 0;
+    }
+};
+
+/**
+  ZkFuseFile - an instance encapsulates the runtime state of an allocated
+  ZooKeeper node.
+
+  Memory management
+  - Referenced by the ZkFuseHandleManager that created this instance.
+  - Uses boost::shared_ptr to reference the ZkFuseHandleManager that
+    created this instance. This makes sure that this ZkFuseHandleManager
+    instance cannot be deleted when it has allocated ZkFuseFile instances.
+  - A ZkFuseFile instance is deleted when it can be reclaimed.
+    It can be reclaimed if it has no watches, its reference count is zero,
+    and the ZkFuseHandleManager instance would have more than the
+    configured number of cached ZkFuseFile instances.
+  - A ZkFuseFile instance cannot be deleted if it has active watches on
+    its ZooKeeper node. When one of its watches fires, the ZkFuseFile
+    instance must exist because one of its methods will be invoked
+    to process the event. If the ZkFuseFile instance has been deleted,
+    the method will access previously freed memory.
+
+  Concurrency control
+  - _mutex protects the instance variables of an instance.
+  - Callers should assume that a public method will acquire _mutex.
+  - Methods of this class may not hold _mutex while invoking a
+    ZkFuseHandleManager instance.
+  - Methods with names that begin with "_" do not acquire _mutex.
+    They are usually called by public methods that acquire and hold _mutex.
+*/
+class ZkFuseFile : boost::noncopyable
+{
+  public:
+    /**
+      Maximum size for the data part of a ZooKeeper node.
+     */
+    static const unsigned maxDataFileSize = MAX_DATA_SIZE;
+
+  private:
+    /**
+      Mode returned by getattr for a ZkFuse directory.
+     */
+    static const mode_t dirMode = (S_IFDIR | 0777);
+    /**
+      Mode returned by getattr for a ZkFuse regular file.
+     */
+    static const mode_t regMode = (S_IFREG | 0777);
+
+    /**
+      References the ZkFuseHandleManager that created this instance.
+     */
+    ZkFuseHandleManager::SharedPtr _manager;
+    /**
+      Handle for this instance.
+     */
+    const int _handle;
+    /**
+      Path of the ZooKeeper node represented by this instance.
+     */
+    const std::string _path;
+    /**
+      Mutex that protects the instance variables of this instance.
+     */
+    mutable zkfuse::Mutex _mutex;
+    /**
+      Reference count for this instance, i.e. the number of opens
+      minus the number of closes.
+     */
+    int _refCount;
+    /**
+      Indicates whether the ZooKeeper node is considered deleted,
+      i.e. not known to exist.
+      This flag allows caching of deleted ZooKeeper node to avoid
+      repeated ZooKeeper lookups for a non-existent path, and avoid
+      using cached information.
+
+      Its value is true if
+      - it is verified to not exist (by calling ZooKeeper), or
+      - its existence is unknown because ZooKeeper has not been
+        invoked to verify its path's existence.
+
+      NOTE(review): this text used to say "verified to exist", which
+      contradicts the description of this flag given for
+      ZkFuseHandleManager::open - confirm against the implementation.
+     */
+    bool _deleted;
+    /**
+      Count of current number directory opens minus directory closes.
+     */
+    int _openDirCount;
+    /**
+      Indicates whether cached children information is valid.
+
+      It is true if the cached children information is valid.
+     */
+    bool _initializedChildren;
+    /**
+      Indicates whether there is an outstanding children watch.
+
+      It is true if it has an outstanding children watch.
+     */
+    bool _hasChildrenListener;
+    /**
+      Cached children information.
+
+      The cache is valid if _initializedChildren is true.
+     */
+    NodeNames _children;
+
+    /**
+      Indicates whether the cached data is valid.
+
+      It is true if the cached data and ZooKeeper Stat are valid.
+     */
+    bool _initializedData;
+    /**
+      Indicates whether there is an outstanding data watch.
+
+      It is true if it has an outstanding data watch.
+     */
+    bool _hasDataListener;
+    /**
+      Indicates whether the cached data (_activeData) has been modified.
+
+      It is true if the cached data has been modified.
+     */
+    bool _dirtyData;
+    /**
+      Currently active data.
+ + To maintain atomicity of updates and emulate Posix semantics, + when a ZkFuse file remains open, the same data will be accessed + by the file's clients. The data will be flushed to ZooKeeper when + the flush method is called. The flush method may be called + explicitly by a client or implicitly when the ZkFuse file is no + longer currently open. + + _activeData and _activeStat stores the data and ZooKeeper Stat + that will be accessed by the file's clients. + + If there are changes when the ZkFuse file is open, new data is + cached as latest data (by _latestData and _latestStat). + */ + Data _activeData; + /** + Currently active ZooKeeper Stat. + \see _activeData + */ + ZkFuseStat _activeStat; + /** + Latest data. + This is either the same as _activeData or it is newer. It is newer + is it has been updated by event triggered by a data watch. + */ + Data _latestData; + /** + Latest ZooKeeper data. + This is either the same as _activeStat or it is newer. It is newer + is it has been updated by event triggered by a data watch. + */ + ZkFuseStat _latestStat; + + /** + Get userid. + + \return the userid. + */ + uid_t _getUid() const + { + return _manager->getCommon().getUid(); + } + /** + Get groupid. + + \return the groupid. + */ + gid_t _getGid() const + { + return _manager->getCommon().getGid(); + } + /** + Get block size. + + \return the block size. + */ + unsigned _getBlkSize() const + { + return _manager->getCommon().getBlkSize(); + } + /** + Get number of children, include metadata children in the count. + + \return the number of children including metadata children. + */ + unsigned _numChildrenIncludeMeta() const + { + unsigned count = _children.size(); + LOG_DEBUG(LOG, "numChildrenIncludeMeta() returns %u", count); + return count; + } + /** + Get number of children, exclude metadata children in the count. + + \return the number of children excluding metadata children. 
+ */ + unsigned _numChildrenExcludeMeta() const + { + unsigned count = 0; + for (NodeNames::const_iterator it = _children.begin(); + it != _children.end(); + it++) { + if (!_isMeta(*it)) { + count++; + } + } + LOG_DEBUG(LOG, "numChildrenExcludeMeta() returns %u", count); + return count; + } + /** + Whether the ZooKeeper node has children, include metadata + children. + + \return true if it has children including metadata children. + */ + bool _hasChildrenIncludeMeta() const + { + return _numChildrenIncludeMeta() != 0; + } + /** + Return true if the ZooKeeper node has children, include metadata + children. + + \return true if it has children excluding metadata children. + */ + bool _hasChildrenExcludeMeta() const + { + return _numChildrenExcludeMeta() != 0; + } + /** + Whether the ZooKeeper node has data. + + \return true if _activeData is not empty. + */ + bool _hasData() const + { + return _activeData.empty() == false; + } + /** + Whether the ZooKeeper node has child with the specified path. + + \return true if the ZooKeeper node has a child with the specified path. + \param childPath the path of the child. + */ + bool _hasChildPath(const std::string & childPath) const + { + bool hasChild = + std::find(_children.begin(), _children.end(), childPath) + != _children.end(); + LOG_DEBUG(LOG, "hasChild(childPath %s) returns %d", + childPath.c_str(), hasChild); + return hasChild; + } + /** + Whether the given path component is a ZkFuse synthesized path + component. + + A ZkFuse synthesized path component will begin with + the metadataNamePrefix obtained from the common configuration. + \see _metadataNamePrefix + + \return true if the path component is a ZkFuse synthesized path + component. + \param childName the path component to check if it is synthesized by + ZkFuse. + */ + bool _isMeta(const std::string & childName) const + { + bool isMeta; + const std::string & prefix = + _manager->getCommon().getMetadataNamePrefix(); + unsigned offset = + (_path.length() > 1 ? 
+ _path.length() + 1 : + 1 /* special case for root dir */ ); + unsigned minLength = offset + prefix.length(); + if (childName.length() < minLength || + childName.compare(offset, prefix.length(), prefix) != 0) { + isMeta = false; + } else { + isMeta = true; + } + LOG_DEBUG(LOG, "isMeta(childName %s) returns %d", + childName.c_str(), isMeta); + return isMeta; + } + /** + Build a path for a specific child of the ZooKeeper node. + + This is done by appending "/" (unless it is the ZooKeeper node + is the root node) and the name of the child. + + \return the path for the specified child of the ZooKeeper node. + \param name the name of the child. + */ + std::string _getChildPath(const std::string & name) const + { + return buildChildPath(_path, name); + } + /** + Whether the ZooKeeper node has a regular file metadata child node. + + \return true if the ZooKeeper node has a regular file metadata child + node. + */ + bool _hasRegMetadata() const + { + bool res = _hasChildPath( + _getChildPath(_manager->getCommon().getRegMetadataName())); + LOG_DEBUG(LOG, "hasRegMetadata() returns %d", res); + return res; + } + /** + Whether the ZooKeeper node has a directory metadata child node. + + \return true if the ZooKeeper node has a directory metadata child + node. + */ + bool _hasDirMetadata() const + { + bool res = _hasChildPath( + _getChildPath(_manager->getCommon().getDirMetadataName())); + LOG_DEBUG(LOG, "hasDirMetadata() returns %d", res); + return res; + } + /** + Whether ZkFuse should present the ZooKeeper node as a ZkFuse regular + file. + + It should be a ZkFuse regular file it has no children or its + only children is its regular file metadata child node. + + \return true if the Zookeeper node should be presented as a ZkFuse + regular file. 
+ */ + bool _isReg() const + { + unsigned numChildrenIncludeMeta = _numChildrenIncludeMeta(); + bool res = + (numChildrenIncludeMeta == 0) || + (numChildrenIncludeMeta == 1 && _hasRegMetadata() == true); + LOG_DEBUG(LOG, "isReg() returns %d", res); + return res; + } + /** + Whether ZkFuse should present the ZooKeeper node as a ZkFuse directory. + + It should be a ZkFuse directory if it should not be presented as + a ZkFuse regular directory. + \see _isReg + + \return true if the Zookeeper node should be presented as a ZkFuse + directory. + */ + bool _isDir() const + { + return !_isReg(); + } + /** + Whether ZkFuse should present the ZooKeeper node as a ZkFuse regular + file by taking into account the specified ZkFuseNameType. + + The ZkFuseNameType may override the default ZkFuse presentation of + a ZooKeeper node. + + \return true if ZkFuse should present the ZooKeeper node as a ZkFuse + regular file. + \param nameType specifies the ZkFuseNameType. + \param doLock whether _mutex should be acquired, it should be true + if the caller did not acquire _mutex. + */ + bool _isRegNameType(ZkFuseNameType nameType, bool doLock = false) const + { + bool res; + switch (nameType) { + case ZkFuseNameRegType: + res = true; + break; + case ZkFuseNameDirType: + res = false; + break; + case ZkFuseNameDefaultType: + default: + if (doLock) { + AutoLock lock(_mutex); + res = _isReg(); + } else { + res = _isReg(); + } + break; + } + LOG_DEBUG(LOG, "isRegNameType(nameType %d) returns %d", + int(nameType), res); + return res; + } + /** + Whether ZkFuse should present the ZooKeeper node as a ZkFuse + directory by taking into account the specified ZkFuseNameType. + + The ZkFuseNameType may override the default ZkFuse presentation of + a ZooKeeper node. + + \return true if ZkFuse should present the ZooKeeper node as a ZkFuse + directory. + \param nameType specifies the ZkFuseNameType. 
+ \param doLock whether _mutex should be acquired, it should be true + if the caller did not acquire _mutex. + */ + bool _isDirNameType(ZkFuseNameType nameType, bool doLock = false) const + { + bool res; + switch (nameType) { + case ZkFuseNameRegType: + res = false; + break; + case ZkFuseNameDirType: + res = true; + break; + case ZkFuseNameDefaultType: + default: + if (doLock) { + AutoLock lock(_mutex); + res = _isDir(); + } else { + res = _isDir(); + } + break; + } + LOG_DEBUG(LOG, "isDirNameType(nameType %d) returns %d", + int(nameType), res); + return res; + } + /** + ZkFuse regular file metadata. + */ + struct Metadata { + /** + Version of the ZooKeeper node data that this metadata is good for. + */ + uint32_t version; + /** + Acces time in milliseconds. + */ + uint64_t atime; + /** + Modified time in milliseconds. + */ + uint64_t mtime; + + /** + Constructor. + */ + Metadata() + : version(0), + atime(0), + mtime(0) + { + } + }; + /** + Encode Metadata into Data so that it can be stored in a metadata + ZooKeeper node. + + Each Metadata attribute is encoded as "<key>: <value>" on single line + terminated by newline. + + \param meta the input Metadata. + \param data the output Data after encoding. + */ + void _encodeMetadata(const Metadata & meta, Data & data) const + { + LOG_DEBUG(LOG, "encodeMetadata()"); + std::ostringstream oss; + oss << "version: " << meta.version << endl + << "atime: " << meta.atime << endl + << "mtime: " << meta.mtime << endl; + data = oss.str(); + } + /** + Decode Data from a metadata child ZooKeeper node into Metadata. + + Data is a stream of "<key>: <value>" records separated by newline. + + \param data the input Data. + \param meta the output Metadata after decoding. 
+ */ + void _decodeMetadata(const Data & data, Metadata & meta) const + { + LOG_DEBUG(LOG, "decodeMetadata(data %s)", data.c_str()); + std::istringstream iss(data); + char key[128]; + char value[1024]; + while (!iss.eof()) { + key[0] = 0; + value[0] = 0; + iss.get(key, sizeof(key), ' '); + if (iss.eof()) { + break; + } + iss.ignore(32, ' '); + iss.getline(value, sizeof(value)); + LOG_DEBUG(LOG, "key %s value %s", key, value); + if (strcmp(key, "version:") == 0) { + unsigned long long v = strtoull(value, NULL, 0); + LOG_DEBUG(LOG, "version: %llu", v); + meta.version = v; + } + else if (strcmp(key, "atime:") == 0) { + unsigned long long v = strtoull(value, NULL, 0); + LOG_DEBUG(LOG, "atime: %llu", v); + meta.atime = v; + } + else if (strcmp(key, "mtime:") == 0) { + unsigned long long v = strtoull(value, NULL, 0); + LOG_DEBUG(LOG, "mtime: %llu", v); + meta.mtime = v; + } + else { + LOG_WARN(LOG, "decodeMetadata: path %s unknown key %s %s\n", + _path.c_str(), key, value); + } + } + LOG_DEBUG(LOG, "decodeMetadata done"); + } + /** + Flush data to the ZooKeeper node. + + If cached active data has been modified, flush it to the ZooKeeper node. + Returns -EIO if the data cannot be written because the cached active + data is not the expected version, i.e. ZooKeeper returns ZBADVERSION. + -EIO may also indicate a more general failure, such as unable to + communicate with ZooKeeper. + + \return 0 if successful, otherwise negative errno. 
+ */ + int _flush() + { + LOG_DEBUG(LOG, "flush() path %s", _path.c_str()); + + int res = 0; + try { + if (_dirtyData) { + LOG_DEBUG(LOG, "is dirty, active version %d", + _activeStat.version); + _manager->getCommon().getZkAdapter()-> + setNodeData(_path, _activeData, _activeStat.version); + /* assumes version always increments by one if successful */ + _deleted = false; + _activeStat.version++; + _dirtyData = false; + res = 0; + } + else { + LOG_DEBUG(LOG, "not dirty"); + res = 0; + } + } catch (const ZooKeeperException & e) { + if (e.getZKErrorCode() == ZBADVERSION) { + LOG_ERROR(LOG, "flush %s bad version, was %d", + _path.c_str(), _activeStat.version); + res = -EIO; + } + else { + LOG_ERROR(LOG, "flush %s exception %s", + _path.c_str(), e.what()); + res = -EIO; + } + } + + LOG_DEBUG(LOG, "flush returns %d", res); + return res; + } + /** + Truncate or expand the size of the cached active data. + + This method only changes the size of the cached active data. + This change is committed to ZooKeeper when the cached data + is written to the ZooKeeper node by flush(). + + Return -EFBIG is the requested size exceeds the maximum. + + \return 0 if successful, otherwise negative errno. + \param size the requested size. 
+ */ + int _truncate(off_t size) + { + LOG_DEBUG(LOG, "truncate(size %zu) path %s", size, _path.c_str()); + + int res = 0; + + if (!_isInitialized()) { + LOG_DEBUG(LOG, "not initialized"); + res = -EIO; + } + else if (size > _activeData.size()) { + if (size > maxDataFileSize) { + LOG_DEBUG(LOG, "size > maxDataFileSize"); + res = -EFBIG; + } else { + LOG_DEBUG(LOG, "increase to size"); + _activeData.insert(_activeData.begin() + + (size - _activeData.size()), 0); + _dirtyData = true; + res = 0; + } + } + else if (size < _activeData.size()) { + LOG_DEBUG(LOG, "decrease to size"); + _activeData.resize(size); + _dirtyData = true; + res = 0; + } + else { + LOG_DEBUG(LOG, "do nothing, same size"); + } + + LOG_DEBUG(LOG, "truncate returns %d", res); + return res; + } + /** + Remove a ZkFuse directory. + + If force is true, then the ZooKeeper node and its decendants + will be deleted. + + If force is false, then this method implements the semantics + of removing a ZkFuse directory. It will delete the ZooKeeper node + only if the ZooKeeper node have no data and no non-metadata + children. + - Return -ENOTDIR if the ZooKeeper node is not considered + to be a directory (after taking into consideration the specified + ZkFuseNameType). + - Return -ENOTEMPTY if the ZooKeeper node has data or it has + non-metadata children. + - Return -ENOENT if the ZooKeeper cannot be deleted, usually this + is because it does not exist. + + \return 0 if successful, otherwise negative errno. + \param nameType the ZkFuseNameType of the path used to specify the + directory to be removed. It influences whether ZkFuse + considers the ZooKeeper node to be a regular file or + directory. \see ZkFuseNameType + \param force set to true to bypass ZkFuse rmdir semantic check. 
+ */ + int _rmdir(ZkFuseNameType nameType, bool force) + { + LOG_DEBUG(LOG, "rmdir(nameType %d, force %d) path %s", + int(nameType), force, _path.c_str()); + + int res = 0; + try { + if (!force && !_isDirNameType(nameType)) { + LOG_DEBUG(LOG, "failed because not directory"); + res = -ENOTDIR; + } + else if (!force && _hasData()) { + /* rmdir cannot occur if there non-empty "data file" */ + LOG_DEBUG(LOG, "failed because node has data"); + res = -ENOTEMPTY; + } + else if (!force && _hasChildrenExcludeMeta()) { + /* rmdir cannot occur if there are "subdirs" */ + LOG_DEBUG(LOG, "failed because node has children"); + res = -ENOTEMPTY; + } + else { + LOG_DEBUG(LOG, "delete node"); + bool deleted = _manager->getCommon().getZkAdapter()-> + deleteNode(_path, true); + if (deleted) { + _deleted = true; + _clearChildren(); + res = 0; + } else { + /* TODO: differentiate delete error conditions, + * e.g. access permission, not exists, ... ? + */ + LOG_DEBUG(LOG, "delete failed"); + res = -ENOENT; + } + } + } catch (const std::exception & e) { + LOG_ERROR(LOG, "rmdir %s exception %s", _path.c_str(), e.what()); + res = -EIO; + } + + LOG_DEBUG(LOG, "rmdir returns %d", res); + return res; + } + /** + Remove a ZkFuse regular file. + + This method implements the semantics of removing a ZkFuse regular file. + - If the ZkFuse regular file represents the data part of the + ZooKeeper node which is presented as a ZkFuse directory, + the regular file is virtually deleted by truncating the + ZooKeeper node's data. Readdir will not synthesize a regular + file entry for the data part of a ZooKeeper node if + the ZooKeeper node has no data. + - If the ZkFuse regular file represents the data part of the + ZooKeeper node which is presented as a ZkFuse regular file, + the ZooKeeper node and its decendants are deleted. + + Returns -EISDIR if the ZkFuse regular file cannot be deleted + because ZkFuse consider it to be a directory. + + \return 0 if successful, otherwise negative errno. 
+ \param nameType the ZkFuseNameType of the path used to specify the + directory to be removed. It influences whether ZkFuse + considers the ZooKeeper node to be a regular file or + directory. \see ZkFuseNameType + */ + int _unlink(ZkFuseNameType nameType) + { + LOG_DEBUG(LOG, "unlink(nameType %d) path %s", + int(nameType), _path.c_str()); + + int res = 0; + switch (nameType) { + case ZkFuseNameRegType: + if (_isDir()) { + res = _truncate(0); + } else { + res = _rmdir(nameType, true); + } + break; + case ZkFuseNameDirType: + res = -EISDIR; + break; + case ZkFuseNameDefaultType: + default: + if (_isReg()) { + res = _rmdir(nameType, true); + } else { + res = -EISDIR; + } + break; + } + + LOG_DEBUG(LOG, "unlink returns %d", res); + return res; + } + /** + Whether cached children and data are valid. + + \return true if cached children and data are valid. + */ + bool _isInitialized() const + { + return _initializedChildren && _initializedData; + } + /** + Clear and invalidate cached children information. + */ + void _clearChildren() + { + _initializedChildren = false; + _children.clear(); + } + /** + Clear and invalidate cached data. + */ + void _clearData() + { + _initializedData = false; + _dirtyData = false; + _activeData.clear(); + _activeStat.clear(); + _latestData.clear(); + _latestStat.clear(); + } + /** + Whether the ZkFuseFile instance is a zombie. + + It is a zombie if it is not currently open, i.e. its reference count + is 0. + */ + bool _isZombie() const + { + return (_refCount == 0); + } + /** + Whether the ZkFuseFile instance is currently opened as a regular file + only once. + + It is used to determine when the cached data can be replaced with + the latest data. \see _activeData. + + \return true if its currently opened as a regular file only once. + */ + bool _isOnlyRegOpen() const + { + return ((_refCount - _openDirCount) == 1); + } + /** + Get attributes without accessing metadata. 
+ + The atime and mtime returned does not take into consideration + overrides present in a matadata file. + + \return 0 if successful, otherwise negative errno. + \param stbuf return attributes here. + \param nameType specifies the ZkFuseNameType of the ZkFuse path used + to get attributes. It influences whether the directory + or regular file attributes are returned. + */ + int _getattrNoMetaAccess(struct stat & stbuf, ZkFuseNameType nameType) const + { + int res = 0; + if (_deleted) { + LOG_DEBUG(LOG, "deleted"); + res = -ENOENT; + } + else if (!_isInitialized()) { + LOG_DEBUG(LOG, "not initialized"); + res = -EIO; + } + else { + assert(_isInitialized()); + bool isRegular = _isRegNameType(nameType); + if (isRegular) { + LOG_DEBUG(LOG, "regular"); + stbuf.st_mode = regMode; + stbuf.st_nlink = 1; + stbuf.st_size = _activeData.size(); + } else { + LOG_DEBUG(LOG, "directory"); + stbuf.st_mode = dirMode; + stbuf.st_nlink = + _children.size() + (_activeData.empty() ? 0 : 1); + stbuf.st_size = stbuf.st_nlink; + } + stbuf.st_uid = _getUid(); + stbuf.st_gid = _getGid(); + /* IMPORTANT: + * Conversion to secs from millisecs must occur before + * assigning to st_atime, st_mtime, and st_ctime. Otherwise + * truncating from 64-bit to 32-bit will cause lost of + * most significant 32-bits before converting to secs. + */ + stbuf.st_atime = millisecsToSecs(_activeStat.mtime); + stbuf.st_mtime = millisecsToSecs(_activeStat.mtime); + stbuf.st_ctime = millisecsToSecs(_activeStat.ctime); + stbuf.st_blksize = _getBlkSize(); + stbuf.st_blocks = + (stbuf.st_size + stbuf.st_blksize - 1) / stbuf.st_blksize; + res = 0; + } + return res; + } + /** + Get the context that should be registered with the data and + children watches. + + The returned context is a pointer to the ZkFuseFile instance + cast to the desired ContextType. + + \return the context. 
+ */ + ZooKeeperAdapter::ContextType _getZkContext() const + { + return (ZooKeeperAdapter::ContextType) NULL; + } + + /** + DataListener - listener that listens for ZooKeeper data events + and calls dataEventReceived on the ZkFuseFile instance + identified by the event context. + \see dataEventReceived + */ + class DataListener : public ZKEventListener { + public: + /** + Received a data event and invoke ZkFuseFile instance obtained from + event context to handle the event. + */ + virtual void eventReceived(const ZKEventSource & source, + const ZKWatcherEvent & event) + { + assert(event.getContext() != 0); + ZkFuseFile * file = static_cast<ZkFuseFile *>(event.getContext()); + file->dataEventReceived(event); + } + }; + + /** + DataListener - listener that listens for ZooKeeper children events + and calls childrenEventReceived on the ZkFuseFile instance + identified by the event context. + \see childrenEventReceived + */ + class ChildrenListener : public ZKEventListener { + public: + /** + Received a children event and invoke ZkFuseFile instance obtained from + event context to handle the event. + */ + virtual void eventReceived(const ZKEventSource & source, + const ZKWatcherEvent & event) + { + assert(event.getContext() != 0); + ZkFuseFile * file = static_cast<ZkFuseFile *>(event.getContext()); + file->childrenEventReceived(event); + } + }; + + /** + Globally shared DataListener. + */ + static DataListener _dataListener; + /** + Globally shared ChildrenListener. + */ + static ChildrenListener _childrenListener; + + public: + /** + Constructor. + + Sets reference count to one, i.e. it has been constructed because + a client is trying to open the path. \see _refCount. + Sets deleted to true. \see _deleted. + Sets number of currently directory opens to zero. \see _openDirCount. + Invalidate cach for children information and data. + + \param manager the ZkFuseHandleManager instance who is creating this + ZkFuseFile instance. 
+ \param handle the handle assigned by the ZkFuseHandleManager instance + for this ZkFuseFile instance. + \param path the ZooKeeper path represented by this ZkFuseFile instance. + */ + ZkFuseFile(const ZkFuseHandleManager::SharedPtr & manager, + const int handle, + const std::string & path) + : _manager(manager), + _handle(handle), + _path(path), + _mutex(), + _refCount(1), + _deleted(true), + /* children stuff */ + _openDirCount(0), + _initializedChildren(false), + _hasChildrenListener(false), + _children(), + /* data stuff */ + _initializedData(false), + _hasDataListener(false), + _dirtyData(false), + _activeData(), + _activeStat(), + _latestData(), + _latestStat() + { + LOG_DEBUG(LOG, "constructor() path %s", _path.c_str()); + } + /** + Destructor. + */ + ~ZkFuseFile() + { + LOG_DEBUG(LOG, "destructor() path %s", _path.c_str()); + + assert(_isZombie()); + _clearChildren(); + _clearData(); + } + /** + Whether the ZooKeeper node represented by this ZkFuseFile instance + has been deleted. + \see _deleted + + \return true if it is deleted. + */ + bool isDeleted() const + { + AutoLock lock(_mutex); + return _deleted; + } + /** + Return the path of the ZooKeeper node represented by this ZkFuseFile + instance. + \see _path. + + \return the ZooKeeper node's path. + */ + const string & getPath() const + { + return _path; + } + /** + Add a childPath to the children information cache. + + \return 0 if successful, otherwise return negative errno. + \param childPath the ZooKeeper path of the child. 
+ */ + int addChild(const std::string & childPath) + { + LOG_DEBUG(LOG, "addChild(childPath %s) path %s", + childPath.c_str(), _path.c_str()); + + int res = 0; + { + AutoLock lock(_mutex); + if (_initializedChildren) { + NodeNames::iterator it = + std::find(_children.begin(), _children.end(), childPath); + if (it == _children.end()) { + LOG_DEBUG(LOG, "child not found, adding child path"); + _children.push_back(childPath); + res = 0; + } + else { + LOG_DEBUG(LOG, "child found"); + res = -EEXIST; + } + } + } + + LOG_DEBUG(LOG, "addChild returns %d", res); + return res; + } + /** + Remove a childPath from the children information cache. + + \return 0 if successful, otherwise return negative errno. + \param childPath the ZooKeeper path of the child. + */ + int removeChild(const std::string & childPath) + { + LOG_DEBUG(LOG, "removeChild(childPath %s) path %s", + childPath.c_str(), _path.c_str()); + + int res = 0; + { + AutoLock lock(_mutex); + if (_initializedChildren) { + NodeNames::iterator it = + std::find(_children.begin(), _children.end(), childPath); + if (it != _children.end()) { + LOG_DEBUG(LOG, "child found"); + _children.erase(it); + res = 0; + } + else { + LOG_DEBUG(LOG, "child not found"); + res = -ENOENT; + } + } + } + + LOG_DEBUG(LOG, "removeChild returns %d", res); + return res; + } + /** + Invalidate the cached children information and cached data. + \see _clearChildren + \see _clearData + + \param clearChildren set to true to invalidate children information cache. + \param clearData set to true to invalidate data cache. + */ + void clear(bool clearChildren = true, bool clearData = true) + { + LOG_DEBUG(LOG, "clear(clearChildren %d, clearData %d) path %s", + clearChildren, clearData, _path.c_str()); + + { + AutoLock lock(_mutex); + if (clearChildren) { + _clearChildren(); + } + if (clearData) { + _clearData(); + } + } + } + /** + Whether reference count is zero. + \see _refCount + + \return true if reference count is zero. 
+ */ + bool isZombie() const + { + AutoLock lock(_mutex); + + return (_refCount == 0); + } + /** + Increment the reference count of the ZkFuseFile instance. + + This method may be called by a ZkFuseFileManager instance while + holding the ZkFuseFileManager's _mutex. To avoid deadlocks, + this methods must never invoke a ZkFuseFileManager instance + directly or indirectly while holding the ZkFuseFile instance's + _mutex. + \see _refCount + + \return the post-increment reference count. + \param count value to increment the reference count by. + */ + int incRefCount(int count = 1) + { + LOG_DEBUG(LOG, "incRefCount(count %d) path %s", count, _path.c_str()); + + int res = 0; + { + AutoLock lock(_mutex); + _refCount += count; + assert(_refCount >= 0); + res = _refCount; + } + + LOG_DEBUG(LOG, "incRefCount returns %d", res); + return res; + } + /** + Decrement the reference count of the ZkFuseFile instance. + + This method may be called by a ZkFuseFileManager instance while + holding the ZkFuseFileManager's _mutex. To avoid deadlocks, + this methods must never invoke a ZkFuseFileManager instance + directly or indirectly while holding the ZkFuseFile instance's + _mutex. + \see _refCount + + \return the post-decrement reference count. + \param count value to decrement the reference count by. + */ + int decRefCount(int count = 1) + { + return incRefCount(-count); + } + /** + Increment the count of number times the ZkFuseFile instance has + been opened as a directory. + + This count is incremented by opendir and decremented by releasedir. + \see _openDirCount. + + \return the post-increment count. + \param count the value to increment the count by. 
+ */ + int incOpenDirCount(int count = 1) + { + LOG_DEBUG(LOG, "incOpenDirCount(count %d) path %s", + count, _path.c_str()); + + int res = 0; + { + AutoLock lock(_mutex); + _openDirCount += count; + assert(_openDirCount >= 0); + res = _openDirCount; + assert(_openDirCount <= _refCount); + } + + LOG_DEBUG(LOG, "incOpenDirCount returns %d", res); + return res; + + } + /** + Decrement the count of number times the ZkFuseFile instance has + been opened as a directory. + + This count is incremented by opendir and decremented by releasedir. + \see _openDirCount. + + \return the post-decrement count. + \param count the value to decrement the count by. + */ + int decOpenDirCount(int count = 1) + { + return incOpenDirCount(-count); + } + /** + Whether ZkFuse should present the ZooKeeper node as a ZkFuse + directory by taking into account the specified ZkFuseNameType. + + The ZkFuseNameType may override the default ZkFuse presentation of + a ZooKeeper node. + \see _isDirNameType + + \return true if ZkFuse should present the ZooKeeper node as a ZkFuse + directory. + \param nameType specifies the ZkFuseNameType. + */ + bool isDirNameType(ZkFuseNameType nameType) const + { + return _isDirNameType(nameType, true); + } + /** + Whether ZkFuse should present the ZooKeeper node as a ZkFuse + regular file by taking into account the specified ZkFuseNameType. + + The ZkFuseNameType may override the default ZkFuse presentation of + a ZooKeeper node. + \see _isRegNameType + + \return true if ZkFuse should present the ZooKeeper node as a ZkFuse + regular file. + \param nameType specifies the ZkFuseNameType. + */ + bool isRegNameType(ZkFuseNameType nameType) const + { + return _isRegNameType(nameType, true); + } + /** + Get the active data. + \see _activeData + + \param data return data here. + */ + void getData(Data & data) const + { + AutoLock lock(_mutex); + + data = _activeData; + } + /** + Set the active data. 
+ \see _activeData + + Return -EFBIG is the data to be written is bigger than the maximum + permitted size (and no data is written). + + \return 0 if successful, otherwise return negative errno. + \param data set to this data. + \param doFlush whether to flush the data to the ZooKeeper node. + */ + int setData(const Data & data, bool doFlush) + { + LOG_DEBUG(LOG, "setData(doFlush %d) path %s", doFlush, _path.c_str()); + int res = 0; + + if (data.size() > maxDataFileSize) { + res = -EFBIG; + } + else { + AutoLock lock(_mutex); + _activeData = data; + _dirtyData = true; + if (doFlush) { + res = _flush(); + } + } + + LOG_DEBUG(LOG, "setData() returns %d", res); + return res; + } + /** + Update the children information and the data caches as needed. + + This method is invoked when a ZkFuse regular file or directory + implemented by this ZkFuseFile instance is opened, e.g. + using open or opendir. It attempts to: + - make sure that the cache has valid children information + - register for watches for changes if no previous watches have + been registered. + + The newFile flag indicates if the ZkFuseFile instance has just + been constructed and that ZooKeeper has not been contacted to + determine if the ZooKeeper path for this file really exist. + When a ZkFuseFile instance is created, the _deleted flag is set to + true because it is safer to assume that the ZooKeeper node does + not exist. The newFile flag causes the _deleted flag to be + ignored and ZooKeeper to be contacted to update the caches. + + If the newFile flag is false, then the ZkFuseFile instance is + currently open and have been opened before. Hence, these previous + opens should have contacted ZooKeeper and would like learned from + ZooKeeper whether the ZooKeeper path exists. Therefore, + the _deleted flag should be trustworthy, i.e. it has accurate + information on whether the ZooKeeper path actually exists. + + \return 0 if successful, otherwise return negative errno. 
+ \param newFile set to true if the ZkFuseFile instance is newly created. + */ + int update(bool newFile) + { + LOG_DEBUG(LOG, "update(newFile %d) path %s", newFile, _path.c_str()); + + int res = 0; + { + AutoLock lock(_mutex); + + /* At this point, cannot be zombie. + */ + assert(!_isZombie()); + if (!newFile && _deleted) { + /* Deleted file, don't bother to update caches */ + LOG_DEBUG(LOG, "deleted, not new file"); + res = -ENOENT; + } + else { + try { + LOG_DEBUG(LOG, "initialized children %d, data %d", + _initializedChildren, _initializedData); + LOG_DEBUG(LOG, "has children watch %d, data watch %d", + _hasChildrenListener, _hasDataListener); + /* + * Children handling starts here. + * If don't have children listener, + * then must establish listener. + * If don't have cached children information, + * then must get children information. + * It just happens, that the same ZooKeeper API + * is used for both. + */ + if (_initializedChildren == false || + _hasChildrenListener == false +#ifdef ZOOKEEPER_ROOT_CHILDREN_WATCH_BUG + /* HACK for root node because changes to children + * on a root node does not cause children watches to + * fire. + */ + || _path.length() == 1 +#endif // ZOOKEEPER_ROOT_CHILDREN_WATCH_BUG + ) { + LOG_DEBUG(LOG, "update children"); + NodeNames children; + _manager->getCommon().getZkAdapter()-> + getNodeChildren( children, _path, + &_childrenListener, _getZkContext()); + _hasChildrenListener = true; + LOG_DEBUG(LOG, "update children done"); + _children.swap(children); + _initializedChildren = true; + /* Since getNodeChildren is successful, the + * path must exist */ + _deleted = false; + } + else { + /* Children information is fresh since + * it is initialized and and have been + * updated by listener. + */ + } + /* + * Data handling starts here. 
+ */ + assert(newFile == false || _isOnlyRegOpen()); + if (!_isOnlyRegOpen()) { + /* If is already currently opened by someone, + * then don't update data with latest from ZooKeeper, + * use current active data (which may be initialized + * or not). + * \see _activeData + */ + LOG_DEBUG(LOG, "node currently in-use, no data update"); + } + else { + /* If not opened/reopened by someone else, + * then perform more comprehensive checks of + * to make data and listener is setup correctly. + * If don't have data listener, + * then must establish listener. + * If don't have cached data, + * then must get data. + * It just happens, that the same ZooKeeper API + * is used for both. + */ + LOG_DEBUG(LOG, "node first use or reuse"); + if (_initializedData == false || + _hasDataListener == false) { + /* Don't have any data for now or need to register + * for callback */ + LOG_DEBUG(LOG, "update data"); + _latestData = + _manager->getCommon().getZkAdapter()-> + getNodeData(_path, &_dataListener, + _getZkContext(), + &_latestStat); + _hasDataListener = true; + LOG_DEBUG(LOG, + "update data done, latest version %d", + _latestStat.version); + /* Since getNodeData is successful, the + * path must exist. */ + _deleted = false; + } + else { + /* Data is fresh since it is initialized and + * and have been updated by listener. + */ + } + /* Update active data to the same as the most + * recently acquire data. + */ + _activeData = _latestData; + _activeStat = _latestStat; + _initializedData = true; + _dirtyData = false; + LOG_DEBUG(LOG, "update set active version %d", + _activeStat.version); + } + res = 0; + } catch (const ZooKeeperException & e) { + /* May have ZNONODE exception if path does exist. 
*/ + if (e.getZKErrorCode() == ZNONODE) { + LOG_DEBUG(LOG, "update %s exception %s", + _path.c_str(), e.what()); + /* Path does not exist, set _deleted, + * clear children information cache + */ + _deleted = true; + _clearChildren(); + res = -ENOENT; + } else { + LOG_ERROR(LOG, "update %s exception %s", + _path.c_str(), e.what()); + res = -EIO; + } + } + } + } + + LOG_DEBUG(LOG, "update returns %d", res); + return res; + } + /** + Process a data event. + + This method may: + - Invalidate the data cache. + - Invoke ZooKeeper to update the data cache and register a new + data watch so that the cache can be kept in-sync with the + ZooKeeper node's data. + + This method does not change the active data. Active data will be + changed to a later version by update() at the appropriate time. + \see update. + */ + void dataEventReceived(const ZKWatcherEvent & event) + { + bool reclaim = false; + int eventType = event.getType(); + int eventState = event.getState(); + + /* + IMPORTANT: + + Do not mark ZkFuseFile instance as deleted when a ZOO_DELETED_EVENT + is received without checking with ZooKeeper. An example of + problematic sequence would be: + + 1. Create node. + 2. Set data and watch. + 3. Delete node. + 4. Create node. + 5. Deleted event received. + + It is a bug to mark the ZkFuseFile instance as deleted after + step 5 because the node exists. + + Therefore, this method should always contact ZooKeeper to keep the + data cache (and deleted status) up-to-date if necessary. + */ + LOG_DEBUG(LOG, "dataEventReceived() path %s, type %d, state %d", + _path.c_str(), eventType, eventState); + { + AutoLock lock(_mutex); + + _hasDataListener = false; + /* If zombie, then invalidate cached data. + * This clears _initializedData and eliminate + * the need to get the latest data from ZooKeeper and + * re-register data watch. 
+ */ + if (_isZombie() && _initializedData) { + LOG_DEBUG(LOG, "invalidate data"); + _clearData(); + } + else if ((_refCount - _openDirCount) > 0) { + /* Don't invalidate cached data because clients of currently + * open files don't expect the data to change from under them. + * If data acted upon by these clients have become stale, + * then the clients will get an error when ZkFuse attempts to + * flush dirty data. The clients will not get error + * notification if they don't modify the stale data. + * + * If data cache is cleared here, then the following code + * to update data cache and re-register data watch will not + * be executed and may result in the cached data being + * out-of-sync with ZooKeeper. + */ + LOG_WARN(LOG, + "%s data has changed while in-use, " + "type %d, state %d, refCount %d", + _path.c_str(), eventType, eventState, _refCount); + } + /* If cache was valid and still connected + * then get the latest data from ZooKeeper + * and re-register data watch. This is required to keep + * the data cache in-sync with ZooKeeper. + */ + if (_initializedData && + eventState == ZOO_CONNECTED_STATE + ) { + try { + LOG_DEBUG(LOG, "register data watcher"); + _latestData = + _manager->getCommon().getZkAdapter()-> + getNodeData(_path, &_dataListener, _getZkContext(), + &_latestStat); + _hasDataListener = true; + LOG_DEBUG(LOG, + "get data done, version %u, cversion %u done", + _latestStat.version, _latestStat.cversion); + _deleted = false; + } catch (const ZooKeeperException & e) { + if (e.getZKErrorCode() == ZNONODE) { + _deleted = true; + _clearChildren(); + } + LOG_ERROR(LOG, "dataEventReceived %s exception %s", + _path.c_str(), e.what()); + } + } + } + LOG_DEBUG(LOG, "dataEventReceived return %d", reclaim); + } + /** + Process a children event. + + This method may: + - Invalidate the children information cache. 
+ - Invoke ZooKeeper to update the children cache and register a new + data watch so that the cache can be kept in-sync with the + ZooKeeper node's children information. + */ + void childrenEventReceived(const ZKWatcherEvent & event) + { + bool reclaim = false; + int eventType = event.getType(); + int eventState = event.getState(); + + LOG_DEBUG(LOG, "childrenEventReceived() path %s, type %d, state %d", + _path.c_str(), eventType, eventState); + { + AutoLock lock(_mutex); + + _hasChildrenListener = false; + /* If zombie or disconnected, then invalidate cached children + * information. This clears _initializedChildren and eliminate + * the need to get the latest children information and + * re-register children watch. + */ + if (_initializedChildren && + (_isZombie() || eventState != ZOO_CONNECTED_STATE)) { + LOG_DEBUG(LOG, "invalidate children"); + _clearChildren(); + } + else if (_initializedChildren) { + /* Keep cached children information so that we have some + * children information if get new children information + * fails. If there is failure, then on next open, + * update() will attempt again to get children information + * again because _hasChildrenListener will be false. + * + * If children information cache is cleared here, then + * the following code to update children information cache + * and re-register children watch will not be executed + * and may result in the cached children information being + * out-of-sync with ZooKeeper. + * + * The children cache will be cleared if unable to + * get children and re-establish watch. + */ + LOG_WARN(LOG, + "%s children has changed while in-use, " + "type %d, state %d, refCount %d", + _path.c_str(), eventType, eventState, _refCount); + } + /* If children cache was valid and still connected, + * then get the latest children information from ZooKeeper + * and re-register children watch. This is required to + * keep the children information cache in-sync with ZooKeeper. 
+ */ + if (_initializedChildren && + eventState == ZOO_CONNECTED_STATE + ) { + /* Should try to keep the cache in-sync, register call + * callback again and get current children. + */ + try { + LOG_DEBUG(LOG, "update children"); + NodeNames children; + _manager->getCommon().getZkAdapter()-> + getNodeChildren(children, _path, + &_childrenListener, _getZkContext()); + _hasChildrenListener = true; + LOG_DEBUG(LOG, "update children done"); + _children.swap(children); + _deleted = false; + } catch (const ZooKeeperException & e) { + if (e.getZKErrorCode() == ZNONODE) { + _deleted = true; + _clearChildren(); + } + LOG_ERROR(LOG, "childrenEventReceived %s exception %s", + _path.c_str(), e.what()); + _children.clear(); + } + } + } + LOG_DEBUG(LOG, "childrenEventReceived returns %d", reclaim); + } + /** + Truncate or expand the size of the cached active data. + + This method only changes the size of the cached active data. + This change is committed to ZooKeeper when the cached data + is written to the ZooKeeper node by flush(). + + Return -EFBIG is the requested size exceeds the maximum. + + \return 0 if successful, otherwise negative errno. + \param size the requested size. + */ + int truncate(off_t size) + { + int res = 0; + + { + AutoLock lock(_mutex); + res = _truncate(size); + } + + return res; + } + /** + Copy range of active data into specified output buffer. + + \return if successful, return number of bytes copied, otherwise + return negative errno. + \param buf address of the output buffer. + \param size size of the output buffer and desired number of bytes to copy. + \param offset offset into active data to start copying from. 
+ */ + int read(char *buf, size_t size, off_t offset) const + { + LOG_DEBUG(LOG, "read(size %zu, off_t %zu) path %s", + size, offset, _path.c_str()); + + int res = 0; + + { + AutoLock lock(_mutex); + if (!_initializedData) { + LOG_DEBUG(LOG, "not initialized"); + res = -EIO; + } + else { + off_t fileSize = _activeData.size(); + if (offset > fileSize) { + LOG_DEBUG(LOG, "offset > fileSize %zu", fileSize); + res = 0; + } + else { + if (offset + size > fileSize) { + size = fileSize - offset; + LOG_DEBUG(LOG, + "reducing read size to %zu for fileSize %zu", + size, fileSize); + } + copy(_activeData.begin() + offset, + _activeData.begin() + offset + size, + buf); + res = size; + } + } + } + + LOG_DEBUG(LOG, "read returns %d", res); + return res; + } + /** + Copy buffer content to active data. + + \return if successful, return number of bytes copied, otherwise + return negative errno. + \param buf address of the buffer. + \param size size
<TRUNCATED>
