Hi All,
Attached is a draft of the C API specification that some of us (at Yahoo)
have been thinking about. The specification is closely tied to the API
exported by Hadoop's FileSystem class.
We would really appreciate any comments or suggestions on the specification.
Thanks,
Devaraj.
#ifndef DFS_H
#define DFS_H
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>
/** All APIs set errno to meaningful values */
/* Give the declarations C linkage when included from C++. The guard keeps
 * the linkage specification away from C compilers, which do not accept it. */
#ifdef __cplusplus
extern "C"
{
#endif
/* Scalar types used by the declarations in this header. */
typedef int32_t  tSize;   /**< size of data for read/write io ops */
typedef time_t   tTime;   /**< time type */
typedef int64_t  tOffset; /**< offset within the file */
typedef uint16_t tPort;   /**< port */
/**
 * tObjectKind - kind of object a path refers to.
 * The enumerator values are the characters 'F' and 'D'.
 * The typedef name is required so that plain `tObjectKind` is a usable
 * type in C (in C the enum tag alone does not introduce a type name).
 */
typedef enum tObjectKind {
    kObjectKindFile = 'F',
    kObjectKindDirectory = 'D'
} tObjectKind;
/** Untyped handle to an open file; this is the type dfsOpenFile returns. */
typedef void *dfsFile;
/** Untyped handle to a file system; this is the type dfsConnect returns. */
typedef void *dfsFS;
/** dfsConnect - connect to a dfs file system.
 * @param host a string containing either a host name or an IP address
 *             of the namenode of a dfs cluster. Pass NULL to connect to
 *             the local file system. Pass "default" (with port 0) to use
 *             the configured file system (in hadoop-site/hadoop-default.xml).
 *             The string is const-qualified so that string literals such
 *             as "default" can be passed directly.
 * @param port the port on which the server is listening.
 * @return a handle to the file system.
 *         NOTE(review): the value returned on failure is not specified
 *         here -- presumably NULL with errno set; confirm.
 */
dfsFS dfsConnect(const char *host, tPort port);
/** dfsOpenFile - open a dfs file.
 * @param fs      the file system handle.
 * @param path    a full path to the file.
 * @param flags   either O_RDONLY or O_WRONLY, for read-only or write-only.
 *                These macros come from <fcntl.h>, which this header does
 *                not include; callers must include it themselves.
 * @param options an XML'ized string containing options like buffersize,
 *                replication level, etc.
 *                e.g. <replication>2</replication><buffersize>8192</buffersize>
 * @return a handle to the opened file.
 *         NOTE(review): the value returned on failure is not specified
 *         here -- presumably NULL with errno set; confirm.
 */
dfsFile dfsOpenFile(dfsFS fs, const char *path, int flags, const char *options);
/** dfsCloseFile - close a file.
 * @param fs   the file system handle.
 * @param file the handle of the file to close.
 * NOTE(review): there is no return value, so a failure can only be
 * observed through errno.
 */
void dfsCloseFile(dfsFS fs, dfsFile file);
/** seek to given offset in file. This works only for files
* opened in read-only mode
*/
void dfsSeek(dfsFs fs, dfsFile f, tOffset offset, int whence);
/** get the current offset in the file, in bytes */
tOffset dfsTell(dfsFs fs, dfsFile f);
/** read returns the number of bytes actually read, possibly less than
* the number of bytes requested.
*/
tSize dfsRead(dfsFs fs, dfsFile f, void* buffer, tSize length);
/** write length bytes of buffer to f */
tSize dfsWrite(dfsFs fs, dfsFile f, const void* buffer, tSize length);
/** flush the data */
void dfsFlush(dfsFs fs, dfsFile f);
/** check whether the file pointer is at end-of-file */
bool dfsIsEof(dfsFs fs, dfsFile f);
/** dfsDelete - delete a file.
 * @param fs   the file system handle.
 * @param name the name of the file to delete.
 */
void dfsDelete(dfsFS fs, const char *name);
/** dfsRename - rename a file.
 * old_name and new_name could be absolute or relative to the current
 * working directory.
 * @param fs       the file system handle.
 * @param old_name the existing name.
 * @param new_name the name to rename to.
 */
void dfsRename(dfsFS fs, const char *old_name, const char *new_name);
/** dfsCreateDirectory - make the given file and all non-existent parents
 * into directories.
 * @param fs   the file system handle.
 * @param name the directory path to create.
 */
void dfsCreateDirectory(dfsFS fs, const char *name);
/**
 * dfsFileLocationInfo
 * Describes the mapping between file blocks and the hostnames where they
 * are stored. Because of replication, one file block may be stored on
 * several hosts.
 */
typedef struct {
    char **hostname; /* host names; numHosts gives the count */
    int numHosts;    /* number of entries in hostname */
} dfsFileLocationInfo;
/**
 * dfsFileInfo
 * Information about a file/directory.
 * NOTE(review): mSize is a long, which is 32 bits wide on some platforms,
 * while offsets elsewhere in this header use the 64-bit tOffset -- consider
 * aligning the two.
 */
typedef struct {
    tObjectKind mKind;    /* file or directory */
    char *mName;          /* the name of the file */
    tTime mCreationTime;  /* NOTE(review): presumably the creation time;
                             epoch/units are not stated -- confirm */
    dfsFileLocationInfo *fileLocationInfo; /* the last element in the
                                              array is NULL */
    long mSize;           /* the size of the file in bytes */
    bool replicated;      /* whether this file is replicated */
} dfsFileInfo;
/** dfsGetPathInfo - return information about a path as a (dynamically
 * allocated) array of dfsFileInfo.
 * If the path happens to be a file, the array will have just one element.
 * If the path happens to be a directory, the dfsFileInfo elements in the
 * array will contain information about the files/sub-dirs within the path.
 * @param fs         the file system handle.
 * @param path       the path to query.
 * @param numEntries set to the number of elements in the array.
 * @return the array, or NULL if the path does not exist or some other
 *         error is encountered. freeDfsFileInfo should be called, passing
 *         the array and numEntries, when it is no longer needed.
 */
dfsFileInfo *dfsGetPathInfo(dfsFS fs, const char *path, int *numEntries);
/** freeDfsFileInfo - free up the dfsFileInfo array (including the fields).
 * @param dfsFileInfo the array to free.
 * @param numEntries  the number of elements in the array.
 */
void freeDfsFileInfo(dfsFileInfo *dfsFileInfo, int numEntries);
/** Obtain a lock on the file. Return -1 on error*/
int dfsLock(dfsFs fs, char *path, int shared);
/** Release the lock. Return -1 on error*/
int dfsReleaseLock(dfsFs fs, char *path);
/** Set the working directory. All relative paths will be resolved relative
to it */
void dfsSetWorkingDirectory(dfsFs fs, char *path);
/** Get the current working directory for the given file system*/
char *dfsGetWorkingDirectory(dfsFs fs);
/** The src file is on the local disk.
* Add it to FS at the given dst name and the source is kept
* intact afterwards. Returns -1 on error.
*/
int dfsCopyFromLocalFile(dfsFs fs, char *src, char *dst);
/** The src file is under DFS, and the dst is on the local disk.
* Copy it from FS control to the local dst name. Returns -1 on error.
*/
int dfsCopyToLocalFile(dfsFs fs, char *src, char *dst);
/** The src file is on the local disk. Add it to FS at the given dst name,
* removing the source afterwards. Returns -1 on error.
*/
int dfsMoveFromLocalFile(dfsFs fs, char *src, char *dst);
/** Return the raw capacity of the filesystem */
long dfsGetCapacity(dfsFs fs);
/** Return the total raw size of all files in the filesystem.*/
long dfsGetUsed(dfsFs fs);
/* Close the C++ linkage block; hidden from C compilers, for which a bare
 * closing brace at file scope is a syntax error. */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /*DFS_H*/