[ https://issues.apache.org/jira/browse/HDFS-2685?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13215424#comment-13215424 ]
Mitesh Singh Jat commented on HDFS-2685:
----------------------------------------
As a workaround, I have written a C program that resolves the brace globbing itself and returns an
appropriate exit code.
{code:title=hadoop_fs_ls_globbing.c}
//-------------------------------------------------------------
/*
 * Filename:    hadoop_fs_ls_globbing.c
 * Description: To support brace globbing in `hadoop fs -ls` (and similar) commands
 * Created:     01/11/2012 09:39:55 AM (IST)
 * Author:      Mitesh Singh Jat (), mitesh_singh_jat[at]yahoo[dot]co[dot]in
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>       /* WEXITSTATUS */

#define CMD_LEN   4096      /* command line length */
#define PATH_LEN  256       /* splits in path based on delimiters */
#define MAX_PATHS 256       /* maximum resolved paths in globbed path */

static void print_usage(char *cmd_name)
{
    fprintf(stderr, "Usage: %s [-c <hadoop_commands>] path\n", cmd_name);
    exit(EXIT_FAILURE);
}

int main(int argc, char *argv[])
{
    char *str1, *str2, *token, *subtoken;
    char *saveptr1, *saveptr2;
    int i, j, k;
    char cmds[MAX_PATHS][CMD_LEN];        /* fully expanded commands */
    char *a[MAX_PATHS][PATH_LEN];         /* a[i][j]: j-th alternative of the i-th token */
    int total_paths = 1;
    int nt = 0, nst = 0;
    int retval = 0;
    int exit_code = 0;
    char cmd[CMD_LEN] = "hadoop fs -ls "; /* default command prefix */
    char delim[] = "{}";                  /* brace-group delimiters */
    char subdelim[] = ",";                /* alternatives inside a brace group */
    int nsts[PATH_LEN];                   /* number of alternatives per token */
    int xpaths;

    for (i = 0; i < MAX_PATHS; ++i) {
        strcpy(cmds[i], "");
        for (j = 0; j < PATH_LEN; ++j)
            a[i][j] = (char *) NULL;
    }

    if (argc < 2 || (argc > 2 && argc < 4))
        print_usage(argv[0]);
    if (argc > 2) {
        /* -c "<custom hadoop command>" replaces the default prefix */
        if (strncmp(argv[1], "-c", 2))
            print_usage(argv[0]);
        strncpy(cmd, argv[2], CMD_LEN - 2);
        cmd[CMD_LEN - 2] = '\0';
        strcat(cmd, " ");
        argv += 2;
    }

    /* Token 0 is the command prefix itself; it has exactly one alternative. */
    a[0][0] = cmd;
    nsts[0] = 1;

    /* Split the path on '{' and '}', then split each brace group on ','. */
    for (j = 1, str1 = argv[1], nt = 0; ; j++, str1 = NULL, ++nt) {
        token = strtok_r(str1, delim, &saveptr1);
        if (token == NULL)
            break;
        //printf("%d: %s\n", j, token);
        for (str2 = token, nst = 0; ; str2 = NULL, ++nst) {
            subtoken = strtok_r(str2, subdelim, &saveptr2);
            if (subtoken == NULL)
                break;
            //printf(" --> %s\n", subtoken);
            a[nt + 1][nst] = subtoken;
        }
        total_paths *= nst;
        if (total_paths > MAX_PATHS) {
            fprintf(stderr, "%s: Total generated paths %d > %d\n",
                    argv[0], total_paths, MAX_PATHS);
            exit(EXIT_FAILURE);
        }
        nsts[nt + 1] = nst;
    }

    /* Build the cross product of all alternatives into cmds[]. */
    xpaths = total_paths;
    for (i = 0; i < nt + 1; ++i) {
        nst = nsts[i];
        if (nst == 1) {
            /* literal token: append it to every command */
            for (k = 0; k < total_paths; ++k)
                strncat(cmds[k], a[i][0], CMD_LEN - strlen(cmds[k]) - 1);
        }
        else {
            /* brace group: cycle through its alternatives */
            xpaths = xpaths / nst;
            for (k = 0; k < total_paths; ++k) {
                j = (k / xpaths) % nst;
                strncat(cmds[k], a[i][j], CMD_LEN - strlen(cmds[k]) - 1);
            }
        }
    }

    /* Run each expanded command; remember the last non-zero exit code. */
    for (i = 0; i < total_paths; ++i) {
        printf("Running: %s\n", cmds[i]);
        retval = system(cmds[i]);
        if (retval != 0) {
            exit_code = WEXITSTATUS(retval); // hadoop returns 0 - 255
            // uncomment to stop at the first failure instead of checking the rest
            //exit(exit_code);
        }
    }
    return (exit_code);
} /* main */
//-------------------------------------------------------------
{code}
Compile the program on Linux/Unix:
{noformat}
$ gcc -Wall -o hadoop_fs_ls_globbing hadoop_fs_ls_globbing.c
{noformat}
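For reference, the {{(k / xpaths) % nst}} arithmetic in the program walks the cross product of the brace groups in exactly the order shown in the runs further below. Here is a minimal standalone sketch of the same indexing; the two groups and their sizes are hard-coded for illustration (taken from the "input/20110{4,1}/{A,B}" example) and this is not part of the workaround program itself:
{code:title=cross_product_sketch.c}
#include <stdio.h>

int main(void)
{
    /* Two brace groups, as in input/20110{4,1}/{A,B} (hard-coded for this sketch). */
    const char *g1[] = { "4", "1" };    /* alternatives of the first group  */
    const char *g2[] = { "A", "B" };    /* alternatives of the second group */
    int n1 = 2, n2 = 2;
    int total = n1 * n2;                /* corresponds to total_paths above */
    int k;

    for (k = 0; k < total; ++k) {
        /* xpaths shrinks as groups are consumed:                             */
        /*   group 1: xpaths = total / n1 = 2 -> index changes every 2 paths  */
        /*   group 2: xpaths = 2 / n2 = 1     -> index changes every path     */
        int i1 = (k / (total / n1)) % n1;
        int i2 = (k / ((total / n1) / n2)) % n2;
        printf("input/20110%s/%s\n", g1[i1], g2[i2]);
    }
    return 0;
}
{code}
It prints input/201104/A, input/201104/B, input/201101/A, input/201101/B, i.e. the same order as the "Running:" lines in the transcript below.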
The workaround program also accepts a custom hadoop command through the -c option, for example
"hadoop fs -ls"
"hadoop fs -test -e"
{noformat}
$ ./hadoop_fs_ls_globbing -c "hadoop fs -test -e" "input/20110{4,1}/{A,B}"; echo $?
Running: hadoop fs -test -e input/201104/A
Running: hadoop fs -test -e input/201104/B
Running: hadoop fs -test -e input/201101/A
Running: hadoop fs -test -e input/201101/B
255 <*****
$ ./hadoop_fs_ls_globbing -c "hadoop fs -ls" "input/20110{4,1}/{A,B}"; echo $?
Running: hadoop fs -ls input/201104/A
ls: Cannot access input/201104/A: No such file or directory.
Running: hadoop fs -ls input/201104/B
ls: Cannot access input/201104/B: No such file or directory.
Running: hadoop fs -ls input/201101/A
Found 2 items
drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/A/1
drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/A/2
Running: hadoop fs -ls input/201101/B
Found 2 items
drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/B/1
drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/B/2
255 <*****
{noformat}
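The inconsistency reported below comes down to how the per-path results are combined: the shell command keeps only the exit code of the last resolved path, while remembering any failure (as /bin/ls and the wrapper above effectively do) gives a stable result. Here is a minimal sketch contrasting the two policies; run_one() is a hypothetical stand-in for listing one resolved path, not a Hadoop API:
{code:title=exit_code_policies.c}
#include <stdio.h>

/* Hypothetical stand-in for listing one resolved path:
 * returns 0 on success, 255 on failure (paths marked with '!' "fail"). */
static int run_one(const char *path)
{
    printf("listing %s\n", path);
    return (path[0] == '!') ? 255 : 0;
}

int main(void)
{
    const char *paths[] = { "input/201101/", "!input/201104/", "input/201103/" };
    int n = (int)(sizeof(paths) / sizeof(paths[0]));
    int last = 0, any = 0;
    int i;

    for (i = 0; i < n; ++i) {
        int rc = run_one(paths[i]);
        last = rc;          /* "last path wins": the behavior reported below       */
        if (rc != 0)
            any = rc;       /* "remember any failure": what the wrapper above does */
    }

    printf("last-path policy: %d, any-failure policy: %d\n", last, any);
    return any;
}
{code}
With the failing path in the middle, the last-path policy returns 0 while the any-failure policy returns 255, which is exactly the difference between the two `hadoop fs -ls` runs quoted below.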
> hadoop fs -ls globbing gives inconsistent exit code
> ---------------------------------------------------
>
> Key: HDFS-2685
> URL: https://issues.apache.org/jira/browse/HDFS-2685
> Project: Hadoop HDFS
> Issue Type: Bug
> Affects Versions: 0.20.2, 0.20.204.0, 0.20.205.0
> Reporter: Mitesh Singh Jat
>
> The _hadoop fs -ls_ command's exit code for a globbed input path is the exit
> code of the last resolved absolute path, whereas the _ls_ command always gives
> the same exit code regardless of the position of the non-existent path in the glob.
> {code}$ hadoop fs -mkdir input/20110{1,2,3}/{A,B,C,D}/{1,2} {code}
> Since directory 'input/201104/' is not present, the following command gives
> 255 as exit code.
> {code}$ hadoop fs -ls input/20110{1,2,3,4}/ ; echo $? {code}
> {noformat}
> Found 4 items
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/A
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/B
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/C
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/D
> Found 4 items
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201102/A
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201102/B
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201102/C
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201102/D
> Found 4 items
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201103/A
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201103/B
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201103/C
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201103/D
> ls: Cannot access input/201104/: No such file or directory.
> 255
> {noformat}
> In the following command, the non-existent directory 'input/201104/' is the
> second-last element of the glob. The command gives 0 as the exit code, because
> the last resolved directory, 'input/201103/', is present.
> {code}$ hadoop fs -ls input/20110{1,2,4,3}/ ; echo $? {code}
> {noformat}
> Found 4 items
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/A
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/B
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/C
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201101/D
> Found 4 items
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201102/A
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201102/B
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201102/C
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201102/D
> ls: Cannot access input/201104/: No such file or directory.
> Found 4 items
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201103/A
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201103/B
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201103/C
> drwxr-xr-x - mitesh supergroup 0 2011-12-15 11:51 /user/mitesh/input/201103/D
> 0
> {noformat}
> Whereas, on Linux, the ls command gives a non-zero exit code (2) irrespective
> of the position of the non-existent path in the glob.
> {code}$ mkdir -p input/20110{1,2,3,4}/{A,B,C,D}/{1,2} {code}
> {code}$ ls input/20110{1,2,4,3}/ ; echo $? {code}
> {noformat}
> /bin/ls: input/201104/: No such file or directory
> input/201101/:
> ./ ../ A/ B/ C/ D/
> input/201102/:
> ./ ../ A/ B/ C/ D/
> input/201103/:
> ./ ../ A/ B/ C/ D/
> 2
> {noformat}
> {code}$ ls input/20110{1,2,3,4}/ ; echo $? {code}
> {noformat}
> /bin/ls: input/201104/: No such file or directory
> input/201101/:
> ./ ../ A/ B/ C/ D/
> input/201102/:
> ./ ../ A/ B/ C/ D/
> input/201103/:
> ./ ../ A/ B/ C/ D/
> 2
> {noformat}