On Wednesday 09 July 2008 18:01:17 Tito wrote:
> On Wednesday 09 July 2008 14:35:50 Rob Landley wrote:
> > On Tuesday 08 July 2008 16:40:14 Vladimir Dronnikov wrote:
> > > Cool!
> > >
> > > But we need to not just process continuations and comments but also to
> > > parse the line into some (possibly variable) amount of
> > > whitespaces-delimited tokens.
> > 
> > There's no obvious reason to mix these two issues.  A function that reads 
> > comments and continuations and returns the next string from the file is 
> > useful by itself, and you can feed the output of that into something that 
> > tokenizes it.
> > 
> > You may need different tokenizers if you have different file formats.  (If 
> > you ever wind up caring about bind zone files you get into multiple nested 
> > contexts.  Heck, even  something like "/etc/sudoers" is actually a bit more 
> > complicated than it looks at first glance.)
> > 
> > Rob
> 
> 
> Hi,
> yes this is also my opinion:
> parsing the config file in the sense to rip out the non option stuff
> and pass only the options to the application is one thing
> that could be done in a common libbb way supporting the various cases:
> 1) comments
> 2) indented comments
> 3) end of line comments
> 4) multiline comments
> 5) options
> 6) indented options
> 7) multiline options
> 8) empty lines
> 9) empty config files
> 10) missing config files
> 
> Trying to parse the contents of the options is not suitable to be done
> in a common way as only the application knows:
> 
> 1) the keywords
> 2) the separator between keyword and option value (SHELL=/bin/bash or PIPPO: 
> PLUTO or SHELL = /bin/bash or PIPPO : PLUTO etc)
> 3) the number of fields in non-keyworded options:
>       57 0 * * 0 root [ -x /usr/share/mdadm/checkarray ] && [ $(date +\%d) 
> -le 7 ] && /usr/share/mdadm/checkarray --cron --all --quiet
>       this for example is difficult to parse by whitespace or any other 
> separator if you don't know the number of fields to expect
> 4) the type of values to expect in the fields so that they could be 
> eventually sanitized
> 
> I can imagine a flow of events like this if we don't try to make one big 
> monster function but break the problem
> into smaller pieces:
> 
> 1) parse_config() readies the config file in a way suitable (linked list)  to 
> be passed to the app
> 2) char * parse_named_options(const char *name, char separator, llist_t 
> *list) parses option names and returns the values found for them 
>     (removes them from the linked list and frees memory)
> 3)   parse_option_lines(int fields, ...)  parses the rest in the desired 
> number of fields and passes the values to the desired number of varargs
>     (removes them from the linked list and frees memory)
> 
> For example:
> 
> typedef struct crontab_t {
>         char *min;
>         char *hour;
>         char *day;
>         char *month;
>         char *day_of_week;
>         char *cmd;
>       struct crontab_t *link;
> } crontab_t;
> 
> int main(int argc, char**argv)
> {
> 
>       char *shell;
>         char *path;
>       list_t *option_list;
>       crontab_t *old_head = NULL;
>       crontab_t *new_head = NULL;
> 
>       option_list = parse_config("/etc/crontab");
>       shell = parse_named_option("SHELL", '=', &option_list);
>       path = parse_named_option("PATH", '=', &option_list);
>       while (option_list) {
>               crontab_t *new_head = xmalloc(sizeof(crontab_t));
>               parse_option_lines(7, &new_head->min, &new_head->hour, 
> &new_head->day, &new_head->month, &new_head->day_of_week, &new_head->cmd);
>               new_head->link = *old_head;
>               *old_head = new_head;
>       }
>       free(option_list);
> }
> 
> This obviously is only pseudo code, neither tested nor compile-tested, just an 
> idea.
> Comments, hints, and criticism are welcome....
> 
> 
> Ciao,
> Tito

Hi,
attached is a proof of concept test program that uses the new functions:

void parse_option_lines(char *data, int fields, ...)
char * parse_named_option(const char *option_name, char separator, llist_t 
**option_list)
llist_t *parse_config(const char *filename)

and a config file to test it.

Usage is  ./test crontab
At the moment only this config file can be used for testing, as some values are 
hard coded.

It is ugly code but maybe with the help of the list members it could be 
improved.
Hints, criticism and help are welcome.

Ciao,
Tito 
# /etc/crontab: system-wide crontab
# Unlike any other crontab you don't have to run the `crontab'
# command to install the new version when you edit this file
# and files in /etc/cron.d. These files also have username fields,
# that none of the other crontabs do.

SHELL= /bin/sh
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

# m h dom mon dow user  command
17 *    * * *   root    cd / && run-parts --report /etc/cron.hourly
25 6    * * *   root    test -x /usr/sbin/anacron || ( cd / && run-parts 
--report /etc/cron.daily )
47 6    * * 7   root    test -x /usr/sbin/anacron || ( cd / && run-parts 
--report /etc/cron.weekly )
52 6    1 * *   root    test -x /usr/sbin/anacron || ( cd / && run-parts 
--report /etc/cron.monthly )
#


#define _GNU_SOURCE
#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <netdb.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <string.h>
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <termios.h>
#include <time.h>
#include <unistd.h>
#include <utime.h>
/* Try to pull in PATH_MAX */
#include <limits.h>
#include <sys/param.h>

/* Node of a singly linked list that carries one string per element. */
typedef struct llist_t {
	char *data;             /* payload; parse_config() stores a heap-allocated option line here */
	struct llist_t *link;   /* next node; NULL marks the end of the list */
} llist_t;


/*
 * Check whether the final character of a string equals c.
 *
 * Returns a pointer to that final character on a match, and NULL when the
 * character differs or when s is NULL or empty.
 */
char* last_char_is(const char *s, int c)
{
	const char *last;

	if (s == NULL || *s == '\0')
		return NULL;

	last = s + (strlen(s) - 1);
	/* Compare as unsigned char so bytes >= 0x80 match a non-negative c */
	return ((unsigned char)*last == c) ? (char *)last : NULL;
}


/*
 * Return a pointer to the first character of s that is not whitespace
 * (as defined by isspace()).  If s consists only of whitespace the
 * returned pointer addresses the terminating NUL.
 *
 * s must be a valid NUL-terminated string.
 */
char* skip_whitespace(const char *s)
{
	/*
	 * isspace() is only defined for EOF and values representable as
	 * unsigned char; a plain char holding a byte >= 0x80 may be negative.
	 * NB: isspace('\0') returns 0, so the loop stops at the terminator.
	 */
	while (isspace((unsigned char)*s))
		++s;

	return (char *) s;
}

/*
 * realloc() wrapper that never reports failure to the caller: if a
 * non-zero sized request cannot be satisfied the process exits with
 * status 1.  Returns whatever realloc() returned otherwise.
 */
void* xrealloc(void *ptr, size_t size)
{
	void *resized = realloc(ptr, size);

	if (!resized && size)
		exit(1);

	return resized;
}

/*
 * Read the next chunk of bytes from file into a freshly allocated,
 * NUL-terminated buffer.
 *
 * Reading stops after a NUL byte has been stored, at EOF, or - only when
 * end is non-NULL - after a '\n' has been stored.  The stopping byte is
 * kept in the buffer.
 *
 * If end is non-NULL, the number of bytes stored (terminator excluded) is
 * written to *end.  Returns NULL when nothing could be read.  No ferror()
 * check is made: bytes gathered before getc() returned EOF are still
 * returned.  The caller owns the returned buffer.
 */
char* bb_get_chunk_from_file(FILE *file, int *end)
{
	char *buf = NULL;
	int capacity = 0;
	int used = 0;

	for (;;) {
		int ch = getc(file);

		if (ch == EOF)
			break;

		/* Grow in steps of 80 bytes whenever the buffer is full */
		if (used >= capacity) {
			capacity += 80;
			buf = xrealloc(buf, capacity);
		}
		buf[used++] = (char) ch;

		if (ch == '\0')
			break;
		if (end != NULL && ch == '\n')
			break;
	}

	if (end != NULL)
		*end = used;

	if (buf == NULL)
		return NULL;

	/* Shrink (or grow by one) to the exact size and terminate */
	buf = xrealloc(buf, used + 1);
	buf[used] = '\0';
	return buf;
}

/* Get line.  Remove trailing \n */
/*
 * Read one line from file via bb_get_chunk_from_file() and strip a
 * single trailing '\n' if present.  Returns the allocated line, or
 * whatever bb_get_chunk_from_file() returned when nothing was read.
 */
char* xmalloc_fgetline(FILE *file)
{
	int len;
	char *line = bb_get_chunk_from_file(file, &len);

	/* len is 0 when line is NULL, so line is only touched when valid */
	if (len != 0) {
		char *last = &line[len - 1];

		if (*last == '\n')
			*last = '\0';
	}

	return line;
}

/*
 * malloc() wrapper that exits the process with status 1 when a
 * non-zero sized allocation fails; otherwise returns malloc()'s result.
 */
void* xmalloc(size_t size)
{
	void *block = malloc(size);

	if (!block && size)
		exit(1);

	return block;
}


/*
 * printf-style formatting into a newly allocated string.
 *
 * Relies on vasprintf(), a GNU extension.  If vasprintf() reports an
 * error the process exits with status 1; otherwise the allocated string
 * is returned and must be released by the caller with free().
 */
char* xasprintf(const char *format, ...)
{
	char *result;
	va_list args;
	int written;

	va_start(args, format);
	written = vasprintf(&result, format, args);
	va_end(args);

	if (written < 0)
		exit(1);

	return result;
}

/* Add data to the end of the linked list.  */
/*
 * Append a new node holding data to the tail of the list.
 * *list_head is updated when the list was empty.  The data pointer is
 * stored as is; it is not copied.
 */
void llist_add_to_end(llist_t **list_head, void *data)
{
	llist_t **slot = list_head;
	llist_t *node;

	/* Walk to the link field (or head pointer) that currently holds NULL */
	while (*slot)
		slot = &(*slot)->link;

	node = xmalloc(sizeof(llist_t));
	node->data = data;
	node->link = NULL;
	*slot = node;
}

/* Remove first element from the list and return it */
/*
 * Detach the first node of the list, free the node itself and return
 * the data pointer it carried.  Returns NULL when the list is empty.
 * The returned data is not freed here.
 */
void* llist_pop(llist_t **head)
{
	llist_t *first = *head;
	void *payload;

	if (first == NULL)
		return NULL;

	payload = first->data;
	*head = first->link;
	free(first);

	return payload;
}

/* Unlink arbitrary given element from the list */
/*
 * Remove the node elm from the list without freeing it.
 *
 * Only the pointer that referred to elm (the head pointer or the
 * predecessor's link) is rewritten; elm->link and elm->data are left
 * untouched.  Nothing happens when elm is NULL, the list is empty, or
 * elm is not a member of the list.
 */
void llist_unlink(llist_t **head, llist_t *elm)
{
	llist_t **slot;

	if (elm == NULL || *head == NULL)
		return;

	/* Find the pointer that currently points at elm and bypass elm */
	for (slot = head; *slot != NULL; slot = &(*slot)->link) {
		if (*slot == elm) {
			*slot = elm->link;
			return;
		}
	}
}

/*
 * strdup() wrapper: returns NULL for a NULL input, otherwise a newly
 * allocated copy of s.  Exits the process with status 1 if strdup()
 * fails.
 */
char* xstrdup(const char *s)
{
	char *copy = NULL;

	if (s != NULL) {
		copy = strdup(s);
		if (copy == NULL)
			exit(1);
	}

	return copy;
}

/*
 * Strip leading and trailing whitespace from s in place.
 *
 * Trailing whitespace is whatever isspace() accepts; leading whitespace
 * is the set " \n\r\t\v".  The remaining text is moved to the start of
 * the buffer and NUL-terminated.  s must be a writable, NUL-terminated
 * string.
 */
void trim(char *s)
{
	size_t len = strlen(s);
	size_t lws;

	/*
	 * Trim trailing whitespace.  The cast keeps the argument in the
	 * range isspace() is defined for when char is signed and the byte
	 * is >= 0x80.
	 */
	while (len && isspace((unsigned char)s[len - 1]))
		--len;

	/* Trim leading whitespace; len != 0 guarantees a non-blank char exists */
	if (len) {
		lws = strspn(s, " \n\r\t\v");
		if (lws) {
			len -= lws;
			memmove(s, s + lws, len);
		}
	}
	s[len] = '\0';
}


/* Copyright (C) 2008 by Tito Ragusa <[EMAIL PROTECTED]>
*  Lines starting with "#" are ignored. Note that end-of-line
*  comments are supported.
*  Blank lines are ignored. 
*  Lines may be indented freely.
*  A "\" character at the very end of the line indicates the next line
*  should be treated as a continuation of the current one.
*  Named options are in the format  NAME=OPTION (or NAME = OPTION ???)
*  (or in the format NAME:OPTION???) (or NAME : OPTION ???) MAYBE PASS THE OPTION NAME
*  Full line options are returned as is and parsed by the caller
*  who knows the specific format (???)   MAYBE PASS NULL AS OPTION NAME
*/

/*
 * Read a config file and return its option lines as a linked list.
 *
 * For every line of the file:
 *  - a trailing '\' joins the line with the following one (repeatedly);
 *  - leading whitespace is skipped;
 *  - blank lines and lines whose first non-blank character is '#' are
 *    dropped;
 *  - everything from the first '#' on is cut off as an end-of-line
 *    comment.
 * What remains is stored as a separately allocated string in a new list
 * node, in file order.
 *
 * Returns the list head, or NULL when filename is NULL, the file cannot
 * be opened, or it contains no option lines.  The caller owns the nodes
 * and their data strings.
 */
llist_t *parse_config(const char *filename)
{
	llist_t *option_list = NULL;
	char *line;
	FILE *file;

	if (!filename)
		return NULL;

	file = fopen(filename, "r");
	if (!file)
		return NULL; /* cannot read */

	while ((line = xmalloc_fgetline(file))) {
		char *p;
		char *t;

		while ((p = last_char_is(line, '\\'))) {
			/* Multi-line object */
			char *next_line;
			char *joined;

			*p = '\0'; /* Remove '\' */
			next_line = xmalloc_fgetline(file); /* NULL at EOF */
			joined = xasprintf("%s%s", line, next_line ? next_line : "");
			free(next_line);
			free(line); /* the joined copy replaces the old buffer */
			line = joined;
		}

		p = skip_whitespace(line); /* Skip leading whitespace */
		if (*p && *p != '#') { /* Not empty, not a (possibly indented) comment */
			t = strchr(p, '#');
			if (t) /* End of line comment - cut it */
				*t = '\0';
			/* The list gets its own copy, so line can always be released below */
			llist_add_to_end(&option_list, xstrdup(p));
		}
		free(line);
	}
	fclose(file);

	return option_list;
}

/*
 * Extract the value of a named option from the option list.
 *
 * A list entry matches when it starts with option_name followed either
 * directly by separator or by one space and then separator
 * ("NAME=value" or "NAME =value").  Every matching entry is removed from
 * the list and released; if the option occurs more than once the value
 * of the last occurrence is returned.
 *
 * Returns a newly allocated copy of the value with leading and trailing
 * whitespace trimmed, or NULL when the option is not present or an
 * argument is invalid (NULL option_name / option_list, or a '\0'
 * separator).  The caller releases the result with free().
 */
char * parse_named_option(const char *option_name, char separator, llist_t **option_list)
{
	llist_t *node;
	llist_t *next;
	size_t name_len;
	char *value = NULL;

	if (!option_name || !option_list || separator == '\0')
		return NULL;

	name_len = strlen(option_name);
	for (node = *option_list; node; node = next) {
		char *cursor;

		next = node->link; /* node may be freed below */

		if (!node->data || strncmp(option_name, node->data, name_len) != 0)
			continue;

		/* Accept "NAME<sep>" or "NAME <sep>" */
		cursor = node->data + name_len;
		if (*cursor != separator) {
			if (cursor[0] != ' ' || cursor[1] != separator)
				continue;
			cursor++;
		}
		cursor++; /* Go past the separator */

		trim(cursor); /* Trim leading and trailing whitespace */
		free(value);  /* Drop the value of an earlier duplicate */
		value = xstrdup(cursor);

		/* The entry is consumed: take it out of the list and release it */
		llist_unlink(option_list, node);
		free(node->data);
		free(node);
	}

	return value;
}

/*
 * Return a pointer to the first whitespace character (per isspace()) in
 * s, or to the terminating NUL if s contains none.
 *
 * s must be a valid NUL-terminated string.
 */
char* skip_non_whitespace(const char *s)
{
	/* Cast: isspace() is undefined for negative values other than EOF */
	while (*s && !isspace((unsigned char)*s))
		++s;

	return (char *) s;
}

/*
 * Split a line into whitespace-separated fields, in place.
 *
 * data    writable NUL-terminated line; separators are overwritten with
 *         '\0'.  Leading whitespace is not skipped.
 * fields  number of fields to produce; must equal the number of
 *         char ** arguments that follow.
 * ...     `fields` pointers of type char **, each receiving a pointer
 *         into data.
 *
 * The first fields-1 outputs receive one whitespace-delimited token
 * each; the last output receives the rest of the line unmodified (so it
 * may contain whitespace).  When the line holds fewer tokens than
 * requested, the surplus outputs point at the empty string at the end
 * of data.  Nothing is written when data is NULL or fields < 1.
 */
void parse_option_lines(char *data, int fields, ...)
{
	va_list p;
	int i;

	/* With fields < 1 there is no output slot that could be read safely */
	if (!data || fields < 1)
		return;

	va_start(p, fields);
	for (i = 0; i < fields - 1; i++) {
		char *end = data;

		/* Find the end of the current token */
		while (*end && !isspace((unsigned char)*end))
			end++;

		*(va_arg(p, char **)) = data;

		/*
		 * Terminate the token and move to the next one.  When the token
		 * ended at the string terminator, stay on it instead of stepping
		 * past the end of the buffer.
		 */
		if (*end) {
			*end++ = '\0';
			while (isspace((unsigned char)*end))
				end++;
		}
		data = end;
	}
	/* Last field: whatever is left of the line */
	*(va_arg(p, char **)) = data;
	va_end(p);
}

/*
 * One crontab entry split into its seven fields (the sample file's
 * "m h dom mon dow user command" layout).  main() fills the string
 * members through parse_option_lines(), so they point into the line
 * buffer that was parsed rather than into separate allocations.
 */
typedef struct crontab_t {
	char *min;              /* minute field */
	char *hour;             /* hour field */
	char *day;              /* day-of-month field */
	char *month;            /* month field */
	char *day_of_week;      /* day-of-week field */
	char *user;             /* user field */
	char *cmd;              /* command: remainder of the line */
	struct crontab_t *link; /* next entry in the list */
} crontab_t;

/*
 * Test driver: parse the crontab-style file named by argv[1], print the
 * seven fields of every entry line, then print the SHELL and PATH named
 * options.
 *
 * Entries are collected in a NULL-terminated list of crontab_t nodes
 * (in file order) which is released before returning.  Always returns 0.
 */
int main(int argc, char **argv)
{
	llist_t *config_options = NULL;
	crontab_t *crontab_options = NULL;         /* head of the entry list */
	crontab_t **crontab_tail = &crontab_options; /* where the next entry is linked */
	crontab_t *entry;
	char *line;
	char *shell = NULL;
	char *path  = NULL;

	if (argc > 1)
		config_options = parse_config(argv[1]);

	if (config_options) {
		shell = parse_named_option("SHELL", '=', &config_options);
		path = parse_named_option("PATH", '=', &config_options);
		while ((line = llist_pop(&config_options))) {
			/* One node per line; the fields point into `line` */
			entry = xmalloc(sizeof(*entry));
			parse_option_lines(line, 7,	&entry->min,
										&entry->hour,
										&entry->day,
										&entry->month,
										&entry->day_of_week,
										&entry->user,
										&entry->cmd);
			printf("min:   %s\n", entry->min);
			printf("hour:  %s\n", entry->hour);
			printf("day:   %s\n", entry->day);
			printf("month  %s\n", entry->month);
			printf("dow:   %s\n", entry->day_of_week);
			printf("user:  %s\n", entry->user);
			printf("cmd:   %s\n", entry->cmd);

			/* Append, keeping the list NULL-terminated and the head reachable */
			entry->link = NULL;
			*crontab_tail = entry;
			crontab_tail = &entry->link;
		}
	}

	/* Passing NULL to %s is undefined; print the placeholder explicitly */
	printf("SHELL='%s'\n", shell ? shell : "(null)");
	printf("PATH='%s'\n", path ? path : "(null)");

	/*
	 * Release the entries.  parse_option_lines() stores the start of the
	 * line in the first field, so entry->min is the line buffer popped
	 * from the option list.  shell and path are left to process exit.
	 */
	while (crontab_options) {
		entry = crontab_options;
		crontab_options = entry->link;
		free(entry->min);
		free(entry);
	}

	return 0;
}
_______________________________________________
busybox mailing list
[email protected]
http://busybox.net/cgi-bin/mailman/listinfo/busybox

Reply via email to