Re: [ltt-dev] Babeltrace Update - Convert and Merge The Content of Multiple Subdirectories

Mathieu Desnoyers Tue, 19 Jul 2011 08:44:37 -0700

* Amer Alhalabi ([email protected]) wrote:
> 
> 
> 
> Hello Guys,
> 
> I have updated babeltrace so that it can convert multiple
> subdirectories (given the path of the parent directory) at the same
> time and merge the events into one ASCII file.
> 
> Small changes have been made to babeltrace.c and babeltrace-lib.c
> --see attachement
> 
> Please have a look at them and lemme know what you think.  (Later on I
> might do some other scalability imporvements)


Hi Amer,

Thanks for submitting this. A few points for next time: please generate
a diff of the changes, and submit it directly in the email body (without
formatting). This helps accelerate the review process. Meanwhile, I
generated the diff from your code for review below,


> diff --git a/converter/babeltrace-lib.c b/converter/babeltrace-lib.c
> index 6cc2b7b..10f7544 100644
> --- a/converter/babeltrace-lib.c
> +++ b/converter/babeltrace-lib.c
> @@ -56,49 +56,58 @@ int stream_compare(void *a, void *b)
>               return 0;
>  }
>  
> -int convert_trace(struct trace_descriptor *td_write,
> -               struct trace_descriptor *td_read)
> +int convert_trace(struct trace_descriptor *td_write, struct trace_descriptor 
> **td_read_arr, int td_read_arr_index)
>  {
> -     struct ctf_trace *tin = container_of(td_read, struct ctf_trace, parent);
> -     struct ctf_text_stream_pos *sout =
> -             container_of(td_write, struct ctf_text_stream_pos, 
> trace_descriptor);
> -     int stream_id;
> -     int ret = 0;
> +     // create a heap to the hold the streams from the td_read 
> trace_descriptors saved in td_read_arr

Please avoid comments with "//". Always use /*  */ for single-line
commands, and

/*
 * blah blah
 * blah
 */

for multi-line comments. I follow quite stricly the Linux kernel coding
style (Documentation/CodingStyle of the Linux kernel source tree). You
can verify that your patch follows the coding style with the program
scripts/checkpatch.pl in the Linux kernel sources.

> +     struct ptr_head *global_heap;

Please add whiteline between declarations and code.

> +     global_heap = g_new(struct ptr_heap, 1);

Don't call a local variable "global", this is confusing.

> +     heap_init(global_heap, 0, stream_compare);
> -     tin->stream_heap = g_new(struct ptr_heap, 1);
> -     heap_init(tin->stream_heap, 0, stream_compare);
> +     struct ctf_text_stream_pos *sout = container_of(td_write, struct 
> ctf_text_stream_pos, trace_descriptor);

Never mix declarations and code, except at the beginning of a new block.

> +     int ret = 0;
>  
> -     /* Populate heap with each stream */
> -     for (stream_id = 0; stream_id < tin->streams->len; stream_id++) {
> -             struct ctf_stream_class *stream = 
> g_ptr_array_index(tin->streams, stream_id);
> -             int filenr;
> +     int i_for;

just "i" would do.

> +     for(i_for=0;i_for<td_read_arr_index;i_for++)

coding style:

for (i = 0; i < td_read_arr_index; i++) {


> +     {
> +             struct trace_descriptor *td_read = td_read_arr[i_for];
 
-               if (!stream)
+               if(!td_read)

use a space after if, for, switch and while.

                        continue;
> -             for (filenr = 0; filenr < stream->streams->len; filenr++) {
> -                     struct ctf_file_stream *file_stream = 
> g_ptr_array_index(stream->streams, filenr);
> -                     ret = read_event(file_stream);
> -                     if (ret == EOF) {
> -                             ret = 0;
> +
> +             struct ctf_trace *tin = container_of(td_read, struct ctf_trace, 
> parent);

No declaration and code mix.

> +             int stream_id;
> +
> +             // Populate heap with each stream
> +             for (stream_id = 0; stream_id < tin->streams->len; stream_id++) 
> {
> +                     struct ctf_stream_class *stream = 
> g_ptr_array_index(tin->streams, stream_id);
> +                     int filenr;
> +
> +                     if (!stream)
>                               continue;
> -                     } else if (ret)
> -                             goto end;
> -                     /* Add to heap */
> -                     ret = heap_insert(tin->stream_heap, file_stream);
> -                     if (ret) {
> -                             fprintf(stdout, "[error] Out of memory.\n");
> -                             goto end;
> +                     for (filenr = 0; filenr < stream->streams->len; 
> filenr++) {
> +                             struct ctf_file_stream *file_stream = 
> g_ptr_array_index(stream->streams, filenr);
> +                             ret = read_event(file_stream);
> +                             if (ret == EOF) {
> +                                     ret = 0;
> +                                     continue;
> +                             } else if (ret)
> +                                     goto end;
> +                             // Add to heap
> +                             ret = heap_insert(global_heap, file_stream);
> +                             if (ret) {
> +                                     fprintf(stdout, "[error] Out of 
> memory.\n");
> +                                     goto end;
> +                             }
>                       }
>               }
>       }
> -
> -     /* Replace heap entries until EOF for each stream (heap empty) */
> +     // Replace heap entries until EOF for each stream (heap empty)
>       for (;;) {
> -             struct ctf_file_stream *file_stream, *removed;
>  
> -             file_stream = heap_maximum(tin->stream_heap);
> +             struct ctf_file_stream *file_stream, *removed;
> +             file_stream = heap_maximum(global_heap);
>               if (!file_stream) {
> -                     /* end of file for all streams */
> +                     // end of file for all streams
>                       ret = 0;
>                       break;
>               }
> @@ -109,19 +118,22 @@ int convert_trace(struct trace_descriptor *td_write,
>               }
>               ret = read_event(file_stream);
>               if (ret == EOF) {
> -                     removed = heap_remove(tin->stream_heap);
> +                     //removed = heap_remove(tin->stream_heap);
> +                     removed = heap_remove(global_heap);
>                       assert(removed == file_stream);
>                       ret = 0;
>                       continue;
>               } else if (ret)
>                       goto end;
> -             /* Reinsert the file stream into the heap, and rebalance. */
> -             removed = heap_replace_max(tin->stream_heap, file_stream);
> +             // Reinsert the file stream into the heap, and rebalance.
> +             //removed = heap_replace_max(tin->stream_heap, file_stream);
> +             removed = heap_replace_max(global_heap, file_stream);
>               assert(removed == file_stream);
>       }
> -
>  end:
> -     heap_free(tin->stream_heap);
> -     g_free(tin->stream_heap);
> +     //heap_free(tin->stream_heap);
> +     //g_free(tin->stream_heap);
> +     heap_free(global_heap);
> +     g_free(global_heap);
>       return ret;
>  }
> diff --git a/converter/babeltrace.c b/converter/babeltrace.c
> index dd335ed..96b99bb 100644
> --- a/converter/babeltrace.c
> +++ b/converter/babeltrace.c
> @@ -27,6 +27,10 @@
>  #include <sys/stat.h>
>  #include <sys/types.h>
>  #include <fcntl.h>
> +#include <dirent.h>
> +
> +#define _XOPEN_SOURCE 500
> +#include <ftw.h>
>  
>  static char *opt_input_format;
>  static char *opt_output_format;
> @@ -37,6 +41,12 @@ static const char *opt_output_path;
>  int babeltrace_verbose, babeltrace_debug;
>  int opt_field_names;
>  

variables and functions only referenced within a single C file should be
declared "static", so no symbols pollute outside of the file. This also
lets the compiler do more optimisations, because it is allowed to assume
it has a complete view of interactions for these variables.


> +int td_read_arr_index;  // the index of the last element in the array + 1  
> ;i.e. position
> +#define td_read_arr_size 1024   // max number of td_read (trace_descriptors) 
> that can be handled ;i.e. Max. num. of subdirectories

Please dynamically grow the array size as needed. I try to avoid these
hardcoded limits as much as possible.

> +struct trace_descriptor *td_read_arr[td_read_arr_size]; // an array of 
> td_read trace_descriptors
> +struct format *fmt_read;

> +
> +
>  void strlower(char *str)
>  {
>       while (*str) {
> @@ -149,11 +159,68 @@ end:
>       return ret;
>  }
>  
> +/*
> + * this is the callback function for File Tree Walk (nftw)

Please use at most 80-columns text, and ideally slightly less (e.g. 72
or 76 columns), so when we comment code in email threads 80-columns are
sufficient to view the code.

> + * it receives the path of the current file/dir being visited with a flag to 
> indicate the type of the file/dir
> + * if the file being visited is a directory, then check if it contains a 
> metadata file.
> + * if it does, then open that directory for reading and save a 
> trace_descriptor pointer to that dir in the rd_read_arr
> + */
> +static int traverse_dir(const char *fpath, const struct stat *sb, int tflag, 
> struct FTW *ftwbuf)
> +{
> +     if( tflag == FTW_D )

if () {

> +     {
> +             DIR *dir;
> +             int dirfd;
> +             int fd;
> +
> +             dir = opendir(fpath);
> +             if(!dir){

if () {

> +                     fprintf(stdout,"[error] invalid path:%s .\n\n", fpath);
> +                     exit(EXIT_FAILURE);

return error code rather than exit ?

> +             }
> +             dirfd = open(fpath, 0);
> +             if (dirfd < 0) {
> +                     fprintf(stdout, "[error] unable to open trace directory 
> file descriptor.\n");
> +                     exit(EXIT_FAILURE);

return error code rather than exit ?

> +             }
> +
> +             fd = openat(dirfd, "metadata", O_RDONLY);
> +             if( fd < 0){

if () {

> +                     close(dirfd);
> +                     closedir(dir);
> +             }

} else {

> +             else
> +             {
> +                     close(fd);
> +                     close(dirfd);
> +                     closedir(dir);
> +                     struct trace_descriptor *td_read;

mixed declaration and code.

> +                     td_read = fmt_read->open_trace(fpath, O_RDONLY);
> +                     if (!td_read) {
> +                             fprintf(stdout, "[error] opening trace \"%s\" 
> for reading.\n\n", fpath);
> +                             exit(EXIT_FAILURE);

return error code rather than exit ?

> +                     }
> +                     if( td_read_arr_index < td_read_arr_size){

if (td_read_arr_index < td_read_arr_size) {

> +                             td_read_arr[td_read_arr_index++] = td_read;
> +                     }
> +                     else

} else {

> +                     {
> +                             fprintf(stdout, "[error] out of range\n\n\n");

\n\n\n -> just \n

> +                             exit(EXIT_FAILURE);

just return an error code here rather than exit ?

> +                     }
> +             }
> +     }
> +    return 0;           /* To tell nftw() to continue */
> +}
> +
> +
>  int main(int argc, char **argv)
>  {
>       int ret;
> -     struct format *fmt_read, *fmt_write;
> -     struct trace_descriptor *td_read, *td_write;
> +     //struct format *fmt_read, *fmt_write;
> +     struct format *fmt_write;
> +     //struct trace_descriptor *td_read, *td_write;
> +     struct trace_descriptor *td_write;
>  
>       ret = parse_options(argc, argv);
>       if (ret < 0) {
> @@ -196,35 +263,67 @@ int main(int argc, char **argv)
>               exit(EXIT_FAILURE);
>       }
>  
> +     struct stat st;
> +     if(stat(opt_input_path,&st) != 0)

if (stat(opt_input_path,&st))

> +             perror(opt_input_path);
> +
> +     /* pass the input path (provided by the user from the command line 
> options to nftw function

/*
 * ....

+80 cols.

> +      * specify traverse_dir() as the callback function
> +      *  depth=10 which is the number of directories that can be accessed at 
> the same time

not really. This is more precisely the max number of file descriptors nftw will 
open at
a given time.

> +      *  flags=0 ; check the documentation of nftw for more info about these 
> flags
> +      *
> +      *  after the traversal, if no trace_descriptor is saved in the 
> td_read_Arr , it means that no metadata file was found
> +      */
> +     td_read_arr_index = 0;
> +     nftw(opt_input_path,traverse_dir, 10, 0);

error code handling ?

> +     if(td_read_arr_index == 0){

if () {

> +             fprintf(stdout, "[warning] no metadata file was found. no 
> output was generated\n");
> +             return 0;
> +     }
> +     /*
>       td_read = fmt_read->open_trace(opt_input_path, O_RDONLY);
>       if (!td_read) {
>               fprintf(stdout, "[error] opening trace \"%s\" for reading.\n\n",
>                       opt_input_path);
>               goto error_td_read;
>       }
> +     */
> +
>  
>       td_write = fmt_write->open_trace(opt_output_path, O_RDWR);
>       if (!td_write) {
> -             fprintf(stdout, "Error opening trace \"%s\" for writing.\n\n",
> +             fprintf(stdout, "[error] failed to open trace \"%s\" for 
> writing.\n\n",
>                       opt_output_path ? : "<none>");
>               goto error_td_write;
>       }
>  
> -     ret = convert_trace(td_write, td_read);
> -     if (ret) {
> -             fprintf(stdout, "Error printing trace.\n\n");
> +     //ret = convert_trace(td_write, td_read);
> +     ret = convert_trace(td_write, td_read_arr, td_read_arr_index);
> +     if (ret)
>               goto error_copy_trace;
> -     }
> +
>  
>       fmt_write->close_trace(td_write);
> -     fmt_read->close_trace(td_read);
> +
> +     int i_for;

mixed declaration and code.

> +     for(i_for=0;i_for<td_read_arr_index;i_for++)

use "i" instead.

for () {


> +     {
> +             //fmt_read->close_trace(td_read);
> +             fmt_read->close_trace(td_read_arr[i_for]);
> +     }
> +
> +     printf("finished converting. output written to:\n%s\n", 
> opt_output_path);
>       exit(EXIT_SUCCESS);
>  
>       /* Error handling */
>  error_copy_trace:
>       fmt_write->close_trace(td_write);
>  error_td_write:
> -     fmt_read->close_trace(td_read);
> +     //fmt_read->close_trace(td_read);
> +     for(i_for=0;i_for<td_read_arr_size;i_for++)

same here.

> +     {
> +             fmt_read->close_trace(td_read_arr[i_for]);
> +     }
>  error_td_read:
>       exit(EXIT_FAILURE);
>  }

Thanks!

Mathieu

-- 
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com

_______________________________________________
ltt-dev mailing list
[email protected]
http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev

Re: [ltt-dev] Babeltrace Update - Convert and Merge The Content of Multiple Subdirectories

Reply via email to