On 10/17/2013 06:56 AM, Lukas Slebodnik wrote: > On (10/10/13 23:03), Dmitri Pal wrote: >> Hello, >> >> The attached patches address ticket >> https://fedorahosted.org/sssd/ticket/2106 >> The patches do not define new flags as suggested in the ticket. >> As we discussed with Nalin we try to detect the encoding based on the >> BOM and do the right thing without flags. >> If we see that this is not enough we will add flags but that would add a >> new function so such change would be destined for a bigger release. >> For now I think the use case is addressed. If we see the need to other >> use cases we will open other tickets. >> >> The patch depends on the previous set of patches that deal with C-Style >> comments. >> >> I also found some other unrelated issue that I logged as a separate ticket. >> https://fedorahosted.org/sssd/ticket/2119 >> >> -- >> Thank you, >> Dmitri Pal >> >> Sr. Engineering Manager for IdM portfolio >> Red Hat Inc. >> >> >> ------------------------------- >> Looking to carve out IT costs? >> www.redhat.com/carveoutcosts/ >> >> >> > >From ee1988301ba164a12263b7faf8c9bf686174deab Mon Sep 17 00:00:00 2001 >> From: Dmitri Pal <[email protected]> >> Date: Thu, 10 Oct 2013 22:47:23 -0400 >> Subject: [PATCH 3/4] [INI] Convert files to UTF >> >> Patch adds functionality that detects BOM >> at the beginning of the file and uses >> it to convert to UTF8. >> If no BOM is detected file is assumed to be UTF8 >> encoded. >> --- >> ini/ini_fileobj.c | 317 >> +++++++++++++++++++++++++++++++++++++++++++++++++++-- >> 1 files changed, 306 insertions(+), 11 deletions(-) >> >> diff --git a/ini/ini_fileobj.c b/ini/ini_fileobj.c >> index 7ac9dc6..000d36e 100644 >> --- a/ini/ini_fileobj.c >> +++ b/ini/ini_fileobj.c >> @@ -21,14 +21,29 @@ >> >> #include "config.h" >> #include <errno.h> >> +#include <sys/types.h> >> +#include <sys/stat.h> >> +#include <fcntl.h> >> #include <string.h> >> #include <stdlib.h> >> +#include <iconv.h> >> #include "trace.h" >> #include "ini_defines.h" >> #include "ini_configobj.h" >> #include "ini_config_priv.h" >> #include "path_utils.h" >> >> +#define ICONV_BUFFER 100 >> + >> +#define BOM4_SIZE 4 >> +#define BOM3_SIZE 3 >> +#define BOM2_SIZE 2 >> + >> +#define INDEX_UTF32BE 0 >> +#define INDEX_UTF32LE 1 >> +#define INDEX_UTF16BE 2 >> +#define INDEX_UTF16LE 3 >> +#define INDEX_UTF8 4 >> >> /* Close file but not destroy the object */ >> void ini_config_file_close(struct ini_cfgfile *file_ctx) >> @@ -52,6 +67,7 @@ void ini_config_file_destroy(struct ini_cfgfile *file_ctx) >> >> if(file_ctx) { >> free(file_ctx->filename); >> + simplebuffer_free(file_ctx->file_data); >> if(file_ctx->file) fclose(file_ctx->file); >> free(file_ctx); >> } >> @@ -59,37 +75,298 @@ void ini_config_file_destroy(struct ini_cfgfile >> *file_ctx) >> TRACE_FLOW_EXIT(); >> } >> >> -/* Internal common initialization part */ >> -static int common_file_init(struct ini_cfgfile *file_ctx) >> +/* How much I plan to read? */ >> +size_t how_much_to_read(size_t left, size_t increment) >> +{ >> + if(left > increment) return increment; >> + else return left; >> +} >> + >> +int check_bom(int ind, unsigned char *buffer, size_t len, size_t *bom_shift) >> +{ >> + TRACE_FLOW_ENTRY(); >> + >> + if (len > BOM4_SIZE) { >> + if ((buffer[0] == 0x00) && >> + (buffer[1] == 0x00) && >> + (buffer[2] == 0xFE) && >> + (buffer[3] == 0xFF)) { >> + TRACE_FLOW_RETURN(INDEX_UTF32BE); >> + *bom_shift = BOM4_SIZE; >> + return INDEX_UTF32BE; >> + } >> + else if ((buffer[0] == 0xFF) && >> + (buffer[1] == 0xFE) && >> + (buffer[2] == 0x00) && >> + (buffer[3] == 0x00)) { >> + TRACE_FLOW_RETURN(INDEX_UTF32LE); >> + *bom_shift = BOM4_SIZE; >> + return INDEX_UTF32LE; >> + } >> + } >> + >> + if (len > BOM3_SIZE) { >> + if ((buffer[0] == 0xEF) && >> + (buffer[1] == 0xBB) && >> + (buffer[2] == 0xBF)) { >> + TRACE_FLOW_RETURN(INDEX_UTF8); >> + *bom_shift = BOM3_SIZE; >> + return INDEX_UTF8; >> + } >> + } >> + >> + if (len > BOM2_SIZE) { >> + if ((buffer[0] == 0xFE) && >> + (buffer[1] == 0xFF)) { >> + TRACE_FLOW_RETURN(INDEX_UTF16BE); >> + *bom_shift = BOM2_SIZE; >> + return INDEX_UTF16BE; >> + } >> + else if ((buffer[0] == 0xFF) && >> + (buffer[1] == 0xFE)) { >> + TRACE_FLOW_RETURN(INDEX_UTF16LE); >> + *bom_shift = BOM2_SIZE; >> + return INDEX_UTF16LE; >> + } >> + } >> + >> + TRACE_FLOW_RETURN(ind); >> + return ind; >> +} >> + >> +/* Internal conversion part */ >> +static int common_file_convert(int raw_file, struct ini_cfgfile *file_ctx) >> { > This function is quite long (174 LOC). > It would be better to split it into smaller functions. > >> int error = EOK; >> + size_t read_cnt = 0; >> + size_t total_read = 0; >> + size_t to_read = 0; >> + size_t in_buffer = 0; >> + int ind = INDEX_UTF8; >> + iconv_t conv = 0; >> + size_t conv_res = 0; >> + char read_buf[ICONV_BUFFER+1]; >> + char result_buf[ICONV_BUFFER]; >> + char *src, *dest; >> + size_t to_convert = 0; >> + size_t room_left = 0; >> + size_t bom_shift = 0; >> + const char *encodings[] = { "UTF-32BE", >> + "UTF-32LE", >> + "UTF-16BE", >> + "UTF-16LE", >> + "UTF-8" }; >> >> TRACE_FLOW_ENTRY(); >> >> + do { >> + to_read = how_much_to_read(file_ctx->file_stats.st_size - >> total_read, >> + ICONV_BUFFER - in_buffer); >> + >> + TRACE_INFO_NUMBER("About to read", to_read); >> + errno = 0; >> + read_cnt = read(raw_file, read_buf + in_buffer, to_read); >> + if (read_cnt == -1) { >> + error = errno; >> + close(raw_file); > read can fail on second round of loop do-while > and conv handle will not be closed. > >> + TRACE_ERROR_NUMBER("Failed to read data from file", error); >> + return error; >> + } >> + >> + if (read_cnt != to_read) { >> + error = EIO; >> + close(raw_file); > The same situation is here. > >> + TRACE_ERROR_NUMBER("Read less than required", error); >> + return error; >> + } >> + >> + /* First time do some initialization */ >> + if(total_read == 0) { > ^^^ > missing space >> + TRACE_INFO_STRING("Reading first time.","Checking BOM"); >> + >> + ind = check_bom(ind, (unsigned char *)read_buf, read_cnt, >> &bom_shift); > > ^^^^^^^^^^^ > line length > > 80 >> + >> + TRACE_INFO_STRING("Converting to", encodings[INDEX_UTF8]); >> + TRACE_INFO_STRING("Converting from", encodings[ind]); >> + >> + errno = 0; >> + conv = iconv_open(encodings[INDEX_UTF8], encodings[ind]); >> + if (conv == (iconv_t) -1) { >> + error = errno; >> + close(raw_file); >> + TRACE_ERROR_NUMBER("Failed to create converter", error); >> + return error; >> + } >> + } > ^^^^^ > I would prefer to have initialisation >>> if(total_read == 0) <<< > before the outermost do while, because it generate coverity warning. > Overwriting "conv" in "conv = iconv_open(encodings[4], > encodings[ind])" leaks the storage that "conv" points > to. > I know that it is a fall positive, but code will be simpler. > >> + total_read += read_cnt; >> + TRACE_INFO_NUMBER("Total read", total_read); >> + >> + do { >> + /* Do conversion */ >> + errno = 0; >> + src = read_buf + bom_shift; >> + dest = result_buf; >> + to_convert = read_cnt + in_buffer - bom_shift; >> + bom_shift = 0; >> + room_left = ICONV_BUFFER; >> + conv_res = iconv(conv, &src, &to_convert, &dest, &room_left); >> + if (conv == (iconv_t) -1) { > ^^^^ > conv_res should be checked and it has type size_t >> + error = errno; >> + switch(error) { >> + case EILSEQ: >> + TRACE_ERROR_NUMBER("Invalid multibyte encoding", error); >> + close(raw_file); >> + iconv_close(conv); >> + return error; >> + case EINVAL: >> + /* We need to just read more if we can */ >> + if (total_read != file_ctx->file_stats.st_size) { >> + TRACE_INFO_STRING("Incomplete sequence.", ""); >> + TRACE_INFO_NUMBER("File size.", >> + file_ctx->file_stats.st_size); >> + memmove(read_buf, src, to_convert); >> + in_buffer = to_convert; >> + } >> + else { >> + /* Or return error if we can't */ >> + TRACE_ERROR_NUMBER("Incomplete sequence", error); >> + close(raw_file); >> + iconv_close(conv); >> + return error; >> + } > ^^^^^^^^^ > missing break. > Is it intentional? > http://www.freeipa.org/page/Coding_Style#Switch > /* FALLTHROUGH */ >> + case E2BIG: >> + TRACE_INFO_STRING("No room in the output buffer.", ""); >> + error = simplebuffer_add_raw(file_ctx->file_data, >> + result_buf, >> + ICONV_BUFFER - room_left, >> + ICONV_BUFFER); >> + if (error) { >> + TRACE_ERROR_NUMBER("Failed to store converted >> bytes", >> + error); >> + close(raw_file); >> + iconv_close(conv); >> + return error; >> + } >> + >> + room_left = ICONV_BUFFER; >> + dest = result_buf; >> + continue; >> + default: >> + TRACE_ERROR_NUMBER("Unexpected internal error", >> + error); >> + close(raw_file); >> + iconv_close(conv); >> + return ENOTSUP; >> + } >> + } >> + /* The whole buffer was sucessfully converted */ >> + error = simplebuffer_add_raw(file_ctx->file_data, >> + result_buf, >> + ICONV_BUFFER - room_left, >> + ICONV_BUFFER); >> + if (error) { >> + TRACE_ERROR_NUMBER("Failed to store converted bytes", >> + error); >> + close(raw_file); >> + iconv_close(conv); >> + return error; >> + } >> + TRACE_INFO_STRING("Saved procesed portion.", >> + (char >> *)simplebuffer_get_vbuf(file_ctx->file_data)); >> + >> + in_buffer = 0; >> + break; >> + } >> + while (1); >> + } >> + while(total_read < file_ctx->file_stats.st_size); >> + >> + close(raw_file); >> + iconv_close(conv); >> + >> /* Open file */ >> - TRACE_INFO_STRING("File", file_ctx->filename); >> + TRACE_INFO_STRING("File data", >> + (char *)simplebuffer_get_vbuf(file_ctx->file_data)); >> + TRACE_INFO_NUMBER("File len", >> + simplebuffer_get_len(file_ctx->file_data)); >> + TRACE_INFO_NUMBER("Size", >> + file_ctx->file_data->size); >> errno = 0; >> - file_ctx->file = fopen(file_ctx->filename, "r"); >> + file_ctx->file = fmemopen(simplebuffer_get_vbuf(file_ctx->file_data), >> + simplebuffer_get_len(file_ctx->file_data), >> + "r"); >> if (!(file_ctx->file)) { >> error = errno; >> TRACE_ERROR_NUMBER("Failed to open file", error); >> return error; >> } >> >> - file_ctx->stats_read = 0; >> + TRACE_FLOW_EXIT(); >> + return EOK; >> +} >> + >> + >> +/* Internal common initialization part */ >> +static int common_file_init(struct ini_cfgfile *file_ctx) >> +{ >> + int error = EOK; >> + int raw_file = 0; >> + int stat_ret = 0; >> + >> + TRACE_FLOW_ENTRY(); >> + >> + TRACE_INFO_STRING("File", file_ctx->filename); >> + >> + /* Open file in binary mode first */ >> + errno = 0; >> + raw_file = open(file_ctx->filename, O_RDONLY); >> + if (raw_file == -1) { >> + error = errno; >> + TRACE_ERROR_NUMBER("Failed to open file in binary mode", error); >> + return error; >> + } >> + >> + /* Get the size of the file */ >> + errno = 0; >> + stat_ret = fstat(raw_file, &(file_ctx->file_stats)); >> + if (stat_ret == -1) { >> + error = errno; >> + TRACE_ERROR_NUMBER("Failed to get file stats", error); > Resource leak > Handle variable "raw_file" going out of scope leaks the handle. >> + return error; >> + } >> >> - /* Collect stats */ >> - if (file_ctx->metadata_flags & INI_META_STATS) { >> + /* Trick to overcome the fact that >> + * fopen and fmemopen behave differently when file >> + * is 0 length >> + */ >> + if (file_ctx->file_stats.st_size) { >> + error = common_file_convert(raw_file, file_ctx); >> + if (error) { >> + TRACE_ERROR_NUMBER("Failed to convert file", >> + error); >> + close(raw_file); >> + return error; >> + } >> + } >> + else { >> errno = 0; >> - if (fstat(fileno(file_ctx->file), >> - &(file_ctx->file_stats)) < 0) { >> + file_ctx->file = fdopen(raw_file, "r"); >> + if (!(file_ctx->file)) { >> error = errno; >> - TRACE_ERROR_NUMBER("Failed to get file stats.", error); >> + TRACE_ERROR_NUMBER("Failed to fdopen file", error); >> return error; >> } >> + >> + } >> + >> + /* Collect stats */ >> + if (file_ctx->metadata_flags & INI_META_STATS) { >> file_ctx->stats_read = 1; >> } >> - else memset(&(file_ctx->file_stats), 0, sizeof(struct stat)); >> + else { >> + memset(&(file_ctx->file_stats), 0, sizeof(struct stat)); >> + file_ctx->stats_read = 0; >> + } >> >> TRACE_FLOW_EXIT(); >> return EOK; >> @@ -119,6 +396,15 @@ int ini_config_file_open(const char *filename, >> >> new_ctx->filename = NULL; >> new_ctx->file = NULL; >> + new_ctx->file_data = NULL; >> + >> + error = simplebuffer_alloc(&(new_ctx->file_data)); >> + if (error) { >> + TRACE_ERROR_NUMBER("Failed to allocate buffer ctx.", error); >> + ini_config_file_destroy(new_ctx); >> + return error; >> + >> + } >> >> /* Store flags */ >> new_ctx->metadata_flags = metadata_flags; >> @@ -176,6 +462,15 @@ int ini_config_file_reopen(struct ini_cfgfile >> *file_ctx_in, >> } >> >> new_ctx->file = NULL; >> + new_ctx->file_data = NULL; >> + >> + error = simplebuffer_alloc(&(new_ctx->file_data)); >> + if (error) { >> + TRACE_ERROR_NUMBER("Failed to allocate buffer ctx.", error); >> + ini_config_file_destroy(new_ctx); > It is better to initialize new_ctx with memset or initialize > new_ctx->filename to NULL, because ini_config_file_destroy can access to > uninitialized variable. > >> + return error; >> + >> + } >> >> /* Store flags */ >> new_ctx->metadata_flags = file_ctx_in->metadata_flags; >> -- >> 1.7.1 >> > >From e4fcd1ba0f86675e6fe6455c80d5215b821af973 Mon Sep 17 00:00:00 2001 >> From: Dmitri Pal <[email protected]> >> Date: Wed, 9 Oct 2013 23:29:22 -0400 >> Subject: [PATCH 1/4] [INI] Expose buffer context as void >> >> In some cases the intenal buffer needs to be accessed as void. >> Add a function that would do that instead of type casting >> and dealing with 'const'. >> --- >> basicobjects/simplebuffer.c | 7 +++++++ >> basicobjects/simplebuffer.h | 4 ++++ >> 2 files changed, 11 insertions(+), 0 deletions(-) >> > ACK > > >From b48891b2753bd8d26f71f6cbb18b28bd22ece9cc Mon Sep 17 00:00:00 2001 >> From: Dmitri Pal <[email protected]> >> Date: Thu, 10 Oct 2013 22:51:22 -0400 >> Subject: [PATCH 4/4] [INI] Updated unit test for UTF8 conversion >> >> Patch adds new test files and bom creation function. >> --- >> ini/ini.d/real16be.conf | Bin 0 -> 3288 bytes >> ini/ini.d/real16le.conf | Bin 0 -> 3288 bytes >> ini/ini.d/real32be.conf | Bin 0 -> 6576 bytes >> ini/ini.d/real32le.conf | Bin 0 -> 6576 bytes >> ini/ini.d/real8.conf | 70 >> +++++++++++++++++++++++++++++++++++++++++++++++ >> ini/ini_parse_ut.c | 35 +++++++++++++++++++++++ >> 6 files changed, 105 insertions(+), 0 deletions(-) >> create mode 100644 ini/ini.d/real16be.conf >> create mode 100644 ini/ini.d/real16le.conf >> create mode 100644 ini/ini.d/real32be.conf >> create mode 100644 ini/ini.d/real32le.conf >> create mode 100644 ini/ini.d/real8.conf >> > The are some trailing whitespaces in real8.conf, but it seems like > intentional. > > ACK > > >From f0c79c9091d9165d60aaa030d09fe63450215c5d Mon Sep 17 00:00:00 2001 >> From: Dmitri Pal <[email protected]> >> Date: Wed, 9 Oct 2013 23:31:49 -0400 >> Subject: [PATCH 2/4] [INI] Extend internal file handle >> >> When we need to decode the file we need to keep it >> in memory so we need to have a buffer. >> Adding simple buffer into the internal structure. >> --- >> ini/ini_config_priv.h | 3 +++ >> 1 files changed, 3 insertions(+), 0 deletions(-) >> >> diff --git a/ini/ini_config_priv.h b/ini/ini_config_priv.h >> index 89ad8a9..e1292f7 100644 >> --- a/ini/ini_config_priv.h >> +++ b/ini/ini_config_priv.h >> @@ -26,6 +26,7 @@ >> #include <sys/stat.h> >> #include <unistd.h> >> #include "collection.h" >> +#include "simplebuffer.h" >> #include "ini_comment.h" >> >> /* Configuration object */ >> @@ -71,6 +72,8 @@ struct ini_cfgfile { >> struct stat file_stats; >> /* Were stats read ? */ >> int stats_read; >> + /* Internal buffer */ >> + struct simplebuffer *file_data; >> }; >> >> /* Parsing error */ > ACK > > LS Thank you for review. I think there is a bug somewhere related to unicode decoding. I might be not copying all bytes or missing some. I would have to test more and fix your comments. I am withdrawing the patch for now.
-- Thank you, Dmitri Pal Sr. Engineering Manager for IdM portfolio Red Hat Inc. ------------------------------- Looking to carve out IT costs? www.redhat.com/carveoutcosts/ _______________________________________________ sssd-devel mailing list [email protected] https://lists.fedorahosted.org/mailman/listinfo/sssd-devel
