* find/util.c (get_info): call get_statinfo if need_inum and we don't already have the inode number. (apply_predicate): Call get_info if need_inum. * find/defs.h (enum EvaluationCost): Add NeedsInodeNumber. (struct predicate): Add need_inum. * find/tree.c (get_new_pred): Default need_inum to false. (get_new_pred_chk_op): Likewise. * find/ftsfind.c (consider_visiting): Copy the value of st_ino into statbuf.st_ino. * find/parser.c (parse_inum): Set need_inum=true. (parse_samefile): Set need_inum=false, but add a comment indicating that there is an optimisation opportunity. (make_segment): For -printf %i, set NeedsInodeNumber rather than NeedsStat. * find/pred.c (pred_inum): assert that the inode number is known (i.e. non-zero). (print_optlist): Print "[need inum]" when need_inum is true. * find/tree.c (opt_expr): Also reorder expressions if the right hand expression is -inum (that is, consider that test to be no more expensive than -type). (set_new_parent): Default need_inum to false. (costlookup): Set pred_inum=NeedsInodeNumber. (get_pred_cost): need_inum implies a data cost of NeedsInodeNumber. (cost_table): Add NeedsInodeNumber. (print_tree): Handle need_inum.
Signed-off-by: James Youngman <[email protected]> --- ChangeLog | 31 +++++++++++++++++++++++++++++++ NEWS | 9 +++++++++ find/defs.h | 4 ++++ find/ftsfind.c | 1 + find/parser.c | 12 +++++++++++- find/pred.c | 7 +++++-- find/tree.c | 34 ++++++++++++++++++++++++---------- find/util.c | 53 +++++++++++++++++++++++++++++++++++++++++++++-------- 8 files changed, 130 insertions(+), 21 deletions(-) diff --git a/ChangeLog b/ChangeLog index 540b23b..3f21cc8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,34 @@ +2009-03-07 James Youngman <[email protected]> + + Optimise away calls to stat if all we need is the inode number. + This fixes Savannah bug #24342. + * find/util.c (get_info): call get_statinfo if need_inum and we + don't already have the inode number. + (apply_predicate): Call get_info if need_inum. + * find/defs.h (enum EvaluationCost): Add NeedsInodeNumber. + (struct predicate): Add need_inum. + * find/tree.c (get_new_pred): Default need_inum to false. + (get_new_pred_chk_op): Likewise. + * find/ftsfind.c (consider_visiting): Copy the value of st_ino + into statbuf.st_ino. + * find/parser.c (parse_inum): Set need_inum=true. + (parse_samefile): Set need_inum=false, but add a comment + indicating that there is an optimisation opportunity. + (make_segment): For -printf %i, set NeedsInodeNumber rather than + NeedsStat. + * find/pred.c (pred_inum): assert that the inode number is known + (i.e. non-zero). + (print_optlist): Print "[need inum]" when need_inum is true. + * find/tree.c (opt_expr): Also reorder expressions if the right + hand expression is -inum (that is, consider that test to be no + more expensive than -type). + (set_new_parent): Default need_inum to false. + (costlookup): Set pred_inum=NeedsInodeNumber. + (get_pred_cost): need_inum implies a data cost of + NeedsInodeNumber. + (cost_table): Add NeedsInodeNumber. + (print_tree): Handle need_inum. + 2009-03-06 James Youngman <[email protected]> Updated translation po files from translationproject.org. diff --git a/NEWS b/NEWS index 1fc82ef..749c7e2 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,12 @@ element of struct dirent, for example reiserfs. Anecdotal evidence suggests this can speed updatedb up from about 30 minutes to 3-4 minutes. +The ftsfind executable also now avoids calling stat() functions to +discover the inode number of a file, if we already read this +information from the directory. This does provide a speed-up, but +only for a restricted set of commands such as "find . -inum 4001". +This fix is listed below as bug #24342. + ** Bug Fixes #25764: remove duplicate entry for 'proc' in updatedb's $PRUNEFS. @@ -26,6 +32,9 @@ unknown user or is missing. #25154: Allow compilation with C compilers that don't allow declarations to follow statements. +#24342: -inum predicate shoud use dirent.d_ino instead of stat.st_ino +(this is a performance bug). + ** Translations Updated translations for Bulgarian, German, Irish, Hungarian, diff --git a/find/defs.h b/find/defs.h index 973c4c1..c6fcc0f 100644 --- a/find/defs.h +++ b/find/defs.h @@ -242,6 +242,7 @@ struct predicate_performance_info enum EvaluationCost { NeedsNothing, + NeedsInodeNumber, NeedsType, NeedsStatInfo, NeedsLinkName, @@ -285,6 +286,9 @@ struct predicate /* True if this predicate node requires knowledge of the file type. */ boolean need_type; + /* True if this predicate node requires knowledge of the inode number. */ + boolean need_inum; + enum EvaluationCost p_cost; /* est_success_rate is a number between 0.0 and 1.0 */ diff --git a/find/ftsfind.c b/find/ftsfind.c index bd09051..74018fa 100644 --- a/find/ftsfind.c +++ b/find/ftsfind.c @@ -409,6 +409,7 @@ consider_visiting(FTS *p, FTSENT *ent) inside_dir(p->fts_cwd_fd); prev_depth = ent->fts_level; + statbuf.st_ino = ent->fts_statp->st_ino; /* Cope with various error conditions. */ if (ent->fts_info == FTS_ERR diff --git a/find/parser.c b/find/parser.c index 06c1686..0f8d77e 100644 --- a/find/parser.c +++ b/find/parser.c @@ -1247,6 +1247,9 @@ parse_inum (const struct parser_table* entry, char **argv, int *arg_ptr) * files match */ p->est_success_rate = 1e-6; + p->need_inum = true; + p->need_stat = false; + p->need_type = false; return true; } else @@ -2293,6 +2296,8 @@ parse_samefile (const struct parser_table* entry, char **argv, int *arg_ptr) our_pred->args.samefileid.dev = st.st_dev; our_pred->args.samefileid.fd = fd; our_pred->need_type = false; + /* smarter way: compare type and inode number first. */ + /* TODO: maybe optimise this away by being optimistic */ our_pred->need_stat = true; our_pred->est_success_rate = 0.01f; return true; @@ -2895,6 +2900,12 @@ make_segment (struct segment **segment, *fmt++ = 's'; break; + case 'i': /* inode number */ + pred->need_inum = true; + mycost = NeedsInodeNumber; + *fmt++ = 's'; + break; + case 'a': /* atime in `ctime' format */ case 'A': /* atime in user-specified strftime format */ case 'B': /* birth time in user-specified strftime format */ @@ -2902,7 +2913,6 @@ make_segment (struct segment **segment, case 'C': /* ctime in user-specified strftime format */ case 'F': /* file system type */ case 'g': /* group name */ - case 'i': /* inode number */ case 'M': /* mode in `ls -l' format (eg., "drwxr-xr-x") */ case 's': /* size in bytes */ case 't': /* mtime in `ctime' format */ diff --git a/find/pred.c b/find/pred.c index d7253e4..c51b4e7 100644 --- a/find/pred.c +++ b/find/pred.c @@ -1216,6 +1216,8 @@ pred_inum (const char *pathname, struct stat *stat_buf, struct predicate *pred_p { (void) pathname; + assert (stat_buf->st_ino != 0); + switch (pred_ptr->args.numinfo.kind) { case COMP_GT: @@ -2434,9 +2436,10 @@ print_optlist (FILE *fp, const struct predicate *p) { print_parenthesised(fp, p->pred_left); fprintf (fp, - "%s%s", + "%s%s%s", p->need_stat ? "[call stat] " : "", - p->need_type ? "[need type] " : ""); + p->need_type ? "[need type] " : "", + p->need_inum ? "[need inum] " : ""); print_predicate(fp, p); fprintf(fp, " [%g] ", p->est_success_rate); if (options.debug_options & DebugSuccessRates) diff --git a/find/tree.c b/find/tree.c index cb147af..929c5f6 100644 --- a/find/tree.c +++ b/find/tree.c @@ -748,8 +748,9 @@ opt_expr (struct predicate **eval_treep) } reorder = ((options.optimisation_level > 1) - && (NeedsType == curr->pred_right->p_cost) - && !curr->pred_right->need_stat) || + && (NeedsType == curr->pred_right->p_cost + || NeedsInodeNumber == curr->pred_right->p_cost) + && !curr->pred_right->need_stat) || (options.optimisation_level > 2); if (reorder) @@ -831,6 +832,7 @@ set_new_parent (struct predicate *curr, enum predicate_precedence high_prec, str new_parent->p_prec = high_prec; new_parent->need_stat = false; new_parent->need_type = false; + new_parent->need_inum = false; new_parent->p_cost = NeedsNothing; switch (high_prec) @@ -919,7 +921,7 @@ static struct pred_cost_lookup costlookup[] = { pred_group , NeedsStatInfo }, { pred_ilname , NeedsLinkName }, { pred_iname , NeedsNothing }, - { pred_inum , NeedsStatInfo }, + { pred_inum , NeedsInodeNumber }, { pred_ipath , NeedsNothing }, { pred_links , NeedsStatInfo }, { pred_lname , NeedsLinkName }, @@ -1004,6 +1006,10 @@ get_pred_cost(const struct predicate *p) { data_requirement_cost = NeedsStatInfo; } + else if (p->need_inum) + { + data_requirement_cost = NeedsInodeNumber; + } else if (p->need_type) { data_requirement_cost = NeedsType; @@ -1433,6 +1439,7 @@ get_new_pred (const struct parser_table *entry) last_pred->no_default_print = false; last_pred->need_stat = true; last_pred->need_type = true; + last_pred->need_inum = false; last_pred->args.str = NULL; last_pred->pred_next = NULL; last_pred->pred_left = NULL; @@ -1478,6 +1485,7 @@ get_new_pred_chk_op (const struct parser_table *entry) new_pred->p_prec = AND_PREC; new_pred->need_stat = false; new_pred->need_type = false; + new_pred->need_inum = false; new_pred->args.str = NULL; new_pred->side_effects = false; new_pred->no_default_print = false; @@ -1500,15 +1508,16 @@ struct cost_assoc struct cost_assoc cost_table[] = { { NeedsNothing, "Nothing" }, - { NeedsType, "Type" }, - { NeedsStatInfo, "StatInfo" }, - { NeedsLinkName, "LinkName" }, - { NeedsAccessInfo, "AccessInfo" }, - { NeedsSyncDiskHit, "SyncDiskHit" }, + { NeedsInodeNumber, "InodeNumber" }, + { NeedsType, "Type" }, + { NeedsStatInfo, "StatInfo" }, + { NeedsLinkName, "LinkName" }, + { NeedsAccessInfo, "AccessInfo" }, + { NeedsSyncDiskHit, "SyncDiskHit" }, { NeedsEventualExec, "EventualExec" }, { NeedsImmediateExec, "ImmediateExec" }, { NeedsUserInteraction, "UserInteraction" }, - { NeedsUnknown, "Unknown" } + { NeedsUnknown, "Unknown" } }; struct prec_assoc @@ -1604,7 +1613,7 @@ print_tree (FILE *fp, struct predicate *node, int indent) node->est_success_rate, (node->side_effects ? "" : "no ")); - if (node->need_stat || node->need_type) + if (node->need_stat || node->need_type || node->need_inum) { int comma = 0; @@ -1614,6 +1623,11 @@ print_tree (FILE *fp, struct predicate *node, int indent) fprintf (fp, "stat"); comma = 1; } + if (node->need_inum) + { + fprintf (fp, "%sinode", comma ? "," : ""); + comma = 1; + } if (node->need_type) { fprintf (fp, "%stype", comma ? "," : ""); diff --git a/find/util.c b/find/util.c index 2570682..bbc8436 100644 --- a/find/util.c +++ b/find/util.c @@ -221,8 +221,8 @@ get_statinfo (const char *pathname, const char *name, struct stat *p) } -/* Get the stat/type information for a file, if it is - * not already known. +/* Get the stat/type/inode information for a file, if it is not + * already known. */ int get_info (const char *pathname, @@ -235,14 +235,51 @@ get_info (const char *pathname, * already have it, stat the file now. */ if (pred_ptr->need_stat) - todo = true; + { + todo = true; /* need full stat info */ + } else if ((pred_ptr->need_type && (0 == state.have_type))) - todo = true; - + { + todo = true; /* need to stat to get the type */ + } + if (!todo && pred_ptr->need_inum) + { + if (state.have_type) + { + /* For now we decide not to trust struct dirent.d_ino for + * directry entries that are subdirectories, in case this + * subdirectory is a mount point. We also need to call a + * stat function if we don't have st_ino (i.e. it is zero). + */ + if (S_ISDIR(p->st_mode) || (!p->st_ino)) + todo = true; + } + else + { + todo = true; + } + } if (todo) - return get_statinfo(pathname, state.rel_pathname, p); + { + int result = get_statinfo(pathname, state.rel_pathname, p); + if (result != 0) + { + /* Verify some postconditions. We can't check st_mode for + non-zero-ness because of Savannah bug #16378. */ + if (pred_ptr->need_type) + { + assert (state.have_type); + } + if (pred_ptr->need_inum) + { + assert (p->st_ino); + } + } + } else - return 0; + { + return 0; + } } /* Determine if we can use O_NOFOLLOW. @@ -979,7 +1016,7 @@ apply_predicate(const char *pathname, struct stat *stat_buf, struct predicate *p { ++p->perf.visits; - if (p->need_stat || p->need_type) + if (p->need_stat || p->need_type || p->need_inum) { /* We may need a stat here. */ if (get_info(pathname, stat_buf, p) != 0) -- 1.5.6.5
