This is an analysis of a single request, traced from ap_read_request to ap_graceful_stop_signalled, serving a file out of mod_mem_cache, with a couple of hacks (which I am working to turn into production-quality patches) to bypass ap_http_header_filter...
Before Jeff's poll-timeout patch: 58115 instructions (this is from the last profile I posted) Apply Jeff's patch: 54888 instructions (I have not posted this profile) Apply Jeff's patch and this patch: 51917 instructions (profile posted here) Definitely going in the right direction! Space % Cycles ===== ==== ===== User 20.0 45126 Shared Library 45.4 102161 Kernel 34.6 77821 Total 225108 (4.34 per instruction) /usr/local/apachetest/bin/httpd : Subroutine Name Source File Ratio Enter % Cycles =============== =========== ===== ===== ==== ====== .core_input_filter core.c 3.11 11 1.6 3591 .ap_rgetline_core protocol.c 3.02 6 1.3 3025 .get_filter_handle util_filter.c 4.84 2 1.3 2859 .add_any_filter_handle util_filter.c 4.18 6 0.9 1997 .net_time_filter core.c 3.16 12 0.8 1869 ._ptrgl ptrgl.s 4.58 66 0.8 1814 .core_output_filter core.c 4.22 2 0.8 1708 .fix_hostname vhost.c 3.61 1 0.7 1610 .add_any_filter util_filter.c 5.60 1 0.6 1439 .ap_update_child_status_from_indexes scoreboard.c 15.29 2 0.5 1223 .ap_get_brigade util_filter.c 3.61 25 0.5 1172 ._moveeq moveeq.s 5.08 14 0.5 1102 .ap_find_token util.c 4.60 3 0.5 1052 .ap_get_mime_headers_core protocol.c 3.22 1 0.3 753 .ap_read_request protocol.c 5.23 1 0.3 753 .ap_set_keepalive http_protocol.c 5.06 1 0.3 749 .ap_getword_white util.c 2.85 2 0.3 724 .log_error_core log.c 5.42 4 0.3 651 .ap_pass_brigade util_filter.c 5.35 5 0.3 647 .isspace glink.s 4.51 22 0.3 596 .apr_palloc glink.s 4.27 22 0.3 563 .core_create_req core.c 9.43 1 0.2 547 .read_request_line protocol.c 4.13 1 0.2 516 .ap_content_length_filter protocol.c 3.14 1 0.2 496 .ap_http_filter http_protocol.c 4.73 2 0.2 468 .apr_brigade_cleanup glink.s 6.83 11 0.2 451 .apr_table_get glink.s 5.76 12 0.2 414 .ap_recent_rfc822_date util_time.c 2.16 1 0.2 410 .tolower glink.s 3.55 19 0.2 404 .isalpha glink.s 3.32 19 0.2 378 .ap_run_log_transaction protocol.c 11.21 1 0.2 370 .writev_it_all core.c 7.48 1 0.1 336 .ap_add_output_filters_by_type core.c 13.68 1 0.1 315 
.ap_process_http_connection http_core.c 8.31 0 0.1 291 .ap_run_quick_handler config.c 10.26 1 0.1 287 .ap_run_create_request request.c 6.30 1 0.1 283 .ap_run_post_read_request protocol.c 8.19 1 0.1 278 .basic_http_header http_protocol.c 3.29 1 0.1 276 .avoid_xlc_bug core.c 6.17 11 0.1 272 .ap_log_error log.c 2.99 4 0.1 263 .cached_explode util_time.c 3.86 1 0.1 251 .apr_setsocketopt glink.s 3.53 11 0.1 233 .ap_discard_request_body http_protocol.c 3.14 1 0.1 223 .ap_process_request http_request.c 8.05 1 0.1 217 .lookup_builtin_method http_protocol.c 7.99 1 0.1 216 .apr_table_make glink.s 5.83 6 0.1 210 .ap_parse_uri protocol.c 4.75 1 0.1 209 .http_create_request http_core.c 5.43 1 0.1 206 .ap_get_server_version core.c 40.44 1 0.1 202 .apr_brigade_split_line glink.s 5.61 6 0.1 202 .__divi64 glink.s 11.09 3 0.1 200 .ap_update_vhost_from_headers vhost.c 6.37 1 0.1 197 .basic_http_header_check http_protocol.c 4.70 1 0.1 197 .apr_brigade_puts glink.s 8.21 4 0.1 197 .strlen glink.s 8.12 4 0.1 195 .apr_brigade_destroy glink.s 10.81 3 0.1 195 .create_empty_config config.c 8.58 1 0.1 180 .apr_brigade_write glink.s 3.74 8 0.1 180 .apr_array_make glink.s 29.27 1 0.1 176 .form_header_field http_protocol.c 2.03 2 0.1 158 .apr_pool_destroy glink.s 25.47 1 0.1 153 .ap_basic_http_header http_protocol.c 10.12 1 0.1 152 .apr_off_t_toa glink.s 24.63 1 0.1 148 .ap_set_byterange http_protocol.c 4.49 1 0.1 144 .apr_brigade_create glink.s 3.97 6 0.1 143 .memset glink.s 5.70 4 0.1 137 .strchr glink.s 5.57 4 0.1 134 .check_pipeline_flush http_request.c 2.94 1 0.1 129 .apr_pstrcatv glink.s 21.47 1 0.1 129 .apr_table_addn glink.s 5.24 4 0.1 126 .apr_pstrdup glink.s 20.67 1 0.1 124 .ap_graceful_stop_signalled worker.c 41.27 1 0.1 124 .apr_psprintf glink.s 20.43 1 0.1 123 .ap_make_method_list http_protocol.c 4.90 1 0.1 122 .apr_sendv glink.s 20.13 1 0.1 121 .isdigit glink.s 9.82 2 0.1 118 .apr_brigade_split glink.s 3.52 5 0.0 106 .apr_table_overlap glink.s 16.87 1 0.0 101 .apr_brigade_partition 
glink.s 4.18 4 0.0 100 .ap_add_output_filter util_filter.c 12.53 1 0.0 100 .apr_table_setn glink.s 8.22 2 0.0 99 .ap_get_limit_req_body core.c 13.71 1 0.0 96 .ap_byterange_filter http_protocol.c 3.23 1 0.0 94 .apr_uri_parse glink.s 15.17 1 0.0 91 .ap_method_number_of http_protocol.c 4.46 1 0.0 89 .remove_any_filter util_filter.c 2.84 2 0.0 88 .ap_set_content_length protocol.c 4.37 1 0.0 87 .ap_set_content_type http_protocol.c 4.11 1 0.0 86 .ap_index_of_response http_protocol.c 4.35 1 0.0 83 .strncasecmp glink.s 6.45 2 0.0 77 .apr_time_now glink.s 12.47 1 0.0 75 .apr_table_unset glink.s 5.95 2 0.0 71 .ap_update_child_status scoreboard.c 4.70 2 0.0 66 .apr_parse_addr_port glink.s 10.10 1 0.0 61 .ap_add_output_filter_handle util_filter.c 2.83 3 0.0 51 .ap_get_output_filter_handle util_filter.c 8.27 2 0.0 50 .apr_bucket_flush_create glink.s 8.20 1 0.0 49 .ap_finalize_request_protocol protocol.c 2.47 1 0.0 47 .apr_pstrmemdup glink.s 7.83 1 0.0 47 .ap_remove_output_filter util_filter.c 2.20 2 0.0 44 .ap_add_input_filter_handle util_filter.c 2.83 2 0.0 34 .apr_table_mergen glink.s 5.37 1 0.0 32 .apr_pool_create_ex glink.s 4.83 1 0.0 29 .ap_explode_recent_gmt util_time.c 6.75 1 0.0 27 .apr_bucket_eos_create glink.s 2.90 1 0.0 17 .ap_create_request_config config.c 7.00 1 0.0 7 Shlib Subroutine Source File Ratio Enter % Cycles ================ =========== ===== ===== ==== ====== .__is_wctype_std libc/__is_wctype_std.c 3.30 66 1.9 4358 .apr_table_get apr_tables.c 3.11 17 1.8 4063 .memset memset.s 5.10 8 1.5 3293 .apr_palloc apr_pools.c 2.00 85 1.4 3061 ._moveeq moveeq.s 4.11 11 1.3 3017 .__divu64 divu64.s 2.70 3 1.2 2701 .memchr libc/memchr.c 3.63 11 1.2 2682 .__divi64 divi64.s 2.84 4 1.2 2640 .strcasecmp libaixinet/strcasecmp.c 3.82 7 1.2 2599 .apr_setsocketopt sockopt.c 3.33 13 1.2 2591 .apr_table_setn apr_tables.c 3.05 11 1.1 2398 .strlen strlen.s 3.41 14 0.8 1891 .apr_vformatter apr_snprintf.c 4.29 1 0.8 1811 .apr_brigade_cleanup apr_brigade.c 2.90 18 0.8 1790 
.pthread_mutex_lock libpthreads/mutex.c 9.57 3 0.8 1693 .apr_bucket_alloc apr_buckets_alloc.c 2.61 28 0.7 1617 .apr_brigade_puts apr_brigade.c 2.34 5 0.7 1597 .apr_brigade_split_line apr_brigade.c 2.94 6 0.7 1567 .apr_brigade_write apr_brigade.c 2.51 11 0.6 1411 .isspace libc/isspace.c 2.90 23 0.6 1335 .cache_url_handler mod_cache.c 6.11 1 0.5 1191 ._ptrgl ptrgl.s 2.88 63 0.5 1089 ._ptrgl ptrgl.s 2.59 70 0.5 1089 .allocator_alloc apr_pools.c 4.12 5 0.5 1055 .apr_bucket_free apr_buckets_alloc.c 2.74 28 0.5 1029 .multi_log_transaction mod_log_config.c 32.19 1 0.4 998 .apr_bucket_simple_copy apr_buckets_simple.c 3.63 14 0.4 966 .allocator_free apr_pools.c 4.07 5 0.4 923 .tolower libc/tolower.c 2.79 19 0.4 901 ._moveeq moveeq.s 4.07 13 0.4 870 .find_entry cache_hash.c 5.99 1 0.4 838 .make_array_core apr_tables.c 2.40 12 0.4 792 .apr_array_push_noclear apr_tables.c 1.77 19 0.3 772 .isalpha libc/isalpha.c 1.95 19 0.3 741 .strchr strchr.s 2.76 5 0.3 739 .apr_pool_cleanup_register apr_pools.c 2.43 13 0.3 726 .apr_palloc glink.s 3.35 36 0.3 724 .match_headers mod_setenvif.c 18.81 1 0.3 715 .apr_brigade_create apr_brigade.c 2.18 12 0.3 706 .strncasecmp libaixinet/strcasecmp.c 3.25 3 0.3 702 .ap_cache_get_cachetype cache_util.c 12.17 1 0.3 694 .isupper libc/isupper.c 1.81 19 0.3 687 .apr_table_overlap apr_tables.c 3.34 1 0.3 685 .apr_bucket_simple_split apr_buckets_simple.c 2.19 10 0.3 679 .apr_table_unset apr_tables.c 3.27 2 0.3 678 .open_entity mod_mem_cache.c 8.13 1 0.3 659 .heap_bucket_destroy apr_buckets_heap.c 2.49 17 0.3 644 .pthread_mutex_unlock libpthreads/mutex.c 5.35 3 0.3 642 .apr_brigade_partition apr_brigade.c 4.32 4 0.3 622 .apr_uri_parse apr_uri.c 5.05 1 0.3 606 .overlap_hash apr_tables.c 3.49 4 0.3 586 .read libc/read.c 7.81 2 0.2 547 .unserialize_table mod_mem_cache.c 3.85 4 0.2 546 .apr_bucket_alloc glink.s 3.19 28 0.2 537 .spin_lock_global_ppc_up locks_ppc_up.s 15.85 3 0.2 523 .apr_table_make apr_tables.c 2.72 10 0.2 517 .heap_bucket_read 
apr_buckets_heap.c 1.83 30 0.2 493 .apr_recv sendrecv.c 4.60 2 0.2 479 .run_cleanups apr_pools.c 9.41 1 0.2 452 .socket_bucket_read apr_buckets_socket.c 3.65 2 0.2 431 .strcasecmp glink.s 11.93 6 0.2 429 .apr_bucket_heap_make apr_buckets_heap.c 4.34 3 0.2 417 .apr_atomic_dec apr_atomic.c 12.86 1 0.2 399 .apr_sendv sendrecv.c 8.53 1 0.2 392 .apr_parse_addr_port sockaddr.c 6.40 1 0.2 390 .apr_table_addn apr_tables.c 2.49 4 0.2 388 .cache_out_filter mod_cache.c 4.48 1 0.2 385 .pthread_mutex_lock glink.s 21.33 3 0.2 384 .apr_table_mergen apr_tables.c 4.11 1 0.2 382 .apr_pool_cleanup_kill glink.s 21.11 3 0.2 380 .apr_bucket_shared_destroy glink.s 3.58 17 0.2 365 .cache_run_open_entity cache_storage.c 11.29 1 0.2 361 .apr_brigade_split apr_brigade.c 2.62 5 0.2 361 .apr_bucket_shared_split apr_buckets_refcount.c 2.00 10 0.2 360 .apr_bucket_shared_destroy apr_buckets_refcount.c 3.52 17 0.2 359 .cache_select_url cache_storage.c 4.86 1 0.2 350 .apr_thread_mutex_lock thread_mutex.c 7.22 3 0.2 347 ._Errno libc/errno.c 5.96 3 0.2 340 .apr_pool_cleanup_kill apr_pools.c 5.69 3 0.1 336 .apr_palloc glink.s 4.41 12 0.1 318 .apr_wait_for_io_or_timeout sendrecv.c 7.11 1 0.1 313 .apr_pvsprintf apr_pools.c 6.38 1 0.1 312 ._ptrgl ptrgl.s 12.76 4 0.1 306 .apr_pstrdup apr_strings.c 2.62 5 0.1 304 .apr_bucket_simple_split glink.s 4.84 10 0.1 291 .read glink.s 23.77 2 0.1 285 .pthread_mutex_unlock glink.s 15.66 3 0.1 282 .isdigit libc/isdigit.c 4.56 3 0.1 274 .islower libc/islower.c 6.79 2 0.1 272 .read_real_time read_real_time.s 9.46 1 0.1 265 .writev libc/write.c 13.04 1 0.1 261 .apr_pool_destroy apr_pools.c 4.86 1 0.1 257 ._ptrgl ptrgl.s 10.64 4 0.1 255 .apr_pool_create_ex apr_pools.c 3.76 1 0.1 252 .read_headers mod_mem_cache.c 3.94 1 0.1 252 .spin_unlock_global_ppc_up locks_ppc_up.s 6.17 3 0.1 241 .apr_pstrcatv apr_strings.c 2.86 1 0.1 241 .apr_bucket_shared_make glink.s 13.27 3 0.1 239 .decrement_refcount mod_mem_cache.c 9.15 1 0.1 238 .kread glink.s 19.57 2 0.1 235 .memchr glink.s 
3.91 10 0.1 235 .read_body mod_mem_cache.c 5.45 1 0.1 229 .memcache_gdsf_algorithm mod_mem_cache.c 8.06 2 0.1 226 .apr_brigade_destroy apr_brigade.c 4.34 3 0.1 221 .ap_cache_tokstr cache_util.c 4.91 1 0.1 221 .poll glink.s 35.83 1 0.1 215 .apr_atomic_inc apr_atomic.c 7.32 1 0.1 212 .__pthread_geterrno_addr libpthreads/lib_lock.c 4.34 4 0.1 208 .cache_hash_get glink.s 34.53 1 0.1 207 .strlen glink.s 6.90 5 0.1 207 .apr_allocator_alloc glink.s 8.59 4 0.1 206 ._ptrgl ptrgl.s 6.24 5 0.1 187 .apr_table_make glink.s 7.75 4 0.1 186 .conv_10 apr_snprintf.c 2.60 2 0.1 182 .memcmp memcmp.s 4.97 1 0.1 179 .memset glink.s 14.80 2 0.1 178 .apr_pstrmemdup glink.s 28.57 1 0.1 171 .ap_cache_get_cachetype glink.s 28.10 1 0.1 169 .ap_cache_tokstr glink.s 27.93 1 0.1 168 .apr_thread_mutex_lock glink.s 13.77 2 0.1 165 .strlen glink.s 5.47 5 0.1 164 .apr_pool_cleanup_register glink.s 2.27 12 0.1 163 .apr_recv glink.s 13.50 2 0.1 162 .apr_off_t_toa apr_strings.c 2.47 1 0.1 158 .ap_set_content_type glink.s 26.23 1 0.1 157 .gettimeofday glink.s 26.13 1 0.1 157 .cache_update glink.s 26.07 1 0.1 156 .__divi64 glink.s 25.57 1 0.1 153 .ap_add_output_filter glink.s 25.50 1 0.1 153 .apr_pstrmemdup apr_strings.c 3.32 2 0.1 153 .apr_table_setn glink.s 2.77 9 0.1 150 ._Errno glink.s 8.12 3 0.1 146 .apr_pstrndup apr_strings.c 4.63 1 0.1 143 .kwritev glink.s 23.53 1 0.1 141 .cache_find glink.s 23.40 1 0.1 140 .gettimeofday libc/gettimeofday.c 3.27 1 0.1 137 .cache_find cache_cache.c 10.43 1 0.1 136 .apr_atomic_dec glink.s 22.57 1 0.1 135 .apr_time_now time.c 5.27 1 0.1 132 .cache_pq_change_priority cache_pqueue.c 6.11 1 0.1 128 .apr_thread_mutex_unlock thread_mutex.c 2.67 3 0.1 128 .apr_getsocketopt glink.s 21.30 1 0.1 128 .apr_allocator_free glink.s 5.25 4 0.1 126 .ap_cache_liststr glink.s 10.50 2 0.1 126 .apr_palloc glink.s 10.32 2 0.1 124 .apr_array_make apr_tables.c 5.97 1 0.1 119 .memcmp glink.s 19.67 1 0.1 118 .apr_bucket_eos_create glink.s 19.57 1 0.1 117 .ap_cache_liststr cache_util.c 2.76 2 
0.1 116 .apr_allocator_mutex_get apr_pools.c 27.75 2 0.0 111 .writev glink.s 18.17 1 0.0 109 .apr_bucket_shared_copy apr_buckets_refcount.c 1.69 4 0.0 108 .islower glink.s 8.92 2 0.0 107 .strncasecmp glink.s 17.77 1 0.0 107 .memset glink.s 17.47 1 0.0 105 .apr_bucket_immortal_create glink.s 17.33 1 0.0 104 .apr_bucket_eos_create apr_buckets_eos.c 2.58 2 0.0 103 .cache_pq_percolate_down cache_pqueue.c 6.04 1 0.0 103 .apr_thread_mutex_lock glink.s 16.50 1 0.0 99 .ap_set_keepalive glink.s 16.43 1 0.0 99 .apr_vformatter glink.s 15.93 1 0.0 96 .apr_bucket_immortal_create apr_buckets_simple.c 3.98 1 0.0 96 .apr_bucket_heap_create glink.s 7.92 2 0.0 95 .apr_bucket_socket_create apr_buckets_socket.c 4.44 1 0.0 93 .time_base_to_time libc/POWER/time_base_to_time.c 1.66 1 0.0 93 .apr_bucket_heap_create apr_buckets_heap.c 1.78 2 0.0 93 .minchild cache_pqueue.c 5.45 1 0.0 93 .ap_log_error glink.s 3.77 4 0.0 91 .cache_update cache_cache.c 2.56 1 0.0 79 .apr_bucket_eos_make apr_buckets_eos.c 5.64 2 0.0 79 .apr_bucket_shared_make apr_buckets_refcount.c 4.23 3 0.0 76 .apr_bucket_simple_copy glink.s 3.05 4 0.0 73 .apr_bucket_immortal_make apr_buckets_simple.c 10.11 1 0.0 71 .cache_generate_key_default cache_storage.c 4.16 1 0.0 71 .memchr glink.s 11.43 1 0.0 69 .cache_read_entity_headers cache_storage.c 3.09 1 0.0 68 .apr_table_get glink.s 2.17 5 0.0 65 .apr_pstrdup glink.s 3.56 3 0.0 64 .free_proc_chain apr_pools.c 4.17 1 0.0 58 .apr_bucket_flush_create apr_buckets_flush.c 2.87 1 0.0 57 .ap_basic_http_header glink.s 9.53 1 0.0 57 .ap_pass_brigade glink.s 4.72 2 0.0 57 .simple_bucket_read apr_buckets_simple.c 3.33 2 0.0 53 .apr_bucket_free glink.s 2.17 4 0.0 52 .apr_atomic_inc glink.s 8.07 1 0.0 48 .cache_hash_get cache_hash.c 3.43 1 0.0 48 .ap_get_output_filter_handle glink.s 3.97 2 0.0 48 .strcasecmp glink.s 7.73 1 0.0 46 .brigade_cleanup apr_brigade.c 11.35 4 0.0 45 .memcache_get_pos mod_mem_cache.c 14.60 1 0.0 44 .strchr glink.s 7.20 1 0.0 43 .cache_read_entity_body 
cache_storage.c 4.16 1 0.0 42 .apr_brigade_write glink.s 2.17 3 0.0 39 .apr_pstrndup glink.s 6.43 1 0.0 39 .apr_allocator_alloc apr_pools.c 9.55 4 0.0 38 .ap_remove_output_filter glink.s 5.77 1 0.0 35 .apr_psprintf apr_pools.c 1.83 1 0.0 33 .apr_thread_mutex_unlock glink.s 2.72 2 0.0 33 .apr_bucket_socket_make apr_buckets_socket.c 4.60 1 0.0 32 .apr_allocator_free apr_pools.c 7.85 4 0.0 31 .apr_bucket_destroy_noop apr_buckets.c 7.00 4 0.0 28 .isdigit glink.s 4.53 1 0.0 27 .apr_setsocketopt glink.s 2.17 2 0.0 26 .cache_read_entity_body glink.s 3.70 1 0.0 22 .apr_getsocketopt sockopt.c 3.33 1 0.0 20 .apr_allocator_owner_get apr_pools.c 5.00 2 0.0 20 .apr_thread_mutex_unlock glink.s 2.90 1 0.0 17 .memcache_inc_frequency mod_mem_cache.c 2.14 1 0.0 15 .apr_bucket_flush_make apr_buckets_flush.c 2.00 1 0.0 14 .apr_pool_cleanup_register glink.s 2.17 1 0.0 13 .memset glink.s 2.17 1 0.0 13 .cache_pq_change_priority glink.s 2.17 1 0.0 13 .apr_bucket_heap_make glink.s 2.17 1 0.0 13 .apr_brigade_create glink.s 2.17 1 0.0 13 .apr_pstrdup glink.s 2.17 1 0.0 13 .isspace glink.s 2.17 1 0.0 13 .cache_select_url glink.s 2.17 1 0.0 13 .apr_brigade_puts glink.s 2.17 1 0.0 13 .eos_bucket_read apr_buckets_eos.c 2.75 1 0.0 11 > This patch uses the same temp brigade to read all the lines of > an HTTP request, in order to eliminate the overhead of brigade > creation and destruction that we've seen in recent performance > profiling. The patch changes the signature of ap_rgetline_core() > and adds a new ap_get_mime_headers_core(), but it leaves ap_getline() > unchanged for compatibility with code outside the core (like > mod_proxy). > > Bill S: if you have time, can you try this code in your benchmarking > environment? > > Thanks, > --Brian > >
