Re: [PR] esi: replace _findOpeningTag with memchr [trafficserver]

via GitHub Tue, 19 May 2026 13:47:49 -0700


zwoop commented on code in PR #13173:
URL: https://github.com/apache/trafficserver/pull/13173#discussion_r3269446471



##########
plugins/esi/lib/EsiParser.cc:
##########
@@ -182,68 +183,52 @@ EsiParser::_compareData(const string &data, size_t pos, 
const char *str, int str
   return PARTIAL_MATCH;
 }
 
-/** This implementation is optimized but not completely correct.  If
- * the opening tag were to have a repeating opening sequence ('<e<esi'
- * or something like that), this will break. However that is not the
- * case for the two opening tags we are looking for */
+/** Uses memchr to skip non-'<' bytes, then memcmp to verify each candidate
+ * anchor.  Delegates scanning to the platform's optimized memchr
+ * implementation.  Does not have the KMP-failure limitation of the original
+ * state-machine. */
 EsiParser::MATCH_TYPE
 EsiParser::_findOpeningTag(const string &data, size_t start_pos, size_t 
&opening_tag_pos, bool &is_html_comment_node) const
 {
-  size_t i_data = start_pos;
-  int    i_esi = 0, i_html_comment = 0;
-
-  while (i_data < data.size()) {
-    if (data[i_data] == ESI_TAG_PREFIX[i_esi]) {
-      if (++i_esi == ESI_TAG_PREFIX_LEN) {
-        is_html_comment_node = false;
-        opening_tag_pos      = i_data - i_esi + 1;
+  const char *const buf     = data.data();
+  const size_t      total   = data.size();
+  const size_t      esi_len = ESI_TAG_PREFIX_LEN;
+  const size_t      hlen    = HTML_COMMENT_NODE_INFO.tag_suffix_len;

Review Comment:
   Do you really need this hlen local? The compiler will optimize it away, but 
I honestly would just like to see HTML_COMMENT_NODE_INFO.tag_suffix_len where 
you need this, especially since you memcmp from 
HTML_COMMENT_NODE_INFO.tag_suffix already (no shadow variable for the string).
   
   Same with the esi_len I think; neither is a blocker.



##########
plugins/esi/lib/EsiParser.cc:
##########
@@ -182,68 +183,52 @@ EsiParser::_compareData(const string &data, size_t pos, 
const char *str, int str
   return PARTIAL_MATCH;
 }
 
-/** This implementation is optimized but not completely correct.  If
- * the opening tag were to have a repeating opening sequence ('<e<esi'
- * or something like that), this will break. However that is not the
- * case for the two opening tags we are looking for */
+/** Uses memchr to skip non-'<' bytes, then memcmp to verify each candidate
+ * anchor.  Delegates scanning to the platform's optimized memchr
+ * implementation.  Does not have the KMP-failure limitation of the original
+ * state-machine. */
 EsiParser::MATCH_TYPE
 EsiParser::_findOpeningTag(const string &data, size_t start_pos, size_t 
&opening_tag_pos, bool &is_html_comment_node) const
 {
-  size_t i_data = start_pos;
-  int    i_esi = 0, i_html_comment = 0;
-
-  while (i_data < data.size()) {
-    if (data[i_data] == ESI_TAG_PREFIX[i_esi]) {
-      if (++i_esi == ESI_TAG_PREFIX_LEN) {
-        is_html_comment_node = false;
-        opening_tag_pos      = i_data - i_esi + 1;
+  const char *const buf     = data.data();

Review Comment:
   One thought here, and I asked Claude to make an example: Did you consider 
pulling in and using libswoc TextView instead? It excels at things like "string 
parsing". Not a show stopper, and could be a future consideration.
   
   ```cpp
   swoc::TextView view{data.data() + start_pos, data.size() - start_pos};
   const swoc::TextView esi_prefix{ESI_TAG_PREFIX, ESI_TAG_PREFIX_LEN};
   const swoc::TextView html_prefix{HTML_COMMENT_NODE_INFO.tag_suffix,
                                    
size_t(HTML_COMMENT_NODE_INFO.tag_suffix_len)};
   while (!view.empty()) {
     size_t pos = view.find('<');
     if (pos == swoc::TextView::npos) return NO_MATCH;
     view.remove_prefix(pos);
     size_t abs_pos = data.size() - view.size();
     if (view.starts_with(esi_prefix)) { /* COMPLETE_MATCH esi */ }
     if (view.size() > html_prefix.size() && view.starts_with(html_prefix)) {
       char ch = view[html_prefix.size()];
       if (ch==' '||ch=='\t'||ch=='\r'||ch=='\n') { /* COMPLETE_MATCH html */ }
     }
     if (view.size() < esi_prefix.size() && esi_prefix.starts_with(view)) { /* 
PARTIAL */ }
     if (view.size() <= html_prefix.size() && html_prefix.starts_with(view)) { 
/* PARTIAL */ }
     view.remove_prefix(1);
   }
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] esi: replace _findOpeningTag with memchr [trafficserver]

Reply via email to