deniskuzZ commented on PR #4998:
URL: https://github.com/apache/hive/pull/4998#issuecomment-2058437503

   > @deniskuzZ I have fixed it as per your suggestion. But, we did not 
integrate it with UDFLike because it would require major fixes to the logic. 
Please let me know if you think it should be fixed.
   
   Hi @ryukobayashi, since we identify the pattern directly, why do we need 
multiple matcher factories? Can we have just one factory? 
   ````
   /**
    * {@link AbstractFilterStringColLikeStringScalar} subclass that exposes a single
    * {@link CheckerFactory}. The factory classifies the pattern with
    * {@link UDFLikePattern#matcher(String)} and reflectively instantiates the
    * {@link Checker} class associated with the detected pattern type.
    */
   public class FilterStringColLikeStringScalar extends AbstractFilterStringColLikeStringScalar {
     private static final long serialVersionUID = 1L;

     // Static state is never serialized, so no "transient" modifier is needed here.
     private static final List<CheckerFactory> CHECKER_FACTORIES = ImmutableList.of(
       pattern -> {
         UDFLikePattern udfLike = UDFLikePattern.matcher(pattern);
         try {
           // Every Checker class referenced by UDFLikePattern is looked up through a
           // public single-String constructor.
           return udfLike.checker.getConstructor(String.class).newInstance(
             udfLike.format(pattern));
         } catch (Exception e) {
           // Chain the original failure so the root cause is not lost.
           throw new IllegalArgumentException("unable to initialize Checker", e);
         }
       });

     /** No-argument constructor; only delegates to the superclass constructor. */
     public FilterStringColLikeStringScalar() {
       super();
     }

     /**
      * Creates the expression for the given column and pattern.
      *
      * @param colNum      column number forwarded to the superclass constructor
      * @param likePattern pattern bytes; decoded as UTF-8 and handed to
      *                    {@code setPattern}
      */
     public FilterStringColLikeStringScalar(int colNum, byte[] likePattern) {
       super(colNum, null);
       super.setPattern(new String(likePattern, StandardCharsets.UTF_8));
     }

     /**
      * @return the shared list holding the single pattern-classifying factory
      */
     @Override
     protected List<CheckerFactory> getCheckerFactories() {
       return CHECKER_FACTORIES;
     }

     /**
      * Pattern categories, each bound to the {@link Checker} implementation that is
      * instantiated for it.
      */
     private enum UDFLikePattern {
       BEGIN(BeginChecker.class),
       END(EndChecker.class),
       MIDDLE(MiddleChecker.class),
       NONE(NoneChecker.class),
       CHAINED(ChainedChecker.class),
       COMPLEX(ComplexChecker.class);

       /** Checker class instantiated for this pattern type. */
       private final Class<? extends Checker> checker;

       UDFLikePattern(Class<? extends Checker> checker) {
         this.checker = checker;
       }

       /**
        * Classifies a pattern in a single left-to-right scan. A wildcard counts as
        * escaped when the character immediately before it is a backslash.
        * <ul>
        *   <li>an unescaped {@code _} anywhere yields {@link #COMPLEX} immediately;</li>
        *   <li>a {@code %} at index 0 yields {@link #END};</li>
        *   <li>an unescaped {@code %} strictly inside the pattern yields
        *       {@link #CHAINED};</li>
        *   <li>an unescaped {@code %} in the last position turns {@link #END} into
        *       {@link #MIDDLE}, leaves {@link #CHAINED} untouched, and otherwise yields
        *       {@link #BEGIN};</li>
        *   <li>a pattern with no unescaped wildcard stays {@link #NONE}.</li>
        * </ul>
        *
        * @param pattern pattern text to classify
        * @return the detected pattern type
        */
       private static UDFLikePattern matcher(String pattern) {
         UDFLikePattern lastType = NONE;
         int length = pattern.length();
         char lastChar = 0;

         for (int i = 0; i < length; i++) {
           char n = pattern.charAt(i);
           if (n == '_' && lastChar != '\\') { // such as "a_bc"
             return COMPLEX;
           } else if (n == '%') {
             if (i == 0) { // such as "%abc"
               lastType = END;
             } else if (i < length - 1) {
               if (lastChar != '\\') { // such as "a%bc"
                 lastType = CHAINED;
               }
             } else {
               if (lastChar != '\\') {
                 if (lastType == END) { // such as "%abc%"
                   lastType = MIDDLE;
                 } else if (lastType != CHAINED) {
                   lastType = BEGIN; // such as "abc%"
                 }
               }
             }
           }
           lastChar = n;
         }
         return lastType;
       }

       /**
        * Produces the string passed to the checker constructor for this pattern type:
        * the leading and/or trailing {@code %} is stripped for BEGIN, END and MIDDLE,
        * COMPLEX is converted to an anchored regular expression, and NONE and CHAINED
        * receive the pattern unchanged.
        *
        * <p>NOTE(review): escape backslashes (such as in "abc\%") are left in the
        * returned text for the non-COMPLEX types; confirm the target checkers expect
        * that.
        *
        * @param pattern the pattern that was classified as this type
        * @return the constructor argument for the checker
        */
       private String format(String pattern) {
         int startIndex = 0;
         int endIndex = pattern.length();

         switch (this) {
           case BEGIN:
             endIndex--;
             break;
           case END:
             startIndex++;
             break;
           case MIDDLE:
             startIndex++;
             endIndex--;
             break;
           case COMPLEX:
             return "^" + UDFLike.likePatternToRegExp(pattern) + "$";
           default:
             // NONE and CHAINED: hand the pattern over as-is.
             break;
         }

         return pattern.substring(startIndex, endIndex);
       }
     }
   }
   ````
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to