sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r921968917


##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##########
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name    name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+    return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name    name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+    return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+    return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+    return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BYTESSENT_GROUP = "bytessent";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OBJECTSIZE_GROUP = "objectsize";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TOTALTIME_GROUP = "totaltime";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TURNAROUNDTIME_GROUP = "turnaroundtime";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REFERRER_GROUP = "referrer";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String USERAGENT_GROUP = "useragent";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERSION_GROUP = "version";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HOSTID_GROUP = "hostid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String SIGV_GROUP = "sigv";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String CYPHER_GROUP = "cypher";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AUTH_GROUP = "auth";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String ENDPOINT_GROUP = "endpoint";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TLS_GROUP = "tls";
+
+  /**
+   * This is where anything at the tail of a log.
+   * entry ends up; it is null unless/until the AWS
+   * logs are enhanced in future.
+   * Value {@value}.
+   */
+  public static final String TAIL_GROUP = "tail";
+
+  /**
+   * Construct the log entry pattern.
+   */
+  public static final String LOG_ENTRY_REGEXP = ""
+      + e(OWNER_GROUP)
+      + e(BUCKET_GROUP)
+      + e(TIMESTAMP_GROUP, DATETIME)
+      + e(REMOTEIP_GROUP)
+      + e(REQUESTER_GROUP)
+      + e(REQUESTID_GROUP)
+      + e(VERB_GROUP)
+      + e(KEY_GROUP)
+      + q(REQUESTURI_GROUP)
+      + e(HTTP_GROUP, NUMBER)
+      + e(AWSERRORCODE_GROUP)
+      + e(BYTESSENT_GROUP)
+      + e(OBJECTSIZE_GROUP)
+      + e(TOTALTIME_GROUP)
+      + e(TURNAROUNDTIME_GROUP)
+      + q(REFERRER_GROUP)
+      + q(USERAGENT_GROUP)
+      + e(VERSION_GROUP)
+      + e(HOSTID_GROUP)
+      + e(SIGV_GROUP)
+      + e(CYPHER_GROUP)
+      + e(AUTH_GROUP)
+      + e(ENDPOINT_GROUP)
+      + eNoTrailing(TLS_GROUP, SIMPLE)
+      + eNoTrailing(TAIL_GROUP, ".*") // anything which follows
+      + "$"; // end of line
+
+  /**
+   * Groups in order.
+   */
+  private static final String[] GROUPS = {
+      OWNER_GROUP,
+      BUCKET_GROUP,
+      TIMESTAMP_GROUP,
+      REMOTEIP_GROUP,
+      REQUESTER_GROUP,
+      REQUESTID_GROUP,
+      VERB_GROUP,
+      KEY_GROUP,
+      REQUESTURI_GROUP,
+      HTTP_GROUP,
+      AWSERRORCODE_GROUP,
+      BYTESSENT_GROUP,
+      OBJECTSIZE_GROUP,
+      TOTALTIME_GROUP,
+      TURNAROUNDTIME_GROUP,
+      REFERRER_GROUP,
+      USERAGENT_GROUP,
+      VERSION_GROUP,
+      HOSTID_GROUP,
+      SIGV_GROUP,
+      CYPHER_GROUP,
+      AUTH_GROUP,
+      ENDPOINT_GROUP,
+      TLS_GROUP,
+      TAIL_GROUP
+  };
+
+  /**
+   * Ordered list of regular expression group names.
+   */
+  public static final List<String> AWS_LOG_REGEXP_GROUPS =
+      Collections.unmodifiableList(Arrays.asList(GROUPS));
+
+  /**
+   * And the actual compiled pattern.
+   */
+  public static final Pattern LOG_ENTRY_PATTERN = Pattern.compile(
+      LOG_ENTRY_REGEXP);
+
+  /**
+   * parseAuditLog method helps in parsing the audit log.
+   * into key-value pairs using regular expression
+   *
+   * @param singleAuditLog this is single audit log from merged audit log file
+   * @return it returns a map i.e, auditLogMap which contains key-value pairs 
of a single audit log
+   */
+  public Map<String, String> parseAuditLog(String singleAuditLog) {
+    Map<String, String> auditLogMap = new HashMap<>();
+    if (singleAuditLog == null || singleAuditLog.length() == 0) {
+      LOG.info(

Review Comment:
   changed to log at debug



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org

Reply via email to