Github user tgravescs commented on a diff in the pull request: https://github.com/apache/spark/pull/14659#discussion_r79844130 --- Diff: core/src/main/scala/org/apache/spark/util/Utils.scala --- @@ -2420,29 +2420,54 @@ private[spark] object Utils extends Logging { } } +/** + * A utility class used to set up Spark caller contexts to HDFS and Yarn. The `context` will be + * constructed by parameters passed in. + * When Spark applications run on Yarn and HDFS, their caller contexts will be written into the Yarn RM + * audit log and hdfs-audit.log. That can help users to better diagnose and understand how + * specific applications impact parts of the Hadoop system and the potential problems they may be + * creating (e.g. overloading NN). As mentioned in HDFS-9184, for a given HDFS operation, it's + * very helpful to track which upper-level job issued it. + * + * @param from who sets up the caller context (TASK, CLIENT, APPLICATION_MASTER) + * + * The parameters below are optional: + * @param appID id of the app this task belongs to + * @param appAttemptID attempt id of the app this task belongs to + * @param jobID id of the job this task belongs to + * @param stageID id of the stage this task belongs to + * @param stageAttemptId attempt id of the stage this task belongs to + * @param taskId task id + * @param taskAttemptNumber task attempt id + * @since 2.0.1 + */ private[spark] class CallerContext( - appName: Option[String] = None, - appID: Option[String] = None, - appAttemptID: Option[String] = None, - jobID: Option[Int] = None, - stageID: Option[Int] = None, + from: String, + appId: Option[String] = None, + appAttemptId: Option[String] = None, + jobId: Option[Int] = None, + stageId: Option[Int] = None, stageAttemptId: Option[Int] = None, taskId: Option[Long] = None, taskAttemptNumber: Option[Int] = None) extends Logging { - val AppName = if (appName.isDefined) s"_AppName_${appName.get}" else "" - val AppID = if (appID.isDefined) s"_AppID_${appID.get}" else "" - val 
AppAttemptID = if (appAttemptID.isDefined) s"_${appAttemptID.get}" else "" - val JobID = if (jobID.isDefined) s"_JobID_${jobID.get}" else "" - val StageID = if (stageID.isDefined) s"_StageID_${stageID.get}" else "" - val StageAttemptId = if (stageAttemptId.isDefined) s"_${stageAttemptId.get}" else "" + val AppId = if (appId.isDefined) s"_AppId_${appId.get}" else "" --- End diff -- As mentioned, please remove the `AppId_` prefix. The application id is pretty obvious in the logs — it starts with `application_` — so there is no need to print the extra characters.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org