[ https://issues.apache.org/jira/browse/FLINK-1579?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15899185#comment-15899185 ]
ASF GitHub Bot commented on FLINK-1579: --------------------------------------- Github user zentol commented on a diff in the pull request: https://github.com/apache/flink/pull/3460#discussion_r104634695 --- Diff: flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/history/HistoryServer.java --- @@ -0,0 +1,411 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.flink.runtime.webmonitor.history; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.netty.handler.codec.http.router.Router; +import org.apache.flink.api.java.utils.ParameterTool; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.GlobalConfiguration; +import org.apache.flink.configuration.HistoryServerOptions; +import org.apache.flink.core.fs.FileStatus; +import org.apache.flink.core.fs.FileSystem; +import org.apache.flink.core.fs.Path; +import org.apache.flink.runtime.filecache.FileCache; +import org.apache.flink.runtime.net.SSLUtils; +import org.apache.flink.runtime.util.ExecutorThreadFactory; +import org.apache.flink.runtime.webmonitor.handlers.CurrentJobsOverviewHandler; +import org.apache.flink.runtime.webmonitor.handlers.DashboardConfigHandler; +import org.apache.flink.runtime.webmonitor.utils.NettySetup; +import org.apache.flink.util.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.ssl.SSLContext; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TimerTask; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +public class HistoryServer { + + private static final Logger LOG = LoggerFactory.getLogger(HistoryServer.class); + private static final JsonFactory jacksonFactory = new JsonFactory(); + private static final ObjectMapper mapper = new ObjectMapper(); + + private static final String OVERVIEWS_FOLDER_NAME = 
"overviews"; + + private final String webAddress; + private final int webPort; + private final long webRefreshIntervalMillis; + private final File webDir; + + private final long refreshIntervalMillis; + private final List<Path> refreshDirs = new ArrayList<>(); + + /** Map containing the JobID of all fetched jobs and the refreshDir from with they originate. */ + private final Map<String, Path> cachedArchives = new ConcurrentHashMap<>(); + + private final SSLContext serverSSLContext; + private NettySetup netty; + private final Object startupShutdownLock = new Object(); + + private final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor( + new ExecutorThreadFactory("Flink-HistoryServer-FileFetcher")); + private final JobArchiveFetcherTask fetcherTask; + + public static void main(String[] args) throws Exception { + ParameterTool pt = ParameterTool.fromArgs(args); + String configDir = pt.getRequired("configDir"); + + LOG.info("Loading configuration from {}", configDir); + Configuration flinkConfig = GlobalConfiguration.loadConfiguration(configDir); + + HistoryServer hs = new HistoryServer(flinkConfig); + hs.run(); + } + + public HistoryServer(Configuration config) throws IOException { + if (config.getBoolean(HistoryServerOptions.HISTORY_SERVER_WEB_SSL_ENABLED) && SSLUtils.getSSLEnabled(config)) { + LOG.info("Enabling ssl for the history server."); + try { + this.serverSSLContext = SSLUtils.createSSLServerContext(config); + } catch (Exception e) { + throw new IOException("Failed to initialize SSLContext for the history server.", e); + } + } else { + this.serverSSLContext = null; + } + + webAddress = config.getString(HistoryServerOptions.HISTRY_SERVER_WEB_ADDRESS); + webPort = config.getInteger(HistoryServerOptions.HISTORY_SERVER_WEB_PORT); + webRefreshIntervalMillis = config.getLong(HistoryServerOptions.HISTORY_SERVER_WEB_REFRESH_INTERVAL); + + String webDirectory = config.getString(HistoryServerOptions.HISTORY_SERVER_WEB_DIR); + if 
(webDirectory == null) { + webDirectory = System.getProperty("java.io.tmpdir") + "flink-web-history-" + UUID.randomUUID(); + } + webDir = new File(webDirectory); + + String refreshDirectories = config.getString(HistoryServerOptions.HISTORY_SERVER_DIR); + if (refreshDirectories == null) { + throw new IOException(HistoryServerOptions.HISTORY_SERVER_DIR + " was not configured."); + } + for (String refreshDirectory : refreshDirectories.split(",")) { + refreshDirs.add(new Path(refreshDirectory)); + } + refreshIntervalMillis = config.getLong(HistoryServerOptions.HISTORY_SERVER_REFRESH_INTERVAL); + + this.fetcherTask = new JobArchiveFetcherTask(refreshDirs, cachedArchives, webDir); + + // add shutdown hook for deleting the directories and remaining temp files on shutdown + try { + Runtime.getRuntime().addShutdownHook(new Thread() { + @Override + public void run() { + HistoryServer.this.stop(); + } + }); + } catch (IllegalStateException e) { + // race, JVM is in shutdown already, we can safely ignore this + LOG.debug("Unable to add shutdown hook, shutdown already in progress", e); + } catch (Throwable t) { + // these errors usually happen when the shutdown is already in progress + LOG.warn("Error while adding shutdown hook", t); + } + } + + public void run() { + try { + start(); + while (true) { + try { + new CountDownLatch(1).await(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } catch (Exception e) { + LOG.error("Failure while running HistoryServer.", e); + } finally { + stop(); + } + } + + // ================================================================================================================= + // Life-cycle + // ================================================================================================================= + private void start() throws IOException, InterruptedException { + synchronized (startupShutdownLock) { + LOG.info("Starting history server."); + + Files.createDirectories(webDir.toPath()); + 
LOG.info("Using directory {} as local cache.", webDir); + if (LOG.isInfoEnabled()) { + for (Path refreshDir : refreshDirs) { + LOG.info("Monitoring directory {} for archived jobs.", refreshDir); + } + } + + Router router = new Router(); + router.GET("/:*", new HistoryServerStaticFileServerHandler(webDir, new JobFileFetcher(new Path(webDir.getAbsolutePath()), cachedArchives))); + + netty = new NettySetup(router, LOG, webDir, serverSSLContext, webAddress, webPort); + + createDashboardConfigFile(); + updateJobOverview(webDir); + + executor.scheduleWithFixedDelay(fetcherTask, refreshIntervalMillis, refreshIntervalMillis, TimeUnit.MILLISECONDS); + } + } + + private void stop() { + synchronized (startupShutdownLock) { + LOG.info("Stopping history server."); + netty.shutdown(); + shutdownExecutor(); + try { + LOG.info("Removing web dashboard root cache directory {}", webDir); + FileUtils.deleteDirectory(webDir); + } catch (Throwable t) { + LOG.warn("Error while deleting web root directory {}", webDir, t); + } + + LOG.info("Stopped history server."); + } + } + + private void shutdownExecutor() { + if (executor != null) { + executor.shutdown(); + + try { + if (!executor.awaitTermination(1, TimeUnit.SECONDS)) { + executor.shutdownNow(); + } + } catch (InterruptedException ignored) { + executor.shutdownNow(); + } + } + } + + // ================================================================================================================= + // File-fetching + // ================================================================================================================= + + /** + * {@link TimerTask} that polls the directories configured as {@link HistoryServerOptions#HISTORY_SERVER_DIR} for + * new job archives. 
+ */ + private static class JobArchiveFetcherTask extends TimerTask { + private final List<Path> refreshDirs; + private final Map<String, Path> cachedArchives; + private final File webDir; + private final Path webJobDir; + private final Path webOverviewDir; + + public JobArchiveFetcherTask(List<Path> refreshDirs, Map<String, Path> cachedArchives, File webDir) { + this.refreshDirs = refreshDirs; + this.cachedArchives = cachedArchives; + this.webDir = webDir; + this.webJobDir = new Path(webDir.getAbsolutePath(), "jobs"); + this.webOverviewDir = new Path(webDir.getAbsolutePath(), OVERVIEWS_FOLDER_NAME); + } + + @Override + public void run() { + for (Iterator<Path> iterator = refreshDirs.iterator(); iterator.hasNext();) { + Path refreshDir = iterator.next(); + FileSystem fs; + try { + fs = refreshDir.getFileSystem(); + } catch (IOException e) { + LOG.error("Failed to create FileSystem for path {}.", refreshDir, e); + // there's most likely something wrong with the path itself + try { + iterator.remove(); + } catch (Exception ignored) { + //well, we tried + } + continue; + } + + /** + * The expected directory structure is as follows: + * /:refreshDir/:jobid/jobs/:jobid/... 
+ * /:refreshDir/:jobid/jobs/:jobid.json + * /:refreshDir/:jobid/joboverview.json + */ + // contents of /:refreshDir + FileStatus[] jobArchives; + try { + jobArchives = fs.listStatus(refreshDir); + } catch (IOException e) { + LOG.error("Failed to access job archive location for path {}.", refreshDir, e); + continue; + } + boolean updateOverview = false; + for (FileStatus jobArchive : jobArchives) { + Path jobArchivePath = jobArchive.getPath(); + String jobID = jobArchivePath.getName(); + if (cachedArchives.put(jobID, refreshDir) == null) { + try { + // contents of /:refreshDir/:jobid + FileStatus[] archiveFiles = fs.listStatus(jobArchivePath); + for (FileStatus archiveFile : archiveFiles) { + if (archiveFile.isDir()) { + // contents of /:refreshDir/:jobid/jobs + for (FileStatus jobFile : fs.listStatus(archiveFile.getPath())) { + /** + * src: /:refreshDir/:jobid/jobs/X + * dst: /:webDir/jobs/X + * + * X is either :jobid.json or the :jobid directory. + */ + FileCache.copy(jobFile.getPath(), new Path(webJobDir + "/" + jobFile.getPath().getName()), false); + } + } else { + /** + * src: /:refreshDir/:jobid/joboverview.json + * dst: /:webDir/joboverviews/:jobid.json + */ + FileCache.copy(archiveFile.getPath(), new Path(webOverviewDir, jobID + ".json"), false); + updateOverview = true; + } + } + } catch (IOException e) { + LOG.error("Failed to fetch archive file.", e); + cachedArchives.remove(jobID); + continue; + } + } + } + if (updateOverview) { + updateJobOverview(webDir); + } + } + } + } + + /** + * Backup file fetcher that is used by the {@link HistoryServerStaticFileServerHandler} if it cannot find a file + * within a job archive. This class does not share code with the {@link JobArchiveFetcherTask} since the former + * fetches entire archives at a given location based on a path whereas this class only fetches one specific file + * based on a REST URL. 
+ */ + public static class JobFileFetcher { + private final Path webDir; + private final Map<String, Path> cachedArchives; + + public JobFileFetcher(Path webDir, Map<String, Path> cachedArchives) { + this.webDir = webDir; + this.cachedArchives = cachedArchives; + } + + public void fetchFileForRestRequest(String requestPath) throws IOException { + /** + * Here we extract the job ID from the request path. We are either missing a file that resides + * under /jobs/:jobid/* or /jobs/:jobid.json. In the latter case indexOf will + * return -1 so we manually set 'to' to the index before '.json'. + */ + int from = 6; + int to = requestPath.indexOf('/', 7); + if (to == -1) { + to = requestPath.length() - 5; + } + String jobID = requestPath.substring(from, to); + Path sourcePath = new Path(new Path(cachedArchives.get(jobID), jobID), requestPath); + try { + FileCache.copy(sourcePath, new Path(webDir, requestPath), false); + } catch (Exception e) { + LOG.debug("Failed to retrieve file {} for job {}. This may indicate an incomplete or corrupt job archive.", sourcePath, e); + LOG.error("Failed to retrieve file {} for job {}. This may indicate an incomplete or corrupt job archive.", sourcePath, jobID); + } + } + } + + // ================================================================================================================= + // File generation + // ================================================================================================================= + private static FileWriter createOrGetFile(File folder, String name) throws IOException { + File file = new File(folder, name + ".json"); + if (!file.exists()) { + Files.createFile(file.toPath()); + } + FileWriter fr = new FileWriter(file); + return fr; + } + + private void createDashboardConfigFile() throws IOException { --- End diff -- Actually, turning around the inheritance of ```RuntimeMonitorHandler``` and ```RuntimeMonitorHandlerBase``` would do the trick. 
> Create a Flink History Server > ----------------------------- > > Key: FLINK-1579 > URL: https://issues.apache.org/jira/browse/FLINK-1579 > Project: Flink > Issue Type: New Feature > Components: Distributed Coordination > Affects Versions: 0.9 > Reporter: Robert Metzger > Assignee: Chesnay Schepler > > Right now it's not possible to analyze the job results for jobs that ran on > YARN, because we'll lose the information once the JobManager has stopped. > Therefore, I propose to implement a "Flink History Server" which serves the > results from these jobs. > I haven't started thinking about the implementation, but I suspect it > involves some JSON files stored in HDFS :) -- This message was sent by Atlassian JIRA (v6.3.15#6346)