http://git-wip-us.apache.org/repos/asf/incubator-myriad/blob/b5b468b9/myriad-commons/src/main/java/org/apache/mesos/Scheduler.java ---------------------------------------------------------------------- diff --git a/myriad-commons/src/main/java/org/apache/mesos/Scheduler.java b/myriad-commons/src/main/java/org/apache/mesos/Scheduler.java new file mode 100644 index 0000000..da65116 --- /dev/null +++ b/myriad-commons/src/main/java/org/apache/mesos/Scheduler.java @@ -0,0 +1,211 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mesos; + +import java.util.List; + +import org.apache.mesos.Protos.*; +/** + * Callback interface to be implemented by frameworks' + * schedulers. Note that only one callback will be invoked at a time, + * so it is not recommended that you block within a callback because + * it may cause a deadlock. + * <p> + * Each callback includes a reference to the scheduler driver that was + * used to run this scheduler. The reference will not change for the + * duration of a scheduler (i.e., from the point you do {@link + * SchedulerDriver#start} to the point that {@link + * SchedulerDriver#join} returns). 
This is intended for convenience so + * that a scheduler doesn't need to store a reference to the driver + * itself. + */ +public interface Scheduler { + /** + * Invoked when the scheduler successfully registers with a Mesos + * master. A unique ID (generated by the master) used for + * distinguishing this framework from others and MasterInfo + * with the IP and port of the current master are provided as arguments. + * + * @param driver The scheduler driver that was registered. + * @param frameworkId The framework ID generated by the master. + * @param masterInfo Info about the current master, including IP and port. + * + * @see SchedulerDriver + * @see FrameworkID + * @see MasterInfo + */ + void registered(SchedulerDriver driver, + FrameworkID frameworkId, + MasterInfo masterInfo); + + /** + * Invoked when the scheduler re-registers with a newly elected Mesos master. + * This is only called when the scheduler has previously been registered. + * MasterInfo containing the updated information about the elected master + * is provided as an argument. + * + * @param driver The driver that was re-registered. + * @param masterInfo The updated information about the elected master. + * + * @see SchedulerDriver + * @see MasterInfo + */ + void reregistered(SchedulerDriver driver, MasterInfo masterInfo); + + /** + * Invoked when resources have been offered to this framework. A + * single offer will only contain resources from a single slave. + * Resources associated with an offer will not be re-offered to + * _this_ framework until either (a) this framework has rejected + * those resources (see {@link SchedulerDriver#launchTasks}) or (b) + * those resources have been rescinded (see {@link Scheduler#offerRescinded}). + * Note that resources may be concurrently offered to more than one + * framework at a time (depending on the allocator being used). 
In + * that case, the first framework to launch tasks using those + * resources will be able to use them while the other frameworks + * will have those resources rescinded (or if a framework has + * already launched tasks with those resources then those tasks will + * fail with a TASK_LOST status and a message saying as much). + * + * @param driver The driver that was used to run this scheduler. + * @param offers The resources offered to this framework. + * + * @see SchedulerDriver + * @see Offer + */ + void resourceOffers(SchedulerDriver driver, List<Offer> offers); + + /** + * Invoked when an offer is no longer valid (e.g., the slave was + * lost or another framework used resources in the offer). If for + * whatever reason an offer is never rescinded (e.g., dropped + * message, failing over framework, etc.), a framework that attempts + * to launch tasks using an invalid offer will receive TASK_LOST + * status updates for those tasks (see {@link #resourceOffers}). + * + * @param driver The driver that was used to run this scheduler. + * @param offerId The ID of the offer that was rescinded. + * + * @see SchedulerDriver + * @see OfferID + */ + void offerRescinded(SchedulerDriver driver, OfferID offerId); + + /** + * Invoked when the status of a task has changed (e.g., a slave is + * lost and so the task is lost, a task finishes and an executor + * sends a status update saying so, etc.). If implicit + * acknowledgements are being used, then returning from this + * callback _acknowledges_ receipt of this status update! If for + * whatever reason the scheduler aborts during this callback (or + * the process exits) another status update will be delivered (note, + * however, that this is currently not true if the slave sending the + * status update is lost/fails during that time). If explicit + * acknowledgements are in use, the scheduler must acknowledge this + * status on the driver. + * + * @param driver The driver that was used to run this scheduler. 
+ * @param status The status update, which includes the task ID and status. + * + * @see SchedulerDriver + * @see TaskStatus + */ + void statusUpdate(SchedulerDriver driver, TaskStatus status); + + /** + * Invoked when an executor sends a message. These messages are best + * effort; do not expect a framework message to be retransmitted in + * any reliable fashion. + * + * @param driver The driver that received the message. + * @param executorId The ID of the executor that sent the message. + * @param slaveId The ID of the slave that launched the executor. + * @param data The message payload. + * + * @see SchedulerDriver + * @see ExecutorID + * @see SlaveID + */ + void frameworkMessage(SchedulerDriver driver, + ExecutorID executorId, + SlaveID slaveId, + byte[] data); + + /** + * Invoked when the scheduler becomes "disconnected" from the master + * (e.g., the master fails and another is taking over). + * + * @param driver The driver that was used to run this scheduler. + * + * @see SchedulerDriver + */ + void disconnected(SchedulerDriver driver); + + /** + * Invoked when a slave has been determined unreachable (e.g., + * machine failure, network partition). Most frameworks will need to + * reschedule any tasks launched on this slave on a new slave. + * + * NOTE: This callback is not reliably delivered. If a host or + * network failure causes messages between the master and the + * scheduler to be dropped, this callback may not be invoked. + * + * @param driver The driver that was used to run this scheduler. + * @param slaveId The ID of the slave that was lost. + * + * @see SchedulerDriver + * @see SlaveID + */ + void slaveLost(SchedulerDriver driver, SlaveID slaveId); + + /** + * Invoked when an executor has exited/terminated. Note that any + * tasks running will have TASK_LOST status updates automatically + * generated. + * + * NOTE: This callback is not reliably delivered. 
If a host or + * network failure causes messages between the master and the + * scheduler to be dropped, this callback may not be invoked. + * + * @param driver The driver that was used to run this scheduler. + * @param executorId The ID of the executor that was lost. + * @param slaveId The ID of the slave that launched the executor. + * @param status The exit status of the executor. + * + * @see SchedulerDriver + * @see ExecutorID + * @see SlaveID + * @see Status + */ + void executorLost(SchedulerDriver driver, + ExecutorID executorId, + SlaveID slaveId, + int status); + + /** + * Invoked when there is an unrecoverable error in the scheduler or + * driver. The driver will be aborted BEFORE invoking this callback. + * + * @param driver The driver that was used to run this scheduler. + * @param message The error message. + * + * @see SchedulerDriver + */ + void error(SchedulerDriver driver, String message); +}
http://git-wip-us.apache.org/repos/asf/incubator-myriad/blob/b5b468b9/myriad-commons/src/main/java/org/apache/mesos/SchedulerDriver.java ---------------------------------------------------------------------- diff --git a/myriad-commons/src/main/java/org/apache/mesos/SchedulerDriver.java b/myriad-commons/src/main/java/org/apache/mesos/SchedulerDriver.java new file mode 100644 index 0000000..7de5515 --- /dev/null +++ b/myriad-commons/src/main/java/org/apache/mesos/SchedulerDriver.java @@ -0,0 +1,325 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mesos; + +import java.util.Collection; + +import org.apache.mesos.Protos.*; +/** + * Abstract interface for connecting a scheduler to Mesos. This + * interface is used both to manage the scheduler's lifecycle (start + * it, stop it, or wait for it to finish) and to interact with Mesos + * (e.g., launch tasks, kill tasks, etc.). + */ +public interface SchedulerDriver { + /** + * Starts the scheduler driver. This needs to be called before any + * other driver calls are made. + * + * @return The state of the driver after the call. + * + * @see Status + */ + Status start(); + + /** + * Stops the scheduler driver. 
If the 'failover' flag is set to + * false then it is expected that this framework will never + * reconnect to Mesos. So Mesos will unregister the framework + * and shutdown all its tasks and executors. If 'failover' is true, + * all executors and tasks will remain running (for some framework + * specific failover timeout) allowing the scheduler to reconnect + * (possibly in the same process, or from a different process, for + * example, on a different machine). + * + * @param failover Whether framework failover is expected. + * + * @return The state of the driver after the call. + * + * @see Status + */ + Status stop(boolean failover); + + /** + * Stops the scheduler driver assuming no failover. This will + * cause Mesos to unregister the framework and shutdown all + * its tasks and executors. Please see {@link #stop(boolean)} + * for more details. + * + * @return The state of the driver after the call. + */ + Status stop(); + + /** + * Aborts the driver so that no more callbacks can be made to the + * scheduler. The semantics of abort and stop have deliberately been + * separated so that code can detect an aborted driver (i.e., via + * the return status of {@link #join}, see below), and instantiate + * and start another driver if desired (from within the same + * process). + * + * @return The state of the driver after the call. + */ + Status abort(); + + /** + * Waits for the driver to be stopped or aborted, possibly + * <i>blocking</i> the current thread indefinitely. The return status of + * this function can be used to determine if the driver was aborted + * (see mesos.proto for a description of Status). + * + * @return The state of the driver after the call. + */ + Status join(); + + /** + * Starts and immediately joins (i.e., blocks on) the driver. + * + * @return The state of the driver after the call. 
+ */ + Status run(); + + /** + * Requests resources from Mesos (see mesos.proto for a description + * of Request and how, for example, to request resources + * from specific slaves). Any resources available are offered to the + * framework via {@link Scheduler#resourceOffers} callback, + * asynchronously. + * + * @param requests The resource requests. + * + * @return The state of the driver after the call. + * + * @see Request + * @see Status + */ + Status requestResources(Collection<Request> requests); + + /** + * Launches the given set of tasks. Any remaining resources (i.e., + * those that are not used by the launched tasks or their executors) + * will be considered declined. Note that this includes resources + * used by tasks that the framework attempted to launch but failed + * (with TASK_ERROR) due to a malformed task description. The + * specified filters are applied on all unused resources (see + * mesos.proto for a description of Filters). Available resources + * are aggregated when multiple offers are provided. Note that all + * offers must belong to the same slave. Invoking this function with + * an empty collection of tasks declines offers in their entirety + * (see {@link #declineOffer}). + * + * @param offerIds The collection of offer IDs. + * @param tasks The collection of tasks to be launched. + * @param filters The filters to set for any remaining resources. + * + * @return The state of the driver after the call. + * + * @see OfferID + * @see TaskInfo + * @see Filters + * @see Status + */ + Status launchTasks(Collection<OfferID> offerIds, + Collection<TaskInfo> tasks, + Filters filters); + + /** + * Launches the given set of tasks. See above for details. + * Note that this may add a default filter (see mesos.proto) + * for the remaining resources. Notably the MesosSchedulerDriver + * does so. + * + * + * @param offerIds The collection of offer IDs. + * @param tasks The collection of tasks to be launched. 
+ * + * @return The state of the driver after the call. + */ + Status launchTasks(Collection<OfferID> offerIds, Collection<TaskInfo> tasks); + + /** + * @deprecated Use {@link #launchTasks(Collection, Collection, Filters)} instead. + * + * @param offerId The offer ID. + * @param tasks The collection of tasks to be launched. + * @param filters The filters to set for any remaining resources. + * + * @return The state of the driver after the call. + */ + @Deprecated + Status launchTasks(OfferID offerId, + Collection<TaskInfo> tasks, + Filters filters); + + /** + * @deprecated Use {@link #launchTasks(Collection, Collection)} instead. + * Note that this may add a default filter (see mesos.proto) + * for the remaining resources. Notably the MesosSchedulerDriver + * does so. + * + * @param offerId The offer ID. + * @param tasks The collection of tasks to be launched. + * + * @return The state of the driver after the call. + */ + @Deprecated + Status launchTasks(OfferID offerId, Collection<TaskInfo> tasks); + + /** + * Kills the specified task. Note that attempting to kill a task is + * currently not reliable. If, for example, a scheduler fails over + * while it was attempting to kill a task it will need to retry in + * the future. Likewise, if unregistered / disconnected, the request + * will be dropped (these semantics may be changed in the future). + * + * @param taskId The ID of the task to be killed. + * + * @return The state of the driver after the call. + */ + Status killTask(TaskID taskId); + + /** + * Accepts the given offers and performs a sequence of operations on + * those accepted offers. See Offer.Operation in mesos.proto for the + * set of available operations. Any remaining resources (i.e., those + * that are not used by the launched tasks or their executors) will + * be considered declined. Note that this includes resources used by + * tasks that the framework attempted to launch but failed (with + * TASK_ERROR) due to a malformed task description. 
The specified + * filters are applied on all unused resources (see mesos.proto for + * a description of Filters). Available resources are aggregated + * when multiple offers are provided. Note that all offers must + * belong to the same slave. + * + * @param offerIds The collection of offer IDs. + * @param operations The collection of offer operations to perform. + * @param filters The filters to set for any remaining resources. + * + * @return The state of the driver after the call. + * + * @see OfferID + * @see Offer.Operation + * @see Filters + * @see Status + */ + Status acceptOffers(Collection<OfferID> offerIds, + Collection<Offer.Operation> operations, + Filters filters); + + /** + * Declines an offer in its entirety and applies the specified + * filters on the resources (see mesos.proto for a description of + * Filters). Note that this can be done at any time, it is not + * necessary to do this within the {@link Scheduler#resourceOffers} + * callback. + * + * @param offerId The ID of the offer to be declined. + * @param filters The filters to set for any remaining resources. + * + * @return The state of the driver after the call. + * + * @see OfferID + * @see Filters + * @see Status + */ + Status declineOffer(OfferID offerId, Filters filters); + + /** + * Declines an offer in its entirety. See above for details. + * + * @param offerId The ID of the offer to be declined. + * + * @return The state of the driver after the call. + * + * @see OfferID + * @see Status + */ + Status declineOffer(OfferID offerId); + + /** + * Removes all filters, previously set by the framework (via {@link + * #launchTasks}). This enables the framework to receive offers + * from those filtered slaves. + * + * @return The state of the driver after the call. + * + * @see Status + */ + Status reviveOffers(); + + /** + * Inform Mesos master to stop sending offers to the framework. The + * scheduler should call reviveOffers() to resume getting offers. 
+ * + * @return The state of the driver after the call. + * + * @see Status + */ + Status suppressOffers(); + + /** + * Acknowledges the status update. This should only be called + * once the status update is processed durably by the scheduler. + * Note that explicit acknowledgements must be requested via the + * constructor argument, otherwise a call to this method will + * cause the driver to crash. + * + * @param status The status to acknowledge. + * + * @return The state of the driver after the call. + * + * @see TaskStatus + */ + Status acknowledgeStatusUpdate(TaskStatus status); + + /** + * Sends a message from the framework to one of its executors. These + * messages are best effort; do not expect a framework message to be + * retransmitted in any reliable fashion. + * + * @param executorId The ID of the executor to send the message to. + * @param slaveId The ID of the slave that is running the executor. + * @param data The message. + * + * @return The state of the driver after the call. + * + * @see ExecutorID + * @see SlaveID + */ + Status sendFrameworkMessage(ExecutorID executorId, + SlaveID slaveId, + byte[] data); + + /** + * Allows the framework to query the status for non-terminal tasks. + * This causes the master to send back the latest task status for + * each task in 'statuses', if possible. Tasks that are no longer + * known will result in a TASK_LOST update. If statuses is empty, + * then the master will send the latest status for each task + * currently known. + * + * @param statuses The collection of non-terminal TaskStatuses to reconcile. + * + * @return The state of the driver after the call. + * + * @see TaskStatus + * @see SlaveID + */ + Status reconcileTasks(Collection<TaskStatus> statuses); +}