http://git-wip-us.apache.org/repos/asf/incubator-myriad/blob/b5b468b9/myriad-commons/proto/mesos/scheduler.proto ---------------------------------------------------------------------- diff --git a/myriad-commons/proto/mesos/scheduler.proto b/myriad-commons/proto/mesos/scheduler.proto new file mode 100644 index 0000000..0528a7e --- /dev/null +++ b/myriad-commons/proto/mesos/scheduler.proto @@ -0,0 +1,433 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +import "mesos/mesos.proto"; + +package mesos.scheduler; + +option java_package = "org.apache.mesos.scheduler"; +option java_outer_classname = "Protos"; + + +/** + * Scheduler event API. + * + * An event is described using the standard protocol buffer "union" + * trick, see: + * https://developers.google.com/protocol-buffers/docs/techniques#union. + */ +message Event { + // Possible event types, followed by message definitions if + // applicable. + enum Type { + // This must be the first enum value in this list, to + // ensure that if 'type' is not set, the default value + // is UNKNOWN. This enables enum values to be added + // in a backwards-compatible way. See: MESOS-4997. + UNKNOWN = 0; + + SUBSCRIBED = 1; // See 'Subscribed' below. 
+ OFFERS = 2; // See 'Offers' below. + INVERSE_OFFERS = 9; // See 'InverseOffers' below. + RESCIND = 3; // See 'Rescind' below. + RESCIND_INVERSE_OFFER = 10; // See 'RescindInverseOffer' below. + UPDATE = 4; // See 'Update' below. + MESSAGE = 5; // See 'Message' below. + FAILURE = 6; // See 'Failure' below. + ERROR = 7; // See 'Error' below. + + // Periodic message sent by the Mesos master according to + // 'Subscribed.heartbeat_interval_seconds'. If the scheduler does + // not receive any events (including heartbeats) for an extended + // period of time (e.g., 5 x heartbeat_interval_seconds), there is + // likely a network partition. In such a case the scheduler should + // close the existing subscription connection and resubscribe + // using a backoff strategy. + HEARTBEAT = 8; + } + + // First event received when the scheduler subscribes. + message Subscribed { + required FrameworkID framework_id = 1; + + // This value will be set if the master is sending heartbeats. See + // the comment above on 'HEARTBEAT' for more details. + optional double heartbeat_interval_seconds = 2; + + // Since Mesos 1.1. + optional MasterInfo master_info = 3; + } + + // Received whenever there are new resources that are offered to the + // scheduler. Each offer corresponds to a set of resources on an + // agent. Until the scheduler accepts or declines an offer the + // resources are considered allocated to the scheduler. + message Offers { + repeated Offer offers = 1; + } + + // Received whenever there are resources requested back from the + // scheduler. Each inverse offer specifies the agent, and + // optionally specific resources. Accepting or Declining an inverse + // offer informs the allocator of the scheduler's ability to release + // the specified resources without violating an SLA. If no resources + // are specified then all resources on the agent are requested to be + // released. 
+ message InverseOffers { + repeated InverseOffer inverse_offers = 1; + } + + // Received when a particular offer is no longer valid (e.g., the + // slave corresponding to the offer has been removed) and hence + // needs to be rescinded. Any future calls ('Accept' / 'Decline') made + // by the scheduler regarding this offer will be invalid. + message Rescind { + required OfferID offer_id = 1; + } + + // Received when a particular inverse offer is no longer valid + // (e.g., the agent corresponding to the offer has been removed) + // and hence needs to be rescinded. Any future calls ('Accept' / + // 'Decline') made by the scheduler regarding this inverse offer + // will be invalid. + message RescindInverseOffer { + required OfferID inverse_offer_id = 1; + } + + // Received whenever there is a status update that is generated by + // the executor or slave or master. Status updates should be used by + // executors to reliably communicate the status of the tasks that + // they manage. It is crucial that a terminal update (see TaskState + // in mesos.proto) is sent by the executor as soon as the task + // terminates, in order for Mesos to release the resources allocated + // to the task. It is also the responsibility of the scheduler to + // explicitly acknowledge the receipt of a status update. See + // 'Acknowledge' in the 'Call' section below for the semantics. + // + // A task status update may be used for guaranteed delivery of some + // task-related information, e.g., task's health update. Such + // information may be shadowed by subsequent task status updates, that + // do not preserve fields of the previously sent message. + message Update { + required TaskStatus status = 1; + } + + // Received when a custom message generated by the executor is + // forwarded by the master. Note that this message is not + // interpreted by Mesos and is only forwarded (without reliability + // guarantees) to the scheduler. 
It is up to the executor to retry + // if the message is dropped for any reason. + message Message { + required SlaveID slave_id = 1; + required ExecutorID executor_id = 2; + required bytes data = 3; + } + + // Received when a slave is removed from the cluster (e.g., failed + // health checks) or when an executor is terminated. Note that, this + // event coincides with receipt of terminal UPDATE events for any + // active tasks belonging to the slave or executor and receipt of + // 'Rescind' events for any outstanding offers belonging to the + // slave. Note that there is no guaranteed order between the + // 'Failure', 'Update' and 'Rescind' events when a slave or executor + // is removed. + // TODO(vinod): Consider splitting the lost slave and terminated + // executor into separate events and ensure it's reliably generated. + message Failure { + optional SlaveID slave_id = 1; + + // If this was just a failure of an executor on a slave then + // 'executor_id' will be set and possibly 'status' (if we were + // able to determine the exit status). + optional ExecutorID executor_id = 2; + + // On Posix, `status` corresponds to termination information in the + // `stat_loc` area returned from a `waitpid` call. On Windows, `status` + // is obtained via calling the `GetExitCodeProcess()` function. For + // messages coming from Posix agents, schedulers need to apply + // `WEXITSTATUS` family macros or equivalent transformations to obtain + // exit codes. + // + // TODO(alexr): Consider unifying Windows and Posix behavior by returning + // exit code here, see MESOS-7241. + optional int32 status = 3; + } + + // Received when there is an unrecoverable error in the scheduler (e.g., + // scheduler failed over, rate limiting, authorization errors etc.). The + // scheduler should abort on receiving this event. 
+ message Error { + required string message = 1; + } + + // Type of the event, indicates which optional field below should be + // present if that type has a nested message definition. + // Enum fields should be optional, see: MESOS-4997. + optional Type type = 1; + + optional Subscribed subscribed = 2; + optional Offers offers = 3; + optional InverseOffers inverse_offers = 9; + optional Rescind rescind = 4; + optional RescindInverseOffer rescind_inverse_offer = 10; + optional Update update = 5; + optional Message message = 6; + optional Failure failure = 7; + optional Error error = 8; +} + + +/** + * Scheduler call API. + * + * Like Event, a Call is described using the standard protocol buffer + * "union" trick (see above). + */ +message Call { + // Possible call types, followed by message definitions if + // applicable. + enum Type { + // See comments above on `Event::Type` for more details on this enum value. + UNKNOWN = 0; + + SUBSCRIBE = 1; // See 'Subscribe' below. + TEARDOWN = 2; // Shuts down all tasks/executors and removes framework. + ACCEPT = 3; // See 'Accept' below. + DECLINE = 4; // See 'Decline' below. + ACCEPT_INVERSE_OFFERS = 13; // See 'AcceptInverseOffers' below. + DECLINE_INVERSE_OFFERS = 14; // See 'DeclineInverseOffers' below. + REVIVE = 5; // Removes any previous filters set via ACCEPT or DECLINE. + KILL = 6; // See 'Kill' below. + SHUTDOWN = 7; // See 'Shutdown' below. + ACKNOWLEDGE = 8; // See 'Acknowledge' below. + RECONCILE = 9; // See 'Reconcile' below. + MESSAGE = 10; // See 'Message' below. + REQUEST = 11; // See 'Request' below. + SUPPRESS = 12; // Inform master to stop sending offers to the framework. + + // TODO(benh): Consider adding an 'ACTIVATE' and 'DEACTIVATE' for + // already subscribed frameworks as a way of stopping offers from + // being generated and other events from being sent by the master. 
+ // Note that this functionality existed originally to support + // SchedulerDriver::abort which was only necessary to handle + // exceptions getting thrown from within Scheduler callbacks, + // something that is not an issue with the Event/Call API. + } + + // Subscribes the scheduler with the master to receive events. A + // scheduler must send other calls only after it has received the + // SUBSCRIBED event. + message Subscribe { + // See the comments below on 'framework_id' on the semantics for + // 'framework_info.id'. + required FrameworkInfo framework_info = 1; + + // NOTE: 'force' field is not present in v1/scheduler.proto because it is + // only used by the scheduler driver. The driver sets it to true when the + // scheduler re-registers for the first time after a failover. Once + // re-registered all subsequent re-registration attempts (e.g., due to ZK + // blip) will have 'force' set to false. This is important because master + // uses this field to know when it needs to send FrameworkRegisteredMessage + // vs FrameworkReregisteredMessage. + optional bool force = 2; + + // List of suppressed roles for which the framework does not wish to be + // offered resources. The framework can decide to suppress all or a subset + // of roles the framework (re)registers as. + // + // Note: This field is not set by scheduler driver, so will always be + // empty. It is added here for transformation from `v1::Call::Subscribe`. + repeated string suppressed_roles = 3; + } + + // Accepts an offer, performing the specified operations + // in a sequential manner. + // + // E.g. Launch a task with a newly reserved persistent volume: + // + // Accept { + // offer_ids: [ ... ] + // operations: [ + // { type: RESERVE, + // reserve: { resources: [ disk(role):2 ] } } + // { type: CREATE, + // create: { volumes: [ disk(role):1+persistence ] } } + // { type: LAUNCH, + // launch: { task_infos ... 
disk(role):1;disk(role):1+persistence } } + // ] + // } + // + // NOTE: Any of the offer's resources not used in the `Accept` call + // (e.g., to launch a task) are considered unused and might be + // reoffered to other frameworks. In other words, the same `OfferID` + // cannot be used in more than one `Accept` call. + // NOTE: All offers must belong to the same agent. + message Accept { + repeated OfferID offer_ids = 1; + repeated Offer.Operation operations = 2; + optional Filters filters = 3; + } + + // Declines an offer, signaling the master to potentially reoffer + // the resources to a different framework. Note that this is same + // as sending an Accept call with no operations. See comments on + // top of 'Accept' for semantics. + message Decline { + repeated OfferID offer_ids = 1; + optional Filters filters = 2; + } + + // Accepts an inverse offer. Inverse offers should only be accepted + // if the resources in the offer can be safely evacuated before the + // provided unavailability. + message AcceptInverseOffers { + repeated OfferID inverse_offer_ids = 1; + optional Filters filters = 2; + } + + // Declines an inverse offer. Inverse offers should be declined if + // the resources in the offer might not be safely evacuated before + // the provided unavailability. + message DeclineInverseOffers { + repeated OfferID inverse_offer_ids = 1; + optional Filters filters = 2; + } + + // Revive offers for the specified roles. If `roles` is empty, + // the `REVIVE` call will revive offers for all of the roles + // the framework is currently subscribed to. + message Revive { + repeated string roles = 1; + } + + // Kills a specific task. If the scheduler has a custom executor, + // the kill is forwarded to the executor and it is up to the + // executor to kill the task and send a TASK_KILLED (or TASK_FAILED) + // update. Note that Mesos releases the resources for a task once it + // receives a terminal update (See TaskState in mesos.proto) for it. 
+ // If the task is unknown to the master, a TASK_LOST update is + // generated. + // + // If a task within a task group is killed before the group is + // delivered to the executor, all tasks in the task group are + // killed. When a task group has been delivered to the executor, + // it is up to the executor to decide how to deal with the kill. + // Note: the default Mesos executor will currently kill all the + // tasks in the task group if it gets a kill for any task. + message Kill { + required TaskID task_id = 1; + optional SlaveID slave_id = 2; + + // If set, overrides any previously specified kill policy for this task. + // This includes 'TaskInfo.kill_policy' and 'Executor.kill.kill_policy'. + // Can be used to forcefully kill a task which is already being killed. + optional KillPolicy kill_policy = 3; + } + + // Shuts down a custom executor. When the executor gets a shutdown + // event, it is expected to kill all its tasks (and send TASK_KILLED + // updates) and terminate. If the executor doesn't terminate within + // a certain timeout (configurable via + // '--executor_shutdown_grace_period' slave flag), the slave will + // forcefully destroy the container (executor and its tasks) and + // transition its active tasks to TASK_LOST. + message Shutdown { + required ExecutorID executor_id = 1; + required SlaveID slave_id = 2; + } + + // Acknowledges the receipt of status update. Schedulers are + // responsible for explicitly acknowledging the receipt of status + // updates that have 'Update.status().uuid()' field set. Such status + // updates are retried by the slave until they are acknowledged by + // the scheduler. + message Acknowledge { + required SlaveID slave_id = 1; + required TaskID task_id = 2; + required bytes uuid = 3; + } + + // Allows the scheduler to query the status for non-terminal tasks. + // This causes the master to send back the latest task status for + // each task in 'tasks', if possible. 
Tasks that are no longer known + // will result in a TASK_LOST, TASK_UNKNOWN, or TASK_UNREACHABLE update. + // If 'tasks' is empty, then the master will send the latest status + // for each task currently known. + message Reconcile { + // TODO(vinod): Support arbitrary queries rather than just the state of tasks. + message Task { + required TaskID task_id = 1; + optional SlaveID slave_id = 2; + } + + repeated Task tasks = 1; + } + + // Sends arbitrary binary data to the executor. Note that Mesos + // neither interprets this data nor makes any guarantees about the + // delivery of this message to the executor. + message Message { + required SlaveID slave_id = 1; + required ExecutorID executor_id = 2; + required bytes data = 3; + } + + // Requests a specific set of resources from Mesos's allocator. If + // the allocator has support for this, corresponding offers will be + // sent asynchronously via the OFFERS event(s). + // + // NOTE: The built-in hierarchical allocator doesn't have support + // for this call and hence simply ignores it. + message Request { + repeated mesos.Request requests = 1; + } + + // Suppress offers for the specified roles. If `roles` is empty, + // the `SUPPRESS` call will suppress offers for all of the roles + // the framework is currently subscribed to. + message Suppress { + repeated string roles = 1; + } + + // Identifies who generated this call. Master assigns a framework id + // when a new scheduler subscribes for the first time. Once assigned, + // the scheduler must set the 'framework_id' here and within its + // FrameworkInfo (in any further 'Subscribe' calls). This allows the + // master to identify a scheduler correctly across disconnections, + // failovers, etc. + optional FrameworkID framework_id = 1; + + // Type of the call, indicates which optional field below should be + // present if that type has a nested message definition. + // See comments on `Event::Type` above on the reasoning behind this field being optional. 
+ optional Type type = 2; + + optional Subscribe subscribe = 3; + optional Accept accept = 4; + optional Decline decline = 5; + optional AcceptInverseOffers accept_inverse_offers = 13; + optional DeclineInverseOffers decline_inverse_offers = 14; + optional Revive revive = 15; + optional Kill kill = 6; + optional Shutdown shutdown = 7; + optional Acknowledge acknowledge = 8; + optional Reconcile reconcile = 9; + optional Message message = 10; + optional Request request = 11; + optional Suppress suppress = 16; +}
http://git-wip-us.apache.org/repos/asf/incubator-myriad/blob/b5b468b9/myriad-commons/proto/mesos/v1/executor.proto ---------------------------------------------------------------------- diff --git a/myriad-commons/proto/mesos/v1/executor.proto b/myriad-commons/proto/mesos/v1/executor.proto new file mode 100644 index 0000000..b2ef325 --- /dev/null +++ b/myriad-commons/proto/mesos/v1/executor.proto @@ -0,0 +1,214 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +import "mesos/v1/mesos.proto"; + +package mesos.v1.executor; + +option java_package = "org.apache.mesos.v1.executor"; +option java_outer_classname = "Protos"; + + +/** + * Executor event API. + * + * An event is described using the standard protocol buffer "union" + * trick, see https://developers.google.com/protocol-buffers/docs/techniques#union. + */ +message Event { + // Possible event types, followed by message definitions if + // applicable. + enum Type { + // This must be the first enum value in this list, to + // ensure that if 'type' is not set, the default value + // is UNKNOWN. This enables enum values to be added + // in a backwards-compatible way. See: MESOS-4997. + UNKNOWN = 0; + + SUBSCRIBED = 1; // See 'Subscribed' below. 
+ LAUNCH = 2; // See 'Launch' below. + LAUNCH_GROUP = 8; // See 'LaunchGroup' below. + KILL = 3; // See 'Kill' below. + ACKNOWLEDGED = 4; // See 'Acknowledged' below. + MESSAGE = 5; // See 'Message' below. + ERROR = 6; // See 'Error' below. + + // Received when the agent asks the executor to shutdown/kill itself. + // The executor is then required to kill all its active tasks, send + // `TASK_KILLED` status updates and gracefully exit. The executor + // should terminate within a `MESOS_EXECUTOR_SHUTDOWN_GRACE_PERIOD` + // (an environment variable set by the agent upon executor startup); + // it can be configured via `ExecutorInfo.shutdown_grace_period`. If + // the executor fails to do so, the agent will forcefully destroy the + // container where the executor is running. The agent would then send + // `TASK_LOST` updates for any remaining active tasks of this executor. + // + // NOTE: The executor must not assume that it will always be allotted + // the full grace period, as the agent may decide to allot a shorter + // period and failures / forcible terminations may occur. + // + // TODO(alexr): Consider adding a duration field into the `Shutdown` + // message so that the agent can communicate when a shorter period + // has been allotted. + SHUTDOWN = 7; + } + + // First event received when the executor subscribes. + // The 'id' field in the 'framework_info' will be set. + message Subscribed { + required ExecutorInfo executor_info = 1; + required FrameworkInfo framework_info = 2; + required AgentInfo agent_info = 3; + + // Uniquely identifies the container of an executor run. + optional ContainerID container_id = 4; + } + + // Received when the framework attempts to launch a task. Once + // the task is successfully launched, the executor must respond with + // a TASK_RUNNING update (See TaskState in v1/mesos.proto). + message Launch { + required TaskInfo task = 1; + } + + // Received when the framework attempts to launch a group of tasks atomically. 
+ // Similar to `Launch` above, the executor must send TASK_RUNNING updates for + // tasks that are successfully launched. + message LaunchGroup { + required TaskGroupInfo task_group = 1; + } + + // Received when the scheduler wants to kill a specific task. Once + // the task is terminated, the executor should send a TASK_KILLED + // (or TASK_FAILED) update. The terminal update is necessary so + // Mesos can release the resources associated with the task. + message Kill { + required TaskID task_id = 1; + + // If set, overrides any previously specified kill policy for this task. + // This includes 'TaskInfo.kill_policy' and 'Executor.kill.kill_policy'. + // Can be used to forcefully kill a task which is already being killed. + optional KillPolicy kill_policy = 2; + } + + // Received when the agent acknowledges the receipt of status + // update. Schedulers are responsible for explicitly acknowledging + // the receipt of status updates that have 'update.status().uuid()' + // field set. Unacknowledged updates can be retried by the executor. + // They should also be sent by the executor whenever it + // re-subscribes. + message Acknowledged { + required TaskID task_id = 1; + required bytes uuid = 2; + } + + // Received when a custom message generated by the scheduler is + // forwarded by the agent. Note that this message is not + // interpreted by Mesos and is only forwarded (without reliability + // guarantees) to the executor. It is up to the scheduler to retry + // if the message is dropped for any reason. + message Message { + required bytes data = 1; + } + + // Received in case the executor sends invalid calls (e.g., + // required values not set). + // TODO(arojas): Remove this once the old executor driver is no + // longer supported. With HTTP API all errors will be signaled via + // HTTP response codes. 
+ message Error { + required string message = 1; + } + + // Type of the event, indicates which optional field below should be + // present if that type has a nested message definition. + // Enum fields should be optional, see: MESOS-4997. + optional Type type = 1; + + optional Subscribed subscribed = 2; + optional Acknowledged acknowledged = 3; + optional Launch launch = 4; + optional LaunchGroup launch_group = 8; + optional Kill kill = 5; + optional Message message = 6; + optional Error error = 7; +} + + +/** + * Executor call API. + * + * Like Event, a Call is described using the standard protocol buffer + * "union" trick (see above). + */ + message Call { + // Possible call types, followed by message definitions if + // applicable. + enum Type { + // See comments above on `Event::Type` for more details on this enum value. + UNKNOWN = 0; + + SUBSCRIBE = 1; // See 'Subscribe' below. + UPDATE = 2; // See 'Update' below. + MESSAGE = 3; // See 'Message' below. + } + + // Request to subscribe with the agent. If subscribing after a disconnection, + // it must include a list of all the tasks and updates which haven't been + // acknowledged by the scheduler. + message Subscribe { + repeated TaskInfo unacknowledged_tasks = 1; + repeated Update unacknowledged_updates = 2; + } + + // Notifies the scheduler that a task has transitioned from one + // state to another. Status updates should be used by executors + // to reliably communicate the status of the tasks that they + // manage. It is crucial that a terminal update (see TaskState + // in v1/mesos.proto) is sent to the scheduler as soon as the task + // terminates, in order for Mesos to release the resources allocated + // to the task. It is the responsibility of the scheduler to + // explicitly acknowledge the receipt of a status update. See + // 'Acknowledged' in the 'Events' section above for the semantics. + message Update { + required TaskStatus status = 1; + } + + // Sends arbitrary binary data to the scheduler. 
Note that Mesos + // neither interprets this data nor makes any guarantees about the + // delivery of this message to the scheduler. + // See 'Message' in the 'Events' section. + message Message { + required bytes data = 2; + } + + // Identifies the executor which generated this call. + required ExecutorID executor_id = 1; + required FrameworkID framework_id = 2; + + // Type of the call, indicates which optional field below should be + // present if that type has a nested message definition. + // In case type is SUBSCRIBE, no message needs to be set. + // See comments on `Event::Type` above on the reasoning behind this + // field being optional. + optional Type type = 3; + + optional Subscribe subscribe = 4; + optional Update update = 5; + optional Message message = 6; +}