// Source: http://git-wip-us.apache.org/repos/asf/incubator-myriad/blob/b5b468b9/myriad-commons/proto/mesos/v1/mesos.proto
// Recovered from a patch with the following header:
//   diff --git a/myriad-commons/proto/mesos/v1/mesos.proto b/myriad-commons/proto/mesos/v1/mesos.proto
//   new file mode 100644
//   index 0000000..4d905d3
//   --- /dev/null
//   +++ b/myriad-commons/proto/mesos/v1/mesos.proto
//   @@ -0,0 +1,3173 @@

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mesos.v1;

option java_package = "org.apache.mesos.v1";
option java_outer_classname = "Protos";


/**
 * Status is used to indicate the state of the scheduler and executor
 * driver after function calls.
 */
enum Status {
  DRIVER_NOT_STARTED = 1;
  DRIVER_RUNNING = 2;
  DRIVER_ABORTED = 3;
  DRIVER_STOPPED = 4;
}


/**
 * A unique ID assigned to a framework. A framework can reuse this ID
 * in order to do failover (see MesosSchedulerDriver).
 */
message FrameworkID {
  required string value = 1;
}


/**
 * A unique ID assigned to an offer.
 */
message OfferID {
  required string value = 1;
}


/**
 * A unique ID assigned to an agent. Currently, an agent gets a new ID
 * whenever it (re)registers with Mesos.
* Framework writers shouldn't
 * assume any binding between an agent ID and a hostname.
 */
message AgentID {
  required string value = 1;
}


/**
 * A framework-generated ID to distinguish a task. The ID must remain
 * unique while the task is active. A framework can reuse an ID _only_
 * if the previous task with the same ID has reached a terminal state
 * (e.g., TASK_FINISHED, TASK_KILLED, etc.). However, reusing task IDs
 * is strongly discouraged (MESOS-2198).
 */
message TaskID {
  required string value = 1;
}


/**
 * A framework-generated ID to distinguish an executor. Only one
 * executor with the same ID can be active on the same agent at a
 * time. However, reusing executor IDs is discouraged.
 */
message ExecutorID {
  required string value = 1;
}


/**
 * ID used to uniquely identify a container. If the `parent` is not
 * specified, the ID is a UUID generated by the agent to uniquely
 * identify the container of an executor run. If the `parent` field is
 * specified, it represents a nested container.
 */
message ContainerID {
  required string value = 1;
  optional ContainerID parent = 2;
}


/**
 * A unique ID assigned to a resource provider. Currently, a resource
 * provider gets a new ID whenever it (re)registers with Mesos.
 */
message ResourceProviderID {
  required string value = 1;
}


/**
 * Represents time since the epoch, in nanoseconds.
 */
message TimeInfo {
  required int64 nanoseconds = 1;
}


/**
 * Represents duration in nanoseconds.
 */
message DurationInfo {
  required int64 nanoseconds = 1;
}


/**
 * A network address.
 *
 * TODO(bmahler): Use this more widely.
 */
message Address {
  // May contain a hostname, IP address, or both.
  optional string hostname = 1;
  optional string ip = 2;

  required int32 port = 3;
}


/**
 * Represents a URL.
*/
message URL {
  required string scheme = 1;
  required Address address = 2;
  optional string path = 3;
  repeated Parameter query = 4;
  optional string fragment = 5;
}


/**
 * Represents an interval, from a given start time over a given duration.
 * This interval pertains to an unavailability event, such as maintenance,
 * and is not a generic interval.
 */
message Unavailability {
  required TimeInfo start = 1;

  // When added to `start`, this represents the end of the interval.
  // If unspecified, the duration is assumed to be infinite.
  optional DurationInfo duration = 2;

  // TODO(josephw): Add additional fields for expressing the purpose and
  // urgency of the unavailability event.
}


/**
 * Represents a single machine, which may hold one or more agents.
 *
 * NOTE: In order to match an agent to a machine, both the `hostname` and
 * `ip` must match the values advertised by the agent to the master.
 * Hostname is not case-sensitive.
 */
message MachineID {
  optional string hostname = 1;
  optional string ip = 2;
}


/**
 * Holds information about a single machine, its `mode`, and any other
 * relevant information which may affect the behavior of the machine.
 */
message MachineInfo {
  // Describes the several states that a machine can be in. A `Mode`
  // applies to a machine and to all associated agents on the machine.
  enum Mode {
    // In this mode, a machine is behaving normally;
    // offering resources, executing tasks, etc.
    UP = 1;

    // In this mode, all agents on the machine are expected to cooperate with
    // frameworks to drain resources. In general, draining is done ahead of
    // a pending `unavailability`. The resources should be drained so as to
    // maximize utilization prior to the maintenance but without knowingly
    // violating the frameworks' requirements.
    DRAINING = 2;

    // In this mode, a machine is not running any tasks and will not offer
    // any of its resources. Agents on the machine will not be allowed to
    // register with the master.
    DOWN = 3;
  }

  required MachineID id = 1;
  optional Mode mode = 2;

  // Signifies that the machine may be unavailable during the given interval.
  // See comments in `Unavailability` and for the `unavailability` fields
  // in `Offer` and `InverseOffer` for more information.
  optional Unavailability unavailability = 3;
}


/**
 * Describes a framework.
 */
message FrameworkInfo {
  // Used to determine the Unix user that an executor or task should be
  // launched as.
  //
  // When using the MesosSchedulerDriver, if the field is set to an
  // empty string, it will automagically set it to the current user.
  //
  // When using the HTTP Scheduler API, the user has to be set
  // explicitly.
  required string user = 1;

  // Name of the framework that shows up in the Mesos Web UI.
  required string name = 2;

  // Note that 'id' is only available after a framework has
  // registered, however, it is included here in order to facilitate
  // scheduler failover (i.e., if it is set then the
  // MesosSchedulerDriver expects the scheduler is performing
  // failover).
  optional FrameworkID id = 3;

  // The amount of time (in seconds) that the master will wait for the
  // scheduler to failover before it tears down the framework by
  // killing all its tasks/executors. This should be non-zero if a
  // framework expects to reconnect after a failure and not lose its
  // tasks/executors.
  //
  // NOTE: To avoid accidental destruction of tasks, production
  // frameworks typically set this to a large value (e.g., 1 week).
  optional double failover_timeout = 4 [default = 0.0];

  // If set, agents running tasks started by this framework will write
  // the framework pid, executor pids and status updates to disk. If
  // the agent exits (e.g., due to a crash or as part of upgrading
  // Mesos), this checkpointed data allows the restarted agent to
  // reconnect to executors that were started by the old instance of
  // the agent. Enabling checkpointing improves fault tolerance, at
  // the cost of a (usually small) increase in disk I/O.
  optional bool checkpoint = 5 [default = false];

  // Roles are the entities to which allocations are made.
  // The framework must have at least one role in order to
  // be offered resources. Note that `role` is deprecated
  // in favor of `roles` and only one of these fields must
  // be used. Since we cannot distinguish between empty
  // `roles` and the default unset `role`, we require that
  // frameworks set the `MULTI_ROLE` capability if
  // setting the `roles` field.
  optional string role = 6 [default = "*", deprecated=true];
  repeated string roles = 12;

  // Used to indicate the current host from which the scheduler is
  // registered in the Mesos Web UI. If set to an empty string Mesos
  // will automagically set it to the current hostname if one is
  // available.
  optional string hostname = 7;

  // This field should match the credential's principal the framework
  // uses for authentication. This field is used for framework API
  // rate limiting and dynamic reservations. It should be set even
  // if authentication is not enabled if these features are desired.
  optional string principal = 8;

  // This field allows a framework to advertise its web UI, so that
  // the Mesos web UI can link to it. It is expected to be a full URL,
  // for example http://my-scheduler.example.com:8080/.
  optional string webui_url = 9;

  message Capability {
    enum Type {
      // This must be the first enum value in this list, to
      // ensure that if 'type' is not set, the default value
      // is UNKNOWN. This enables enum values to be added
      // in a backwards-compatible way. See: MESOS-4997.
      UNKNOWN = 0;

      // Receive offers with revocable resources. See 'Resource'
      // message for details.
      REVOCABLE_RESOURCES = 1;

      // Receive the TASK_KILLING TaskState when a task is being
      // killed by an executor. The executor will examine this
      // capability to determine whether it can send TASK_KILLING.
      TASK_KILLING_STATE = 2;

      // Indicates whether the framework is aware of GPU resources.
      // Frameworks that are aware of GPU resources are expected to
      // avoid placing non-GPU workloads on GPU agents, in order
      // to avoid occupying a GPU agent and preventing GPU workloads
      // from running! Currently, if a framework is unaware of GPU
      // resources, it will not be offered *any* of the resources on
      // an agent with GPUs. This restriction is in place because we
      // do not have a revocation mechanism that ensures GPU workloads
      // can evict GPU agent occupants if necessary.
      //
      // TODO(bmahler): As we add revocation we can relax the
      // restriction here. See MESOS-5634 for more information.
      GPU_RESOURCES = 3;

      // Receive offers with resources that are shared.
      SHARED_RESOURCES = 4;

      // Indicates that (1) the framework is prepared to handle the
      // following TaskStates: TASK_UNREACHABLE, TASK_DROPPED,
      // TASK_GONE, TASK_GONE_BY_OPERATOR, and TASK_UNKNOWN, and (2)
      // the framework will assume responsibility for managing
      // partitioned tasks that reregister with the master.
      //
      // Frameworks that enable this capability can define how they
      // would like to handle partitioned tasks. Frameworks will
      // receive TASK_UNREACHABLE for tasks on agents that are
      // partitioned from the master. If/when a partitioned agent
      // reregisters, tasks on the agent that were started by
      // PARTITION_AWARE frameworks will not be killed.
      //
      // Without this capability, frameworks will receive TASK_LOST
      // for tasks on partitioned agents; such tasks will be killed by
      // Mesos when the agent reregisters (unless the master has
      // failed over).
      PARTITION_AWARE = 5;

      // This expresses the ability for the framework to be
      // "multi-tenant" via using the newly introduced `roles`
      // field, and examining `Offer.allocation_info` to determine
      // which role the offers are being made to. We also
      // expect that "single-tenant" schedulers eventually
      // provide this and move away from the deprecated
      // `role` field.
      MULTI_ROLE = 6;

      // This capability has two effects for a framework.
      //
      // (1) The framework is offered resources in a new format.
      //
      //     The offered resources have the `Resource.reservations` field set
      //     rather than `Resource.role` and `Resource.reservation`. In short,
      //     an empty `reservations` field denotes unreserved resources, and
      //     each `ReservationInfo` in the `reservations` field denotes a
      //     reservation that refines the previous one.
      //
      //     See the 'Resource Format' section for more details.
      //
      // (2) The framework can create refined reservations.
      //
      //     A framework can refine an existing reservation via the
      //     `Resource.reservations` field. For example, a reservation for role
      //     `eng` can be refined to `eng/front_end`.
      //
      //     See `ReservationInfo.reservations` for more details.
      //
      // NOTE: Without this capability, a framework is not offered resources
      // that have refined reservations. A resource is said to have refined
      // reservations if it uses the `Resource.reservations` field, and
      // `Resource.reservations_size() > 1`.
      RESERVATION_REFINEMENT = 7; // EXPERIMENTAL.

      // Indicates that the framework is prepared to receive offers
      // for agents whose region is different from the master's
      // region. Network links between hosts in different regions
      // typically have higher latency and lower bandwidth than
      // network links within a region, so frameworks should be
      // careful to only place suitable workloads in remote regions.
      // Frameworks that are not region-aware will never receive
      // offers for remote agents; region-aware frameworks are assumed
      // to implement their own logic to decide which workloads (if
      // any) are suitable for placement on remote agents.
      REGION_AWARE = 8;
    }

    // Enum fields should be optional, see: MESOS-4997.
    optional Type type = 1;
  }

  // This field allows a framework to advertise its set of
  // capabilities (e.g., ability to receive offers for revocable
  // resources).
  repeated Capability capabilities = 10;

  // Labels are free-form key value pairs supplied by the framework
  // scheduler (e.g., to describe additional functionality offered by
  // the framework). These labels are not interpreted by Mesos itself.
  // Labels should not contain duplicate key-value pairs.
  optional Labels labels = 11;
}


/**
 * Describes a general non-interpreting non-killing check for a task or
 * executor (or any arbitrary process/command). A type is picked by
 * specifying one of the optional fields. Specifying more than one type
 * is an error.
 *
 * NOTE: This API is unstable and the related feature is experimental.
 */
message CheckInfo {
  enum Type {
    UNKNOWN = 0;
    COMMAND = 1;
    HTTP = 2;
    TCP = 3;

    // TODO(alexr): Consider supporting custom user checks. They should
    // probably be paired with a `data` field and complemented by a
    // `data` response in `CheckStatusInfo`.
  }

  // Describes a command check. If applicable, enters mount and/or network
  // namespaces of the task.
  message Command {
    required CommandInfo command = 1;
  }

  // Describes an HTTP check. Sends a GET request to
  // http://<host>:port/path. Note that <host> is not configurable and is
  // resolved automatically to 127.0.0.1.
  message Http {
    // Port to send the HTTP request.
    required uint32 port = 1;

    // HTTP request path.
    optional string path = 2;

    // TODO(alexr): Add support for HTTP method. While adding POST
    // and PUT is simple, supporting payload is more involved.

    // TODO(alexr): Add support for custom HTTP headers.

    // TODO(alexr): Consider adding an optional message to describe TLS
    // options and thus enabling https. Such message might contain certificate
    // validation, TLS version.
  }

  // Describes a TCP check, i.e. based on establishing a TCP connection to
  // the specified port. Note that <host> is not configurable and is resolved
  // automatically to 127.0.0.1.
  message Tcp {
    required uint32 port = 1;
  }

  // The type of the check.
  optional Type type = 1;

  // Command check.
  optional Command command = 2;

  // HTTP check.
  optional Http http = 3;

  // TCP check.
  optional Tcp tcp = 7;

  // Amount of time to wait to start checking the task after it
  // transitions to `TASK_RUNNING` or `TASK_STARTING` if the latter
  // is used by the executor.
  optional double delay_seconds = 4 [default = 15.0];

  // Interval between check attempts, i.e., amount of time to wait after
  // the previous check finished or timed out to start the next check.
  optional double interval_seconds = 5 [default = 10.0];

  // Amount of time to wait for the check to complete. Zero means infinite
  // timeout.
  //
  // After this timeout, the check attempt is aborted and no result is
  // reported. Note that this may be considered a state change and hence
  // may trigger a check status change delivery to the corresponding
  // scheduler. See `CheckStatusInfo` for more details.
  optional double timeout_seconds = 6 [default = 20.0];
}


/**
 * Describes a health check for a task or executor (or any arbitrary
 * process/command).
* A type is picked by specifying one of the
 * optional fields. Specifying more than one type is an error.
 */
message HealthCheck {
  enum Type {
    UNKNOWN = 0;
    COMMAND = 1;
    HTTP = 2;
    TCP = 3;
  }

  // Describes an HTTP health check. Sends a GET request to
  // scheme://<host>:port/path. Note that <host> is not configurable and is
  // resolved automatically, in most cases to 127.0.0.1. Default executors
  // treat return codes between 200 and 399 as success; custom executors
  // may employ a different strategy, e.g. leveraging the `statuses` field.
  message HTTPCheckInfo {
    // Currently "http" and "https" are supported.
    optional string scheme = 3;

    // Port to send the HTTP request.
    required uint32 port = 1;

    // HTTP request path.
    optional string path = 2;

    // TODO(alexr): Add support for HTTP method. While adding POST
    // and PUT is simple, supporting payload is more involved.

    // TODO(alexr): Add support for custom HTTP headers.

    // TODO(alexr): Add support for success and possibly failure
    // statuses.

    // NOTE: It is up to the custom executor to interpret and act on this
    // field. Setting this field has no effect on the default executors.
    //
    // TODO(haosdent): Deprecate this field when we add better support for
    // success and possibly failure statuses, e.g. ranges of success and
    // failure statuses.
    repeated uint32 statuses = 4;

    // TODO(haosdent): Consider adding a flag to enable task's certificate
    // validation for HTTPS health checks, see MESOS-5997.

    // TODO(benh): Include an 'optional bytes data' field for checking
    // for specific data in the response.
  }

  // Describes a TCP health check, i.e. based on establishing
  // a TCP connection to the specified port.
  message TCPCheckInfo {
    // Port expected to be open.
    required uint32 port = 1;
  }

  // TODO(benh): Consider adding a URL health check strategy which
  // allows doing something similar to the HTTP strategy but
  // encapsulates all the details in a single string field.

  // Amount of time to wait to start health checking the task after it
  // transitions to `TASK_RUNNING` or `TASK_STARTING` if the latter is
  // used by the executor.
  optional double delay_seconds = 2 [default = 15.0];

  // Interval between health checks, i.e., amount of time to wait after
  // the previous health check finished or timed out to start the next
  // health check.
  optional double interval_seconds = 3 [default = 10.0];

  // Amount of time to wait for the health check to complete. After this
  // timeout, the health check is aborted and treated as a failure. Zero
  // means infinite timeout.
  optional double timeout_seconds = 4 [default = 20.0];

  // Number of consecutive failures until the task is killed by the executor.
  optional uint32 consecutive_failures = 5 [default = 3];

  // Amount of time after the task is launched during which health check
  // failures are ignored. Once a check succeeds for the first time,
  // the grace period does not apply anymore. Note that it includes
  // `delay_seconds`, i.e., setting `grace_period_seconds` < `delay_seconds`
  // has no effect.
  optional double grace_period_seconds = 6 [default = 10.0];

  // TODO(alexr): Add an optional `KillPolicy` that should be used
  // if the task is killed because of a health check failure.

  // The type of health check.
  optional Type type = 8;

  // Command health check.
  optional CommandInfo command = 7;

  // HTTP health check.
  optional HTTPCheckInfo http = 1;

  // TCP health check.
  optional TCPCheckInfo tcp = 9;
}


/**
 * Describes a kill policy for a task. Currently does not express
 * different policies (e.g.
* hitting HTTP endpoints), only controls
 * how long to wait between graceful and forcible task kill:
 *
 *     graceful kill --------------> forcible kill
 *                    grace_period
 *
 * Kill policies are best-effort, because machine failures / forcible
 * terminations may occur.
 *
 * NOTE: For executor-less command-based tasks, the kill is performed
 * via sending a signal to the task process: SIGTERM for the graceful
 * kill and SIGKILL for the forcible kill. For the docker executor-less
 * tasks the grace period is passed to 'docker stop --time'.
 */
message KillPolicy {
  // The grace period specifies how long to wait before forcibly
  // killing the task. It is recommended to attempt to gracefully
  // kill the task (and send TASK_KILLING) to indicate that the
  // graceful kill is in progress. Once the grace period elapses,
  // if the task has not terminated, a forcible kill should occur.
  // The task should not assume that it will always be allotted
  // the full grace period. For example, the executor may be
  // shutdown more quickly by the agent, or failures / forcible
  // terminations may occur.
  optional DurationInfo grace_period = 1;
}


/**
 * Describes a command, executed via: '/bin/sh -c value'. Any URIs specified
 * are fetched before executing the command. If the executable field for an
 * uri is set, executable file permission is set on the downloaded file.
 * Otherwise, if the downloaded file has a recognized archive extension
 * (currently [compressed] tar and zip) it is extracted into the executor's
 * working directory. This extraction can be disabled by setting `extract` to
 * false. In addition, any environment variables are set before executing
 * the command (so they can be used to "parameterize" your command).
 */
message CommandInfo {
  message URI {
    required string value = 1;
    optional bool executable = 2;

    // In case the fetched file is recognized as an archive, extract
    // its contents into the sandbox. Note that a cached archive is
    // not copied from the cache to the sandbox in case extraction
    // originates from an archive in the cache.
    optional bool extract = 3 [default = true];

    // If this field is "true", the fetcher cache will be used. If not,
    // fetching bypasses the cache and downloads directly into the
    // sandbox directory, no matter whether a suitable cache file is
    // available or not. The former directs the fetcher to download to
    // the file cache, then copy from there to the sandbox. Subsequent
    // fetch attempts with the same URI will omit downloading and copy
    // from the cache as long as the file is resident there. Cache files
    // may get evicted at any time, which then leads to renewed
    // downloading. See also "docs/fetcher.md" and
    // "docs/fetcher-cache-internals.md".
    optional bool cache = 4;

    // The fetcher's default behavior is to use the URI string's basename to
    // name the local copy. If this field is provided, the local copy will be
    // named with its value instead. If there is a directory component (which
    // must be a relative path), the local copy will be stored in that
    // subdirectory inside the sandbox.
    optional string output_file = 5;
  }

  repeated URI uris = 1;

  optional Environment environment = 2;

  // There are two ways to specify the command:
  // 1) If 'shell == true', the command will be launched via shell
  //    (i.e., /bin/sh -c 'value'). The 'value' specified will be
  //    treated as the shell command. The 'arguments' will be ignored.
  // 2) If 'shell == false', the command will be launched by passing
  //    arguments to an executable. The 'value' specified will be
  //    treated as the filename of the executable. The 'arguments'
  //    will be treated as the arguments to the executable. This is
  //    similar to how POSIX exec families launch processes (i.e.,
  //    execlp(value, arguments(0), arguments(1), ...)).
  // NOTE: The field 'value' is changed from 'required' to 'optional'
  // in 0.20.0. It will only cause issues if a new framework is
  // connecting to an old master.
  optional bool shell = 6 [default = true];
  optional string value = 3;
  repeated string arguments = 7;

  // Enables executor and tasks to run as a specific user. If the user
  // field is present both in FrameworkInfo and here, the CommandInfo
  // user value takes precedence.
  optional string user = 5;
}


/**
 * Describes information about an executor.
 */
message ExecutorInfo {
  enum Type {
    UNKNOWN = 0;

    // Mesos provides a simple built-in default executor that frameworks can
    // leverage to run shell commands and containers.
    //
    // NOTES:
    //
    // 1) `command` must not be set when using a default executor.
    //
    // 2) Default executor only accepts a *single* `LAUNCH` or `LAUNCH_GROUP`
    //    offer operation.
    //
    // 3) If `container` is set, `container.type` must be `MESOS`
    //    and `container.mesos.image` must not be set.
    DEFAULT = 1;

    // For frameworks that need custom functionality to run tasks, a `CUSTOM`
    // executor can be used. Note that `command` must be set when using a
    // `CUSTOM` executor.
    CUSTOM = 2;
  }

  // For backwards compatibility, if this field is not set when using `LAUNCH`
  // offer operation, Mesos will infer the type by checking if `command` is
  // set (`CUSTOM`) or unset (`DEFAULT`). `type` must be set when using
  // `LAUNCH_GROUP` offer operation.
  //
  // TODO(vinod): Add support for explicitly setting `type` to `DEFAULT`
  // in `LAUNCH` offer operation.
  optional Type type = 15;

  required ExecutorID executor_id = 1;
  optional FrameworkID framework_id = 8; // TODO(benh): Make this required.
  optional CommandInfo command = 7;

  // Executor provided with a container will launch the container
  // with the executor's CommandInfo and we expect the container to
  // act as a Mesos executor.
  optional ContainerInfo container = 11;

  repeated Resource resources = 5;
  optional string name = 9;

  // 'source' is an identifier style string used by frameworks to
  // track the source of an executor. This is useful when it's
  // possible for different executor ids to be related semantically.
  //
  // NOTE: 'source' is exposed alongside the resource usage of the
  // executor via JSON on the agent. This allows users to import usage
  // information into a time series database for monitoring.
  //
  // This field is deprecated since 1.0. Please use labels for
  // free-form metadata instead.
  optional string source = 10 [deprecated = true]; // Since 1.0.

  // This field can be used to pass arbitrary bytes to an executor.
  optional bytes data = 4;

  // Service discovery information for the executor. It is not
  // interpreted or acted upon by Mesos. It is up to a service
  // discovery system to use this information as needed and to handle
  // executors without service discovery information.
  optional DiscoveryInfo discovery = 12;

  // When shutting down an executor the agent will wait in a
  // best-effort manner for the grace period specified here
  // before forcibly destroying the container. The executor
  // must not assume that it will always be allotted the full
  // grace period, as the agent may decide to allot a shorter
  // period and failures / forcible terminations may occur.
  optional DurationInfo shutdown_grace_period = 13;

  // Labels are free-form key value pairs which are exposed through
  // master and agent endpoints. Labels will not be interpreted or
  // acted upon by Mesos itself. As opposed to the data field, labels
  // will be kept in memory on master and agent processes. Therefore,
  // labels should be used to tag executors with lightweight metadata.
  // Labels should not contain duplicate key-value pairs.
  optional Labels labels = 14;
}


/**
 * Describes a domain.
* A domain is a collection of hosts that have
 * similar characteristics. Mesos currently only supports "fault
 * domains", which identify groups of hosts with similar failure
 * characteristics.
 *
 * Frameworks can generally assume that network links between hosts in
 * the same fault domain have lower latency, higher bandwidth, and better
 * availability than network links between hosts in different domains.
 * Schedulers may prefer to place network-intensive workloads in the
 * same domain, as this may improve performance. Conversely, a single
 * failure that affects a host in a domain may be more likely to
 * affect other hosts in the same domain; hence, schedulers may prefer
 * to place workloads that require high availability in multiple
 * domains. (For example, all the hosts in a single rack might lose
 * power or network connectivity simultaneously.)
 *
 * There are two kinds of fault domains: regions and zones. Regions
 * offer the highest degree of fault isolation, but network latency
 * between regions is typically high (typically >50 ms). Zones offer a
 * modest degree of fault isolation along with reasonably low network
 * latency (typically <10 ms).
 *
 * The mapping from fault domains to physical infrastructure is up to
 * the operator to configure. In cloud environments, regions and zones
 * can be mapped to the "region" and "availability zone" concepts
 * exposed by most cloud providers, respectively. In on-premise
 * deployments, regions and zones can be mapped to data centers and
 * racks, respectively.
 *
 * Both masters and agents can be configured with domains. Frameworks
 * can compare the domains of two hosts to determine if the hosts are
 * in the same zone, in different zones in the same region, or in
 * different regions. Note that all masters in a given Mesos cluster
 * must be in the same region.
 */
message DomainInfo {
  message FaultDomain {
    message RegionInfo {
      required string name = 1;
    }

    message ZoneInfo {
      required string name = 1;
    }

    required RegionInfo region = 1;
    required ZoneInfo zone = 2;
  }

  optional FaultDomain fault_domain = 1;
}


/**
 * Describes a master. This will probably have more fields in the
 * future which might be used, for example, to link a framework webui
 * to a master webui.
 */
message MasterInfo {
  required string id = 1;

  // The IP address (only IPv4) as a packed 4-bytes integer,
  // stored in network order. Deprecated, use `address.ip` instead.
  required uint32 ip = 2;

  // The TCP port the Master is listening on for incoming
  // HTTP requests; deprecated, use `address.port` instead.
  required uint32 port = 3 [default = 5050];

  // In the default implementation, this will contain information
  // about both the IP address, port and Master name; it should really
  // not be relied upon by external tooling/frameworks and be
  // considered an "internal" implementation field.
  optional string pid = 4;

  // The server's hostname, if available; it may be unreliable
  // in environments where the DNS configuration does not resolve
  // internal hostnames (eg, some public cloud providers).
  // Deprecated, use `address.hostname` instead.
  optional string hostname = 5;

  // The running Master version, as a string; taken from the
  // generated "master/version.hpp".
  optional string version = 6;

  // The full IP address (supports both IPv4 and IPv6 formats)
  // and supersedes the use of `ip`, `port` and `hostname`.
  // Since Mesos 0.24.
  optional Address address = 7;

  // The domain that this master belongs to. All masters in a Mesos
  // cluster should belong to the same region.
  optional DomainInfo domain = 8;
}


/**
 * Describes an agent.
Note that the 'id' field is only available + * after an agent is registered with the master, and is made available + * here to facilitate re-registration. + */ +message AgentInfo { + required string hostname = 1; + optional int32 port = 8 [default = 5051]; + + // The configured resources at the agent. This does not include any + // dynamic reservations or persistent volumes that may currently + // exist at the agent. + repeated Resource resources = 3; + + repeated Attribute attributes = 5; + optional AgentID id = 6; + + // The domain that this agent belongs to. If the agent's region + // differs from the master's region, it will not appear in resource + // offers to frameworks that have not enabled the REGION_AWARE + // capability. + optional DomainInfo domain = 10; + + message Capability { + enum Type { + // This must be the first enum value in this list, to + // ensure that if 'type' is not set, the default value + // is UNKNOWN. This enables enum values to be added + // in a backwards-compatible way. See: MESOS-4997. + UNKNOWN = 0; + + // This expresses the agent's ability to launch tasks + // of a 'multi-role' framework. + MULTI_ROLE = 1; + + // This expresses the agent's ability to launch tasks, + // reserve resources, and create volumes using resources + // allocated to a 'hierarchical-role'. + // NOTE: This capability is required specifically for creating + // volumes because a hierarchical role name includes '/' (slashes). + // Agents with this capability know to transform the '/' (slashes) + // into ' ' (spaces). + HIERARCHICAL_ROLE = 2; + + // This capability has three effects for an agent. + // + // (1) The format of the checkpointed resources, and + // the resources reported to master. + // + // These resources are reported in the "pre-reservation-refinement" + // format if none of the resources have refined reservations.
If any + // of the resources have refined reservations, they are reported in + // the "post-reservation-refinement" format. The purpose is to allow + // downgrading of an agent as well as communication with a pre-1.4.0 + // master until the reservation refinement feature is actually used. + // + // See the 'Resource Format' section in `Resource` for more details. + // + // (2) The format of the resources reported by the HTTP endpoints. + // + // For resources reported by agent endpoints, the + // "pre-reservation-refinement" format is "injected" if possible. + // That is, resources without refined reservations will have the + // `Resource.role` and `Resource.reservation` set, whereas + // resources with refined reservations will not. + // + // See the 'Resource Format' section in `Resource` for more details. + // + // (3) The ability for the agent to launch tasks, reserve resources, and + // create volumes using resources that have refined reservations. + // + // See the `Resource.reservations` field for more details. + // + // NOTE: A resource is said to have refined reservations if it uses the + // `Resource.reservations` field, and `Resource.reservations_size() > 1`. + RESERVATION_REFINEMENT = 3; + } + + // Enum fields should be optional, see: MESOS-4997. + optional Type type = 1; + } +} + + +/** + * Describes a resource provider. Note that the 'id' field is only available + * after a resource provider is registered with the master, and is made + * available here to facilitate re-registration. + */ +message ResourceProviderInfo { + optional ResourceProviderID id = 1; + repeated Attribute attributes = 2; + + // The type of the resource provider. This uniquely identifies a + // resource provider implementation. For instance: + // org.apache.mesos.rp.local.storage + // + // Please follow the Java package naming convention + // (https://en.wikipedia.org/wiki/Java_package#Package_naming_conventions) + // to avoid conflicts on type names.
+ required string type = 3; + + // The name of the resource provider. There could be multiple + // instances of a type of resource provider. The name field is used + // to distinguish these instances. + required string name = 4; +} + + +/** + * Describes an Attribute or Resource "value". A value is described + * using the standard protocol buffer "union" trick: `type` indicates + * which one of the optional `scalar`, `ranges`, `set` or `text` fields + * is expected to be set. + */ +message Value { + enum Type { + SCALAR = 0; + RANGES = 1; + SET = 2; + TEXT = 3; + } + + message Scalar { + // Scalar values are represented using floating point. To reduce + // the chance of unpredictable floating point behavior due to + // roundoff error, Mesos only supports three decimal digits of + // precision for scalar resource values. That is, floating point + // values are converted to a fixed point format that supports + // three decimal digits of precision, and then converted back to + // floating point on output. Any additional precision in scalar + // resource values is discarded (via rounding). + required double value = 1; + } + + message Range { + required uint64 begin = 1; + required uint64 end = 2; + } + + message Ranges { + repeated Range range = 1; + } + + message Set { + repeated string item = 1; + } + + message Text { + required string value = 1; + } + + required Type type = 1; + optional Scalar scalar = 2; + optional Ranges ranges = 3; + optional Set set = 4; + optional Text text = 5; +} + + +/** + * Describes an attribute that can be set on a machine. For now, + * attributes and resources share the same "value" type, but this may + * change in the future and attributes may only be string based. + * + * NOTE: `set` uses field number 6 and `text` uses field number 5, so + * the declaration order below does not follow the field numbers. + */ +message Attribute { + required string name = 1; + required Value.Type type = 2; + optional Value.Scalar scalar = 3; + optional Value.Ranges ranges = 4; + optional Value.Set set = 6; + optional Value.Text text = 5; +} + + +/** + * Describes a resource from a resource provider.
The `name` field is + * a string like "cpus" or "mem" that indicates which kind of resource + * this is; the rest of the fields describe the properties of the + * resource. A resource can take on one of three types: scalar + * (double), a list of finite and discrete ranges (e.g., [1-10, + * 20-30]), or a set of items. A resource is described using the + * standard protocol buffer "union" trick. + * + * Note that "disk" and "mem" resources are scalar values expressed in + * megabytes. Fractional "cpus" values are allowed (e.g., "0.5"), + * which correspond to partial shares of a CPU. + */ +message Resource { + optional ResourceProviderID provider_id = 12; + + required string name = 1; + required Value.Type type = 2; + optional Value.Scalar scalar = 3; + optional Value.Ranges ranges = 4; + optional Value.Set set = 5; + + // The role that this resource is reserved for. If "*", this indicates + // that the resource is unreserved. Otherwise, the resource will only + // be offered to frameworks that belong to this role. + // + // NOTE: Frameworks must not set this field if `reservations` is set. + // See the 'Resource Format' section for more details. + // + // TODO(mpark): Deprecate once `reservations` is no longer experimental. + optional string role = 6 [default = "*", deprecated=true]; + + // This was initially introduced to support MULTI_ROLE capable + // frameworks. Frameworks that are not MULTI_ROLE capable can + // continue to assume that the offered resources are allocated + // to their role. + message AllocationInfo { + // If set, this resource is allocated to a role. Note that in the + // future, this may be unset and the scheduler may be responsible + // for allocating to one of its roles. + optional string role = 1; + + // In the future, we may add additional fields here, e.g. priority + // tier, type of allocation (quota / fair share).
+ } + + optional AllocationInfo allocation_info = 11; + + // Resource Format: + // + // Frameworks receive resource offers in one of two formats, depending on + // whether the RESERVATION_REFINEMENT capability is enabled. + // + // __WITHOUT__ the RESERVATION_REFINEMENT capability, the framework is offered + // resources in the "pre-reservation-refinement" format. In this format, the + // `Resource.role` and `Resource.reservation` fields are used in conjunction + // to describe the reservation state of a `Resource` message. + // + // The following is an overview of the possible reservation states: + // + // +------------+------------------------------------------------------------+ + // | unreserved | { | + // | | role: "*", | + // | | reservation: <not set>, | + // | | reservations: <unused> | + // | | } | + // +------------+------------------------------------------------------------+ + // | static | { | + // | | role: "eng", | + // | | reservation: <not set>, | + // | | reservations: <unused> | + // | | } | + // +------------+------------------------------------------------------------+ + // | dynamic | { | + // | | role: "eng", | + // | | reservation: { | + // | | type: <unused>, | + // | | role: <unused>, | + // | | principal: <optional>, | + // | | labels: <optional> | + // | | }, | + // | | reservations: <unused> | + // | | } | + // +------------+------------------------------------------------------------+ + // + // __WITH__ the RESERVATION_REFINEMENT capability, the framework is offered + // resources in the "post-reservation-refinement" format. In this format, the + // reservation state of a `Resource` message is expressed solely in + // the `Resource.reservations` field.
+ // + // The following is an overview of the possible reservation states: + // + // +------------+------------------------------------------------------------+ + // | unreserved | { | + // | | role: <unused>, | + // | | reservation: <unused>, | + // | | reservations: [] | + // | | } | + // +------------+------------------------------------------------------------+ + // | static | { | + // | | role: <unused>, | + // | | reservation: <unused>, | + // | | reservations: [ | + // | | { | + // | | type: STATIC, | + // | | role: "eng", | + // | | principal: <optional>, | + // | | labels: <optional> | + // | | } | + // | | ] | + // | | } | + // +------------+------------------------------------------------------------+ + // | dynamic | { | + // | | role: <unused>, | + // | | reservation: <unused>, | + // | | reservations: [ | + // | | { | + // | | type: DYNAMIC, | + // | | role: "eng", | + // | | principal: <optional>, | + // | | labels: <optional> | + // | | } | + // | | ] | + // | | } | + // +------------+------------------------------------------------------------+ + // + // We can also __refine__ reservations with this capability like so: + // + // +------------+------------------------------------------------------------+ + // | refined | { | + // | | role: <unused>, | + // | | reservation: <unused>, | + // | | reservations: [ | + // | | { | + // | | type: STATIC or DYNAMIC, | + // | | role: "eng", | + // | | principal: <optional>, | + // | | labels: <optional> | + // | | }, | + // | | { | + // | | type: DYNAMIC, | + // | | role: "eng/front_end", | + // | | principal: <optional>, | + // | | labels: <optional> | + // | | } | + // | | ] | + // | | } | + // +------------+------------------------------------------------------------+ + // + // NOTE: Each `ReservationInfo` in the `reservations` field denotes + // a reservation that refines the previous `ReservationInfo`. + + message ReservationInfo { + // TODO(mpark): Explain the two resource formats.
+ + // Describes a reservation. Static reservations are set by the operator on + // the command-line and are immutable without an agent restart. A dynamic + // reservation is acquired by an operator via the '/reserve' HTTP endpoint + // or by a framework via the offer cycle by sending back an + // 'Offer::Operation::Reserve' message. + // NOTE: We currently do not allow frameworks with role "*" to make dynamic + // reservations. + + enum Type { + UNKNOWN = 0; + STATIC = 1; + DYNAMIC = 2; + } + + // The type of this reservation. + // NOTE: This field must not be set for `Resource.reservation`. + optional Type type = 4; + + // The role for which this reservation is made. + // NOTE: This field must not be set for `Resource.reservation`. + optional string role = 3; + + // Indicates the principal, if any, of the framework or operator + // that reserved this resource. If reserved by a framework, the + // field should match the `FrameworkInfo.principal`. It is used in + // conjunction with the `UnreserveResources` ACL to determine + // whether the entity attempting to unreserve this resource is + // permitted to do so. + optional string principal = 1; + + // Labels are free-form key value pairs that can be used to + // associate arbitrary metadata with a reserved resource. For + // example, frameworks can use labels to identify the intended + // purpose for a portion of the resources the framework has + // reserved at a given agent. Labels should not contain duplicate + // key-value pairs. + optional Labels labels = 2; + } + + // If this is set, this resource was dynamically reserved by an + // operator or a framework. Otherwise, this resource is either unreserved + // or statically reserved by an operator via the --resources flag. + // NOTE: Frameworks must not set this field if `reservations` is set. + optional ReservationInfo reservation = 8; + + // The stack of reservations. If this field is empty, it indicates that this + // resource is unreserved.
Otherwise, the resource is reserved. The first + // `ReservationInfo` may have type `STATIC` or `DYNAMIC`, but the rest must + // have type `DYNAMIC`. One can create a new reservation on top of an existing + // one by pushing a new `ReservationInfo` to the back. The last + // `ReservationInfo` in this stack is the "current" reservation. The new + // reservation's role must be a child of the current reservation's role. + // NOTE: Frameworks must not set this field if `reservation` is set. + repeated ReservationInfo reservations = 13; // EXPERIMENTAL. + + message DiskInfo { + // Describes a persistent disk volume. + // + // A persistent disk volume will not be automatically garbage + // collected if the task/executor/agent terminates, but will be + // re-offered to the framework(s) belonging to the 'role'. + // + // NOTE: Currently, we do not allow persistent disk volumes + // without a reservation (i.e., 'role' cannot be '*'). + message Persistence { + // A unique ID for the persistent disk volume. This ID must be + // unique per role on each agent. Although it is possible to use + // the same ID on different agents in the cluster and to reuse + // IDs after a volume with that ID has been destroyed, both + // practices are discouraged. + required string id = 1; + + // This field indicates the principal of the operator or + // framework that created this volume. It is used in conjunction + // with the "destroy" ACL to determine whether an entity + // attempting to destroy the volume is permitted to do so. + // + // NOTE: This field should match the FrameworkInfo.principal of + // the framework that created the volume. + optional string principal = 2; + } + + optional Persistence persistence = 1; + + // Describes how this disk resource will be mounted in the + // container. If not set, the disk resource will be used as the + // sandbox. Otherwise, it will be mounted according to the + // 'container_path' inside 'volume'. The 'host_path' inside + // 'volume' is ignored.
+ // NOTE: If 'volume' is set but 'persistence' is not set, the + // volume will be automatically garbage collected after the + // task/executor terminates. Currently, if 'persistence' is set, + // 'volume' must be set. + optional Volume volume = 2; + + // Describes where a disk originates from. + // TODO(jmlvanre): Add support for BLOCK devices. + message Source { + enum Type { + UNKNOWN = 0; + PATH = 1; + MOUNT = 2; + } + + // A folder that can be located on a separate disk device. This + // can be shared and carved up as necessary between frameworks. + message Path { + // Path to the folder (e.g., /mnt/raid/disk0). + optional string root = 1; + } + + // A mounted file-system set up by the Agent administrator. This + // can only be used exclusively: a framework cannot accept a + // partial amount of this disk. + message Mount { + // Path to mount point (e.g., /mnt/raid/disk0). + optional string root = 1; + } + + required Type type = 1; + optional Path path = 2; + optional Mount mount = 3; + } + + optional Source source = 3; + } + + optional DiskInfo disk = 7; + + message RevocableInfo {} + + // If this is set, the resources are revocable, i.e., any tasks or + // executors launched using these resources could get preempted or + // throttled at any time. This could be used by frameworks to run + // best effort tasks that do not need strict uptime or performance + // guarantees. Note that if this is set, 'disk' or 'reservation' + // cannot be set. + optional RevocableInfo revocable = 9; + + // Allow the resource to be shared across tasks. + message SharedInfo {} + + // If this is set, the resources are shared, i.e. multiple tasks + // can be launched using this resource and all of them shall refer + // to the same physical resource on the cluster. Note that only + // persistent volumes can be shared currently.
+ optional SharedInfo shared = 10; +} + + +/** + * When the network bandwidth caps are enabled and the container + * is over its limit, outbound packets may be either delayed or + * dropped completely, either because the container exceeds the maximum + * bandwidth allocation for a single container (the cap) or because the + * combined network traffic of multiple containers on the host exceeds the + * transmit capacity of the host (the share). We can report the + * following statistics for each of these conditions exported directly + * from the Linux Traffic Control Queueing Discipline. + * + * id : name of the limiter, e.g. 'tx_bw_cap' + * backlog : number of packets currently delayed + * bytes : total bytes seen + * drops : number of packets dropped in total + * overlimits : number of packets which exceeded allocation + * packets : total packets seen + * qlen : number of packets currently queued + * ratebps : throughput in bytes/sec + * ratepps : throughput in packets/sec + * requeues : number of times a packet has been delayed due to + * locking or device contention issues + * + * More information on the operation of Linux Traffic Control can be + * found at http://www.lartc.org/lartc.html.
+ */ +message TrafficControlStatistics { + required string id = 1; + optional uint64 backlog = 2; + optional uint64 bytes = 3; + optional uint64 drops = 4; + optional uint64 overlimits = 5; + optional uint64 packets = 6; + optional uint64 qlen = 7; + optional uint64 ratebps = 8; + optional uint64 ratepps = 9; + optional uint64 requeues = 10; +} + +/** + * Statistics carried in `SNMPStatistics.ip_stats`. + */ +message IpStatistics { + optional int64 Forwarding = 1; + optional int64 DefaultTTL = 2; + optional int64 InReceives = 3; + optional int64 InHdrErrors = 4; + optional int64 InAddrErrors = 5; + optional int64 ForwDatagrams = 6; + optional int64 InUnknownProtos = 7; + optional int64 InDiscards = 8; + optional int64 InDelivers = 9; + optional int64 OutRequests = 10; + optional int64 OutDiscards = 11; + optional int64 OutNoRoutes = 12; + optional int64 ReasmTimeout = 13; + optional int64 ReasmReqds = 14; + optional int64 ReasmOKs = 15; + optional int64 ReasmFails = 16; + optional int64 FragOKs = 17; + optional int64 FragFails = 18; + optional int64 FragCreates = 19; +} + +/** + * Statistics carried in `SNMPStatistics.icmp_stats`. + */ +message IcmpStatistics { + optional int64 InMsgs = 1; + optional int64 InErrors = 2; + optional int64 InCsumErrors = 3; + optional int64 InDestUnreachs = 4; + optional int64 InTimeExcds = 5; + optional int64 InParmProbs = 6; + optional int64 InSrcQuenchs = 7; + optional int64 InRedirects = 8; + optional int64 InEchos = 9; + optional int64 InEchoReps = 10; + optional int64 InTimestamps = 11; + optional int64 InTimestampReps = 12; + optional int64 InAddrMasks = 13; + optional int64 InAddrMaskReps = 14; + optional int64 OutMsgs = 15; + optional int64 OutErrors = 16; + optional int64 OutDestUnreachs = 17; + optional int64 OutTimeExcds = 18; + optional int64 OutParmProbs = 19; + optional int64 OutSrcQuenchs = 20; + optional int64 OutRedirects = 21; + optional int64 OutEchos = 22; + optional int64 OutEchoReps = 23; + optional int64 OutTimestamps = 24; + optional int64 OutTimestampReps = 25; + optional int64 OutAddrMasks = 26; + optional int64 OutAddrMaskReps =
27; +} + +/** + * Statistics carried in `SNMPStatistics.tcp_stats`. + */ +message TcpStatistics { + optional int64 RtoAlgorithm = 1; + optional int64 RtoMin = 2; + optional int64 RtoMax = 3; + optional int64 MaxConn = 4; + optional int64 ActiveOpens = 5; + optional int64 PassiveOpens = 6; + optional int64 AttemptFails = 7; + optional int64 EstabResets = 8; + optional int64 CurrEstab = 9; + optional int64 InSegs = 10; + optional int64 OutSegs = 11; + optional int64 RetransSegs = 12; + optional int64 InErrs = 13; + optional int64 OutRsts = 14; + optional int64 InCsumErrors = 15; +} + +/** + * Statistics carried in `SNMPStatistics.udp_stats`. + */ +message UdpStatistics { + optional int64 InDatagrams = 1; + optional int64 NoPorts = 2; + optional int64 InErrors = 3; + optional int64 OutDatagrams = 4; + optional int64 RcvbufErrors = 5; + optional int64 SndbufErrors = 6; + optional int64 InCsumErrors = 7; + optional int64 IgnoredMulti = 8; +} + +/** + * Groups the `IpStatistics`, `IcmpStatistics`, `TcpStatistics` and + * `UdpStatistics` messages. Carried in + * `ResourceStatistics.net_snmp_statistics`, which is documented as the + * network SNMP statistics for each container. + * + * NOTE(review): the field names of the grouped messages appear to mirror + * the Linux /proc/net/snmp counters -- confirm. + */ +message SNMPStatistics { + optional IpStatistics ip_stats = 1; + optional IcmpStatistics icmp_stats = 2; + optional TcpStatistics tcp_stats = 3; + optional UdpStatistics udp_stats = 4; +} + +/** + * Statistics for a single disk resource. Carried in the repeated + * `ResourceStatistics.disk_statistics` field ("Per disk (resource) + * statistics"). + */ +message DiskStatistics { + optional Resource.DiskInfo.Source source = 1; + optional Resource.DiskInfo.Persistence persistence = 2; + optional uint64 limit_bytes = 3; + optional uint64 used_bytes = 4; +} + + +/** + * A snapshot of resource usage statistics. + */ +message ResourceStatistics { + required double timestamp = 1; // Snapshot time, in seconds since the Epoch. + + optional uint32 processes = 30; + optional uint32 threads = 31; + + // CPU Usage Information: + // Total CPU time spent in user mode, and kernel mode. + optional double cpus_user_time_secs = 2; + optional double cpus_system_time_secs = 3; + + // Number of CPUs allocated. + optional double cpus_limit = 4; + + // cpu.stat on process throttling (for contention issues).
+ optional uint32 cpus_nr_periods = 7; + optional uint32 cpus_nr_throttled = 8; + optional double cpus_throttled_time_secs = 9; + + // Memory Usage Information: + + // mem_total_bytes was added in 0.23.0 to represent the total memory + // of a process in RAM (as opposed to in Swap). This was previously + // reported as mem_rss_bytes, which was also changed in 0.23.0 to + // represent only the anonymous memory usage, to keep in sync with + // the Linux kernel's (arguably erroneous) use of terminology. + optional uint64 mem_total_bytes = 36; + + // Total memory + swap usage. This is set if swap is enabled. + optional uint64 mem_total_memsw_bytes = 37; + + // Hard memory limit for a container. + optional uint64 mem_limit_bytes = 6; + + // Soft memory limit for a container. + optional uint64 mem_soft_limit_bytes = 38; + + // Broken out memory usage information: pagecache, rss (anonymous), + // mmapped files and swap. + + // TODO(chzhcn): mem_file_bytes and mem_anon_bytes are deprecated in + // 0.23.0 and will be removed in 0.24.0. + optional uint64 mem_file_bytes = 10; + optional uint64 mem_anon_bytes = 11; + + // mem_cache_bytes is added in 0.23.0 to represent page cache usage. + optional uint64 mem_cache_bytes = 39; + + // Since 0.23.0, mem_rss_bytes is changed to represent only + // anonymous memory usage. Note that neither its requiredness, type, + // name nor numeric tag has been changed. + optional uint64 mem_rss_bytes = 5; + + optional uint64 mem_mapped_file_bytes = 12; + // This is only set if swap is enabled. + optional uint64 mem_swap_bytes = 40; + optional uint64 mem_unevictable_bytes = 41; + + // Number of occurrences of different levels of memory pressure + // events reported by the memory cgroup. Pressure listening (re)starts + // with these values set to 0 when the agent (re)starts. See + // https://www.kernel.org/doc/Documentation/cgroups/memory.txt for + // more details.
+ optional uint64 mem_low_pressure_counter = 32; + optional uint64 mem_medium_pressure_counter = 33; + optional uint64 mem_critical_pressure_counter = 34; + + // Disk Usage Information for the executor working directory. + optional uint64 disk_limit_bytes = 26; + optional uint64 disk_used_bytes = 27; + + // Per disk (resource) statistics. + repeated DiskStatistics disk_statistics = 43; + + // Cgroups blkio statistics. + optional CgroupInfo.Blkio.Statistics blkio_statistics = 44; + + // Perf statistics (see `PerfStatistics`). + optional PerfStatistics perf = 13; + + // Network Usage Information: + optional uint64 net_rx_packets = 14; + optional uint64 net_rx_bytes = 15; + optional uint64 net_rx_errors = 16; + optional uint64 net_rx_dropped = 17; + optional uint64 net_tx_packets = 18; + optional uint64 net_tx_bytes = 19; + optional uint64 net_tx_errors = 20; + optional uint64 net_tx_dropped = 21; + + // The kernel keeps track of RTT (round-trip time) for its TCP + // sockets. RTT is a way to tell the latency of a container. + optional double net_tcp_rtt_microsecs_p50 = 22; + optional double net_tcp_rtt_microsecs_p90 = 23; + optional double net_tcp_rtt_microsecs_p95 = 24; + optional double net_tcp_rtt_microsecs_p99 = 25; + + optional double net_tcp_active_connections = 28; + optional double net_tcp_time_wait_connections = 29; + + // Network traffic flowing into or out of a container can be delayed + // or dropped due to congestion or policy inside and outside the + // container. + repeated TrafficControlStatistics net_traffic_control_statistics = 35; + + // Network SNMP statistics for each container. + optional SNMPStatistics net_snmp_statistics = 42; +} + + +/** + * Describes a snapshot of the resource usage for executors. + */ +message ResourceUsage { + message Executor { + required ExecutorInfo executor_info = 1; + + // This includes resources used by the executor itself + // as well as its active tasks. + repeated Resource allocated = 2; + + // Current resource usage.
If absent, the containerizer + // cannot provide resource usage. + optional ResourceStatistics statistics = 3; + + // The container id for the executor specified in the executor_info field. + required ContainerID container_id = 4; + + message Task { + required string name = 1; + required TaskID id = 2; + repeated Resource resources = 3; + optional Labels labels = 4; + } + + // Non-terminal tasks. + repeated Task tasks = 5; + } + + repeated Executor executors = 1; + + // Agent's total resources including checkpointed dynamic + // reservations and persistent volumes. + repeated Resource total = 2; +} + + +/** + * Describes a sample of events from "perf stat". Only available on + * Linux. + * + * NOTE: Each optional field matches the name of a perf event (see + * "perf list") with the following changes: + * 1. Names are lowercased. + * 2. Hyphens ('-') are replaced with underscores ('_'). + * 3. Events with alternate names use the name "perf stat" returns, + * e.g., for the event "cycles OR cpu-cycles" perf always returns + * cycles. + */ +message PerfStatistics { + required double timestamp = 1; // Start of sample interval, in seconds since the Epoch. + required double duration = 2; // Duration of sample interval, in seconds. + + // Hardware events. + optional uint64 cycles = 3; + optional uint64 stalled_cycles_frontend = 4; + optional uint64 stalled_cycles_backend = 5; + optional uint64 instructions = 6; + optional uint64 cache_references = 7; + optional uint64 cache_misses = 8; + optional uint64 branches = 9; + optional uint64 branch_misses = 10; + optional uint64 bus_cycles = 11; + optional uint64 ref_cycles = 12; + + // Software events.
+ optional double cpu_clock = 13; + optional double task_clock = 14; + optional uint64 page_faults = 15; + optional uint64 minor_faults = 16; + optional uint64 major_faults = 17; + optional uint64 context_switches = 18; + optional uint64 cpu_migrations = 19; + optional uint64 alignment_faults = 20; + optional uint64 emulation_faults = 21; + + // Hardware cache events. + optional uint64 l1_dcache_loads = 22; + optional uint64 l1_dcache_load_misses = 23; + optional uint64 l1_dcache_stores = 24; + optional uint64 l1_dcache_store_misses = 25; + optional uint64 l1_dcache_prefetches = 26; + optional uint64 l1_dcache_prefetch_misses = 27; + optional uint64 l1_icache_loads = 28; + optional uint64 l1_icache_load_misses = 29; + optional uint64 l1_icache_prefetches = 30; + optional uint64 l1_icache_prefetch_misses = 31; + optional uint64 llc_loads = 32; + optional uint64 llc_load_misses = 33; + optional uint64 llc_stores = 34; + optional uint64 llc_store_misses = 35; + optional uint64 llc_prefetches = 36; + optional uint64 llc_prefetch_misses = 37; + optional uint64 dtlb_loads = 38; + optional uint64 dtlb_load_misses = 39; + optional uint64 dtlb_stores = 40; + optional uint64 dtlb_store_misses = 41; + optional uint64 dtlb_prefetches = 42; + optional uint64 dtlb_prefetch_misses = 43; + optional uint64 itlb_loads = 44; + optional uint64 itlb_load_misses = 45; + optional uint64 branch_loads = 46; + optional uint64 branch_load_misses = 47; + optional uint64 node_loads = 48; + optional uint64 node_load_misses = 49; + optional uint64 node_stores = 50; + optional uint64 node_store_misses = 51; + optional uint64 node_prefetches = 52; + optional uint64 node_prefetch_misses = 53; +} + + +/** + * Describes a request for resources that can be used by a framework + * to proactively influence the allocator. If 'agent_id' is provided + * then this request is assumed to only apply to resources on that + * agent.
+ */ +message Request { + optional AgentID agent_id = 1; + repeated Resource resources = 2; +} + + +/** + * Describes some resources available on an agent. An offer only + * contains resources from a single agent. + */ +message Offer { + required OfferID id = 1; + required FrameworkID framework_id = 2; + required AgentID agent_id = 3; + required string hostname = 4; + + // URL for reaching the agent running on the host. + optional URL url = 8; + + // The domain of the agent. + optional DomainInfo domain = 11; + + repeated Resource resources = 5; + repeated Attribute attributes = 7; + repeated ExecutorID executor_ids = 6; + + // Signifies that the resources in this Offer may be unavailable during + // the given interval. Any tasks launched using these resources may be + // killed when the interval arrives. For example, these resources may be + // part of a planned maintenance schedule. + // + // This field only provides information about a planned unavailability. + // The unavailability interval may not necessarily start at exactly this + // interval, nor last for exactly the duration of this interval. + // The unavailability may also be forever! See comments in + // `Unavailability` for more details. + optional Unavailability unavailability = 9; + + // An offer represents resources allocated to *one* of the + // roles managed by the scheduler. (Therefore, each + // `Offer.resources[i].allocation_info` will match the + // top level `Offer.allocation_info`). + optional Resource.AllocationInfo allocation_info = 10; + + // Defines an operation that can be performed against offers. + // NOTE(review): presumably `type` indicates which one of the optional + // operation fields of this message is set -- confirm. + message Operation { + enum Type { + UNKNOWN = 0; + LAUNCH = 1; + LAUNCH_GROUP = 6; + RESERVE = 2; + UNRESERVE = 3; + CREATE = 4; + DESTROY = 5; + } + + // TODO(vinod): Deprecate this in favor of `LaunchGroup` below. + message Launch { + repeated TaskInfo task_infos = 1; + } + + // Unlike `Launch` above, all the tasks in a `task_group` are + // atomically delivered to an executor.
+ // + // `NetworkInfo` set on the executor will be shared by all tasks in + // the task group. + // + // TODO(vinod): Any volumes set on the executor could be used by a + // task by explicitly setting `Volume.source` in its resources. + message LaunchGroup { + required ExecutorInfo executor = 1; + required TaskGroupInfo task_group = 2; + } + + message Reserve { + repeated Resource resources = 1; + } + + message Unreserve { + repeated Resource resources = 1; + } + + message Create { + repeated Resource volumes = 1; + } + + message Destroy { + repeated Resource volumes = 1; + } + + // NOTE: Each operation's field number is its `Type` enum value plus + // one, because `type` itself occupies field 1 (e.g., LAUNCH_GROUP = 6 + // but `launch_group = 7`). + optional Type type = 1; + optional Launch launch = 2; + optional LaunchGroup launch_group = 7; + optional Reserve reserve = 3; + optional Unreserve unreserve = 4; + optional Create create = 5; + optional Destroy destroy = 6; + } +} + + +/** + * A request to return some resources occupied by a framework. + */ +message InverseOffer { + // This is the same OfferID as found in normal offers, which allows + // re-use of some of the OfferID-only messages. + required OfferID id = 1; + + // URL for reaching the agent running on the host. This enables some + // optimizations as described in MESOS-3012, such as allowing the + // scheduler driver to bypass the master and talk directly with an agent. + optional URL url = 2; + + // The framework that should release its resources. + // If no specifics are provided (i.e. which agent), all the framework's + // resources are requested back. + required FrameworkID framework_id = 3; + + // Specified if the resources need to be released from a particular agent. + // All the framework's resources on this agent are requested back, + // unless further qualified by the `resources` field. + optional AgentID agent_id = 4; + + // This InverseOffer represents a planned unavailability event in the + // specified interval. Any tasks running on the given framework or agent + // may be killed when the interval arrives.
Therefore, frameworks should + // aim to gracefully terminate tasks prior to the arrival of the interval. + // + // For reserved resources, the resources are expected to be returned to the + // framework after the unavailability interval. This is an expectation, + // not a guarantee. For example, if the unavailability duration is not set, + // the resources may be removed permanently. + // + // For other resources, there is no guarantee that requested resources will + // be returned after the unavailability interval. The allocator has no + // obligation to re-offer these resources to the prior framework after + // the unavailability. + required Unavailability unavailability = 5; + + // A list of resources being requested back from the framework, + // on the agent identified by `agent_id`. If no resources are specified + // then all resources are being requested back. For the purpose of + // maintenance, this field is always empty (maintenance always requests + // all resources back). + repeated Resource resources = 6; + + // TODO(josephw): Add additional options for narrowing down the resources + // being requested back, such as specific executors, tasks, etc. +} + + +/** + * Describes a task. Passed from the scheduler all the way to an + * executor (see SchedulerDriver::launchTasks and + * Executor::launchTask). Either `executor` (ExecutorInfo) or `command` + * (CommandInfo) should be set. + * A different executor can be used to launch this task, and subsequent tasks + * meant for the same executor can reuse the same ExecutorInfo struct. + */ +message TaskInfo { + required string name = 1; + required TaskID task_id = 2; + required AgentID agent_id = 3; + repeated Resource resources = 4; + optional ExecutorInfo executor = 5; + optional CommandInfo command = 7; + + // A task provided with a container will launch the container as part + // of this task paired with the task's CommandInfo. + optional ContainerInfo container = 9; + + // A health check for the task.
Implemented for executor-less + // command-based tasks. For tasks that specify an executor, it is + // the executor's responsibility to implement the health checking. + optional HealthCheck health_check = 8; + + // A general check for the task. Implemented for all built-in executors. + // For tasks that specify an executor, it is the executor's responsibility + // to implement checking support. Executors should (all built-in executors + // will) neither interpret nor act on the check's result. + // + // NOTE: Check support in built-in executors is experimental. + // + // TODO(alexr): Consider supporting multiple checks per task. + optional CheckInfo check = 13; + + // A kill policy for the task. Implemented for executor-less + // command-based and docker tasks. For tasks that specify an + // executor, it is the executor's responsibility to implement + // the kill policy. + optional KillPolicy kill_policy = 12; + + // Arbitrary bytes supplied with the task. Per the `labels` comment + // below, `data` (unlike labels) is not kept in memory on the master + // and agent processes. + optional bytes data = 6; + + // Labels are free-form key value pairs which are exposed through + // master and agent endpoints. Labels will not be interpreted or + // acted upon by Mesos itself. As opposed to the data field, labels + // will be kept in memory on master and agent processes. Therefore, + // labels should be used to tag tasks with light-weight meta-data. + // Labels should not contain duplicate key-value pairs. + optional Labels labels = 10; + + // Service discovery information for the task. It is not interpreted + // or acted upon by Mesos. It is up to a service discovery system + // to use this information as needed and to handle tasks without + // service discovery information. + optional DiscoveryInfo discovery = 11; +} + + +/** + * Describes a group of tasks that belong to an executor. The + * executor will receive the task group in a single message to + * allow the group to be launched "atomically". + * + * NOTES: + * 1) `NetworkInfo` must not be set inside task's `ContainerInfo`. + * 2) `TaskInfo.executor` doesn't need to set.
If set, it should match + * `LaunchGroup.executor`. + */ +message TaskGroupInfo { + repeated TaskInfo tasks = 1; +} + + +// TODO(bmahler): Add executor_uuid here, and send it to the master. This will +// allow us to expose executor work directories for tasks in the webui when +// looking from the master level. Currently only the agent knows which run the +// task belongs to. +/** + * Describes a task, similar to `TaskInfo`. + * + * `Task` is used in some of the Mesos messages found below. + * `Task` is used instead of `TaskInfo` if: + * 1) we need additional IDs, such as a specific + * framework, executor, or agent; or + * 2) we do not need the additional data, such as the command run by the + * task or the health checks. These additional fields may be large and + * unnecessary for some Mesos messages. + * + * `Task` is generally constructed from a `TaskInfo`. See protobuf::createTask. + */ +message Task { + required string name = 1; + required TaskID task_id = 2; + required FrameworkID framework_id = 3; + optional ExecutorID executor_id = 4; + required AgentID agent_id = 5; + required TaskState state = 6; // Latest state of the task. + repeated Resource resources = 7; + repeated TaskStatus statuses = 8; + + // These fields correspond to the state and uuid of the latest + // status update forwarded to the master. + // NOTE: Either both the fields must be set or both must be unset. + optional TaskState status_update_state = 9; + optional bytes status_update_uuid = 10; + + optional Labels labels = 11; + + // Service discovery information for the task. It is not interpreted + // or acted upon by Mesos. It is up to a service discovery system + // to use this information as needed and to handle tasks without + // service discovery information. + optional DiscoveryInfo discovery = 12; + + // Container information for the task. + optional ContainerInfo container = 13; + + // Specific user under which task is running. 
+ optional string user = 14; +} + + +/** + * Describes possible task states. IMPORTANT: Mesos assumes tasks that + * enter terminal states (see below) imply the task is no longer + * running and thus clean up any thing associated with the task + * (ultimately offering any resources being consumed by that task to + * another task). + */ +enum TaskState { + TASK_STAGING = 6; // Initial state. Framework status updates should not use. + TASK_STARTING = 0; // The task is being launched by the executor. + TASK_RUNNING = 1; + + // NOTE: This should only be sent when the framework has + // the TASK_KILLING_STATE capability. + TASK_KILLING = 8; // The task is being killed by the executor. + + TASK_FINISHED = 2; // TERMINAL: The task finished successfully. + TASK_FAILED = 3; // TERMINAL: The task failed to finish successfully. + TASK_KILLED = 4; // TERMINAL: The task was killed by the executor. + TASK_ERROR = 7; // TERMINAL: The task description contains an error. + + // In Mesos 1.3, this will only be sent when the framework does NOT + // opt-in to the PARTITION_AWARE capability. + // + // NOTE: This state is not always terminal. For example, tasks might + // transition from TASK_LOST to TASK_RUNNING or other states when a + // partitioned agent re-registers. + TASK_LOST = 5; // The task failed but can be rescheduled. + + // The following task states are only sent when the framework + // opts-in to the PARTITION_AWARE capability. + + // The task failed to launch because of a transient error. The + // task's executor never started running. Unlike TASK_ERROR, the + // task description is valid -- attempting to launch the task again + // may be successful. + TASK_DROPPED = 9; // TERMINAL. + + // The task was running on an agent that has lost contact with the + // master, typically due to a network failure or partition. The task + // may or may not still be running. + TASK_UNREACHABLE = 10; + + // The task is no longer running. 
This can occur if the agent has + // been terminated along with all of its tasks (e.g., the host that + // was running the agent was rebooted). It might also occur if the + // task was terminated due to an agent or containerizer error, or if + // the task was preempted by the QoS controller in an + // oversubscription scenario. + TASK_GONE = 11; // TERMINAL. + + // The task was running on an agent that the master cannot contact; + // the operator has asserted that the agent has been shutdown, but + // this has not been directly confirmed by the master. If the + // operator is correct, the task is not running and this is a + // terminal state; if the operator is mistaken, the task may still + // be running and might return to RUNNING in the future. + TASK_GONE_BY_OPERATOR = 12; + + // The master has no knowledge of the task. This is typically + // because either (a) the master never had knowledge of the task, or + // (b) the master forgot about the task because it garbage collected + // its metadata about the task. The task may or may not still be + // running. + TASK_UNKNOWN = 13; +} + + +/** +* Describes the status of a check. Type and the corresponding field, i.e., +* `command` or `http` must be set. If the result of the check is not available +* (e.g., the check timed out), these fields must contain empty messages, i.e., +* `exit_code` or `status_code` will be unset. +* +* NOTE: This API is unstable and the related feature is experimental. +*/ +message CheckStatusInfo { + message Command { + // Exit code of a command check. It is the result of calling + // `WEXITSTATUS()` on `waitpid()` termination information on + // Posix and calling `GetExitCodeProcess()` on Windows. + optional int32 exit_code = 1; + } + + message Http { + // HTTP status code of an HTTP check. + optional uint32 status_code = 1; + } + + message Tcp { + // Whether a TCP connection succeeded. 
+ optional bool succeeded = 1; + } + + // TODO(alexr): Consider adding a `data` field, which can contain, e.g., + // truncated stdout/stderr output for command checks or HTTP response body + // for HTTP checks. Alternatively, it can be an even shorter `message` field + // containing the last line of stdout or Reason-Phrase of the status line of + // the HTTP response. + + // The type of the check this status corresponds to. + optional CheckInfo.Type type = 1; + + // Status of a command check. + optional Command command = 2; + + // Status of an HTTP check. + optional Http http = 3; + + // Status of a TCP check. + optional Tcp tcp = 4; + + // TODO(alexr): Consider introducing a "last changed at" timestamp, since + // task status update's timestamp may not correspond to the last check's + // state, e.g., for reconciliation. + + // TODO(alexr): Consider introducing a `reason` enum here to explicitly + // distinguish between completed, delayed, and timed out checks. +} + + +/** + * Describes the current status of a task. + */ +message TaskStatus { + // Describes the source of the task status update. + enum Source { + SOURCE_MASTER = 0; + SOURCE_AGENT = 1; + SOURCE_EXECUTOR = 2; + } + + // Detailed reason for the task status update. + // + // TODO(bmahler): Differentiate between agent removal reasons + // (e.g. unhealthy vs. unregistered for maintenance). + enum Reason { + // TODO(jieyu): The default value when a caller doesn't check for + // presence is 0 and so ideally the 0 reason is not a valid one. + // Since this is not used anywhere, consider removing this reason. 
+ REASON_COMMAND_EXECUTOR_FAILED = 0; + + REASON_CONTAINER_LAUNCH_FAILED = 21; + REASON_CONTAINER_LIMITATION = 19; + REASON_CONTAINER_LIMITATION_DISK = 20; + REASON_CONTAINER_LIMITATION_MEMORY = 8; + REASON_CONTAINER_PREEMPTED = 17; + REASON_CONTAINER_UPDATE_FAILED = 22; + REASON_EXECUTOR_REGISTRATION_TIMEOUT = 23; + REASON_EXECUTOR_REREGISTRATION_TIMEOUT = 24; + REASON_EXECUTOR_TERMINATED = 1; + REASON_EXECUTOR_UNREGISTERED = 2; // No longer used. + REASON_FRAMEWORK_REMOVED = 3; + REASON_GC_ERROR = 4; + REASON_INVALID_FRAMEWORKID = 5; + REASON_INVALID_OFFERS = 6; + REASON_IO_SWITCHBOARD_EXITED = 27; + REASON_MASTER_DISCONNECTED = 7; + REASON_RECONCILIATION = 9; + REASON_RESOURCES_UNKNOWN = 18; + REASON_AGENT_DISCONNECTED = 10; + REASON_AGENT_REMOVED = 11; + REASON_AGENT_RESTARTED = 12; + REASON_AGENT_UNKNOWN = 13; + REASON_TASK_KILLED_DURING_LAUNCH = 30; + REASON_TASK_CHECK_STATUS_UPDATED = 28; + REASON_TASK_HEALTH_CHECK_STATUS_UPDATED = 29; + REASON_TASK_GROUP_INVALID = 25; + REASON_TASK_GROUP_UNAUTHORIZED = 26; + REASON_TASK_INVALID = 14; + REASON_TASK_UNAUTHORIZED = 15; + REASON_TASK_UNKNOWN = 16; + } + + required TaskID task_id = 1; + required TaskState state = 2; + optional string message = 4; // Possible message explaining state. + optional Source source = 9; + optional Reason reason = 10; + optional bytes data = 3; + optional AgentID agent_id = 5; + optional ExecutorID executor_id = 7; // TODO(benh): Use in master/agent. + optional double timestamp = 6; + + // Statuses that are delivered reliably to the scheduler will + // include a 'uuid'. The status is considered delivered once + // it is acknowledged by the scheduler. Schedulers can choose + // to either explicitly acknowledge statuses or let the scheduler + // driver implicitly acknowledge (default). + // + // TODO(bmahler): This is currently overwritten in the scheduler + // driver and executor driver, but executors will need to set this + // to a valid RFC-4122 UUID if using the HTTP API. 
+ optional bytes uuid = 11; + + // Describes whether the task has been determined to be healthy (true) or + // unhealthy (false) according to the `health_check` field in `TaskInfo`. + optional bool healthy = 8; + + // Contains check status for the check specified in the corresponding + // `TaskInfo`. If no check has been specified, this field must be + // absent, otherwise it must be present even if the check status is + // not available yet. If the status update is triggered for a different + // reason than `REASON_TASK_CHECK_STATUS_UPDATED`, this field will contain + // the last known value. + // + // NOTE: A check-related task status update is triggered if and only if + // the value or presence of any field in `CheckStatusInfo` changes. + // + // NOTE: Check support in built-in executors is experimental. + optional CheckStatusInfo check_status = 15; + + // Labels are free-form key value pairs which are exposed through + // master and agent endpoints. Labels will not be interpreted or + // acted upon by Mesos itself. As opposed to the data field, labels + // will be kept in memory on master and agent processes. Therefore, + // labels should be used to tag TaskStatus message with light-weight + // meta-data. Labels should not contain duplicate key-value pairs. + optional Labels labels = 12; + + // Container related information that is resolved dynamically such as + // network address. + optional ContainerStatus container_status = 13; + + // The time (according to the master's clock) when the agent where + // this task was running became unreachable. This is only set on + // status updates for tasks running on agents that are unreachable + // (e.g., partitioned away from the master). + optional TimeInfo unreachable_time = 14; +} + + +/** + * Describes possible filters that can be applied to unused resources + * (see SchedulerDriver::launchTasks) to influence the allocator. + */ +message Filters { + // Time to consider unused resources refused. 
Note that all unused + // resources will be considered refused and use the default value + // (below) regardless of whether Filters was passed to + // SchedulerDriver::launchTasks. You MUST pass Filters with this + // field set to change this behavior (i.e., get another offer which + // includes unused resources sooner or later than the default). + optional double refuse_seconds = 1 [default = 5.0]; +} + + +/** +* Describes a collection of environment variables. This is used with +* CommandInfo in order to set environment variables before running a +* command. The contents of each variable may be specified as a string +* or a Secret; only one of `value` and `secret` must be set. +*/ +message Environment { + message Variable { + required string name = 1; + + enum Type { + UNKNOWN = 0; + VALUE = 1; + SECRET = 2; + } + + // In Mesos 1.2, the `Environment.variables.value` message was made + // optional. The default type for `Environment.variables.type` is now VALUE, + // which requires `value` to be set, maintaining backward compatibility. + // + // TODO(greggomann): The default can be removed in Mesos 2.1 (MESOS-7134). + optional Type type = 3 [default = VALUE]; + + // Only one
<TRUNCATED>