wilfred-s commented on code in PR #1001: URL: https://github.com/apache/yunikorn-core/pull/1001#discussion_r1877404707
########## pkg/scheduler/objects/application.go: ########## @@ -816,144 +804,155 @@ func (sa *Application) IsReservedOnNode(nodeID string) bool { } sa.RLock() defer sa.RUnlock() - // make sure matches only for the whole nodeID - separator := nodeID + "|" - for key := range sa.reservations { - if strings.HasPrefix(key, separator) { + for _, reserved := range sa.reservations { + if reserved.nodeID == nodeID { return true } } return false } -// Reserve the application for this node and ask combination. +// Reserve the application for this node and alloc combination. // If the reservation fails the function returns false, if the reservation is made it returns true. -// If the node and ask combination was already reserved for the application this is a noop and returns true. +// If the node and alloc combination was already reserved for the application this is a noop and returns true. func (sa *Application) Reserve(node *Node, ask *Allocation) error { + if node == nil || ask == nil { + return fmt.Errorf("reservation creation failed node or alloc are nil on appID %s", sa.ApplicationID) + } sa.Lock() defer sa.Unlock() return sa.reserveInternal(node, ask) } -// Unlocked version for Reserve that really does the work. +// reserveInternal is the unlocked version for Reserve that really does the work. // Must only be called while holding the application lock. func (sa *Application) reserveInternal(node *Node, ask *Allocation) error { + allocKey := ask.GetAllocationKey() + if sa.requests[allocKey] == nil { + log.Log(log.SchedApplication).Debug("alloc is not registered to this app", + zap.String("app", sa.ApplicationID), + zap.String("allocKey", allocKey)) + return fmt.Errorf("reservation creation failed alloc %s not found on appID %s", allocKey, sa.ApplicationID) + } // create the reservation (includes nil checks) nodeReservation := newReservation(node, sa, ask, true) if nodeReservation == nil { log.Log(log.SchedApplication).Debug("reservation creation failed unexpectedly", zap.String("app", sa.ApplicationID), - zap.Any("node", node), - zap.Any("ask", ask)) - return fmt.Errorf("reservation creation failed node or ask are nil on appID %s", sa.ApplicationID) - } - allocKey := ask.GetAllocationKey() - if sa.requests[allocKey] == nil { - log.Log(log.SchedApplication).Debug("ask is not registered to this app", - zap.String("app", sa.ApplicationID), - zap.String("allocKey", allocKey)) - return fmt.Errorf("reservation creation failed ask %s not found on appID %s", allocKey, sa.ApplicationID) + zap.Stringer("node", node), + zap.Stringer("alloc", ask)) + return fmt.Errorf("reservation creation failed node or alloc are nil on appID %s", sa.ApplicationID) } - if !sa.canAskReserve(ask) { - if ask.IsAllocated() { - return fmt.Errorf("ask is already allocated") - } else { - return fmt.Errorf("ask is already reserved") - } + // the alloc should not have reserved a node yet: do not allow multiple nodes to be reserved + if err := sa.canAllocationReserve(ask); err != nil { + return err } // check if we can reserve the node before reserving on the app if err := node.Reserve(sa, ask); err != nil { return err } - sa.reservations[nodeReservation.getKey()] = nodeReservation + sa.reservations[allocKey] = nodeReservation log.Log(log.SchedApplication).Info("reservation added successfully", zap.String("app", sa.ApplicationID), zap.String("node", node.NodeID), - zap.String("ask", allocKey)) + zap.String("alloc", allocKey)) return nil } -// UnReserve the application for this node and ask combination. -// This first removes the reservation from the node. +// UnReserve the application for this node and alloc combination. // If the reservation does not exist it returns 0 for reservations removed, if the reservation is removed it returns 1. // The error is set if the reservation key cannot be removed from the app or node. -func (sa *Application) UnReserve(node *Node, ask *Allocation) (int, error) { +func (sa *Application) UnReserve(node *Node, ask *Allocation) int { + log.Log(log.SchedApplication).Info("unreserving allocation from application", + zap.String("appID", sa.ApplicationID), + zap.Stringer("node", node), + zap.Stringer("alloc", ask)) + if node == nil || ask == nil { + return 0 + } sa.Lock() defer sa.Unlock() - return sa.unReserveInternal(node, ask) + reserve, ok := sa.reservations[ask.allocationKey] + if !ok { + log.Log(log.SchedApplication).Debug("reservation not found on application", + zap.String("appID", sa.ApplicationID), + zap.String("allocationKey", ask.allocationKey)) + return 0 + } + if reserve.nodeID != node.NodeID { + log.Log(log.SchedApplication).Warn("UnReserve: provided info not consistent with reservation", + zap.String("appID", sa.ApplicationID), + zap.String("node", reserve.nodeID), + zap.String("alloc", reserve.allocKey)) + } + return sa.unReserveInternal(reserve) } // Unlocked version for UnReserve that really does the work. +// This is idempotent and will not fail // Must only be called while holding the application lock. -func (sa *Application) unReserveInternal(node *Node, ask *Allocation) (int, error) { - resKey := reservationKey(node, nil, ask) - if resKey == "" { - log.Log(log.SchedApplication).Debug("unreserve reservation key create failed unexpectedly", - zap.String("appID", sa.ApplicationID), - zap.Stringer("node", node), - zap.Stringer("ask", ask)) - return 0, fmt.Errorf("reservation key failed node or ask are nil for appID %s", sa.ApplicationID) +func (sa *Application) unReserveInternal(reserve *reservation) int { + // this should not happen + if reserve == nil { + return 0 } // unReserve the node before removing from the app - var num int - var err error - if num, err = node.unReserve(sa, ask); err != nil { - return 0, err - } + num := reserve.node.unReserve(reserve.alloc) // if the unreserve worked on the node check the app - if _, found := sa.reservations[resKey]; found { + if _, found := sa.reservations[reserve.allocKey]; found { // worked on the node means either found or not but no error, log difference here if num == 0 { log.Log(log.SchedApplication).Info("reservation not found while removing from node, app has reservation", zap.String("appID", sa.ApplicationID), - zap.String("nodeID", node.NodeID), - zap.String("ask", ask.GetAllocationKey())) + zap.String("nodeID", reserve.nodeID), + zap.String("alloc", reserve.allocKey)) } - delete(sa.reservations, resKey) - log.Log(log.SchedApplication).Info("reservation removed successfully", zap.String("node", node.NodeID), - zap.String("app", ask.GetApplicationID()), zap.String("ask", ask.GetAllocationKey())) - return 1, nil + delete(sa.reservations, reserve.allocKey) + log.Log(log.SchedApplication).Info("reservation removed successfully", + zap.String("appID", sa.ApplicationID), + zap.String("node", reserve.nodeID), + zap.String("alloc", reserve.allocKey)) + return 1 } // reservation was not found log.Log(log.SchedApplication).Info("reservation not found while removing from app", zap.String("appID", sa.ApplicationID), - zap.String("nodeID", node.NodeID), - zap.String("ask", ask.GetAllocationKey()), + zap.String("node", reserve.nodeID), + zap.String("alloc", reserve.allocKey), zap.Int("nodeReservationsRemoved", num)) - return 0, nil + return 0 } -// Return the allocation reservations on any node. -// The returned array is 0 or more keys into the reservations map. +// IsAllocationReserved returns true if the allocation has been reserved +func (sa *Application) IsAllocationReserved(allocKey string) bool { + sa.RLock() + defer sa.RUnlock() + return sa.reservations[allocKey] != nil +} + +// getAllocationReservation returns the reservation object for the allocation // No locking must be called while holding the lock -func (sa *Application) GetAskReservations(allocKey string) []string { - reservationKeys := make([]string, 0) - if allocKey == "" { - return reservationKeys - } - for key := range sa.reservations { - if strings.HasSuffix(key, allocKey) { - reservationKeys = append(reservationKeys, key) - } - } - return reservationKeys +func (sa *Application) getAllocationReservation(allocKey string) *reservation { + return sa.reservations[allocKey] Review Comment: removed -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@yunikorn.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org