Skip to content

Commit

Permalink
Merge pull request #128 from LiilyZhang/issue3340
Browse files Browse the repository at this point in the history
Issue 3340 - Bug: CSS getting an OOM during agent auto upgrade
  • Loading branch information
LiilyZhang authored Jun 30, 2022
2 parents 85275d2 + bfa1e66 commit 1340934
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 2 deletions.
14 changes: 14 additions & 0 deletions common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,20 @@ func IsIgnoredRequest(err error) bool {
return ok
}

// TooManyRequestError is the error reported when there are too many requests.
type TooManyRequestError struct {
	// Message is the text returned by Error.
	Message string
}

// Error implements the error interface by returning the stored Message.
func (e *TooManyRequestError) Error() string {
	msg := e.Message
	return msg
}

// IsTooManyRequestError reports whether the dynamic type of err is
// *TooManyRequestError. The error is inspected directly; wrapped errors are
// not unwrapped.
func IsTooManyRequestError(err error) bool {
	switch err.(type) {
	case *TooManyRequestError:
		return true
	default:
		return false
	}
}

// Destination describes a sync service node.
// Each sync service edge node (ESS) has an address that is composed of the node's ID, Type, and Organization.
// An ESS node communicates with the CSS using either MQTT or HTTP.
Expand Down
13 changes: 13 additions & 0 deletions core/base/apiServer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2159,6 +2159,19 @@ func handleObjectGetData(orgID string, objectType string, objectID string, canAc
}
}

if common.ObjectDownloadSemaphore.TryAcquire(1) == false {
// If too many downloads are in flight, agent will get error and retry. Originally, there was a lock around the download that
// caused the downloads to be serial. It was changed to use a semaphore to allow limited concurrency.
if trace.IsLogging(logger.TRACE) {
trace.Trace("Failed to acquire semaphore for handleObjects of %s %s %s \n", orgID, objectType, objectID)
}
err := &common.TooManyRequestError{Message: "Error in handleObjects: Unable to acquire object semaphore."}
communications.SendErrorResponse(writer, err, "", 0)
return
}

defer common.ObjectDownloadSemaphore.Release(1)

// Get range from the header "Range:bytes={startOffset}-{endOffset}"
var dataReader io.Reader
var eof bool
Expand Down
4 changes: 3 additions & 1 deletion core/communications/communicator.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ func SendErrorResponse(writer http.ResponseWriter, err error, message string, st
statusCode = http.StatusConflict
case *common.IgnoredRequest:
statusCode = http.StatusTemporaryRedirect
case *common.TooManyRequestError:
statusCode = http.StatusTooManyRequests
case *Error:
// Don't return an error if it's a communication error
statusCode = http.StatusNoContent
Expand Down Expand Up @@ -198,7 +200,7 @@ func IsTransportError(pResp *http.Response, err error) bool {
// 503: service unavailable
return true
} else if pResp.StatusCode == http.StatusRequestTimeout {
// 408: request time out
// 408: request time out
return true
} else if pResp.StatusCode == http.StatusTooManyRequests {
// 429: too many requests
Expand Down
2 changes: 1 addition & 1 deletion core/communications/httpCommunication.go
Original file line number Diff line number Diff line change
Expand Up @@ -1687,7 +1687,7 @@ func (communication *HTTP) handleGetData(orgID string, objectType string, object
if trace.IsLogging(logger.TRACE) {
trace.Trace("Failed to acquire semaphore for handleGetData of %s %s %s %s \n", objectType, objectID, destType, destID)
}
err := &Error{"Error in handleGetData: Unable to acquire object semaphore."}
err := &common.TooManyRequestError{Message: "Error in handleGetData: Unable to acquire object semaphore."}
SendErrorResponse(writer, err, "", http.StatusTooManyRequests)
return
}
Expand Down

0 comments on commit 1340934

Please sign in to comment.