Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

span-calculation: comment 85% CPU span calculation #5948

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bench/locli/src/Cardano/Analysis/API/Chain.hs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ slotStart Genesis{..} =
. fromIntegral
. unSlotNo

-- | `impliedSlot` calculates slot numbers from timestamps and genesis
-- parameters. The fields `systemStart` time and `slotLength` are
-- both fields of the `Genesis` record.
impliedSlot :: Genesis -> UTCTime -> SlotNo
impliedSlot Genesis{..} =
SlotNo
Expand Down
47 changes: 47 additions & 0 deletions bench/locli/src/Cardano/Analysis/MachPerf.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,24 @@ import Cardano.Unlog.Resources

-- * 1. Collect SlotStats & RunScalars:
--
-- | `collectSlotStats` processes logfiles from different cluster
-- nodes in parallel. The right-hand argument of `(<$>)` is a
-- partially-applied functipn, so it's really just `(.)`, but then
-- `sequence` being composed with the function within the map has
-- some complications from the forcing etc. from `deepseq` and the
-- `evaluate` IO primitive.
collectSlotStats :: Run -> [(JsonLogfile, [LogObject])]
-> IO (Either Text [(JsonLogfile, (RunScalars, [SlotStats UTCTime]))])
collectSlotStats run = fmap sequence <$> mapConcurrentlyPure (timelineFromLogObjects run)


-- | `timelineFromLogObjects` processes a list of `LogObject` to
-- assemble the log entries into time slots. Per-slot data structures
-- absorb the log entries describing the events ongoing during the
-- time period they're intended to track over the course of the fold.
-- The per-slot structures are also instantiated as the time periods
-- in need of having events tracked within them are encountered in
-- the list of log entries.
timelineFromLogObjects :: Run -> (JsonLogfile, [LogObject])
-> Either Text (JsonLogfile, (RunScalars, [SlotStats UTCTime]))
timelineFromLogObjects _ (JsonLogfile f, []) =
Expand All @@ -54,6 +67,8 @@ timelineFromLogObjects run@Run{genesis} (f, xs') =
firstLogObjectHost :: Host
firstLogObjectHost = loHost (head xs)

-- | `zeroTimelineAccum` represents the initial state of the
-- timeline accumulator to be fed to the fold.
zeroTimelineAccum :: TimelineAccum
zeroTimelineAccum =
TimelineAccum
Expand Down Expand Up @@ -107,6 +122,9 @@ timelineFromLogObjects run@Run{genesis} (f, xs') =
, slLogObjects = []
}

-- | `timelineStep` processes a single `LogObject` to incorporate it
-- into the `TimelineAccum` accumulator. A more detailed description
-- of the case analysis inside it could be worthwhile.
timelineStep :: Run -> JsonLogfile -> TimelineAccum -> LogObject -> TimelineAccum
timelineStep Run{genesis} f accum@TimelineAccum{aSlotStats=cur:_, ..} lo =
-- 1. skip pre-historic events not subject to performance analysis;
Expand Down Expand Up @@ -357,6 +375,9 @@ lastBlockSlot new TimelineAccum{aSlotStats=SlotStats{..}:_,..} =
then slSlot
else aLastBlockSlot

-- | `patchSlotGap` walks through a gap in slot numbers adding slots
-- to the accumulator. The gap is taken to start after the head of
-- the accumulator, whose list is reversed.
patchSlotGap :: Genesis -> SlotNo -> TimelineAccum -> TimelineAccum
patchSlotGap genesis curSlot a@TimelineAccum{aSlotStats=last:_, ..} =
a & if gapLen < 1000
Expand All @@ -372,9 +393,12 @@ patchSlotGap genesis curSlot a@TimelineAccum{aSlotStats=last:_, ..} =

go :: Word64 -> SlotNo -> TimelineAccum -> TimelineAccum
go 0 _ acc = acc

go remainingGap patchSlot acc =
go (remainingGap - 1) (patchSlot + 1) (acc & addGapSlot patchSlot)

-- | `addGapSlot` constructs a single slot and adds it to the front
-- of the accumulator list.
addGapSlot :: SlotNo -> TimelineAccum -> TimelineAccum
addGapSlot slot acc =
let (epoch, epochSlot) = genesis `unsafeParseSlot` slot in
Expand Down Expand Up @@ -418,6 +442,10 @@ patchSlotGap genesis curSlot a@TimelineAccum{aSlotStats=last:_, ..} =
}
where slStart = slotStart genesis slot

-- | `addTimelineSlot` updates the current position of the
-- `TimelineAccum` accumulator. `timelineStep` invokes it via its
-- `continue` internal function, which calls it when the slot is
-- strictly greater than the `slSlot` if properly deciphered.
addTimelineSlot :: Genesis -> SlotNo -> UTCTime -> TimelineAccum -> TimelineAccum
addTimelineSlot genesis slot _time a@TimelineAccum{..} =
let (epoch, epochSlot) = genesis `unsafeParseSlot` slot in
Expand Down Expand Up @@ -556,6 +584,23 @@ slotStatsSummary Run{genesis=Genesis{epochLength}} slots =
in if tailEpoch == slEpoch (Vec.head v) then v
else Vec.dropWhile ((tailEpoch == ) . slEpoch) v

-- | `spanLen` in effect does (slSlot last) - (slSlot head) with
-- `unSlotNo` only unpacking it from the `SlotNo` newtype and
-- `fromIntegral` converting it from a `Word64` to an `Int`, where
-- last and head refer to the vector's elements.
--
-- In plain words, this represents the number of slots in a half-open
-- interval bounded by the slot numbers at the first and last
-- positions within the vector of slot numbers. Some thoughts could
-- be had about the potential for a discontiguous set of slots
-- within the vector and whether a closed interval should be
-- preferred over a half-open one. Answers could be turned up over
-- the course of this documentation audit.
--
-- Simplifying the expression within the executable code might help
-- with maintainability, but it's best left undisturbed for the sake
-- of reducing the audit surface area in the event of regressions,
-- barring sufficiently broad consensus.
spanLen :: Vector (SlotStats a) -> Int
spanLen = fromIntegral . unSlotNo . uncurry (-) . (slSlot *** slSlot) . (Vec.last &&& Vec.head)

Expand Down Expand Up @@ -598,6 +643,8 @@ slotStatsMachPerf run (f, slots) =

(,) sFirst sLast = (slSlot . head &&& slSlot . last) slots

-- | `dist` assembles the sample into a distribution `CDF` according
-- to the standard list of centiles `stdCentiles` using `cdfZ`.
dist :: Divisible a => [a] -> CDF I a
dist = cdfZ stdCentiles

Expand Down
19 changes: 19 additions & 0 deletions bench/locli/src/Cardano/Command.hs
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,9 @@ runChainCommand s@State{sMultiSummary=Just summ@Summary{..}}
runChainCommand _ c@WriteMetaGenesis{} = missingCommandData c
["multi objects"]

-- | Slots are input here, in the `Unlog` command.
-- `runLiftLogObjects` does the IO and `Aeson` processing with
-- `readLogObjectStream` as a helper.
runChainCommand s
c@(Unlog rlf okDErr loAnyLimit) = do
progress "logs" (Q $ printf "reading run log manifest %s" $ unJsonInputFile rlf)
Expand Down Expand Up @@ -537,13 +540,26 @@ runChainCommand s@State{sRun=Just _run, sChain=Just Chain{..}}
runChainCommand _ c@TimelineChain{} = missingCommandData c
["run metadata & genesis", "chain"]

-- | `CollectSlots` is the step slots make from `sRunLogs` to
-- `sSlotsRaw`. `deltifySlotStats` is the crucial post-processing after
runChainCommand s@State{sRun=Just run, sRunLogs=Just (rlLogs -> objs)}
c@(CollectSlots ignores) = do
let nonIgnored = flip filter objs $ (`notElem` ignores) . fst
forM_ ignores $
progress "perf-ignored-log" . R . unJsonLogfile
progress "slots" (Q $ printf "building slot %d timelines" $ length objs)
(scalars, slotsRaw) <-
-- `mapAndUnzip` just maps using a function producing a pair and
-- unzips the result. `redistribute` just takes a triple in the
-- form of (t1, (t2, t3)) mapping it to ((t1, t2), (t1, t3)),
-- repeating the first component to pair it with both halves of the
-- second component. `collectSlotStats` just maps coalescing the log
-- entries into per- (time) slot structures collecting everything
-- described in one of the `LogObject` lists, done in parallel across
-- the distinct log files. `deltifySlotStats` then differences the
-- timestamps within a slot against the `Genesis` to provide a
-- relative slot start time, and then differences other fields
-- against each other and that baseline to measure timings.
fmap (mapAndUnzip redistribute) <$> collectSlotStats run nonIgnored
& newExceptT
& firstExceptT (CommandError c)
Expand All @@ -560,6 +576,8 @@ runChainCommand s@State{sSlotsRaw=Just slotsRaw}
runChainCommand _ c@DumpSlotsRaw = missingCommandData c
["unfiltered slots"]

-- | `FilterSlots` takes raw slots in `sSlotsRaw` and cleans them up to
-- be put at long last in `sSlots` for consumption by higher-level code.
runChainCommand s@State{sRun=Just run, sSlotsRaw=Just slotsRaw}
c@(FilterSlots fltfs fltExprs) = do
progress "slots" (Q $ printf "filtering %d slot timelines" $ length slotsRaw)
Expand Down Expand Up @@ -591,6 +609,7 @@ runChainCommand s@State{sSlots=Just slots}
runChainCommand _ c@DumpSlots = missingCommandData c
["filtered slots"]

-- | `TimelineSlots` exports the processed slots in renderable form.
runChainCommand s@State{sRun=Just _run, sSlots=Just slots}
c@(TimelineSlots rc comments) = do
progress "mach" (Q $ printf "dumping %d slot timelines: %s" (length slots) (show rc :: String))
Expand Down
3 changes: 3 additions & 0 deletions bench/locli/src/Cardano/Render.hs
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ renderProfilingData rc a flt pd =
, fDescription = peSrcLoc pe
}

-- | `renderTimelineWithClass` and `renderTimeline` output textual
-- representations of timelines for postprocessing with ede, em, and
-- other such tools for producing benchmark reports.
renderTimelineWithClass :: forall (a :: Type). TimelineFields a => (Field ISelect I a -> Bool) -> RenderConfig -> Anchor -> [TimelineComments a] -> [a] -> [Text]
renderTimelineWithClass flt = renderTimeline (filter flt timelineFields) rtCommentary

Expand Down
5 changes: 5 additions & 0 deletions bench/locli/src/Cardano/Util.hs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ mapSMaybeFB cons f x next = case f x of
SNothing -> next
SJust r -> cons r next

-- | `mapConcurrentlyPure` mostly fully forces the list elements
-- produced in parallel. `evaluate` seems to have mostly to do with
-- exceptions.
mapConcurrentlyPure :: NFData b => (a -> b) -> [a] -> IO [b]
mapConcurrentlyPure f =
mapConcurrently
Expand Down Expand Up @@ -178,6 +181,8 @@ replaceExtension :: FilePath -> String -> FilePath
replaceExtension f new = F.dropExtension f <> "." <> new


-- | `spans` creates a list of `Vector` where each list element is all
-- of the list elements comprising a span put into a `Vector`.
spans :: forall a. (a -> Bool) -> [a] -> [Vector a]
spans f = go []
where
Expand Down
Loading