Yi Kong | e5b9f24 | 2023-04-26 13:35:57 +0900 | [diff] [blame^] | 1 | //===--------------------- ResourceManager.h --------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// \file |
| 9 | /// |
| 10 | /// The classes here represent processor resource units and their management |
| 11 | /// strategy. These classes are managed by the Scheduler. |
| 12 | /// |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #ifndef LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H |
| 16 | #define LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H |
| 17 | |
| 18 | #include "llvm/ADT/DenseMap.h" |
| 19 | #include "llvm/ADT/SmallVector.h" |
| 20 | #include "llvm/MC/MCSchedule.h" |
| 21 | #include "llvm/MCA/Instruction.h" |
| 22 | #include "llvm/MCA/Support.h" |
| 23 | |
| 24 | namespace llvm { |
| 25 | namespace mca { |
| 26 | |
| 27 | /// Used to notify the internal state of a processor resource. |
| 28 | /// |
| 29 | /// A processor resource is available if it is not reserved, and there are |
| 30 | /// available slots in the buffer. A processor resource is unavailable if it |
| 31 | /// is either reserved, or the associated buffer is full. A processor resource |
| 32 | /// with a buffer size of -1 is always available if it is not reserved. |
| 33 | /// |
| 34 | /// Values of type ResourceStateEvent are returned by method |
| 35 | /// ResourceManager::canBeDispatched() |
| 36 | /// |
| 37 | /// The naming convention for resource state events is: |
| 38 | /// * Event names start with prefix RS_ |
| 39 | /// * Prefix RS_ is followed by a string describing the actual resource state. |
| 40 | enum ResourceStateEvent { |
| 41 | RS_BUFFER_AVAILABLE, |
| 42 | RS_BUFFER_UNAVAILABLE, |
| 43 | RS_RESERVED |
| 44 | }; |
| 45 | |
| 46 | /// Resource allocation strategy used by hardware scheduler resources. |
| 47 | class ResourceStrategy { |
| 48 | ResourceStrategy(const ResourceStrategy &) = delete; |
| 49 | ResourceStrategy &operator=(const ResourceStrategy &) = delete; |
| 50 | |
| 51 | public: |
| 52 | ResourceStrategy() = default; |
| 53 | virtual ~ResourceStrategy(); |
| 54 | |
| 55 | /// Selects a processor resource unit from a ReadyMask. |
| 56 | virtual uint64_t select(uint64_t ReadyMask) = 0; |
| 57 | |
| 58 | /// Called by the ResourceManager when a processor resource group, or a |
| 59 | /// processor resource with multiple units has become unavailable. |
| 60 | /// |
| 61 | /// The default strategy uses this information to bias its selection logic. |
| 62 | virtual void used(uint64_t ResourceMask) {} |
| 63 | }; |
| 64 | |
| 65 | /// Default resource allocation strategy used by processor resource groups and |
| 66 | /// processor resources with multiple units. |
| 67 | class DefaultResourceStrategy final : public ResourceStrategy { |
| 68 | /// A Mask of resource unit identifiers. |
| 69 | /// |
| 70 | /// There is one bit set for every available resource unit. |
| 71 | /// It defaults to the value of field ResourceSizeMask in ResourceState. |
| 72 | const uint64_t ResourceUnitMask; |
| 73 | |
| 74 | /// A simple round-robin selector for processor resource units. |
| 75 | /// Each bit of this mask identifies a sub resource within a group. |
| 76 | /// |
| 77 | /// As an example, lets assume that this is a default policy for a |
| 78 | /// processor resource group composed by the following three units: |
| 79 | /// ResourceA -- 0b001 |
| 80 | /// ResourceB -- 0b010 |
| 81 | /// ResourceC -- 0b100 |
| 82 | /// |
| 83 | /// Field NextInSequenceMask is used to select the next unit from the set of |
| 84 | /// resource units. It defaults to the value of field `ResourceUnitMasks` (in |
| 85 | /// this example, it defaults to mask '0b111'). |
| 86 | /// |
| 87 | /// The round-robin selector would firstly select 'ResourceC', then |
| 88 | /// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the |
| 89 | /// corresponding bit in NextInSequenceMask is cleared. For example, if |
| 90 | /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes |
| 91 | /// 0xb011. |
| 92 | /// |
| 93 | /// When NextInSequenceMask becomes zero, it is automatically reset to the |
| 94 | /// default value (i.e. ResourceUnitMask). |
| 95 | uint64_t NextInSequenceMask; |
| 96 | |
| 97 | /// This field is used to track resource units that are used (i.e. selected) |
| 98 | /// by other groups other than the one associated with this strategy object. |
| 99 | /// |
| 100 | /// In LLVM processor resource groups are allowed to partially (or fully) |
| 101 | /// overlap. That means, a same unit may be visible to multiple groups. |
| 102 | /// This field keeps track of uses that have originated from outside of |
| 103 | /// this group. The idea is to bias the selection strategy, so that resources |
| 104 | /// that haven't been used by other groups get prioritized. |
| 105 | /// |
| 106 | /// The end goal is to (try to) keep the resource distribution as much uniform |
| 107 | /// as possible. By construction, this mask only tracks one-level of resource |
| 108 | /// usage. Therefore, this strategy is expected to be less accurate when same |
| 109 | /// units are used multiple times by other groups within a single round of |
| 110 | /// select. |
| 111 | /// |
| 112 | /// Note: an LRU selector would have a better accuracy at the cost of being |
| 113 | /// slightly more expensive (mostly in terms of runtime cost). Methods |
| 114 | /// 'select' and 'used', are always in the hot execution path of llvm-mca. |
| 115 | /// Therefore, a slow implementation of 'select' would have a negative impact |
| 116 | /// on the overall performance of the tool. |
| 117 | uint64_t RemovedFromNextInSequence; |
| 118 | |
| 119 | public: |
| 120 | DefaultResourceStrategy(uint64_t UnitMask) |
| 121 | : ResourceUnitMask(UnitMask), NextInSequenceMask(UnitMask), |
| 122 | RemovedFromNextInSequence(0) {} |
| 123 | virtual ~DefaultResourceStrategy() = default; |
| 124 | |
| 125 | uint64_t select(uint64_t ReadyMask) override; |
| 126 | void used(uint64_t Mask) override; |
| 127 | }; |
| 128 | |
| 129 | /// A processor resource descriptor. |
| 130 | /// |
| 131 | /// There is an instance of this class for every processor resource defined by |
| 132 | /// the machine scheduling model. |
| 133 | /// Objects of class ResourceState dynamically track the usage of processor |
| 134 | /// resource units. |
| 135 | class ResourceState { |
| 136 | /// An index to the MCProcResourceDesc entry in the processor model. |
| 137 | const unsigned ProcResourceDescIndex; |
| 138 | /// A resource mask. This is generated by the tool with the help of |
| 139 | /// function `mca::computeProcResourceMasks' (see Support.h). |
| 140 | /// |
| 141 | /// Field ResourceMask only has one bit set if this resource state describes a |
| 142 | /// processor resource unit (i.e. this is not a group). That means, we can |
| 143 | /// quickly check if a resource is a group by simply counting the number of |
| 144 | /// bits that are set in the mask. |
| 145 | /// |
| 146 | /// The most significant bit of a mask (MSB) uniquely identifies a resource. |
| 147 | /// Remaining bits are used to describe the composition of a group (Group). |
| 148 | /// |
| 149 | /// Example (little endian): |
| 150 | /// Resource | Mask | MSB | Group |
| 151 | /// ---------+------------+------------+------------ |
| 152 | /// A | 0b000001 | 0b000001 | 0b000000 |
| 153 | /// | | | |
| 154 | /// B | 0b000010 | 0b000010 | 0b000000 |
| 155 | /// | | | |
| 156 | /// C | 0b010000 | 0b010000 | 0b000000 |
| 157 | /// | | | |
| 158 | /// D | 0b110010 | 0b100000 | 0b010010 |
| 159 | /// |
| 160 | /// In this example, resources A, B and C are processor resource units. |
| 161 | /// Only resource D is a group resource, and it contains resources B and C. |
| 162 | /// That is because MSB(B) and MSB(C) are both contained within Group(D). |
| 163 | const uint64_t ResourceMask; |
| 164 | |
| 165 | /// A ProcResource can have multiple units. |
| 166 | /// |
| 167 | /// For processor resource groups this field is a mask of contained resource |
| 168 | /// units. It is obtained from ResourceMask by clearing the highest set bit. |
| 169 | /// The number of resource units in a group can be simply computed as the |
| 170 | /// population count of this field. |
| 171 | /// |
| 172 | /// For normal (i.e. non-group) resources, the number of bits set in this mask |
| 173 | /// is equivalent to the number of units declared by the processor model (see |
| 174 | /// field 'NumUnits' in 'ProcResourceUnits'). |
| 175 | uint64_t ResourceSizeMask; |
| 176 | |
| 177 | /// A mask of ready units. |
| 178 | uint64_t ReadyMask; |
| 179 | |
| 180 | /// Buffered resources will have this field set to a positive number different |
| 181 | /// than zero. A buffered resource behaves like a reservation station |
| 182 | /// implementing its own buffer for out-of-order execution. |
| 183 | /// |
| 184 | /// A BufferSize of 1 is used by scheduler resources that force in-order |
| 185 | /// execution. |
| 186 | /// |
| 187 | /// A BufferSize of 0 is used to model in-order issue/dispatch resources. |
| 188 | /// Since in-order issue/dispatch resources don't implement buffers, dispatch |
| 189 | /// events coincide with issue events. |
| 190 | /// Also, no other instruction ca be dispatched/issue while this resource is |
| 191 | /// in use. Only when all the "resource cycles" are consumed (after the issue |
| 192 | /// event), a new instruction ca be dispatched. |
| 193 | const int BufferSize; |
| 194 | |
| 195 | /// Available slots in the buffer (zero, if this is not a buffered resource). |
| 196 | unsigned AvailableSlots; |
| 197 | |
| 198 | /// This field is set if this resource is currently reserved. |
| 199 | /// |
| 200 | /// Resources can be reserved for a number of cycles. |
| 201 | /// Instructions can still be dispatched to reserved resources. However, |
| 202 | /// istructions dispatched to a reserved resource cannot be issued to the |
| 203 | /// underlying units (i.e. pipelines) until the resource is released. |
| 204 | bool Unavailable; |
| 205 | |
| 206 | const bool IsAGroup; |
| 207 | |
| 208 | /// Checks for the availability of unit 'SubResMask' in the group. |
| 209 | bool isSubResourceReady(uint64_t SubResMask) const { |
| 210 | return ReadyMask & SubResMask; |
| 211 | } |
| 212 | |
| 213 | public: |
| 214 | ResourceState(const MCProcResourceDesc &Desc, unsigned Index, uint64_t Mask); |
| 215 | |
| 216 | unsigned getProcResourceID() const { return ProcResourceDescIndex; } |
| 217 | uint64_t getResourceMask() const { return ResourceMask; } |
| 218 | uint64_t getReadyMask() const { return ReadyMask; } |
| 219 | int getBufferSize() const { return BufferSize; } |
| 220 | |
| 221 | bool isBuffered() const { return BufferSize > 0; } |
| 222 | bool isInOrder() const { return BufferSize == 1; } |
| 223 | |
| 224 | /// Returns true if this is an in-order dispatch/issue resource. |
| 225 | bool isADispatchHazard() const { return BufferSize == 0; } |
| 226 | bool isReserved() const { return Unavailable; } |
| 227 | |
| 228 | void setReserved() { Unavailable = true; } |
| 229 | void clearReserved() { Unavailable = false; } |
| 230 | |
| 231 | /// Returs true if this resource is not reserved, and if there are at least |
| 232 | /// `NumUnits` available units. |
| 233 | bool isReady(unsigned NumUnits = 1) const; |
| 234 | |
| 235 | bool isAResourceGroup() const { return IsAGroup; } |
| 236 | |
| 237 | bool containsResource(uint64_t ID) const { return ResourceMask & ID; } |
| 238 | |
| 239 | void markSubResourceAsUsed(uint64_t ID) { |
| 240 | assert(isSubResourceReady(ID)); |
| 241 | ReadyMask ^= ID; |
| 242 | } |
| 243 | |
| 244 | void releaseSubResource(uint64_t ID) { |
| 245 | assert(!isSubResourceReady(ID)); |
| 246 | ReadyMask ^= ID; |
| 247 | } |
| 248 | |
| 249 | unsigned getNumUnits() const { |
| 250 | return isAResourceGroup() ? 1U : llvm::popcount(ResourceSizeMask); |
| 251 | } |
| 252 | |
| 253 | /// Checks if there is an available slot in the resource buffer. |
| 254 | /// |
| 255 | /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if |
| 256 | /// there is a slot available. |
| 257 | /// |
| 258 | /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it |
| 259 | /// is reserved. |
| 260 | /// |
| 261 | /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. |
| 262 | ResourceStateEvent isBufferAvailable() const; |
| 263 | |
| 264 | /// Reserve a buffer slot. |
| 265 | /// |
| 266 | /// Returns true if the buffer is not full. |
| 267 | /// It always returns true if BufferSize is set to zero. |
| 268 | bool reserveBuffer() { |
| 269 | if (BufferSize <= 0) |
| 270 | return true; |
| 271 | |
| 272 | --AvailableSlots; |
| 273 | assert(AvailableSlots <= static_cast<unsigned>(BufferSize)); |
| 274 | return AvailableSlots; |
| 275 | } |
| 276 | |
| 277 | /// Releases a slot in the buffer. |
| 278 | void releaseBuffer() { |
| 279 | // Ignore dispatch hazards or invalid buffer sizes. |
| 280 | if (BufferSize <= 0) |
| 281 | return; |
| 282 | |
| 283 | ++AvailableSlots; |
| 284 | assert(AvailableSlots <= static_cast<unsigned>(BufferSize)); |
| 285 | } |
| 286 | |
| 287 | #ifndef NDEBUG |
| 288 | void dump() const; |
| 289 | #endif |
| 290 | }; |
| 291 | |
| 292 | /// A resource unit identifier. |
| 293 | /// |
| 294 | /// This is used to identify a specific processor resource unit using a pair |
| 295 | /// of indices where the 'first' index is a processor resource mask, and the |
| 296 | /// 'second' index is an index for a "sub-resource" (i.e. unit). |
| 297 | typedef std::pair<uint64_t, uint64_t> ResourceRef; |
| 298 | |
| 299 | // First: a MCProcResourceDesc index identifying a buffered resource. |
| 300 | // Second: max number of buffer entries used in this resource. |
| 301 | typedef std::pair<unsigned, unsigned> BufferUsageEntry; |
| 302 | |
| 303 | /// A resource manager for processor resource units and groups. |
| 304 | /// |
| 305 | /// This class owns all the ResourceState objects, and it is responsible for |
| 306 | /// acting on requests from a Scheduler by updating the internal state of |
| 307 | /// ResourceState objects. |
| 308 | /// This class doesn't know about instruction itineraries and functional units. |
| 309 | /// In future, it can be extended to support itineraries too through the same |
| 310 | /// public interface. |
| 311 | class ResourceManager { |
| 312 | // Set of resources available on the subtarget. |
| 313 | // |
| 314 | // There is an instance of ResourceState for every resource declared by the |
| 315 | // target scheduling model. |
| 316 | // |
| 317 | // Elements of this vector are ordered by resource kind. In particular, |
| 318 | // resource units take precedence over resource groups. |
| 319 | // |
| 320 | // The index of a processor resource in this vector depends on the value of |
| 321 | // its mask (see the description of field ResourceState::ResourceMask). In |
| 322 | // particular, it is computed as the position of the most significant bit set |
| 323 | // (MSB) in the mask plus one (since we want to ignore the invalid resource |
| 324 | // descriptor at index zero). |
| 325 | // |
| 326 | // Example (little endian): |
| 327 | // |
| 328 | // Resource | Mask | MSB | Index |
| 329 | // ---------+---------+---------+------- |
| 330 | // A | 0b00001 | 0b00001 | 1 |
| 331 | // | | | |
| 332 | // B | 0b00100 | 0b00100 | 3 |
| 333 | // | | | |
| 334 | // C | 0b10010 | 0b10000 | 5 |
| 335 | // |
| 336 | // |
| 337 | // The same index is also used to address elements within vector `Strategies` |
| 338 | // and vector `Resource2Groups`. |
| 339 | std::vector<std::unique_ptr<ResourceState>> Resources; |
| 340 | std::vector<std::unique_ptr<ResourceStrategy>> Strategies; |
| 341 | |
| 342 | // Used to quickly identify groups that own a particular resource unit. |
| 343 | std::vector<uint64_t> Resource2Groups; |
| 344 | |
| 345 | // A table that maps processor resource IDs to processor resource masks. |
| 346 | SmallVector<uint64_t, 8> ProcResID2Mask; |
| 347 | |
| 348 | // A table that maps resource indices to actual processor resource IDs in the |
| 349 | // scheduling model. |
| 350 | SmallVector<unsigned, 8> ResIndex2ProcResID; |
| 351 | |
| 352 | // Keeps track of which resources are busy, and how many cycles are left |
| 353 | // before those become usable again. |
| 354 | SmallDenseMap<ResourceRef, unsigned> BusyResources; |
| 355 | |
| 356 | // Set of processor resource units available on the target. |
| 357 | uint64_t ProcResUnitMask; |
| 358 | |
| 359 | // Set of processor resource units that are available during this cycle. |
| 360 | uint64_t AvailableProcResUnits; |
| 361 | |
| 362 | // Set of processor resources that are currently reserved. |
| 363 | uint64_t ReservedResourceGroups; |
| 364 | |
| 365 | // Set of unavailable scheduler buffer resources. This is used internally to |
| 366 | // speedup `canBeDispatched()` queries. |
| 367 | uint64_t AvailableBuffers; |
| 368 | |
| 369 | // Set of dispatch hazard buffer resources that are currently unavailable. |
| 370 | uint64_t ReservedBuffers; |
| 371 | |
| 372 | // Returns the actual resource unit that will be used. |
| 373 | ResourceRef selectPipe(uint64_t ResourceID); |
| 374 | |
| 375 | void use(const ResourceRef &RR); |
| 376 | void release(const ResourceRef &RR); |
| 377 | |
| 378 | unsigned getNumUnits(uint64_t ResourceID) const; |
| 379 | |
| 380 | // Overrides the selection strategy for the processor resource with the given |
| 381 | // mask. |
| 382 | void setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S, |
| 383 | uint64_t ResourceMask); |
| 384 | |
| 385 | public: |
| 386 | ResourceManager(const MCSchedModel &SM); |
| 387 | virtual ~ResourceManager() = default; |
| 388 | |
| 389 | // Overrides the selection strategy for the resource at index ResourceID in |
| 390 | // the MCProcResourceDesc table. |
| 391 | void setCustomStrategy(std::unique_ptr<ResourceStrategy> S, |
| 392 | unsigned ResourceID) { |
| 393 | assert(ResourceID < ProcResID2Mask.size() && |
| 394 | "Invalid resource index in input!"); |
| 395 | return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]); |
| 396 | } |
| 397 | |
| 398 | // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if |
| 399 | // there are enough available slots in the buffers. |
| 400 | ResourceStateEvent canBeDispatched(uint64_t ConsumedBuffers) const; |
| 401 | |
| 402 | // Return the processor resource identifier associated to this Mask. |
| 403 | unsigned resolveResourceMask(uint64_t Mask) const; |
| 404 | |
| 405 | // Acquires a slot from every buffered resource in mask `ConsumedBuffers`. |
| 406 | // Units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. |
| 407 | void reserveBuffers(uint64_t ConsumedBuffers); |
| 408 | |
| 409 | // Releases a slot from every buffered resource in mask `ConsumedBuffers`. |
| 410 | // ConsumedBuffers is a bitmask of previously acquired buffers (using method |
| 411 | // `reserveBuffers`). Units that are dispatch hazards (i.e. BufferSize=0) are |
| 412 | // not automatically unreserved by this method. |
| 413 | void releaseBuffers(uint64_t ConsumedBuffers); |
| 414 | |
| 415 | // Reserve a processor resource. A reserved resource is not available for |
| 416 | // instruction issue until it is released. |
| 417 | void reserveResource(uint64_t ResourceID); |
| 418 | |
| 419 | // Release a previously reserved processor resource. |
| 420 | void releaseResource(uint64_t ResourceID); |
| 421 | |
| 422 | // Returns a zero mask if resources requested by Desc are all available during |
| 423 | // this cycle. It returns a non-zero mask value only if there are unavailable |
| 424 | // processor resources; each bit set in the mask represents a busy processor |
| 425 | // resource unit or a reserved processor resource group. |
| 426 | uint64_t checkAvailability(const InstrDesc &Desc) const; |
| 427 | |
| 428 | uint64_t getProcResUnitMask() const { return ProcResUnitMask; } |
| 429 | uint64_t getAvailableProcResUnits() const { return AvailableProcResUnits; } |
| 430 | |
| 431 | void issueInstruction( |
| 432 | const InstrDesc &Desc, |
| 433 | SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes); |
| 434 | |
| 435 | void cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed); |
| 436 | |
| 437 | #ifndef NDEBUG |
| 438 | void dump() const { |
| 439 | for (const std::unique_ptr<ResourceState> &Resource : Resources) |
| 440 | Resource->dump(); |
| 441 | } |
| 442 | #endif |
| 443 | }; |
| 444 | } // namespace mca |
| 445 | } // namespace llvm |
| 446 | |
| 447 | #endif // LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H |