NVIDIA
diff --git a/cpp/include/tensorrt_llm/batch_manager/evictionPolicy.h b/cpp/include/tensorrt_llm/batch_manager/evictionPolicy.h
Lines changed: 2 additions & 1 deletion
diff --git a/cpp/include/tensorrt_llm/batch_manager/kvCacheManager.h b/cpp/include/tensorrt_llm/batch_manager/kvCacheManager.h
Lines changed: 7 additions & 5 deletions
diff --git a/cpp/include/tensorrt_llm/batch_manager/templatedTrie.h b/cpp/include/tensorrt_llm/batch_manager/templatedTrie.h
Lines changed: 3 additions & 3 deletions
diff --git a/cpp/tensorrt_llm/batch_manager/evictionPolicy.cpp b/cpp/tensorrt_llm/batch_manager/evictionPolicy.cpp
Lines changed: 4 additions & 4 deletions
@@ -123,7 +123,8 @@ class MaybePlaceholderLRUEvictionPolicy : public LRUEvictionPolicy
     /// @brief Initialize the placeholder eviction policy with pre-allocated placeholder blocks.
     /// @param allPlaceholderBlocksById Vector of placeholder blocks indexed by abs(blockId).
     ///        Indices 0 and 1 are unused (nullptr); index abs(blockId) holds the block with that ID.
-    /// @param numPlaceholderBlocks Number of placeholder blocks (determines valid index range [2, numPlaceholderBlocks+1]).
+    /// @param numPlaceholderBlocks Number of placeholder blocks (determines valid index range [2,
+    /// numPlaceholderBlocks+1]).
     /// @param secondaryOffloadMinPriority Secondary offload priority threshold (passed to inner policy).
     void initializePlaceholders(std::vector<BlockPtr>& allPlaceholderBlocksById, SizeType32 numPlaceholderBlocks,
         std::optional<executor::RetentionPriority> secondaryOffloadMinPriority);
 
@@ -1050,8 +1050,8 @@ class WindowBlockManager
     //! \param pinBlocks If true, increment ref count for blocks while storing (pin on store).
     //! \return Pair of (num blocks stored for reuse, vector of pinned block IDs).
     [[nodiscard]] std::pair<SizeType32, std::vector<KVCacheBlock::IdType>> storeBlocks(
-        std::vector<BlockKey> const& blockKeys, std::vector<KVCacheBlock::IdType> const& blockIds,OptionalRef<LlmRequest const> llmRequest,
-        bool pinBlocks = false);
+        std::vector<BlockKey> const& blockKeys, std::vector<KVCacheBlock::IdType> const& blockIds,
+        OptionalRef<LlmRequest const> llmRequest, bool pinBlocks = false);
 
     [[nodiscard]] bool verifyQueueIntegrity();
 
@@ -1168,7 +1168,6 @@ class WindowBlockManager
             && LinearAttentionMetadata::hasRecurrentStatesCache(mLinearAttentionMetadata->cacheType);
     }
 
-
 private:
     nvinfer1::DataType mDataType;
     SizeType32 mWindowSize;
@@ -1815,7 +1814,8 @@ class BaseKVCacheManager
     /// @brief Increase size for request at seqSlotIdx. Allocate new KV cache block(s) if needed.
     virtual void addToken(LlmRequest::RequestIdType requestId) = 0;
 
-    /// @brief Get the number of tokens for a request at KVCacheManager's sight. Sometimes it is different from LlmRequest::getNumTokens.
+    /// @brief Get the number of tokens for a request at KVCacheManager's sight. Sometimes it is different from
+    /// LlmRequest::getNumTokens.
     [[nodiscard]] virtual SizeType32 getTokenCount(LlmRequest::RequestIdType requestId) const = 0;
 
     /// @brief Add new request to the KV cache manager.
@@ -1935,7 +1935,9 @@ class BaseKVCacheManager
         }
         TLLM_LOG_DEBUG("[calculateCacheSizePerTokenForSingleWindowSize] nkvh: %s", ss.str().c_str());
         auto const sumLocalHeads = std::reduce(nkvh.cbegin(), nkvh.cend());
-        TLLM_LOG_DEBUG("[calculateCacheSizePerTokenForSingleWindowSize] sumLocalHeads: %d, kvFactor: %d, sizePerHead: %d", sumLocalHeads, kvFactor, modelConfig.getSizePerHead());
+        TLLM_LOG_DEBUG(
+            "[calculateCacheSizePerTokenForSingleWindowSize] sumLocalHeads: %d, kvFactor: %d, sizePerHead: %d",
+            sumLocalHeads, kvFactor, modelConfig.getSizePerHead());
         // NOTE: We expect the initialization of modelConfig to have already taken the tp size into account and do not
         // address it here
         // consider only local layers for the calculation
 
@@ -165,9 +165,9 @@ class Node
     {
     }
 
-    //! \brief Print subtree in Unix `tree` style (├──, └──, │). NodeKey must support operator<<(std::ostream&, NodeKey).
-    void printTree(int depth = 0, std::string const& prefix = "",
-        std::optional<bool> isLast = std::nullopt) const
+    //! \brief Print subtree in Unix `tree` style (├──, └──, │). NodeKey must support operator<<(std::ostream&,
+    //! NodeKey).
+    void printTree(int depth = 0, std::string const& prefix = "", std::optional<bool> isLast = std::nullopt) const
     {
         (void) depth;
         bool const isRoot = mPrevNode.expired();
 
@@ -302,8 +302,8 @@ class PlaceholderInnerLRUEvictionPolicy : public LRUEvictionPolicy
                 }
                 if (block->hasRefs())
                 {
-                    TLLM_LOG_WARNING("Found placeholder block (id %d) with references in placeholder policy",
-                        block->getBlockId());
+                    TLLM_LOG_WARNING(
+                        "Found placeholder block (id %d) with references in placeholder policy", block->getBlockId());
                     queueCompromised = true;
                 }
             }
@@ -322,8 +322,8 @@ void MaybePlaceholderLRUEvictionPolicy::initializePlaceholders(std::vector<Block
 
     // Extract the actual placeholder blocks from allPlaceholderBlocksById[2..numPlaceholderBlocks+1]
     // so the inner policy's mFreeBlockIterators[i] corresponds to blockId = -(i+2).
-    std::vector<BlockPtr> placeholderBlocks(allPlaceholderBlocksById.begin() + 2,
-        allPlaceholderBlocksById.begin() + numPlaceholderBlocks + 2);
+    std::vector<BlockPtr> placeholderBlocks(
+        allPlaceholderBlocksById.begin() + 2, allPlaceholderBlocksById.begin() + numPlaceholderBlocks + 2);
 
     mPlaceholderEvictionPolicy->initialize(placeholderBlocks, {numPlaceholderBlocks, 0}, secondaryOffloadMinPriority);
 }
Original file line number	Diff line number	Diff line change
`@@ -165,9 +165,9 @@ class Node`
`165`	`165`	`{`
`166`	`166`	`}`
`167`	`167`
`168`		``- //! \brief Print subtree in Unix `tree` style (├──, └──, │). NodeKey must support operator<<(std::ostream&, NodeKey).``
`169`		`- void printTree(int depth = 0, std::string const& prefix = "",`
`170`		`- std::optional<bool> isLast = std::nullopt) const`
	`168`	``+ //! \brief Print subtree in Unix `tree` style (├──, └──, │). NodeKey must support operator<<(std::ostream&,``
	`169`	`+ //! NodeKey).`
	`170`	`+ void printTree(int depth = 0, std::string const& prefix = "", std::optional<bool> isLast = std::nullopt) const`
`171`	`171`	`{`
`172`	`172`	`(void) depth;`
`173`	`173`	`bool const isRoot = mPrevNode.expired();`
Original file line number	Diff line number	Diff line change
`@@ -302,8 +302,8 @@ class PlaceholderInnerLRUEvictionPolicy : public LRUEvictionPolicy`
`302`	`302`	`}`
`303`	`303`	`if (block->hasRefs())`
`304`	`304`	`{`
`305`		`- TLLM_LOG_WARNING("Found placeholder block (id %d) with references in placeholder policy",`
`306`		`- block->getBlockId());`
	`305`	`+ TLLM_LOG_WARNING(`
	`306`	`+ "Found placeholder block (id %d) with references in placeholder policy", block->getBlockId());`
`307`	`307`	`queueCompromised = true;`
`308`	`308`	`}`
`309`	`309`	`}`
`@@ -322,8 +322,8 @@ void MaybePlaceholderLRUEvictionPolicy::initializePlaceholders(std::vector<Block`
`322`	`322`
`323`	`323`	`// Extract the actual placeholder blocks from allPlaceholderBlocksById[2..numPlaceholderBlocks+1]`
`324`	`324`	`// so the inner policy's mFreeBlockIterators[i] corresponds to blockId = -(i+2).`
`325`		`- std::vector<BlockPtr> placeholderBlocks(allPlaceholderBlocksById.begin() + 2,`
`326`		`- allPlaceholderBlocksById.begin() + numPlaceholderBlocks + 2);`
	`325`	`+ std::vector<BlockPtr> placeholderBlocks(`
	`326`	`+ allPlaceholderBlocksById.begin() + 2, allPlaceholderBlocksById.begin() + numPlaceholderBlocks + 2);`
`327`	`327`
`328`	`328`	`mPlaceholderEvictionPolicy->initialize(placeholderBlocks, {numPlaceholderBlocks, 0}, secondaryOffloadMinPriority);`
`329`	`329`	`}`