From 278a78c70960712e39ce2d6e45ec684b0a35d017 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Sat, 29 Nov 2025 18:36:40 +0100 Subject: [PATCH 01/13] io: Add data structure for 64 bit byte count. In order to support 64 bits byte counts, which do not fit in the space reserved for them in the stream (4 bytes minus 2 control bits) and to work around the fact that the variables that holds the position and the byte count information in Streamer functions are only 32 bits (see arguments to TBufferFile::WriteVersion and TBufferFile::ReadVersion), we need to pass them indirectly when they needs 64 bits Since the streaming is inherently serial, we can leverage the sequence of calls and cache the 64 bits values in a queue. The bytecount that can not be stored in place inside the io stream will be held in a collection (fByteCounts) that need to be stored externally to the buffer. --- io/io/inc/TBufferFile.h | 13 +++++++++++++ io/io/src/TBufferFile.cxx | 16 ++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/io/io/inc/TBufferFile.h b/io/io/inc/TBufferFile.h index 3d2e37141fcc4..209fb617348d2 100644 --- a/io/io/inc/TBufferFile.h +++ b/io/io/inc/TBufferFile.h @@ -52,6 +52,16 @@ class TBufferFile : public TBufferIO { TStreamerInfo *fInfo{nullptr}; ///< Pointer to TStreamerInfo object writing/reading the buffer InfoList_t fInfoStack; ///< Stack of pointers to the TStreamerInfos + using ByteCountLocator_t = std::size_t; // This might become a pair if we implement chunked keys + using ByteCountStack_t = std::vector; + ByteCountStack_t fByteCountStack; ///; + // fByteCounts will be stored either in the header/summary tkey or at the end + // of the last segment/chunk for a large TKey. + ByteCountFinder_t fByteCounts; ///< Map to find the byte count value for a given position + // Default ctor TBufferFile() {} // NOLINT: not allowed to use = default because of TObject::kIsOnHeap detection, see ROOT-10300 @@ -62,6 +72,7 @@ class TBufferFile : public TBufferIO { Long64_t CheckByteCount(ULong64_t startpos, ULong64_t bcnt, const TClass *clss, const char* classname); void CheckCount(UInt_t offset) override; UInt_t CheckObject(UInt_t offset, const TClass *cl, Bool_t readClass = kFALSE); + UInt_t ReserveByteCount(); void WriteObjectClass(const void *actualObjStart, const TClass *actualClass, Bool_t cacheReuse) override; @@ -106,6 +117,8 @@ class TBufferFile : public TBufferIO { TObject *ReadObject(const TClass *cl) override; + ByteCountFinder_t GetByteCounts() const { return fByteCounts; } + using TBufferIO::CheckObject; // basic types and arrays of basic types diff --git a/io/io/src/TBufferFile.cxx b/io/io/src/TBufferFile.cxx index dd533c88a20ce..1a8948faff1fe 100644 --- a/io/io/src/TBufferFile.cxx +++ b/io/io/src/TBufferFile.cxx @@ -3327,6 +3327,22 @@ UInt_t TBufferFile::CheckObject(UInt_t offset, const TClass *cl, Bool_t readClas return offset; } +//////////////////////////////////////////////////////////////////////////////// +/// Reserve space for a leading byte count and return the position where to +/// store the byte count value. +/// +/// \param[in] mask The mask to apply to the placeholder value (default kByteCountMask) +/// \return The position (cntpos) where the byte count should be stored later, +/// or 0 if the position exceeds kMaxInt + +UInt_t TBufferFile::ReserveByteCount() +{ + // reserve space for leading byte count + auto full_cntpos = fBufCur - fBuffer; + fByteCountStack.push_back(full_cntpos); + *this << (UInt_t)kByteCountMask; // placeholder for byte count + return full_cntpos < kMaxInt ? full_cntpos : kMaxInt; +} //////////////////////////////////////////////////////////////////////////////// /// Read max bytes from the I/O buffer into buf. The function returns From 684d5a5c5f3258e84feb09336c1f4e2b191777e9 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Sat, 29 Nov 2025 18:55:15 +0100 Subject: [PATCH 02/13] io: Support 64 bits position/byte-count in reading --- io/io/src/TBufferFile.cxx | 82 +++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 8 deletions(-) diff --git a/io/io/src/TBufferFile.cxx b/io/io/src/TBufferFile.cxx index 1a8948faff1fe..dc3c596ebc148 100644 --- a/io/io/src/TBufferFile.cxx +++ b/io/io/src/TBufferFile.cxx @@ -360,10 +360,30 @@ void TBufferFile::SetByteCount(ULong64_t cntpos, Bool_t packInVersion) Long64_t TBufferFile::CheckByteCount(ULong64_t startpos, ULong64_t bcnt, const TClass *clss, const char *classname) { - if (!bcnt) return 0; - R__ASSERT(startpos <= kMaxUInt && bcnt <= kMaxUInt); - Long64_t offset = 0; + R__ASSERT(!fByteCountStack.empty()); + if (startpos == kMaxInt) { + // The position is above 1GB and was cached using a 32 bit variable. + startpos = fByteCountStack.back(); + } + if (bcnt == kMaxInt) { + // in case we are checking a byte count for which we postponed + // the writing because it was too large, we retrieve it now + auto it = fByteCounts.find(startpos); + if (it != fByteCounts.end()) { + bcnt = it->second; + } else { + bcnt = 0; + Error("CheckByteCount", + "Could not find byte count for position %llu in the byte count map (size=%zu)", + startpos, fByteCounts.size()); + } + } + fByteCountStack.pop_back(); + + if (!bcnt) + return 0; + Long64_t offset = 0; Longptr_t endpos = Longptr_t(fBuffer) + startpos + bcnt + sizeof(UInt_t); if (Longptr_t(fBufCur) != endpos) { @@ -390,7 +410,7 @@ Long64_t TBufferFile::CheckByteCount(ULong64_t startpos, ULong64_t bcnt, const T if ( ((char *)endpos) > fBufMax ) { offset = fBufMax-fBufCur; Error("CheckByteCount", - "Byte count probably corrupted around buffer position %llu:\n\t%llu for a possible maximum of %lld", + "Byte count probably corrupted around buffer position %llu:\n\tByte count is %llu while the buffer size is %lld", startpos, bcnt, offset); fBufCur = fBufMax; @@ -2749,6 +2769,8 @@ TClass *TBufferFile::ReadClass(const TClass *clReq, UInt_t *objTag) bcnt = 0; } else { fVersion = 1; + if (objTag) + fByteCountStack.push_back(fBufCur - fBuffer); startpos = UInt_t(fBufCur-fBuffer); *this >> tag; } @@ -2932,7 +2954,10 @@ Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass if (startpos) { // before reading object save start position - *startpos = UInt_t(fBufCur-fBuffer); + auto full_startpos = fBufCur - fBuffer; + *startpos = full_startpos < kMaxInt ? UInt_t(full_startpos) : kMaxInt; + if (bcnt) + fByteCountStack.push_back(full_startpos); } // read byte count (older files don't have byte count) @@ -2956,9 +2981,30 @@ Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass fBufCur -= sizeof(UInt_t); v.cnt = 0; } - if (bcnt) *bcnt = (v.cnt & ~kByteCountMask); + if (bcnt) { + if (!v.cnt) { + // no byte count stored + *bcnt = 0; + if (startpos) // Undo the push_back only if it happened. + fByteCountStack.pop_back(); + } else { + *bcnt = (v.cnt & ~kByteCountMask); + if (*bcnt == 0) { + // The byte count was stored but is zero, this means the data + // did not fit and thus we stored it in 'fByteCounts' instead. + // Mark this case by setting startpos to kMaxInt. + *bcnt = kMaxInt; + if (startpos) + *startpos = kMaxInt; + } + } + } frombuf(this->fBufCur,&version); + // NOTE: The code above is not straightforward to refactor by a call + // to ReadVersionNoCheckSum because of the following code need the + // 'raw' value in `v`. + if (version<=1) { if (version <= 0) { if (cl) { @@ -3038,7 +3084,10 @@ Version_t TBufferFile::ReadVersionNoCheckSum(UInt_t *startpos, UInt_t *bcnt) if (startpos) { // before reading object save start position - *startpos = UInt_t(fBufCur-fBuffer); + auto full_startpos = fBufCur - fBuffer; + *startpos = full_startpos < kMaxInt ? UInt_t(full_startpos) : kMaxInt; + if (bcnt) + fByteCountStack.push_back(full_startpos); } // read byte count (older files don't have byte count) @@ -3062,7 +3111,24 @@ Version_t TBufferFile::ReadVersionNoCheckSum(UInt_t *startpos, UInt_t *bcnt) fBufCur -= sizeof(UInt_t); v.cnt = 0; } - if (bcnt) *bcnt = (v.cnt & ~kByteCountMask); + if (bcnt) { + if (!v.cnt) { + // no byte count stored + *bcnt = 0; + if (startpos) // Undo the push_back only if it happened. + fByteCountStack.pop_back(); + } else { + *bcnt = (v.cnt & ~kByteCountMask); + if (*bcnt == 0) { + // The byte count was stored but is zero, this means the data + // did not fit and thus we stored it in 'fByteCounts' instead. + // Mark this case by setting startpos to kMaxInt. + *bcnt = kMaxInt; + if (startpos) + *startpos = kMaxInt; + } + } + } frombuf(this->fBufCur,&version); return version; From 29e675df7a2cc753b2e89b90e273c85a3e4ee064 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Sat, 29 Nov 2025 18:54:43 +0100 Subject: [PATCH 03/13] io: Support 64 bits position/byte-count in writing --- io/io/src/TBufferFile.cxx | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/io/io/src/TBufferFile.cxx b/io/io/src/TBufferFile.cxx index dc3c596ebc148..fb33ab45c9a9a 100644 --- a/io/io/src/TBufferFile.cxx +++ b/io/io/src/TBufferFile.cxx @@ -323,7 +323,26 @@ void TBufferFile::SetByteCount(ULong64_t cntpos, Bool_t packInVersion) && (fBufCur >= fBuffer) && static_cast(fBufCur - fBuffer) <= std::numeric_limits::max() && "Byte count position is after the end of the buffer"); - const UInt_t cnt = UInt_t(fBufCur - fBuffer) - UInt_t(cntpos) - sizeof(UInt_t); + + // We can either make this unconditional or we could split the routine + // in two, one with a new signature and guarantee to get the 64bit position + // (which may be chunk number + local offset) and one with the old signature + // which uses the stack to get the position and call the new one. + // This (of course) also requires that we do the 'same' to the WriteVersion + // routines. + R__ASSERT( !fByteCountStack.empty() ); + if (cntpos == kMaxInt) { + cntpos = fByteCountStack.back(); + } + fByteCountStack.pop_back(); + // if we are not in the same TKey chunk or if the cntpos is too large to fit in UInt_t + // let's postpone the writing of the byte count + const ULong64_t full_cnt = ULong64_t(fBufCur - fBuffer) - cntpos - sizeof(UInt_t); + if (full_cnt >= kMaxMapCount) { + fByteCounts[cntpos] = full_cnt; + return; + } + UInt_t cnt = static_cast(full_cnt); char *buf = (char *)(fBuffer + cntpos); // if true, pack byte count in two consecutive shorts, so it can @@ -2714,8 +2733,7 @@ void TBufferFile::WriteObjectClass(const void *actualObjectStart, const TClass * } // reserve space for leading byte count - UInt_t cntpos = UInt_t(fBufCur-fBuffer); - fBufCur += sizeof(UInt_t); + UInt_t cntpos = ReserveByteCount(); // write class of object first Int_t mapsize = fMap->Capacity(); // The slot depends on the capacity and WriteClass might induce an increase. @@ -3214,8 +3232,7 @@ UInt_t TBufferFile::WriteVersion(const TClass *cl, Bool_t useBcnt) UInt_t cntpos = 0; if (useBcnt) { // reserve space for leading byte count - cntpos = UInt_t(fBufCur-fBuffer); - fBufCur += sizeof(UInt_t); + cntpos = ReserveByteCount(); } Version_t version = cl->GetClassVersion(); @@ -3244,8 +3261,7 @@ UInt_t TBufferFile::WriteVersionMemberWise(const TClass *cl, Bool_t useBcnt) UInt_t cntpos = 0; if (useBcnt) { // reserve space for leading byte count - cntpos = UInt_t(fBufCur-fBuffer); - fBufCur += sizeof(UInt_t); + cntpos = ReserveByteCount(); } Version_t version = cl->GetClassVersion(); From b4cc23d7ffbe6e990f2cb7f4f9a8be7e1923cb3c Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Sat, 29 Nov 2025 18:58:00 +0100 Subject: [PATCH 04/13] io/nfc: add comment --- tree/tree/src/TBranch.cxx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tree/tree/src/TBranch.cxx b/tree/tree/src/TBranch.cxx index 6b9d1102625a9..768434017ea86 100644 --- a/tree/tree/src/TBranch.cxx +++ b/tree/tree/src/TBranch.cxx @@ -979,6 +979,7 @@ Int_t TBranch::FillEntryBuffer(TBasket* basket, TBuffer* buf, Int_t& lnew) fEntryBuffer->SetBufferOffset(objectStart); *fEntryBuffer >> tag; if (tag & kByteCountMask) { + // Ignore byte count. *fEntryBuffer >> tag; } if (tag == kNewClassTag) { @@ -1457,7 +1458,7 @@ bool TBranch::SupportsBulkRead() const { /// the number of elements corresponding to each entries. /// /// For each entry the number of elements is the multiplication of -/// +/// /// ~~~{.cpp} /// TLeaf *leaf = static_cast(branch->GetListOfLeaves()->At(0)); /// auto len = leaf->GetLen(); From ec9caac367c0ffcc5a6c375a1e5e61205af1f4d7 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Tue, 2 Dec 2025 11:09:44 -0600 Subject: [PATCH 05/13] io: Add Set/GetByteCounts to TBufferFile Note to store and restore the larger than 1GB byte count use: // Copy the content of the const reference. auto bytecounts{b.GetByteCounts()}; ... b.SetByteCounts(std::move(bytecounts)); --- io/io/inc/TBufferFile.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io/io/inc/TBufferFile.h b/io/io/inc/TBufferFile.h index 209fb617348d2..19bd6808dc3a7 100644 --- a/io/io/inc/TBufferFile.h +++ b/io/io/inc/TBufferFile.h @@ -117,7 +117,8 @@ class TBufferFile : public TBufferIO { TObject *ReadObject(const TClass *cl) override; - ByteCountFinder_t GetByteCounts() const { return fByteCounts; } + const ByteCountFinder_t& GetByteCounts() const { return fByteCounts; } + void SetByteCounts(ByteCountFinder_t &&byteCounts) { fByteCounts = std::move(byteCounts); } using TBufferIO::CheckObject; From fcfd908e80ccbf3a0a062eff40e89a93f10420e2 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Mon, 8 Dec 2025 14:24:29 -0600 Subject: [PATCH 06/13] io: Add reset of new bytecount infrastructure --- io/io/inc/TBufferFile.h | 2 ++ io/io/src/TBufferFile.cxx | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/io/io/inc/TBufferFile.h b/io/io/inc/TBufferFile.h index 19bd6808dc3a7..ba6d0be4d881d 100644 --- a/io/io/inc/TBufferFile.h +++ b/io/io/inc/TBufferFile.h @@ -120,6 +120,8 @@ class TBufferFile : public TBufferIO { const ByteCountFinder_t& GetByteCounts() const { return fByteCounts; } void SetByteCounts(ByteCountFinder_t &&byteCounts) { fByteCounts = std::move(byteCounts); } + void ResetMap() override; + using TBufferIO::CheckObject; // basic types and arrays of basic types diff --git a/io/io/src/TBufferFile.cxx b/io/io/src/TBufferFile.cxx index fb33ab45c9a9a..fc28af2c82be7 100644 --- a/io/io/src/TBufferFile.cxx +++ b/io/io/src/TBufferFile.cxx @@ -114,6 +114,16 @@ TBufferFile::~TBufferFile() { } +//////////////////////////////////////////////////////////////////////////////// +/// Reset buffer maps and clear byte-count tracking structures. + +void TBufferFile::ResetMap() +{ + TBufferIO::ResetMap(); + fByteCountStack.clear(); + fByteCounts.clear(); +} + //////////////////////////////////////////////////////////////////////////////// /// Increment level. From 6dfcf4fe3a3cc8de55935946cd950a51480acf1a Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Mon, 8 Dec 2025 14:25:17 -0600 Subject: [PATCH 07/13] io: Add new bytecount test --- io/io/test/CMakeLists.txt | 4 ++ io/io/test/testByteCount.C | 119 +++++++++++++++++++++++++++++++++++ io/io/test/testByteCount.ref | 2 + 3 files changed, 125 insertions(+) create mode 100644 io/io/test/testByteCount.C create mode 100644 io/io/test/testByteCount.ref diff --git a/io/io/test/CMakeLists.txt b/io/io/test/CMakeLists.txt index 2e9efbd370683..b65a1886a6d17 100644 --- a/io/io/test/CMakeLists.txt +++ b/io/io/test/CMakeLists.txt @@ -40,3 +40,7 @@ endif() # ROOT_EXECUTABLE(TMapFileTest TMapFileTest.cxx LIBRARIES RIO Hist New) # ROOT_ADD_TEST(io-io-test-TMapFileTest COMMAND TMapFileTest complete) #endif() + +ROOTTEST_ADD_TEST(testLargeByteCounts + MACRO testByteCount.C + OUTREF testByteCount.ref) diff --git a/io/io/test/testByteCount.C b/io/io/test/testByteCount.C new file mode 100644 index 0000000000000..4c9af3f9707df --- /dev/null +++ b/io/io/test/testByteCount.C @@ -0,0 +1,119 @@ +{ + int errors = 0; + int expectedByteCounts = 1; + + // TBufferFile currently reject size larger than 2GB. + // SetBufferOffset does not check against the size, + // so we can provide and use a larger buffer. + std::vector databuffer{}; + databuffer.reserve( 4*1024*1024*1024ll ); + TBufferFile b(TBuffer::kWrite, 2*1024*1024*1024ll-100, databuffer.data(), false /* don't adopt */); + { + // Regular object at offset 0 + UInt_t R__c = b.WriteVersion(TExMap::Class(), kTRUE); + b.SetBufferOffset(1000); + b.SetByteCount(R__c, kTRUE); + } + { + // Regular object + UInt_t R__c = b.WriteVersion(TExMap::Class(), kTRUE); + b.SetBufferOffset(2000); + b.SetByteCount(R__c, kTRUE); + } + { + // Object larger than 1GB + UInt_t R__c = b.WriteVersion(TExMap::Class(), kTRUE); + b.SetBufferOffset(4000+1*1024*1024*1024ll); + b.SetByteCount(R__c, kTRUE); + } + { + // Regular object located past 1GB + UInt_t R__c = b.WriteVersion(TExMap::Class(), kTRUE); + b.SetBufferOffset(8000+1*1024*1024*1024ll); + b.SetByteCount(R__c, kTRUE); + } + { + ++expectedByteCounts; + // Object larger than 1GB start after 1GB + // NOTE: this does not yet fit, we are writing past the end. + // Need to lift the 2GB limit for TBuffer first. + // However the lifting might be temporary, so this might need to be + // moved to a test that stored objects in a TFile. + UInt_t R__c = b.WriteVersion(TExMap::Class(), kTRUE); + b.SetBufferOffset(12000+2*1024*1024*1024ll); + b.SetByteCount(R__c, kTRUE); + } + + + // To make a copy instead of using the const references: + auto bytecounts{ b.GetByteCounts() }; + if (bytecounts.size() != expectedByteCounts) { + ++errors; + std::cerr << "The number of bytecount is not as expected (1), it is " << bytecounts.size() << '\n'; + std::cerr << "The full list is:\n"; + for(auto bc : bytecounts) + std::cerr << "values: " << bc.first << " , " << bc.second << '\n'; + } + + // Rewind. Other code use Reset instead of SetBufferOffset + b.SetReadMode(); + b.Reset(); + b.SetByteCounts(std::move(bytecounts)); + + UInt_t R__s = 0; + UInt_t R__c = 0; + { + // Regular object at offset 0 + auto version = b.ReadVersion(&R__s, &R__c, TExMap::Class()); + b.SetBufferOffset(1000); + auto res = b.CheckByteCount(R__s, R__c, TExMap::Class()); + if (res != 0) { + ++errors; + // We can assume there as already an error message in CheckByCount + } + } + { + // Regular object + auto version = b.ReadVersion(&R__s, &R__c, TExMap::Class()); + b.SetBufferOffset(2000); + auto res = b.CheckByteCount(R__s, R__c, TExMap::Class()); + if (res != 0) { + ++errors; + // We can assume there as already an error message in CheckByCount + } + } + { + // Object larger than 1GB + auto version = b.ReadVersion(&R__s, &R__c, TExMap::Class()); + b.SetBufferOffset(4000+1*1024*1024*1024ll); + auto res = b.CheckByteCount(R__s, R__c, TExMap::Class()); + if (res != 0) { + ++errors; + // We can assume there as already an error message in CheckByCount + } + } + { + // Regular object located past 1GB + auto version = b.ReadVersion(&R__s, &R__c, TExMap::Class()); + b.SetBufferOffset(8000+1*1024*1024*1024ll); + auto res = b.CheckByteCount(R__s, R__c, TExMap::Class()); + if (res != 0) { + ++errors; + // We can assume there as already an error message in CheckByCount + } + } + { + // Object larger than 1GB start after 1GB + // NOTE: this does not yet fit. + auto version = b.ReadVersion(&R__s, &R__c, TExMap::Class()); + b.SetBufferOffset(12000+2*1024*1024*1024ll); + auto res = b.CheckByteCount(R__s, R__c, TExMap::Class()); + if (res != 0) { + ++errors; + // We can assume there as already an error message in CheckByCount + } + } + + std::cerr << "The end.\n"; + return errors; +} diff --git a/io/io/test/testByteCount.ref b/io/io/test/testByteCount.ref new file mode 100644 index 0000000000000..a5a2b78b46929 --- /dev/null +++ b/io/io/test/testByteCount.ref @@ -0,0 +1,2 @@ +Processing io/io/test/testByteCount.C... +The end. From 446708883faaed65e37bddd8184431d7d646ce57 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Thu, 11 Dec 2025 12:49:44 -0600 Subject: [PATCH 08/13] io: Suppress error message about large byte count --- io/io/src/TBufferFile.cxx | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/io/io/src/TBufferFile.cxx b/io/io/src/TBufferFile.cxx index fc28af2c82be7..f0d0a06064dc6 100644 --- a/io/io/src/TBufferFile.cxx +++ b/io/io/src/TBufferFile.cxx @@ -374,8 +374,10 @@ void TBufferFile::SetByteCount(ULong64_t cntpos, Bool_t packInVersion) tobuf(buf, cnt | kByteCountMask); if (cnt >= kMaxMapCount) { - Error("WriteByteCount", "bytecount too large (more than %d)", kMaxMapCount); - // exception + // NOTE: This should now be unreachable. + Fatal("WriteByteCount", + "Control flow error, code should be unreachable and wrote a bytecount that is too large (more than %d)", + kMaxMapCount); } } @@ -3334,7 +3336,11 @@ void TBufferFile::StreamObject(TObject *obj) void TBufferFile::CheckCount(UInt_t offset) { - if (IsWriting()) { + // FIXME: + // We could continue to issue the error for cases where we don't want to + // support storing more than 1GB in a single TBufferFile. + const bool bufferLimitedToMaxMapCount = false; + if (bufferLimitedToMaxMapCount && IsWriting()) { if (offset >= kMaxMapCount) { Error("CheckCount", "buffer offset too large (larger than %d)", kMaxMapCount); // exception From 7a9e8691dcff7da658eef0f202aa999e2b3d8c44 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Thu, 11 Dec 2025 13:02:11 -0600 Subject: [PATCH 09/13] [NFC] io white space / byte count related --- io/io/inc/TBufferFile.h | 2 +- io/io/src/TBufferFile.cxx | 5 +++-- io/io/test/testByteCount.C | 21 ++++++++++----------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/io/io/inc/TBufferFile.h b/io/io/inc/TBufferFile.h index ba6d0be4d881d..143f8ac374a69 100644 --- a/io/io/inc/TBufferFile.h +++ b/io/io/inc/TBufferFile.h @@ -117,7 +117,7 @@ class TBufferFile : public TBufferIO { TObject *ReadObject(const TClass *cl) override; - const ByteCountFinder_t& GetByteCounts() const { return fByteCounts; } + const ByteCountFinder_t &GetByteCounts() const { return fByteCounts; } void SetByteCounts(ByteCountFinder_t &&byteCounts) { fByteCounts = std::move(byteCounts); } void ResetMap() override; diff --git a/io/io/src/TBufferFile.cxx b/io/io/src/TBufferFile.cxx index f0d0a06064dc6..0be3f74ac63fa 100644 --- a/io/io/src/TBufferFile.cxx +++ b/io/io/src/TBufferFile.cxx @@ -340,7 +340,7 @@ void TBufferFile::SetByteCount(ULong64_t cntpos, Bool_t packInVersion) // which uses the stack to get the position and call the new one. // This (of course) also requires that we do the 'same' to the WriteVersion // routines. - R__ASSERT( !fByteCountStack.empty() ); + R__ASSERT(!fByteCountStack.empty()); if (cntpos == kMaxInt) { cntpos = fByteCountStack.back(); } @@ -441,7 +441,8 @@ Long64_t TBufferFile::CheckByteCount(ULong64_t startpos, ULong64_t bcnt, const T if ( ((char *)endpos) > fBufMax ) { offset = fBufMax-fBufCur; Error("CheckByteCount", - "Byte count probably corrupted around buffer position %llu:\n\tByte count is %llu while the buffer size is %lld", + "Byte count probably corrupted around buffer position %llu:\n\tByte count is %llu while the buffer size " + "is %lld", startpos, bcnt, offset); fBufCur = fBufMax; diff --git a/io/io/test/testByteCount.C b/io/io/test/testByteCount.C index 4c9af3f9707df..b152e3700aa75 100644 --- a/io/io/test/testByteCount.C +++ b/io/io/test/testByteCount.C @@ -6,8 +6,8 @@ // SetBufferOffset does not check against the size, // so we can provide and use a larger buffer. std::vector databuffer{}; - databuffer.reserve( 4*1024*1024*1024ll ); - TBufferFile b(TBuffer::kWrite, 2*1024*1024*1024ll-100, databuffer.data(), false /* don't adopt */); + databuffer.reserve(4 * 1024 * 1024 * 1024ll); + TBufferFile b(TBuffer::kWrite, 2 * 1024 * 1024 * 1024ll - 100, databuffer.data(), false /* don't adopt */); { // Regular object at offset 0 UInt_t R__c = b.WriteVersion(TExMap::Class(), kTRUE); @@ -23,13 +23,13 @@ { // Object larger than 1GB UInt_t R__c = b.WriteVersion(TExMap::Class(), kTRUE); - b.SetBufferOffset(4000+1*1024*1024*1024ll); + b.SetBufferOffset(4000 + 1 * 1024 * 1024 * 1024ll); b.SetByteCount(R__c, kTRUE); } { // Regular object located past 1GB UInt_t R__c = b.WriteVersion(TExMap::Class(), kTRUE); - b.SetBufferOffset(8000+1*1024*1024*1024ll); + b.SetBufferOffset(8000 + 1 * 1024 * 1024 * 1024ll); b.SetByteCount(R__c, kTRUE); } { @@ -40,18 +40,17 @@ // However the lifting might be temporary, so this might need to be // moved to a test that stored objects in a TFile. UInt_t R__c = b.WriteVersion(TExMap::Class(), kTRUE); - b.SetBufferOffset(12000+2*1024*1024*1024ll); + b.SetBufferOffset(12000 + 2 * 1024 * 1024 * 1024ll); b.SetByteCount(R__c, kTRUE); } - // To make a copy instead of using the const references: - auto bytecounts{ b.GetByteCounts() }; + auto bytecounts{b.GetByteCounts()}; if (bytecounts.size() != expectedByteCounts) { ++errors; std::cerr << "The number of bytecount is not as expected (1), it is " << bytecounts.size() << '\n'; std::cerr << "The full list is:\n"; - for(auto bc : bytecounts) + for (auto bc : bytecounts) std::cerr << "values: " << bc.first << " , " << bc.second << '\n'; } @@ -85,7 +84,7 @@ { // Object larger than 1GB auto version = b.ReadVersion(&R__s, &R__c, TExMap::Class()); - b.SetBufferOffset(4000+1*1024*1024*1024ll); + b.SetBufferOffset(4000 + 1 * 1024 * 1024 * 1024ll); auto res = b.CheckByteCount(R__s, R__c, TExMap::Class()); if (res != 0) { ++errors; @@ -95,7 +94,7 @@ { // Regular object located past 1GB auto version = b.ReadVersion(&R__s, &R__c, TExMap::Class()); - b.SetBufferOffset(8000+1*1024*1024*1024ll); + b.SetBufferOffset(8000 + 1 * 1024 * 1024 * 1024ll); auto res = b.CheckByteCount(R__s, R__c, TExMap::Class()); if (res != 0) { ++errors; @@ -106,7 +105,7 @@ // Object larger than 1GB start after 1GB // NOTE: this does not yet fit. auto version = b.ReadVersion(&R__s, &R__c, TExMap::Class()); - b.SetBufferOffset(12000+2*1024*1024*1024ll); + b.SetBufferOffset(12000 + 2 * 1024 * 1024 * 1024ll); auto res = b.CheckByteCount(R__s, R__c, TExMap::Class()); if (res != 0) { ++errors; From 16a863b2230f3521f3cf92dacd7d0ea75b782bea Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Thu, 5 Feb 2026 19:12:43 +0100 Subject: [PATCH 10/13] NFC io: fix doc typo --- io/io/src/TBufferFile.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io/io/src/TBufferFile.cxx b/io/io/src/TBufferFile.cxx index 0be3f74ac63fa..c9f982a872616 100644 --- a/io/io/src/TBufferFile.cxx +++ b/io/io/src/TBufferFile.cxx @@ -324,7 +324,7 @@ void TBufferFile::WriteCharStar(char *s) //////////////////////////////////////////////////////////////////////////////// /// Set byte count at position cntpos in the buffer. Generate warning if -/// count larger than kMaxMapCount. The count is excluded its own size. +/// count larger than kMaxMapCount. The count is excluding its own size. /// \note If underflow or overflow, an Error ir raised (stricter checks in Debug mode) void TBufferFile::SetByteCount(ULong64_t cntpos, Bool_t packInVersion) From 543515b04de6468ee5904ec77560080053b5979a Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Thu, 5 Feb 2026 19:15:54 +0100 Subject: [PATCH 11/13] io: doc clarifications - bytecount related --- io/io/src/TBufferFile.cxx | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/io/io/src/TBufferFile.cxx b/io/io/src/TBufferFile.cxx index c9f982a872616..3d32ed85ad109 100644 --- a/io/io/src/TBufferFile.cxx +++ b/io/io/src/TBufferFile.cxx @@ -415,6 +415,7 @@ Long64_t TBufferFile::CheckByteCount(ULong64_t startpos, ULong64_t bcnt, const T return 0; Long64_t offset = 0; + // End position is buffer location + start position + byte count + size of byte count field Longptr_t endpos = Longptr_t(fBuffer) + startpos + bcnt + sizeof(UInt_t); if (Longptr_t(fBufCur) != endpos) { @@ -2800,6 +2801,8 @@ TClass *TBufferFile::ReadClass(const TClass *clReq, UInt_t *objTag) bcnt = 0; } else { fVersion = 1; + // When objTag is not used, the caller is not interested in the byte + // count and will not (can not) call CheckByteCount. if (objTag) fByteCountStack.push_back(fBufCur - fBuffer); startpos = UInt_t(fBufCur-fBuffer); @@ -2978,6 +2981,9 @@ void TBufferFile::SkipVersion(const TClass *cl) //////////////////////////////////////////////////////////////////////////////// /// Read class version from I/O buffer. +/// If passing startpos and bcnt, CheckByteCount must be called later with +/// the same startpos and bcnt (this is needed to properly handle the byte +/// count stack used to support very large buffers) Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass *cl) { @@ -3108,6 +3114,15 @@ Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass //////////////////////////////////////////////////////////////////////////////// /// Read class version from I/O buffer, when the caller knows for sure that /// there is no checksum written/involved. +/// +/// This routine can be used from custom streamers when it is known that the +/// class was always versioned (and thus never stored a checksum within the buffer). +/// This allows to disambiguate the case where the class used to have a version +/// number equal to zero and did not save a byte count and is now versioned. +/// +/// When reading the version number being zero, without the byte count we have +/// no way to guess whether the class was version un-versioned or had previously +/// a version number equal to zero. Version_t TBufferFile::ReadVersionNoCheckSum(UInt_t *startpos, UInt_t *bcnt) { @@ -3430,9 +3445,8 @@ UInt_t TBufferFile::CheckObject(UInt_t offset, const TClass *cl, Bool_t readClas /// Reserve space for a leading byte count and return the position where to /// store the byte count value. /// -/// \param[in] mask The mask to apply to the placeholder value (default kByteCountMask) /// \return The position (cntpos) where the byte count should be stored later, -/// or 0 if the position exceeds kMaxInt +/// or kOverflowPosition if the position exceeds kMaxCountPosition UInt_t TBufferFile::ReserveByteCount() { From ed19201196cee94c5c776cd6e894868193df2dcd Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Thu, 5 Feb 2026 19:28:27 +0100 Subject: [PATCH 12/13] io: Clarify byte count related limits --- io/io/src/TBufferFile.cxx | 78 ++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/io/io/src/TBufferFile.cxx b/io/io/src/TBufferFile.cxx index 3d32ed85ad109..a304fb0093196 100644 --- a/io/io/src/TBufferFile.cxx +++ b/io/io/src/TBufferFile.cxx @@ -15,6 +15,21 @@ \ingroup IO The concrete implementation of TBuffer for writing/reading to/from a ROOT file or socket. + +Limits: +Byte Count positions (refered to as `cntpos` or `startpos` in the code) + - Internally they are passed as a ULong64_t + - In legacy user code they are held in UInt_t variables (See type used in + TBufferFile::WriteVersion and TBufferFile::ReadVersion) + - The positions passed by value are limited to 4GB (kOverflowPosition) + and the positions are held in fByteCountStack but retrieved only when + higher than 4GB (kOverflowCount). +Byte Count values: + - Internally they are passed as a ULong64_t + - In legacy user code they are held in UInt_t variables (See type used in + TBufferFile::WriteVersion and TBufferFile::ReadVersion) + - The values passed by value are limited to 1GB (kMaxMapCount) + - The values larger than kMaxMapCount are held in fByteCounts. */ #include @@ -46,15 +61,18 @@ The concrete implementation of TBuffer for writing/reading to/from a ROOT file o #include "Bswapcpy.h" #endif +constexpr UInt_t kOverflowPosition = 0xFFFFFFFF; +constexpr UInt_t kOverflowCount = 0xFFFFFFFF; +constexpr UInt_t kMaxCountPosition = 0xFFFFFFFE; // We reserve the highest value to mark overflow positions -const UInt_t kNewClassTag = 0xFFFFFFFF; -const UInt_t kClassMask = 0x80000000; // OR the class index with this -const UInt_t kByteCountMask = 0x40000000; // OR the byte count with this -const UInt_t kMaxMapCount = 0x3FFFFFFE; // last valid fMapCount and byte count -const Version_t kByteCountVMask = 0x4000; // OR the version byte count with this -const Version_t kMaxVersion = 0x3FFF; // highest possible version number -const Int_t kMapOffset = 2; // first 2 map entries are taken by null obj and self obj - +constexpr UInt_t kNewClassTag = 0xFFFFFFFF; +constexpr UInt_t kClassMask = 0x80000000; // OR the class index with this +constexpr UInt_t kByteCountMask = 0x40000000; // OR the byte count with this +constexpr UInt_t kMaxMapCount = 0x3FFFFFFE; // last valid fMapCount and byte count +constexpr UInt_t kMaxRecordByteCount = kMaxMapCount; // last valid byte count +constexpr Version_t kByteCountVMask = 0x4000; // OR the version byte count with this +constexpr Version_t kMaxVersion = 0x3FFF; // highest possible version number +constexpr Int_t kMapOffset = 2; // first 2 map entries are taken by null obj and self obj //////////////////////////////////////////////////////////////////////////////// @@ -323,13 +341,13 @@ void TBufferFile::WriteCharStar(char *s) } //////////////////////////////////////////////////////////////////////////////// -/// Set byte count at position cntpos in the buffer. Generate warning if -/// count larger than kMaxMapCount. The count is excluding its own size. +/// Set byte count at position cntpos in the buffer. +/// The count is excluding its own size. /// \note If underflow or overflow, an Error ir raised (stricter checks in Debug mode) void TBufferFile::SetByteCount(ULong64_t cntpos, Bool_t packInVersion) { - assert( cntpos <= kMaxUInt && (sizeof(UInt_t) + cntpos) < static_cast(fBufCur - fBuffer) + assert( cntpos <= kOverflowPosition && (sizeof(UInt_t) + cntpos) < static_cast(fBufCur - fBuffer) && (fBufCur >= fBuffer) && static_cast(fBufCur - fBuffer) <= std::numeric_limits::max() && "Byte count position is after the end of the buffer"); @@ -341,14 +359,15 @@ void TBufferFile::SetByteCount(ULong64_t cntpos, Bool_t packInVersion) // This (of course) also requires that we do the 'same' to the WriteVersion // routines. R__ASSERT(!fByteCountStack.empty()); - if (cntpos == kMaxInt) { + if (cntpos == kOverflowPosition) { + // The position is above 4GB but was cached using a 32 bit variable. cntpos = fByteCountStack.back(); } fByteCountStack.pop_back(); // if we are not in the same TKey chunk or if the cntpos is too large to fit in UInt_t // let's postpone the writing of the byte count const ULong64_t full_cnt = ULong64_t(fBufCur - fBuffer) - cntpos - sizeof(UInt_t); - if (full_cnt >= kMaxMapCount) { + if (full_cnt >= kMaxRecordByteCount) { fByteCounts[cntpos] = full_cnt; return; } @@ -373,11 +392,11 @@ void TBufferFile::SetByteCount(ULong64_t cntpos, Bool_t packInVersion) } else tobuf(buf, cnt | kByteCountMask); - if (cnt >= kMaxMapCount) { + if (cnt >= kMaxRecordByteCount) { // NOTE: This should now be unreachable. Fatal("WriteByteCount", "Control flow error, code should be unreachable and wrote a bytecount that is too large (more than %d)", - kMaxMapCount); + kMaxRecordByteCount); } } @@ -391,12 +410,13 @@ void TBufferFile::SetByteCount(ULong64_t cntpos, Bool_t packInVersion) Long64_t TBufferFile::CheckByteCount(ULong64_t startpos, ULong64_t bcnt, const TClass *clss, const char *classname) { - R__ASSERT(!fByteCountStack.empty()); - if (startpos == kMaxInt) { - // The position is above 1GB and was cached using a 32 bit variable. + R__ASSERT(!fByteCountStack.empty() && startpos <= kOverflowPosition && bcnt <= kOverflowCount + && "Byte count stack is empty or invalid startpos/bcnt"); + if (startpos == kOverflowPosition) { + // The position is above 4GB but was cached using a 32 bit variable. startpos = fByteCountStack.back(); } - if (bcnt == kMaxInt) { + if (bcnt == kOverflowCount) { // in case we are checking a byte count for which we postponed // the writing because it was too large, we retrieve it now auto it = fByteCounts.find(startpos); @@ -2992,7 +3012,7 @@ Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass if (startpos) { // before reading object save start position auto full_startpos = fBufCur - fBuffer; - *startpos = full_startpos < kMaxInt ? UInt_t(full_startpos) : kMaxInt; + *startpos = full_startpos <= kMaxCountPosition ? UInt_t(full_startpos) : kOverflowPosition; if (bcnt) fByteCountStack.push_back(full_startpos); } @@ -3029,10 +3049,10 @@ Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass if (*bcnt == 0) { // The byte count was stored but is zero, this means the data // did not fit and thus we stored it in 'fByteCounts' instead. - // Mark this case by setting startpos to kMaxInt. - *bcnt = kMaxInt; + // Mark this case by setting startpos to kOverflowCount. + *bcnt = kOverflowCount; if (startpos) - *startpos = kMaxInt; + *startpos = kOverflowPosition; } } } @@ -3131,7 +3151,7 @@ Version_t TBufferFile::ReadVersionNoCheckSum(UInt_t *startpos, UInt_t *bcnt) if (startpos) { // before reading object save start position auto full_startpos = fBufCur - fBuffer; - *startpos = full_startpos < kMaxInt ? UInt_t(full_startpos) : kMaxInt; + *startpos = full_startpos < kMaxCountPosition ? UInt_t(full_startpos) : kOverflowPosition; if (bcnt) fByteCountStack.push_back(full_startpos); } @@ -3168,10 +3188,10 @@ Version_t TBufferFile::ReadVersionNoCheckSum(UInt_t *startpos, UInt_t *bcnt) if (*bcnt == 0) { // The byte count was stored but is zero, this means the data // did not fit and thus we stored it in 'fByteCounts' instead. - // Mark this case by setting startpos to kMaxInt. - *bcnt = kMaxInt; + // Mark this case by setting startpos to kOverflowCount. + *bcnt = kOverflowCount; if (startpos) - *startpos = kMaxInt; + *startpos = kOverflowPosition; } } } @@ -3357,7 +3377,7 @@ void TBufferFile::CheckCount(UInt_t offset) // support storing more than 1GB in a single TBufferFile. const bool bufferLimitedToMaxMapCount = false; if (bufferLimitedToMaxMapCount && IsWriting()) { - if (offset >= kMaxMapCount) { + if (offset > kMaxRecordByteCount) { // We have kMaxMapCount == kMaxRecordByteCount Error("CheckCount", "buffer offset too large (larger than %d)", kMaxMapCount); // exception } @@ -3454,7 +3474,7 @@ UInt_t TBufferFile::ReserveByteCount() auto full_cntpos = fBufCur - fBuffer; fByteCountStack.push_back(full_cntpos); *this << (UInt_t)kByteCountMask; // placeholder for byte count - return full_cntpos < kMaxInt ? full_cntpos : kMaxInt; + return full_cntpos <= kMaxCountPosition ? full_cntpos : kOverflowPosition; } //////////////////////////////////////////////////////////////////////////////// From 588b29e6f2ed80bd37eaafd9ea2e14336420b1b2 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Sun, 8 Feb 2026 15:00:57 +0100 Subject: [PATCH 13/13] io: ReadVersion should always take both position and byte count --- io/io/src/TBufferFile.cxx | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/io/io/src/TBufferFile.cxx b/io/io/src/TBufferFile.cxx index a304fb0093196..92bacafdcd231 100644 --- a/io/io/src/TBufferFile.cxx +++ b/io/io/src/TBufferFile.cxx @@ -3007,14 +3007,15 @@ void TBufferFile::SkipVersion(const TClass *cl) Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass *cl) { + assert((!startpos && !bcnt) || (startpos && bcnt)); // both or none should be set + Version_t version; if (startpos) { // before reading object save start position auto full_startpos = fBufCur - fBuffer; *startpos = full_startpos <= kMaxCountPosition ? UInt_t(full_startpos) : kOverflowPosition; - if (bcnt) - fByteCountStack.push_back(full_startpos); + fByteCountStack.push_back(full_startpos); } // read byte count (older files don't have byte count) @@ -3039,11 +3040,11 @@ Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass v.cnt = 0; } if (bcnt) { + // We also have (asserted) that (startpos != nullptr) if (!v.cnt) { // no byte count stored *bcnt = 0; - if (startpos) // Undo the push_back only if it happened. - fByteCountStack.pop_back(); + fByteCountStack.pop_back(); } else { *bcnt = (v.cnt & ~kByteCountMask); if (*bcnt == 0) { @@ -3051,8 +3052,7 @@ Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass // did not fit and thus we stored it in 'fByteCounts' instead. // Mark this case by setting startpos to kOverflowCount. *bcnt = kOverflowCount; - if (startpos) - *startpos = kOverflowPosition; + *startpos = kOverflowPosition; } } } @@ -3146,14 +3146,15 @@ Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass Version_t TBufferFile::ReadVersionNoCheckSum(UInt_t *startpos, UInt_t *bcnt) { + assert((!startpos && !bcnt) || (startpos && bcnt)); // both or none should be set + Version_t version; if (startpos) { // before reading object save start position auto full_startpos = fBufCur - fBuffer; *startpos = full_startpos < kMaxCountPosition ? UInt_t(full_startpos) : kOverflowPosition; - if (bcnt) - fByteCountStack.push_back(full_startpos); + fByteCountStack.push_back(full_startpos); } // read byte count (older files don't have byte count) @@ -3181,8 +3182,7 @@ Version_t TBufferFile::ReadVersionNoCheckSum(UInt_t *startpos, UInt_t *bcnt) if (!v.cnt) { // no byte count stored *bcnt = 0; - if (startpos) // Undo the push_back only if it happened. - fByteCountStack.pop_back(); + fByteCountStack.pop_back(); } else { *bcnt = (v.cnt & ~kByteCountMask); if (*bcnt == 0) { @@ -3190,8 +3190,7 @@ Version_t TBufferFile::ReadVersionNoCheckSum(UInt_t *startpos, UInt_t *bcnt) // did not fit and thus we stored it in 'fByteCounts' instead. // Mark this case by setting startpos to kOverflowCount. *bcnt = kOverflowCount; - if (startpos) - *startpos = kOverflowPosition; + *startpos = kOverflowPosition; } } }