From 246cab6f3f80c38c32308655024a3be770913148 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Mon, 15 Jun 2026 10:49:03 +0000 Subject: [PATCH 1/9] feat(logging): add Logger interface and process-global default (2/6) Second block: the backend-agnostic logging API and the swappable default logger. - Logger: pure-virtual sink (ShouldLog/Log/SetLevel/level/Flush/IsNoop), with ShouldLog() as the single authority for runtime filtering and a documented no-reentrancy / thread-safety / noexcept contract. Mirrors MetricsReporter. - LogMessage owns its formatted text (moved in) so a sink may retain records; reserves an owning attributes vector for future structured logging without an ABI break. logger.h is backend-agnostic -- it never includes the build config header nor references the spdlog feature macro, so consumers see one stable API. - Process-global default logger: GetDefaultLogger / SetDefaultLogger / SetDefaultLevel, plus a lock-free thread-local generation cache on the hot path (no lock or refcount churn in steady state; slot mutex only on swap/refresh). NoopLogger is the placeholder default here; CerrLogger and the spdlog backend arrive in the following blocks. Builds the source into both CMake and Meson and installs logger.h alongside log_level.h; logger_test covers the default-logger API, level lowering taking effect immediately, and concurrent swap/read safety. Co-authored-by: Isaac --- src/iceberg/CMakeLists.txt | 1 + src/iceberg/logging/logger.cc | 146 ++++++++++++++++ src/iceberg/logging/logger.h | 213 ++++++++++++++++++++++++ src/iceberg/logging/meson.build | 2 +- src/iceberg/meson.build | 1 + src/iceberg/test/CMakeLists.txt | 2 +- src/iceberg/test/logger_test.cc | 149 +++++++++++++++++ src/iceberg/test/logging_test_helpers.h | 77 +++++++++ src/iceberg/test/meson.build | 2 +- 9 files changed, 590 insertions(+), 3 deletions(-) create mode 100644 src/iceberg/logging/logger.cc create mode 100644 src/iceberg/logging/logger.h create mode 100644 src/iceberg/test/logger_test.cc create mode 100644 src/iceberg/test/logging_test_helpers.h diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 18615c202..93a3a6004 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -47,6 +47,7 @@ set(ICEBERG_SOURCES inheritable_metadata.cc json_serde.cc location_provider.cc + logging/logger.cc manifest/manifest_adapter.cc manifest/manifest_entry.cc manifest/manifest_filter_manager.cc diff --git a/src/iceberg/logging/logger.cc b/src/iceberg/logging/logger.cc new file mode 100644 index 000000000..cd864bc42 --- /dev/null +++ b/src/iceberg/logging/logger.cc @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/logging/logger.h" + +#include +#include +#include +#include +#include + +namespace iceberg { + +namespace { + +/// \brief Logger that drops every record. +class NoopLogger final : public Logger { + public: + bool ShouldLog(LogLevel /*level*/) const override { return false; } + void Log(LogMessage&& /*message*/) noexcept override {} + void SetLevel(LogLevel /*level*/) override {} + LogLevel level() const override { return LogLevel::kOff; } + bool IsNoop() const override { return true; } +}; + +/// \brief Construct the process default logger for this build configuration. +/// +/// This block ships only the interface and the no-op logger; the concrete +/// std::cerr and spdlog sinks (and the build-config selection between them) +/// arrive in later blocks, which update this factory. +std::shared_ptr MakeDefaultLogger() { return std::make_shared(); } + +/// \brief The process-global default-logger slot. +struct DefaultSlot { + std::mutex mtx; + std::shared_ptr logger; + // Seeded to 1 so a fresh thread (tls_gen == 0) always refreshes on first use. + std::atomic gen{1}; + + DefaultSlot() : logger(MakeDefaultLogger()) {} +}; + +/// \brief Immortal (leaked, hence reachable -> LSan-clean) accessor for the slot. +DefaultSlot& Slot() { + static auto* slot = new DefaultSlot(); + return *slot; +} + +} // namespace + +std::shared_ptr Logger::Noop() { + // Intentionally leaked: reachable via the function-local static (LSan-clean) + // and never destroyed, so logging during static teardown stays safe. + static auto* instance = new std::shared_ptr(std::make_shared()); + return *instance; +} + +std::shared_ptr GetDefaultLogger() { + DefaultSlot& slot = Slot(); + std::lock_guard lock(slot.mtx); + return slot.logger; +} + +void SetDefaultLogger(std::shared_ptr logger) { + if (!logger) { + logger = Logger::Noop(); + } + DefaultSlot& slot = Slot(); + std::lock_guard lock(slot.mtx); + slot.logger = std::move(logger); + // Publish the swap; the mutex provides the happens-before, gen is a detector. + slot.gen.fetch_add(1, std::memory_order_relaxed); +} + +void SetDefaultLevel(LogLevel level) { + DefaultSlot& slot = Slot(); + std::lock_guard lock(slot.mtx); + slot.logger->SetLevel(level); +} + +namespace detail { + +const std::shared_ptr& CurrentLogger() noexcept { + static thread_local std::shared_ptr tls; + static thread_local uint64_t tls_gen = 0; + // Sentinel whose destructor marks the cache dead at thread exit. It is + // declared after tls/tls_gen, so it is destroyed FIRST (reverse order); once + // dead, a log from any later-destroyed thread_local destructor must not touch + // the (about-to-be / already) destroyed tls slot. + static thread_local struct AliveFlag { + bool value = true; + ~AliveFlag() { value = false; } + } alive; + if (!alive.value) { + // Thread teardown: the TLS cache is unsafe. Fall back to an immortal logger + // (leaked, never destroyed) so logging during teardown stays safe. + static const std::shared_ptr kFallback = Logger::Noop(); + return kFallback; + } + DefaultSlot& slot = Slot(); + uint64_t current = slot.gen.load(std::memory_order_relaxed); + if (current != tls_gen) { + std::lock_guard lock(slot.mtx); + tls = slot.logger; + tls_gen = current; + } + return tls; +} + +void Emit(Logger& logger, LogLevel level, const std::source_location& location, + std::string&& message) { + logger.Log(LogMessage{.level = level, + .message = std::move(message), + .location = location, + .attributes = {}}); +} + +void EmitFormatError(Logger& logger, LogLevel level, + const std::source_location& location) noexcept { + // Fixed short literal (<= 15 bytes, fits SSO on libstdc++/libc++/MSVC -> no heap + // allocation), no std::format, no retry. Cannot throw or recurse. + logger.Log(LogMessage{.level = level, + .message = std::string(""), + .location = location, + .attributes = {}}); +} + +} // namespace detail + +} // namespace iceberg diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h new file mode 100644 index 000000000..58f5691af --- /dev/null +++ b/src/iceberg/logging/logger.h @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/logging/logger.h +/// \brief Pluggable logging interface, the process-global default logger, and +/// the logging macros. +/// +/// This header is backend-agnostic: it never includes the build-generated +/// backend configuration header and never references the spdlog feature macro, +/// so consumers see one stable API regardless of how the backend was configured. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/logging/log_level.h" +#include "iceberg/result.h" + +namespace iceberg { + +/// \brief A structured key/value attribute attached to a log record. +/// +/// Both key and value are owned so a sink may retain the record safely. +/// Unused in v1; reserved so structured logging can be added without an ABI +/// break to LogMessage. +struct LogAttribute { + std::string key; + std::string value; +}; + +/// \brief A single log record handed to a Logger. +/// +/// The formatted message is owned (moved in by the logging macros), so a sink +/// may safely retain the record beyond the Log() call. The member set must not +/// depend on the build's logging backend (the spdlog backend never appears here). +struct LogMessage { + LogLevel level = LogLevel::kOff; + std::string message; + std::source_location location = std::source_location::current(); + std::vector attributes; +}; + +/// \brief Well-known Logger::Initialize() property keys. +/// +/// `level` is honored by the base Logger::Initialize (parsed via +/// LogLevelFromString). `pattern` is honored by the formatting sinks +/// (CerrLogger, SpdLogger). +inline constexpr std::string_view kLevelProperty = "level"; +inline constexpr std::string_view kPatternProperty = "pattern"; + +/// \brief Pluggable logging sink. +/// +/// ShouldLog() is the single authority for runtime filtering -- the macros call +/// it on every (compile-time-enabled) statement, so level changes by any path +/// take effect immediately. Implementations must be thread-safe and must not +/// throw. They must also obey: +/// - No reentrancy: Log()/Flush() must not call the logging macros or +/// GetDefaultLogger() (UB -- deadlock with mutex-based sinks). +/// - level() is an accessor consistent with ShouldLog (used by SetDefaultLevel +/// and introspection); ShouldLog may implement finer logic than a level compare. +class ICEBERG_EXPORT Logger { + public: + virtual ~Logger() = default; + + /// \brief Property-based setup, called by Loggers::Load() before first use. + /// + /// The base implementation applies the "level" property (parsed via + /// LogLevelFromString); an unrecognized value is an InvalidArgument error. + /// Formatting sinks override this to also apply "pattern" and then delegate + /// to this base for "level". + virtual Status Initialize( + const std::unordered_map& properties) { + if (auto it = properties.find(std::string(kLevelProperty)); it != properties.end()) { + auto parsed = LogLevelFromString(it->second); + if (!parsed) return std::unexpected(parsed.error()); + SetLevel(*parsed); + } + return {}; + } + + /// \brief Cheap check whether a record at \p level would be emitted. + virtual bool ShouldLog(LogLevel level) const = 0; + + /// \brief Emit one (already-formatted) record, taking ownership. Must not throw. + virtual void Log(LogMessage&& message) noexcept = 0; + + /// \brief Set the minimum level this logger emits. + virtual void SetLevel(LogLevel level) = 0; + + /// \brief Return the minimum level this logger emits. + virtual LogLevel level() const = 0; + + /// \brief Flush any buffered output. Must not throw; best-effort on the fatal path. + virtual void Flush() noexcept {} + + /// \brief Return true if this logger is a no-op. + virtual bool IsNoop() const { return false; } + + /// \brief Return a shared, immortal no-op logger singleton. + static std::shared_ptr Noop(); +}; + +/// \brief Return the process-global default logger (never null). +/// +/// Off the hot path -- acquires the slot lock and returns an owning copy. The +/// logging macros use the cheaper internal hot-path accessor instead. +ICEBERG_EXPORT std::shared_ptr GetDefaultLogger(); + +/// \brief Install a new process-global default logger. +/// +/// A null argument installs the no-op logger. Thread-safe; intended for +/// occasional (configuration-time) use rather than the hot path. +ICEBERG_EXPORT void SetDefaultLogger(std::shared_ptr logger); + +/// \brief Set the minimum level of the current default logger. +/// +/// Convenience for `GetDefaultLogger()->SetLevel(level)`. Filtering is always +/// decided by the logger's own ShouldLog(), so changing a logger's level by any +/// means (this, SetLevel on a held handle, or Initialize) takes effect immediately. +ICEBERG_EXPORT void SetDefaultLevel(LogLevel level); + +// --------------------------------------------------------------------------- +// Using the API directly (the LOG_* macros that wrap this are added later in +// the stack). Example: a custom sink, installed as the process default. +// +// class MySink : public Logger { +// public: +// bool ShouldLog(LogLevel level) const override { return level >= level_; } +// void Log(LogMessage&& m) noexcept override { write_line(m.message); } +// void SetLevel(LogLevel level) override { level_ = level; } +// LogLevel level() const override { return level_; } +// private: +// std::atomic level_{LogLevel::kInfo}; +// }; +// +// SetDefaultLogger(std::make_shared()); // install process-wide +// SetDefaultLevel(LogLevel::kDebug); // adjust the threshold +// +// auto logger = GetDefaultLogger(); // borrow the current default +// if (logger->ShouldLog(LogLevel::kInfo)) { +// logger->Log(LogMessage{.level = LogLevel::kInfo, .message = "scan ready"}); +// } +// +// // Or configure from catalog-style properties (applies the "level" key): +// auto sink = std::make_shared(); +// auto status = sink->Initialize({{std::string(kLevelProperty), "warn"}}); // -> kWarn +// --------------------------------------------------------------------------- + +namespace detail { + +/// \brief Hot-path accessor for the default logger. +/// +/// Returns a reference to a thread-local cached shared_ptr that is refreshed +/// only when the default logger has changed (no lock / no refcount churn in +/// steady state). The reference is valid for the duration of the calling +/// statement. +ICEBERG_EXPORT const std::shared_ptr& CurrentLogger() noexcept; + +/// \brief Build a LogMessage from the already-formatted text and dispatch it. +/// +/// Declared ICEBERG_EXPORT because the logging macros expand into this call in +/// consumer translation units. +ICEBERG_EXPORT void Emit(Logger& logger, LogLevel level, + const std::source_location& location, std::string&& message); + +/// \brief Emit a fixed fallback record when formatting threw. +/// +/// noexcept, allocation-light (small/SSO literal), performs no std::format, and +/// does not recurse -- so the macro's "logging never throws" guarantee holds +/// even when a format argument throws. +ICEBERG_EXPORT void EmitFormatError(Logger& logger, LogLevel level, + const std::source_location& location) noexcept; + +/// \brief Runtime (non-literal) format-string helper. +/// +/// std::format requires a compile-time format string; this routes a runtime +/// string through std::vformat. Args are bound as named lvalues and the +/// arg-store is held in a named variable so it outlives the vformat call +/// (C++23 make_format_args rejects rvalues -- P2905 / LWG3631). +template +std::string VFormat(std::string_view fmt, Args&&... args) { + auto store = std::make_format_args(args...); + return std::vformat(fmt, store); +} + +} // namespace detail + +} // namespace iceberg diff --git a/src/iceberg/logging/meson.build b/src/iceberg/logging/meson.build index 3c286a196..3f7af4fec 100644 --- a/src/iceberg/logging/meson.build +++ b/src/iceberg/logging/meson.build @@ -15,4 +15,4 @@ # specific language governing permissions and limitations # under the License. -install_headers(['log_level.h'], subdir: 'iceberg/logging') +install_headers(['log_level.h', 'logger.h'], subdir: 'iceberg/logging') diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index 7a6ba4f8b..a0a0be610 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -72,6 +72,7 @@ iceberg_sources = files( 'inspect/snapshots_table.cc', 'json_serde.cc', 'location_provider.cc', + 'logging/logger.cc', 'manifest/manifest_adapter.cc', 'manifest/manifest_entry.cc', 'manifest/manifest_filter_manager.cc', diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 0a7db5ed2..16575c161 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -102,7 +102,7 @@ add_iceberg_test(table_test table_test.cc table_update_test.cc) -add_iceberg_test(logging_test SOURCES log_level_test.cc) +add_iceberg_test(logging_test SOURCES log_level_test.cc logger_test.cc) add_iceberg_test(expression_test SOURCES diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc new file mode 100644 index 000000000..ed1c88aa5 --- /dev/null +++ b/src/iceberg/test/logger_test.cc @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/logging/logger.h" + +#include +#include +#include +#include + +#include + +#include "iceberg/logging/log_level.h" +#include "iceberg/test/logging_test_helpers.h" + +namespace iceberg { + +TEST(LoggerTest, NoopIsSharedImmortalAndSilent) { + auto noop = Logger::Noop(); + ASSERT_NE(noop, nullptr); + EXPECT_TRUE(noop->IsNoop()); + EXPECT_FALSE(noop->ShouldLog(LogLevel::kFatal)); + EXPECT_EQ(noop->level(), LogLevel::kOff); + // Same singleton instance every call. + EXPECT_EQ(noop.get(), Logger::Noop().get()); +} + +TEST(LoggerTest, DefaultLoggerIsNeverNull) { EXPECT_NE(GetDefaultLogger(), nullptr); } + +TEST(LoggerTest, SetAndGetDefaultLogger) { + auto capturing = std::make_shared(); + ScopedDefaultLogger guard(capturing); + EXPECT_EQ(GetDefaultLogger().get(), capturing.get()); + EXPECT_EQ(detail::CurrentLogger().get(), capturing.get()); +} + +TEST(LoggerTest, SetNullFallsBackToNoop) { + ScopedDefaultLogger guard(std::make_shared()); + SetDefaultLogger(nullptr); + EXPECT_TRUE(GetDefaultLogger()->IsNoop()); +} + +TEST(LoggerTest, CurrentLoggerTracksSwaps) { + auto first = std::make_shared(); + auto second = std::make_shared(); + ScopedDefaultLogger guard(first); + EXPECT_EQ(detail::CurrentLogger().get(), first.get()); + SetDefaultLogger(second); + // Generation bump must invalidate the thread-local cache. + EXPECT_EQ(detail::CurrentLogger().get(), second.get()); +} + +TEST(LoggerTest, SetDefaultLevelUpdatesLogger) { + auto capturing = std::make_shared(); + ScopedDefaultLogger guard(capturing); + SetDefaultLevel(LogLevel::kError); + EXPECT_EQ(capturing->level(), LogLevel::kError); +} + +// Filtering is decided by the logger's own ShouldLog (no separate cached gate), +// so lowering a logger's level out-of-band (not via SetDefaultLevel) takes effect +// immediately -- this is the regression guard for the dropped g_effective_level gate. +TEST(LoggerTest, OutOfBandLevelLoweringTakesEffect) { + auto capturing = std::make_shared(); + capturing->SetLevel(LogLevel::kError); + ScopedDefaultLogger guard(capturing); + EXPECT_FALSE(detail::CurrentLogger()->ShouldLog(LogLevel::kInfo)); + capturing->SetLevel(LogLevel::kTrace); // lowered directly on the handle + EXPECT_TRUE(detail::CurrentLogger()->ShouldLog(LogLevel::kInfo)); +} + +TEST(LoggerTest, ConcurrentSwapAndReadIsSafe) { + // Stress CurrentLogger()/GetDefaultLogger() against SetDefaultLogger() swaps. + // Run under TSan in CI; here it asserts no crash and a valid logger throughout. + auto a = std::make_shared(); + auto b = std::make_shared(); + ScopedDefaultLogger guard(a); + std::atomic stop{false}; + std::atomic saw_null{false}; + std::vector readers; + for (int i = 0; i < 6; ++i) { + readers.emplace_back([&stop, &saw_null] { + // ASSERT_* doesn't propagate from non-main threads; record via a flag. + while (!stop.load(std::memory_order_relaxed)) { + const auto& l = detail::CurrentLogger(); + if (!l) saw_null.store(true, std::memory_order_relaxed); + (void)l->ShouldLog(LogLevel::kError); + (void)GetDefaultLogger(); + } + }); + } + for (int i = 0; i < 2000; ++i) SetDefaultLogger((i & 1) ? a : b); + stop.store(true, std::memory_order_relaxed); + for (auto& t : readers) t.join(); + EXPECT_FALSE(saw_null.load()); // CurrentLogger() is never null across swaps +} + +TEST(LoggerTest, InitializeAppliesLevelProperty) { + CapturingLogger logger; + auto status = logger.Initialize({{std::string(kLevelProperty), std::string("error")}}); + ASSERT_TRUE(status.has_value()); + EXPECT_EQ(logger.level(), LogLevel::kError); +} + +TEST(LoggerTest, InitializeRejectsInvalidLevel) { + CapturingLogger logger; + auto status = + logger.Initialize({{std::string(kLevelProperty), std::string("not-a-level")}}); + ASSERT_FALSE(status.has_value()); + EXPECT_EQ(status.error().kind, ErrorKind::kInvalidArgument); +} + +// Logging during thread teardown (from a thread_local destructor) must not crash: +// CurrentLogger() serves the live cache or falls back to an immortal logger. Run +// under ASan in CI for full signal. +TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { + std::thread([] { + struct Probe { + ~Probe() { + const auto& logger = detail::CurrentLogger(); + if (logger) { + detail::Emit(*logger, LogLevel::kInfo, std::source_location::current(), + "from thread_local dtor"); + } + } + }; + static thread_local Probe probe; + (void)probe; + }).join(); + SUCCEED(); +} + +} // namespace iceberg diff --git a/src/iceberg/test/logging_test_helpers.h b/src/iceberg/test/logging_test_helpers.h new file mode 100644 index 000000000..00a57cf82 --- /dev/null +++ b/src/iceberg/test/logging_test_helpers.h @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include "iceberg/logging/logger.h" + +namespace iceberg { + +/// \brief Test sink that records every emitted LogMessage under a mutex. +class CapturingLogger : public Logger { + public: + bool ShouldLog(LogLevel level) const override { return level >= level_; } + + void Log(LogMessage&& message) noexcept override { + std::lock_guard lock(mutex_); + records_.push_back(std::move(message)); + } + + void SetLevel(LogLevel level) override { level_ = level; } + LogLevel level() const override { return level_; } + + std::vector records() const { + std::lock_guard lock(mutex_); + return records_; + } + + std::size_t count() const { + std::lock_guard lock(mutex_); + return records_.size(); + } + + private: + mutable std::mutex mutex_; + LogLevel level_ = LogLevel::kTrace; + std::vector records_; +}; + +/// \brief RAII guard that restores the process default logger on scope exit, so +/// tests that swap the global default don't leak state into other tests. +class ScopedDefaultLogger { + public: + explicit ScopedDefaultLogger(std::shared_ptr logger) + : previous_(GetDefaultLogger()) { + SetDefaultLogger(std::move(logger)); + } + ~ScopedDefaultLogger() { SetDefaultLogger(previous_); } + + ScopedDefaultLogger(const ScopedDefaultLogger&) = delete; + ScopedDefaultLogger& operator=(const ScopedDefaultLogger&) = delete; + + private: + std::shared_ptr previous_; +}; + +} // namespace iceberg diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index a76a15553..bd19c20f2 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -61,7 +61,7 @@ iceberg_tests = { 'table_update_test.cc', ), }, - 'logging_test': {'sources': files('log_level_test.cc')}, + 'logging_test': {'sources': files('log_level_test.cc', 'logger_test.cc')}, 'expression_test': { 'sources': files( 'aggregate_test.cc', From 796ff0d4cf29c341537f908a333f1169dc30ca19 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Fri, 19 Jun 2026 17:35:09 +0000 Subject: [PATCH 2/9] fix(logging): address review feedback on the Logger interface Resolves @wgtmac's review comments on PR #723: - Mark ShouldLog()/SetLevel()/level() noexcept so a custom logger cannot throw from the hot filter path; propagated to NoopLogger and CapturingLogger. - Make CurrentLogger() safe during thread teardown without UB: the per-thread cache is reached via a trivially-destructible thread_local pointer to a never-freed heap slot (kept LSan-reachable by a registry), so logging from a thread_local destructor in any order is safe. Replaces the AliveFlag guard. - Add ICEBERG_EXPORT to LogAttribute/LogMessage, matching Error/ScanReport. - Trim the logger.h \file brief (the macros it referenced arrive in PR #725). - Tests: use the IsError matcher and std::ignore; strengthen the teardown test to log from a thread_local destroyed after the cache. Co-authored-by: Isaac --- src/iceberg/logging/logger.cc | 67 ++++++++++++++++--------- src/iceberg/logging/logger.h | 19 ++++--- src/iceberg/test/logger_test.cc | 20 +++++--- src/iceberg/test/logging_test_helpers.h | 6 +-- 4 files changed, 69 insertions(+), 43 deletions(-) diff --git a/src/iceberg/logging/logger.cc b/src/iceberg/logging/logger.cc index cd864bc42..9c8c2798e 100644 --- a/src/iceberg/logging/logger.cc +++ b/src/iceberg/logging/logger.cc @@ -24,6 +24,7 @@ #include #include #include +#include namespace iceberg { @@ -32,10 +33,10 @@ namespace { /// \brief Logger that drops every record. class NoopLogger final : public Logger { public: - bool ShouldLog(LogLevel /*level*/) const override { return false; } + bool ShouldLog(LogLevel /*level*/) const noexcept override { return false; } void Log(LogMessage&& /*message*/) noexcept override {} - void SetLevel(LogLevel /*level*/) override {} - LogLevel level() const override { return LogLevel::kOff; } + void SetLevel(LogLevel /*level*/) noexcept override {} + LogLevel level() const noexcept override { return LogLevel::kOff; } bool IsNoop() const override { return true; } }; @@ -62,6 +63,34 @@ DefaultSlot& Slot() { return *slot; } +/// \brief A thread's cached view of the default logger and the generation it was +/// cached at. Heap-allocated and intentionally never freed (see CurrentLogger). +struct ThreadCache { + std::shared_ptr logger; + uint64_t gen = 0; // 0 != Slot().gen (seeded to 1) -> first use always refreshes +}; + +/// \brief Keeps every leaked ThreadCache reachable so LeakSanitizer does not flag +/// the intentional per-thread leak. Immortal (leaked) itself, like the slot. +struct CacheRegistry { + std::mutex mtx; + std::vector caches; +}; +CacheRegistry& Registry() { + static auto* registry = new CacheRegistry(); + return *registry; +} + +/// \brief Allocate a per-thread cache that is never destroyed and register it so +/// it stays reachable for LSan. +ThreadCache* NewThreadCache() { + auto* cache = new ThreadCache(); // intentionally leaked; see CurrentLogger + CacheRegistry& registry = Registry(); + std::lock_guard lock(registry.mtx); + registry.caches.push_back(cache); + return cache; +} + } // namespace std::shared_ptr Logger::Noop() { @@ -97,30 +126,22 @@ void SetDefaultLevel(LogLevel level) { namespace detail { const std::shared_ptr& CurrentLogger() noexcept { - static thread_local std::shared_ptr tls; - static thread_local uint64_t tls_gen = 0; - // Sentinel whose destructor marks the cache dead at thread exit. It is - // declared after tls/tls_gen, so it is destroyed FIRST (reverse order); once - // dead, a log from any later-destroyed thread_local destructor must not touch - // the (about-to-be / already) destroyed tls slot. - static thread_local struct AliveFlag { - bool value = true; - ~AliveFlag() { value = false; } - } alive; - if (!alive.value) { - // Thread teardown: the TLS cache is unsafe. Fall back to an immortal logger - // (leaked, never destroyed) so logging during teardown stays safe. - static const std::shared_ptr kFallback = Logger::Noop(); - return kFallback; - } + // The per-thread cache is reached through a trivially-destructible thread_local + // raw pointer to a heap object that is intentionally NEVER destroyed. This makes + // the accessor safe to call during thread teardown -- including from another + // thread_local's destructor, in any destruction order: the pointer has no + // destructor (so it stays readable throughout teardown), and the cached + // shared_ptr outlives every thread_local, so there is no use-after-destruction. + // The intentional per-thread leak is kept LSan-reachable via the registry. + static thread_local ThreadCache* cache = NewThreadCache(); DefaultSlot& slot = Slot(); uint64_t current = slot.gen.load(std::memory_order_relaxed); - if (current != tls_gen) { + if (current != cache->gen) { std::lock_guard lock(slot.mtx); - tls = slot.logger; - tls_gen = current; + cache->logger = slot.logger; + cache->gen = current; } - return tls; + return cache->logger; } void Emit(Logger& logger, LogLevel level, const std::source_location& location, diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h index 58f5691af..693bbc52a 100644 --- a/src/iceberg/logging/logger.h +++ b/src/iceberg/logging/logger.h @@ -20,8 +20,7 @@ #pragma once /// \file iceberg/logging/logger.h -/// \brief Pluggable logging interface, the process-global default logger, and -/// the logging macros. +/// \brief Pluggable logging interface and the process-global default logger. /// /// This header is backend-agnostic: it never includes the build-generated /// backend configuration header and never references the spdlog feature macro, @@ -48,7 +47,7 @@ namespace iceberg { /// Both key and value are owned so a sink may retain the record safely. /// Unused in v1; reserved so structured logging can be added without an ABI /// break to LogMessage. -struct LogAttribute { +struct ICEBERG_EXPORT LogAttribute { std::string key; std::string value; }; @@ -58,7 +57,7 @@ struct LogAttribute { /// The formatted message is owned (moved in by the logging macros), so a sink /// may safely retain the record beyond the Log() call. The member set must not /// depend on the build's logging backend (the spdlog backend never appears here). -struct LogMessage { +struct ICEBERG_EXPORT LogMessage { LogLevel level = LogLevel::kOff; std::string message; std::source_location location = std::source_location::current(); @@ -104,16 +103,16 @@ class ICEBERG_EXPORT Logger { } /// \brief Cheap check whether a record at \p level would be emitted. - virtual bool ShouldLog(LogLevel level) const = 0; + virtual bool ShouldLog(LogLevel level) const noexcept = 0; /// \brief Emit one (already-formatted) record, taking ownership. Must not throw. virtual void Log(LogMessage&& message) noexcept = 0; /// \brief Set the minimum level this logger emits. - virtual void SetLevel(LogLevel level) = 0; + virtual void SetLevel(LogLevel level) noexcept = 0; /// \brief Return the minimum level this logger emits. - virtual LogLevel level() const = 0; + virtual LogLevel level() const noexcept = 0; /// \brief Flush any buffered output. Must not throw; best-effort on the fatal path. virtual void Flush() noexcept {} @@ -150,10 +149,10 @@ ICEBERG_EXPORT void SetDefaultLevel(LogLevel level); // // class MySink : public Logger { // public: -// bool ShouldLog(LogLevel level) const override { return level >= level_; } +// bool ShouldLog(LogLevel level) const noexcept override { return level >= level_; } // void Log(LogMessage&& m) noexcept override { write_line(m.message); } -// void SetLevel(LogLevel level) override { level_ = level; } -// LogLevel level() const override { return level_; } +// void SetLevel(LogLevel level) noexcept override { level_ = level; } +// LogLevel level() const noexcept override { return level_; } // private: // std::atomic level_{LogLevel::kInfo}; // }; diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc index ed1c88aa5..0c53714d1 100644 --- a/src/iceberg/test/logger_test.cc +++ b/src/iceberg/test/logger_test.cc @@ -22,12 +22,14 @@ #include #include #include +#include #include #include #include "iceberg/logging/log_level.h" #include "iceberg/test/logging_test_helpers.h" +#include "iceberg/test/matchers.h" namespace iceberg { @@ -100,8 +102,8 @@ TEST(LoggerTest, ConcurrentSwapAndReadIsSafe) { while (!stop.load(std::memory_order_relaxed)) { const auto& l = detail::CurrentLogger(); if (!l) saw_null.store(true, std::memory_order_relaxed); - (void)l->ShouldLog(LogLevel::kError); - (void)GetDefaultLogger(); + std::ignore = l->ShouldLog(LogLevel::kError); + std::ignore = GetDefaultLogger(); } }); } @@ -123,12 +125,15 @@ TEST(LoggerTest, InitializeRejectsInvalidLevel) { auto status = logger.Initialize({{std::string(kLevelProperty), std::string("not-a-level")}}); ASSERT_FALSE(status.has_value()); - EXPECT_EQ(status.error().kind, ErrorKind::kInvalidArgument); + EXPECT_THAT(status, IsError(ErrorKind::kInvalidArgument)); } -// Logging during thread teardown (from a thread_local destructor) must not crash: -// CurrentLogger() serves the live cache or falls back to an immortal logger. Run -// under ASan in CI for full signal. +// Logging during thread teardown (from a thread_local destructor) must not crash. +// CurrentLogger()'s per-thread cache is reached via a trivially-destructible +// pointer to a never-freed heap object, so it is safe even when the logging +// statement runs from a thread_local that is destroyed AFTER the cache -- the +// hard case. Probe is constructed before CurrentLogger() is first touched on the +// thread, so it is destroyed last. Run under ASan in CI for full signal. TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { std::thread([] { struct Probe { @@ -141,7 +146,8 @@ TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { } }; static thread_local Probe probe; - (void)probe; + (void)probe; // construct Probe first ... + (void)detail::CurrentLogger(); // ... then the logger cache (destroyed first) }).join(); SUCCEED(); } diff --git a/src/iceberg/test/logging_test_helpers.h b/src/iceberg/test/logging_test_helpers.h index 00a57cf82..8859523c5 100644 --- a/src/iceberg/test/logging_test_helpers.h +++ b/src/iceberg/test/logging_test_helpers.h @@ -31,15 +31,15 @@ namespace iceberg { /// \brief Test sink that records every emitted LogMessage under a mutex. class CapturingLogger : public Logger { public: - bool ShouldLog(LogLevel level) const override { return level >= level_; } + bool ShouldLog(LogLevel level) const noexcept override { return level >= level_; } void Log(LogMessage&& message) noexcept override { std::lock_guard lock(mutex_); records_.push_back(std::move(message)); } - void SetLevel(LogLevel level) override { level_ = level; } - LogLevel level() const override { return level_; } + void SetLevel(LogLevel level) noexcept override { level_ = level; } + LogLevel level() const noexcept override { return level_; } std::vector records() const { std::lock_guard lock(mutex_); From 4943849be68d34b91ee2d49f078c589b6ba3ab26 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Sun, 21 Jun 2026 00:08:59 +0000 Subject: [PATCH 3/9] feat(logging): add std::format-style Log() functions Addresses @wgtmac's review comment on PR #723 asking for a std::format-style API that builds the string only after the level check. - A single overloaded iceberg::Log(): Log(level, "fmt {}", args...) -> process-default logger Log(logger, level, "fmt {}", args...) -> explicit logger disambiguated by the first argument. - Formats only after ShouldLog() (no wasted work when disabled) and never throws -- a formatting failure routes to EmitFormatError, matching the macros. - The call-site source_location is captured via a small consteval FmtWithLoc wrapper (the std::print technique), preserving compile-time format-string checking without a macro. The sink interface (Logger::Log(LogMessage&&)) intentionally stays pre-formatted: passing std::format_args across the virtual boundary is lifetime-unsafe and would break safe record retention. The format-style win is delivered at the call layer instead. Co-authored-by: Isaac --- src/iceberg/logging/logger.h | 57 +++++++++++++++++++++++++++++++++ src/iceberg/test/logger_test.cc | 29 +++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h index 693bbc52a..eea0a6cd0 100644 --- a/src/iceberg/logging/logger.h +++ b/src/iceberg/logging/logger.h @@ -26,12 +26,14 @@ /// backend configuration header and never references the spdlog feature macro, /// so consumers see one stable API regardless of how the backend was configured. +#include #include #include #include #include #include #include +#include #include #include #include @@ -207,6 +209,61 @@ std::string VFormat(std::string_view fmt, Args&&... args) { return std::vformat(fmt, store); } +/// \brief A checked format string bundled with the caller's source_location. +/// +/// The consteval constructor preserves std::format's compile-time format-string +/// checking while capturing the call site (the std::print/println technique), +/// so the function-style Log() can record an accurate file:line without a macro. +/// Used as a non-deduced parameter so the trailing args drive deduction. +template +struct FmtWithLoc { + std::format_string fmt; + std::source_location loc; + + template + requires std::convertible_to> + consteval FmtWithLoc( // NOLINT(google-explicit-constructor): mirrors std::format_string + const T& s, std::source_location loc = std::source_location::current()) + : fmt(s), loc(loc) {} +}; + +/// \brief Shared gate -> format -> emit body for the function-style Log() API. +/// +/// Formats only when the logger is enabled for \p level, and never throws (a +/// formatting failure routes to EmitFormatError, matching the macros). +template +void Dispatch(Logger& logger, LogLevel level, const std::source_location& loc, + std::format_string fmt, Args&&... args) noexcept { + if (!logger.ShouldLog(level)) return; + try { + Emit(logger, level, loc, std::format(fmt, std::forward(args)...)); + } catch (...) { + EmitFormatError(logger, level, loc); + } +} + } // namespace detail +/// \brief Log to the process-default logger, std::format style. Formats only if +/// the level is enabled; never throws. +/// +/// Example: `iceberg::Log(LogLevel::kInfo, "loaded {} files", n);` +template +void Log(LogLevel level, detail::FmtWithLoc...> fmt, + Args&&... args) { + const std::shared_ptr& logger = detail::CurrentLogger(); + if (logger) { + detail::Dispatch(*logger, level, fmt.loc, fmt.fmt, std::forward(args)...); + } +} + +/// \brief Log to an explicit logger, std::format style. Formats only if enabled. +/// +/// Example: `iceberg::Log(logger, LogLevel::kWarn, "retry {}", attempt);` +template +void Log(Logger& logger, LogLevel level, + detail::FmtWithLoc...> fmt, Args&&... args) { + detail::Dispatch(logger, level, fmt.loc, fmt.fmt, std::forward(args)...); +} + } // namespace iceberg diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc index 0c53714d1..833163819 100644 --- a/src/iceberg/test/logger_test.cc +++ b/src/iceberg/test/logger_test.cc @@ -152,4 +152,33 @@ TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { SUCCEED(); } +// --- Function-style API (non-macro): overloaded Log() --- + +TEST(LoggerTest, LogToExplicitLoggerFormats) { + auto sink = std::make_shared(); + Log(*sink, LogLevel::kInfo, "x={} y={}", 1, 2); + auto records = sink->records(); + ASSERT_EQ(records.size(), 1u); + EXPECT_EQ(records[0].level, LogLevel::kInfo); + EXPECT_EQ(records[0].message, "x=1 y=2"); + EXPECT_NE(records[0].location.line(), 0u); // call-site location captured +} + +TEST(LoggerTest, LogRespectsLevelAndDoesNotFormatWhenDisabled) { + auto sink = std::make_shared(); + sink->SetLevel(LogLevel::kError); + Log(*sink, LogLevel::kInfo, "dropped {}", 1); + EXPECT_EQ(sink->count(), 0u); +} + +TEST(LoggerTest, LogToDefaultLoggerFormatStyle) { + auto sink = std::make_shared(); + ScopedDefaultLogger guard(sink); + Log(LogLevel::kWarn, "v={}", 7); + auto records = sink->records(); + ASSERT_EQ(records.size(), 1u); + EXPECT_EQ(records[0].level, LogLevel::kWarn); + EXPECT_EQ(records[0].message, "v=7"); +} + } // namespace iceberg From bd4a25cb67bf3f00c60889ee5416776e79d84669 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Sun, 21 Jun 2026 00:14:11 +0000 Subject: [PATCH 4/9] feat(logging): add LogMessage::Builder for structured attributes Addresses @wgtmac's review comment on PR #723 that LogMessage::attributes had no easy fill path. Adds a fluent LogMessage::Builder so engine loggers can attach structured fields (query id, task id, table name, snapshot id, file path, ...): auto record = LogMessage::Builder(LogLevel::kInfo) .Message("scan finished") .Attribute("table", table_name) .Attribute("snapshot_id", std::to_string(id)) .Build(); logger->Log(std::move(record)); - Message() sets the formatted text; Attribute() appends an owned key/value; Location() overrides the source location (defaults to the build site). - Build() moves the accumulated state into a LogMessage. This defines how attributes get populated, so structured logging can grow without an ABI break. Co-authored-by: Isaac --- src/iceberg/logging/logger.h | 60 +++++++++++++++++++++++++++++++-- src/iceberg/test/logger_test.cc | 34 +++++++++++++++++++ 2 files changed, 91 insertions(+), 3 deletions(-) diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h index eea0a6cd0..d9edc2f34 100644 --- a/src/iceberg/logging/logger.h +++ b/src/iceberg/logging/logger.h @@ -46,9 +46,9 @@ namespace iceberg { /// \brief A structured key/value attribute attached to a log record. /// -/// Both key and value are owned so a sink may retain the record safely. -/// Unused in v1; reserved so structured logging can be added without an ABI -/// break to LogMessage. +/// Both key and value are owned so a sink may retain the record safely. Engine +/// loggers can surface these as discrete fields (query id, task id, table name, +/// snapshot id, file path, ...); see LogMessage::Builder to populate them. struct ICEBERG_EXPORT LogAttribute { std::string key; std::string value; @@ -59,11 +59,65 @@ struct ICEBERG_EXPORT LogAttribute { /// The formatted message is owned (moved in by the logging macros), so a sink /// may safely retain the record beyond the Log() call. The member set must not /// depend on the build's logging backend (the spdlog backend never appears here). +/// Use LogMessage::Builder for a readable way to assemble one, especially with +/// structured attributes. struct ICEBERG_EXPORT LogMessage { LogLevel level = LogLevel::kOff; std::string message; std::source_location location = std::source_location::current(); std::vector attributes; + + class Builder; +}; + +/// \brief Fluent builder for LogMessage, the easy path to attach structured +/// attributes. +/// +/// Example: +/// auto record = LogMessage::Builder(LogLevel::kInfo) +/// .Message("scan finished") +/// .Attribute("table", table_name) +/// .Attribute("snapshot_id", std::to_string(id)) +/// .Build(); +/// logger->Log(std::move(record)); +/// +/// The location defaults to where the Builder is constructed; override it with +/// Location() (e.g. to forward a caller's std::source_location). +class ICEBERG_EXPORT LogMessage::Builder { + public: + explicit Builder(LogLevel level) : level_(level) {} + + /// \brief Set the already-formatted message text. + Builder& Message(std::string message) { + message_ = std::move(message); + return *this; + } + + /// \brief Append a structured key/value attribute. + Builder& Attribute(std::string key, std::string value) { + attributes_.push_back(LogAttribute{.key = std::move(key), .value = std::move(value)}); + return *this; + } + + /// \brief Override the record's source location (defaults to the build site). + Builder& Location(std::source_location location) { + location_ = location; + return *this; + } + + /// \brief Materialize the LogMessage, moving the accumulated state out. + LogMessage Build() { + return LogMessage{.level = level_, + .message = std::move(message_), + .location = location_, + .attributes = std::move(attributes_)}; + } + + private: + LogLevel level_; + std::string message_; + std::source_location location_ = std::source_location::current(); + std::vector attributes_; }; /// \brief Well-known Logger::Initialize() property keys. diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc index 833163819..c66a7c02e 100644 --- a/src/iceberg/test/logger_test.cc +++ b/src/iceberg/test/logger_test.cc @@ -181,4 +181,38 @@ TEST(LoggerTest, LogToDefaultLoggerFormatStyle) { EXPECT_EQ(records[0].message, "v=7"); } +// --- LogMessage::Builder (structured attributes) --- + +TEST(LoggerTest, BuilderAssemblesMessageAndAttributes) { + auto record = LogMessage::Builder(LogLevel::kInfo) + .Message("scan finished") + .Attribute("table", "db.t") + .Attribute("snapshot_id", "42") + .Build(); + EXPECT_EQ(record.level, LogLevel::kInfo); + EXPECT_EQ(record.message, "scan finished"); + ASSERT_EQ(record.attributes.size(), 2u); + EXPECT_EQ(record.attributes[0].key, "table"); + EXPECT_EQ(record.attributes[0].value, "db.t"); + EXPECT_EQ(record.attributes[1].key, "snapshot_id"); + EXPECT_EQ(record.attributes[1].value, "42"); +} + +TEST(LoggerTest, BuilderDefaultsAndEmitToSink) { + auto sink = std::make_shared(); + sink->Log(LogMessage::Builder(LogLevel::kError).Message("boom").Build()); + auto records = sink->records(); + ASSERT_EQ(records.size(), 1u); + EXPECT_EQ(records[0].level, LogLevel::kError); + EXPECT_EQ(records[0].message, "boom"); + EXPECT_TRUE(records[0].attributes.empty()); + EXPECT_NE(records[0].location.line(), 0u); // location defaulted at build site +} + +TEST(LoggerTest, BuilderLocationOverride) { + auto loc = std::source_location::current(); + auto record = LogMessage::Builder(LogLevel::kDebug).Location(loc).Build(); + EXPECT_EQ(record.location.line(), loc.line()); +} + } // namespace iceberg From 92c4f97bc0e919bd427cdf42294caba75a5afb75 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Sun, 21 Jun 2026 06:43:24 +0000 Subject: [PATCH 5/9] fix name problem --- src/iceberg/logging/logger.cc | 4 ++-- src/iceberg/logging/logger.h | 22 ++++++++++++---------- src/iceberg/test/logger_test.cc | 23 ++++++++++++----------- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/src/iceberg/logging/logger.cc b/src/iceberg/logging/logger.cc index 9c8c2798e..286f443c3 100644 --- a/src/iceberg/logging/logger.cc +++ b/src/iceberg/logging/logger.cc @@ -123,7 +123,7 @@ void SetDefaultLevel(LogLevel level) { slot.logger->SetLevel(level); } -namespace detail { +namespace internal { const std::shared_ptr& CurrentLogger() noexcept { // The per-thread cache is reached through a trivially-destructible thread_local @@ -162,6 +162,6 @@ void EmitFormatError(Logger& logger, LogLevel level, .attributes = {}}); } -} // namespace detail +} // namespace internal } // namespace iceberg diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h index d9edc2f34..3519a64d3 100644 --- a/src/iceberg/logging/logger.h +++ b/src/iceberg/logging/logger.h @@ -226,7 +226,7 @@ ICEBERG_EXPORT void SetDefaultLevel(LogLevel level); // auto status = sink->Initialize({{std::string(kLevelProperty), "warn"}}); // -> kWarn // --------------------------------------------------------------------------- -namespace detail { +namespace internal { /// \brief Hot-path accessor for the default logger. /// @@ -276,7 +276,8 @@ struct FmtWithLoc { template requires std::convertible_to> - consteval FmtWithLoc( // NOLINT(google-explicit-constructor): mirrors std::format_string + consteval FmtWithLoc( // NOLINT(google-explicit-constructor): mirrors + // std::format_string const T& s, std::source_location loc = std::source_location::current()) : fmt(s), loc(loc) {} }; @@ -286,8 +287,8 @@ struct FmtWithLoc { /// Formats only when the logger is enabled for \p level, and never throws (a /// formatting failure routes to EmitFormatError, matching the macros). template -void Dispatch(Logger& logger, LogLevel level, const std::source_location& loc, - std::format_string fmt, Args&&... args) noexcept { +void FormatAndEmit(Logger& logger, LogLevel level, const std::source_location& loc, + std::format_string fmt, Args&&... args) noexcept { if (!logger.ShouldLog(level)) return; try { Emit(logger, level, loc, std::format(fmt, std::forward(args)...)); @@ -296,18 +297,19 @@ void Dispatch(Logger& logger, LogLevel level, const std::source_location& loc, } } -} // namespace detail +} // namespace internal /// \brief Log to the process-default logger, std::format style. Formats only if /// the level is enabled; never throws. /// /// Example: `iceberg::Log(LogLevel::kInfo, "loaded {} files", n);` template -void Log(LogLevel level, detail::FmtWithLoc...> fmt, +void Log(LogLevel level, internal::FmtWithLoc...> fmt, Args&&... args) { - const std::shared_ptr& logger = detail::CurrentLogger(); + const std::shared_ptr& logger = internal::CurrentLogger(); if (logger) { - detail::Dispatch(*logger, level, fmt.loc, fmt.fmt, std::forward(args)...); + internal::FormatAndEmit(*logger, level, fmt.loc, fmt.fmt, + std::forward(args)...); } } @@ -316,8 +318,8 @@ void Log(LogLevel level, detail::FmtWithLoc...> fmt, /// Example: `iceberg::Log(logger, LogLevel::kWarn, "retry {}", attempt);` template void Log(Logger& logger, LogLevel level, - detail::FmtWithLoc...> fmt, Args&&... args) { - detail::Dispatch(logger, level, fmt.loc, fmt.fmt, std::forward(args)...); + internal::FmtWithLoc...> fmt, Args&&... args) { + internal::FormatAndEmit(logger, level, fmt.loc, fmt.fmt, std::forward(args)...); } } // namespace iceberg diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc index c66a7c02e..da2fb09f0 100644 --- a/src/iceberg/test/logger_test.cc +++ b/src/iceberg/test/logger_test.cc @@ -49,7 +49,7 @@ TEST(LoggerTest, SetAndGetDefaultLogger) { auto capturing = std::make_shared(); ScopedDefaultLogger guard(capturing); EXPECT_EQ(GetDefaultLogger().get(), capturing.get()); - EXPECT_EQ(detail::CurrentLogger().get(), capturing.get()); + EXPECT_EQ(internal::CurrentLogger().get(), capturing.get()); } TEST(LoggerTest, SetNullFallsBackToNoop) { @@ -62,10 +62,10 @@ TEST(LoggerTest, CurrentLoggerTracksSwaps) { auto first = std::make_shared(); auto second = std::make_shared(); ScopedDefaultLogger guard(first); - EXPECT_EQ(detail::CurrentLogger().get(), first.get()); + EXPECT_EQ(internal::CurrentLogger().get(), first.get()); SetDefaultLogger(second); // Generation bump must invalidate the thread-local cache. - EXPECT_EQ(detail::CurrentLogger().get(), second.get()); + EXPECT_EQ(internal::CurrentLogger().get(), second.get()); } TEST(LoggerTest, SetDefaultLevelUpdatesLogger) { @@ -82,9 +82,9 @@ TEST(LoggerTest, OutOfBandLevelLoweringTakesEffect) { auto capturing = std::make_shared(); capturing->SetLevel(LogLevel::kError); ScopedDefaultLogger guard(capturing); - EXPECT_FALSE(detail::CurrentLogger()->ShouldLog(LogLevel::kInfo)); + EXPECT_FALSE(internal::CurrentLogger()->ShouldLog(LogLevel::kInfo)); capturing->SetLevel(LogLevel::kTrace); // lowered directly on the handle - EXPECT_TRUE(detail::CurrentLogger()->ShouldLog(LogLevel::kInfo)); + EXPECT_TRUE(internal::CurrentLogger()->ShouldLog(LogLevel::kInfo)); } TEST(LoggerTest, ConcurrentSwapAndReadIsSafe) { @@ -100,7 +100,7 @@ TEST(LoggerTest, ConcurrentSwapAndReadIsSafe) { readers.emplace_back([&stop, &saw_null] { // ASSERT_* doesn't propagate from non-main threads; record via a flag. while (!stop.load(std::memory_order_relaxed)) { - const auto& l = detail::CurrentLogger(); + const auto& l = internal::CurrentLogger(); if (!l) saw_null.store(true, std::memory_order_relaxed); std::ignore = l->ShouldLog(LogLevel::kError); std::ignore = GetDefaultLogger(); @@ -138,16 +138,17 @@ TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { std::thread([] { struct Probe { ~Probe() { - const auto& logger = detail::CurrentLogger(); + const auto& logger = internal::CurrentLogger(); if (logger) { - detail::Emit(*logger, LogLevel::kInfo, std::source_location::current(), - "from thread_local dtor"); + internal::Emit(*logger, LogLevel::kInfo, std::source_location::current(), + "from thread_local dtor"); } } }; static thread_local Probe probe; - (void)probe; // construct Probe first ... - (void)detail::CurrentLogger(); // ... then the logger cache (destroyed first) + std::ignore = probe; // construct Probe first ... + std::ignore = + internal::CurrentLogger(); // ... then the logger cache (destroyed first) }).join(); SUCCEED(); } From 5a9c820e0f4203c5edaf25f564bcc5311a5e6ca1 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Sun, 21 Jun 2026 06:58:08 +0000 Subject: [PATCH 6/9] add comment to location_ to clarify why no std::move --- src/iceberg/logging/logger.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h index 3519a64d3..75f9037c2 100644 --- a/src/iceberg/logging/logger.h +++ b/src/iceberg/logging/logger.h @@ -116,6 +116,7 @@ class ICEBERG_EXPORT LogMessage::Builder { private: LogLevel level_; std::string message_; + // `location_` is a trivially copyable members no need to move. std::source_location location_ = std::source_location::current(); std::vector attributes_; }; From 79acfa8393047086bb5238ad1d9caab8566d1f47 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Sun, 21 Jun 2026 07:22:26 +0000 Subject: [PATCH 7/9] fix(logging): free the per-thread logger cache at thread exit Replaces the intentional permanent per-thread leak with a cache that is freed at thread exit, while staying safe to call CurrentLogger() from any thread_local destructor during teardown. The per-thread state is split by lifetime: - `dead` (bool) and `cache` (pointer) are trivially destructible, so their storage stays valid for the whole thread and is readable from any other thread_local's destructor, in any destruction order. - A `guard` (the only piece with a destructor) frees the cache and sets `dead` at thread exit. A thread_local destroyed after it reads `dead == true` and falls back to an immortal no-op; one destroyed before it still sees a live cache. Either order is safe. This is the correct version of the earlier AliveFlag attempt, which was UB because the flag object itself had a destructor (reading it after that destructor ran was the very use-after-lifetime it tried to prevent). Drops the LSan-reachability registry, which is no longer needed now that nothing leaks. Validated with a dedicated stress harness under ThreadSanitizer and AddressSanitizer (no leaks): logging from a thread_local destroyed after the cache; logging interleaved with concurrent SetDefaultLogger swaps and thread teardown; and a thread whose only logging happens in a thread_local destructor. Adds ConcurrentTeardownAndSwapIsSafe to cover the combined path in CI. Co-authored-by: Isaac --- src/iceberg/logging/logger.cc | 67 ++++++++++++++++++--------------- src/iceberg/test/logger_test.cc | 45 +++++++++++++++++++--- 2 files changed, 76 insertions(+), 36 deletions(-) diff --git a/src/iceberg/logging/logger.cc b/src/iceberg/logging/logger.cc index 286f443c3..f245526ae 100644 --- a/src/iceberg/logging/logger.cc +++ b/src/iceberg/logging/logger.cc @@ -24,7 +24,6 @@ #include #include #include -#include namespace iceberg { @@ -64,33 +63,12 @@ DefaultSlot& Slot() { } /// \brief A thread's cached view of the default logger and the generation it was -/// cached at. Heap-allocated and intentionally never freed (see CurrentLogger). +/// cached at. Heap-allocated per thread and freed at thread exit (see CurrentLogger). struct ThreadCache { std::shared_ptr logger; uint64_t gen = 0; // 0 != Slot().gen (seeded to 1) -> first use always refreshes }; -/// \brief Keeps every leaked ThreadCache reachable so LeakSanitizer does not flag -/// the intentional per-thread leak. Immortal (leaked) itself, like the slot. -struct CacheRegistry { - std::mutex mtx; - std::vector caches; -}; -CacheRegistry& Registry() { - static auto* registry = new CacheRegistry(); - return *registry; -} - -/// \brief Allocate a per-thread cache that is never destroyed and register it so -/// it stays reachable for LSan. -ThreadCache* NewThreadCache() { - auto* cache = new ThreadCache(); // intentionally leaked; see CurrentLogger - CacheRegistry& registry = Registry(); - std::lock_guard lock(registry.mtx); - registry.caches.push_back(cache); - return cache; -} - } // namespace std::shared_ptr Logger::Noop() { @@ -126,14 +104,41 @@ void SetDefaultLevel(LogLevel level) { namespace internal { const std::shared_ptr& CurrentLogger() noexcept { - // The per-thread cache is reached through a trivially-destructible thread_local - // raw pointer to a heap object that is intentionally NEVER destroyed. This makes - // the accessor safe to call during thread teardown -- including from another - // thread_local's destructor, in any destruction order: the pointer has no - // destructor (so it stays readable throughout teardown), and the cached - // shared_ptr outlives every thread_local, so there is no use-after-destruction. - // The intentional per-thread leak is kept LSan-reachable via the registry. - static thread_local ThreadCache* cache = NewThreadCache(); + // Per-thread cache, freed at thread exit, yet safe to read from ANY other + // thread_local's destructor during teardown, in any destruction order. The + // three thread_locals are split by lifetime on purpose: + // + // * `dead` and `cache` are trivially destructible, so their storage stays + // valid for the WHOLE thread (their lifetime is not ended by a destructor); + // they remain readable throughout the teardown of every other thread_local. + // * `guard` is the only one with a destructor. At thread exit it sets `dead` + // and frees the cache. A thread_local destroyed AFTER `guard` reads + // `dead == true` and falls back instead of touching freed memory; one + // destroyed BEFORE `guard` still sees a live cache. Either order is safe. + // + // (The earlier AliveFlag was wrong because the flag itself had a destructor, so + // reading it after that destructor ran was UB -- the very bug it tried to avoid.) + static thread_local bool dead = false; + static thread_local ThreadCache* cache = nullptr; + static thread_local struct Guard { + ~Guard() { + dead = true; // mark BEFORE freeing, so a re-entrant log hits the fallback + delete cache; + cache = nullptr; + } + } guard; + (void)guard; // force initialization so its destructor is registered + + if (dead) { + // Thread teardown after the cache was freed: serve an immortal no-op so a log + // from a later thread_local destructor is safe. Such teardown logs are dropped. + static auto* fallback = new std::shared_ptr(Logger::Noop()); + return *fallback; + } + if (cache == nullptr) { + cache = new ThreadCache(); + } + DefaultSlot& slot = Slot(); uint64_t current = slot.gen.load(std::memory_order_relaxed); if (current != cache->gen) { diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc index da2fb09f0..a9f4d8dd0 100644 --- a/src/iceberg/test/logger_test.cc +++ b/src/iceberg/test/logger_test.cc @@ -129,11 +129,12 @@ TEST(LoggerTest, InitializeRejectsInvalidLevel) { } // Logging during thread teardown (from a thread_local destructor) must not crash. -// CurrentLogger()'s per-thread cache is reached via a trivially-destructible -// pointer to a never-freed heap object, so it is safe even when the logging -// statement runs from a thread_local that is destroyed AFTER the cache -- the -// hard case. Probe is constructed before CurrentLogger() is first touched on the -// thread, so it is destroyed last. Run under ASan in CI for full signal. +// The per-thread cache is freed at thread exit, but CurrentLogger()'s teardown +// guard (a trivially-destructible "dead" flag whose storage outlives every +// thread_local) makes it safe even when the logging statement runs from a +// thread_local destroyed AFTER the cache -- the hard case. Probe is constructed +// before CurrentLogger() is first touched, so it is destroyed last. Run under +// ASan/TSan in CI for full signal. TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { std::thread([] { struct Probe { @@ -153,6 +154,40 @@ TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { SUCCEED(); } +// Teardown interleaved with concurrent default-logger swaps: many short-lived +// threads each log from a thread_local destructor while another thread swaps the +// default logger. Exercises the per-thread cache being freed at thread exit at +// the same time the global slot is mutated. Run under ASan/TSan in CI. +TEST(LoggerTest, ConcurrentTeardownAndSwapIsSafe) { + auto a = std::make_shared(); + ScopedDefaultLogger guard(a); + std::atomic stop{false}; + std::thread swapper([&] { + auto b = std::make_shared(); + while (!stop.load(std::memory_order_relaxed)) { + SetDefaultLogger(b); + SetDefaultLogger(a); + } + }); + for (int i = 0; i < 100; ++i) { + std::thread worker([] { + struct Probe { + ~Probe() { + const auto& l = internal::CurrentLogger(); + if (l) l->ShouldLog(LogLevel::kError); + } + }; + static thread_local Probe probe; + std::ignore = probe; // constructed before the cache + std::ignore = internal::CurrentLogger(); // touch the cache in normal code + }); + worker.join(); // teardown runs concurrently with the swapper + } + stop.store(true, std::memory_order_relaxed); + swapper.join(); + SUCCEED(); +} + // --- Function-style API (non-macro): overloaded Log() --- TEST(LoggerTest, LogToExplicitLoggerFormats) { From 963c49dcf556d8d85bf227e364fb74fcb59c78da Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Sun, 21 Jun 2026 07:24:44 +0000 Subject: [PATCH 8/9] test(logging): make CapturingLogger's level atomic --- src/iceberg/logging/logger.cc | 4 +++- src/iceberg/test/logging_test_helpers.h | 15 +++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/iceberg/logging/logger.cc b/src/iceberg/logging/logger.cc index f245526ae..bca9179ed 100644 --- a/src/iceberg/logging/logger.cc +++ b/src/iceberg/logging/logger.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include namespace iceberg { @@ -127,7 +128,8 @@ const std::shared_ptr& CurrentLogger() noexcept { cache = nullptr; } } guard; - (void)guard; // force initialization so its destructor is registered + std::ignore = guard; // mark the thread_local as intentionally used (its dtor is + // registered by reaching the declaration above) if (dead) { // Thread teardown after the cache was freed: serve an immortal no-op so a log diff --git a/src/iceberg/test/logging_test_helpers.h b/src/iceberg/test/logging_test_helpers.h index 8859523c5..f3999195e 100644 --- a/src/iceberg/test/logging_test_helpers.h +++ b/src/iceberg/test/logging_test_helpers.h @@ -19,6 +19,7 @@ #pragma once +#include #include #include #include @@ -31,15 +32,21 @@ namespace iceberg { /// \brief Test sink that records every emitted LogMessage under a mutex. class CapturingLogger : public Logger { public: - bool ShouldLog(LogLevel level) const noexcept override { return level >= level_; } + bool ShouldLog(LogLevel level) const noexcept override { + return level >= level_.load(std::memory_order_relaxed); + } void Log(LogMessage&& message) noexcept override { std::lock_guard lock(mutex_); records_.push_back(std::move(message)); } - void SetLevel(LogLevel level) noexcept override { level_ = level; } - LogLevel level() const noexcept override { return level_; } + void SetLevel(LogLevel level) noexcept override { + level_.store(level, std::memory_order_relaxed); + } + LogLevel level() const noexcept override { + return level_.load(std::memory_order_relaxed); + } std::vector records() const { std::lock_guard lock(mutex_); @@ -53,7 +60,7 @@ class CapturingLogger : public Logger { private: mutable std::mutex mutex_; - LogLevel level_ = LogLevel::kTrace; + std::atomic level_ = LogLevel::kTrace; std::vector records_; }; From 642926c5e93eb63455e61e26a02e64e84c5993f1 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Sun, 21 Jun 2026 10:06:06 +0000 Subject: [PATCH 9/9] feat(logging): add ScopedLogger for per-context logger routing Bind a logger for the current thread + scope so the default path (CurrentLogger/Log(level,...)/LOG_* macros) routes to it, with the global default as fallback. Engines can route Iceberg's own internal logs into a per catalog/session/query/task context without touching call sites. --- src/iceberg/logging/logger.cc | 83 ++++++++++---- src/iceberg/logging/logger.h | 43 ++++++++ src/iceberg/test/logger_test.cc | 184 ++++++++++++++++++++++++++++++++ 3 files changed, 290 insertions(+), 20 deletions(-) diff --git a/src/iceberg/logging/logger.cc b/src/iceberg/logging/logger.cc index bca9179ed..3adbf75dd 100644 --- a/src/iceberg/logging/logger.cc +++ b/src/iceberg/logging/logger.cc @@ -64,10 +64,13 @@ DefaultSlot& Slot() { } /// \brief A thread's cached view of the default logger and the generation it was -/// cached at. Heap-allocated per thread and freed at thread exit (see CurrentLogger). +/// cached at. Heap-allocated per thread and freed at thread exit (see +/// AccessThreadCache). `override_` is the active ScopedLogger binding for this +/// thread (empty when none); when set it supersedes the cached default. struct ThreadCache { std::shared_ptr logger; uint64_t gen = 0; // 0 != Slot().gen (seeded to 1) -> first use always refreshes + std::shared_ptr override_; // active ScopedLogger binding, empty if none }; } // namespace @@ -104,21 +107,20 @@ void SetDefaultLevel(LogLevel level) { namespace internal { -const std::shared_ptr& CurrentLogger() noexcept { - // Per-thread cache, freed at thread exit, yet safe to read from ANY other - // thread_local's destructor during teardown, in any destruction order. The - // three thread_locals are split by lifetime on purpose: - // - // * `dead` and `cache` are trivially destructible, so their storage stays - // valid for the WHOLE thread (their lifetime is not ended by a destructor); - // they remain readable throughout the teardown of every other thread_local. - // * `guard` is the only one with a destructor. At thread exit it sets `dead` - // and frees the cache. A thread_local destroyed AFTER `guard` reads - // `dead == true` and falls back instead of touching freed memory; one - // destroyed BEFORE `guard` still sees a live cache. Either order is safe. - // - // (The earlier AliveFlag was wrong because the flag itself had a destructor, so - // reading it after that destructor ran was UB -- the very bug it tried to avoid.) +namespace { + +/// \brief The one place the per-thread cache's lifetime is managed; shared by +/// CurrentLogger, GetCurrentLogger, and the ScopedLogger helpers. +/// +/// Safe to call from any thread_local destructor during teardown: `dead`/`cache` +/// are trivially destructible so their storage lives the whole thread, and `guard` +/// (the only one with a destructor) sets `dead` before freeing the cache -- so a +/// late caller gets nullptr instead of touching freed memory, in any order. +/// +/// \param create allocate the cache if absent (writers pass true; read-only +/// callers pass false so a query never allocates). Returns nullptr while tearing +/// down, or when !create and no cache exists -- caller falls back to global/noop. +ThreadCache* AccessThreadCache(bool create) noexcept { static thread_local bool dead = false; static thread_local ThreadCache* cache = nullptr; static thread_local struct Guard { @@ -131,15 +133,25 @@ const std::shared_ptr& CurrentLogger() noexcept { std::ignore = guard; // mark the thread_local as intentionally used (its dtor is // registered by reaching the declaration above) - if (dead) { + if (dead) return nullptr; + if (cache == nullptr && create) cache = new ThreadCache(); + return cache; +} + +} // namespace + +const std::shared_ptr& CurrentLogger() noexcept { + ThreadCache* cache = AccessThreadCache(/*create=*/true); + if (cache == nullptr) { // Thread teardown after the cache was freed: serve an immortal no-op so a log // from a later thread_local destructor is safe. Such teardown logs are dropped. static auto* fallback = new std::shared_ptr(Logger::Noop()); return *fallback; } - if (cache == nullptr) { - cache = new ThreadCache(); - } + + // A scoped override wins -- no lock, no gen load. After the cache check (deref is + // teardown-safe), before the refresh (keeps the override path cheapest). + if (cache->override_) return cache->override_; DefaultSlot& slot = Slot(); uint64_t current = slot.gen.load(std::memory_order_relaxed); @@ -151,6 +163,20 @@ const std::shared_ptr& CurrentLogger() noexcept { return cache->logger; } +std::shared_ptr ExchangeThreadOverride(std::shared_ptr next) noexcept { + ThreadCache* cache = AccessThreadCache(/*create=*/true); + if (cache == nullptr) return {}; // tearing down -> binding a scope is a no-op + std::shared_ptr prev = std::move(cache->override_); + cache->override_ = std::move(next); + return prev; // empty == no override was active +} + +void RestoreThreadOverride(std::shared_ptr prev) noexcept { + ThreadCache* cache = AccessThreadCache(/*create=*/false); // never allocate to restore + if (cache == nullptr) return; // dead or never created -> nothing to restore + cache->override_ = std::move(prev); +} + void Emit(Logger& logger, LogLevel level, const std::source_location& location, std::string&& message) { logger.Log(LogMessage{.level = level, @@ -171,4 +197,21 @@ void EmitFormatError(Logger& logger, LogLevel level, } // namespace internal +ScopedLogger::ScopedLogger(std::shared_ptr logger) noexcept + : previous_(internal::ExchangeThreadOverride(std::move(logger))) {} + +ScopedLogger::~ScopedLogger() { internal::RestoreThreadOverride(std::move(previous_)); } + +std::shared_ptr GetCurrentLogger() { + ThreadCache* cache = internal::AccessThreadCache(/*create=*/false); + if (cache == nullptr) { + // Teardown, or no per-thread cache ever created -> no override possible. + // GetDefaultLogger() touches only the immortal slot (no thread_local), so it + // stays valid during teardown, and is never null. + return GetDefaultLogger(); + } + if (cache->override_) return cache->override_; + return GetDefaultLogger(); +} + } // namespace iceberg diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h index 75f9037c2..0b5cb31ef 100644 --- a/src/iceberg/logging/logger.h +++ b/src/iceberg/logging/logger.h @@ -187,6 +187,14 @@ class ICEBERG_EXPORT Logger { /// logging macros use the cheaper internal hot-path accessor instead. ICEBERG_EXPORT std::shared_ptr GetDefaultLogger(); +/// \brief Return the effective logger for this thread (never null): the active +/// ScopedLogger binding if any, else the global default. +/// +/// Off the hot path -- returns an owning copy, e.g. to capture the current logger +/// and re-bind it on a worker thread (see ScopedLogger). During teardown, prefer +/// the Log(...) overloads over emitting through this handle. +ICEBERG_EXPORT std::shared_ptr GetCurrentLogger(); + /// \brief Install a new process-global default logger. /// /// A null argument installs the no-op logger. Thread-safe; intended for @@ -200,6 +208,41 @@ ICEBERG_EXPORT void SetDefaultLogger(std::shared_ptr logger); /// means (this, SetLevel on a held handle, or Initialize) takes effect immediately. ICEBERG_EXPORT void SetDefaultLevel(LogLevel level); +/// \brief Bind a logger for the current thread until this object leaves scope. +/// +/// The default logging path on this thread -- CurrentLogger(), Log(level, ...), +/// and the LOG_* macros -- routes to \p logger instead of the global default; +/// explicit Log(logger, ...) is unaffected. Bindings nest and restore on exit, and +/// nullptr masks any enclosing binding back to the global default. Lets an engine +/// route Iceberg's own logs into a per catalog/session/query/task context with no +/// call-site changes. +/// +/// \code +/// auto query_log = std::make_shared(); +/// iceberg::ScopedLogger bind(query_log); // this thread, this scope +/// iceberg::Log(LogLevel::kInfo, "scan {}", id); // -> query_log +/// \endcode +/// +/// Stack-only and same-thread (non-copyable, non-movable). For thread pools, +/// capture on the submitting thread and re-bind on the worker: +/// \code +/// auto captured = iceberg::GetCurrentLogger(); +/// pool.submit([captured, work] { iceberg::ScopedLogger bind(captured); work(); }); +/// \endcode +class ICEBERG_EXPORT ScopedLogger { + public: + explicit ScopedLogger(std::shared_ptr logger) noexcept; + ~ScopedLogger(); + + ScopedLogger(const ScopedLogger&) = delete; + ScopedLogger& operator=(const ScopedLogger&) = delete; + ScopedLogger(ScopedLogger&&) = delete; + ScopedLogger& operator=(ScopedLogger&&) = delete; + + private: + std::shared_ptr previous_; +}; + // --------------------------------------------------------------------------- // Using the API directly (the LOG_* macros that wrap this are added later in // the stack). Example: a custom sink, installed as the process default. diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc index a9f4d8dd0..95b270a82 100644 --- a/src/iceberg/test/logger_test.cc +++ b/src/iceberg/test/logger_test.cc @@ -188,6 +188,190 @@ TEST(LoggerTest, ConcurrentTeardownAndSwapIsSafe) { SUCCEED(); } +// --- Per-context routing: ScopedLogger + GetCurrentLogger --- + +TEST(LoggerTest, ScopedLoggerOverridesDefaultPath) { + auto global = std::make_shared(); + auto scoped = std::make_shared(); + ScopedDefaultLogger guard(global); + { + ScopedLogger bind(scoped); + EXPECT_EQ(internal::CurrentLogger().get(), scoped.get()); + Log(LogLevel::kInfo, "hi {}", 1); + } + EXPECT_EQ(scoped->count(), 1u); + EXPECT_EQ(global->count(), 0u); +} + +TEST(LoggerTest, ScopedLoggerRestoresOnScopeExit) { + auto global = std::make_shared(); + auto scoped = std::make_shared(); + ScopedDefaultLogger guard(global); + { + ScopedLogger bind(scoped); + } + EXPECT_EQ(internal::CurrentLogger().get(), global.get()); + Log(LogLevel::kInfo, "back"); + EXPECT_EQ(global->count(), 1u); + EXPECT_EQ(scoped->count(), 0u); +} + +TEST(LoggerTest, ExplicitLoggerBypassesOverride) { + auto scoped = std::make_shared(); + auto explicit_sink = std::make_shared(); + ScopedDefaultLogger guard(std::make_shared()); + ScopedLogger bind(scoped); + Log(*explicit_sink, LogLevel::kInfo, "e {}", 1); + EXPECT_EQ(explicit_sink->count(), 1u); + EXPECT_EQ(scoped->count(), 0u); +} + +TEST(LoggerTest, NestedScopedLoggersRestoreInLifo) { + auto global = std::make_shared(); + auto x = std::make_shared(); + auto y = std::make_shared(); + ScopedDefaultLogger guard(global); + { + ScopedLogger a(x); + EXPECT_EQ(internal::CurrentLogger().get(), x.get()); + { + ScopedLogger b(y); + EXPECT_EQ(internal::CurrentLogger().get(), y.get()); + } + EXPECT_EQ(internal::CurrentLogger().get(), x.get()); + } + EXPECT_EQ(internal::CurrentLogger().get(), global.get()); +} + +TEST(LoggerTest, ScopedLoggerNullMasksToGlobalDefault) { + auto global = std::make_shared(); + auto x = std::make_shared(); + ScopedDefaultLogger guard(global); + ScopedLogger a(x); + { + ScopedLogger mask(nullptr); + EXPECT_EQ(internal::CurrentLogger().get(), global.get()); // not x, not Noop + EXPECT_FALSE(internal::CurrentLogger()->IsNoop()); + } + EXPECT_EQ(internal::CurrentLogger().get(), x.get()); // enclosing binding restored +} + +TEST(LoggerTest, GetCurrentLoggerReturnsOverrideThenDefault) { + auto global = std::make_shared(); + auto scoped = std::make_shared(); + ScopedDefaultLogger guard(global); + EXPECT_EQ(GetCurrentLogger().get(), global.get()); + { + ScopedLogger bind(scoped); + EXPECT_EQ(GetCurrentLogger().get(), scoped.get()); + } + EXPECT_EQ(GetCurrentLogger().get(), global.get()); +} + +TEST(LoggerTest, GetCurrentLoggerOnFreshThreadReturnsDefault) { + auto global = std::make_shared(); + ScopedDefaultLogger guard(global); + Logger* seen = nullptr; + std::thread([&] { seen = GetCurrentLogger().get(); }).join(); // never used a scope + EXPECT_EQ(seen, global.get()); +} + +TEST(LoggerTest, ThreadPoolPropagationPattern) { + auto global = std::make_shared(); + auto scoped = std::make_shared(); + ScopedDefaultLogger guard(global); + ScopedLogger bind(scoped); + auto captured = GetCurrentLogger(); // capture the effective logger at "submit" + EXPECT_EQ(captured.get(), scoped.get()); + std::thread([captured] { + ScopedLogger rebind(captured); // re-bind on the worker thread + Log(LogLevel::kInfo, "task {}", 7); + }).join(); + EXPECT_EQ(scoped->count(), 1u); + EXPECT_EQ(global->count(), 0u); +} + +TEST(LoggerTest, WorkerOverrideDoesNotLeakAcrossTasksOnReusedThread) { + auto global = std::make_shared(); + auto o1 = std::make_shared(); + ScopedDefaultLogger guard(global); + Logger* during = nullptr; + Logger* between = nullptr; + std::thread([&] { + { + ScopedLogger b1(o1); + during = GetCurrentLogger().get(); + } + between = GetCurrentLogger().get(); // no scope active -> global default + }).join(); + EXPECT_EQ(during, o1.get()); + EXPECT_EQ(between, global.get()); +} + +// Binding/unbinding a ScopedLogger from a thread_local destructor that runs after +// the per-thread cache was freed (Probe constructed before CurrentLogger is first +// touched) must no-op against the dead cache, never touch freed memory. Run under +// ASan/TSan in CI for full signal. +TEST(LoggerTest, ScopedLoggerBindUnbindDuringTeardownIsSafe) { + std::thread([] { + struct Probe { + ~Probe() { + ScopedLogger late(std::make_shared()); // ctor: no-op on dead + const auto& l = internal::CurrentLogger(); // -> Noop fallback + if (l) l->ShouldLog(LogLevel::kError); + } // ~ScopedLogger: restore is a no-op on dead + }; + static thread_local Probe probe; + std::ignore = probe; // constructed first + std::ignore = internal::CurrentLogger(); // cache created after -> freed first + }).join(); + SUCCEED(); +} + +// The override path deliberately skips the generation refresh, but a swap that +// happened while an override was active must still be observed on the first call +// after the override is popped (gen is monotonic). +TEST(LoggerTest, OverrideActiveSkipsGenRefreshButSwapStillSeenAfterPop) { + auto a = std::make_shared(); + auto b = std::make_shared(); + auto c = std::make_shared(); + ScopedDefaultLogger guard(a); + EXPECT_EQ(internal::CurrentLogger().get(), a.get()); + { + ScopedLogger bind(b); + SetDefaultLogger(c); // bump gen while the override is active + EXPECT_EQ(internal::CurrentLogger().get(), b.get()); // override wins + } + EXPECT_EQ(internal::CurrentLogger().get(), c.get()); // swap seen after pop +} + +// Many short-lived threads each bind a ScopedLogger and tear down while another +// thread swaps the global default. Run under ASan/TSan in CI. +TEST(LoggerTest, ConcurrentOverrideTeardownAndSwapIsSafe) { + auto a = std::make_shared(); + ScopedDefaultLogger guard(a); + std::atomic stop{false}; + std::thread swapper([&] { + auto b = std::make_shared(); + while (!stop.load(std::memory_order_relaxed)) { + SetDefaultLogger(b); + SetDefaultLogger(a); + } + }); + for (int i = 0; i < 100; ++i) { + std::thread worker([] { + auto local = std::make_shared(); + ScopedLogger bind(local); + const auto& l = internal::CurrentLogger(); + if (l) l->ShouldLog(LogLevel::kError); + }); + worker.join(); + } + stop.store(true, std::memory_order_relaxed); + swapper.join(); + SUCCEED(); +} + // --- Function-style API (non-macro): overloaded Log() --- TEST(LoggerTest, LogToExplicitLoggerFormats) {