C++ thread_local | Thread-Local Storage (TLS) Guide
Key points of this article
Practical guide to C++ thread_local: basics, examples, and pitfalls.
Introduction
C++11 thread_local gives each thread independent storage, which helps you write thread-safe code without synchronizing every access. You can manage per-thread data in multi-threaded programs without locks.
1. thread_local basics
Concept
#include <thread>
#include <iostream>
// Every thread gets its own `counter`, initialized to 0 for that thread.
thread_local int counter = 0;

// Bumps the calling thread's private counter and prints it next to the
// thread id, so separate threads can be seen counting independently.
void func() {
    ++counter;
    const std::thread::id self = std::this_thread::get_id();
    std::cout << "Thread " << self << ": " << counter << std::endl;
}
// Runs func() on two threads and waits for both of them to finish.
int main() {
    std::thread workers[] = {std::thread(func), std::thread(func)};
    for (std::thread& w : workers) {
        w.join();
    }
}
Basic usage
#include <thread>
#include <iostream>
// Thread-private integer; each thread sees its own copy starting at 0.
thread_local int x = 0;

// Adds one to the calling thread's `x` and reports the new value together
// with the id of the thread that ran it.
void worker() {
    x += 1;
    std::cout << "Thread " << std::this_thread::get_id() << ": " << x
              << std::endl;
}
int main() {
std::thread t1(worker);
std::thread t2(worker);
t1.join();
t2.join();
}
2. Practical examples
Example 1: Per-thread request counter
#include <thread>
#include <vector>
#include <iostream>
// Number of requests handled so far by the calling thread.
thread_local size_t requestCount = 0;

// Records one more handled request for the calling thread and prints that
// thread's running total.
void handleRequest() {
    const size_t handled = ++requestCount;
    std::cout << "Thread " << std::this_thread::get_id()
              << " requests: " << handled << std::endl;
}
// Launches five threads; each one handles three requests, so every thread
// prints its own totals 1, 2 and 3.
int main() {
    const auto serveBatch = [] {
        int remaining = 3;
        while (remaining-- > 0) {
            handleRequest();
        }
    };

    std::vector<std::thread> threads;
    for (int started = 0; started != 5; ++started) {
        threads.emplace_back(serveBatch);
    }

    for (std::thread& t : threads) {
        t.join();
    }
}
Example 2: Per-thread buffer
#include <thread>
#include <vector>
#include <iostream>
// Per-thread staging buffer: process() appends values to it and clears it
// after handing the contents to flush().
thread_local std::vector<int> buffer;
// Reports how many items are in `buf`. The buffer is only inspected, never
// modified.
void flush(const std::vector<int>& buf) {
    const auto itemCount = buf.size();
    std::cout << "Flush: " << itemCount << " items" << std::endl;
}
// Appends `value` to the calling thread's buffer. Once the buffer holds at
// least 100 values it is passed to flush() and then emptied.
void process(int value) {
    buffer.push_back(value);

    // Below the flush threshold: keep accumulating.
    if (buffer.size() < 100) {
        return;
    }

    flush(buffer);
    buffer.clear();
}
int main() {
std::thread t1([] {
for (int i = 0; i < 150; i++) {
process(i);
}
});
t1.join();
}
Example 3: Random number generator
#include <random>
#include <thread>
#include <iostream>
// One Mersenne Twister engine per thread, each seeded from its own
// std::random_device draw, so threads never share generator state.
thread_local std::mt19937 rng(std::random_device{}());

// Returns an integer drawn uniformly from the closed range [1, 100] using the
// calling thread's engine.
int getRandomNumber() {
    return std::uniform_int_distribution<int>(1, 100)(rng);
}
// Two threads each print five random numbers, each line carrying that
// thread's label. Both threads run the same routine and differ only in the
// prefix they are given.
int main() {
    const auto printFive = [](const char* prefix) {
        for (int drawn = 0; drawn != 5; ++drawn) {
            std::cout << prefix << getRandomNumber() << std::endl;
        }
    };

    std::thread t1(printFive, "Thread 1: ");
    std::thread t2(printFive, "Thread 2: ");

    t1.join();
    t2.join();
}
3. Initialization
At thread start
#include <thread>
#include <iostream>
// Each thread's copy of `x` begins life with the value 10.
thread_local int x = 10;

// Prints the value of `x` as seen by the calling thread.
void worker() {
    const int seen = x;
    std::cout << "x = " << seen << std::endl;
}
// Both threads run worker(); since neither modifies `x`, each prints the
// initial value of its own copy.
int main() {
    std::thread pool[] = {std::thread(worker), std::thread(worker)};
    for (std::thread& member : pool) {
        member.join();
    }
}
First use
#include <thread>
#include <iostream>
// Announces that it was invoked and returns 42. The message makes it visible
// how many times an initializer that calls this function actually runs.
int compute() {
    constexpr int kResult = 42;
    std::cout << "compute() called" << std::endl;
    return kResult;
}
// Prints a block-scope thread_local value. Its initializer, compute(), runs
// the first time a given thread reaches the declaration; later calls on that
// thread reuse the stored value.
void func() {
    thread_local int cachedAnswer = compute();
    std::cout << "y = " << cachedAnswer << std::endl;
}
// Calls func() twice on the same worker thread to show that the thread_local
// initializer inside func() runs only on the first call.
int main() {
    const auto callTwice = [] {
        for (int call = 0; call != 2; ++call) {
            func();
        }
    };

    std::thread t1(callTwice);
    t1.join();
}
4. Common problems
Problem 1: Destruction order
#include <thread>
#include <iostream>
// Marker type whose only job is to announce its own destruction.
struct Resource {
    ~Resource();
};

// Logs a line so the moment of destruction can be observed.
inline Resource::~Resource() {
    std::cout << "Resource destroyed" << std::endl;
}
thread_local Resource r;
void func() {
std::cout << "func() running" << std::endl;
}
int main() {
std::thread t1(func);
t1.join();
}
Problem 2: Class static members
#include <iostream>
// Demonstrates a static data member with thread storage duration: there is
// one `MyClass::x` per thread rather than one per program.
class MyClass {
public:
    // Declaration only; `thread_local` must be repeated on the definition.
    static thread_local int x;
};

// Out-of-class definition; each thread's copy starts at 0.
thread_local int MyClass::x{0};
// Writes to the main thread's copy of MyClass::x and prints it back.
int main() {
    int& slot = MyClass::x;
    slot = 42;
    std::cout << slot << std::endl;  // prints 42
}
Problem 3: Initialization cost
#include <memory>
#include <iostream>
// Stand-in for an object that is costly to build; construction is logged so
// it is visible when (and how often) it happens.
struct ExpensiveObject {
    ExpensiveObject();
};

// Announces each construction.
inline ExpensiveObject::ExpensiveObject() {
    std::cout << "ExpensiveObject constructed" << std::endl;
}
// Per-thread owner of the expensive object. It starts out empty, so a thread
// that never calls func() never pays for construction.
thread_local std::unique_ptr<ExpensiveObject> obj;

// Builds the calling thread's ExpensiveObject on the first call; later calls
// on the same thread find it already present and do nothing.
void func() {
    if (obj) {
        return;
    }
    obj = std::make_unique<ExpensiveObject>();
}
// Calls func() twice on the main thread; only the first call constructs the
// object.
int main() {
    for (int call = 0; call != 2; ++call) {
        func();
    }
}
Problem 4: Memory usage
#include <vector>
#include <thread>
#include <iostream>
// A 1,000,000-element vector per thread. Every thread that uses it gets a
// full copy of its own, so the memory cost multiplies with the thread count.
thread_local std::vector<int> largeBuffer(1000000);

// Prints the element count of the calling thread's buffer.
void worker() {
    const auto elements = largeBuffer.size();
    std::cout << "Buffer size: " << elements << std::endl;
}
int main() {
std::thread t1(worker);
std::thread t2(worker);
t1.join();
t2.join();
}
5. Usage patterns
Pattern 1: Per-thread cache
#include <unordered_map>
#include <string>
// Per-thread memo table. No locking is used because each thread reads and
// writes only its own map.
thread_local std::unordered_map<std::string, int> cache;

// Returns the value for `key`, computing it with computeValue() on the first
// request from the calling thread and serving it from that thread's cache
// afterwards.
//
// computeValue() is not defined in this snippet; it must be declared before
// this function wherever the snippet is used.
int getValue(const std::string& key) {
    // Look the key up once and reuse the iterator, instead of a find()
    // followed by a second hash lookup through operator[].
    const auto hit = cache.find(key);
    if (hit != cache.end()) {
        return hit->second;
    }

    const int value = computeValue(key);
    cache[key] = value;
    return value;
}
Pattern 2: Per-thread statistics
#include <iostream>
// Simple pair of counters, intended to be kept one instance per thread.
struct Statistics {
    size_t count = 0;   // incremented by callers as work is processed
    size_t errors = 0;  // error tally; starts at zero

    // Prints both counters on one line. Marked const because it only reads
    // state, which also allows it to be called on const objects and through
    // const references.
    void print() const {
        std::cout << "Count: " << count << ", Errors: " << errors << std::endl;
    }
};
// Each thread accumulates into its own Statistics instance.
thread_local Statistics stats;

// Counts one processed request against the calling thread's statistics.
void processRequest() {
    ++stats.count;
}
Summary
Key points
- thread_local: independent variable per thread
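- Initialization: once per thread — namespace-scope variables before the thread's first use of them (possibly as early as thread start), block-scope variables the first time the thread executes the declaration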
- Uses: caches, stats, RNGs
- Performance: fast access; initialization cost exists
- Memory: scales with thread count × variable size
thread_local vs global
| Aspect | thread_local | Global |
|---|---|---|
| Thread safety | Yes (per thread) | No (needs sync) |
| Synchronization | Not needed while each thread touches only its own copy | Often required |
| Memory | Per thread | Single instance |
| Performance | Fast reads | Can be slow with locks |
Practical tips
- Use for per-thread caches
- Prefer thread_local for RNGs
- Mind initialization cost
- Watch total memory with many threads
Next steps
- C++ jthread
- C++ random_device
- C++ mutex
Related posts
- C++ async & launch