본문으로 건너뛰기 [2026] C++ Regular Expressions — Complete std::regex Guide

[2026] C++ Regular Expressions — Complete std::regex Guide

[2026] C++ Regular Expressions — Complete std::regex Guide

Key takeaways

Core concepts and practical tips for C++ regular expressions, from basic matching to iterators and common pitfalls.

Basic usage

#include <regex>
#include <iostream>
using namespace std;

// Reports whether the sample text contains at least one run of digits.
int main() {
    const std::string sample = "abc123def456";
    const std::regex digits("\\d+");  // one or more consecutive digits

    // regex_search succeeds as soon as any substring matches the pattern.
    const bool hasDigits = std::regex_search(sample, digits);
    if (hasDigits) {
        std::cout << "found digits" << std::endl;
    }
}
regex pattern("\\d+");

string s1 = "123";
string s2 = "abc123";

// regex_match: whole string must match
cout << regex_match(s1, pattern) << endl;  // 1 (true)
cout << regex_match(s2, pattern) << endl;  // 0 (false)

// regex_search: substring match
cout << regex_search(s1, pattern) << endl;  // 1
cout << regex_search(s2, pattern) << endl;  // 1

Capture groups

// Three dash-separated digit groups, each wrapped in its own capture group.
const std::regex pattern("(\\d{3})-(\\d{4})-(\\d{4})");
const std::string phone = "010-1234-5678";

std::smatch match;
const bool matched = std::regex_match(phone, match, pattern);
if (matched) {
    // Index 0 is the whole match; indices 1..3 are the parenthesized groups.
    std::cout << "full: " << match.str(0) << std::endl;   // 010-1234-5678
    std::cout << "part1: " << match.str(1) << std::endl;  // 010
    std::cout << "part2: " << match.str(2) << std::endl;  // 1234
    std::cout << "part3: " << match.str(3) << std::endl;  // 5678
}

Practical examples

Example 1: Email validation

/// Checks whether `email` has the shape local-part@domain.tld.
///
/// The local part may contain letters, digits and `._%+-`; the domain may
/// contain letters, digits, `.` and `-`; the final label must be at least two
/// letters. This is a pragmatic shape check, not full address validation.
///
/// @param email  Candidate address; the whole string must match.
/// @return true when `email` matches the pattern, false otherwise
///         (including for an empty string).
bool isValidEmail(const std::string& email) {
    // Compiled once on first use and reused afterwards: constructing a
    // std::regex is far more expensive than running a match.
    static const std::regex pattern(
        R"(^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$)");
    return std::regex_match(email, pattern);
}

// Demonstrates isValidEmail on one well-formed and one malformed address.
// The first literal previously held an obfuscation placeholder that could
// never validate, contradicting the expected output of 1.
int main() {
    std::cout << isValidEmail("user@example.com") << std::endl;  // 1
    std::cout << isValidEmail("invalid.email") << std::endl;     // 0
}

Example 2: URL parsing

/// Components of a URL as split by parseURL. A component that is absent
/// from the input is left as an empty string.
struct URL {
    std::string protocol;  ///< text before "://", e.g. "https"
    std::string host;      ///< host name, without the port
    std::string port;      ///< port digits, or "" when omitted
    std::string path;      ///< path beginning with '/', or "" when omitted
};

/// Splits a URL of the form protocol://host[:port][/path] into its parts.
///
/// @param url  Text to parse; the whole string must match the pattern.
/// @return The captured components, or a value-initialized (all-empty) URL
///         when `url` does not have the expected shape.
URL parseURL(const std::string& url) {
    // Compiled once on first use and reused afterwards: constructing a
    // std::regex is far more expensive than running a match.
    static const std::regex pattern(R"(^(\w+)://([^:/]+)(?::(\d+))?(/.*)?$)");

    std::smatch match;
    if (!std::regex_match(url, match, pattern)) {
        return {};
    }

    // Optional groups that did not participate (port, path) yield "".
    return {
        match[1].str(),  // protocol
        match[2].str(),  // host
        match[3].str(),  // port
        match[4].str()   // path
    };
}

int main() {
    auto url = parseURL("https://example.com:8080/path/to/page");
    
    cout << "protocol: " << url.protocol << endl;
    cout << "host: " << url.host << endl;
    cout << "port: " << url.port << endl;
    cout << "path: " << url.path << endl;
}

Example 3: String replacement

#include <regex>

// Shows regex_replace substituting every match, then only the first match.
int main() {
    const std::regex greeting("Hello");
    const std::string source = "Hello World, Hello C++";

    // Default flags: every occurrence is substituted.
    const std::string allReplaced = std::regex_replace(source, greeting, "Hi");
    std::cout << allReplaced << std::endl;  // Hi World, Hi C++

    // format_first_only: only the leftmost occurrence is substituted.
    const std::string firstReplaced = std::regex_replace(
        source, greeting, "Hi", std::regex_constants::format_first_only);
    std::cout << firstReplaced << std::endl;  // Hi World, Hello C++
}

Example 4: Log parsing

/// One parsed log line of the form "[timestamp] [LEVEL] message".
struct LogEntry {
    std::string timestamp;  ///< text inside the first pair of brackets
    std::string level;      ///< text inside the second pair of brackets
    std::string message;    ///< remainder of the line after the level
};

/// Parses newline-separated log text into structured entries.
///
/// Each line is expected to look like `[<digits - : space>] [<word>] <text>`.
/// Lines that do not match (including blank lines) are skipped. Both LF and
/// CRLF line endings are accepted.
///
/// @param log  Full log text; may be empty.
/// @return Entries in input order; empty when no line matches.
std::vector<LogEntry> parseLog(const std::string& log) {
    // Compiled once on first use and reused afterwards: constructing a
    // std::regex is far more expensive than running a match.
    static const std::regex pattern(R"(\[([\d\-: ]+)\] \[(\w+)\] (.+))");

    std::vector<LogEntry> entries;
    std::istringstream lines(log);
    std::string line;
    std::smatch match;  // reused across lines; refilled by each regex_match

    while (std::getline(lines, line)) {
        // getline only strips '\n'. With CRLF input a trailing '\r' remains,
        // and '.' does not match line terminators, so the line would be
        // rejected as a whole. Drop it before matching.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }

        if (std::regex_match(line, match, pattern)) {
            entries.push_back({
                match[1].str(),  // timestamp
                match[2].str(),  // level
                match[3].str()   // message
            });
        }
    }

    return entries;
}

int main() {
    string log = R"([2026-03-11 10:30:00] [INFO] server started
[2026-03-11 10:30:05] [ERROR] connection failed
[2026-03-11 10:30:10] [WARN] retrying)";
    
    auto entries = parseLog(log);
    
    for (const auto& entry : entries) {
        cout << entry.timestamp << " | " 
             << entry.level << " | " 
             << entry.message << endl;
    }
}

Iterators

// declare and initialize
string text = "abc123def456ghi789";
regex pattern("\\d+");

// find every match
sregex_iterator it(text.begin(), text.end(), pattern);
sregex_iterator end;

while (it != end) {
    cout << it->str() << endl;  // 123, 456, 789
    ++it;
}

Tokenization

string text = "apple,banana,cherry";
regex delimiter(",");

// token iterator
sregex_token_iterator it(text.begin(), text.end(), delimiter, -1);
sregex_token_iterator end;

while (it != end) {
    cout << *it << endl;  // apple, banana, cherry
    ++it;
}

Common pitfalls

Pitfall 1: Escaping

Typical C/C++ string literal issue:

// wrong: insufficient escaping
// The C++ string literal processes the backslash before the regex engine ever
// sees the text. "\d" is not a standard C++ escape sequence, so compilers
// typically warn and the regex does not receive the intended \d.
regex pattern("\d+");  // the pattern text is NOT backslash-d-plus

// correct: double backslash
// "\\" in the literal yields one backslash, so the regex receives \d+
regex pattern("\\d+");

// raw string (recommended)
// Everything between R"( and )" is taken literally; no doubling needed.
regex pattern(R"(\d+)");

Pitfall 2: Performance

// bad: a new regex object is constructed on every iteration
for (const string& text : texts) {
    regex pattern("\\d+");  // wasteful: the same pattern is rebuilt for each text
    regex_search(text, pattern);
}

// good: construct the regex once, before the loop, and reuse it for every text
regex pattern("\\d+");
for (const string& text : texts) {
    regex_search(text, pattern);
}

Pitfall 3: Greedy matching

Typical C/C++ example:

string html = "<div>content</div>";

// greedy: .* takes as many characters as it can, so the match runs from the
// first '<' to the last '>'
regex greedy("<.*>");
// matches: <div>content</div> (whole string)

// non-greedy: .*? takes as few characters as it can, so each match ends at
// the nearest '>'
regex nonGreedy("<.*?>");
// matches: <div>, </div> separately

Regex syntax (ECMAScript-style overview)

Illustrative C/C++ comments for common constructs:

// character classes
\d  // digit [0-9]
\w  // word [a-zA-Z0-9_]
\s  // whitespace
.   // any character (except newline, depending on flags)

// quantifiers
*   // zero or more
+   // one or more
?   // zero or one
{n} // exactly n
{n,m}  // between n and m

// anchors
^   // start
$   // end
\b  // word boundary

// groups
()  // capturing group
(?:)  // non-capturing group

FAQ

Q1: When should I use regular expressions?

A:

  • String validation
  • Parsing
  • Search and replace
  • Data extraction

Q2: What about performance?

A: Complex patterns can be slow. For very simple checks, string member functions may be faster.

Q3: Why use raw string literals?

A: R"(...)" avoids manual backslash escaping in the pattern text.

Q4: ECMAScript vs POSIX?

A: The default is ECMAScript; you can change the grammar with regex_constants when constructing std::regex.

Q5: How do I debug regex?

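A: Start with a small pattern and a few sample inputs, then grow it one piece at a time. Print every capture group from the std::smatch to see what each part actually matched, and wrap regex construction in try/catch for std::regex_error, whose what() and code() describe malformed patterns.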

Q6: Where can I learn more?

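A: The <regex> reference pages on cppreference.com, including the description of the modified ECMAScript grammar that std::regex uses by default, are the most direct source.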


Posts that connect well with this topic:

  • C++ std::regex | Regular expressions in C++11
  • C++ Adapter pattern | Interface adaptation and compatibility
  • C++ string vs string_view | Comparison
  • C++ CRTP | Static polymorphism and compile-time optimization
  • C++ Command pattern | Undo and macro systems