字符串的查找与替换一直是C++的弱项,运用Boost这个准标准库,可以很好地弥补C++的不足,使针对字符串的操作更加容易。
字符串格式转换:
using namespace std; | |
using namespace boost; | |
int main(int argc, char * argv[]) | |
{ | |
string str[3] = { "100", "102", "3.14159" }; | |
// 字符串转换为数值类型 | |
std::cout << "字符串转为整数: " << lexical_cast<int>(str[0]) << std::endl; | |
std::cout << "字符串转为长整数: " << lexical_cast<long>(str[1]) << std::endl; | |
std::cout << "字符串转为浮点数: " << lexical_cast<float>(str[2]) << std::endl; | |
// 数值类型转化为字符串 | |
std::cout << "数值转为字符串: " << lexical_cast<string>(100) << std::endl; | |
std::cout << "十六进制转为十进制: " << lexical_cast<string>(0x10) << std::endl; | |
system("pause"); | |
return 0; | |
} |
format格式化:
using namespace std; | |
using namespace boost; | |
int main(int argc, char * argv[]) | |
{ | |
// 第一种输出方式: 直接填充字符串 | |
boost::format fmtA("姓名: %s -> 年龄: %d -> 性别: %s"); | |
fmtA %"lyshark"; | |
fmtA % 22; | |
fmtA %"男"; | |
std::string str = fmtA.str(); | |
std::cout << "格式化后: " << str << std::endl; | |
// 第二种方式: 拷贝的使用 | |
boost::format fmtB("姓名: %s -> 年龄: %d -> 性别: %s"); | |
cout << format(fmtB) % "lyshark" % 23 % "男" << std::endl; | |
system("pause"); | |
return 0; | |
} |
string_algo 大小写转化函数
using namespace std; | |
using namespace boost; | |
int main(int argc, char * argv[]) | |
{ | |
string str("readme.log"); | |
if (ends_with(str, "log")) | |
{ | |
cout << "转化为大写: " << to_upper_copy(str) << endl; | |
cout << "转化为小写: " << to_lower_copy(str) << endl; | |
} | |
// 替换开头 | |
replace_first(str, "readme", "lyshark"); | |
std::cout << "替换开头: " << str << std::endl; | |
// 执行删除后缀 | |
vector<char> v(str.begin(), str.end()); | |
vector<char> v2 = to_upper_copy(erase_first_copy(v, ".log")); | |
for (auto x : v2) | |
{ | |
cout << x ; | |
} | |
system("pause"); | |
return 0; | |
} |
判断类
using namespace std; | |
using namespace boost; | |
int main(int argc, char * argv[]) | |
{ | |
string str[6] = { "hello lyshark", "hello LyShark", "ABC", "ABC", "FCE" ,"lyshark"}; | |
cout << "大小写不敏感判断后缀: " << iends_with(str[0], "lyshark") << endl; | |
cout << "大小写敏感判断前缀: " << starts_with(str[1], "Hello") << endl; | |
cout << "测试包含关系: " << contains(str[0], str[5]) << endl; | |
cout << "测试前5个字母是否为小写: " << all(str[0].substr(0, 5), is_lower()) << endl; | |
is_any_of() | |
system("pause"); | |
return 0; | |
} |
修正
using namespace std; | |
using namespace boost; | |
int main(int argc, char * argv[]) | |
{ | |
string str[3] = { "hello LyShark", "hello LyShark", "lyshark" }; | |
cout << "大小写不敏感判断后缀: " << iends_with(str[0], "lyshark") << endl; | |
cout << "大小写敏感判断前缀: " << starts_with(str[1], "Hello") << endl; | |
cout << "测试包含关系: " << contains(str[0], str[2]) << endl; | |
cout << "前5个字母是否为小写: " << all(str[0].substr(0, 5), is_lower()) << endl; | |
cout << "前2个字符是否为字母: " << all(str[1].substr(0,2), is_alpha()) << endl; | |
// 字符串修剪 去空格 | |
boost::format fmt("|%s|\n"); | |
std::string my_string = " hello lyshark "; | |
cout << "删除两端空格: " << fmt %trim_copy(my_string) << endl; | |
cout << "删除左端空格: " << fmt %trim_left_copy(my_string) << endl; | |
trim_right(my_string); | |
cout << "原地删除右端空格: " << fmt %my_string << endl; | |
// 字符串修建 去特殊符号 | |
std::string my_stringa = "2021 happy new Year !!!"; | |
cout << "删除左端数字: " << fmt %trim_left_copy_if(my_stringa, is_digit()) << endl; | |
cout << "删除右端标点: " << fmt %trim_right_copy_if(my_stringa, is_punct()) << endl; | |
cout << "删除两端(标点|数字|空格): " << trim_copy_if(my_stringa, is_punct() || is_digit() || is_space()) << endl; | |
system("pause"); | |
return 0; | |
} |
字符串查找
using namespace std; | |
using namespace boost; | |
int main(int argc, char const *argv[]) | |
{ | |
boost::format fmt("|%s|. pos = %d\n"); | |
std::string my_string = "Long long ago, there was Ago king as long."; | |
iterator_range<std::string::iterator> reg; // 定义迭代区间 | |
// 寻找第一次出现的位置(大小写敏感) | |
reg = find_first(my_string, "Ago"); | |
cout << fmt %reg % (reg.begin() - my_string.begin()) << endl; | |
// 寻找最后一次出现的位置(大小写不敏感) | |
reg = ifind_last(my_string, "ago"); | |
cout << fmt %reg % (reg.begin() - my_string.begin()) << endl; | |
// 寻找第三次出现long的位置(大小写不敏感) | |
reg = ifind_nth(my_string, "long", 2); | |
cout << fmt %reg % (reg.begin() - my_string.begin()) << endl; | |
// 取前四个字符和后四个字符 | |
reg = find_head(my_string, 4); | |
cout << fmt %reg % (reg.begin() - my_string.begin()) << endl; | |
reg = find_tail(my_string, 4); | |
cout << fmt %reg % (reg.begin() - my_string.begin()) << endl; | |
// 找不到则输出 | |
reg = find_first(my_string, "lyshark"); | |
cout << "flag = " << (reg.empty() && !reg) << endl; | |
getchar(); | |
return 0; | |
} |
替换与删除 注意带有_copy的为拷贝,代表可以使用string变量来接收,不带的直接操作原始字符串。
using namespace std; | |
using namespace boost; | |
int main(int argc, char const *argv[]) | |
{ | |
boost::format fmt("|%s|. pos = %d\n"); | |
std::string my_string = "Long long ago, there was Ago king as long."; | |
// 替换开头字符串(两种替换方式) | |
std::string str_copy = replace_first_copy(my_string, "long", "LONG"); | |
cout << "替换后返回字符串: " << str_copy << endl; | |
replace_first(my_string, "ago", "AGO"); | |
cout << "直接替换在原始字符串上: " << my_string << endl; | |
// 替换开头或结尾前后5个字符 | |
replace_tail(my_string, 5, "lyshark"); | |
cout << "替换结尾5个字符: " << my_string << endl; | |
replace_head(my_string, 5, "lyshark"); | |
cout << "替换开头5个字符: " << my_string << endl; | |
// 替换第一次出现long的位置为AGES | |
replace_nth(my_string, "long", 0, "AGES"); | |
cout << "第一次出现位置: " << my_string << endl; | |
// 替换所有出现过的位置 | |
std::string str_copy_a = replace_all_copy(my_string, "lyshark", "LYSSHARK"); | |
cout << "替换所有位置: " << str_copy_a << endl; | |
// 删除第1次出现was位置的字符串 | |
std::string del_str_copy = erase_nth_copy(my_string, "was", 0); | |
cout << "删除后的字符串: " << del_str_copy << endl; | |
// 删除字符串中所有的LYSSHARK | |
erase_all(my_string, "LYSSHARK"); | |
cout << "删除后的字符串: " << my_string << endl; | |
getchar(); | |
return 0; | |
} |
切割合并字符串:
using namespace std; | |
using namespace boost; | |
int main(int argc, char const *argv[]) | |
{ | |
std::string my_string = "lyshark,Link.Zelda:Mario-Ligui+zelda,ZELDA"; | |
std::string my_string_b("hello||lyshark||welcome||link"); | |
// 查找字符串中的特定字符串 | |
deque<std::string> deq; | |
ifind_all(deq, my_string, "zelda"); | |
cout << "查找字符串个数(不区分大小写): " << deq.size() << endl; | |
if (deq.size() == 3) | |
{ | |
for (auto each : deq) | |
cout << "[ " << each << " ]" << endl; | |
} | |
// 切割字符串(1) | |
list<iterator_range<std::string::iterator>> ptr; | |
split(ptr, my_string, is_any_of(",.:-+")); | |
for (auto each : ptr) | |
cout << "[ " << each << " ]" << endl; | |
// 切割字符串(2) | |
ptr.clear(); | |
split(ptr, my_string, is_any_of(".:-"), token_compress_on); | |
for (auto each : ptr) | |
cout << "[ " << each << " ]" << endl; | |
// 合并字符串 | |
std::vector<string> vct; | |
vct.push_back("hello"); | |
vct.push_back("lyshark"); | |
cout << "用空格将其连接起来: " << join(vct, " ") << endl; | |
// 查找迭代器的使用 | |
typedef find_iterator<string::iterator> string_find_iterator; | |
string_find_iterator pos, end; | |
for (pos = make_find_iterator(my_string_b, first_finder("lyshark", is_iequal())); pos != end; ++pos) | |
{ | |
cout << "[ " << *pos << " ]" << endl; | |
} | |
// 分割迭代器的使用 | |
typedef split_iterator<string::iterator> string_split_iterator; | |
string_split_iterator p, endp; | |
for (p = make_split_iterator(my_string_b, first_finder("||", is_iequal())); p != endp; ++p) | |
{ | |
cout << "[ " << *p << " ]" << endl; | |
} | |
getchar(); | |
return 0; | |
} |
正则模块的使用:
正则匹配
using namespace std; | |
using namespace boost; | |
int main(int argc, char const *argv[]) | |
{ | |
using namespace boost::xpressive; | |
// 简单匹配字符串 | |
cregex regxA = cregex::compile("a.c"); | |
cout << "匹配字符串: " << regex_match("abd", regxA) << endl; | |
std::string StringA = "hello lyshark a.c"; | |
cregex regxD = cregex::compile("a.c"); | |
cout << "匹配字符串: " << regex_match((char *)&StringA, regxD) << endl; | |
// 数组方式引用正则 | |
cregex_compiler regc; | |
regc["regxA"] = regc.compile("a|b|c"); | |
regc["regxB"] = regc.compile("\\d*"); | |
cout << regex_match("abcdefg", regc["regxA"]) << endl; | |
cout << regex_match("123123", regc["regxB"]) << endl; | |
// 使用C++ 11 匹配身份证 | |
cregex regxB = cregex::compile(R"---(\d{6}(1|2)\d{3}(0|1)\d[0-3]\d\d{3}(X|\d))---", icase); | |
cout << "验证身份证: " << regex_match("513436200002247099", regxB) << endl; | |
// 使用C++ 98 匹配身份证 | |
cregex regxC = cregex::compile("\\d{6}((1|2)\\d{3})((0|1)\\d)([0-3]\\d)(\\d{3}(X|\\d))", icase); | |
cmatch what; | |
regex_match("513436200002247099", what, regxC); | |
for (auto &each : what) | |
{ | |
cout << "[ " << each << " ]" << endl; | |
} | |
cout << "生日为: " << what[1] << what[3] << what[5] << endl; | |
// 正则匹配开头结尾 | |
string StringB("lyshark.log"); | |
sregex start_regx = sregex::compile("^ly.*"); | |
cout << "匹配开头: " << regex_match(StringB, start_regx) << endl; | |
sregex end_regx = sregex::compile(".*log$"); | |
cout << "匹配结尾: " << regex_match(StringB, end_regx) << endl; | |
getchar(); | |
return 0; | |
} |
正则查找替换
using namespace std; | |
using namespace boost; | |
int main(int argc, char const *argv[]) | |
{ | |
using namespace boost::xpressive; | |
// 正则查找特定字符串 | |
char my_stringA[] = "This is power-studio territory"; | |
cregex regx = cregex::compile("(power)-(.{6})", icase); | |
cmatch what; | |
regex_search(my_stringA, what, regx); | |
if (what.size() != 0) | |
{ | |
for (int x = 0; x < what.size(); x++) | |
{ | |
cout << what[x] << endl; | |
} | |
} | |
// 正则替换特定字符串 | |
std::string my_stringB = "2020 Happy New Year !!!"; | |
sregex regxA = sregex::compile("^(\\d| )*"); // 匹配开头数字 | |
sregex regxB = sregex::compile("!*$"); // 匹配末尾标点符号 | |
cout << regex_replace(my_stringB, regxA, "2021") << endl; | |
cout << regex_replace(my_stringB, regxB, "") << endl; | |
getchar(); | |
return 0; | |
} |
正则迭代与分词
using namespace std; | |
using namespace boost; | |
int main(int argc, char const *argv[]) | |
{ | |
using namespace boost::xpressive; | |
// 正则迭代输出 | |
std::string my_string_a = "power-shell, power-studio,power-engine,super-user"; | |
sregex regxA = sregex::compile("power-(\\w{5})", icase); | |
sregex_iterator start_ptr(my_string_a.begin(), my_string_a.end(), regxA); | |
sregex_iterator end_ptr; | |
for (; start_ptr != end_ptr; ++start_ptr) | |
{ | |
cout << "[ " << (*start_ptr)[0] << " ]" << endl; | |
} | |
// 正则分词 | |
char my_string_b[] = "*lyshark*||+administrator+||root!!||metaper"; | |
cregex regxB = cregex::compile("\\w+", icase); // 过滤出所有的字母 | |
cregex_token_iterator pos(my_string_b, my_string_b + strlen(my_string_b), regxB); | |
for (; pos != cregex_token_iterator(); ++pos) | |
{ | |
cout << "[ " << *pos << " ]" << endl; | |
} | |
// 正则切割 | |
cregex split_regx = cregex::compile("\\|\\|"); | |
pos = cregex_token_iterator(my_string_b, my_string_b + strlen(my_string_b), split_regx, -1); | |
for (; pos != cregex_token_iterator(); ++pos) | |
{ | |
cout << "[ " << *pos << " ]" << endl; | |
} | |
// 正则格式化(小写转大写) | |
struct formater | |
{ | |
string operator()(cmatch const &m) const | |
{ | |
return boost::to_upper_copy(m[0].str()); | |
} | |
}; | |
char my_string_c[] = "*lyshark*||+administrator+||root!!||metaper"; | |
cregex regxC = cregex::compile("\\w+", icase); | |
cout << regex_replace(my_string_c, regxC, formater()) << endl; | |
getchar(); | |
return 0; | |
} |
分词器使用:
using namespace std; | |
using namespace boost; | |
int main(int argc, char const *argv[]) | |
{ | |
std::string strTag = "explorer.exe,1024"; | |
// 定义分割符号为逗号与空格 定义分词器 | |
boost::char_separator<char> sep(", "); | |
typedef boost::tokenizer<boost::char_separator<char>> CustonTokenizer; | |
CustonTokenizer tok(strTag, sep); | |
// 将分词结果放入vector链表 | |
std::vector<std::string> vecSegTag; | |
for (CustonTokenizer::iterator beg = tok.begin(); beg != tok.end(); ++beg) | |
{ | |
vecSegTag.push_back(*beg); | |
} | |
// const_case 将string转换为char 8 | |
std::string ref_string = const_cast<char *>(vecSegTag[0].c_str()); | |
cout <<"进程名: " << ref_string << endl; | |
cout << "分词数: " << vecSegTag.size() << endl; | |
getchar(); | |
return 0; | |
} |