Compare string performance using C#,C++(STL,boost) and C - part 3, results
Task:
Given an array of strings "word1-word2", "word3-word4", "word5-word6", transform it into the single string "word1:word2:word3:word4:word5:word6".
I use something that I call "idiomatic C" and 4 different "idiomatic" C++ solutions:
1. Using "Boost String Algorithms Library".
2. Using "select_many" kind of function.
3. Using Boost Tokenizer
4. Using "naive" C++ with std::strings.
C code:
// Joins `size` C strings into one newly allocated string. Consecutive elements
// are separated by `c2`, and every `c1` found inside an element is replaced by
// `c2` (e.g. {"a-b","c-d"} with '-' and ':' gives "a:b:c:d").
//
// array - pointers to NUL-terminated strings; only dereferenced when size > 0
// size  - number of entries in `array`
// c1    - character to replace
// c2    - replacement character, also used as the separator between elements
//
// Returns a NUL-terminated buffer allocated with new[]; the caller must free it
// with delete[]. An empty array yields an empty string.
char* do_the_job_in_plain_c(char** array, size_t size, char c1,char c2)
{
    // Besides the characters themselves we need size-1 separators plus one
    // terminating NUL, i.e. `size` extra bytes (or 1 when there is no element).
    size_t desc_size = (size == 0) ? 1 : size;
    for (size_t i = 0; i < size; ++i)
        desc_size += strlen(array[i]);

    char* res_str = new char[desc_size];
    char* out = res_str;
    for (size_t i = 0; i < size; ++i) {
        if (i != 0)
            *out++ = c2;
        // Test before copying so that an empty element copies nothing instead
        // of emitting its NUL and walking past the end of the source string.
        for (const char* p = array[i]; *p; ++p)
            *out++ = (*p == c1) ? c2 : *p;
    }
    *out = 0;
    return res_str;
}
Using "Boost String Algorithms Library".
// Split every string of w on '-' with boost::split, collect all the pieces in
// one vector, then join that vector with ':'.
typedef std::vector< std::string > split_vector_type;
split_vector_type split_vec_res;
for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v){
    split_vector_type split_vec;
    boost::split( split_vec, *v, boost::is_any_of("-") );
    std::copy(split_vec.begin(),split_vec.end(),std::back_inserter(split_vec_res));
}
std::string res = boost::join(split_vec_res,":");
Using "select_many" kind of function
// Step 1: expand every "a-b" entry of w into its words.
const std::vector<std::string> words = select_many(w, boost::bind(split_string, _1, "-"));
// Step 2: glue all the words together with ':'.
std::string res = join_string(words, ":");
Using Boost Tokenizer
std::string result;
for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v){
boost::char_separator<char> sep("-");
tokenizer tok(*v,sep);
for (tokenizer::iterator tok_iter = tok.begin();tok_iter != tok.end(); ++tok_iter){
if(!result.empty())
result+=":";
result+=*tok_iter;
}
}
Using "naive" C++ with std::strings
std::string result;
size_t length = w.size()+1;
for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v)
length+=v->length();
result.reserve(length);
for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v){
const std::vector<std::string>& w1 = split_string(*v,"-");
for(std::vector<std::string>::const_iterator v1 = w1.begin();v1!=w1.end();++v1){
if(!result.empty())
result+=":";
result+=*v1;
}
}
Whole program code:
#include "stdafx.h"
#include <string>
#include <algorithm>
#include <boost/tokenizer.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/bind.hpp>
#define NUM_ITERATIONS 10000
// Small stopwatch on top of the Win32 performance counter.
// Usage: start(), run the code being measured, stop(), then read duration().
class HiPerfTimer
{
public:
    // Reads the counter frequency into `freq`, then the current counter
    // value into `begin`.
    void start()
    {
        QueryPerformanceFrequency(&freq);
        QueryPerformanceCounter(&begin);
    }

    // Reads the current counter value into `end`.
    void stop()
    {
        QueryPerformanceCounter(&end);
    }

    // Counter ticks elapsed between start() and stop(), divided by the
    // counter frequency. Only meaningful after start() and stop() have run.
    double duration()
    {
        return static_cast<double>(end.QuadPart - begin.QuadPart) / freq.QuadPart;
    }

protected:
    LARGE_INTEGER begin;
    LARGE_INTEGER end;
    LARGE_INTEGER freq;
};
// Flat-map over a sequence container: calls `func` on every element of
// `container`, converts each result to the container type and appends its
// elements, in order, to the returned container.
//
// container - input sequence; it is not modified
// func      - callable invoked with each element; its result must be
//             convertible to ContainerT. Taken by value (as the standard
//             algorithms do) so that temporaries such as a boost::bind
//             expression can be passed on any conforming compiler.
//
// Returns the concatenation of all results; empty when `container` is empty.
template<class ContainerT, class Fn> inline
ContainerT select_many(const ContainerT& container, Fn func)
{
    ContainerT result;
    // `typename` is required: const_iterator is a dependent type name.
    for (typename ContainerT::const_iterator it = container.begin(); it != container.end(); ++it)
    {
        const ContainerT expanded = func(*it);
        result.insert(result.end(), expanded.begin(), expanded.end());
    }
    return result;
}
// Splits `str` into tokens separated by any of the characters in `delimiters`.
//
// str        - text to split
// delimiters - set of separator characters (each character is a separator)
//
// Runs of separators are treated as one, and leading/trailing separators are
// ignored, so no empty tokens are produced. An empty string, or a string made
// only of separators, yields an empty vector.
std::vector<std::string> split_string (const std::string& str, const std::string& delimiters)
{
    std::vector<std::string> tokens;
    size_t token_start = str.find_first_not_of(delimiters);
    // npos here means nothing but separators is left: stop without calling
    // substr(npos, ...), which would throw std::out_of_range.
    while (token_start != std::string::npos)
    {
        const size_t token_end = str.find_first_of(delimiters, token_start);
        if (token_end == std::string::npos)
        {
            // Last token runs to the end of the string.
            tokens.push_back(str.substr(token_start));
            break;
        }
        tokens.push_back(str.substr(token_start, token_end - token_start));
        token_start = str.find_first_not_of(delimiters, token_end);
    }
    return tokens;
}
// Concatenates the strings of `v`, inserting `delimiters` between every pair
// of adjacent elements.
//
// v          - strings to join; empty elements are kept, so {"", "a"} joined
//              with ":" gives ":a"
// delimiters - separator text, inserted as a whole between elements
//
// Returns the joined string; empty when `v` is empty.
std::string join_string (const std::vector<std::string>& v,const std::string& delimiters)
{
    std::string s;
    if (v.empty())
        return s;

    // Exact final size: every element plus one separator per gap.
    size_t reserve = delimiters.length() * (v.size() - 1);
    for (std::vector<std::string>::const_iterator the_str = v.begin(); the_str != v.end(); ++the_str)
        reserve += the_str->length();
    s.reserve(reserve);

    // The separator is decided by position, not by whether `s` is still
    // empty, so leading empty elements keep their separator.
    std::vector<std::string>::const_iterator the_str = v.begin();
    s += *the_str;
    for (++the_str; the_str != v.end(); ++the_str)
    {
        s += delimiters;
        s += *the_str;
    }
    return s;
}
// Joins `size` C strings into one newly allocated string. Consecutive elements
// are separated by `c2`, and every `c1` found inside an element is replaced by
// `c2` (e.g. {"a-b","c-d"} with '-' and ':' gives "a:b:c:d").
//
// array - pointers to NUL-terminated strings; only dereferenced when size > 0
// size  - number of entries in `array`
// c1    - character to replace
// c2    - replacement character, also used as the separator between elements
//
// Returns a NUL-terminated buffer allocated with new[]; the caller must free it
// with delete[]. An empty array yields an empty string.
char* do_the_job_in_plain_c(char** array, size_t size, char c1,char c2)
{
    // Besides the characters themselves we need size-1 separators plus one
    // terminating NUL, i.e. `size` extra bytes (or 1 when there is no element).
    size_t desc_size = (size == 0) ? 1 : size;
    for (size_t i = 0; i < size; ++i)
        desc_size += strlen(array[i]);

    char* res_str = new char[desc_size];
    char* out = res_str;
    for (size_t i = 0; i < size; ++i)
    {
        if (i != 0)
            *out++ = c2;
        // Test before copying so that an empty element copies nothing instead
        // of emitting its NUL and walking past the end of the source string.
        for (const char* p = array[i]; *p; ++p)
            *out++ = (*p == c1) ? c2 : *p;
    }
    *out = 0;
    return res_str;
}
void test_select_many(std::vector<std::string>& w){
HiPerfTimer pt;
pt.start();
for (int i = 0; i < NUM_ITERATIONS; ++i)
std::string res = join_string(select_many(w,bind(split_string,_1,("-"))),":");
pt.stop();
printf("select_many- %f sec\n",pt.duration());
}
// Times NUM_ITERATIONS runs of the Boost String Algorithms variant
// (boost::split on '-', then boost::join with ':') over `w` and prints the
// elapsed time.
//
// w - input strings to split and re-join
void test_boost_string_algorithm(std::vector<std::string>& w){
    typedef std::vector< std::string > split_vector_type;

    HiPerfTimer pt;
    pt.start();
    for (int i = 0; i < NUM_ITERATIONS; ++i)
    {
        // Pieces of all input strings, in input order.
        split_vector_type split_vec_res;
        for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v){
            split_vector_type split_vec;
            boost::split( split_vec, *v, boost::is_any_of("-") );
            std::copy(split_vec.begin(),split_vec.end(),std::back_inserter(split_vec_res));
        }
        std::string res = boost::join(split_vec_res,":");
    }
    pt.stop();
    printf("Boost String Algorithms Library - %f sec\n",pt.duration());
}
void test_boost_tokenizer(std::vector<std::string>& w)
{
HiPerfTimer pt;
pt.start();
typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
for (int i = 0; i < NUM_ITERATIONS; ++i) {
std::string result;
for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v){
boost::char_separator<char> sep("-");
tokenizer tok(*v,sep);
for (tokenizer::iterator tok_iter = tok.begin();tok_iter != tok.end(); ++tok_iter){
if(!result.empty())
result+=":";
result+=*tok_iter;
}
}
}
pt.stop();
printf("Boost Tokenizer - %f sec\n",pt.duration());
}
// Times NUM_ITERATIONS runs of the "naive" std::string variant over `w` and
// prints the elapsed time. Each run reserves the output, splits every string
// with split_string on '-' and appends the pieces separated by ':'.
void test_plain_cpp(std::vector<std::string>& w)
{
    typedef std::vector<std::string>::const_iterator word_iterator;

    HiPerfTimer pt;
    pt.start();
    for (int pass = 0; pass < NUM_ITERATIONS; ++pass) {
        std::string result;

        // Reserve all input characters plus one byte per string and one more.
        size_t length = w.size()+1;
        for (word_iterator word = w.begin(); word != w.end(); ++word)
            length += word->length();
        result.reserve(length);

        for (word_iterator word = w.begin(); word != w.end(); ++word) {
            const std::vector<std::string>& w1 = split_string(*word, "-");
            word_iterator piece = w1.begin();
            while (piece != w1.end()) {
                if (result.size() != 0)
                    result.append(":");
                result.append(*piece);
                ++piece;
            }
        }
    }
    pt.stop();
    printf("naive C++ - %f sec\n",pt.duration());
}
void test_plain_c(char** array,size_t size)
{
HiPerfTimer pt;
pt.start();
for (int i = 0; i < NUM_ITERATIONS; ++i)
std::auto_ptr<char> res_str ( do_the_job_in_plain_c(array,size,'-',':') );
pt.stop();
printf("plain C - %f sec\n",pt.duration());
}
// Entry point: builds the sample word list once and runs every benchmark on
// it, the four C++ variants first and the plain C variant last.
int _tmain(int argc, _TCHAR* argv[])
{
    // Writable character arrays: do_the_job_in_plain_c takes char**, and
    // pointing a char* at a string literal is deprecated in C++03 and
    // ill-formed since C++11.
    char words[][12] = {"word1-word2","word3-word4","word5-word6","word7-word8","word9-word0"};
    const size_t word_count = sizeof(words)/sizeof(words[0]);
    char* w1[word_count];
    for (size_t i = 0; i < word_count; ++i)
        w1[i] = words[i];

    // Same data as std::strings, so all variants process identical input.
    std::vector<std::string> w(w1, w1 + word_count);

    test_boost_string_algorithm(w);
    test_select_many(w);
    test_boost_tokenizer(w);
    test_plain_cpp(w);
    test_plain_c(w1,word_count);
    return 0;
}
Комментариев нет:
Отправить комментарий