суббота, 1 марта 2008 г.

Compare string performance using C#,C++(STL,boost) and C - part 2, C++(STL) and C.

Compare string performance using C#,C++(STL,boost) and C - part 1, C#
Compare string performance using C#,C++(STL,boost) and C - part 3, results
Task:
You have an array of strings "word1-word2", "word3-word4", "word5-word6"; you need to transform it into the string "word1:word2:word3:word4:word5:word6".


I use something that I call "idiomatic C" and 4 different "idiomatic" C++ solutions:

1. Using "Boost String Algorithms Library".
2. Using "select_many" kind of function.
3. Using Boost Tokenizer
4. Using "naive" C++ with std::strings.

C code:

/**
 * Joins `size` C strings into one newly allocated string, replacing every
 * occurrence of `c1` with `c2` and using `c2` as the separator between inputs.
 *
 * Example: {"word1-word2", "word3-word4"} with c1 = '-' and c2 = ':' gives
 * "word1:word2:word3:word4".
 *
 * @param array  `size` NUL-terminated strings; they are only read.
 * @param size   number of entries in `array`; 0 yields an empty string.
 * @param c1     character to replace.
 * @param c2     replacement character, also written between inputs.
 * @return buffer allocated with new[]; the caller releases it with delete[].
 */
char* do_the_job_in_plain_c(char** array, size_t size, char c1,char c2)
{
    // No inputs: return "" instead of writing the terminator at res_str[-1].
    if(size == 0){
        char* empty_str = new char[1];
        empty_str[0] = 0;
        return empty_str;
    }

    // One extra byte per input for its trailing separator; the slot of the
    // last separator is reused for the terminating NUL.
    size_t desc_size=size;
    for(size_t i=0;i<size;++i)
        desc_size+=strlen(array[i]);

    char* res_str = new char[desc_size];
    char* res_str_t = res_str;
    for(size_t i=0;i<size;++i){
        // Test before reading (not do/while) so an empty input is never
        // scanned past its terminator.
        for(const char* p = array[i];*p;++p)
            *res_str_t++ = (*p == c1)?c2:*p;
        *res_str_t++ = c2;
    }
    res_str[desc_size-1] = 0;
    return res_str;
}



Using "Boost String Algorithms Library".

        std::string result;

        typedef std::vector< std::string > split_vector_type;

        split_vector_type split_vec_res;

        for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v){

            split_vector_type split_vec;

            boost::split( split_vec, *v, boost::is_any_of("-") );

            std::copy(split_vec.begin(),split_vec.end(),std::back_inserter(split_vec_res));

        }

        std::string res = boost::join(split_vec_res,":");



Using "select_many" kind of function

// Split each input on "-" and flatten the pieces into one list, then join it with ":".
const std::vector<std::string>& tokens = select_many(w, boost::bind(split_string, _1, "-"));
std::string res = join_string(tokens, ":");


Using Boost Tokenizer

        std::string result;

        for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v){

            boost::char_separator<char> sep("-");

            tokenizer tok(*v,sep);   

            for (tokenizer::iterator tok_iter = tok.begin();tok_iter != tok.end(); ++tok_iter){

                if(!result.empty())

                    result+=":";

                result+=*tok_iter;

            }

        }



Using "naive" C++ with std::strings

        std::string result;

 

        size_t length = w.size()+1;

        for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v)

            length+=v->length();

        result.reserve(length);           

 

        for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v){

            const std::vector<std::string>& w1 = split_string(*v,"-");

            for(std::vector<std::string>::const_iterator v1 = w1.begin();v1!=w1.end();++v1){

                if(!result.empty())

                    result+=":";

                result+=*v1;

            }

        }



Whole program code:

#include "stdafx.h"

#include <string>

#include <algorithm>

 

#include <boost/tokenizer.hpp>

#include <boost/algorithm/string.hpp>

#include <boost/bind.hpp>

 

#define NUM_ITERATIONS 10000

 

class HiPerfTimer

{

public:

    void start()

    {

        QueryPerformanceFrequency(&freq);

        QueryPerformanceCounter(&begin);

    }

    void stop(){QueryPerformanceCounter(&end);}

 

    double duration(){return (double)(end.QuadPart-begin.QuadPart)/freq.QuadPart;}

protected:

    LARGE_INTEGER begin,end,freq;

};

 

 

/**
 * Shared implementation of the select_many overloads below: calls `func` on
 * each element of `container` and appends every element of each returned
 * container to the result, in order.
 */
template<class Container, class Fn>
Container select_many_impl(const Container& container, Fn& func)
{
    Container result;
    // `typename` is mandatory here: const_iterator is a dependent type.
    for(typename Container::const_iterator v = container.begin();v!=container.end();++v)
    {
        Container tmp = func(*v);
        for(typename Container::const_iterator v1 = tmp.begin();v1!=tmp.end();++v1)
            result.push_back(*v1);
    }
    return result;
}

/**
 * Maps every element of `container` to a container through `func` and
 * concatenates the results ("flat map").
 *
 * Container must be default-constructible and provide const_iterator,
 * begin()/end() and push_back(); func(element) must return something
 * convertible to Container.
 *
 * Template and parameter names no longer start with an underscore followed by
 * a capital letter, because such identifiers are reserved for the implementation.
 *
 * This overload takes a modifiable callable, so a functor with a non-const
 * operator() keeps its state in the caller's object.
 */
template<class Container, class Fn> inline
Container select_many(const Container& container, Fn& func)
{
    return select_many_impl(container, func);
}

/**
 * Overload for const and temporary callables (for example the result of
 * boost::bind passed directly), which cannot bind to a non-const reference
 * in standard C++.
 */
template<class Container, class Fn> inline
Container select_many(const Container& container, const Fn& func)
{
    return select_many_impl(container, func);
}

 

/**
 * Splits `str` into tokens separated by any character found in `delimiters`.
 *
 * Consecutive, leading and trailing delimiters are skipped, so no empty
 * tokens are produced; an empty or all-delimiter input gives an empty vector.
 *
 * @param str         text to split.
 * @param delimiters  set of separator characters.
 * @return the tokens in order of appearance.
 */
std::vector<std::string>    split_string    (const std::string& str, const std::string& delimiters)
{
    std::vector<std::string> v;
    size_t offset = 0;
    while(true)
    {
        size_t token_start = str.find_first_not_of(delimiters, offset);
        // Only delimiters (or nothing) remain, so there is no further token.
        // Calling str.substr(npos, ...) here would throw std::out_of_range.
        if (token_start == std::string::npos)
            break;

        size_t token_end = str.find_first_of(delimiters, token_start);
        if (token_end == std::string::npos)
        {
            // Last token runs to the end of the string.
            v.push_back(str.substr(token_start));
            break;
        }

        v.push_back(str.substr(token_start, token_end - token_start));
        offset = token_end;
    }

    return v;
}

 

/**
 * Concatenates the strings of `v`, inserting `delimiters` between
 * neighbouring elements.
 *
 * @param v           strings to join; empty elements are kept.
 * @param delimiters  text inserted between elements (the whole string, not a
 *                    character set).
 * @return the joined string; empty when `v` is empty.
 */
std::string    join_string        (const std::vector<std::string>& v,const std::string& delimiters)
{
    std::string s;
    if(v.empty())
        return s;

    // Exact final size: all elements plus one separator between each pair.
    size_t reserve = delimiters.length()*(v.size()-1);
    for(std::vector<std::string>::const_iterator the_str = v.begin();the_str!=v.end();++the_str)
        reserve+=the_str->length();
    s.reserve(reserve);

    for(std::vector<std::string>::const_iterator the_str = v.begin();the_str!=v.end();++the_str)
    {
        // Decide by position, not by s.empty(): leading empty elements leave
        // s empty and would otherwise lose their separators.
        if(the_str!=v.begin())
            s+=delimiters;
        s+=*the_str;
    }
    return s;
}

 

 

/**
 * Joins `size` C strings into one newly allocated string, replacing every
 * occurrence of `c1` with `c2` and using `c2` as the separator between inputs.
 *
 * Example: {"word1-word2", "word3-word4"} with c1 = '-' and c2 = ':' gives
 * "word1:word2:word3:word4".
 *
 * @param array  `size` NUL-terminated strings; they are only read.
 * @param size   number of entries in `array`; 0 yields an empty string.
 * @param c1     character to replace.
 * @param c2     replacement character, also written between inputs.
 * @return buffer allocated with new[]; the caller releases it with delete[].
 */
char* do_the_job_in_plain_c(char** array, size_t size, char c1,char c2)
{
    // No inputs: return "" instead of writing the terminator at res_str[-1].
    if(size == 0)
    {
        char* empty_str = new char[1];
        empty_str[0] = 0;
        return empty_str;
    }

    // One extra byte per input for its trailing separator; the slot of the
    // last separator is reused for the terminating NUL.
    size_t desc_size=size;
    for(size_t i=0;i<size;++i)
        desc_size+=strlen(array[i]);

    char* res_str = new char[desc_size];
    char* res_str_t = res_str;
    for(size_t i=0;i<size;++i)
    {
        // Test before reading (not do/while) so an empty input is never
        // scanned past its terminator.
        for(const char* p = array[i];*p;++p)
            *res_str_t++ = (*p == c1)?c2:*p;
        *res_str_t++ = c2;
    }
    res_str[desc_size-1] = 0;
    return res_str;
}

 

 

 

void test_select_many(std::vector<std::string>& w){

    HiPerfTimer pt;

    pt.start();

    for (int i = 0; i < NUM_ITERATIONS; ++i)

        std::string res = join_string(select_many(w,bind(split_string,_1,("-"))),":");

    pt.stop();

    printf("select_many- %f sec\n",pt.duration());

}

 

void test_boost_string_algorithm(std::vector<std::string>& w){

    HiPerfTimer pt;

    pt.start();

 

    for (int i = 0; i < NUM_ITERATIONS; ++i)

    {

        std::string result;

        typedef std::vector< std::string > split_vector_type;

        split_vector_type split_vec_res;

        for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v){

            split_vector_type split_vec;

            boost::split( split_vec, *v, boost::is_any_of("-") );

            std::copy(split_vec.begin(),split_vec.end(),std::back_inserter(split_vec_res));

        }

        std::string res = boost::join(split_vec_res,":");

    }

    pt.stop();

    printf("Boost String Algorithms Library - %f sec\n",pt.duration());

}

 

void test_boost_tokenizer(std::vector<std::string>& w)

{

    HiPerfTimer pt;

    pt.start();

 

    typedef boost::tokenizer<boost::char_separator<char> > tokenizer;

    for (int i = 0; i < NUM_ITERATIONS; ++i) {

        std::string result;

        for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v){

            boost::char_separator<char> sep("-");

            tokenizer tok(*v,sep);   

            for (tokenizer::iterator tok_iter = tok.begin();tok_iter != tok.end(); ++tok_iter){

                if(!result.empty())

                    result+=":";

                result+=*tok_iter;

            }

        }

    }

    pt.stop();   

    printf("Boost Tokenizer - %f sec\n",pt.duration());

}

 

void test_plain_cpp(std::vector<std::string>& w)

{

    HiPerfTimer pt;

    pt.start();

 

    for (int i = 0; i < NUM_ITERATIONS; ++i) {

        std::string result;

 

        size_t length = w.size()+1;

        for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v)

            length+=v->length();

        result.reserve(length);           

 

        for(std::vector<std::string>::const_iterator v = w.begin();v!=w.end();++v){

            const std::vector<std::string>& w1 = split_string(*v,"-");

            for(std::vector<std::string>::const_iterator v1 = w1.begin();v1!=w1.end();++v1){

                if(!result.empty())

                    result+=":";

                result+=*v1;

            }

        }

    }

    pt.stop();   

    printf("naive C++ - %f sec\n",pt.duration());

}

 

void test_plain_c(char** array,size_t size)

{

    HiPerfTimer pt;

    pt.start();

    for (int i = 0; i < NUM_ITERATIONS; ++i)

        std::auto_ptr<char> res_str ( do_the_job_in_plain_c(array,size,'-',':') );

    pt.stop();   

    printf("plain C - %f sec\n",pt.duration());

}

int _tmain(int argc, _TCHAR* argv[])

{

    std::vector<std::string> w;

    w.push_back("word1-word2");w.push_back("word3-word4");w.push_back("word5-word6");

    w.push_back("word7-word8");w.push_back("word9-word0");

 

    test_boost_string_algorithm(w);

    test_select_many(w);

    test_boost_tokenizer(w);

    test_plain_cpp(w);

 

    char* w1[]={"word1-word2","word3-word4","word5-word6","word7-word8","word9-word0"};

    test_plain_c(w1,5);

    return 0;

}

Комментариев нет: