00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef UNICODEHELPERS_H
00023 #define UNICODEHELPERS_H UNICODEHELPERS_H
00024
#include <cassert>
#include <string>
#include <vector>

#include <unicode/ucnv.h>
#include <unicode/schriter.h>
#include <unicode/regex.h>

#include <boost/shared_ptr.hpp>
#include <boost/tokenizer.hpp>
#include <boost/shared_array.hpp>
00032
00033 namespace schma {
00034
00035 typedef boost::shared_ptr<UnicodeString> UnicodePtr;
00036 typedef boost::shared_array<char> charArray;
00037 typedef std::vector<UnicodePtr> UnicodeVector;
00038
00039 inline UnicodeVector splitString(const std::string& s)
00040 {
00041 UnicodeVector result;
00042
00043 boost::char_separator<char> sep(" ");
00044 boost::tokenizer<boost::char_separator<char> > tok(s, sep);
00045
00046 for(boost::tokenizer<boost::char_separator<char> >::const_iterator it = tok.begin();
00047 it != tok.end(); ++it){
00048 result.push_back(UnicodePtr(new UnicodeString(it->c_str())));
00049 }
00050 return result;
00051 }
00052
00053
00054
00055
00056
00057
00058 class _UTF8Converter
00059 {
00060 UConverter* m_conv;
00061 public:
00062 _UTF8Converter()
00063 {
00064 UErrorCode errcode = U_ZERO_ERROR;
00065 m_conv = ucnv_open("UTF8", &errcode);
00066 assert(errcode == U_ZERO_ERROR);
00067 }
00068 ~_UTF8Converter()
00069 {
00070 ucnv_close(m_conv);
00071 }
00072 inline UConverter* converter() const
00073 {
00074 return m_conv;
00075 }
00076 };
00077
00078
00079 inline UConverter* UTF8Converter()
00080 {
00081 static _UTF8Converter u;
00082 return u.converter();
00083 }
00084
00085
00086 inline charArray toCharArray(UnicodePtr u)
00087 {
00088 char* buffer = new char[u->length()*4];
00089 UErrorCode c = U_ZERO_ERROR;
00090 u->extract(buffer,u->length()*4, 0, c);
00091 return charArray(buffer);
00092 }
00093
00094
00095 inline std::string toStdString(UnicodePtr u)
00096 {
00097 char buffer[u->length()*4];
00098 UErrorCode c = U_ZERO_ERROR;
00099 u->extract(buffer,u->length()*4, 0, c);
00100 return std::string(buffer);
00101 }
00102
00103 }
00104
00105 #endif