-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implemented core::UTF8String, to manipulate UTF-8 encoded strings.
- Loading branch information
1 parent
8728865
commit 17d3368
Showing
10 changed files
with
1,124 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
|
||
#include "utf8.hpp" | ||
#include "utf8/utf8.h" | ||
#include <cstring> | ||
|
||
namespace core | ||
{ | ||
/** @brief Builds an empty string: the underlying byte buffer is default-constructed. */
UTF8String::UTF8String()
    : m_src()
{
}
|
||
/** @brief Copy constructor.
 *  @param cp The string whose stored bytes are duplicated into this object.
 */
UTF8String::UTF8String(const UTF8String& cp)
    : m_src(cp.m_src)
{
}
|
||
/** @brief Wraps a plain byte string.
 *
 *  The bytes are copied as-is; no validation is done here (see valid()).
 *  @param src The bytes to store.
 */
UTF8String::UTF8String(const std::string& src)
    : m_src(src)
{
}
|
||
/** @brief Destructor. Nothing to release by hand: m_src cleans up after itself. */
UTF8String::~UTF8String() {}
|
||
size_t UTF8String::size() const | ||
{ | ||
utf8::iterator<std::string::const_iterator> it(m_src.cbegin(), m_src.cbegin(), m_src.cend()); | ||
utf8::iterator<std::string::const_iterator> end(m_src.cend(), m_src.cbegin(), m_src.cend()); | ||
size_t count = 0; | ||
while(it != end) { | ||
++count; | ||
++it; | ||
} | ||
return count; | ||
} | ||
|
||
void UTF8String::clear() | ||
{ | ||
m_src.clear(); | ||
} | ||
|
||
bool UTF8String::empty() const | ||
{ | ||
return m_src.empty(); | ||
} | ||
|
||
bool UTF8String::valid() const | ||
{ | ||
return utf8::is_valid(m_src.begin(), m_src.end()); | ||
} | ||
|
||
void UTF8String::removeErrors() | ||
{ | ||
std::string temp; | ||
utf8::replace_invalid(m_src.begin(), m_src.end(), std::back_inserter(temp)); | ||
m_src = temp; | ||
} | ||
|
||
/** @brief Copy assignment.
 *  @param cp The string whose bytes replace the ones stored here.
 *  @return A reference to this object, to allow chained assignments.
 */
UTF8String& UTF8String::operator=(const UTF8String& cp)
{
    m_src.assign(cp.m_src);
    return *this;
}
|
||
std::string UTF8String::getSrc() const | ||
{ | ||
return m_src; | ||
} | ||
|
||
/** @brief Implicit conversion to a plain string.
 *  @return A copy of the stored byte string, exactly as getSrc() returns it.
 */
UTF8String::operator std::string() const
{
    return m_src;
}
|
||
unsigned int UTF8String::operator[](size_t idx) const | ||
{ | ||
utf8::iterator<std::string::const_iterator> it(m_src.cbegin(), m_src.cbegin(), m_src.cend()); | ||
for(size_t i = 0; i < idx; ++i) | ||
++it; | ||
return *it; | ||
} | ||
|
||
bool operator==(const UTF8String& s1, const UTF8String& s2) | ||
{ | ||
return s1.getSrc() == s2.getSrc(); | ||
} | ||
|
||
std::ostream& operator<<(std::ostream& os, const UTF8String& str) | ||
{ | ||
os << str.getSrc(); | ||
return os; | ||
} | ||
|
||
} | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
|
||
#ifndef DEF_CORE_UTF8 | ||
#define DEF_CORE_UTF8 | ||
|
||
#include <iosfwd>
#include <string>
#include <vector>
|
||
namespace core | ||
{ | ||
/** @brief A byte string offering UTF-8 aware operations.
 *
 * Reach for this class only when code-point level handling is really
 * needed; a plain std::string is enough for most uses.
 */
class UTF8String
{
    public:
        /** @brief Creates an empty string. */
        UTF8String();
        /** @brief Creates a copy of another UTF8String. */
        UTF8String(const UTF8String& cp);
        /** @brief Creates a UTF8String from a plain string. */
        UTF8String(const std::string& src);
        ~UTF8String();

        /** @brief Copy assignment. */
        UTF8String& operator=(const UTF8String& cp);

        /** @brief The number of characters (code points, not bytes). */
        size_t size() const;
        /** @brief Empties the string. */
        void clear();
        /** @brief Checks whether the string is empty. */
        bool empty() const;

        /** @brief Checks whether the stored bytes are correctly encoded UTF-8. */
        bool valid() const;
        /** @brief Replaces every badly encoded sequence with a standard Unicode replacement. */
        void removeErrors();

        /** @brief Returns the plain string holding the encoded bytes. */
        std::string getSrc() const;
        /** @brief Converts to the plain string holding the encoded bytes. */
        operator std::string() const;

        /** @brief Accesses a character of the string by position.
         *
         * The returned value is the Unicode number of the character and
         * cannot be printed directly.
         * Undefined behaviour may happen if idx is out of range.
         */
        unsigned int operator[](size_t idx) const;

    private:
        std::string m_src; /**< @brief The plain string stored. */
};
|
||
/** @brief Compares the plain strings stored by two UTF8String objects. */
bool operator==(const UTF8String& s1, const UTF8String& s2);
/** @brief Writes the plain string stored by str to os.
 *
 * Only a declaration of std::ostream is needed here, which <iosfwd> provides.
 */
std::ostream& operator<<(std::ostream& os, const UTF8String& str);
} | ||
|
||
#endif | ||
|
Oops, something went wrong.