Skip to content

Commit

Permalink
Implemented core::UTF8String, to manipulate UTF-8 encoded strings.
Browse files Browse the repository at this point in the history
  • Loading branch information
dwarfmaster committed Dec 8, 2013
1 parent 8728865 commit 17d3368
Show file tree
Hide file tree
Showing 10 changed files with 1,124 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Doxyfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ RECURSIVE = YES
# Note that relative paths are relative to the directory from which doxygen is
# run.

EXCLUDE = @CMAKE_CURRENT_SOURCE_DIR@/src/liblua @CMAKE_CURRENT_SOURCE_DIR@/src/Box2D
EXCLUDE = @CMAKE_CURRENT_SOURCE_DIR@/src/liblua @CMAKE_CURRENT_SOURCE_DIR@/src/Box2D @CMAKE_CURRENT_SOURCE_DIR@/src/core/utf8

# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix file system feature) are excluded
Expand Down
1 change: 1 addition & 0 deletions src/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ add_library(${lib}
systemtime.cpp systemtime.hpp
logger.cpp logger.hpp
config.cpp config.hpp
utf8.cpp utf8.hpp
)

95 changes: 95 additions & 0 deletions src/core/utf8.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@

#include "utf8.hpp"
#include "utf8/utf8.h"
#include <cstring>

namespace core
{
/** Default constructor: the stored plain string is default-constructed, i.e. empty. */
UTF8String::UTF8String()
    : m_src()
{
}

/** Copy constructor: duplicates the plain string stored in cp. */
UTF8String::UTF8String(const UTF8String& cp)
    : m_src(cp.m_src)
{
}

/** Wraps a plain string: src is copied verbatim, no validation is done here. */
UTF8String::UTF8String(const std::string& src)
    : m_src(src)
{
}

/** Destructor: nothing to do by hand, the stored std::string cleans up after itself. */
UTF8String::~UTF8String()
{}

size_t UTF8String::size() const
{
utf8::iterator<std::string::const_iterator> it(m_src.cbegin(), m_src.cbegin(), m_src.cend());
utf8::iterator<std::string::const_iterator> end(m_src.cend(), m_src.cbegin(), m_src.cend());
size_t count = 0;
while(it != end) {
++count;
++it;
}
return count;
}

/** Empties the string by removing every byte of the stored plain string. */
void UTF8String::clear()
{
    m_src.erase(m_src.begin(), m_src.end());
}

/** Tells whether the stored plain string holds no byte at all. */
bool UTF8String::empty() const
{
    return m_src.size() == 0;
}

/** Returns the verdict of utf8::is_valid over the whole stored byte range. */
bool UTF8String::valid() const
{
    const bool wellFormed = utf8::is_valid(m_src.begin(), m_src.end());
    return wellFormed;
}

/**
 * Rewrites the stored string through utf8::replace_invalid, so that characters
 * with encoding errors are replaced (see the header documentation).
 * If replace_invalid throws, the stored string is left untouched because the
 * result is built in a separate buffer first.
 */
void UTF8String::removeErrors()
{
    std::string repaired;
    utf8::replace_invalid(m_src.begin(), m_src.end(), std::back_inserter(repaired));

    // The repaired buffer is a throw-away local: swap it in instead of
    // copy-assigning, which would duplicate the whole string a second time.
    m_src.swap(repaired);
}

/** Copy assignment: takes over a copy of the plain string stored in cp. */
UTF8String& UTF8String::operator=(const UTF8String& cp)
{
    // Self-assignment leaves the content unchanged either way; skip the copy.
    if(this != &cp)
        m_src = cp.m_src;
    return *this;
}

/** Returns a copy of the plain string stored internally. */
std::string UTF8String::getSrc() const
{
    std::string plain(m_src);
    return plain;
}

/** Conversion to std::string: yields a copy of the stored plain string, like getSrc(). */
UTF8String::operator std::string() const
{
    return m_src;
}

/**
 * Returns the value obtained by dereferencing a utf8::iterator advanced idx
 * times from the start of the string. No bounds check is performed here: idx
 * must designate an existing character.
 */
unsigned int UTF8String::operator[](size_t idx) const
{
    typedef utf8::iterator<std::string::const_iterator> CodePointIt;

    CodePointIt cursor(m_src.cbegin(), m_src.cbegin(), m_src.cend());
    size_t remaining = idx;
    while(remaining > 0) {
        ++cursor;
        --remaining;
    }
    return *cursor;
}

/** Two UTF8String are equal when the plain strings given by getSrc() are equal. */
bool operator==(const UTF8String& s1, const UTF8String& s2)
{
    const std::string lhs = s1.getSrc();
    const std::string rhs = s2.getSrc();
    return lhs == rhs;
}

/** Streams the plain string given by getSrc() into os, then hands os back for chaining. */
std::ostream& operator<<(std::ostream& os, const UTF8String& str)
{
    return os << str.getSrc();
}

}



56 changes: 56 additions & 0 deletions src/core/utf8.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@

#ifndef DEF_CORE_UTF8
#define DEF_CORE_UTF8

#include <string>
#include <vector>

namespace core
{
/** @brief A wrapper around std::string offering utf-8 aware methods.
 *
 * Only reach for this class when precise utf-8 handling is really needed :
 * a simple std::string can do the job most of the time.
 */
class UTF8String
{
    public:
        /** @brief Creates an empty string. */
        UTF8String();
        /** @brief Creates an UTF8String holding a copy of a plain string. */
        UTF8String(const std::string& src);
        /** @brief Creates a copy of another UTF8String. */
        UTF8String(const UTF8String& cp);
        /** @brief Replaces the content by a copy of the content of cp. */
        UTF8String& operator=(const UTF8String& cp);
        ~UTF8String();

        /** @brief The number of characters, counted by iterating over the utf-8 encoded content. */
        size_t size() const;
        /** @brief Check if the string is empty. */
        bool empty() const;
        /** @brief Check if the utf-8 encoding of the string is valid (true means no error). */
        bool valid() const;

        /** @brief Access utf8 elements of the string. The value returned can't be printed directly : it's the unicode number of the character.
         *
         * Undefined behaviour may happen if idx is outside range.
         */
        unsigned int operator[](size_t idx) const;

        /** @brief Returns the plain string representing the string. */
        std::string getSrc() const;
        /** @brief Conversion to the plain string representing the string. */
        operator std::string() const;

        /** @brief Empty the string. */
        void clear();
        /** @brief Replaces any character with encoding errors by a standard unicode character. */
        void removeErrors();

    private:
        std::string m_src; /**< @brief The plain string stored. */
};

/** @brief Equality test : compares the plain strings returned by getSrc(). */
bool operator==(const UTF8String& s1, const UTF8String& s2);
/** @brief Writes the plain string returned by getSrc() to os and returns os. */
std::ostream& operator<<(std::ostream& os, const UTF8String& str);
}

#endif

Loading

0 comments on commit 17d3368

Please sign in to comment.