UCommon
ucommon::utf8 Class Reference

A core class of utf8 encoded string functions. More...

#include <unicode.h>

Inheritance diagram for ucommon::utf8:

Static Public Member Functions

static unsigned ccount (const char *string, ucs4_t character)
 Count occurrences of a unicode character in string.
 
static size_t chars (const unicode_t string)
 How many chars are required to encode a given wchar string.
 
static size_t chars (ucs4_t character)
 How many chars are required to encode a given unicode character.
 
static ucs4_t codepoint (const char *encoded)
 Convert a utf8 encoded codepoint to a ucs4 character value.
 
static size_t count (const char *string)
 Count utf8 encoded ucs4 codepoints in string.
 
static const char * find (const char *string, ucs4_t character, size_t start=0)
 Find first occurrence of character in string.
 
static ucs4_t get (const char *cp)
 Get a unicode character from a character protocol.
 
static char * offset (char *string, ssize_t position)
 Get codepoint offset in a string.
 
static size_t pack (unicode_t unicode, const char *cp, size_t len)
 Convert a utf8 string into a unicode data buffer.
 
static void put (ucs4_t character, char *buf)
 Push a unicode character to a character protocol.
 
static const char * rfind (const char *string, ucs4_t character, size_t end=(size_t) -1l)
 Find last occurrence of character in string.
 
static unsigned size (const char *codepoint)
 Compute character size of utf8 string codepoint.
 
static ucs4_t * udup (const char *string)
 Dup a utf8 string into a ucs4_t string.
 
static size_t unpack (const unicode_t string, char *text, size_t size)
 Convert a unicode string into utf8.
 
static ucs2_t * wdup (const char *string)
 Dup a utf8 string into a ucs2_t representation.
 

Static Public Attributes

static const char * nil
 A convenient NULL pointer value.
 
static const unsigned ucsize
 Size of "unicode_t" character codes, may not be ucs4_t size.
 

Protected Member Functions

 utf8 (const utf8 &copy)
 

Detailed Description

A core class of utf8 encoded string functions.

This is a foundation for all utf8 string processing.

Author
David Sugar

Definition at line 67 of file unicode.h.

Constructor & Destructor Documentation

◆ utf8() [1/2]

ucommon::utf8::utf8 ( )
inline protected

Definition at line 70 of file unicode.h.

◆ utf8() [2/2]

ucommon::utf8::utf8 ( const utf8 & copy)
inline protected

Definition at line 72 of file unicode.h.

Member Function Documentation

◆ ccount()

static unsigned ucommon::utf8::ccount ( const char * string,
ucs4_t character )
static

Count occurrences of a unicode character in string.

Parameters
string to search in.
character code to search for.
Returns
count of occurrences.

◆ chars() [1/2]

static size_t ucommon::utf8::chars ( const unicode_t string)
static

How many chars are required to encode a given wchar string.

Parameters
string of ucs4 data.
Returns
number of chars required to encode given string.

◆ chars() [2/2]

static size_t ucommon::utf8::chars ( ucs4_t character)
static

How many chars are required to encode a given unicode character.

Parameters
character to encode.
Returns
number of chars required to encode given character.

◆ codepoint()

static ucs4_t ucommon::utf8::codepoint ( const char * encoded)
static

Convert a utf8 encoded codepoint to a ucs4 character value.

Parameters
encoded utf8 codepoint.
Returns
ucs4 character value or 0 if invalid.

◆ count()

static size_t ucommon::utf8::count ( const char * string)
static

Count utf8 encoded ucs4 codepoints in string.

Parameters
string of utf8 data.
Returns
codepoint count, 0 if empty or invalid.

◆ find()

static const char * ucommon::utf8::find ( const char * string,
ucs4_t character,
size_t start = 0 )
static

Find first occurrence of character in string.

Parameters
string to search in.
character code to search for.
start offset in string in codepoints.
Returns
pointer to first instance or NULL if not found.

◆ get()

static ucs4_t ucommon::utf8::get ( const char * cp)
static

Get a unicode character from a character protocol.

Parameters
cp buffer of character protocol to read from.
Returns
unicode character or EOF error.

◆ offset()

static char * ucommon::utf8::offset ( char * string,
ssize_t position )
static

Get codepoint offset in a string.

Parameters
string of utf8 data.
position of codepoint in string, negative offsets are from tail.
Returns
offset of codepoint or NULL if invalid.

◆ pack()

static size_t ucommon::utf8::pack ( unicode_t unicode,
const char * cp,
size_t len )
static

Convert a utf8 string into a unicode data buffer.

Parameters
unicode data buffer.
cp buffer of character protocol to pack from.
len size of unicode data buffer in codepoints.
Returns
number of code points converted.

◆ put()

static void ucommon::utf8::put ( ucs4_t character,
char * buf )
static

Push a unicode character to a character protocol.

Parameters
character to push to file.
buf buffer of character protocol to push character to.
Returns
unicode character or EOF on error.

◆ rfind()

static const char * ucommon::utf8::rfind ( const char * string,
ucs4_t character,
size_t end = (size_t) -1l )
static

Find last occurrence of character in string.

Parameters
string to search in.
character code to search for.
end offset to start from in codepoints.
Returns
pointer to last instance or NULL if not found.

◆ size()

static unsigned ucommon::utf8::size ( const char * codepoint)
static

Compute character size of utf8 string codepoint.

Parameters
codepoint in string.
Returns
size of codepoint as utf8 encoded data, 0 if invalid.

◆ unpack()

static size_t ucommon::utf8::unpack ( const unicode_t string,
char * text,
size_t size )
static

Convert a unicode string into utf8.

Parameters
string of unicode data to pack.
text buffer of character protocol to put data into.
Returns
number of code points converted.

Field Documentation

◆ nil

const char* ucommon::utf8::nil
static

A convenient NULL pointer value.

Definition at line 83 of file unicode.h.

◆ ucsize

const unsigned ucommon::utf8::ucsize
static

Size of "unicode_t" character codes, may not be ucs4_t size.

Definition at line 78 of file unicode.h.


The documentation for this class was generated from the following file: