[gstream.h] Generic Character ( Set ) & Stream Library

contents



#include "standard.h"



   [gstream] supports the conversion of strings from one character set to another.
   Besides that, it provides a generic stream library that can handle different
   character sets.
   Supported platforms are Unix and Windows.



Types and macros


Default multibyte and wide character sets
// Name of the default wide character set:
// "1200" when _MSDOS or _WIN32 is defined, "UCS-4" on every other platform.
#if !defined( _MSDOS ) && !defined( _WIN32 )
#define CS_ID_WCHAR   "UCS-4"
#else
#define CS_ID_WCHAR   "1200"
#endif
// Name of the default multibyte character set ( same on all platforms ).
#define CS_ID_MBYTE   "UTF-8"


CSConv_T Character set conversion type
GStream_T Generic stream type

Types of generic stream functions
// Read byte vector function.
// RC >= 0: number of bytes read | RC = -1: error
typedef int (*GS_getb_T)(GStream_T gstream, c_byte* b, int cnt);
// Put byte vector function.
// RC >= 0: number of bytes written | RC = -1: error
typedef int (*GS_putb_T)(GStream_T gstream, c_byte* b, int cnt);
// fseek function.
// RC = new byte position ( = origin + offset ): O.K. | RC = -1: error
typedef LONG_INT (*GS_seek_T)(GStream_T gstream, long offset, int origin);
// Close and free function.
typedef void (*GS_destruct_T)(GStream_T gstream);



Multibyte and wide character set support

int GS_csconv_ucs4_utf8
    (
      wc_string in, size_t insize,
      c_string out, size_t outsize
    )
ucs4 --> utf-8 ( RFC 2279 )
'insize' contains the number of wide characters in 'in', L'\0' inclusive.
'outsize' contains the number of bytes in 'out', '\0' inclusive.
'out': utf-8 representation of 'in' or NULL
RC: >=0 on O.K. ( number of written bytes )
| =-1 on possibly incomplete input
| =-2 on error

int GS_csconv_utf8_ucs4
    (
      c_string in, size_t insize,
      wc_string out, size_t outsize
    )
utf-8 --> ucs4 ( RFC 2279 )
'insize' contains the number of bytes in 'in', '\0' inclusive.
'outsize' contains the number of wide characters in 'out', L'\0' inclusive.
'out': ucs4 representation of 'in' or NULL
RC: >=0 on O.K. ( number of written wide characters )
| =-1 on possibly incomplete input
| =-2 on error

c_string GS_ucs4_to_utf8(wc_string in)
ucs4 --> utf-8 ( RFC 2279 )
RC: utf-8 representation of 'in' or NULL; allocs memory

wc_string GS_utf8_to_ucs4(c_string in)
utf-8 --> ucs4 ( RFC 2279 )
RC: ucs4 representation of 'in' or NULL; allocs memory

void GS_fprint_utf8(FILE* fp, c_string s, c_bool raw)
print utf-8 string 's' to file 'fp'
raw --> printable ascii or hex

void GS_fprint_ucs4(FILE* fp, wc_string ws, c_bool raw)
print ucs4 string 'ws' to file 'fp'
raw --> printable ascii or hex

CSConv_T GS_csconv_new(c_string ct, c_string cs)
create new character set conversion description ( RC=NULL on error )
void GS_csconv_free(CSConv_T csci)
free character set conversion description
int GS_csconv_string
    (
      CSConv_T csci, c_string in, int insize,
      c_string out, int* outsize
    )
character set conversion of 'in' to 'out'
'outsize' contains the number of bytes in 'out'.
'in' and 'out' must be valid character buffers with size > 0.
RC: >=0 on O.K. ( number of conversions / characters )
| =-1 on possibly incomplete input
| =-2 on error



Generic stream support

void GS_stream_file_free(GStream_T gstream)
free file stream 'gstream'
void GS_stream_file_destruct(GStream_T gstream)
close and free file stream 'gstream'
GStream_T GS_stream_file_new
          (
            FILE* fp, c_string cs, c_bool close
          )
create new generic stream on a file opened for binary I/O
'cs' : character set name ( UCS4, UTF-8, MS:CodePage / GNU:iconv-based )
'close': true <--> close file on stream destruction
RC: stream on O.K. | NULL on error

void GS_stream_string_free(GStream_T gstream)
free string stream 'gstream'
GStream_T GS_stream_string_new
          (
            c_string s, unsigned int len, c_string cs
          )
create new generic stream on a string buffer
'len': string size in bytes, including the terminating bytes
'cs' : character set name ( UCS4, UTF-8, MS:CodePage / GNU:iconv-based )
RC: stream on O.K. | NULL on error
Note: When a write operation reaches the end of the buffer,
's' will be deleted and recreated.

c_bool GS_stream_string_set
       (
         GStream_T gstream, c_string s, unsigned int len
       )
reset the string buffer of generic stream 'gstream'
'len': string size in bytes, including the terminating bytes
RC: True = O.K.
Note: The character set of 's' and 'gstream' must be the same.

c_bool GS_stream_string_insert
       (
         GStream_T gstream, c_string s, unsigned int len
       )
insert 's' into the string buffer of generic stream 'gstream'
'len': string size in bytes, excluding the terminating bytes
RC: True = O.K.
Note: The character set of 's' and 'gstream' must be the same.

int GS_stream_get_wcval(GStream_T gstream, wc_int* wc)
get wide character 'wc' from 'gstream'
RC: =1 on O.K. | =0 on O.K. and EOF | =-1 on error

int GS_stream_get_wcrc(GStream_T gstream)
get wide character from 'gstream'
RC: >=0 as wide character or =-1 on EOF | =-2 on error

int GS_stream_put_wcval(GStream_T gstream, wc_int wc)
put wide character 'wc' to 'gstream'
RC: >=0 on O.K. | =-1 on error

int GS_stream_bytepos(GStream_T gstream, unsigned int* pos)
get ( relative ) stream position in bytes
RC: =0 on O.K. | =-1 on error

GS_getb_T GS_fun_getb(GStream_T gstream)
get byte function of 'gstream' or NULL
GS_putb_T GS_fun_putb(GStream_T gstream)
put byte function of 'gstream' or NULL
GS_seek_T GS_fun_seek(GStream_T gstream)
position function of 'gstream' or NULL
GS_destruct_T GS_fun_destruct(GStream_T gstream)
close and free function of 'gstream' or NULL