Dynamic strings
Overview
Helper functions to work with strings. More…
// typedefs typedef void() te_string_free_func(struct te_string *str); typedef struct te_string te_string; typedef enum te_string_uri_escape_mode te_string_uri_escape_mode; typedef void te_string_generic_escape_fn( te_string *str, char c ); typedef te_errno te_string_line_handler_fn( char *line, void *user ); // enums enum te_string_uri_escape_mode; // structs struct te_string; // global variables te_string_free_func te_string_free_heap; // global functions static void te_string_reset(te_string* str); static void te_string_move(char** dest, te_string* src); te_errno te_string_reserve(te_string* str, size_t size); static const char* te_string_value(const te_string* str); te_errno te_string_append(te_string* str, const char* fmt, ...); te_errno te_string_append_va(te_string* str, const char* fmt, va_list ap); te_errno te_string_append_chk(te_string* str, const char* fmt, ...); te_errno te_string_append_va_chk(te_string* str, const char* fmt, va_list ap); te_errno te_string_append_buf(te_string* str, const char* buf, size_t len); te_errno te_string_append_shell_args_as_is(te_string* str, ...); te_errno te_string_append_shell_arg_as_is(te_string* str, const char* arg); void te_string_append_escape_uri(te_string* str, te_string_uri_escape_mode mode, const char* arg); te_errno te_string_join_vec(te_string* str, const te_vec* strvec, const char* sep); void te_string_join_uri_path(te_string* str, const te_vec* strvec); void te_string_build_uri(te_string* str, const char* scheme, const char* userinfo, const char* host, uint16_t port, const char* path, const char* query, const char* frag); void te_string_generic_escape(te_string* str, const char* input, const char* esctable[static UINT8_MAX+1], te_string_generic_escape_fn* ctrl_esc, te_string_generic_escape_fn* nonascii_esc); void te_string_encode_base64(te_string* str, size_t len, const uint8_t bytes[len], bool url_safe); te_errno te_string_decode_base64(te_string* str, const char* base64str); char* te_string_fmt_va(const 
char* fmt, va_list ap); char* te_string_fmt(const char* fmt, ...); void te_string_cut(te_string* str, size_t len); void te_string_cut_beginning(te_string* str, size_t len); void te_string_chop(te_string* str, const char* trail); void te_string_add_centered(te_string* str, const char* src, size_t padlen, char padchar); te_errno te_string_process_lines(te_string* buffer, bool complete_lines, te_string_line_handler_fn* callback, void* user_data); void te_string_free(te_string* str); char* raw2string(const uint8_t* data, size_t size); te_errno te_string_replace_all_substrings(te_string* str, const char* new, const char* old); te_errno te_string_replace_substring(te_string* str, const char* new, const char* old); // macros #define TE_STRING_BUF_INIT(buf_) #define TE_STRING_EXT_BUF_INIT(buf_, size_) #define TE_STRING_GROW_FACTOR #define TE_STRING_GROW_FACTOR_EXP_LIMIT #define TE_STRING_INIT #define TE_STRING_INIT_LEN #define TE_STRING_INIT_RESERVE(reserved_size_) #define TE_STRING_INIT_RESERVE_FREE(reserved_size_, free_func_) #define TE_STRING_INIT_STATIC(_size)
Detailed Documentation
Helper functions to work with strings.
Copyright (C) 2004-2023 OKTET Labs Ltd. All rights reserved.
Typedefs
typedef struct te_string te_string
TE string type.
typedef enum te_string_uri_escape_mode te_string_uri_escape_mode
URI escaping modes suitable for various parts of URI.
typedef void te_string_generic_escape_fn( te_string *str, char c )
Type for character escaping functions.
A function is expected to append some representation of c
to str
. The representation is allowed to be empty, i.e. the function may swallow some input characters.
Parameters:
str |
TE string |
c |
input character |
typedef te_errno te_string_line_handler_fn( char *line, void *user )
Function type for handlers called by te_string_process_lines().
The function may freely modify line
, but it must treat it as a pointer to a local buffer; that is, it must not try to free() or realloc() it, nor store it outside of the scope.
Parameters:
line |
Line buffer without a newline terminator. |
user |
User data. |
TE_EOK |
te_string_process_lines() will stop immediately and return success to the caller. |
Returns:
Status code.
Global Functions
static void te_string_reset(te_string* str)
Reset TE string (mark it empty).
Parameters:
str |
TE string. |
static void te_string_move(char** dest, te_string* src)
Safely move the string content to a pointer variable.
te_strings are regularly used as temporary containers, with the resulting data being passed upstream as a plain character pointer. The function ensures that the pointer won’t be freed by an accidental te_string_free() etc.
The function must not be used with external-buffer te_strings; doing so is a logic error: the function transfers the ownership of the memory, but an external-buffer te_string does not own its memory.
Parameters:
dest |
Location of a destination pointer |
src |
Source string |
te_errno te_string_reserve(te_string* str, size_t size)
Reserve space for at least size
elements in str
string (including null byte at the end).
If there is a room already for the number of elements specified by size
, no action would be performed.
The function never returns an error. Its return type is not void for legacy reasons. New code should never check the return value.
Parameters:
str |
TE string. |
size |
Number of elements to have room for in the string |
Returns:
0
static const char* te_string_value(const te_string* str)
Get value of TE string.
Parameters:
str |
Pointer to TE string |
Returns:
Pointer to null-terminated sequence of characters. If str
is NULL
or its internal buffer is not allocated, pointer to empty string is returned.
te_errno te_string_append(te_string* str, const char* fmt, ...)
Format arguments according to fmt
and append the result to the string.
The function never returns an error. Its return type is not void for legacy reasons. New code should never check the return value.
Parameters:
str |
TE string |
fmt |
Format string |
… |
Format string arguments |
Returns:
0
te_errno te_string_append_va(te_string* str, const char* fmt, va_list ap)
Format the varargs according to fmt
and append the result to the string.
Parameters:
str |
TE string |
fmt |
Format string |
ap |
List of arguments |
Returns:
0 (see te_string_append() for explanation)
te_errno te_string_append_chk(te_string* str, const char* fmt, ...)
Format arguments according to fmt
and append the result to the string.
This function is intended for special use cases, where a caller is ready to deal with static buffers of insufficient size in some sensible manner. Normally, te_string_append() should be used instead. Other than the possible error code, the two functions are identical.
Parameters:
str |
TE string |
fmt |
Format string |
… |
Format string arguments |
TE_ENOBUFS |
The string has an external buffer and it does not have enough space. |
Returns:
Status code
te_errno te_string_append_va_chk(te_string* str, const char* fmt, va_list ap)
Format the varargs according to fmt
and append the result to the string.
Parameters:
str |
TE string |
fmt |
Format string |
ap |
List of arguments |
TE_ENOBUFS |
See te_string_append_chk() for explanation. |
Returns:
Status code
te_errno te_string_append_buf(te_string* str, const char* buf, size_t len)
Append the contents of a buffer to TE string. The buffer need not be null-terminated.
Parameters:
str |
TE string |
buf |
Buffer |
len |
Number of bytes in buffer |
Returns:
0 (see te_string_append() for explanation)
te_errno te_string_append_shell_args_as_is(te_string* str, ...)
Append arguments separated by space with required shell escaping to avoid expansion and variable substitution.
Parameters:
str |
TE string |
… |
String arguments terminated by NULL |
Returns:
0 (see te_string_append() for explanation)
te_errno te_string_append_shell_arg_as_is(te_string* str, const char* arg)
Append a single argument with required shell escaping to avoid expansion and variable substitution.
Parameters:
str |
TE string |
arg |
String argument |
Returns:
0 (see te_string_append() for explanation)
void te_string_append_escape_uri(te_string* str, te_string_uri_escape_mode mode, const char* arg)
Append a part of a URI, escaping all the characters that are not acceptable in the given mode
.
The unacceptable characters are percent-encoded as per RFC3986.
The exact escaping rules of RFC3987 are a bit more subtle, so in theory it is possible to construct an invalid URI using this function, however, it is very unlikely for any practical usecase.
Parameters:
str |
TE string |
mode |
escaping mode |
arg |
source value |
te_errno te_string_join_vec(te_string* str, const te_vec* strvec, const char* sep)
Append the elements of strvec
(which must be C strings), separated by sep
. NULL
elements are ignored.
The function never returns an error. Its return type is not void for legacy reasons. New code should never check the return value.
Parameters:
str |
TE string |
strvec |
Vector of C strings |
sep |
Separator |
Returns:
0
void te_string_join_uri_path(te_string* str, const te_vec* strvec)
Append the elements of strvec
(which must be C strings), escaped as URI path segments, separated by /
.
The leading /
is not added, to allow building relative URIs.
Parameters:
str |
TE string |
strvec |
vector of C strings |
See also:
TE_STRING_URI_ESCAPE_PATH_SEGMENT
void te_string_build_uri(te_string* str, const char* scheme, const char* userinfo, const char* host, uint16_t port, const char* path, const char* query, const char* frag)
Build a URI from parts and append it to str
.
If any of the components is NULL
(including scheme
), it is omitted together with a corresponding separator.
path
and query
are not automatically escaped, because the exact escaping rules depend on whether they are treated as monolithic strings or compound objects. Therefore the caller is responsible for providing correct escaping, e.g. by using te_string_append_escape_uri(), te_string_join_vec(), te_kvpair_to_uri_query() or in some other way.
Leading /
is added to path
if host
is present, and path
does not already start with /
.
Parameters:
str |
TE string |
scheme |
URI scheme |
userinfo |
user info (escaped) |
host |
host (escaped) |
port |
port (0 meaning no port) |
path |
path (not escaped) |
query |
query (not escaped) |
frag |
fragment (escaped) |
TE_FATAL_ERROR |
if |
void te_string_generic_escape(te_string* str, const char* input, const char* esctable[static UINT8_MAX+1], te_string_generic_escape_fn* ctrl_esc, te_string_generic_escape_fn* nonascii_esc)
Generic string escaping function.
All characters from input
are copied unchanged to str
, except:
- if the character has a non-NULL entry in esctable, it is substituted;
- if ctrl_esc is not NULL and the character is a control character, ctrl_esc is used to write the representation of the character;
- if nonascii_esc is not NULL and the character is non-ASCII (i.e. its code is larger than 127), nonascii_esc is used to write the representation of the character.
Parameters:
str |
TE string |
input |
input string |
esctable |
table of escape sequences |
ctrl_esc |
control character escaping function (may be NULL) |
nonascii_esc |
non-ASCII character escaping function (may be NULL) |
void te_string_encode_base64(te_string* str, size_t len, const uint8_t bytes[len], bool url_safe)
Encode binary data with Base-64 encoding.
The encoding is defined in RFC 4648. Lines are not split.
If url_safe
is true
, the so-called URL-safe variant of RFC 4648 is used, which produces strings that may be directly included into a URI without additional escaping. They may also be safely used as filenames.
Parameters:
str |
TE string |
len |
size of binary data |
bytes |
raw binary data |
url_safe |
a variant of RFC 4648 |
te_errno te_string_decode_base64(te_string* str, const char* base64str)
Decode a Base64-encoded string.
Both variants of RFC 4648 encoding are accepted. Embedded newlines are silently skipped.
str
will always be zero-terminated, but it may contain embedded zeroes.
Parameters:
str |
TE string |
base64str |
Base64-encoded string |
TE_EILSEQ |
Invalid Base64 encoding |
Returns:
status code
char* te_string_fmt_va(const char* fmt, va_list ap)
Return a char * that is a result of sprintf into allocated memory.
Parameters:
fmt |
Format string |
ap |
List of arguments |
Returns:
Heap-allocated string.
char* te_string_fmt(const char* fmt, ...)
Return a char * that is a result of sprintf into allocated memory.
Parameters:
fmt |
Format string |
… |
Format string arguments |
Returns:
Heap-allocated string.
void te_string_cut(te_string* str, size_t len)
Cut the specified number of characters from the string.
Parameters:
str |
TE string |
len |
Number of characters to cut |
void te_string_cut_beginning(te_string* str, size_t len)
Cut specified number of characters from the beginning of the string.
Parameters:
str |
TE string |
len |
Number of characters to cut from the beginning |
void te_string_chop(te_string* str, const char* trail)
Chop off trailing characters from str
that belong to trail
.
For example, this function may be used to remove trailing newlines from the contents of a file:
te_string_chop(dest, "\n");
Parameters:
str |
TE string. |
trail |
Trailing characters to chop. |
void te_string_add_centered(te_string* str, const char* src, size_t padlen, char padchar)
Center the string src
padding it to padlen
with padchar
and append the result to str
.
Parameters:
str |
TE string |
src |
Source C string |
padlen |
Desired length (if the original string is longer, it will be truncated) |
padchar |
Padding character |
te_errno te_string_process_lines(te_string* buffer, bool complete_lines, te_string_line_handler_fn* callback, void* user_data)
Call callback
for every line in buffer
.
If complete_lines
is true, the last incomplete line without a newline terminator is not processed.
The line terminator is '\n'; however, if it is preceded by '\r', the '\r' is removed as well.
All processed lines are removed from buffer
.
Empty lines are never skipped, but if the buffer
is empty, callback
is not called at all.
If a callback
returns a non-zero status, the processing stops; however, the current line is still removed. TE_EOK is treated as success.
The following snippet would read data from a POSIX fd in arbitrary chunks, split the input into lines, count them, log any occurred error messages and bail out early on fatal errors.
static te_errno
handle_line(char *line, void *data)
{
    unsigned int *linecount = data;
    const char *msg = te_str_strip_prefix(line, "FATAL: ");

    if (msg != NULL)
    {
        ERROR("Fatal error at line %u: %s", *linecount, msg);
        return TE_EBADMSG;
    }
    msg = te_str_strip_prefix(line, "ERROR: ");
    if (msg != NULL)
        ERROR("Error at line %u: %s", *linecount, msg);
    (*linecount)++;
    return 0;
}

...

te_string buffer = TE_STRING_INIT;
char buf[BUFSIZE];
ssize_t read_bytes;
unsigned int linecount = 1;

while ((read_bytes = read(fd, buf, sizeof(buf))) > 0)
{
    te_string_append_buf(&buffer, buf, read_bytes);
    CHECK_RC(te_string_process_lines(&buffer, true, handle_line, &linecount));
}
CHECK_RC(te_string_process_lines(&buffer, false, handle_line, &linecount));
Parameters:
buffer |
TE string. |
complete_lines |
Do not process incomplete lines if true |
callback |
Callback function. |
user_data |
User data. |
Returns:
Status code.
void te_string_free(te_string* str)
Free TE string.
It will not release buffer supplied by user with te_string_set_buf().
Parameters:
str |
TE string |
char* raw2string(const uint8_t* data, size_t size)
Get string representation of raw data.
Parameters:
data |
Buffer |
size |
Number of bytes |
Returns:
String representation
te_errno te_string_replace_all_substrings(te_string* str, const char* new, const char* old)
Replace all the substrings in a string
Parameters:
str |
The string in which to replace. |
new |
The new substring to replace. |
old |
The substring to be replaced. |
Returns:
Status code (always 0).
te_errno te_string_replace_substring(te_string* str, const char* new, const char* old)
Replace the substring in a string
Parameters:
str |
The string in which to replace. |
new |
The new substring to replace. |
old |
The substring to be replaced. |
Returns:
Status code (always 0).
Macros
#define TE_STRING_BUF_INIT(buf_)
Initialize TE string assigning buffer to it.
#define TE_STRING_EXT_BUF_INIT(buf_, size_)
Initialize TE string assigning buffer and size to it.
#define TE_STRING_GROW_FACTOR
String grow factor which is used during reallocation
#define TE_STRING_GROW_FACTOR_EXP_LIMIT
The grow factor exponent limit during a single reallocation. If a good size is not found in the range [size, TE_STRING_GROW_FACTOR ^ TE_STRING_GROW_FACTOR_EXP_LIMIT], a fallback to addendum-based growth is performed.
Empirically, current grow factor exponent limit is enough for running long tests without too frequent reallocation. Type of function to be used for releasing resources
#define TE_STRING_INIT
On-stack te_string initializer
#define TE_STRING_INIT_LEN
Initial length of the dynamically allocated string
#define TE_STRING_INIT_RESERVE(reserved_size_)
On-stack te_string initializer with a defined reserve
Note: You should probably not use it unless you really need to. Please use the simple TE_STRING_INIT instead.
#define TE_STRING_INIT_RESERVE_FREE(reserved_size_, free_func_)
On-stack te_string initializer with a defined reserve and free function.
#define TE_STRING_INIT_STATIC(_size)
Initialize TE string, assigning statically allocated memory to it. Dynamic memory allocation will not be used for such a string, so there is no need to call te_string_free().
Parameters:
_size |
Number of bytes reserved for storing the string. |