Dynamic strings

Overview

Helper functions to work with strings. More…

// typedefs

typedef void te_string_free_func(struct te_string *str);
typedef struct te_string te_string;
typedef enum te_string_uri_escape_mode te_string_uri_escape_mode;

typedef void te_string_generic_escape_fn(
    te_string *str,
    char c
    );

typedef te_errno te_string_line_handler_fn(
    char *line,
    void *user
    );

// enums

enum te_string_uri_escape_mode;

// structs

struct te_string;

// global variables

te_string_free_func te_string_free_heap;

// global functions

static void te_string_reset(te_string* str);
static void te_string_move(char** dest, te_string* src);
te_errno te_string_reserve(te_string* str, size_t size);
static const char* te_string_value(const te_string* str);
te_errno te_string_append(te_string* str, const char* fmt, ...);
te_errno te_string_append_va(te_string* str, const char* fmt, va_list ap);
te_errno te_string_append_chk(te_string* str, const char* fmt, ...);
te_errno te_string_append_va_chk(te_string* str, const char* fmt, va_list ap);
te_errno te_string_append_buf(te_string* str, const char* buf, size_t len);
te_errno te_string_append_shell_args_as_is(te_string* str, ...);
te_errno te_string_append_shell_arg_as_is(te_string* str, const char* arg);
void te_string_append_escape_uri(te_string* str, te_string_uri_escape_mode mode, const char* arg);
te_errno te_string_join_vec(te_string* str, const te_vec* strvec, const char* sep);
void te_string_join_uri_path(te_string* str, const te_vec* strvec);
void te_string_build_uri(te_string* str, const char* scheme, const char* userinfo, const char* host, uint16_t port, const char* path, const char* query, const char* frag);
void te_string_generic_escape(te_string* str, const char* input, const char* esctable[static UINT8_MAX+1], te_string_generic_escape_fn* ctrl_esc, te_string_generic_escape_fn* nonascii_esc);
void te_string_encode_base64(te_string* str, size_t len, const uint8_t bytes[len], bool url_safe);
te_errno te_string_decode_base64(te_string* str, const char* base64str);
char* te_string_fmt_va(const char* fmt, va_list ap);
char* te_string_fmt(const char* fmt, ...);
void te_string_cut(te_string* str, size_t len);
void te_string_cut_beginning(te_string* str, size_t len);
void te_string_chop(te_string* str, const char* trail);
void te_string_add_centered(te_string* str, const char* src, size_t padlen, char padchar);
te_errno te_string_process_lines(te_string* buffer, bool complete_lines, te_string_line_handler_fn* callback, void* user_data);
void te_string_free(te_string* str);
char* raw2string(const uint8_t* data, size_t size);
te_errno te_string_replace_all_substrings(te_string* str, const char* new, const char* old);
te_errno te_string_replace_substring(te_string* str, const char* new, const char* old);

// macros

#define TE_STRING_BUF_INIT(buf_)
#define TE_STRING_EXT_BUF_INIT(buf_, size_)
#define TE_STRING_GROW_FACTOR
#define TE_STRING_GROW_FACTOR_EXP_LIMIT
#define TE_STRING_INIT
#define TE_STRING_INIT_LEN
#define TE_STRING_INIT_RESERVE(reserved_size_)
#define TE_STRING_INIT_RESERVE_FREE(reserved_size_, free_func_)
#define TE_STRING_INIT_STATIC(_size)

Detailed Documentation

Helper functions to work with strings.

Copyright (C) 2004-2023 OKTET Labs Ltd. All rights reserved.

Typedefs

typedef struct te_string te_string

TE string type.

typedef enum te_string_uri_escape_mode te_string_uri_escape_mode

URI escaping modes suitable for various parts of URI.

typedef void te_string_generic_escape_fn(
    te_string *str,
    char c
    )

Type for character escaping functions.

A function is expected to append some representation of c to str. The representation is allowed to be empty, i.e. the function may swallow some input characters.

Parameters:

str

TE string

c

input character

typedef te_errno te_string_line_handler_fn(
    char *line,
    void *user
    )

Function type for handlers called by te_string_process_lines().

The function may freely modify line, but it must treat it as a pointer to a local buffer, that is it must not try to free() or realloc() it, nor store it outside of the scope.

Parameters:

line

Line buffer without a newline terminator.

user

User data.

TE_EOK

te_string_process_lines() will stop immediately and return success to the caller.

Returns:

Status code.

Global Functions

static void te_string_reset(te_string* str)

Reset TE string (mark it as empty).

Parameters:

str

TE string.

static void te_string_move(char** dest, te_string* src)

Safely move the string content to a pointer variable.

te_strings are regularly used as temporary containers, with the resulting data being passed upstream as a plain character pointer. The function ensures that the pointer won’t be freed by an accidental te_string_free() etc.

The function must not be used with an external-buffer te_string; doing so is a logic error: the function transfers the ownership of the memory, but an external-buffer te_string does not own its memory.

Parameters:

dest

Location of a destination pointer

src

Source string

te_errno te_string_reserve(te_string* str, size_t size)

Reserve space for at least size elements in str string (including null byte at the end).

If there is already room for the number of elements specified by size, no action is performed.

The function never returns an error. Its return type is not void for legacy reasons. New code should never check the return value.

Parameters:

str

TE string.

size

Number of elements to have room for in the string

Returns:

0

static const char* te_string_value(const te_string* str)

Get value of TE string.

Parameters:

str

Pointer to TE string

Returns:

Pointer to null-terminated sequence of characters. If str is NULL or its internal buffer is not allocated, pointer to empty string is returned.

te_errno te_string_append(te_string* str, const char* fmt, ...)

Format arguments according to fmt and append the result to the string.

The function never returns an error. Its return type is not void for legacy reasons. New code should never check the return value.

Parameters:

str

TE string

fmt

Format string

...

Format string arguments

Returns:

0

te_errno te_string_append_va(te_string* str, const char* fmt, va_list ap)

Format the varargs according to fmt and append the result to the string.

Parameters:

str

TE string

fmt

Format string

ap

List of arguments

Returns:

0 (see te_string_append() for explanation)

te_errno te_string_append_chk(te_string* str, const char* fmt, ...)

Format arguments according to fmt and append the result to the string.

This function is intended for special use cases, where a caller is ready to deal with static buffers of insufficient size in some sensible manner. Normally, te_string_append() should be used instead. Other than the possible error code, the two functions are identical.

Parameters:

str

TE string

fmt

Format string

...

Format string arguments

TE_ENOBUFS

The string has an external buffer and it does not have enough space.

Returns:

Status code

te_errno te_string_append_va_chk(te_string* str, const char* fmt, va_list ap)

Format the varargs according to fmt and append the result to the string.

Parameters:

str

TE string

fmt

Format string

ap

List of arguments

TE_ENOBUFS

See te_string_append_chk() for explanation.

Returns:

Status code

te_errno te_string_append_buf(te_string* str, const char* buf, size_t len)

Append contents of a buffer to TE string. Buffer may be not null-terminated.

Parameters:

str

TE string

buf

Buffer

len

Number of bytes in buffer

Returns:

0 (see te_string_append() for explanation)

te_errno te_string_append_shell_args_as_is(te_string* str, ...)

Append arguments separated by space with required shell escaping to avoid expansion and variable substitution.

Parameters:

str

TE string

...

String arguments terminated by NULL

Returns:

0 (see te_string_append() for explanation)

te_errno te_string_append_shell_arg_as_is(te_string* str, const char* arg)

Append a single argument with required shell escaping to avoid expansion and variable substitution.

Parameters:

str

TE string

arg

String argument

Returns:

0 (see te_string_append() for explanation)

void te_string_append_escape_uri(te_string* str, te_string_uri_escape_mode mode, const char* arg)

Append a part of a URI, escaping all the characters that are not acceptable in the given mode.

The unacceptable characters are percent-encoded as per RFC3986.

The exact escaping rules of RFC3986 are a bit more subtle, so in theory it is possible to construct an invalid URI using this function; however, this is very unlikely in any practical use case.

Parameters:

str

TE string

mode

escaping mode

arg

source value

te_errno te_string_join_vec(te_string* str, const te_vec* strvec, const char* sep)

Append the elements of strvec (which must be C strings), separated by sep. NULL elements are ignored.

The function never returns an error. Its return type is not void for legacy reasons. New code should never check the return value.

Parameters:

str

TE string

strvec

Vector of C strings

sep

Separator

Returns:

0

void te_string_join_uri_path(te_string* str, const te_vec* strvec)

Append the elements of strvec (which must be C strings), escaped as URI path segments, separated by /.

The leading / is not added, to allow building relative URIs.

Parameters:

str

TE string

strvec

vector of C strings

See also:

TE_STRING_URI_ESCAPE_PATH_SEGMENT

void te_string_build_uri(te_string* str, const char* scheme, const char* userinfo, const char* host, uint16_t port, const char* path, const char* query, const char* frag)

Build a URI from its parts and append it to str.

If any of the components is NULL (including scheme), it is omitted together with a corresponding separator.

path and query are not automatically escaped, because the exact escaping rules depend on whether they are treated as monolithic strings or compound objects. Therefore the caller is responsible for providing correct escaping, e.g. by using te_string_append_escape_uri(), te_string_join_uri_path(), te_kvpair_to_uri_query() or in some other way.

Leading / is added to path if host is present, and path does not already start with /.

Parameters:

str

TE string

scheme

URI scheme

userinfo

user info (escaped)

host

host (escaped)

port

port (0 meaning no port)

path

path (not escaped by this function; the caller must provide it already escaped)

query

query (not escaped by this function; the caller must provide it already escaped)

frag

fragment (escaped)

TE_FATAL_ERROR

if scheme, path or query contains invalid characters.

void te_string_generic_escape(te_string* str, const char* input, const char* esctable[static UINT8_MAX+1], te_string_generic_escape_fn* ctrl_esc, te_string_generic_escape_fn* nonascii_esc)

Generic string escaping function.

All characters from input are copied unchanged to str, except:

  • if the character has a non- NULL entry in esctable, it is substituted;

  • if ctrl_esc is not NULL and the character is a control character, ctrl_esc is used to write the representation of the character;

  • if nonascii_esc is not NULL and the character is non-ASCII (i.e. its code is larger than 127), nonascii_esc is used to write the representation of the character.

Parameters:

str

TE string

input

input string

esctable

table of escape sequences

ctrl_esc

control character escaping function (may be NULL)

nonascii_esc

non-ASCII character escaping function (may be NULL)

void te_string_encode_base64(te_string* str, size_t len, const uint8_t bytes[len], bool url_safe)

Encode binary data with Base-64 encoding.

The encoding is defined in RFC 4648. Lines are not split.

If url_safe is true, the so-called URL-safe variant of RFC 4648 is used, which produces strings that may be directly included in a URI without additional escaping. They may also be safely used as filenames.

Parameters:

str

TE string

len

size of binary data

bytes

raw binary data

url_safe

selects the variant of RFC 4648: if true, the URL-safe variant is used

te_errno te_string_decode_base64(te_string* str, const char* base64str)

Decode a Base64-encoded string.

Both variants of RFC 4648 encoding are accepted. Embedded newlines are silently skipped.

str will always be zero-terminated, but it may contain embedded zeroes.

Parameters:

str

TE string

base64str

Base64-encoded string

TE_EILSEQ

Invalid Base64 encoding

Returns:

status code

char* te_string_fmt_va(const char* fmt, va_list ap)

Return a char * that is a result of sprintf into allocated memory.

Parameters:

fmt

Format string

ap

List of arguments

Returns:

Heap-allocated string.

char* te_string_fmt(const char* fmt, ...)

Return a char * that is a result of sprintf into allocated memory.

Parameters:

fmt

Format string

...

Format string arguments

Returns:

Heap-allocated string.

void te_string_cut(te_string* str, size_t len)

Cut from the string specified number of characters.

Parameters:

str

TE string

len

Number of characters to cut

void te_string_cut_beginning(te_string* str, size_t len)

Cut specified number of characters from the beginning of the string.

Parameters:

str

TE string

len

Number of characters to cut from the beginning

void te_string_chop(te_string* str, const char* trail)

Chop off trailing characters from str that belong to trail.

For example, this function may be used to remove trailing newlines from the contents of a file:

te_string_chop(dest, "\n");

Parameters:

str

TE string.

trail

Trailing characters to chop.

void te_string_add_centered(te_string* str, const char* src, size_t padlen, char padchar)

Center the string src padding it to padlen with padchar and append the result to str.

Parameters:

str

TE string

src

Source C string

padlen

Desired length (if the original string is longer, it will be truncated)

padchar

Padding character

te_errno te_string_process_lines(te_string* buffer, bool complete_lines, te_string_line_handler_fn* callback, void* user_data)

Call callback for every line in buffer.

If complete_lines is true, the last incomplete line without a newline terminator is not processed.

The line terminator is '\n'; however, if it is preceded by '\r', the '\r' is removed as well.

All processed lines are removed from buffer.

Empty lines are never skipped, but if the buffer is empty, callback is not called at all.

If a callback returns a non-zero status, the processing stops; however, the current line is still removed. TE_EOK is treated as success.

The following snippet would read data from a POSIX fd in arbitrary chunks, split the input into lines, count them, log any occurred error messages and bail out early on fatal errors.

static te_errno
handle_line(char *line, void *data)
{
    unsigned int *linecount = data;
    const char *msg = te_str_strip_prefix(line, "FATAL: ");

    if (msg != NULL)
    {
        ERROR("Fatal error at line %u: %s", *linecount, msg);
        return TE_EBADMSG;
    }

    msg = te_str_strip_prefix(line, "ERROR: ");
    if (msg != NULL)
        ERROR("Error at line %u: %s", *linecount, msg);

    (*linecount)++;
    return 0;
}

...

te_string buffer = TE_STRING_INIT;
char buf[BUFSIZE];
ssize_t read_bytes;
unsigned int linecount = 1;

while ((read_bytes = read(fd, buf, sizeof(buf))) > 0)
{
    te_string_append_buf(&buffer, buf, read_bytes);
    CHECK_RC(te_string_process_lines(&buffer, true,
                                     handle_line, &linecount));
}
CHECK_RC(te_string_process_lines(&buffer, false, handle_line, &linecount));

Parameters:

buffer

TE string.

complete_lines

Do not process incomplete lines if true.

callback

Callback function.

user_data

User data.

Returns:

Status code.

void te_string_free(te_string* str)

Free TE string.

It will not release buffer supplied by user with te_string_set_buf().

Parameters:

str

TE string

char* raw2string(const uint8_t* data, size_t size)

Get string representation of raw data.

Parameters:

data

Buffer

size

Number of bytes

Returns:

String representation

te_errno te_string_replace_all_substrings(te_string* str, const char* new, const char* old)

Replace all occurrences of a substring in a string.

Parameters:

str

The string in which to replace.

new

The new substring to substitute in place of old.

old

The substring to be replaced.

Returns:

Status code (always 0).

te_errno te_string_replace_substring(te_string* str, const char* new, const char* old)

Replace a substring in a string.

Parameters:

str

The string in which to replace.

new

The new substring to substitute in place of old.

old

The substring to be replaced.

Returns:

Status code (always 0).

Macros

#define TE_STRING_BUF_INIT(buf_)

Initialize TE string assigning buffer to it.

#define TE_STRING_EXT_BUF_INIT(buf_, size_)

Initialize TE string assigning buffer and size to it.

#define TE_STRING_GROW_FACTOR

String grow factor which is used during reallocation

#define TE_STRING_GROW_FACTOR_EXP_LIMIT

The grow factor exponent limit during a single reallocation. If a good size is not found in a range [size, TE_STRING_GROW_FACTOR ^ TE_STRING_GROW_FACTOR_EXP_LIMIT], the fallback to addendum-based grow is performed.

Empirically, the current grow factor exponent limit is enough for running long tests without too frequent reallocation.

Type of function to be used for releasing resources (see te_string_free_func).

#define TE_STRING_INIT

On-stack te_string initializer

#define TE_STRING_INIT_LEN

Initial length of the dynamically allocated string

#define TE_STRING_INIT_RESERVE(reserved_size_)

On-stack te_string initializer with a defined reserve

Note: You should probably not use it unless you really need to. Please use the simple TE_STRING_INIT instead.

#define TE_STRING_INIT_RESERVE_FREE(reserved_size_, free_func_)

On-stack te_string initializer with a defined reserve and free function.

#define TE_STRING_INIT_STATIC(_size)

Initialize TE string assigning statically allocated memory to it. Dynamic memory allocation will not be used for such a string, so there is no need to call te_string_free().

Parameters:

_size

Number of bytes reserved for storing the string.