Substring manipulation API

Overview

// typedefs

typedef struct te_substring_t te_substring_t;
typedef enum te_substring_mod_op te_substring_mod_op;

// enums

enum te_substring_mod_op;

// structs

struct te_substring_t;

// global functions

static void te_substring_invalidate(te_substring_t* substr);
static bool te_substring_is_valid(const te_substring_t* substr);
static bool te_substring_past_end(const te_substring_t* substr);
static void te_substring_till_end(te_substring_t* substr);
static bool te_substring_extract_buf(char* dst, const te_substring_t* src);
static bool te_substring_extract(te_string* dst, const te_substring_t* src);
int te_substring_compare(const te_substring_t* substr1, const te_substring_t* substr2);
int te_substring_compare_str(const te_substring_t* substr, const char* str);
bool te_substring_find(te_substring_t* substr, const char* str);
char te_substring_span(te_substring_t* substr, const char* cset, bool inverted);
size_t te_substring_skip(te_substring_t* substr, char skip, size_t at_most);
bool te_substring_strip_prefix(te_substring_t* substr, const char* prefix);
bool te_substring_strip_suffix(te_substring_t* substr, const char* suffix);
bool te_substring_strip_uint_suffix(te_substring_t* str, uintmax_t* suffix_val);
size_t te_substring_modify(te_substring_t* substr, te_substring_mod_op op, const char* fmt, ...);
size_t te_substring_modify_va(te_substring_t* substr, te_substring_mod_op op, const char* fmt, va_list args);
size_t te_substring_replace(te_substring_t* substr, const char* fmt, ...);
size_t te_substring_replace_va(te_substring_t* substr, const char* fmt, va_list args);
bool te_substring_insert_sep(te_substring_t* substr, char sep, bool at_bol);
bool te_substring_copy(te_substring_t* dst, const te_substring_t* src, te_substring_mod_op op);
bool te_substring_advance(te_substring_t* substr);
bool te_substring_limit(te_substring_t* substr, const te_substring_t* limit);
size_t te_string_replace_all_substrings(te_string* str, const char* new, const char* old);
bool te_string_replace_substring(te_string* str, const char* new, const char* old);

// macros

#define TE_SUBSTRING_INIT(base_)

Detailed Documentation

Typedefs

typedef struct te_substring_t te_substring_t

Structure for describing a piece of string

typedef enum te_substring_mod_op te_substring_mod_op

The mode of operation for substring modifications.

Global Functions

static void te_substring_invalidate(te_substring_t* substr)

Invalidate the substring so that it would be treated as pointing nowhere.

Parameters:

substr

Substring.

static bool te_substring_is_valid(const te_substring_t* substr)

Check that substring is valid.

The substring is considered valid if it is completely contained within its base string or if it has a length of zero and points right after the end of its base string.

Parameters:

substr

Substring.

Returns:

true if the substring is valid.

static bool te_substring_past_end(const te_substring_t* substr)

Check whether a substring points past the end of its base string.

If it is true, replacing a substring would effectively append data to the base string.

Parameters:

substr

Substring.

Returns:

true if the substring is valid and points past the end of its base string.

static void te_substring_till_end(te_substring_t* substr)

Extend the length of the substring to reach the end of its base string.

Parameters:

substr

Substring.

static bool te_substring_extract_buf(char* dst, const te_substring_t* src)

Copy the content of a substring into the C array.

The terminating '\0' is added to the array, so it should have enough space to hold src->len + 1 bytes.

If the substring is not valid, a single '\0' is written to the buffer.

Parameters:

dst

Target buffer.

src

Substring.

Returns:

true if src is a valid substring.

static bool te_substring_extract(te_string* dst, const te_substring_t* src)

Copy the content of a substring into the target string.

If the substring is not valid, nothing happens.

The function also returns true if the substring is valid but empty, in which case nothing is actually copied.

Parameters:

dst

Target string.

src

Substring.

Returns:

true if src is a valid substring.

int te_substring_compare(const te_substring_t* substr1, const te_substring_t* substr2)

Compare two substrings like strcmp().

An invalid substring always compares equal to another invalid substring and is considered less than any valid substring.

Parameters:

substr1

First substring.

substr2

Second substring.

Returns:

0, -1 or 1 depending on whether substr1 is equal, less or greater than substr2.

int te_substring_compare_str(const te_substring_t* substr, const char* str)

Compare a substring to a C string like strcmp().

An invalid substring compares equal to NULL and is less than any non-null string. A valid substring is always greater than NULL.

Parameters:

substr

Substring.

str

C string to compare (may be NULL).

Returns:

0, -1 or 1 depending on whether substr is equal, less or greater than str.

bool te_substring_find(te_substring_t* substr, const char* str)

Find str starting at the position of substr and update substr accordingly.

If str has not been found, te_substring_is_valid() will also return false after this call.

If te_substring_find() is called the second time without calling te_substring_advance(), it will effectively do nothing.

Parameters:

substr

Substring.

str

The string to find.

Returns:

true if str has been found.

char te_substring_span(te_substring_t* substr, const char* cset, bool inverted)

Make the substring cover the longest segment of characters entirely from cset (or entirely not from cset if inverted is true).

The starting point of the substring is not changed.

Parameters:

substr

Substring.

cset

Set of characters.

inverted

If true, consider characters not in cset.

Returns:

The first character after the initial segment, may be '\0'.

size_t te_substring_skip(te_substring_t* substr, char skip, size_t at_most)

Skip at most at_most occurrences of the character skip at the start of substr.

Unlike te_substring_span(), this function does move the starting point and the length is decreased if it’s not zero.

Returns:

The number of characters actually skipped.

bool te_substring_strip_prefix(te_substring_t* substr, const char* prefix)

Strip a prefix from a substring.

If substr starts with prefix, its starting point is moved to skip that prefix.

The underlying string is not modified.

Parameters:

substr

Substring.

prefix

Prefix to strip.

Returns:

true if prefix has been stripped.

See also:

te_str_strip_prefix()

bool te_substring_strip_suffix(te_substring_t* substr, const char* suffix)

Strip a suffix from a substring.

If substr ends with suffix, the length of the suffix is subtracted from the length of the substring.

The underlying string is not modified.

Parameters:

substr

Substring.

suffix

Suffix to strip.

Returns:

true if suffix has been stripped.

bool te_substring_strip_uint_suffix(te_substring_t* str, uintmax_t* suffix_val)

Strip a sequence of digits from the end of substring.

If the sequence is not empty, the resulting number is stored in suffix_val and the length of substr is diminished to exclude the numeric suffix.

Otherwise, substr is untouched and 0 is stored in suffix_val.

If the numeric suffix represents a number that does not fit into uintmax_t, it won’t be stripped.

The underlying string is never modified.

Parameters:

substr

Substring.

suffix_val

Place to store the numeric value of a suffix or zero (may be NULL).

Returns:

true if any digits have been stripped off.

size_t te_substring_modify(te_substring_t* substr, te_substring_mod_op op, const char* fmt, ...)

Modify a substring at a given position, changing the underlying te_string in place. If the substring is not valid, nothing happens. The exact behaviour depends on the value of op (see te_substring_mod_op).

The replacement string is constructed by applying printf() format to the arguments.

If fmt is NULL, the content of the substring is deleted (if op is TE_SUBSTRING_MOD_OP_REPLACE, otherwise it’s a no-op). No variadic arguments shall be present in this case.

The starting point of the substring remains the same in all cases and the length is adjusted according to the operation and the length of the replacement string (in contrast to te_substring_replace()).

Parameters:

substr

Substring.

op

Mode of operation.

fmt

Replacement format (may be NULL).

...

Format arguments.

Returns:

The length of the replacement string. Note that this does not include the length of the old substring in case of appending/prepending.

size_t te_substring_modify_va(te_substring_t* substr, te_substring_mod_op op, const char* fmt, va_list args)

Same as te_substring_modify() but accepts a variadic list argument. If fmt is NULL, the content of the substring is deleted (when op is TE_SUBSTRING_MOD_OP_REPLACE).

Parameters:

substr

Substring.

op

Mode of operation.

fmt

Replacement format (may be NULL).

args

Format arguments.

Returns:

The length of the replacement string.

size_t te_substring_replace(te_substring_t* substr, const char* fmt, ...)

Like te_substring_modify(), but the operation is always TE_SUBSTRING_MOD_OP_REPLACE.

Unlike te_substring_modify(), this function moves the starting point past the end of the replaced string and sets the substring length to zero, so it’s basically equivalent to calling te_substring_advance() after te_substring_modify().

Changed in 1.45.0: before that version, it accepted a fixed replacement string, not a format. It also used to return a pretty uninformative status code instead of the length.

Parameters:

substr

Substring.

fmt

Replacement format (may be NULL).

...

Format arguments.

Returns:

The length of the replacement string.

size_t te_substring_replace_va(te_substring_t* substr, const char* fmt, va_list args)

Same as te_substring_replace() but accepts a variadic list argument.

Parameters:

substr

Substring.

fmt

Replacement format (may be NULL).

args

Format arguments.

Returns:

The length of the replacement string.

bool te_substring_insert_sep(te_substring_t* substr, char sep, bool at_bol)

Inserts a separator at the start of the substring if there is none already.

The function checks the character immediately preceding the substring. If at_bol is true, the separator is also inserted if there is no preceding character at all.

Parameters:

substr

Substring.

sep

Separator.

at_bol

If true, insert at the start of the string.

Returns:

true if the separator has been inserted.

bool te_substring_copy(te_substring_t* dst, const te_substring_t* src, te_substring_mod_op op)

Copy the content of a substring into another substring.

If dst is invalid, no copying is done. If src is invalid, the content of dst is deleted.

dst and src must have different base strings.

The mode of operation is the same as for te_substring_modify().

Parameters:

dst

Target substring.

src

Source substring.

op

Modification mode.

Exceptions:

TE_FATAL_ERROR

if dst and src refer to the same string. This may change in the future.

Returns:

true if copying took place.

bool te_substring_advance(te_substring_t* substr)

Move the position of a substring by its length.

In other words, after this call the substring will point after the point where the substring ended initially. If the substring was not valid initially, nothing happens.

Parameters:

substr

Substring.

Returns:

true if the substring has been advanced.

bool te_substring_limit(te_substring_t* substr, const te_substring_t* limit)

Limit the length of the substr so that it would end right before the start of limit.

If any of the substrings is not valid, or if they have different base strings, or if limit starts before substr, nothing happens.

Parameters:

substr

Substring.

limit

Limiting substring.

Returns:

true if the substring has been limited.

size_t te_string_replace_all_substrings(te_string* str, const char* new, const char* old)

Replace all occurrences of a substring in a string.

Changed in 1.45.0: before that version the function returned a meaningless status code instead of the count.

Parameters:

str

The string in which to replace.

new

The replacement string (may be NULL).

old

The substring to be replaced.

Returns:

The number of performed replacements.

bool te_string_replace_substring(te_string* str, const char* new, const char* old)

Replace the first occurrence of substring in a string.

Changed in 1.45.0: before that version the function returned a meaningless status code instead of the success flag.

Parameters:

str

The string in which to replace.

new

The replacement (may be NULL).

old

The substring to be replaced.

Returns:

true if a substring has been replaced.

Macros

#define TE_SUBSTRING_INIT(base_)

Substring initializer

Bug: base_ may be a pointer to a const te_string without a warning from the compiler, but in this case the user shall not call te_substring_replace() on it. Currently this is not enforced, but that may be fixed later.

Parameters:

base_

Pointer to the base string.