#include "utf8utils.h"
#include "str-utils.h"

Include dependency graph for utf8utils.c:

Functions
void	append_unsafe_utf8_as_escaped (GString *escaped_output, const gchar *raw, gssize raw_len, guint32 unsafe_flags, const gchar *control_format, const gchar *invalid_format)

void	append_unsafe_utf8_as_escaped_binary (GString *escaped_string, const gchar *str, gssize str_len, guint32 unsafe_flags)

gchar *	convert_unsafe_utf8_to_escaped_binary (const gchar *str, gssize str_len, guint32 unsafe_flags)

void	append_unsafe_utf8_as_escaped_text (GString *escaped_string, const gchar *str, gssize str_len, guint32 unsafe_flags)

gchar *	convert_unsafe_utf8_to_escaped_text (const gchar *str, gssize str_len, guint32 unsafe_flags)

Function Documentation

◆ append_unsafe_utf8_as_escaped()

void append_unsafe_utf8_as_escaped	(	GString *	escaped_output,
		const gchar *	raw,
		gssize	raw_len,
		guint32	unsafe_flags,
		const gchar *	control_format,
		const gchar *	invalid_format
	)

See also: _append_escaped_utf8_character()

◆ append_unsafe_utf8_as_escaped_binary()

void append_unsafe_utf8_as_escaped_binary	(	GString *	escaped_string,
		const gchar *	str,
		gssize	str_len,
		guint32	unsafe_flags
	)

This function escapes an unsanitized input (e.g. one that can contain binary characters) and produces an escaped format that can be unescaped if needed and that is guaranteed to be utf8 clean. The major difference between the "binary" and "text" forms is that, with the "binary" form, the receiver is able to cope with \xXX sequences that can incorporate invalid utf8 sequences when decoded. With the "text" format, we never embed anything that would become invalid utf8 when decoded.

Here are the rules that the routine follows:

well-known control characters are escaped (0x0a as \n and so on)
other control characters as per control_format (\xXX)
backslash is escaped as \\
any additional characters (only ASCII is supported) as \<char>
invalid utf8 sequences are converted as per invalid_format (\xXX)
utf8 characters are reproduced as is

This is basically meant to be used when sending data to 8 bit clean receivers, e.g. syslog-ng or WELF.

See also: append_unsafe_utf8_as_escaped()

◆ append_unsafe_utf8_as_escaped_text()

void append_unsafe_utf8_as_escaped_text	(	GString *	escaped_string,
		const gchar *	str,
		gssize	str_len,
		guint32	unsafe_flags
	)

This function escapes an unsanitized input (e.g. one that can contain binary characters) and produces an escaped format that can be unescaped if needed and that is guaranteed to be utf8 clean. The major difference between the "binary" and "text" forms is that, with the "binary" form, the receiver is able to cope with \xXX sequences that can incorporate invalid utf8 sequences when decoded. With the "text" format, we never embed anything that would become invalid utf8 when decoded.

Here are the rules that the routine follows:

well-known control characters are escaped (0x0a as \n and so on)
other control characters as per control_format (\xXX)
backslash is escaped as \\
any additional characters (only ASCII is supported) as \<char>
invalid utf8 sequences are converted as per invalid_format (\xXX)
utf8 characters are reproduced as is

This is basically meant to be used when sending data to utf8 only receivers, e.g. JSON.

See also: append_unsafe_utf8_as_escaped()

◆ convert_unsafe_utf8_to_escaped_binary()

gchar* convert_unsafe_utf8_to_escaped_binary	(	const gchar *	str,
		gssize	str_len,
		guint32	unsafe_flags
	)

◆ convert_unsafe_utf8_to_escaped_text()