irssi/src/core/recode.c
Joseph Bisch 0840eaec7b Make split functions return an array with NULL instead of NULL
This avoids undefined behavior in functions that call these split
functions and expect an array back instead of just a NULL pointer.
2017-10-20 15:22:32 +02:00

308 lines
7.1 KiB
C

/*
recode.c : irssi
Copyright (C) 1999-2000 Timo Sirainen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "module.h"
#include "settings.h"
#include "servers.h"
#include "signals.h"
#include "lib-config/iconfig.h"
#include "misc.h"
static char *translit_charset;
static gboolean term_is_utf8;
gboolean is_utf8(void)
{
return term_is_utf8;
}
static gboolean is_translit(const char *charset)
{
char *pos;
pos = stristr(charset, "//translit");
return (pos != NULL);
}
gboolean is_valid_charset(const char *charset)
{
GIConv cd;
char *to = NULL;
if (!charset || *charset == '\0')
return FALSE;
if (settings_get_bool("recode_transliterate") && !is_translit(charset))
charset = to = g_strconcat(charset, "//TRANSLIT", NULL);
cd = g_iconv_open(charset, "UTF-8");
g_free(to);
if (cd != (GIConv)-1) {
g_iconv_close(cd);
return TRUE;
}
return FALSE;
}
static char *find_conversion(const SERVER_REC *server, const char *target)
{
char *conv = NULL;
if (server != NULL && target != NULL) {
char *tagtarget = g_strdup_printf("%s/%s", server->tag, target);
conv = iconfig_get_str("conversions", tagtarget, NULL);
g_free(tagtarget);
}
if (conv == NULL && target != NULL)
conv = iconfig_get_str("conversions", target, NULL);
if (conv == NULL && server != NULL)
conv = iconfig_get_str("conversions", server->tag, NULL);
return conv;
}
static int str_is_ascii(const char *str)
{
int i;
for (i = 0; str[i] != '\0'; i++)
if (str[i] & 0x80)
return 0;
return 1;
}
char *recode_in(const SERVER_REC *server, const char *str, const char *target)
{
const char *from = NULL;
const char *to = translit_charset;
char *recoded = NULL;
gboolean str_is_utf8, recode, autodetect;
int len;
if (!str)
return NULL;
recode = settings_get_bool("recode");
if (!recode)
return g_strdup(str);
len = strlen(str);
/* Only validate for UTF-8 if an 8-bit encoding. */
str_is_utf8 = 0;
if (!str_is_ascii(str))
str_is_utf8 = g_utf8_validate(str, len, NULL);
else if (!strchr(str, '\e'))
str_is_utf8 = 1;
autodetect = settings_get_bool("recode_autodetect_utf8");
if (autodetect && str_is_utf8)
if (term_is_utf8)
return g_strdup(str);
else
from = "UTF-8";
else
from = find_conversion(server, target);
if (from)
recoded = g_convert_with_fallback(str, len, to, from, NULL, NULL, NULL, NULL);
if (!recoded) {
if (str_is_utf8)
if (term_is_utf8)
return g_strdup(str);
else
from = "UTF-8";
else
if (term_is_utf8)
from = settings_get_str("recode_fallback");
else
from = NULL;
if (from)
recoded = g_convert_with_fallback(str, len, to, from, NULL, NULL, NULL, NULL);
if (!recoded)
recoded = g_strdup(str);
}
return recoded;
}
char *recode_out(const SERVER_REC *server, const char *str, const char *target)
{
char *recoded = NULL;
const char *from = translit_charset;
const char *to = NULL;
char *translit_to = NULL;
gboolean translit, recode;
int len;
if (!str)
return NULL;
recode = settings_get_bool("recode");
if (!recode)
return g_strdup(str);
len = strlen(str);
translit = settings_get_bool("recode_transliterate");
to = find_conversion(server, target);
if (to == NULL)
/* default outgoing charset if set */
to = settings_get_str("recode_out_default_charset");
if (to && *to != '\0') {
if (translit && !is_translit(to))
to = translit_to = g_strconcat(to ,"//TRANSLIT", NULL);
recoded = g_convert(str, len, to, from, NULL, NULL, NULL);
}
g_free(translit_to);
if (!recoded)
recoded = g_strdup(str);
return recoded;
}
char **recode_split(const SERVER_REC *server, const char *str,
const char *target, int len, gboolean onspace)
{
GIConv cd = (GIConv)-1;
const char *from = translit_charset;
const char *to = translit_charset;
char *translit_to = NULL;
const char *inbuf = str;
const char *previnbuf = inbuf;
char *tmp = NULL;
char *outbuf;
gsize inbytesleft = strlen(inbuf);
gsize outbytesleft = len;
int n = 0;
char **ret;
g_warn_if_fail(str != NULL);
if (str == NULL) {
ret = g_new(char *, 1);
ret[0] = NULL;
return ret;
}
if (settings_get_bool("recode")) {
to = find_conversion(server, target);
if (to == NULL)
/* default outgoing charset if set */
to = settings_get_str("recode_out_default_charset");
if (to != NULL && *to != '\0') {
if (settings_get_bool("recode_transliterate") &&
!is_translit(to))
to = translit_to = g_strconcat(to,
"//TRANSLIT",
NULL);
} else {
to = from;
}
}
cd = g_iconv_open(to, from);
if (cd == (GIConv)-1) {
/* Fall back to splitting by byte. */
ret = strsplit_len(str, len, onspace);
goto out;
}
tmp = g_malloc(outbytesleft);
outbuf = tmp;
ret = g_new(char *, 1);
while (g_iconv(cd, (char **)&inbuf, &inbytesleft, &outbuf,
&outbytesleft) == -1) {
if (errno != E2BIG) {
/*
* Conversion failed. Fall back to splitting
* by byte.
*/
ret[n] = NULL;
g_strfreev(ret);
ret = strsplit_len(str, len, onspace);
goto out;
}
/* Outbuf overflowed, split the input string. */
if (onspace) {
/*
* Try to find a space to split on and leave
* the space on the previous line.
*/
int i;
for (i = 0; i < inbuf - previnbuf; i++) {
if (previnbuf[inbuf-previnbuf-1-i] == ' ') {
inbuf -= i;
inbytesleft += i;
break;
}
}
}
ret[n++] = g_strndup(previnbuf, inbuf - previnbuf);
ret = g_renew(char *, ret, n + 1);
previnbuf = inbuf;
/* Reset outbuf for the next substring. */
outbuf = tmp;
outbytesleft = len;
}
/* Copy the last substring into the array. */
ret[n++] = g_strndup(previnbuf, inbuf - previnbuf);
ret = g_renew(char *, ret, n + 1);
ret[n] = NULL;
out:
if (cd != (GIConv)-1)
g_iconv_close(cd);
g_free(translit_to);
g_free(tmp);
return ret;
}
void recode_update_charset(void)
{
const char *charset = settings_get_str("term_charset");
term_is_utf8 = !g_ascii_strcasecmp(charset, "UTF-8");
g_free(translit_charset);
if (settings_get_bool("recode_transliterate") && !is_translit(charset))
translit_charset = g_strconcat(charset, "//TRANSLIT", NULL);
else
translit_charset = g_strdup(charset);
}
void recode_init(void)
{
settings_add_bool("misc", "recode", TRUE);
settings_add_str("misc", "recode_fallback", "CP1252");
settings_add_str("misc", "recode_out_default_charset", "");
settings_add_bool("misc", "recode_transliterate", TRUE);
settings_add_bool("misc", "recode_autodetect_utf8", TRUE);
}
void recode_deinit(void)
{
g_free(translit_charset);
}