From 375e3ffed496c995383156f9675fa95f145e05bf Mon Sep 17 00:00:00 2001
From: Warren Dukes <warren.dukes@gmail.com>
Date: Tue, 13 Apr 2004 02:20:46 +0000
Subject: add my own utf8/ascii converters and utf8 validator; validate all mpd
 tags on import, and if they are invalid, assume they are ascii and convert
 to utf8

git-svn-id: https://svn.musicpd.org/mpd/trunk@707 09075e82-0dd4-0310-85a5-a0d7c8717e4f
---
 src/Makefile.am |  4 +--
 src/tag.c       | 29 +++++++++++++++++++
 src/utf8.c      | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/utf8.h      | 14 ++++++++++
 4 files changed, 132 insertions(+), 2 deletions(-)
 create mode 100644 src/utf8.c
 create mode 100644 src/utf8.h

diff --git a/src/Makefile.am b/src/Makefile.am
index e3d7c0144..3b6717936 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -5,13 +5,13 @@ mpd_headers = buffer2array.h interface.h command.h playlist.h ls.h \
 	tag.h player.h listen.h conf.h ogg_decode.h volume.h flac_decode.h \
 	audio.h playerData.h stats.h myfprintf.h sig_handlers.h decode.h log.h \
 	audiofile_decode.h charConv.h permission.h mpd_types.h pcm_utils.h \
-	mp4_decode.h aac_decode.h signal_check.h
+	mp4_decode.h aac_decode.h signal_check.h utf8.h
 mpd_SOURCES = main.c buffer2array.c interface.c command.c playlist.c ls.c \
 	song.c list.c directory.c tables.c utils.c path.c mp3_decode.c \
 	tag.c player.c listen.c conf.c ogg_decode.c volume.c flac_decode.c \
 	audio.c playerData.c stats.c myfprintf.c sig_handlers.c decode.c log.c \
 	audiofile_decode.c charConv.c permission.c pcm_utils.c mp4_decode.c \
-	aac_decode.c signal_check.c $(mpd_headers)
+	aac_decode.c signal_check.c utf8.c $(mpd_headers)
 
 mpd_CFLAGS = $(MPD_CFLAGS)
 mpd_LDADD = $(MPD_LIBS) $(ID3_LIB) $(MAD_LIB) $(MP4FF_LIB)
diff --git a/src/tag.c b/src/tag.c
index bda1810be..2fabf5639 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -24,6 +24,7 @@
 #include "mp4_decode.h"
 #include "aac_decode.h"
 #include "utils.h"
+#include "utf8.h"
 
 #include <sys/stat.h>
 #include <stdlib.h>
@@ -57,6 +58,22 @@ void printMpdTag(FILE * fp, MpdTag * tag) {
 	if(tag->time>=0) myfprintf(fp,"Time: %i\n",tag->time);
 }
 
+/* If str is non-NULL and fails validUtf8String(), free it and point str at a converted copy. */
+#define fixUtf8(str) do { \
+	if((str) && !validUtf8String((unsigned char *)(str))) { \
+		char * temp = (char *)asciiStrToUtf8Dup((unsigned char *)(str)); \
+		free(str); \
+		(str) = temp; \
+	} \
+} while(0)
+/* Run fixUtf8 over the artist, album, track and title fields of tag. */
+void validateUtf8Tag(MpdTag * tag) {
+	fixUtf8(tag->artist);
+	fixUtf8(tag->album);
+	fixUtf8(tag->track);
+	fixUtf8(tag->title);
+}
+
 #ifdef HAVE_ID3TAG
 char * getID3Info(struct id3_tag * tag, char * id) {
 	struct id3_frame const * frame;
@@ -145,6 +162,8 @@ MpdTag * audiofileTagDup(char * utf8file) {
 		ret->time = time;
 	}
 
+	if(ret) validateUtf8Tag(ret);
+
 	return ret;
 }
 #endif
@@ -163,6 +182,8 @@ MpdTag * mp3TagDup(char * utf8file) {
 		ret->time = time;
 	}
 
+	if(ret) validateUtf8Tag(ret);
+
 	return ret;
 }
 #endif
@@ -179,6 +200,8 @@ MpdTag * aacTagDup(char * utf8file) {
 		ret->time = time;
 	}
 
+	if(ret) validateUtf8Tag(ret);
+
 	return ret;
 }
 
@@ -267,6 +290,8 @@ MpdTag * mp4TagDup(char * utf8file) {
 		}
 	}
 
+	if(ret) validateUtf8Tag(ret);
+
 	return ret;
 }
 #endif
@@ -329,6 +354,8 @@ MpdTag * oggTagDup(char * utf8file) {
 
 	ov_clear(&vf);
 
+	if(ret) validateUtf8Tag(ret);
+
 	return ret;	
 }
 #endif
@@ -441,6 +468,8 @@ MpdTag * flacTagDup(char * utf8file) {
 		}
 	}
 
+	if(ret) validateUtf8Tag(ret);
+
 	return ret;
 }
 #endif
diff --git a/src/utf8.c b/src/utf8.c
new file mode 100644
index 000000000..aa427e99b
--- /dev/null
+++ b/src/utf8.c
@@ -0,0 +1,87 @@
+#include "utf8.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* Encode one byte as a NUL-terminated UTF-8 sequence: values below 128 stay
+ * as they are, 128..255 become the two-byte form of the same code point.
+ * The result lives in a static buffer that the next call overwrites. */
+unsigned char * asciiToUtf8(unsigned char c) {
+	static unsigned char utf8[3];
+
+	utf8[0] = utf8[1] = utf8[2] = 0;
+	if(c >= 192) {
+		utf8[0] = 0xC3;
+		utf8[1] = c - 0x40;
+	}
+	else if(c >= 128) {
+		utf8[0] = 0xC2;
+		utf8[1] = c;
+	}
+	else utf8[0] = c;
+	return utf8;
+}
+
+/* Return a newly allocated, NUL-terminated copy of ascii in which every byte
+ * has been expanded by asciiToUtf8(). The caller must free() the result;
+ * NULL is returned when the buffer cannot be allocated. */
+unsigned char * asciiStrToUtf8Dup(unsigned char * ascii) {
+	/* utf8 should have at most two char's per ascii char */
+	size_t len = strlen((char *)ascii)*2+1;
+	unsigned char * ret = malloc(len);
+	unsigned char * cp = ret;
+	unsigned char * utf8;
+	unsigned char * shrunk;
+
+	if(!ret) return NULL;
+	while(*ascii) {
+		utf8 = asciiToUtf8(*ascii);
+		while(*utf8) *(cp++) = *(utf8++);
+		ascii++;
+	}
+	*cp = '\0';
+	/* shrink to fit; if that fails the original buffer is still usable */
+	shrunk = realloc(ret,(size_t)(cp-ret)+1);
+	return shrunk ? shrunk : ret;
+}
+
+/* Collapse a 1- or 2-byte UTF-8 sequence to one byte; '?' for any other lead byte. */
+unsigned char utf8ToAscii(unsigned char * utf8) {
+	unsigned char lead = utf8[0];
+
+	if(lead < 0x80) return lead;
+	if(lead != 0xC2 && lead != 0xC3) return '?';
+	return (lead == 0xC3 ? 64 : 0) + utf8[1];
+}
+
+/* Check the UTF-8 sequence starting at utf8Char. Returns its total length in
+ * bytes (lead byte included, 1..6), or 0 if the sequence is malformed. */
+int validateUtf8Char(unsigned char * utf8Char) {
+	int count = 1; /* number of continuation bytes the lead byte announces */
+	unsigned char t = 0x20;
+	int i;
+
+	if(utf8Char[0]<0x80) return 1;
+	/* 0x80..0xBF are continuation bytes; 0xFE and 0xFF are never lead bytes */
+	if(utf8Char[0]<0xC0 || utf8Char[0]>0xFD) return 0;
+
+	/* each further 1 bit below the 110xxxxx prefix adds one continuation byte */
+	for(; t & utf8Char[0]; t >>= 1) count++;
+
+	for(i=1;i<=count;i++) {
+		if(utf8Char[i] < 0x80 || utf8Char[i] > 0xBF) return 0;
+	}
+	return count+1; /* lead byte plus continuations, so callers can advance */
+}
+
+/* Return 1 if every character of the NUL-terminated string passes
+ * validateUtf8Char(), otherwise 0. */
+int validUtf8String(unsigned char * string) {
+	int step;
+
+	for(; *string; string += step) {
+		step = validateUtf8Char(string);
+		if(step == 0) return 0;
+	}
+	return 1;
+}
diff --git a/src/utf8.h b/src/utf8.h
new file mode 100644
index 000000000..1928a8a81
--- /dev/null
+++ b/src/utf8.h
@@ -0,0 +1,14 @@
+#ifndef UTF_8_H
+#define UTF_8_H
+/* Encode one byte as a NUL-terminated UTF-8 sequence held in a static buffer. */
+unsigned char * asciiToUtf8(unsigned char c);
+/* Return a heap-allocated copy of ascii with each byte run through asciiToUtf8(). */
+unsigned char * asciiStrToUtf8Dup(unsigned char * ascii);
+/* Collapse a one- or two-byte UTF-8 sequence to a single byte, or '?'. */
+unsigned char utf8ToAscii(unsigned char * utf8);
+/* Return 0 if the character at utf8Char is malformed, non-zero otherwise. */
+int validateUtf8Char(unsigned char * utf8Char);
+/* Return 1 if the whole NUL-terminated string validates, else 0. */
+int validUtf8String(unsigned char * string);
+
+#endif
-- 
cgit v1.2.3