/* ================================================================ */
/*
File:   ConvertUTF.h
Author: Mark E. Davis
Copyright (C) 1994 Taligent, Inc. All rights reserved.

This code is copyrighted. Under the copyright laws, this code may not
be copied, in whole or part, without prior written consent of Taligent.

Taligent grants the right to use or reprint this code as long as this
ENTIRE copyright notice is reproduced in the code or reproduction.
The code is provided AS-IS, AND TALIGENT DISCLAIMS ALL WARRANTIES,
EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN
NO EVENT WILL TALIGENT BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING,
WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN
IF TALIGENT HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE
LIMITATION MAY NOT APPLY TO YOU.

RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the
government is subject to restrictions as set forth in subparagraph
(c)(l)(ii) of the Rights in Technical Data and Computer Software
clause at DFARS 252.227-7013 and FAR 52.227-19.

This code may be protected by one or more U.S. and International
Patents.

TRADEMARKS: Taligent and the Taligent Design Mark are registered
trademarks of Taligent, Inc.
*/
/* ================================================================ */

#ifndef __cvtutf_H__
#define __cvtutf_H__

#include <stdio.h>
#include <stdlib.h>
/* #include <types.h>  -- intentionally disabled */
#include <string.h>

/* ================================================================ */
/*  The following 4 definitions are compiler-specific.
    I would use wchar_t for UCS2/UTF16, except that the C standard
    does not guarantee that it has at least 16 bits, so wchar_t is
    no less portable than unsigned short!

    NOTE(review): UCS4 is `unsigned long`, which is wider than 32 bits
    on LP64 targets. The type is left as declared because changing it
    would change the pointer types in every prototype below.
*/

typedef unsigned long   UCS4;   /* one UCS-4 code unit */
typedef unsigned short  UCS2;   /* one UCS-2 code unit */
typedef unsigned short  UTF16;  /* one UTF-16 code unit */
typedef unsigned char   UTF8;   /* one UTF-8 code unit (byte) */
#define unichar UTF16           /* alias used by the NSConvert* prototypes */

/* typedef enum {false, true} Boolean;  -- intentionally disabled */

/*  Named constants. They have internal linkage, so every translation
    unit that includes this header gets its own copy. */
static const UCS4 kReplacementCharacter = 0x0000FFFDUL;
static const UCS4 kMaximumUCS2          = 0x0000FFFFUL;
static const UCS4 kMaximumUTF16         = 0x0010FFFFUL;
static const UCS4 kMaximumUCS4          = 0x7FFFFFFFUL;

/* ================================================================ */
/*  Each of these routines converts the text between *sourceStart and
    sourceEnd, putting the result into the buffer between *targetStart and
    targetEnd. Note: the end pointers are *after* the last item: e.g.
    *(sourceEnd - 1) is the last item.

    The return result indicates whether the conversion was successful,
    and if not, whether the problem was in the source or target buffers.

    After the conversion, *sourceStart and *targetStart are both
    updated to point to the end of last text successfully converted in
    the respective buffers.
*/

typedef enum {
    ok,                 /* conversion successful */
    sourceExhausted,    /* partial character in source, but hit end */
    targetExhausted     /* insuff. room in target for conversion */
} ConversionResult;

/*  NOTE(review): in the UTF16- and UTF8-source converters below,
    sourceEnd is not const-qualified, unlike in the UCS4-source variants.
    The qualifiers are kept exactly as declared so that these prototypes
    keep matching their definitions. */

ConversionResult ConvertUCS4toUTF16 (
        UCS4** sourceStart, const UCS4* sourceEnd,
        UTF16** targetStart, const UTF16* targetEnd);

ConversionResult ConvertUTF16toUCS4 (
        UTF16** sourceStart, UTF16* sourceEnd,
        UCS4** targetStart, const UCS4* targetEnd);

/*  NOTE(review): the two NSConvert* routines return int rather than
    ConversionResult; presumably the same result codes -- confirm
    against the implementation. */
int NSConvertUTF16toUTF8(unichar **sourceStart,
                         const unichar *sourceEnd,
                         unsigned char **targetStart,
                         const unsigned char *targetEnd);
int NSConvertUTF8toUTF16(unsigned char **sourceStart, unsigned char *sourceEnd,
                         unichar **targetStart, const unichar *targetEnd);

ConversionResult ConvertUCS4toUTF8 (
        UCS4** sourceStart, const UCS4* sourceEnd,
        UTF8** targetStart, const UTF8* targetEnd);

ConversionResult ConvertUTF8toUCS4 (
        UTF8** sourceStart, UTF8* sourceEnd,
        UCS4** targetStart, const UCS4* targetEnd);

/* ================================================================ */

#endif /* __cvtutf_H__ */