Quick Search:

View

Revision:
Expand:  
Changeset: MAIN:plunky:20140606153156

Diff

Diff from 1.7 to:

Annotations

Annotate by Age | Author | Mixed | None

Annotated File View

plunky
1.7
1 /*      $Id: unicode.c,v 1.7 2014/06/06 15:31:56 plunky Exp $   */
ragge
1.1
2 /*
3  * Copyright (c) 2014 Eric Olson <ejolson@renomath.org>
4  * Some rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
21  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
23  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
25  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
27  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30
31 #include <stdio.h>
32 #include <ctype.h>
plunky
1.5
33 #include "pass1.h"
34 #include "manifest.h"
ragge
1.1
35 #include "unicode.h"
36
plunky
1.7
37 #if 0
plunky
1.4
/*
 * Encode the code point c as UTF-8 at p, using the original 1..6 byte
 * scheme (values up to 0x7FFFFFFF).
 *
 * p must have room for up to six bytes.  Returns the position just past
 * the last byte written.  A value above 0x7FFFFFFF is reported through
 * u8error() and nothing is written: p is returned unchanged, so no
 * orphan continuation bytes end up in the output.
 */
char *
cp2u8(char *p, unsigned int c)
{
	unsigned char *s = (unsigned char *)p;
	unsigned int lead;	/* marker bits of the lead byte */
	int trail;		/* number of continuation bytes */

	if (c <= 0x7F) {
		*s++ = c;
		return (char *)s;
	}
	if (c > 0x7FFFFFFF) {
		u8error("invalid unicode code point");
		return p;
	}

	if (c <= 0x07FF) {
		trail = 1;
		lead = 0xC0;
	} else if (c <= 0xFFFF) {
		trail = 2;
		lead = 0xE0;
	} else if (c <= 0x1FFFFF) {
		trail = 3;
		lead = 0xF0;
	} else if (c <= 0x3FFFFFF) {
		trail = 4;
		lead = 0xF8;
	} else {
		/* six-byte form: the lead byte is 1111110x, i.e. 0xFC */
		trail = 5;
		lead = 0xFC;
	}

	/* lead byte carries the topmost payload bits */
	*s++ = lead | (c >> (6 * trail));
	/* each continuation byte carries the next six bits, high to low */
	while (trail-- > 0)
		*s++ = 0x80 | ((c >> (6 * trail)) & 0x3F);

	return (char *)s;
}
plunky
1.7
78 #endif
ragge
1.1
79
plunky
1.4
/*
 * Decode one UTF-8 sequence starting at *q and return its code point.
 *
 * On success *q is moved past the whole sequence (1..6 bytes).
 * If the first byte is not a valid lead byte, or a continuation byte
 * is missing, the problem is reported through u8error(), *q is moved
 * forward by a single byte and 0xFFFF is returned.
 */
unsigned int
u82cp(char **q)
{
	unsigned char *in = (unsigned char *)*q;
	unsigned int lead = *in++;
	unsigned int cp;
	int total;	/* length of the sequence announced by the lead byte */
	int left;	/* continuation bytes still to be read */

	/* plain ASCII: a single byte is the code point */
	if (lead <= 0x7F) {
		*q = (char *)in;
		return lead;
	}

	if ((lead & 0xE0) == 0xC0) {
		total = 2;
		cp = lead & 0x1F;
	} else if ((lead & 0xF0) == 0xE0) {
		total = 3;
		cp = lead & 0x0F;
	} else if ((lead & 0xF8) == 0xF0) {
		total = 4;
		cp = lead & 0x07;
	} else if ((lead & 0xFC) == 0xF8) {
		total = 5;
		cp = lead & 0x03;
	} else if ((lead & 0xFE) == 0xFC) {
		total = 6;
		cp = lead & 0x01;
	} else {
		u8error("invalid utf-8 prefix");
		(*q)++;
		return 0xFFFF;
	}

	for (left = total - 1; left > 0; left--) {
		if ((*in & 0xC0) != 0x80) {
			u8error("utf-8 encoding %d bytes too short", left);
			(*q)++;
			return 0xFFFF;
		}
		/* append the six payload bits of this continuation byte */
		cp = (cp << 6) + (*in++ & 0x3F);
	}

	*q = (char *)in;
	return cp;
}
129
plunky
1.4
/*
 * Return the length in bytes of the UTF-8 sequence starting at t.
 *
 * The lead byte selects a length of 2..6, and each of the following
 * bytes must be a continuation byte (10xxxxxx) for that length to be
 * returned.  1 is returned for an ASCII byte, for a byte that is not a
 * valid lead byte, and for a sequence whose continuation bytes are
 * missing or malformed.
 */
int
u8len(char *t)
{
	unsigned int c = (unsigned char)*t;
	int sz;
	int i;

	if (c <= 0x7F)
		return 1;

	if ((c & 0xE0) == 0xC0)
		sz = 2;
	else if ((c & 0xF0) == 0xE0)
		sz = 3;
	else if ((c & 0xF8) == 0xF0)
		sz = 4;
	else if ((c & 0xFC) == 0xF8)
		sz = 5;
	else if ((c & 0xFE) == 0xFC)
		sz = 6;
	else
		return 1;

	/* the terminating NUL is not a continuation byte, so the scan
	 * stops at the end of a NUL-terminated string */
	for (i = 1; i < sz; i++) {
		c = (unsigned char)*++t;
		if ((c & 0xC0) != 0x80)
			return 1;
	}
	return sz;
}
154
155 unsigned int 
156 esc2char(char **q)
157 {
158         unsigned int v;
159         unsigned char *t=(unsigned char *)*q;
160         unsigned int c=*t;
161         if(c=='\\') {
162                 int i;
163                 switch(v=*++t){
164 case 'a':
165                         c='\a'break;
166 case 'b':
167                         c='\b'break;
plunky
1.3
168 #ifdef GCC_COMPAT
169 case 'e':
170                         c='\033'break;
171 #endif
ragge
1.1
172 case 't':
173                         c='\t'break;
174 case 'n':
175                         c='\n'break;
176 case 'v':
177                         c='\v'break;
178 case 'f':
179                         c='\f'break;
180 case 'r':
181                         c='\r'break;
182 case '\\':
183                         c='\\'break;
184 case '\'':
185                         c='\''break;
186 case '\"':
187                         c='\"'break;
plunky
1.2
188 case '\?':
189                         c='\?'break;
ragge
1.1
190 case 'x':
191                         v=*++t;
192                         for(i=0,c=0;;v=t[++i]){
193                                 v=toupper(v);
194                                 if(v>='0' && v<='9'c=(c<<4)+v-'0';
195                                 else if(v>='A' && v<='F'c=(c<<4)+v-'A'+10;
196                                 else break;
197                         }
198                         *q=(char *)t+i;
plunky
1.5
199                         return (xuchar == 0 && c > MAX_CHAR) ? (c - MAX_UCHAR - 1) : (c);
ragge
1.1
200 default:
201                         for(i=0,c=0;i<3;v=t[++i]){
202                                 if(v>='0' && v<='7'c=(c<<3)+v-'0';
203                                 else if(i==0) {
204                                         u8error("unknown escape sequence \\%c",v);
205                                         c='\\';
206                                         break;
207                                 } else {
208                                         break;
209                                 }
210                         }
211                         *q=(char *)t+i;
plunky
1.5
212                         return (xuchar == 0 && c > MAX_CHAR) ? (c - MAX_UCHAR - 1) : (c);
ragge
1.1
213                 }
214         }
215         *q=(char *)t+1;
216         return c;
217 }
FishEye: Open Source License registered to PCC.
Your maintenance has expired. You can renew your license at http://www.atlassian.com/fisheye/renew
Atlassian FishEye, CVS analysis. (Version:1.6.3 Build:build-336 2008-11-04) - Administration - Page generated 2014-09-22 10:16 +0200