Quick Search:

View

Revision:
Expand:  
Changeset: MAIN:plunky:20140528065541

Diff

Diff from 1.5 to:

Annotations

Annotate by Age | Author | Mixed | None
/fisheye/browse/pcc/pcc/mip/unicode.c

Annotated File View

ragge
1.1
1 /*
2  * Copyright (c) 2014 Eric Olson <ejolson@renomath.org>
3  * Some rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
20  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
22  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <stdio.h>
31 #include <ctype.h>
plunky
1.5
32 #include "pass1.h"
33 #include "manifest.h"
ragge
1.1
34 #include "unicode.h"
35
plunky
1.4
/*
 * Encode the code point c as UTF-8 at p and return the position just
 * past the last byte written.
 *
 * The original 31-bit form of UTF-8 is produced, so values up to
 * 0x7FFFFFFF are accepted and take between one and six bytes; the
 * caller must provide room for six bytes.  Larger values are reported
 * through u8error(); nothing is stored for them and p is returned
 * unchanged.
 */
char *
cp2u8(char *p, unsigned int c)
{
	unsigned char *s = (unsigned char *)p;
	int trail;	/* continuation bytes still to be written */

	if (c > 0x7FFFFFFF) {
		/* Do not emit a headless run of continuation bytes. */
		u8error("invalid unicode code point");
		return p;
	}

	if (c <= 0x7F) {
		*s++ = c;
		return (char *)s;
	}

	/* Lead byte: length marker plus the topmost payload bits. */
	if (c <= 0x07FF) {
		*s++ = 0xC0 | (c >> 6);
		trail = 1;
	} else if (c <= 0xFFFF) {
		*s++ = 0xE0 | (c >> 12);
		trail = 2;
	} else if (c <= 0x1FFFFF) {
		*s++ = 0xF0 | (c >> 18);
		trail = 3;
	} else if (c <= 0x3FFFFFF) {
		*s++ = 0xF8 | (c >> 24);
		trail = 4;
	} else {
		/* Six-byte form starts with 1111110x, i.e. 0xFC, not 0xF8. */
		*s++ = 0xFC | (c >> 30);
		trail = 5;
	}

	/* Continuation bytes carry six payload bits each, high bits first. */
	while (trail-- > 0)
		*s++ = 0x80 | ((c >> (6 * trail)) & 0x3F);

	return (char *)s;
}
76
plunky
1.4
/*
 * Decode one UTF-8 sequence starting at *q and return its code point.
 *
 * On success *q is moved past every byte of the sequence.  If the first
 * byte is not a valid lead byte, or one of the expected continuation
 * bytes is missing, the problem is reported through u8error(), *q is
 * advanced by a single byte and 0xFFFF is returned.
 */
unsigned int
u82cp(char **q)
{
	unsigned char *cur = (unsigned char *)*q;
	unsigned int lead = *cur;
	unsigned int cp;
	int need, k;

	/* Single-byte (ASCII) case. */
	if (lead <= 0x7F) {
		*q = (char *)(cur + 1);
		return lead;
	}

	/* The lead byte gives the sequence length and the top payload bits. */
	if ((lead & 0xE0) == 0xC0) {
		need = 2;
		cp = lead & 0x1F;
	} else if ((lead & 0xF0) == 0xE0) {
		need = 3;
		cp = lead & 0x0F;
	} else if ((lead & 0xF8) == 0xF0) {
		need = 4;
		cp = lead & 0x07;
	} else if ((lead & 0xFC) == 0xF8) {
		need = 5;
		cp = lead & 0x03;
	} else if ((lead & 0xFE) == 0xFC) {
		need = 6;
		cp = lead & 0x01;
	} else {
		u8error("invalid utf-8 prefix");
		(*q)++;
		return 0xFFFF;
	}

	/* Fold in six bits from each continuation byte (10xxxxxx). */
	cur++;
	for (k = 1; k < need; k++, cur++) {
		if ((*cur & 0xC0) != 0x80) {
			u8error("utf-8 encoding %d bytes too short", need - k);
			(*q)++;
			return 0xFFFF;
		}
		cp = (cp << 6) + (*cur & 0x3F);
	}

	*q = (char *)cur;
	return cp;
}
126
plunky
1.4
/*
 * Return the length in bytes of the UTF-8 sequence starting at t.
 *
 * The lead byte selects a length of 2 to 6, and each following byte of
 * the sequence must be a continuation byte (10xxxxxx).  An ASCII byte,
 * an invalid lead byte and a sequence cut short all give 1, so the
 * result is never less than 1.  Scanning stops at the first byte that
 * is not a continuation byte, which includes a terminating NUL.
 */
int
u8len(char *t)
{
	unsigned int c = (unsigned char)*t;
	int sz, i;

	if (c <= 0x7F)
		return 1;

	if ((c & 0xE0) == 0xC0)
		sz = 2;
	else if ((c & 0xF0) == 0xE0)
		sz = 3;
	else if ((c & 0xF8) == 0xF0)
		sz = 4;
	else if ((c & 0xFC) == 0xF8)
		sz = 5;
	else if ((c & 0xFE) == 0xFC)
		sz = 6;
	else
		return 1;	/* stray continuation byte or 0xFE/0xFF */

	for (i = 1; i < sz; i++) {
		c = (unsigned char)*++t;
		if ((c & 0xC0) != 0x80)
			return 1;	/* sequence is too short */
	}
	return sz;
}
151
152 unsigned int 
153 esc2char(char **q)
154 {
155         unsigned int v;
156         unsigned char *t=(unsigned char *)*q;
157         unsigned int c=*t;
158         if(c=='\\') {
159                 int i;
160                 switch(v=*++t){
161 case 'a':
162                         c='\a'break;
163 case 'b':
164                         c='\b'break;
plunky
1.3
165 #ifdef GCC_COMPAT
166 case 'e':
167                         c='\033'break;
168 #endif
ragge
1.1
169 case 't':
170                         c='\t'break;
171 case 'n':
172                         c='\n'break;
173 case 'v':
174                         c='\v'break;
175 case 'f':
176                         c='\f'break;
177 case 'r':
178                         c='\r'break;
179 case '\\':
180                         c='\\'break;
181 case '\'':
182                         c='\''break;
183 case '\"':
184                         c='\"'break;
plunky
1.2
185 case '\?':
186                         c='\?'break;
ragge
1.1
187 case 'x':
188                         v=*++t;
189                         for(i=0,c=0;;v=t[++i]){
190                                 v=toupper(v);
191                                 if(v>='0' && v<='9'c=(c<<4)+v-'0';
192                                 else if(v>='A' && v<='F'c=(c<<4)+v-'A'+10;
193                                 else break;
194                         }
195                         *q=(char *)t+i;
plunky
1.5
196                         return (xuchar == 0 && c > MAX_CHAR) ? (c - MAX_UCHAR - 1) : (c);
ragge
1.1
197 default:
198                         for(i=0,c=0;i<3;v=t[++i]){
199                                 if(v>='0' && v<='7'c=(c<<3)+v-'0';
200                                 else if(i==0) {
201                                         u8error("unknown escape sequence \\%c",v);
202                                         c='\\';
203                                         break;
204                                 } else {
205                                         break;
206                                 }
207                         }
208                         *q=(char *)t+i;
plunky
1.5
209                         return (xuchar == 0 && c > MAX_CHAR) ? (c - MAX_UCHAR - 1) : (c);
ragge
1.1
210                 }
211         }
212         *q=(char *)t+1;
213         return c;
214 }
FishEye: Open Source License registered to PCC.
Your maintenance has expired. You can renew your license at http://www.atlassian.com/fisheye/renew
Atlassian FishEye, CVS analysis. (Version:1.6.3 Build:build-336 2008-11-04) - Administration - Page generated 2014-09-30 22:00 +0200