Quick Search:

View

Revision:
Expand:  
Changeset: MAIN:plunky:20140528065858

Diff

Diff from 1.6 to:

Annotations

Annotate by Age | Author | Mixed | None
/fisheye/browse/pcc/pcc/mip/unicode.c

Annotated File View

plunky
1.6
1 /*      $Id: unicode.c,v 1.6 2014/05/28 06:58:58 plunky Exp $   */
ragge
1.1
2 /*
3  * Copyright (c) 2014 Eric Olson <ejolson@renomath.org>
4  * Some rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
21  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
23  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
25  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
27  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30
31 #include <stdio.h>
32 #include <ctype.h>
plunky
1.5
33 #include "pass1.h"
34 #include "manifest.h"
ragge
1.1
35 #include "unicode.h"
36
plunky
1.4
37 /*
38  * encode a 32-bit code point as UTF-8
39  * return end position
40  */
ragge
1.1
/*
 * Encode code point c as UTF-8 starting at p.
 *
 * Values up to 0x7FFFFFFF are encoded using the original 1..6 byte
 * UTF-8 scheme (lead bytes 0xxxxxxx, 110xxxxx, 1110xxxx, 11110xxx,
 * 111110xx, 1111110x followed by 10xxxxxx continuation bytes).
 *
 * Returns the position just past the last byte written.  For a value
 * above 0x7FFFFFFF an error is reported through u8error(), nothing is
 * written and p itself is returned.
 *
 * The caller must provide room for up to 6 bytes at p.
 */
char *
cp2u8(char *p, unsigned int c)
{
	unsigned char *s = (unsigned char *)p;
	int n;		/* continuation bytes still to be written */

	if (c <= 0x7F) {
		*s++ = c;
		return (char *)s;
	}

	if (c <= 0x07FF) {
		*s++ = 0xC0 | (c >> 6);
		n = 1;
	} else if (c <= 0xFFFF) {
		*s++ = 0xE0 | (c >> 12);
		n = 2;
	} else if (c <= 0x1FFFFF) {
		*s++ = 0xF0 | (c >> 18);
		n = 3;
	} else if (c <= 0x3FFFFFF) {
		*s++ = 0xF8 | (c >> 24);
		n = 4;
	} else if (c <= 0x7FFFFFFF) {
		/*
		 * The 6-byte lead is 1111110x (0xFC); 0xF8 would mark a
		 * 5-byte sequence and u82cp() would decode it as such.
		 */
		*s++ = 0xFC | (c >> 30);
		n = 5;
	} else {
		/* Do not emit continuation bytes without a lead byte. */
		u8error("invalid unicode code point");
		return p;
	}

	/* Remaining payload, 6 bits per byte, most significant first. */
	while (n-- > 0)
		*s++ = 0x80 | ((c >> (6 * n)) & 0x3F);

	return (char *)s;
}
77
plunky
1.4
78 /*
79  * decode 32-bit code point from UTF-8
80  * move pointer
81  */
ragge
1.1
/*
 * Decode one UTF-8 sequence (1..6 bytes) starting at *q and return its
 * code point.
 *
 * On success *q is moved past the whole sequence.  If the first byte is
 * not a valid lead byte, or a continuation byte (10xxxxxx) is missing,
 * an error is reported through u8error(), *q is advanced by a single
 * byte and 0xFFFF is returned.
 */
unsigned int
u82cp(char **q)
{
	unsigned char *cur = (unsigned char *)*q;
	unsigned int lead = *cur++;
	unsigned int cp;
	int len, left;

	/* Plain ASCII: one byte, value is the byte itself. */
	if (lead <= 0x7F) {
		*q = (char *)cur;
		return lead;
	}

	/* The lead byte gives the sequence length and the top payload bits. */
	if ((lead & 0xE0) == 0xC0) {
		len = 2;
		cp = lead & 0x1F;
	} else if ((lead & 0xF0) == 0xE0) {
		len = 3;
		cp = lead & 0x0F;
	} else if ((lead & 0xF8) == 0xF0) {
		len = 4;
		cp = lead & 0x07;
	} else if ((lead & 0xFC) == 0xF8) {
		len = 5;
		cp = lead & 0x03;
	} else if ((lead & 0xFE) == 0xFC) {
		len = 6;
		cp = lead & 0x01;
	} else {
		u8error("invalid utf-8 prefix");
		*q += 1;
		return 0xFFFF;
	}

	/* 'left' counts the continuation bytes that are still expected. */
	for (left = len - 1; left > 0; left--) {
		if ((*cur & 0xC0) != 0x80) {
			u8error("utf-8 encoding %d bytes too short", left);
			*q += 1;
			return 0xFFFF;
		}
		cp = (cp << 6) + (*cur++ & 0x3F);
	}

	*q = (char *)cur;
	return cp;
}
127
plunky
1.4
128 /*
129  * return length of UTF-8 code point
130  */
ragge
1.1
/*
 * Return the length in bytes of the UTF-8 sequence starting at t.
 *
 * The length (2..6) is taken from the lead byte and is only returned
 * when every following byte is a continuation byte (10xxxxxx).
 * ASCII bytes, invalid lead bytes and truncated sequences all yield 1.
 * A terminating NUL is not a continuation byte, so the scan never
 * reads past the end of a NUL-terminated string.
 */
int
u8len(char *t)
{
	const unsigned char *b = (const unsigned char *)t;
	unsigned int c = b[0];
	int sz, i;

	if (c <= 0x7F)
		return 1;

	if ((c & 0xE0) == 0xC0)
		sz = 2;
	else if ((c & 0xF0) == 0xE0)
		sz = 3;
	else if ((c & 0xF8) == 0xF0)
		sz = 4;
	else if ((c & 0xFC) == 0xF8)
		sz = 5;
	else if ((c & 0xFE) == 0xFC)
		sz = 6;
	else
		return 1;	/* stray continuation byte or 0xFE/0xFF */

	for (i = 1; i < sz; i++) {
		if ((b[i] & 0xC0) != 0x80)
			return 1;	/* sequence is cut short */
	}
	return sz;
}
152
153 unsigned int 
154 esc2char(char **q)
155 {
156         unsigned int v;
157         unsigned char *t=(unsigned char *)*q;
158         unsigned int c=*t;
159         if(c=='\\') {
160                 int i;
161                 switch(v=*++t){
162 case 'a':
163                         c='\a'break;
164 case 'b':
165                         c='\b'break;
plunky
1.3
166 #ifdef GCC_COMPAT
167 case 'e':
168                         c='\033'break;
169 #endif
ragge
1.1
170 case 't':
171                         c='\t'break;
172 case 'n':
173                         c='\n'break;
174 case 'v':
175                         c='\v'break;
176 case 'f':
177                         c='\f'break;
178 case 'r':
179                         c='\r'break;
180 case '\\':
181                         c='\\'break;
182 case '\'':
183                         c='\''break;
184 case '\"':
185                         c='\"'break;
plunky
1.2
186 case '\?':
187                         c='\?'break;
ragge
1.1
188 case 'x':
189                         v=*++t;
190                         for(i=0,c=0;;v=t[++i]){
191                                 v=toupper(v);
192                                 if(v>='0' && v<='9'c=(c<<4)+v-'0';
193                                 else if(v>='A' && v<='F'c=(c<<4)+v-'A'+10;
194                                 else break;
195                         }
196                         *q=(char *)t+i;
plunky
1.5
197                         return (xuchar == 0 && c > MAX_CHAR) ? (c - MAX_UCHAR - 1) : (c);
ragge
1.1
198 default:
199                         for(i=0,c=0;i<3;v=t[++i]){
200                                 if(v>='0' && v<='7'c=(c<<3)+v-'0';
201                                 else if(i==0) {
202                                         u8error("unknown escape sequence \\%c",v);
203                                         c='\\';
204                                         break;
205                                 } else {
206                                         break;
207                                 }
208                         }
209                         *q=(char *)t+i;
plunky
1.5
210                         return (xuchar == 0 && c > MAX_CHAR) ? (c - MAX_UCHAR - 1) : (c);
ragge
1.1
211                 }
212         }
213         *q=(char *)t+1;
214         return c;
215 }
FishEye: Open Source License registered to PCC.
Atlassian FishEye, CVS analysis. (Version:1.6.3 Build:build-336 2008-11-04) - Administration - Page generated 2014-12-21 03:36 +0100