Quick Search:

View

Revision:
Expand:  
Changeset: MAIN:plunky:20140527190007

Diff

Diff from 1.4 to:

Annotations

Annotate by Age | Author | Mixed | None
/fisheye/browse/pcc/pcc/mip/unicode.c

Annotated File View

ragge
1.1
1 /*
2  * Copyright (c) 2014 Eric Olson <ejolson@renomath.org>
3  * Some rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
20  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
22  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <stdio.h>
31 #include <ctype.h>
32 #include "unicode.h"
33
plunky
1.4
34 /*
35  * encode a 32-bit code point as UTF-8
36  * return end position
37  */
ragge
1.1
/*
 * Encode code point c as UTF-8 into the buffer at p.
 *
 * Uses the original (pre-RFC 3629) UTF-8 layout, so values up to
 * 0x7FFFFFFF are accepted and produce between 1 and 6 bytes; the caller
 * must provide room for 6 bytes.  No terminating NUL is written.
 *
 * Returns a pointer to the byte following the last one written.
 *
 * For c > 0x7FFFFFFF an error is reported through u8error(); the four
 * low-order continuation bytes are still written (without a lead byte),
 * which keeps the amount of output unchanged for existing callers.
 */
char *
cp2u8(char *p, unsigned int c)
{
	unsigned char *s = (unsigned char *)p;
	int ncont;	/* number of continuation bytes still to emit */

	if (c <= 0x7F) {
		*s++ = c;
		return (char *)s;
	}

	if (c <= 0x07FF) {
		*s++ = 0xC0 | (c >> 6);
		ncont = 1;
	} else if (c <= 0xFFFF) {
		*s++ = 0xE0 | (c >> 12);
		ncont = 2;
	} else if (c <= 0x1FFFFF) {
		*s++ = 0xF0 | (c >> 18);
		ncont = 3;
	} else if (c <= 0x3FFFFFF) {
		*s++ = 0xF8 | (c >> 24);
		ncont = 4;
	} else if (c <= 0x7FFFFFFF) {
		/*
		 * The lead byte of a 6-byte sequence is 1111110x.  Using
		 * 0xF8 here would produce a 5-byte prefix that the decoder
		 * rejects as too short.
		 */
		*s++ = 0xFC | (c >> 30);
		ncont = 5;
	} else {
		u8error("invalid unicode code point");
		ncont = 4;
	}

	/* emit the remaining 6-bit groups, most significant first */
	while (ncont-- > 0)
		*s++ = 0x80 | ((c >> (6 * ncont)) & 0x3F);

	return (char *)s;
}
74
plunky
1.4
75 /*
76  * decode 32-bit code point from UTF-8
77  * move pointer
78  */
ragge
1.1
/*
 * Decode one UTF-8 sequence (1 to 6 bytes) starting at *q and return
 * its code point.
 *
 * On success *q is advanced past the whole sequence.  If the first byte
 * is not a valid prefix, or a continuation byte is missing, an error is
 * reported through u8error(), *q is advanced by exactly one byte and
 * 0xFFFF is returned.
 */
unsigned int
u82cp(char **q)
{
	unsigned char *cur = (unsigned char *)*q;
	unsigned int lead = *cur++;
	unsigned int cp;
	int total, left;

	/* plain ASCII: a single byte stands for itself */
	if (lead <= 0x7F) {
		*q = (char *)cur;
		return lead;
	}

	/* the lead byte gives the sequence length and the top payload bits */
	if ((lead & 0xE0) == 0xC0) {
		total = 2;
		cp = lead & 0x1F;
	} else if ((lead & 0xF0) == 0xE0) {
		total = 3;
		cp = lead & 0x0F;
	} else if ((lead & 0xF8) == 0xF0) {
		total = 4;
		cp = lead & 0x07;
	} else if ((lead & 0xFC) == 0xF8) {
		total = 5;
		cp = lead & 0x03;
	} else if ((lead & 0xFE) == 0xFC) {
		total = 6;
		cp = lead & 0x01;
	} else {
		u8error("invalid utf-8 prefix");
		(*q)++;
		return 0xFFFF;
	}

	/* each continuation byte (10xxxxxx) contributes six more bits */
	for (left = total - 1; left > 0; left--) {
		unsigned int b = *cur;

		if ((b & 0xC0) != 0x80) {
			u8error("utf-8 encoding %d bytes too short", left);
			(*q)++;
			return 0xFFFF;
		}
		cp = (cp << 6) + (b & 0x3F);
		cur++;
	}

	*q = (char *)cur;
	return cp;
}
124
plunky
1.4
125 /*
126  * return length of UTF-8 code point
127  */
ragge
1.1
/*
 * Return the number of bytes occupied by the UTF-8 sequence that starts
 * at t.
 *
 * The length (2 to 6) is taken from the lead byte and is only returned
 * when every following byte of the sequence is a continuation byte
 * (10xxxxxx).  ASCII bytes, invalid lead bytes and truncated sequences
 * all yield 1, so a caller stepping through a string always advances.
 */
int
u8len(char *t)
{
	unsigned int c = (unsigned char)*t;
	int sz;
	int i;

	if (c <= 0x7F)
		return 1;

	if ((c & 0xE0) == 0xC0)
		sz = 2;
	else if ((c & 0xF0) == 0xE0)
		sz = 3;
	else if ((c & 0xF8) == 0xF0)
		sz = 4;
	else if ((c & 0xFC) == 0xF8)
		sz = 5;
	else if ((c & 0xFE) == 0xFC)
		sz = 6;
	else
		return 1;

	/* a NUL terminator is not a continuation byte, so this stops there */
	for (i = 1; i < sz; i++) {
		c = (unsigned char)*++t;
		if ((c & 0xC0) != 0x80)
			return 1;
	}
	return sz;
}
149
/*
 * Decode one character, possibly written as a backslash escape, from the
 * text at *q and advance *q past what was consumed.
 *
 * - Anything other than a backslash is returned as is (one byte consumed).
 * - Simple escapes (\a \b \t \n \v \f \r \\ \' \" \?, plus \e when
 *   GCC_COMPAT is defined) return the corresponding character.
 * - \x is followed by any number of hex digits (either case); the
 *   accumulated value is returned, which is 0 if no digit follows.
 * - Otherwise up to three octal digits are read.  If the character after
 *   the backslash is not an octal digit, an error is reported through
 *   u8error(), a backslash is returned and *q is left pointing at the
 *   unrecognised character so that it is read on the next call.
 */
unsigned int
esc2char(char **q)
{
	unsigned int v;
	unsigned char *t = (unsigned char *)*q;
	unsigned int c = *t;
	int i;

	if (c != '\\') {
		*q = (char *)t + 1;
		return c;
	}

	switch (v = *++t) {
	case 'a':
		c = '\a';
		break;
	case 'b':
		c = '\b';
		break;
#ifdef GCC_COMPAT
	case 'e':
		c = '\033';
		break;
#endif
	case 't':
		c = '\t';
		break;
	case 'n':
		c = '\n';
		break;
	case 'v':
		c = '\v';
		break;
	case 'f':
		c = '\f';
		break;
	case 'r':
		c = '\r';
		break;
	case '\\':
		c = '\\';
		break;
	case '\'':
		c = '\'';
		break;
	case '\"':
		c = '\"';
		break;
	case '\?':
		c = '\?';
		break;
	case 'x':
		/* t is moved onto the first candidate hex digit */
		v = *++t;
		for (i = 0, c = 0; ; v = t[++i]) {
			v = toupper(v);
			if (v >= '0' && v <= '9')
				c = (c << 4) + v - '0';
			else if (v >= 'A' && v <= 'F')
				c = (c << 4) + v - 'A' + 10;
			else
				break;
		}
		*q = (char *)t + i;
		return c;
	default:
		/* t still points at the character after the backslash */
		for (i = 0, c = 0; i < 3; v = t[++i]) {
			if (v >= '0' && v <= '7') {
				c = (c << 3) + v - '0';
			} else {
				if (i == 0) {
					u8error("unknown escape sequence \\%c", v);
					c = '\\';
				}
				break;
			}
		}
		*q = (char *)t + i;
		return c;
	}

	/* simple escape: t is on the escape letter, step over it */
	*q = (char *)t + 1;
	return c;
}
FishEye: Open Source License registered to PCC.
Your maintenance has expired. You can renew your license at http://www.atlassian.com/fisheye/renew
Atlassian FishEye, CVS analysis. (Version:1.6.3 Build:build-336 2008-11-04) - Administration - Page generated 2014-09-20 05:56 +0200