Quick Search:

View

Revision:
Expand:  
Changeset: MAIN:ragge:20140517075559

Diff

Diff from 1.1 to:

Annotations

Annotate by Age | Author | Mixed | None
/fisheye/browse/pcc/pcc/mip/unicode.c

Annotated File View

ragge
1.1
1 /*
2  * Copyright (c) 2014 Eric Olson <ejolson@renomath.org>
3  * Some rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
20  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
22  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <stdio.h>
31 #include <ctype.h>
32 #include "unicode.h"
33
/*
 * Encode the code point c as UTF-8 at p.
 *
 * Values up to 0x7FFFFFFF are accepted and produce one to six bytes
 * (the five and six byte forms are the extended, pre-RFC 3629 ones
 * that u82cp() in this file also decodes).  No terminating NUL is
 * written.
 *
 * Returns a pointer just past the last byte written.  A value above
 * 0x7FFFFFFF cannot be encoded: it is reported through u8error(),
 * nothing is written and p is returned unchanged.
 */
char *
cp2u8(char *p,unsigned int c)
{
        unsigned char *s=(unsigned char *)p;
        unsigned int lead;      /* marker bits of the first byte */
        int n;                  /* number of continuation bytes */

        if(c<=0x7F){
                *s++=c;
                return (char *)s;
        }
        if(c<=0x07FF){
                n=1;
                lead=0xC0;
        } else if(c<=0xFFFF){
                n=2;
                lead=0xE0;
        } else if(c<=0x1FFFFF){
                n=3;
                lead=0xF0;
        } else if(c<=0x3FFFFFF){
                n=4;
                lead=0xF8;
        } else if(c<=0x7FFFFFFF){
                /* six byte form: the prefix is 1111110x, not 111110xx */
                n=5;
                lead=0xFC;
        } else {
                u8error("invalid unicode code point");
                /* do not emit continuation bytes without a lead byte */
                return p;
        }
        /* the lead byte carries the bits above the n six-bit groups */
        *s++=lead|(c>>(6*n));
        while(n-->0)
                *s++=0x80|((c>>(6*n))&0x3F);
        return (char *)s;
}
70
/*
 * Decode the UTF-8 sequence that starts at *q and return its code point.
 *
 * Lead bytes announcing two to six byte sequences are accepted.  On
 * success *q is advanced past the whole sequence.  If the first byte
 * is not a valid lead byte, or a continuation byte (10xxxxxx) is
 * missing, the problem is reported through u8error(), *q is advanced
 * by a single byte and 0xFFFF is returned.
 */
unsigned int
u82cp(char **q)
{
        unsigned char *in=(unsigned char *)*q;
        unsigned int lead=*in++;
        unsigned int cp;
        int need;       /* total length announced by the lead byte */
        int got;        /* bytes consumed so far, lead byte included */

        /* plain ASCII: a single byte stands for itself */
        if(lead<=0x7F){
                *q=(char *)in;
                return lead;
        }

        if((lead&0xE0)==0xC0){
                need=2;
                cp=lead&0x1F;
        } else if((lead&0xF0)==0xE0){
                need=3;
                cp=lead&0x0F;
        } else if((lead&0xF8)==0xF0){
                need=4;
                cp=lead&0x07;
        } else if((lead&0xFC)==0xF8){
                need=5;
                cp=lead&0x03;
        } else if((lead&0xFE)==0xFC){
                need=6;
                cp=lead&0x01;
        } else {
                u8error("invalid utf-8 prefix");
                (*q)++;
                return 0xFFFF;
        }

        /* each continuation byte contributes six payload bits */
        for(got=1;got<need;got++,in++){
                if((*in&0xC0)!=0x80){
                        u8error("utf-8 encoding %d bytes too short",need-got);
                        (*q)++;
                        return 0xFFFF;
                }
                cp=(cp<<6)+(*in&0x3F);
        }
        *q=(char *)in;
        return cp;
}
116
/*
 * Return the length in bytes of the UTF-8 sequence starting at t.
 *
 * The length (2 to 6) is taken from the lead byte and is returned only
 * if every following byte of the sequence is a continuation byte
 * (10xxxxxx).  In all other cases -- an ASCII byte (including NUL),
 * an invalid lead byte, or a sequence that is cut short -- the result
 * is 1, so a caller can always step forward by the returned amount.
 */
int
u8len(char *t)
{
        unsigned int c=(unsigned char)*t;
        int sz;
        int i;

        if(c<=0x7F)
                return 1;

        if((c&0xE0)==0xC0)
                sz=2;
        else if((c&0xF0)==0xE0)
                sz=3;
        else if((c&0xF8)==0xF0)
                sz=4;
        else if((c&0xFC)==0xF8)
                sz=5;
        else if((c&0xFE)==0xFC)
                sz=6;
        else
                return 1;

        /* a NUL terminator is not a continuation byte, so this stops there */
        for(i=1;i<sz;i++){
                c=(unsigned char)*++t;
                if((c&0xC0)!=0x80)
                        return 1;
        }
        return sz;
}
138
/*
 * Read one character, possibly written as a backslash escape, from *q
 * and return its value; *q is advanced past what was consumed.
 *
 * - A byte other than a backslash is returned as is.
 * - \a \b \t \n \v \f \r \\ \' \" yield the corresponding character.
 * - \x is followed by any number of hexadecimal digits (either case);
 *   the value is accumulated without an overflow check, and is 0 when
 *   no digit follows.
 * - Otherwise up to three octal digits are read.  If the character
 *   after the backslash is not an octal digit the escape is unknown:
 *   it is reported through u8error(), '\\' is returned and *q is left
 *   pointing at the character that followed the backslash.
 */
unsigned int
esc2char(char **q)
{
        unsigned int v;
        unsigned char *t=(unsigned char *)*q;
        unsigned int c=*t;

        if(c=='\\') {
                int i;

                switch(v=*++t){
                case 'a':
                        c='\a';
                        break;
                case 'b':
                        c='\b';
                        break;
                case 't':
                        c='\t';
                        break;
                case 'n':
                        c='\n';
                        break;
                case 'v':
                        c='\v';
                        break;
                case 'f':
                        c='\f';
                        break;
                case 'r':
                        c='\r';
                        break;
                case '\\':
                        c='\\';
                        break;
                case '\'':
                        c='\'';
                        break;
                case '\"':
                        c='\"';
                        break;
                case 'x':
                        /* t now points at the first candidate hex digit */
                        v=*++t;
                        for(i=0,c=0;;v=t[++i]){
                                v=toupper(v);
                                if(v>='0' && v<='9')
                                        c=(c<<4)+v-'0';
                                else if(v>='A' && v<='F')
                                        c=(c<<4)+v-'A'+10;
                                else
                                        break;
                        }
                        *q=(char *)t+i;
                        return c;
                default:
                        /* v already holds the character after the backslash */
                        for(i=0,c=0;i<3;v=t[++i]){
                                if(v>='0' && v<='7') {
                                        c=(c<<3)+v-'0';
                                } else if(i==0) {
                                        u8error("unknown escape sequence \\%c",v);
                                        c='\\';
                                        break;
                                } else {
                                        break;
                                }
                        }
                        *q=(char *)t+i;
                        return c;
                }
        }
        /* ordinary byte, or the single character of a simple escape */
        *q=(char *)t+1;
        return c;
}
FishEye: Open Source License registered to PCC.
Your maintenance has expired. You can renew your license at http://www.atlassian.com/fisheye/renew
Atlassian FishEye, CVS analysis. (Version:1.6.3 Build:build-336 2008-11-04) - Administration - Page generated 2014-10-30 14:02 +0100