encodedstream.h Source File

encodedstream.h Source File

Composable Kernel: encodedstream.h Source File
encodedstream.h
Go to the documentation of this file.
1// Tencent is pleased to support the open source community by making RapidJSON available.
2//
3// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
4//
5// Licensed under the MIT License (the "License"); you may not use this file except
6// in compliance with the License. You may obtain a copy of the License at
7//
8// http://opensource.org/licenses/MIT
9//
10// Unless required by applicable law or agreed to in writing, software distributed
11// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13// specific language governing permissions and limitations under the License.
14
15#ifndef RAPIDJSON_ENCODEDSTREAM_H_
16#define RAPIDJSON_ENCODEDSTREAM_H_
17
18#include "stream.h"
19#include "memorystream.h"
20
21#ifdef __GNUC__
22RAPIDJSON_DIAG_PUSH
23RAPIDJSON_DIAG_OFF(effc++)
24#endif
25
26#ifdef __clang__
27RAPIDJSON_DIAG_PUSH
28RAPIDJSON_DIAG_OFF(padded)
29#endif
30
32
34
38template <typename Encoding, typename InputByteStream>
40{
41 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
42
43 public:
44 typedef typename Encoding::Ch Ch;
45
46 EncodedInputStream(InputByteStream& is) : is_(is) { current_ = Encoding::TakeBOM(is_); }
47
48 Ch Peek() const { return current_; }
50 {
51 Ch c = current_;
52 current_ = Encoding::Take(is_);
53 return c;
54 }
55 size_t Tell() const { return is_.Tell(); }
56
57 // Not implemented
58 void Put(Ch) { RAPIDJSON_ASSERT(false); }
59 void Flush() { RAPIDJSON_ASSERT(false); }
61 {
62 RAPIDJSON_ASSERT(false);
63 return 0;
64 }
65 size_t PutEnd(Ch*)
66 {
67 RAPIDJSON_ASSERT(false);
68 return 0;
69 }
70
71 private:
73 EncodedInputStream& operator=(const EncodedInputStream&);
74
75 InputByteStream& is_;
76 Ch current_;
77};
78
80template <>
82{
83 public:
84 typedef UTF8<>::Ch Ch;
85
87 {
88 if(static_cast<unsigned char>(is_.Peek()) == 0xEFu)
89 is_.Take();
90 if(static_cast<unsigned char>(is_.Peek()) == 0xBBu)
91 is_.Take();
92 if(static_cast<unsigned char>(is_.Peek()) == 0xBFu)
93 is_.Take();
94 }
95 Ch Peek() const { return is_.Peek(); }
96 Ch Take() { return is_.Take(); }
97 size_t Tell() const { return is_.Tell(); }
98
99 // Not implemented
100 void Put(Ch) {}
101 void Flush() {}
102 Ch* PutBegin() { return 0; }
103 size_t PutEnd(Ch*) { return 0; }
104
106
107 private:
109 EncodedInputStream& operator=(const EncodedInputStream&);
110};
111
113
118template <typename Encoding, typename OutputByteStream>
120{
121 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
122
123 public:
124 typedef typename Encoding::Ch Ch;
125
126 EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os)
127 {
128 if(putBOM)
129 Encoding::PutBOM(os_);
130 }
131
132 void Put(Ch c) { Encoding::Put(os_, c); }
133 void Flush() { os_.Flush(); }
134
135 // Not implemented
136 Ch Peek() const
137 {
138 RAPIDJSON_ASSERT(false);
139 return 0;
140 }
142 {
143 RAPIDJSON_ASSERT(false);
144 return 0;
145 }
146 size_t Tell() const
147 {
148 RAPIDJSON_ASSERT(false);
149 return 0;
150 }
152 {
153 RAPIDJSON_ASSERT(false);
154 return 0;
155 }
156 size_t PutEnd(Ch*)
157 {
158 RAPIDJSON_ASSERT(false);
159 return 0;
160 }
161
162 private:
164 EncodedOutputStream& operator=(const EncodedOutputStream&);
165
166 OutputByteStream& os_;
167};
168
169#define RAPIDJSON_ENCODINGS_FUNC(x) \
170 UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
171
173
177template <typename CharType, typename InputByteStream>
179{
180 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
181
182 public:
183 typedef CharType Ch;
184
186
190 AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8)
191 : is_(&is), type_(type), hasBOM_(false)
192 {
193 RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
194 DetectType();
195 static const TakeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Take)};
196 takeFunc_ = f[type_];
197 current_ = takeFunc_(*is_);
198 }
199
200 UTFType GetType() const { return type_; }
201 bool HasBOM() const { return hasBOM_; }
202
203 Ch Peek() const { return current_; }
205 {
206 Ch c = current_;
207 current_ = takeFunc_(*is_);
208 return c;
209 }
210 size_t Tell() const { return is_->Tell(); }
211
212 // Not implemented
213 void Put(Ch) { RAPIDJSON_ASSERT(false); }
214 void Flush() { RAPIDJSON_ASSERT(false); }
216 {
217 RAPIDJSON_ASSERT(false);
218 return 0;
219 }
220 size_t PutEnd(Ch*)
221 {
222 RAPIDJSON_ASSERT(false);
223 return 0;
224 }
225
226 private:
228 AutoUTFInputStream& operator=(const AutoUTFInputStream&);
229
230 // Detect encoding type with BOM or RFC 4627
231 void DetectType()
232 {
233 // BOM (Byte Order Mark):
234 // 00 00 FE FF UTF-32BE
235 // FF FE 00 00 UTF-32LE
236 // FE FF UTF-16BE
237 // FF FE UTF-16LE
238 // EF BB BF UTF-8
239
240 const unsigned char* c = reinterpret_cast<const unsigned char*>(is_->Peek4());
241 if(!c)
242 return;
243
244 unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
245 hasBOM_ = false;
246 if(bom == 0xFFFE0000)
247 {
248 type_ = kUTF32BE;
249 hasBOM_ = true;
250 is_->Take();
251 is_->Take();
252 is_->Take();
253 is_->Take();
254 }
255 else if(bom == 0x0000FEFF)
256 {
257 type_ = kUTF32LE;
258 hasBOM_ = true;
259 is_->Take();
260 is_->Take();
261 is_->Take();
262 is_->Take();
263 }
264 else if((bom & 0xFFFF) == 0xFFFE)
265 {
266 type_ = kUTF16BE;
267 hasBOM_ = true;
268 is_->Take();
269 is_->Take();
270 }
271 else if((bom & 0xFFFF) == 0xFEFF)
272 {
273 type_ = kUTF16LE;
274 hasBOM_ = true;
275 is_->Take();
276 is_->Take();
277 }
278 else if((bom & 0xFFFFFF) == 0xBFBBEF)
279 {
280 type_ = kUTF8;
281 hasBOM_ = true;
282 is_->Take();
283 is_->Take();
284 is_->Take();
285 }
286
287 // RFC 4627: Section 3
288 // "Since the first two characters of a JSON text will always be ASCII
289 // characters [RFC0020], it is possible to determine whether an octet
290 // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
291 // at the pattern of nulls in the first four octets."
292 // 00 00 00 xx UTF-32BE
293 // 00 xx 00 xx UTF-16BE
294 // xx 00 00 00 UTF-32LE
295 // xx 00 xx 00 UTF-16LE
296 // xx xx xx xx UTF-8
297
298 if(!hasBOM_)
299 {
300 int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
301 switch(pattern)
302 {
303 case 0x08: type_ = kUTF32BE; break;
304 case 0x0A: type_ = kUTF16BE; break;
305 case 0x01: type_ = kUTF32LE; break;
306 case 0x05: type_ = kUTF16LE; break;
307 case 0x0F: type_ = kUTF8; break;
308 default: break; // Use type defined by user.
309 }
310 }
311
312 // Runtime check of whether the size of the character type is sufficient. It only performs
313 // the check via assertions.
314 if(type_ == kUTF16LE || type_ == kUTF16BE)
315 RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
316 if(type_ == kUTF32LE || type_ == kUTF32BE)
317 RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
318 }
319
320 typedef Ch (*TakeFunc)(InputByteStream& is);
321 InputByteStream* is_;
322 UTFType type_;
323 Ch current_;
324 TakeFunc takeFunc_;
325 bool hasBOM_;
326};
327
329
333template <typename CharType, typename OutputByteStream>
335{
336 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
337
338 public:
339 typedef CharType Ch;
340
342
347 AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type)
348 {
349 RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
350
351 // Runtime check of whether the size of the character type is sufficient. It only performs
352 // the check via assertions.
353 if(type_ == kUTF16LE || type_ == kUTF16BE)
354 RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
355 if(type_ == kUTF32LE || type_ == kUTF32BE)
356 RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
357
358 static const PutFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Put)};
359 putFunc_ = f[type_];
360
361 if(putBOM)
362 PutBOM();
363 }
364
365 UTFType GetType() const { return type_; }
366
367 void Put(Ch c) { putFunc_(*os_, c); }
368 void Flush() { os_->Flush(); }
369
370 // Not implemented
371 Ch Peek() const
372 {
373 RAPIDJSON_ASSERT(false);
374 return 0;
375 }
377 {
378 RAPIDJSON_ASSERT(false);
379 return 0;
380 }
381 size_t Tell() const
382 {
383 RAPIDJSON_ASSERT(false);
384 return 0;
385 }
387 {
388 RAPIDJSON_ASSERT(false);
389 return 0;
390 }
391 size_t PutEnd(Ch*)
392 {
393 RAPIDJSON_ASSERT(false);
394 return 0;
395 }
396
397 private:
399 AutoUTFOutputStream& operator=(const AutoUTFOutputStream&);
400
401 void PutBOM()
402 {
403 typedef void (*PutBOMFunc)(OutputByteStream&);
404 static const PutBOMFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(PutBOM)};
405 f[type_](*os_);
406 }
407
408 typedef void (*PutFunc)(OutputByteStream&, Ch);
409
410 OutputByteStream* os_;
411 UTFType type_;
412 PutFunc putFunc_;
413};
414
415#undef RAPIDJSON_ENCODINGS_FUNC
416
418
419#ifdef __clang__
420RAPIDJSON_DIAG_POP
421#endif
422
423#ifdef __GNUC__
424RAPIDJSON_DIAG_POP
425#endif
426
427#endif // RAPIDJSON_ENCODEDSTREAM_H_
Input stream wrapper with dynamically bound encoding and automatic encoding detection.
Definition encodedstream.h:179
CharType Ch
Definition encodedstream.h:183
size_t PutEnd(Ch *)
Definition encodedstream.h:220
void Put(Ch)
Definition encodedstream.h:213
Ch * PutBegin()
Definition encodedstream.h:215
Ch Peek() const
Definition encodedstream.h:203
Ch Take()
Definition encodedstream.h:204
size_t Tell() const
Definition encodedstream.h:210
AutoUTFInputStream(InputByteStream &is, UTFType type=kUTF8)
Constructor.
Definition encodedstream.h:190
bool HasBOM() const
Definition encodedstream.h:201
UTFType GetType() const
Definition encodedstream.h:200
void Flush()
Definition encodedstream.h:214
Output stream wrapper with dynamically bound encoding (the encoding is selected at runtime through the constructor's UTFType argument; no detection is performed).
Definition encodedstream.h:335
AutoUTFOutputStream(OutputByteStream &os, UTFType type, bool putBOM)
Constructor.
Definition encodedstream.h:347
void Flush()
Definition encodedstream.h:368
Ch Take()
Definition encodedstream.h:376
size_t PutEnd(Ch *)
Definition encodedstream.h:391
UTFType GetType() const
Definition encodedstream.h:365
size_t Tell() const
Definition encodedstream.h:381
Ch * PutBegin()
Definition encodedstream.h:386
CharType Ch
Definition encodedstream.h:339
void Put(Ch c)
Definition encodedstream.h:367
Ch Peek() const
Definition encodedstream.h:371
UTF8<>::Ch Ch
Definition encodedstream.h:84
Ch * PutBegin()
Definition encodedstream.h:102
Ch Take()
Definition encodedstream.h:96
size_t Tell() const
Definition encodedstream.h:97
void Put(Ch)
Definition encodedstream.h:100
EncodedInputStream(MemoryStream &is)
Definition encodedstream.h:86
MemoryStream & is_
Definition encodedstream.h:105
size_t PutEnd(Ch *)
Definition encodedstream.h:103
void Flush()
Definition encodedstream.h:101
Ch Peek() const
Definition encodedstream.h:95
Input byte stream wrapper with a statically bound encoding.
Definition encodedstream.h:40
Ch Peek() const
Definition encodedstream.h:48
EncodedInputStream(InputByteStream &is)
Definition encodedstream.h:46
size_t PutEnd(Ch *)
Definition encodedstream.h:65
void Flush()
Definition encodedstream.h:59
Ch * PutBegin()
Definition encodedstream.h:60
Ch Take()
Definition encodedstream.h:49
Encoding::Ch Ch
Definition encodedstream.h:44
size_t Tell() const
Definition encodedstream.h:55
void Put(Ch)
Definition encodedstream.h:58
Output byte stream wrapper with statically bound encoding.
Definition encodedstream.h:120
void Put(Ch c)
Definition encodedstream.h:132
size_t Tell() const
Definition encodedstream.h:146
Ch Peek() const
Definition encodedstream.h:136
void Flush()
Definition encodedstream.h:133
size_t PutEnd(Ch *)
Definition encodedstream.h:156
Ch Take()
Definition encodedstream.h:141
Encoding::Ch Ch
Definition encodedstream.h:124
EncodedOutputStream(OutputByteStream &os, bool putBOM=true)
Definition encodedstream.h:126
Ch * PutBegin()
Definition encodedstream.h:151
#define RAPIDJSON_ENCODINGS_FUNC(x)
Definition encodedstream.h:169
UTFType
Runtime-specified UTF encoding type of a stream.
Definition encodings.h:757
@ kUTF32BE
UTF-32 big endian.
Definition encodings.h:762
@ kUTF16BE
UTF-16 big endian.
Definition encodings.h:760
@ kUTF8
UTF-8.
Definition encodings.h:758
@ kUTF32LE
UTF-32 little endian.
Definition encodings.h:761
@ kUTF16LE
UTF-16 little endian.
Definition encodings.h:759
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition rapidjson.h:451
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition rapidjson.h:121
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition rapidjson.h:124
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition rapidjson.h:500
Represents an in-memory input byte stream.
Definition memorystream.h:42
UTF-8 encoding.
Definition encodings.h:98
CharType Ch
Definition encodings.h:99