▲在.Netframework中Strea...... 在.NET Framework中,StreamReader使用的encoding必须在构造器中指定,而且中途完全不可以更改。 在一般的情况下,这不会造成什么问题。一般若是从硬盘读取文件,单一文件内的编码一般都是统一的。即便是发现读错,亦可以关闭StreamReader,重新使用新的编码读取。 偏偏偶最近遇到了需要修改编码的需求,而且,我的程序没有关闭重读的机会。因为偶使用的StreamReader的BaseStream是一个NetworkStream,我不可以关闭它……但是NetworkStream传过来的东西很可能包含不同的编码……GB2312,Big5,UTF8,ISO-8859-1等等……虽然是先得到编码信息,然后再读具体内容,但是,一开始使用的StreamReader编码一旦错了,读出来的东西便再也无法恢复……会丢字之类的…… 我也不可以在获得编码信息之后,重新建立一个新的StreamReader,因为具体内容已经被原来的StreamReader给缓冲掉了…… 唯一的解决方法,便是自己实现一个可以改变CurrentEncoding属性的StreamReader了…… 全部从头写起非常不实际,偶是先下载了mono的源码,从mono的StreamReader实现代码做修改。 StreamReader其实很简单,它内部有两个Buffer,一个是input buffer,一个是decoded buffer,前者用于缓存从base stream读过来的原始数据,后者用于缓存根据原始数据解码出来后的东西……只要看明白mono的实现中ReadBuffer这个方法,要动态修改CurrentEncoding也就不是太难了…… 我需要处理的网络协议是一个行协议……偶在程序中只调用了StreamReader的ReadLine方法,而完全没有使用Read的两个方法,这也使得偶动态修改编码容易了许多…… 偶的做法是每次调用ReadLine的时候,不仅移动decoded buffer的游标(pos),同时也移动input buffer的一个新游标(pos_input),做法很简单,ReadLine方法需要调用FindNextEOL移动游标查找换行符号……我在FindNextEOL方法中多添加了一行: int FindNextEOL () { FindNextInputEOL(); .... 而FindNextInputEOL这个新的函数,完全是FindNextEOL的翻版,只是前者处理input buffer,而后者处理decoded buffer…… 如此一来,我便可以知道每次ReadLine之后,input buffer中还没有被上层读到的原始数据有哪些了…… 然后,再给CurrentEncoding属性添加set方法: set { encoding=value; decoder = encoding.GetDecoder(); decoded_count = pos + decoder.GetChars (input_buffer, pos_input, cbEncoded - pos_input, decoded_buffer, pos); } 设定新编码时,程序便根据input buffer的游标(pos_input)把没有被读到的原始数据重新decode一次,并且替换掉decoded buffer中的内容。 然后,事情就搞定了……甚至不需要对ReadLine方法做任何修改……除了把cbEncoded这个变量放到全局里面外…… 但是,偶这个修改使得Read的两个方法变得完全不可以用……一旦调用了……便会使得input buffer与decoded buffer里面两个游标不同步……下面附上完整的代码,还望有大侠可以帮忙把Read的两个方法也给搞定了…… 先谢过…… / // System.IO.StreamReader.cs // // Author: // Dietmar Maurer (dietmar@ximian.com) // Miguel de Icaza (miguel@ximian.com) // // (C) Ximian, Inc. 
// http://www.ximian.com
// Copyright (C) 2004 Novell (http://www.novell.com)
//

//
// Copyright (C) 2004 Novell, Inc (http://www.novell.com)
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//

using System;
using System.Text;
using System.Runtime.InteropServices;

namespace System.IO {

	/// <summary>
	/// A line-oriented text reader whose <see cref="CurrentEncoding"/> can be
	/// changed while reading.
	/// </summary>
	/// <remarks>
	/// The reader keeps two cursors that are advanced in lock-step by
	/// <see cref="ReadLine"/>: <c>pos</c> in the decoded character buffer and
	/// <c>pos_input</c> in the raw byte buffer. When the encoding is changed,
	/// the raw bytes that have not been handed out yet are decoded again with
	/// the new encoding.
	///
	/// Line ends are located in the raw buffer by looking for the bytes 0x0A
	/// and 0x0D. The cursors therefore only stay in sync for encodings in which
	/// those byte values never occur inside another character (ASCII, UTF-8,
	/// ISO-8859-x, GB2312, Big5, ...); UTF-16/UTF-32 input is not suitable for
	/// switching the encoding mid-stream.
	///
	/// The single-character and block <c>Read</c> overloads are not supported
	/// because they would move only one of the two cursors.
	/// </remarks>
	[Serializable]
	public class DynamicStreamReader : TextReader {

		const int DefaultBufferSize = 1024;
		const int DefaultFileBufferSize = 4096;
		const int MinimumBufferSize = 128;

		// Raw bytes most recently read from the base stream.
		byte [] input_buffer;

		// Characters decoded from input_buffer.
		char [] decoded_buffer;

		// Number of valid characters in decoded_buffer.
		int decoded_count;

		// Index of the next unread character in decoded_buffer.
		int pos;

		// Index of the next unread byte in input_buffer (kept in step with pos).
		int pos_input;

		// Number of bytes requested from the base stream per read.
		int buffer_size;

		// Bit 0: detect encoding from byte order marks; bit 1: skip the
		// preamble of the configured encoding. Cleared after the first read.
		int do_checks;

		Encoding encoding;
		Decoder decoder;
		Stream base_stream;

		// True when the last read returned fewer bytes than requested, so
		// another read might block (used by Peek).
		bool mayBlock;

		StringBuilder line_builder;

		// Exclusive end index of the valid bytes in input_buffer.
		int cbEncoded;

		// Index of the first payload byte in input_buffer (non-zero only when
		// a byte order mark was skipped in the current buffer).
		int parse_start;

		// Set when the last byte / character scanned was a CR whose matching
		// LF has not been seen yet.
		bool foundCR_input;
		bool foundCR;

		private class NullStreamReader : DynamicStreamReader {
			public override int Peek ()
			{
				return -1;
			}

			public override int Read ()
			{
				return -1;
			}

			public override int Read ([In, Out] char[] buffer, int index, int count)
			{
				return 0;
			}

			public override string ReadLine ()
			{
				return null;
			}

			public override string ReadToEnd ()
			{
				return String.Empty;
			}

			public override Stream BaseStream {
				get { return Stream.Null; }
			}

			public override Encoding CurrentEncoding {
				get { return Encoding.Unicode; }
			}
		}

		/// <summary>A reader with no backing data; every read reports end of stream.</summary>
		public new static readonly DynamicStreamReader Null = (DynamicStreamReader)(new NullStreamReader());

		internal DynamicStreamReader() {}

		public DynamicStreamReader(Stream stream)
			: this (stream, Encoding.UTF8, true, DefaultBufferSize) { }

		public DynamicStreamReader(Stream stream, bool detect_encoding_from_bytemarks)
			: this (stream, Encoding.UTF8, detect_encoding_from_bytemarks, DefaultBufferSize) { }

		public DynamicStreamReader(Stream stream, Encoding encoding)
			: this (stream, encoding, true, DefaultBufferSize) { }

		public DynamicStreamReader(Stream stream, Encoding encoding, bool detect_encoding_from_bytemarks)
			: this (stream, encoding, detect_encoding_from_bytemarks, DefaultBufferSize) { }

		/// <summary>Creates a reader over <paramref name="stream"/>.</summary>
		/// <param name="stream">Readable stream to decode.</param>
		/// <param name="encoding">Initial encoding.</param>
		/// <param name="detect_encoding_from_bytemarks">Whether a leading UTF-16/UTF-8 byte order mark overrides <paramref name="encoding"/>.</param>
		/// <param name="buffer_size">Read size in bytes; values below 128 are raised to 128.</param>
		public DynamicStreamReader(Stream stream, Encoding encoding, bool detect_encoding_from_bytemarks, int buffer_size)
		{
			Initialize (stream, encoding, detect_encoding_from_bytemarks, buffer_size);
		}

		public DynamicStreamReader(string path)
			: this (path, Encoding.UTF8, true, DefaultFileBufferSize) { }

		public DynamicStreamReader(string path, bool detect_encoding_from_bytemarks)
			: this (path, Encoding.UTF8, detect_encoding_from_bytemarks, DefaultFileBufferSize) { }

		public DynamicStreamReader(string path, Encoding encoding)
			: this (path, encoding, true, DefaultFileBufferSize) { }

		public DynamicStreamReader(string path, Encoding encoding, bool detect_encoding_from_bytemarks)
			: this (path, encoding, detect_encoding_from_bytemarks, DefaultFileBufferSize) { }

		/// <summary>Opens the file at <paramref name="path"/> for reading.</summary>
		/// <exception cref="ArgumentNullException"><paramref name="path"/> or <paramref name="encoding"/> is null.</exception>
		/// <exception cref="ArgumentException"><paramref name="path"/> is empty or contains invalid characters.</exception>
		/// <exception cref="ArgumentOutOfRangeException"><paramref name="buffer_size"/> is not positive.</exception>
		/// <exception cref="DirectoryNotFoundException">The directory part of the path does not exist.</exception>
		/// <exception cref="FileNotFoundException">The file does not exist.</exception>
		public DynamicStreamReader(string path, Encoding encoding, bool detect_encoding_from_bytemarks, int buffer_size)
		{
			if (null == path)
				throw new ArgumentNullException("path");
			if (String.Empty == path)
				throw new ArgumentException("Empty path not allowed");
			if (path.IndexOfAny (Path.InvalidPathChars) != -1)
				throw new ArgumentException("path contains invalid characters");
			if (null == encoding)
				throw new ArgumentNullException ("encoding");
			if (buffer_size <= 0)
				throw new ArgumentOutOfRangeException ("buffer_size", "The minimum size of the buffer must be positive");

			string DirName = Path.GetDirectoryName(path);
			if (DirName != String.Empty && !Directory.Exists(DirName))
				throw new DirectoryNotFoundException ("Directory '" + DirName + "' not found.");
			if (!File.Exists(path))
				throw new FileNotFoundException("File not found.", path);

			Stream stream = (Stream) File.OpenRead (path);
			Initialize (stream, encoding, detect_encoding_from_bytemarks, buffer_size);
		}

		// Validates the arguments and allocates the buffers; shared by all
		// public constructors.
		internal void Initialize (Stream stream, Encoding encoding, bool detect_encoding_from_bytemarks, int buffer_size)
		{
			if (null == stream)
				throw new ArgumentNullException ("stream");
			if (null == encoding)
				throw new ArgumentNullException ("encoding");
			if (!stream.CanRead)
				throw new ArgumentException ("Cannot read stream");
			if (buffer_size <= 0)
				throw new ArgumentOutOfRangeException ("buffer_size", "The minimum size of the buffer must be positive");

			if (buffer_size < MinimumBufferSize)
				buffer_size = MinimumBufferSize;

			base_stream = stream;
			input_buffer = new byte [buffer_size];
			this.buffer_size = buffer_size;
			this.encoding = encoding;
			decoder = encoding.GetDecoder ();

			byte [] preamble = encoding.GetPreamble ();
			do_checks = detect_encoding_from_bytemarks ? 1 : 0;
			do_checks += (preamble.Length == 0) ? 0 : 2;

			decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
			decoded_count = 0;
			pos = 0;
			pos_input = 0;
			cbEncoded = 0;
			parse_start = 0;
		}

		public virtual Stream BaseStream {
			get { return base_stream; }
		}

		/// <summary>
		/// Gets or sets the encoding used to decode the stream. Setting it
		/// re-decodes the buffered bytes that <see cref="ReadLine"/> has not
		/// returned yet, so the next line is produced with the new encoding.
		/// </summary>
		/// <exception cref="ArgumentNullException">The assigned value is null.</exception>
		/// <exception cref="ObjectDisposedException">The reader has been closed.</exception>
		public virtual Encoding CurrentEncoding {
			get {
				if (encoding == null)
					throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
				return encoding;
			}
			set {
				if (value == null)
					throw new ArgumentNullException ("value");
				if (base_stream == null || input_buffer == null || decoded_buffer == null)
					throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");

				encoding = value;
				decoder = encoding.GetDecoder ();

				int pending = cbEncoded - pos_input;
				if (pending < 0)
					pending = 0;

				// The buffer was sized for the previous encoding; make sure it
				// can hold both the re-decoded tail and a full future read.
				int needed = pos + encoding.GetMaxCharCount (pending);
				int full = encoding.GetMaxCharCount (buffer_size);
				if (full > needed)
					needed = full;
				if (needed > decoded_buffer.Length) {
					char [] grown = new char [needed];
					Array.Copy (decoded_buffer, 0, grown, 0, pos);
					decoded_buffer = grown;
				}

				decoded_count = pos + decoder.GetChars (input_buffer, pos_input, pending, decoded_buffer, pos);
			}
		}

		public override void Close ()
		{
			Dispose (true);
		}

		protected override void Dispose (bool disposing)
		{
			if (disposing && base_stream != null)
				base_stream.Close ();

			input_buffer = null;
			decoded_buffer = null;
			encoding = null;
			decoder = null;
			base_stream = null;
			base.Dispose (disposing);
		}

		//
		// Provides auto-detection of the encoding, as well as skipping over
		// byte marks at the beginning of a stream.
		// Returns the number of leading bytes in input_buffer to skip.
		//
		int DoChecks (int count)
		{
			if ((do_checks & 2) == 2) {
				byte [] preamble = encoding.GetPreamble ();
				int c = preamble.Length;
				if (count >= c) {
					int i;

					for (i = 0; i < c; i++)
						if (input_buffer [i] != preamble [i])
							break;

					if (i == c)
						return i;
				}
			}

			if ((do_checks & 1) == 1) {
				if (count < 2)
					return 0;

				if (input_buffer [0] == 0xfe && input_buffer [1] == 0xff) {
					this.encoding = Encoding.BigEndianUnicode;
					return 2;
				}

				if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe) {
					this.encoding = Encoding.Unicode;
					return 2;
				}

				if (count < 3)
					return 0;

				if (input_buffer [0] == 0xef && input_buffer [1] == 0xbb && input_buffer [2] == 0xbf) {
					this.encoding = Encoding.UTF8;
					return 3;
				}
			}

			return 0;
		}

		/// <summary>
		/// Drops everything that is buffered, both decoded characters and raw
		/// bytes, and resets the decoder state.
		/// </summary>
		public void DiscardBufferedData ()
		{
			pos = decoded_count = 0;
			// Also forget the raw bytes; otherwise a later encoding switch
			// would decode the discarded data again.
			pos_input = cbEncoded = parse_start = 0;
			mayBlock = false;
			// Discard internal state of the decoder too.
			decoder = encoding.GetDecoder ();
		}

		// The decoded buffer is empty: read from the base stream until the
		// decoder produces at least one character. Returns the number of
		// decoded characters, or 0 at end of stream.
		private int ReadBuffer ()
		{
			pos = 0;
			pos_input = 0;
			cbEncoded = 0;
			parse_start = 0;
			decoded_count = 0;

			do {
				int read = base_stream.Read (input_buffer, 0, buffer_size);
				if (read == 0) {
					pos_input = cbEncoded = parse_start = 0;
					return 0;
				}

				mayBlock = (read < buffer_size);

				int start = 0;
				if (do_checks > 0) {
					Encoding old = encoding;
					start = DoChecks (read);
					if (old != encoding)
						decoder = encoding.GetDecoder ();
					do_checks = 0;
				}

				// The byte cursor starts after any skipped byte order mark and
				// the end index is absolute, so both cursors cover the same data.
				parse_start = start;
				pos_input = start;
				cbEncoded = read;

				decoded_count += decoder.GetChars (input_buffer, start, read - start, decoded_buffer, 0);
			} while (decoded_count == 0);

			return decoded_count;
		}

		/// <summary>
		/// Returns the next character without consuming it, or -1 when none
		/// is available without a read that might block.
		/// </summary>
		public override int Peek ()
		{
			if (base_stream == null)
				throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
			if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
				return -1;

			return decoded_buffer [pos];
		}

		/// <summary>Not supported: it would desynchronise the byte and character cursors.</summary>
		/// <exception cref="NotSupportedException">Always.</exception>
		public override int Read ()
		{
			throw new NotSupportedException ("Dynamic Reader could not read!");
		}

		/// <summary>Not supported: it would desynchronise the byte and character cursors.</summary>
		/// <exception cref="NotSupportedException">Always.</exception>
		public override int Read ([In, Out] char[] dest_buffer, int index, int count)
		{
			throw new NotSupportedException ("Dynamic Reader could not read!");
		}

		// Byte-level twin of FindNextEOL: advances pos_input past the next
		// line end in input_buffer. Returns the index of the line end, or -1
		// when the buffer was exhausted first.
		int FindNextInputEOL ()
		{
			for (; pos_input < cbEncoded; pos_input++) {
				byte b = input_buffer [pos_input];
				if (b == (byte) '\n') {
					pos_input++;
					int res = foundCR_input ? (pos_input - 2) : (pos_input - 1);
					if (res < 0)
						res = 0; // a buffer starting with \n right after a trailing \r
					foundCR_input = false;
					return res;
				} else if (foundCR_input) {
					// Bare CR: the line ended on the previous byte.
					foundCR_input = false;
					return pos_input - 1;
				}

				foundCR_input = (b == (byte) '\r');
			}

			return -1;
		}

		// Advances pos (and, through FindNextInputEOL, pos_input) past the
		// next line end. Returns the index of the line end in decoded_buffer,
		// -1 when the buffer was exhausted first, or -2 when the previous
		// buffer ended with a bare CR so the pending line is already complete.
		int FindNextEOL ()
		{
			FindNextInputEOL ();

			for (; pos < decoded_count; pos++) {
				char c = decoded_buffer [pos];
				if (c == '\n') {
					pos++;
					int res = foundCR ? (pos - 2) : (pos - 1);
					if (res < 0)
						res = 0; // a buffer starting with \n right after a trailing \r
					foundCR = false;
					return res;
				} else if (foundCR) {
					foundCR = false;
					if (pos == 0)
						return -2;
					return pos - 1;
				}

				foundCR = (c == '\r');
			}

			return -1;
		}

		// Returns the accumulated line; an unusually large builder is released
		// instead of being kept for reuse.
		string TakeLine ()
		{
			StringBuilder sb = line_builder;
			if (sb.Capacity > 32768)
				line_builder = null;
			return sb.ToString (0, sb.Length);
		}

		/// <summary>
		/// Reads the next line, terminated by LF, CR or CRLF, without the
		/// terminator. Returns null at end of stream.
		/// </summary>
		/// <exception cref="ObjectDisposedException">The reader has been closed.</exception>
		public override string ReadLine ()
		{
			if (base_stream == null)
				throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");

			if (pos >= decoded_count && ReadBuffer () == 0)
				return null;

			int begin = pos;
			int end = FindNextEOL ();
			if (end == -2) {
				// A CR flag left over from a line that was already returned;
				// it has been cleared now, so scan again.
				end = FindNextEOL ();
			}
			if (end < decoded_count && end >= begin)
				return new string (decoded_buffer, begin, end - begin);

			if (line_builder == null)
				line_builder = new StringBuilder ();
			else
				line_builder.Length = 0;

			while (true) {
				if (foundCR) // don't include the trailing CR if present
					decoded_count--;

				line_builder.Append (decoded_buffer, begin, decoded_count - begin);
				if (ReadBuffer () == 0)
					return TakeLine ();

				begin = pos;
				end = FindNextEOL ();
				if (end == -2) {
					// The previous buffer ended with a bare CR: the line is
					// complete and the new buffer belongs to the next line.
					return TakeLine ();
				}
				if (end < decoded_count && end >= begin) {
					line_builder.Append (decoded_buffer, begin, end - begin);
					return TakeLine ();
				}
			}
		}

		/// <summary>
		/// Reads everything from the current position to the end of the
		/// stream using the current encoding.
		/// </summary>
		/// <exception cref="ObjectDisposedException">The reader has been closed.</exception>
		public override string ReadToEnd ()
		{
			if (base_stream == null)
				throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");

			StringBuilder text = new StringBuilder ();

			// Drain the decoded buffer directly instead of going through the
			// unsupported Read overloads, moving both cursors together.
			while (pos < decoded_count || ReadBuffer () != 0) {
				text.Append (decoded_buffer, pos, decoded_count - pos);
				pos = decoded_count;
				pos_input = cbEncoded;
			}

			return text.ToString ();
		}
	}
}
|小黑屋|最新主题|手机版|微赢网络技术论坛 ( 苏ICP备08020429号 )
GMT+8, 2024-9-29 23:22 , Processed in 0.345933 second(s), 12 queries , Gzip On, MemCache On.
Powered by Discuz! X3.5
© 2001-2023 Discuz! Team.