Unicode到Mazovia编码冗余char

我已经被这个问题困扰了好几个小时。我需要把一个包含波兰语变音字符(ąśółńźć 等)的字符串保存到文件中,但用来读取该文件的软件只支持 Mazovia 编码。这是一种非常古老的编码,Microsoft 的 Encoding 类并不支持它。

.Net字符串由UTF-16字符组成,因此我一直使用此代码将Unicode转换为Mazovia。

 // Drop the last character of the input line and terminate the record with the platform newline.
 string rekord = linia.Remove(linia.Length - 1) + Environment.NewLine;
 string rekordMazovia = Kodowanie.UnicodeNaMazovia(rekord);
 // NOTE(review): no Encoding is passed here. Per the .NET documentation this overload writes
 // UTF-8, which emits two bytes for every char at or above 0x80 -- presumably the source of the
 // extra character reported in front of each diacritic; confirm against the produced file.
 File.AppendAllText(sciezka, rekordMazovia);

 public static class Kodowanie
 {
     /// <summary>
     /// Returns a copy of <paramref name="tekst"/> in which each of the 18 Polish diacritic
     /// letters is replaced by the char whose numeric value is that letter's Mazovia byte.
     /// Every other character is copied through unchanged.
     /// </summary>
     /// <param name="tekst">Text to convert; must not be null.</param>
     /// <returns>A string of the same length carrying Mazovia byte values for the mapped letters.</returns>
     public static string UnicodeNaMazovia(string tekst)
     {
         char[] znaki = tekst.ToCharArray();
         for (int i = 0; i < znaki.Length; i++)
         {
             znaki[i] = NaMazovia(znaki[i]);
         }
         return new string(znaki);
     }

     // Maps one Polish diacritic letter to its Mazovia value; anything else is returned as-is.
     private static char NaMazovia(char znak)
     {
         switch (znak)
         {
             case '\u0104': return '\u008F'; // Ą
             case '\u0106': return '\u0095'; // Ć
             case '\u0118': return '\u0090'; // Ę
             case '\u0141': return '\u009C'; // Ł
             case '\u0143': return '\u00A5'; // Ń
             case '\u00D3': return '\u00A3'; // Ó
             case '\u015A': return '\u0098'; // Ś
             case '\u0179': return '\u00A0'; // Ź
             case '\u017B': return '\u00A1'; // Ż
             case '\u0105': return '\u0086'; // ą
             case '\u0107': return '\u008D'; // ć
             case '\u0119': return '\u0091'; // ę
             case '\u0142': return '\u0092'; // ł
             case '\u0144': return '\u00A4'; // ń
             case '\u00F3': return '\u00A2'; // ó
             case '\u015B': return '\u009E'; // ś
             case '\u017A': return '\u00A6'; // ź
             case '\u017C': return '\u00A7'; // ż
             default: return znak;
         }
     }
 }

一切看起来都正常,只是在应用程序中读取生成的文件时,每个变音字符之前都会多出一个冗余字符。效果如下: http://imgur.com/q7DZo

如何摆脱它? 怎么做得更好?

Mazovia编码类似于代码页437,但它在某些位置有不同的字母,所以你不能使用437。

如果您自己实现一个 MazoviaEncoding 类,就可以像下面这样轻松使用:

 // Sample text made of Polish diacritic letters.
 string output = "ąśółńźć";
 // Hand the custom encoding to the file API so it performs the char-to-byte conversion.
 Encoding encoding = new MazoviaEncoding();
 File.WriteAllText(@"test.txt", output, encoding);
 // File.AppendAllText(@"test.txt", output, encoding); // works just as well: pass the encoding as the third argument

该文件将包含:

 0x86 0x9E 0xA2 0x92 0xA4 0xA6 0x8D 

根据http://en.wikipedia.org/wiki/Mazovia_encoding,这是正确的

然后可以像使用C#中的其他Encoding一样使用该实现。 例如,读取文件也可以:

 // Read the file back into a .NET string, decoding it with the same custom encoding.
 Encoding encoding = new MazoviaEncoding();
 string result = File.ReadAllText(@"test.txt", encoding);

这是我的实现:

 using System.Collections.Generic;
 using System.Text;

 namespace System.Text
 {
     /// <summary>
     /// Single-byte <see cref="Encoding"/> for the Mazovia code page. Each byte value 0x00-0xFF
     /// corresponds to exactly one UTF-16 code unit listed in the lookup table below, so the
     /// byte count and the char count of any span are always equal.
     /// </summary>
     class MazoviaEncoding : Encoding
     {
         // Index = Mazovia byte value, element = the Unicode code point it decodes to.
         private static readonly int[] codePoints =
         {
              0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F
             ,0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F
             ,0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F
             ,0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F
             ,0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F
             ,0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F
             ,0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F
             ,0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F
             ,0x00C7,0x00FC,0x00E9,0x00E2,0x00E4,0x00E0,0x0105,0x00E7,0x00EA,0x00EB,0x00E8,0x00EF,0x00EE,0x0107,0x00C4,0x0104
             ,0x0118,0x0119,0x0142,0x00F4,0x00F6,0x0106,0x00FB,0x00F9,0x015A,0x00D6,0x00DC,0x00A2,0x0141,0x00A5,0x015B,0x0192
             ,0x0179,0x017B,0x00F3,0x00D3,0x0144,0x0143,0x017A,0x017C,0x00BF,0x2310,0x00AC,0x00BD,0x00BC,0x00A1,0x00AB,0x00BB
             ,0x2591,0x2592,0x2593,0x2502,0x2524,0x2561,0x2562,0x2556,0x2555,0x2563,0x2551,0x2557,0x255D,0x255C,0x255B,0x2510
             ,0x2514,0x2534,0x252C,0x251C,0x2500,0x253C,0x255E,0x255F,0x255A,0x2554,0x2569,0x2566,0x2560,0x2550,0x256C,0x2567
             ,0x2568,0x2564,0x2565,0x2559,0x2558,0x2552,0x2553,0x256B,0x256A,0x2518,0x250C,0x2588,0x2584,0x258C,0x2590,0x2580
             ,0x03B1,0x00DF,0x0393,0x03C0,0x03A3,0x03C3,0x00B5,0x03C4,0x03A6,0x0398,0x03A9,0x03B4,0x221E,0x03C6,0x03B5,0x2229
             ,0x2261,0x00B1,0x2265,0x2264,0x2320,0x2321,0x00F7,0x2248,0x00B0,0x2219,0x00B7,0x221A,0x207F,0x00B2,0x25A0,0x00A0
         };

         // Reverse lookup (UTF-16 code unit -> Mazovia byte), built once from codePoints.
         private static readonly Dictionary<char, byte> unicodeToByte;

         static MazoviaEncoding()
         {
             unicodeToByte = new Dictionary<char, byte>(codePoints.Length);
             for (int i = 0; i < codePoints.Length; ++i)
             {
                 unicodeToByte.Add((char)codePoints[i], (byte)i);
             }
         }

         /// <summary>Returns the largest number of bytes that encoding <paramref name="charCount"/> chars can produce (one per char).</summary>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="charCount"/> is negative.</exception>
         public override int GetMaxByteCount(int charCount)
         {
             if (charCount < 0)
             {
                 throw new ArgumentOutOfRangeException("charCount");
             }
             return charCount;
         }

         /// <summary>Returns the largest number of chars that decoding <paramref name="byteCount"/> bytes can produce (one per byte).</summary>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="byteCount"/> is negative.</exception>
         public override int GetMaxCharCount(int byteCount)
         {
             if (byteCount < 0)
             {
                 throw new ArgumentOutOfRangeException("byteCount");
             }
             return byteCount;
         }

         /// <summary>
         /// Encodes <paramref name="charCount"/> chars starting at <paramref name="charIndex"/> into
         /// <paramref name="bytes"/> starting at <paramref name="byteIndex"/>. A char that has no entry
         /// in the table is written as 0x3F ('?').
         /// </summary>
         /// <returns>The number of bytes written, which equals <paramref name="charCount"/>.</returns>
         /// <exception cref="ArgumentNullException"><paramref name="chars"/> or <paramref name="bytes"/> is null.</exception>
         /// <exception cref="ArgumentOutOfRangeException">An index or the count is negative, or the requested span does not fit in one of the arrays.</exception>
         public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
         {
             if (chars == null)
             {
                 throw new ArgumentNullException("chars");
             }
             if (bytes == null)
             {
                 throw new ArgumentNullException("bytes");
             }
             // Compare via subtraction so that index + count cannot overflow int.
             if (charIndex < 0 || charCount < 0 || byteIndex < 0
                 || chars.Length - charIndex < charCount
                 || bytes.Length - byteIndex < charCount)
             {
                 throw new ArgumentOutOfRangeException();
             }

             for (int i = 0; i < charCount; ++i)
             {
                 byte asMazovia;
                 if (!unicodeToByte.TryGetValue(chars[charIndex + i], out asMazovia))
                 {
                     asMazovia = (byte)0x003F; // "?"
                 }
                 bytes[byteIndex + i] = asMazovia;
             }
             return charCount;
         }

         /// <summary>
         /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="byteIndex"/> into
         /// <paramref name="chars"/> starting at <paramref name="charIndex"/> by direct table lookup.
         /// </summary>
         /// <returns>The number of chars written, which equals <paramref name="byteCount"/>.</returns>
         /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
         /// <exception cref="ArgumentOutOfRangeException">An index or the count is negative, or the requested span does not fit in one of the arrays.</exception>
         public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
         {
             if (bytes == null)
             {
                 throw new ArgumentNullException("bytes");
             }
             if (chars == null)
             {
                 throw new ArgumentNullException("chars");
             }
             // Compare via subtraction so that index + count cannot overflow int.
             if (byteIndex < 0 || byteCount < 0 || charIndex < 0
                 || bytes.Length - byteIndex < byteCount
                 || chars.Length - charIndex < byteCount)
             {
                 throw new ArgumentOutOfRangeException();
             }

             for (int i = 0; i < byteCount; ++i)
             {
                 chars[charIndex + i] = (char)codePoints[bytes[byteIndex + i]];
             }
             return byteCount;
         }

         /// <summary>Returns the number of bytes needed to encode the given span of chars (one per char).</summary>
         /// <exception cref="ArgumentNullException"><paramref name="charArray"/> is null.</exception>
         /// <exception cref="ArgumentOutOfRangeException">The index or count is negative, or the span exceeds the array.</exception>
         public override int GetByteCount(char[] charArray, int index, int count)
         {
             if (charArray == null)
             {
                 throw new ArgumentNullException("charArray");
             }
             if (index < 0 || count < 0 || charArray.Length - index < count)
             {
                 throw new ArgumentOutOfRangeException();
             }
             return count;
         }

         /// <summary>Returns the number of chars produced by decoding the given span of bytes (one per byte).</summary>
         /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
         /// <exception cref="ArgumentOutOfRangeException">The index or count is negative, or the span exceeds the array.</exception>
         public override int GetCharCount(byte[] bytes, int index, int count)
         {
             if (bytes == null)
             {
                 throw new ArgumentNullException("bytes");
             }
             if (index < 0 || count < 0 || bytes.Length - index < count)
             {
                 throw new ArgumentOutOfRangeException();
             }
             return count;
         }
     }
 }