using System; using System.Text; using System.Collections; using System.Collections.Generic; using System.IO; namespace LZWTest { class Program { static void Main(string[] args) { string test = "ABABCDCDABCD"; Console.WriteLine("测试字符串 {0}", test); LZW lzw = new LZW(); List<ushort> compress = lzw.Encode(test); Console.WriteLine(); Console.WriteLine("压缩列表"); for (int i = 0; i < compress.Count; i++) Console.Write(compress[i] + " "); Console.WriteLine(); string str = lzw.Decode(compress); Console.WriteLine(); Console.WriteLine("方式一 解压 {0}", str); byte[] compressBytes = lzw.GetCompressData(); string str1 = lzw.Decode(compressBytes); Console.WriteLine("方式二 解压 {0}", str); Console.Read(); } } /// <summary> /// 串表压缩算法(LZW) /// 用12位来表示可能的字符串或字符,前256代表单字符,剩下的3840给可能出现的字符组合 /// /// GIF规范 /// 1.原始数据用8位表示 /// 2.标号用9位-12位表示(256-4096) /// 3.标号一开始用9位表示(256-512),当达到512时再用10位表示,依次类推 /// 4.当达到12位能表示的最大数时(4096),在这里插入清除标志,又从9位开始 /// 5.清除标志(clear flag)的值是原始数据字长表示的最大值加1 /// 6.结束标志(end flag)的值是原始数据字长表示的最大值加2 /// </summary> public sealed class LZW { //清除标志 private ushort clearFlag = 256; //结束标志 private ushort endFlag = 257; //字典最大长度 private int maxDicCount = 4096; //存储压缩后的bit流 private MemoryStream compressStream; private BinaryWriter compressWriter; private byte[] compressData; //初始化标号字典 private void InitCodeDic(Dictionary<ZString, ushort> codeDic) { codeDic.Clear(); //原始数据设置到标号字典中 for (ushort i = 0; i <= 255; i++) { char c = (char)i; ZString key = new ZString(c); codeDic.Add(key, i); } } //编码 public List<ushort> Encode(string str) { compressStream = new MemoryStream(); compressWriter = new BinaryWriter(compressStream); //输出列表 List<ushort> output = new List<ushort>(); //查询表<前缀字符串, 编码> Dictionary<ZString, ushort> codeDic = new Dictionary<ZString, ushort>(); //前缀 ZString prefix = null; //后缀 ZString suffix = null; //标号 ushort mark = 258; InitCodeDic(codeDic); if (prefix == null) prefix = new ZString(); Console.WriteLine("建立字典"); ushort code; for (int i=0; i<str.Length; i++) { char c = str[i]; if (prefix.IsNullOrEmpty()) { prefix.Add(c); continue; } suffix = new ZString(c); ZString key = prefix + suffix; if (codeDic.ContainsKey(key)) { prefix = key; } else { codeDic.Add(key, mark); mark++; code = codeDic[prefix]; output.Add(code); Console.WriteLine("{0}->{1}", prefix, code); prefix = suffix; WriteCompressStream(code, 12, compressWriter); //字典长度已达上限 if (codeDic.Count >= maxDicCount) { //插入清除标志 output.Add(clearFlag); InitCodeDic(codeDic); prefix.Clear(); mark = 258; WriteCompressStream(clearFlag, 12, compressWriter); } } } //考虑最后一个字符 code = codeDic[prefix]; output.Add(code); //插入结束标志 output.Add(endFlag); Console.WriteLine("{0}->{1}", prefix, code); WriteCompressStream(endFlag, 12, compressWriter); BinaryReader br = new BinaryReader(compressStream); compressData = br.ReadBytes((int)compressStream.Length); return output; } //获取压缩后的数据 public byte[] GetCompressData() { return compressData; } //解码 (按照单字符8bit,组合字符12bit解码) public string Decode(byte[] bytes) { //转成bit数组 BitArray bitArray = new BitArray(bytes); //输出列表 List<byte> output = new List<byte>(); //查询表<编码,字符串> Dictionary<ushort, ZString> codeDic = new Dictionary<ushort, ZString>(); ZString prefix = new ZString(); ushort mark = 258; int bitIndex = 0; while (bitIndex < bitArray.Length) { ushort code = bitArray.GetUShort(bitIndex, 12); if (code <= 255) { output.Add((byte)code); prefix.Add(code); if (prefix.Count == 1) continue; //前缀为多字符时加入字典 (重建字典) if (!codeDic.ContainsKey(mark)) { codeDic.Add(mark, prefix); prefix = new ZString(code); mark++; } continue; } else if (code == clearFlag) { codeDic.Clear(); prefix.Clear(); mark = 258; } else if (code == endFlag) { break; } else if (codeDic.ContainsKey(code)) { prefix = codeDic[code]; output.AddRange(prefix.ToArray()); mark++; continue; } else { Console.WriteLine("重建字典出错,未找到 code={0} 对应的子串", code); } } return Encoding.ASCII.GetString(output.ToArray()); } //解码 public string Decode(List<ushort> list) { //输出列表 List<byte> output = new List<byte>(); //查询表<编码,字符串> Dictionary<ushort, ZString> codeDic = new Dictionary<ushort, ZString>(); ZString prefix = new ZString(); ushort mark = 258; for (ushort i=0; i<list.Count; i++) { ushort code = list[i]; //原始ASCII字符直接输出 if (code <= 255) { output.Add((byte)code); prefix.Add(code); if (prefix.Count == 1) continue; //前缀为多字符时加入字典 (重建字典) if (!codeDic.ContainsKey(mark)) { codeDic.Add(mark, prefix); prefix = new ZString(code); mark++; } continue; } else if (code == clearFlag) { codeDic.Clear(); prefix.Clear(); mark = 258; continue; } else if (code == endFlag) { break; } else if (codeDic.ContainsKey(code)) { prefix = codeDic[code]; output.AddRange(prefix.ToArray()); mark++; continue; } else { //如果执行到这里,说明动态重建查询表出了问题 Console.WriteLine("无法查询到 code={0} 对应的字符串", code); } } return Encoding.ASCII.GetString(output.ToArray()); } //将数据写入压缩流 private void WriteCompressStream(ushort value, int bitSize, BinaryWriter bw) { BitArray arr = new BitArray(new int[1] { value }); for (int i = 0; i < bitSize; i++) { bw.Write(arr[i]); } bw.Flush(); } public class ZString { private List<byte> list = new List<byte>(); public ZString() { } public ZString(char c) { Add(c); } public ZString(string s) { Add(s); } public ZString(int i) { Add(i); } public ZString(byte b) { Add(b); } public int Count { get { return list.Count; } } public byte[] ToArray() { return list.ToArray(); } public bool IsNullOrEmpty() { return list.Count == 0; } public void Add(byte[] bytes) { if (bytes == null) return; list.AddRange(bytes); } public void Add(int i) { list.Add((byte)i); } public void Add(char c) { byte b = (byte)c; list.Add(b); } public void Add(byte b) { list.Add(b); } public void Add(string str) { if (string.IsNullOrEmpty(str)) return; for (int i = 0; i < str.Length; i++) Add(str[i]); } public void Clear() { list.Clear(); } public override int GetHashCode() { //BKDR Hash ulong hash = 0; //也可以乘以31、131、1313、13131、131313.. uint p = 1313; for (int i=0; i<list.Count; i++) { //当作p进制计算 hash = hash * p + list[i]; } return (int)hash; } public override bool Equals(object obj) { ZString key = obj as ZString; if (key == null) return false; return GetHashCode() == key.GetHashCode(); } public override string ToString() { return Encoding.ASCII.GetString(list.ToArray()); } public static ZString operator +(ZString a, ZString b) { ZString zs = new ZString(); zs.Add(a.ToArray()); zs.Add(b.ToArray()); return zs; } public static ZString operator +(ZString a, char b) { ZString zs = new ZString(); zs.Add(a.ToArray()); zs.Add(b); return zs; } } } } public static class BitArrayExtension { public static int GetInt(this BitArray array, int startIndex, int bitLength) { var newArray = new BitArray(bitLength); for (int i = 0; i < bitLength; i++) { if (array.Length <= startIndex + i) { newArray[i] = false; } else { bool bit = array.Get(startIndex + i); newArray[i] = bit; } } return newArray.ToInt(); } public static int ToInt(this BitArray array) { if (array == null) { Console.WriteLine("array is nothing."); return 0; } if (array.Length > 32) { Console.WriteLine("must be at most 32 bits long."); return 0; } var result = new int[1]; array.CopyTo(result, 0); return result[0]; } public static ushort GetUShort(this BitArray array, int startIndex, int bitLength) { var newArray = new BitArray(bitLength); for (int i = 0; i < bitLength; i++) { if (array.Length <= startIndex + i) { newArray[i] = false; } else { bool bit = array.Get(startIndex + i); newArray[i] = bit; } } return newArray.ToUShort(); } public static ushort ToUShort(this BitArray array) { if (array == null) { Console.WriteLine("array is nothing."); return 0; } if (array.Length > 32) { Console.WriteLine("must be at most 32 bits long."); return 0; } var result = new int[1]; array.CopyTo(result, 0); return (ushort)result[0]; } }
运行效果