LZW压缩算法

作者:追风剑情 发布于:2022-1-20 15:21 分类:C#

  1. using System;
  2. using System.Text;
  3. using System.Collections;
  4. using System.Collections.Generic;
  5. using System.IO;
  6.  
  7. namespace LZWTest
  8. {
  9. class Program
  10. {
  11. static void Main(string[] args)
  12. {
  13. string test = "ABABCDCDABCD";
  14. Console.WriteLine("测试字符串 {0}", test);
  15.  
  16. LZW lzw = new LZW();
  17. List<ushort> compress = lzw.Encode(test);
  18. Console.WriteLine();
  19.  
  20. Console.WriteLine("压缩列表");
  21. for (int i = 0; i < compress.Count; i++)
  22. Console.Write(compress[i] + " ");
  23. Console.WriteLine();
  24.  
  25. string str = lzw.Decode(compress);
  26. Console.WriteLine();
  27. Console.WriteLine("方式一 解压 {0}", str);
  28.  
  29. byte[] compressBytes = lzw.GetCompressData();
  30. string str1 = lzw.Decode(compressBytes);
  31. Console.WriteLine("方式二 解压 {0}", str);
  32.  
  33. Console.Read();
  34. }
  35. }
  36.  
  37. /// <summary>
  38. /// 串表压缩算法(LZW)
  39. /// 用12位来表示可能的字符串或字符,前256代表单字符,剩下的3840给可能出现的字符组合
  40. ///
  41. /// GIF规范
  42. /// 1.原始数据用8位表示
  43. /// 2.标号用9位-12位表示(256-4096)
  44. /// 3.标号一开始用9位表示(256-512),当达到512时再用10位表示,依次类推
  45. /// 4.当达到12位能表示的最大数时(4096),在这里插入清除标志,又从9位开始
  46. /// 5.清除标志(clear flag)的值是原始数据字长表示的最大值加1
  47. /// 6.结束标志(end flag)的值是原始数据字长表示的最大值加2
  48. /// </summary>
  49. public sealed class LZW
  50. {
  51. //清除标志
  52. private ushort clearFlag = 256;
  53. //结束标志
  54. private ushort endFlag = 257;
  55. //字典最大长度
  56. private int maxDicCount = 4096;
  57.  
  58. //存储压缩后的bit流
  59. private MemoryStream compressStream;
  60. private BinaryWriter compressWriter;
  61. private byte[] compressData;
  62.  
  63. //初始化标号字典
  64. private void InitCodeDic(Dictionary<ZString, ushort> codeDic)
  65. {
  66. codeDic.Clear();
  67. //原始数据设置到标号字典中
  68. for (ushort i = 0; i <= 255; i++)
  69. {
  70. char c = (char)i;
  71. ZString key = new ZString(c);
  72. codeDic.Add(key, i);
  73. }
  74. }
  75.  
  76. //编码
  77. public List<ushort> Encode(string str)
  78. {
  79. compressStream = new MemoryStream();
  80. compressWriter = new BinaryWriter(compressStream);
  81.  
  82. //输出列表
  83. List<ushort> output = new List<ushort>();
  84. //查询表<前缀字符串, 编码>
  85. Dictionary<ZString, ushort> codeDic = new Dictionary<ZString, ushort>();
  86. //前缀
  87. ZString prefix = null;
  88. //后缀
  89. ZString suffix = null;
  90. //标号
  91. ushort mark = 258;
  92.  
  93. InitCodeDic(codeDic);
  94.  
  95. if (prefix == null)
  96. prefix = new ZString();
  97.  
  98. Console.WriteLine("建立字典");
  99. ushort code;
  100. for (int i=0; i<str.Length; i++)
  101. {
  102. char c = str[i];
  103. if (prefix.IsNullOrEmpty())
  104. {
  105. prefix.Add(c);
  106. continue;
  107. }
  108. suffix = new ZString(c);
  109. ZString key = prefix + suffix;
  110. if (codeDic.ContainsKey(key))
  111. {
  112. prefix = key;
  113. }
  114. else
  115. {
  116. codeDic.Add(key, mark);
  117. mark++;
  118.  
  119. code = codeDic[prefix];
  120. output.Add(code);
  121. Console.WriteLine("{0}->{1}", prefix, code);
  122. prefix = suffix;
  123.  
  124. WriteCompressStream(code, 12, compressWriter);
  125.  
  126. //字典长度已达上限
  127. if (codeDic.Count >= maxDicCount)
  128. {
  129. //插入清除标志
  130. output.Add(clearFlag);
  131. InitCodeDic(codeDic);
  132. prefix.Clear();
  133. mark = 258;
  134.  
  135. WriteCompressStream(clearFlag, 12, compressWriter);
  136. }
  137. }
  138. }
  139. //考虑最后一个字符
  140. code = codeDic[prefix];
  141. output.Add(code);
  142. //插入结束标志
  143. output.Add(endFlag);
  144. Console.WriteLine("{0}->{1}", prefix, code);
  145.  
  146. WriteCompressStream(endFlag, 12, compressWriter);
  147.  
  148. BinaryReader br = new BinaryReader(compressStream);
  149. compressData = br.ReadBytes((int)compressStream.Length);
  150.  
  151. return output;
  152. }
  153.  
  154. //获取压缩后的数据
  155. public byte[] GetCompressData()
  156. {
  157. return compressData;
  158. }
  159.  
  160. //解码 (按照单字符8bit,组合字符12bit解码)
  161. public string Decode(byte[] bytes)
  162. {
  163. //转成bit数组
  164. BitArray bitArray = new BitArray(bytes);
  165. //输出列表
  166. List<byte> output = new List<byte>();
  167. //查询表<编码,字符串>
  168. Dictionary<ushort, ZString> codeDic = new Dictionary<ushort, ZString>();
  169. ZString prefix = new ZString();
  170. ushort mark = 258;
  171.  
  172. int bitIndex = 0;
  173. while (bitIndex < bitArray.Length)
  174. {
  175. ushort code = bitArray.GetUShort(bitIndex, 12);
  176. if (code <= 255)
  177. {
  178. output.Add((byte)code);
  179. prefix.Add(code);
  180.  
  181. if (prefix.Count == 1)
  182. continue;
  183.  
  184. //前缀为多字符时加入字典 (重建字典)
  185. if (!codeDic.ContainsKey(mark))
  186. {
  187. codeDic.Add(mark, prefix);
  188. prefix = new ZString(code);
  189. mark++;
  190. }
  191. continue;
  192. }
  193. else if (code == clearFlag)
  194. {
  195. codeDic.Clear();
  196. prefix.Clear();
  197. mark = 258;
  198. }
  199. else if (code == endFlag)
  200. {
  201. break;
  202. }
  203. else if (codeDic.ContainsKey(code))
  204. {
  205. prefix = codeDic[code];
  206. output.AddRange(prefix.ToArray());
  207. mark++;
  208. continue;
  209. }
  210. else
  211. {
  212. Console.WriteLine("重建字典出错,未找到 code={0} 对应的子串", code);
  213. }
  214. }
  215.  
  216. return Encoding.ASCII.GetString(output.ToArray());
  217. }
  218.  
  219. //解码
  220. public string Decode(List<ushort> list)
  221. {
  222. //输出列表
  223. List<byte> output = new List<byte>();
  224. //查询表<编码,字符串>
  225. Dictionary<ushort, ZString> codeDic = new Dictionary<ushort, ZString>();
  226. ZString prefix = new ZString();
  227. ushort mark = 258;
  228.  
  229. for (ushort i=0; i<list.Count; i++)
  230. {
  231. ushort code = list[i];
  232. //原始ASCII字符直接输出
  233. if (code <= 255)
  234. {
  235. output.Add((byte)code);
  236. prefix.Add(code);
  237.  
  238. if (prefix.Count == 1)
  239. continue;
  240.  
  241. //前缀为多字符时加入字典 (重建字典)
  242. if (!codeDic.ContainsKey(mark))
  243. {
  244. codeDic.Add(mark, prefix);
  245. prefix = new ZString(code);
  246. mark++;
  247. }
  248. continue;
  249. }
  250. else if (code == clearFlag)
  251. {
  252. codeDic.Clear();
  253. prefix.Clear();
  254. mark = 258;
  255. continue;
  256. }
  257. else if (code == endFlag)
  258. {
  259. break;
  260. }
  261. else if (codeDic.ContainsKey(code))
  262. {
  263. prefix = codeDic[code];
  264. output.AddRange(prefix.ToArray());
  265. mark++;
  266. continue;
  267. }
  268. else
  269. {
  270. //如果执行到这里,说明动态重建查询表出了问题
  271. Console.WriteLine("无法查询到 code={0} 对应的字符串", code);
  272. }
  273. }
  274.  
  275. return Encoding.ASCII.GetString(output.ToArray());
  276. }
  277.  
  278. //将数据写入压缩流
  279. private void WriteCompressStream(ushort value, int bitSize, BinaryWriter bw)
  280. {
  281. BitArray arr = new BitArray(new int[1] { value });
  282. for (int i = 0; i < bitSize; i++)
  283. {
  284. bw.Write(arr[i]);
  285. }
  286. bw.Flush();
  287. }
  288.  
  289. public class ZString
  290. {
  291. private List<byte> list = new List<byte>();
  292.  
  293. public ZString() { }
  294. public ZString(char c) { Add(c); }
  295. public ZString(string s) { Add(s); }
  296. public ZString(int i) { Add(i); }
  297. public ZString(byte b) { Add(b); }
  298.  
  299. public int Count { get { return list.Count; } }
  300.  
  301. public byte[] ToArray()
  302. {
  303. return list.ToArray();
  304. }
  305.  
  306. public bool IsNullOrEmpty()
  307. {
  308. return list.Count == 0;
  309. }
  310.  
  311. public void Add(byte[] bytes)
  312. {
  313. if (bytes == null)
  314. return;
  315. list.AddRange(bytes);
  316. }
  317.  
  318. public void Add(int i)
  319. {
  320. list.Add((byte)i);
  321. }
  322.  
  323. public void Add(char c)
  324. {
  325. byte b = (byte)c;
  326. list.Add(b);
  327. }
  328.  
  329. public void Add(byte b)
  330. {
  331. list.Add(b);
  332. }
  333.  
  334. public void Add(string str)
  335. {
  336. if (string.IsNullOrEmpty(str))
  337. return;
  338. for (int i = 0; i < str.Length; i++)
  339. Add(str[i]);
  340. }
  341. public void Clear()
  342. {
  343. list.Clear();
  344. }
  345.  
  346. public override int GetHashCode()
  347. {
  348. //BKDR Hash
  349. ulong hash = 0;
  350. //也可以乘以31、131、1313、13131、131313..
  351. uint p = 1313;
  352. for (int i=0; i<list.Count; i++)
  353. {
  354. //当作p进制计算
  355. hash = hash * p + list[i];
  356. }
  357. return (int)hash;
  358. }
  359.  
  360.  
  361. public override bool Equals(object obj)
  362. {
  363. ZString key = obj as ZString;
  364. if (key == null)
  365. return false;
  366. return GetHashCode() == key.GetHashCode();
  367. }
  368.  
  369. public override string ToString()
  370. {
  371. return Encoding.ASCII.GetString(list.ToArray());
  372. }
  373.  
  374. public static ZString operator +(ZString a, ZString b)
  375. {
  376. ZString zs = new ZString();
  377. zs.Add(a.ToArray());
  378. zs.Add(b.ToArray());
  379. return zs;
  380. }
  381.  
  382. public static ZString operator +(ZString a, char b)
  383. {
  384. ZString zs = new ZString();
  385. zs.Add(a.ToArray());
  386. zs.Add(b);
  387. return zs;
  388. }
  389. }
  390. }
  391. }
  392.  
  393. public static class BitArrayExtension
  394. {
  395. public static int GetInt(this BitArray array, int startIndex, int bitLength)
  396. {
  397. var newArray = new BitArray(bitLength);
  398.  
  399. for (int i = 0; i < bitLength; i++)
  400. {
  401. if (array.Length <= startIndex + i)
  402. {
  403. newArray[i] = false;
  404. }
  405. else
  406. {
  407. bool bit = array.Get(startIndex + i);
  408. newArray[i] = bit;
  409. }
  410. }
  411.  
  412. return newArray.ToInt();
  413. }
  414.  
  415. public static int ToInt(this BitArray array)
  416. {
  417. if (array == null)
  418. {
  419. Console.WriteLine("array is nothing.");
  420. return 0;
  421. }
  422. if (array.Length > 32)
  423. {
  424. Console.WriteLine("must be at most 32 bits long.");
  425. return 0;
  426. }
  427.  
  428. var result = new int[1];
  429. array.CopyTo(result, 0);
  430. return result[0];
  431. }
  432.  
  433. public static ushort GetUShort(this BitArray array, int startIndex, int bitLength)
  434. {
  435. var newArray = new BitArray(bitLength);
  436.  
  437. for (int i = 0; i < bitLength; i++)
  438. {
  439. if (array.Length <= startIndex + i)
  440. {
  441. newArray[i] = false;
  442. }
  443. else
  444. {
  445. bool bit = array.Get(startIndex + i);
  446. newArray[i] = bit;
  447. }
  448. }
  449.  
  450. return newArray.ToUShort();
  451. }
  452.  
  453. public static ushort ToUShort(this BitArray array)
  454. {
  455. if (array == null)
  456. {
  457. Console.WriteLine("array is nothing.");
  458. return 0;
  459. }
  460. if (array.Length > 32)
  461. {
  462. Console.WriteLine("must be at most 32 bits long.");
  463. return 0;
  464. }
  465.  
  466. var result = new int[1];
  467. array.CopyTo(result, 0);
  468. return (ushort)result[0];
  469. }
  470. }

运行效果

11111.png

标签: C#

Powered by emlog  蜀ICP备18021003号-1   sitemap

川公网安备 51019002001593号