LZW压缩算法

作者:追风剑情 发布于:2022-1-20 15:21 分类:C#

using System;
using System.Text;
using System.Collections;
using System.Collections.Generic;
using System.IO;

namespace LZWTest
{
    class Program
    {
        static void Main(string[] args)
        {
            string test = "ABABCDCDABCD";
            Console.WriteLine("测试字符串 {0}", test);

            LZW lzw = new LZW();
            List<ushort> compress = lzw.Encode(test);
            Console.WriteLine();

            Console.WriteLine("压缩列表");
            for (int i = 0; i < compress.Count; i++)
                Console.Write(compress[i] + " ");
            Console.WriteLine();

            string str = lzw.Decode(compress);
            Console.WriteLine();
            Console.WriteLine("方式一 解压 {0}", str);

            byte[] compressBytes = lzw.GetCompressData();
            string str1 = lzw.Decode(compressBytes);
            Console.WriteLine("方式二 解压 {0}", str);

            Console.Read();
        }
    }

    /// <summary>
    /// 串表压缩算法(LZW)
    /// 用12位来表示可能的字符串或字符,前256代表单字符,剩下的3840给可能出现的字符组合
    /// 
    /// GIF规范
    ///   1.原始数据用8位表示
    ///   2.标号用9位-12位表示(256-4096)
    ///   3.标号一开始用9位表示(256-512),当达到512时再用10位表示,依次类推
    ///   4.当达到12位能表示的最大数时(4096),在这里插入清除标志,又从9位开始
    ///   5.清除标志(clear flag)的值是原始数据字长表示的最大值加1
    ///   6.结束标志(end flag)的值是原始数据字长表示的最大值加2
    /// </summary>
    public sealed class LZW
    {
        //清除标志
        private ushort clearFlag = 256;
        //结束标志
        private ushort endFlag = 257;
        //字典最大长度
        private int maxDicCount = 4096;

        //存储压缩后的bit流
        private MemoryStream compressStream;
        private BinaryWriter compressWriter;
        private byte[] compressData;

        //初始化标号字典
        private void InitCodeDic(Dictionary<ZString, ushort> codeDic)
        {
            codeDic.Clear();
            //原始数据设置到标号字典中
            for (ushort i = 0; i <= 255; i++)
            {
                char c = (char)i;
                ZString key = new ZString(c);
                codeDic.Add(key, i);
            }
        }

        //编码
        public List<ushort> Encode(string str)
        {
            compressStream = new MemoryStream();
            compressWriter = new BinaryWriter(compressStream);

            //输出列表
            List<ushort> output = new List<ushort>();
            //查询表<前缀字符串, 编码>
            Dictionary<ZString, ushort> codeDic = new Dictionary<ZString, ushort>();
            //前缀
            ZString prefix = null;
            //后缀
            ZString suffix = null;
            //标号
            ushort mark = 258;

            InitCodeDic(codeDic);

            if (prefix == null)
                prefix = new ZString();

            Console.WriteLine("建立字典");
            ushort code;
            for (int i=0; i<str.Length; i++)
            {
                char c = str[i];
                if (prefix.IsNullOrEmpty())
                {
                    prefix.Add(c);
                    continue;
                }
                suffix = new ZString(c);
                ZString key = prefix + suffix;
                if (codeDic.ContainsKey(key))
                {
                    prefix = key;
                }
                else
                {
                    codeDic.Add(key, mark);
                    mark++;

                    code = codeDic[prefix];
                    output.Add(code);
                    Console.WriteLine("{0}->{1}", prefix, code);
                    prefix = suffix;

                    WriteCompressStream(code, 12, compressWriter);

                    //字典长度已达上限
                    if (codeDic.Count >= maxDicCount)
                    {
                        //插入清除标志
                        output.Add(clearFlag);
                        InitCodeDic(codeDic);
                        prefix.Clear();
                        mark = 258;

                        WriteCompressStream(clearFlag, 12, compressWriter);
                    }
                }
            }
            //考虑最后一个字符
            code = codeDic[prefix];
            output.Add(code);
            //插入结束标志
            output.Add(endFlag);
            Console.WriteLine("{0}->{1}", prefix, code);

            WriteCompressStream(endFlag, 12, compressWriter);

            BinaryReader br = new BinaryReader(compressStream);
            compressData = br.ReadBytes((int)compressStream.Length);

            return output;
        }

        //获取压缩后的数据
        public byte[] GetCompressData()
        {
            return compressData;
        }

        //解码 (按照单字符8bit,组合字符12bit解码)
        public string Decode(byte[] bytes)
        {
            //转成bit数组
            BitArray bitArray = new BitArray(bytes);
            //输出列表
            List<byte> output = new List<byte>();
            //查询表<编码,字符串>
            Dictionary<ushort, ZString> codeDic = new Dictionary<ushort, ZString>();
            ZString prefix = new ZString();
            ushort mark = 258;

            int bitIndex = 0;
            while (bitIndex < bitArray.Length)
            {
                ushort code = bitArray.GetUShort(bitIndex, 12);
                if (code <= 255)
                {
                    output.Add((byte)code);
                    prefix.Add(code);

                    if (prefix.Count == 1)
                        continue;

                    //前缀为多字符时加入字典 (重建字典)
                    if (!codeDic.ContainsKey(mark))
                    {
                        codeDic.Add(mark, prefix);
                        prefix = new ZString(code);
                        mark++;
                    }
                    continue;
                }
                else if (code == clearFlag)
                {
                    codeDic.Clear();
                    prefix.Clear();
                    mark = 258;
                }
                else if (code == endFlag)
                {
                    break;
                }
                else if (codeDic.ContainsKey(code))
                {
                    prefix = codeDic[code];
                    output.AddRange(prefix.ToArray());
                    mark++;
                    continue;
                }
                else
                {
                    Console.WriteLine("重建字典出错,未找到 code={0} 对应的子串", code);
                }
            }

            return Encoding.ASCII.GetString(output.ToArray());
        }

        //解码
        public string Decode(List<ushort> list)
        {
            //输出列表
            List<byte> output = new List<byte>();
            //查询表<编码,字符串>
            Dictionary<ushort, ZString> codeDic = new Dictionary<ushort, ZString>();
            ZString prefix = new ZString();
            ushort mark = 258;

            for (ushort i=0; i<list.Count; i++)
            {
                ushort code = list[i];
                //原始ASCII字符直接输出
                if (code <= 255)
                {
                    output.Add((byte)code);
                    prefix.Add(code);

                    if (prefix.Count == 1)
                        continue;

                    //前缀为多字符时加入字典 (重建字典)
                    if (!codeDic.ContainsKey(mark))
                    {
                        codeDic.Add(mark, prefix);
                        prefix = new ZString(code);
                        mark++;
                    }
                    continue;
                }
                else if (code == clearFlag)
                {
                    codeDic.Clear();
                    prefix.Clear();
                    mark = 258;
                    continue;
                }
                else if (code == endFlag)
                {
                    break;
                }
                else if (codeDic.ContainsKey(code))
                {
                    prefix = codeDic[code];
                    output.AddRange(prefix.ToArray());
                    mark++;
                    continue;
                }
                else
                {
                    //如果执行到这里,说明动态重建查询表出了问题
                    Console.WriteLine("无法查询到 code={0} 对应的字符串", code);
                }
            }

            return Encoding.ASCII.GetString(output.ToArray());
        }

        //将数据写入压缩流
        private void WriteCompressStream(ushort value, int bitSize, BinaryWriter bw)
        {
            BitArray arr = new BitArray(new int[1] { value });
            for (int i = 0; i < bitSize; i++)
            {
                bw.Write(arr[i]);
            }
            bw.Flush();
        }

        public class ZString
        {
            private List<byte> list = new List<byte>();

            public ZString() { }
            public ZString(char c) { Add(c); }
            public ZString(string s) { Add(s); }
            public ZString(int i) { Add(i); }
            public ZString(byte b) { Add(b); }

            public int Count { get { return list.Count; } }

            public byte[] ToArray()
            {
                return list.ToArray();
            }

            public bool IsNullOrEmpty()
            {
                return list.Count == 0;
            }

            public void Add(byte[] bytes)
            {
                if (bytes == null)
                    return;
                list.AddRange(bytes);
            }

            public void Add(int i)
            {
                list.Add((byte)i);
            }

            public void Add(char c)
            {
                byte b = (byte)c;
                list.Add(b);
            }

            public void Add(byte b)
            {
                list.Add(b);
            }

            public void Add(string str)
            {
                if (string.IsNullOrEmpty(str))
                    return;
                for (int i = 0; i < str.Length; i++)
                    Add(str[i]);
            }
             
            public void Clear()
            {
                list.Clear();
            }

            public override int GetHashCode()
            {
                //BKDR Hash
                ulong hash = 0;
                //也可以乘以31、131、1313、13131、131313..
                uint p = 1313;
                for (int i=0; i<list.Count; i++)
                {
                    //当作p进制计算
                    hash = hash * p + list[i];
                }
                return (int)hash;
            }


            public override bool Equals(object obj)
            {
                ZString key = obj as ZString;
                if (key == null)
                    return false;
                return GetHashCode() == key.GetHashCode();
            }

            public override string ToString()
            {
                return Encoding.ASCII.GetString(list.ToArray());
            }

            public static ZString operator +(ZString a, ZString b)
            {
                ZString zs = new ZString();
                zs.Add(a.ToArray());
                zs.Add(b.ToArray());
                return zs;
            }

            public static ZString operator +(ZString a, char b)
            {
                ZString zs = new ZString();
                zs.Add(a.ToArray());
                zs.Add(b);
                return zs;
            }
        }
    }
}

public static class BitArrayExtension
{
    public static int GetInt(this BitArray array, int startIndex, int bitLength)
    {
        var newArray = new BitArray(bitLength);

        for (int i = 0; i < bitLength; i++)
        {
            if (array.Length <= startIndex + i)
            {
                newArray[i] = false;
            }
            else
            {
                bool bit = array.Get(startIndex + i);
                newArray[i] = bit;
            }
        }

        return newArray.ToInt();
    }

    public static int ToInt(this BitArray array)
    {
        if (array == null)
        {
            Console.WriteLine("array is nothing.");
            return 0;
        }
        if (array.Length > 32)
        {
            Console.WriteLine("must be at most 32 bits long.");
            return 0;
        }

        var result = new int[1];
        array.CopyTo(result, 0);
        return result[0];
    }

    public static ushort GetUShort(this BitArray array, int startIndex, int bitLength)
    {
        var newArray = new BitArray(bitLength);

        for (int i = 0; i < bitLength; i++)
        {
            if (array.Length <= startIndex + i)
            {
                newArray[i] = false;
            }
            else
            {
                bool bit = array.Get(startIndex + i);
                newArray[i] = bit;
            }
        }

        return newArray.ToUShort();
    }

    public static ushort ToUShort(this BitArray array)
    {
        if (array == null)
        {
            Console.WriteLine("array is nothing.");
            return 0;
        }
        if (array.Length > 32)
        {
            Console.WriteLine("must be at most 32 bits long.");
            return 0;
        }

        var result = new int[1];
        array.CopyTo(result, 0);
        return (ushort)result[0];
    }
}

运行效果

11111.png

标签: C#

Powered by emlog  蜀ICP备18021003号-1   sitemap

川公网安备 51019002001593号