2014-11-24 109 views
1

我不知道我將如何攻擊我的霍夫曼樹的遍歷。樹是正確的,我很難弄清楚如何以一種好的方式來遍歷它。出於某種原因,我的穿越方法沒有給出結果...霍夫曼樹:遍歷

UPDATE: - 清理代碼,使其更加面向對象

Node類:

public class Node 
{ 
    public int frekvens; //Frequency 
    public char tegn; //Symbol 
    public Node venstre; //Left child 
    public Node høyre; //Right child 
    public string s; //result string 
    public string resultat; 
    public Node (char c) // Node constructor containing symbol. 
    { 
     frekvens = 1; 
     tegn = c; 
    } 

    public Node (int f, Node venstre, Node høyre) // Node Constructor containing frequency and children 
     { 
      frekvens = f; 
      this.venstre = venstre; 
      this.høyre = høyre; 
     } 

    public Node (Node node) // Node constructor containing a node 
     { 
      frekvens = node.frekvens; 
      tegn = node.tegn; 
      this.venstre = venstre; 
      this.høyre = høyre; 
     } 

    public void ØkMed1() // Inkrement frequency by one 
    { 
     frekvens = frekvens + 1; 
    } 


    public char getVenstreTegn() 
    { 
     return venstre.tegn; 
    } 

    public char getHøyreTegn() 
    { 
     return venstre.tegn; 
    } 

    public int getVenstreFrekvens() 
    { 
     return venstre.frekvens; 
    } 

    public int getHøyreFrekvens() 
    { 
     return høyre.frekvens; 
    } 

    public int getFrekvens() 
    { 
     return frekvens; 
    } 


    public bool ErTegn(char c) 
    { 
     if (c == tegn) 
     { 
      return false; 
     } 
     else 
     { 
      return true; 
     } 
    } 

    //Pretty sure this does not work as intended 
    public string traverser (Node n) //Traverse the tree 
    { 
     if (n.tegn != '\0') //If the node containes a symbol --> a leaf 
     { 
      resultat += s; 
     } 
     else 
     { 
      if (n.getVenstreTegn() == '\0') //If left child does not have a symbol 
      { 
       s += "0"; 
       traverser(n.venstre); 
      } 
      if (n.getHøyreTegn() == '\0') //If right child does not have a symbol 
      { 
       s += "1"; 
       traverser(n.høyre); 
      } 
     } 
     return resultat; 
    } 
    public string Resultat() //Used priviously to check if i got the correct huffman tree 
    { 
     string resultat; 
     resultat = "Tegn: " + Convert.ToString(tegn) +" frekvens: " + Convert.ToString(frekvens) + "\n"; 
     return resultat; 
    } 
} 

Huffman_Tree類:

public class Huffman_Tre 
{ 
    string treString; 
    List<Node> noder = new List<Node>(); 
    public Node rot; 
    public void bygg (string input) 
    { 
     bool funnet; //Found 
     char karakter; //character 

     for (int i = 0; i < input.Length;i++) //Loops through string and sets character 
      //with coresponding freqeuncy in the node list 
     { 
      karakter = input[i]; 
      funnet = false; //default 
      for (int j = 0; j< noder.Count; j++) 
      { 
       if (noder[j].ErTegn(karakter) == false) //if the character already exists 
       { 
        noder[j].ØkMed1(); //inkrement frequency by one 
        funnet = true; 
        break; 
       } 
      } 
      if (!funnet) //if the character does not exist 
      { 
       noder.Add(new Node(karakter)); //add the character to list 
      } 
     } 
     //Sorting node list acending by frequency 
     var sortertListe = noder.OrderBy(c => c.frekvens).ToList(); 

     noder = sortertListe; 

     do 
     { 
      noder.Add(new Node((noder[0].frekvens + noder[1].frekvens), noder[0],noder[1])); 

      //Remove the leaf nodes 
      noder.RemoveAt(0); 
      noder.RemoveAt(0); 
     } while(noder.Count >= 2); 

    } 

    public Node getRot() 
    { 
     return rot; 
    } 

    public string visTre() 
    { 

     foreach (Node node in noder) 
     { 
      treString += node.Resultat(); 
     } 
     return treString; 
    } 
    public bool erNull() 
    { 
     if (noder[0].tegn == '\0') 
     { 
      return true; 
     } 
     else 
      return false; 
    } 
} 

主程序:

private void btnKomprimer_Click(object sender, System.Windows.RoutedEventArgs e) 
    { 
     string input; //The string input I want to compress 
     input = txtInput.Text; //initialize input to text input 
     input = input.ToLower(); 
     txtOutput.Text = ""; 

     Huffman_Tre tre = new Huffman_Tre(); 

     tre.bygg(input); 

     Node rot = new Node(tre.getRot()); 

     txtOutput.Text += rot.traverser(rot); 
    } 
} 
+2

[您是否嘗試調試?](http://ericlippert.com/2014/03/05/how-to-debug-small-programs/) – 2014-11-24 12:11:59

+0

Sindre,也許它可能會更方便使用二進制樹先。由於huffman的算法將根據字符的頻率或形成根的頻率總和來確定二進制數字(1或0)。如果你仍然需要更多的提示,只是大喊 – RvdV79 2015-01-14 21:18:26

+0

我忽略了你已經在使用二叉樹的事實。我很抱歉。儘管如此,它引發了我建立一個遍歷課程的霍夫曼算法,我已將它發佈在我的答案中。 – RvdV79 2015-01-16 16:31:09

回答

9

因爲我還剩下一點時間,所以我製作了一個霍夫曼樹的例子,同時玩C#6.0。它沒有被優化(甚至不是很好!),但它作爲一個例子很好。它會幫助你看看你的'挑戰'可能出現在哪裏。由於我的英語遠比我的斯堪的納維亞知識好,所以我用英語命名,我希望你不介意。

首先,讓我們從保持頻率的類開始。

public sealed class HuffmanFrequencyTable 
{  
    #region Properties 
    /// <summary> 
    /// Holds the characters and their corresponding frequencies 
    /// </summary> 
    public Dictionary<char, int> FrequencyTable { get; set; } = new Dictionary<char, int>(); 

    #endregion 

    #region Methods 

    /// <summary> 
    /// Clears the internal frequency table 
    /// </summary> 
    public void Clear() 
    { 
     FrequencyTable?.Clear();    
    } 

    /// <summary> 
    /// Accepts and parses a new line (string) which is then 
    /// merged with the existing dictionary or frequency table 
    /// </summary> 
    /// <param name="line">The line to parse</param> 
    public void Accept(string line) 
    { 
     if (!string.IsNullOrEmpty(line)) 
     { 
      line.GroupBy(ch => ch). 
       ToDictionary(g => g.Key, g => g.Count()). 
       ToList(). 
       ForEach(x => FrequencyTable[x.Key] = x.Value); 
     } 
    } 

    /// <summary> 
    /// Performs a dump of the frequency table, ordering all characters, lowest frequency first. 
    /// </summary> 
    /// <returns>The frequency table in the format 'character [frequency]'</returns> 
    public override string ToString() 
    {    
     return FrequencyTable?.PrintFrequencies(); 
    } 
    #endregion 
} 

請注意,ToString()方法使用一種擴展方法,其能夠「轉儲」的使用的詞典的內容。擴展坐落在一個叫幫手,看起來像這樣靜態類:現在

/// <summary> 
/// Extension method that helps to write the contents of a generic Dictionary to a string, ordered by it's values and 
/// printing the key and it's value between brackets. 
/// </summary> 
/// <typeparam name="TKey">Generic key</typeparam> 
/// <typeparam name="TValue">Generic value type</typeparam> 
/// <param name="dictionary">The dictionary</param> 
/// <exception cref="ArgumentNullException">Throws an argument null exception if the provided dictionary is null</exception> 
/// <returns></returns> 
public static string PrintFrequencies<TKey, TValue>(this IDictionary<TKey, TValue> dictionary) 
{ 
    if (dictionary == null) 
     throw new ArgumentNullException("dictionary"); 

    var items = from kvp in dictionary 
       orderby kvp.Value 
       select kvp.Key + " [" + kvp.Value + "]"; 

    return string.Join(Environment.NewLine, items); 
} 

,本FrequencyTable的地方,我們可以開始尋找關於如何建立起來的節點。 Huffman使用二叉樹,因此最好生成一個具有左右子節點的Node類。我也冒昧地在這裏執行遍歷算法。該類構建如下:

public sealed class HuffmanNode 
{ 
    #region Properties 

    /// <summary> 
    /// Holds the left node, if applicable, otherwise null 
    /// </summary> 
    public HuffmanNode Left { get; set; } = null; 

    /// <summary> 
    /// Holds the right node, if applicable, otherwise null 
    /// </summary> 
    public HuffmanNode Right { get; set; } = null; 

    /// <summary> 
    /// Holds the Character (or null) for this particular node 
    /// </summary> 
    public char? Character { get; set; } = null; 

    /// <summary> 
    /// Holds the frequency for this particular node, defaulted to 0 
    /// </summary> 
    public int Frequency { get; set; } = default(int); 

    #endregion 

    #region Methods 

    /// <summary> 
    /// Traverses all nodes recursively returning the binary 
    /// path for the corresponding character that has been found. 
    /// </summary> 
    /// <param name="character">The character to find</param> 
    /// <param name="data">The datapath (containing '1's and '0's)</param> 
    /// <returns>The complete binary path for a character within a node</returns> 
    public List<bool> Traverse(char? character, List<bool> data) 
    { 
     //Check the leafs for existing characters 
     if (null == Left && null == Right) 
     { 
      //We're at an endpoint of our 'tree', so return it's data or nothing when the symbol 
      //characters do not match 
      return (bool)character?.Equals(Character) ? data : null; 
     } 
     else 
     { 
      List<bool> left = null; 
      List<bool> right = null; 

      //TODO: If possible refactor with proper C# 6.0 features 
      if (null != Left) 
      { 
       List<bool> leftPath = new List<bool>(data);      
       leftPath.Add(false); //Add a '0' 
       left = Left.Traverse(character, leftPath); //Recursive traversal for child nodes within this left node. 
      } 

      if (null != Right) 
      { 
       List<bool> rightPath = new List<bool>(data);      
       rightPath.Add(true); //Add a '1' 
       right = Right.Traverse(character, rightPath); //Recursive traversal for childnodes within this right node 
      } 

      return (null != left) ? left : right; 
     } 
    }   

    #endregion 
} 

我使用HuffmanTree類中的Node類。從邏輯上講,從節點構建一棵樹。相應HuffmanTree是這樣寫的:

public sealed class HuffmanTree 
{ 
    #region Fields 
    /// <summary> 
    /// Field for keeping the Huffman nodes in. Internally used. 
    /// </summary> 
    private List<HuffmanNode> nodes = new List<HuffmanNode>();   
    #endregion 

    #region Properties 

    /// <summary> 
    /// Holds the Huffman tree 
    /// </summary> 
    public HuffmanNode Root { get; set; } = null; 

    /// <summary> 
    /// Holds the frequency table for all parsed characters 
    /// </summary> 
    public HuffmanFrequencyTable Frequencies { get; private set; } = new HuffmanFrequencyTable() 

    /// <summary> 
    /// Holds the amount of bits after encoding the tree. 
    /// Primary usable for decoding. 
    /// </summary> 
    public int BitCountForTree { get; private set; } = default(int); 

    #endregion 

    #region Methods 

    /// <summary> 
    /// Builds the Huffman tree 
    /// </summary> 
    /// <param name="source">The source to build the Hufftree from</param> 
    /// <exception cref="ArgumentNullException">Thrown when source is null or empty</exception> 
    public void BuildTree(string source) 
    { 
     nodes.Clear(); //As we build a new tree, first make sure it's clean :) 

     if (string.IsNullOrEmpty(source)) 
      throw new ArgumentNullException("source"); 
     else 
     { 
      Frequencies.Accept(source); 

      foreach (KeyValuePair<char, int> symbol in Frequencies.FrequencyTable) 
      { 
       nodes.Add(new HuffmanNode() { Character = symbol.Key, Frequency = symbol.Value }); 
      } 

      while (nodes.Count > 1) 
      { 
       List<HuffmanNode> orderedNodes = nodes.OrderBy(node => node.Frequency).ToList(); 

       if (orderedNodes.Count >= 2) 
       { 
        List<HuffmanNode> takenNodes = orderedNodes.Take(2).ToList(); 

        HuffmanNode parent = new HuffmanNode() 
        { 
         Character = null, 
         Frequency = takenNodes[0].Frequency + takenNodes[1].Frequency, 
         Left = takenNodes[0], 
         Right = takenNodes[1] 
        }; 

        //Remove the childnodes from the original node list and add the new parent node 
        nodes.Remove(takenNodes[0]); 
        nodes.Remove(takenNodes[1]); 
        nodes.Add(parent); 
       } 
      } 

      Root = nodes.FirstOrDefault(); 
     } 
    } 

    /// <summary> 
    /// Encodes a given string to the corresponding huffman encoding path 
    /// </summary> 
    /// <param name="source">The source to encode</param> 
    /// <returns>The binary huffman representation of the source</returns> 
    public BitArray Encode(string source) 
    { 
     if (!string.IsNullOrEmpty(source)) 
     { 
      List<bool> encodedSource = new List<bool>(); 
      //Traverse the tree for each character in the passed source (string) and add the binary path to the encoded source 
      encodedSource.AddRange(source.SelectMany(character => 
             Root.Traverse(character, new List<bool>()) 
            ).ToList() 
           ); 
      //For decoding, we might need the amount of bits to skip trailing bits. 
      BitCountForTree = encodedSource.Count; 
      return new BitArray(encodedSource.ToArray()); 
     } 
     else return null; 
    } 

    /// <summary> 
    /// Decodes a given binary path to represent it's string value 
    /// </summary> 
    /// <param name="bits">BitArray for traversing the tree</param> 
    /// <returns></returns> 
    public string Decode(BitArray bits) 
    { 
     HuffmanNode current = Root; 
     string decodedString = string.Empty; 

     foreach (bool bit in bits) 
     { 
      //Find the correct current node depending on the bit set or not set. 
      current = (bit ? current.Right ?? current : current.Left ?? current);    

      if (current.IsLeaf()) 
      { 
       decodedString += current.Character; 
       current = Root; 
      } 
     } 

     return decodedString; 
    } 

    #endregion 
} 

什麼在此代碼是有趣的是,我決定使用BitArrays將保持樹二進制路徑時,它的建立。這裏的public BitArray Encode(string source)方法包含一個骯髒的黑客攻擊。我跟蹤用於編碼的總位數,並將其存儲在BitCountForTree屬性中。在執行解碼時,我將使用此屬性來刪除可能出現的任何尾隨位。有一種更好的方式來執行此操作,但我會將其打開以供您查找。

此外,此類使用爲HuffmanNode編寫的擴展方法。這是一個簡單的,但:

/// <summary> 
    /// Determines whether a given Huffman node is a leaf or not. 
    /// A node is considered to be a leaf when it has no childnodes 
    /// </summary> 
    /// <param name="node">A huffman node</param> 
    /// <returns>True if no children are left, false otherwise</returns> 
    public static bool IsLeaf(this HuffmanNode node) 
    { 
     return (null == node.Left && null == node.Right); 
    } 

這個擴展方法很方便地確定給定節點是否實際上是葉節點。一個葉子是沒有子節點的節點,因此二叉樹的末端(或者更好的是該樹的一個分支)。

現在有趣的部分,我該如何在這裏工作。我已經構建了一個具有3個文本框的Windows窗體應用程序。一個用於實際輸入,一個用於二進制(編碼)輸出,最後一個用於顯示壓縮結果。 我還放置了兩個簡單的按鈕,一個用於執行霍夫曼編碼,另一個用於霍夫曼解碼。

霍夫曼編碼方法被寫爲以下的(只是在編碼按鈕的事件處理程序):

string input = tbInput.Text; 
Tree.BuildTree(input); //Build the huffman tree 

BitArray encoded = Tree.Encode(input); //Encode the tree 

//First show the generated binary output 
tbBinaryOutput.Text = string.Join(string.Empty, encoded.Cast<bool>().Select(bit => bit ? "1" : "0")); 

    //Next, convert the binary output to the new characterized output string.  
    byte[] bytes = new byte[(encoded.Length/8) + 1]; 
    encoded.CopyTo(bytes, 0); 

    tbOutput.Text = Encoding.Default.GetString(bytes); //Write the compressed output to the textbox. 

注意,編碼的二進制字符串沒有任何拖尾比特。我將把它留給C#的編碼機制。這個缺點是,我必須在解碼時跟蹤它。

現在解碼也不算太困難。儘管在這個例子中,我正在使用上面編碼代碼生成的壓縮輸出。另外,我假設霍夫曼樹(和它的頻率表!!!)已經建成。通常情況下,頻率表存儲在壓縮文件中,以便重建。

//First convert the compressed output to a bit array again again and skip trailing bits.    
bool[] boolAr = new BitArray(Encoding.Default.GetBytes(tbOutput.Text)).Cast<bool>().Take(Tree.BitCountForTree).ToArray(); 
BitArray encoded = new BitArray(boolAr); 

string decoded = Tree.Decode(encoded); 
MessageBox.Show(decoded, "Decoded result: ", MessageBoxButtons.OK, MessageBoxIcon.Information); 

請注意骯髒的黑客我創建,爲Encoding.Default.GetBytes(tbOutput.Text)肯定生成的字節數組,它可能包含尾隨其不需要待解碼的位。因此,我只根據重建樹來獲取實際需要的位數。

按下「哈夫編碼」按鈕後:

所以運行時,使用了「世界著名的句子」「敏捷的棕色狐狸跳過懶惰的程序員」當我的例子提供了以下輸出,

HuffEncode

並按下 「哈夫解碼」 按鈕後:

HuffDecode

現在這段代碼可以真正使用一些優化,因爲你可能會考慮使用數組而不是字典。還有更多,但由你來考慮。

+0

很好的答案!隨着我在編程語言方面的知識的提高,我越來越尊重優秀的代碼。我會盡量不要太看重你的代碼,而是閱讀你的幫助描述,因爲我確實想從頭開始重新制作這個應用程序。現在應該會變得更加容易:D 這是一個非常有趣的程序,我希望你在寫作時也有一些樂趣;) – Woksin 2015-01-16 20:07:13

+0

我真的這樣做了,不用擔心:-)。我很高興它有幫助! – RvdV79 2015-01-16 20:09:27