05-树9 Huffman Codes (30 分)
In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is an non-empty string of no more than 63 '0’s and '1’s.
Output Specification:
For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
Code
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MaxSize 64
/*哈夫曼树结点*/
typedef struct TNode *HuffmanTree;
struct TNode{
int Weight;
HuffmanTree Left;
HuffmanTree Right;
};
/*最小堆*/
typedef struct HeapNode *MinHeap;
struct HeapNode{
HuffmanTree Elements[MaxSize];
int Size;
};
char ch[MaxSize]; //输入字符
int N,w[MaxSize],TotalCodes; //编码中含字符个数 以及频率 最优编码长度
MinHeap CreatHeap();
HuffmanTree CreatHuffman();
void Insert(MinHeap H,HuffmanTree X);
HuffmanTree DeleteMin(MinHeap H);
HuffmanTree BuildHuffman(MinHeap H);
int WPL(HuffmanTree root,int depth);
int Judge();
int main()
{
/*核心算法: 1.以最小堆构建哈夫曼树,得到最优编码长度 2.根据学生输入建树,判断是否为最优编码。判断根据: 1)最优编码长度唯一,check是否与第一步所得值一致; 2)是否存在二义性,即是否有前缀码 */
int M;
HuffmanTree tmp,ROOT;
scanf("%d",&N);
MinHeap H =CreatHeap();
/*根据输入的字符已经其频率, 一个个插入构建最小堆*/
for(int i=0;i<N;i++)
{
getchar();
scanf("%c %d",&ch[i],&w[i]);
tmp = CreatHuffman();
tmp->Weight = w[i];
// printf("ch[%d] = %c,w[%d] = %d,tmp->Weight=%d\n",i,ch[i],i,w[i],tmp->Weight);
Insert(H,tmp);
}
ROOT = BuildHuffman(H);
TotalCodes = WPL(ROOT,0);
// printf("TotalCodes = %d\n",TotalCodes);
scanf("%d",&M);
for(int i=0;i<M;i++)
{
if(Judge())
printf("Yes\n");
else
printf("No\n");
}
return 0;
}
/*创建最小堆*/
MinHeap CreatHeap()
{
MinHeap H;
H = (MinHeap)malloc(sizeof(struct HeapNode));
H->Size = 0;
H->Elements[0] = (HuffmanTree)malloc(sizeof(struct TNode));
H->Elements[0]->Weight = -1;
H->Elements[0]->Left =H->Elements[0]->Right = NULL;
return H;
}
/*创建哈夫曼树结点,这里注意初始化其权重和左右儿子*/
HuffmanTree CreatHuffman()
{
HuffmanTree T;
T = (HuffmanTree)malloc(sizeof(struct TNode));
T->Left = T->Right =NULL;
T->Weight =0;
return T;
}
/*最小堆的插入*/
void Insert(MinHeap H,HuffmanTree X)
{
int i = ++H->Size;
// printf("H->Size=%d\n",H->Size);
while(H->Elements[i/2]->Weight > X->Weight)
{
H->Elements[i]= H->Elements[i/2];
i/=2;
}
H->Elements[i]= X;
// printf("H->Elements[%d]->Weight= %d,X->Weight=%d\n",i,H->Elements[i]->Weight,X->Weight);
}
/*最小堆删除元素*/
HuffmanTree DeleteMin(MinHeap H)
{
HuffmanTree MinTtem,temp;
int Parent,Child;
MinTtem = H->Elements[1];
// printf("before MinTtem is %d \n",H->Elements[1]->Weight);
temp = H->Elements[H->Size--];
for(Parent =1;Parent*2 <= H->Size;Parent = Child){
Child = Parent*2;
if((Child != H->Size) && (H->Elements[Child]->Weight > H->Elements[Child+1]->Weight))
Child++;
if(temp->Weight <= H->Elements[Child]->Weight) break;
else
H->Elements[Parent] = H->Elements[Child];
}
H->Elements[Parent] = temp;
// printf("after MinTtem is %d \n",H->Elements[1]->Weight);
return MinTtem;
}
/*构建哈夫曼树*/
HuffmanTree BuildHuffman(MinHeap H)
{
HuffmanTree T;
int num = H->Size;
for(int i=1;i<num;i++)
{
T = CreatHuffman();
T->Left = DeleteMin(H);
T->Right = DeleteMin(H);
T->Weight = T->Left->Weight + T->Right->Weight;
Insert(H,T);
// printf("i=%d,H->Size=%d\n",i,H->Size);
}
T = DeleteMin(H);
// printf("root->Weight = %d\n",T->Weight);
return T;
}
/*根据哈夫曼树,计算最优编码长度并返回*/
int WPL(HuffmanTree root,int depth)
{
if((root->Left==NULL)&&(root->Right==NULL))
{
return depth*root->Weight;
}else{
return WPL(root->Left,depth+1)+WPL(root->Right,depth+1);
}
}
/*判断是否为最优编码*/
int Judge()
{
HuffmanTree T,p;
char ch1,*codes;
codes = (char *)malloc(sizeof(char)*MaxSize);
int length=0,flag=1,wgh,j;
T = CreatHuffman();
for(int i=0;i<N;i++)
{
scanf("\n%c %s",&ch1,codes);
if(strlen(codes)>=N)
flag = 0;
else{
for(j=0;ch1!=ch[j];j++); wgh = w[j];
p=T;
// if(T->Left) printf("T->Left not NULL\n");
// else printf("T->Left NULL\n");
for(j=0;j<strlen(codes);j++)
{
if(codes[j]=='0'){
if(!p->Left)
p->Left = CreatHuffman();
p=p->Left;
}else if(codes[j]=='1'){
if(!p->Right)
p->Right = CreatHuffman();
p=p->Right;
}
if(p->Weight) flag = 0;
}
if(p->Left||p->Right)
flag=0;
else p->Weight = wgh;
}
length += strlen(codes)* p->Weight;
}
if(length!=TotalCodes)
flag =0;
return flag;
}