User Tools

Site Tools


tanszek:oktatas:techcomm:information

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
tanszek:oktatas:techcomm:information [2025/10/06 20:40] – [Entropy] knehez | tanszek:oktatas:techcomm:information [2025/10/14 06:25] (current) – [Entropy] knehez
Line 45: Line 45:
 The average information content of the set of messages is called the //entropy// of the message set. The average information content of the set of messages is called the //entropy// of the message set.
  
-$$ H_E = \sum_{i=1}^n p_i \cdot I_{E_i} = \sum_{i=1}^n p_i \cdot \log_2 \frac{1}{p_i} = - \sum_{i=1}^n p_i \cdot \log_2 p_i$$+$$ H_E = \sum_{i=1}^n p_i \cdot I_{E_i} = \sum_{i=1}^n p_i \cdot \log_2 \frac{1}{p_i} = - \sum_{i=1}^n p_i \cdot \log_2 p_i  [bit]$$
  
 **Example**: Given an event space consisting of two events: \( E = \{E_1, E_2\} \), and further \( p = \{p_1, p_2\} \) with \( p_2 = 1 - p_1 \), then the average information content is: **Example**: Given an event space consisting of two events: \( E = \{E_1, E_2\} \), and further \( p = \{p_1, p_2\} \) with \( p_2 = 1 - p_1 \), then the average information content is:
Line 124: Line 124:
 #include <stdio.h> #include <stdio.h>
 #include <math.h> #include <math.h>
 +#include <string.h>
  
-float calculateEntropy(unsigned int bytes[], int length);+float calculateEntropy(unsigned char counts[], int length);
  
-char sample[] = "Some poetry types are unique to particular cultures and  genres and respond to yQ%v?FY}ZT=/5cJ.m~A{9^8Lse characteristics of the language in which the poet writes. Readers accustomed to identifying poetry with Dante, Goethe, Mickiewicz, or Rumi may think of it as written in lines based on rhyme and regular meter. There are, however, traditions, such as Biblical poetry and alliterative verse, that use other means to create rhythm and euphony. Much modern poetry reflects a critique of poetic tradition,[6] testing the principle of euphony itself or altogether forgoing rhyme or set rhythm";+int main(void) { 
 +    const char sample[] = 
 +        "Some poetry types are unique to particular cultures and genres "
 +        "and respond to yQ%v?FY}ZT=/5cJ.m~A{9^8L the characteristics of the language in which the "
 +        "poet writes. Readers accustomed to identifying poetry with Dante, "
 +        "Goethe, Mickiewicz, or Rumi may think of it as written in lines "
 +        "based on rhyme and regular meter. There are, however, traditions, "
 +        "such as Biblical poetry and alliterative verse, that use other "
 +        "means to create rhythm and euphony. Much modern poetry reflects "
 +        "a critique of poetic tradition, testing the principle of euphony "
 +        "itself or altogether forgoing rhyme or set rhythm.";
  
-int main() 
-{ 
-    unsigned int byteCounter[256]; 
     const int windowWidth = 20;     const int windowWidth = 20;
 +    unsigned char counts[256];
  
-    for(int i = 0; i < sizeof(sample) - windowWidth; i++) +    int sampleLength = strlen(sample);
-    { +
-        memset(byteCounter, 0, sizeof(unsigned int) * 256);+
  
-        char *p = &sample[i]; +    for (int start = 0; start <= sampleLength - windowWidth; start++) { 
-        char *end = &sample[i + windowWidth];+        memset(counts, 0, sizeof(counts));
  
-        while(p != end) +        // Count characters in current window 
-        { +        for (int j = 0; j < windowWidth; j++) { 
-            byteCounter[(unsigned char)(*p++)]++;+            unsigned char c = (unsigned char)sample[start + j]; 
 +            counts[c]++;
         }         }
-        float entropy = calculateEntropy(byteCounter, windowWidth); + 
-        printf("%d - %.3f\n", i, entropy);+        float entropy = calculateEntropy(counts, windowWidth); 
 +        printf("%d - %.3f\n", start, entropy);
     }     }
  
 +    return 0;
 } }
  
- +float calculateEntropy(unsigned char counts[], int length) {
-float calculateEntropy(unsigned int bytes[], int length) +
-{+
     float entropy = 0.0f;     float entropy = 0.0f;
- +    for (int i = 0; i < 256; i++) { 
-    for (int i = 0; i < 256; i++) +        if (counts[i] > 0) { 
-    { +            float freq = (float)counts[i] / length; 
-        if (bytes[i] != 0) +            entropy -= freq * log2f(freq);
-        { +
-            float freq = (float) bytes[i] / (float) length; +
-            entropy += -freq * log2f(freq);+
         }         }
     }     }
     return entropy;     return entropy;
 } }
 +
 </sxh> </sxh>
  
tanszek/oktatas/techcomm/information.1759783246.txt.gz · Last modified: 2025/10/06 20:40 by knehez