tanszek:oktatas:techcomm:information
Differences
This shows you the differences between two versions of the page.
Both sides previous revision | Previous revision | Next revision | Previous revision | ||
tanszek:oktatas:techcomm:information [2025/10/06 20:40] – [Entropy] knehez | tanszek:oktatas:techcomm:information [2025/10/14 06:25] (current) – [Entropy] knehez | ||
---|---|---|---|
Line 45: | Line 45: | ||
The average information content of the set of messages is called the //entropy// of the message set. | The average information content of the set of messages is called the //entropy// of the message set. | ||
- | $$ H_E = \sum_{i=1}^n p_i \cdot I_{E_i} = \sum_{i=1}^n p_i \cdot \log_2 \frac{1}{p_i} = - \sum_{i=1}^n p_i \cdot \log_2 p_i$$ | + | $$ H_E = \sum_{i=1}^n p_i \cdot I_{E_i} = \sum_{i=1}^n p_i \cdot \log_2 \frac{1}{p_i} = - \sum_{i=1}^n p_i \cdot \log_2 p_i \ \text{[bit]}$$ |
**Example**: | **Example**: | ||
Line 124: | Line 124: | ||
#include < | #include < | ||
#include < | #include < | ||
+ | #include < | ||
- | float calculateEntropy(unsigned | + | float calculateEntropy(unsigned |
- | char sample[] = "Some poetry types are unique to particular cultures and genres and respond to yQ%v? | + | int main(void) { |
+ | const char sample[] = | ||
+ | | ||
+ | "and respond to yQ%v? | ||
+ | "poet writes. Readers accustomed to identifying poetry with Dante, | ||
+ | "Goethe, Mickiewicz, or Rumi may think of it as written in lines " | ||
+ | "based on rhyme and regular meter. There are, however, traditions, | ||
+ | "such as Biblical poetry and alliterative verse, that use other " | ||
+ | "means to create rhythm and euphony. Much modern poetry reflects | ||
+ | "a critique of poetic tradition, testing the principle of euphony | ||
+ | "itself or altogether forgoing rhyme or set rhythm."; | ||
- | int main() | ||
- | { | ||
- | unsigned int byteCounter[256]; | ||
const int windowWidth = 20; | const int windowWidth = 20; | ||
+ | unsigned char counts[256]; | ||
- | | + | int sampleLength |
- | { | + | |
- | memset(byteCounter, | + | |
- | char *p = & | + | for (int start = 0; start <= sampleLength - windowWidth; start++) { |
- | char *end = & | + | memset(counts, |
- | | + | |
- | | + | for (int j = 0; j < windowWidth; |
- | | + | |
+ | counts[c]++; | ||
} | } | ||
- | | + | |
- | printf(" | + | |
+ | printf(" | ||
} | } | ||
+ | return 0; | ||
} | } | ||
- | + | float calculateEntropy(unsigned | |
- | float calculateEntropy(unsigned | + | |
- | { | + | |
float entropy = 0.0f; | float entropy = 0.0f; | ||
- | + | | |
- | | + | if (counts[i] > 0) { |
- | | + | float freq = (float)counts[i] / length; |
- | if (bytes[i] != 0) | + | entropy |
- | | + | |
- | float freq = (float) | + | |
- | entropy | + | |
} | } | ||
} | } | ||
return entropy; | return entropy; | ||
} | } | ||
+ | |||
</ | </ | ||
tanszek/oktatas/techcomm/information.1759783246.txt.gz · Last modified: 2025/10/06 20:40 by knehez