Skip to content

Commit

Permalink
Add average word length stat
Browse files Browse the repository at this point in the history
  • Loading branch information
phoenixpereira committed May 30, 2024
1 parent 2a57d47 commit 2b65eb6
Show file tree
Hide file tree
Showing 6 changed files with 613 additions and 439 deletions.
287 changes: 178 additions & 109 deletions public/combined_analysis.json
Original file line number Diff line number Diff line change
@@ -1,135 +1,204 @@
{
"human": {
"lexical_diversity": {
"0.0": 2,
"0.15": 1,
"0.2": 5,
"0.25": 53,
"0.3": 132,
"0.35": 316,
"0.4": 673,
"0.45": 1214,
"0.5": 1765,
"0.55": 2854,
"0.6": 2141,
"0.65": 3508,
"0.7": 2312,
"0.75": 2731,
"0.8": 2322,
"0.85": 1955,
"0.9": 1752,
"0.95": 215,
"1.0": 1049
"0.0": 1,
"0.2": 9,
"0.25": 52,
"0.3": 130,
"0.35": 291,
"0.4": 684,
"0.45": 1203,
"0.5": 1817,
"0.55": 2824,
"0.6": 2203,
"0.65": 3438,
"0.7": 2367,
"0.75": 2649,
"0.8": 2351,
"0.85": 1981,
"0.9": 1774,
"0.95": 212,
"1.0": 1014
},
"average_word_count": {
"0": 66,
"5": 1254,
"10": 4131,
"15": 6855,
"20": 6072,
"25": 3398,
"30": 1545,
"35": 708,
"40": 350,
"45": 207,
"50": 120,
"55": 80,
"60": 46,
"65": 34,
"70": 31,
"75": 19,
"80": 17,
"85": 13,
"0": 72,
"5": 1286,
"10": 4055,
"15": 6807,
"20": 6142,
"25": 3384,
"30": 1527,
"35": 726,
"40": 362,
"45": 206,
"50": 128,
"55": 76,
"60": 51,
"65": 48,
"70": 25,
"75": 18,
"80": 11,
"85": 18,
"90": 9,
"95": 3,
"95": 8,
"100": 2
},
"average_word_length": {
"1.0": 1,
"1.2": 1,
"1.8": 3,
"2.0": 11,
"2.2": 20,
"2.4": 34,
"2.6": 107,
"2.8": 155,
"3.0": 433,
"3.2": 845,
"3.4": 1943,
"3.6": 3945,
"3.8": 4803,
"4.0": 5107,
"4.2": 3490,
"4.4": 2037,
"4.6": 967,
"4.8": 459,
"5.0": 289,
"5.2": 159,
"5.4": 90,
"5.6": 49,
"5.8": 13,
"6.0": 15,
"6.2": 5,
"6.4": 4,
"6.6": 5,
"6.8": 1,
"7.0": 3,
"7.2": 1,
"8.0": 1,
"8.2": 3,
"9.2": 1,
"10.0": 0
},
"flesch_kincaid_grade": {
"0": 214,
"1": 230,
"2": 400,
"3": 743,
"4": 1177,
"5": 1696,
"6": 2224,
"7": 2901,
"8": 2934,
"9": 2959,
"10": 2438,
"11": 1923,
"12": 1430,
"13": 1003,
"14": 688,
"15": 516,
"16": 352,
"17": 276,
"18": 896
"0": 238,
"1": 217,
"2": 416,
"3": 696,
"4": 1162,
"5": 1711,
"6": 2231,
"7": 2880,
"8": 2997,
"9": 2854,
"10": 2509,
"11": 1906,
"12": 1406,
"13": 991,
"14": 717,
"15": 512,
"16": 383,
"17": 248,
"18": 926
}
},
"ai": {
"lexical_diversity": {
"0.0": 17,
"0.2": 10,
"0.25": 73,
"0.3": 365,
"0.35": 1063,
"0.4": 3477,
"0.45": 6004,
"0.5": 6064,
"0.55": 4597,
"0.6": 1501,
"0.65": 1043,
"0.7": 231,
"0.75": 142,
"0.0": 18,
"0.2": 11,
"0.25": 70,
"0.3": 371,
"0.35": 1053,
"0.4": 3478,
"0.45": 5995,
"0.5": 6066,
"0.55": 4599,
"0.6": 1511,
"0.65": 1051,
"0.7": 228,
"0.75": 141,
"0.8": 95,
"0.85": 101,
"0.9": 119,
"0.95": 18,
"1.0": 80
"0.85": 98,
"0.9": 117,
"0.95": 17,
"1.0": 81
},
"average_word_count": {
"0": 48,
"5": 79,
"10": 267,
"15": 2574,
"20": 7048,
"25": 6406,
"30": 3356,
"35": 2049,
"40": 1263,
"45": 727,
"50": 397,
"55": 251,
"60": 153,
"65": 80,
"70": 65,
"75": 58,
"80": 40,
"85": 25,
"90": 18,
"5": 81,
"10": 266,
"15": 2569,
"20": 7003,
"25": 6443,
"30": 3362,
"35": 2015,
"40": 1274,
"45": 721,
"50": 414,
"55": 257,
"60": 160,
"65": 84,
"70": 64,
"75": 54,
"80": 41,
"85": 22,
"90": 21,
"95": 19,
"100": 3
},
"average_word_length": {
"0.0": 18,
"2.4": 1,
"2.6": 2,
"2.8": 9,
"3.0": 21,
"3.2": 105,
"3.4": 344,
"3.6": 1426,
"3.8": 2693,
"4.0": 4479,
"4.2": 5181,
"4.4": 4855,
"4.6": 3160,
"4.8": 1551,
"5.0": 728,
"5.2": 269,
"5.4": 83,
"5.6": 28,
"5.8": 18,
"6.0": 8,
"6.2": 4,
"6.4": 1,
"6.6": 1,
"6.8": 4,
"7.0": 1,
"7.2": 2,
"7.6": 2,
"7.8": 1,
"8.0": 1,
"8.4": 1,
"8.6": 1,
"10.0": 0
},
"flesch_kincaid_grade": {
"0": 19,
"1": 1,
"2": 13,
"3": 46,
"4": 86,
"5": 241,
"6": 483,
"7": 901,
"8": 1455,
"1": 2,
"2": 12,
"3": 44,
"4": 87,
"5": 235,
"6": 482,
"7": 906,
"8": 1458,
"9": 2055,
"10": 2563,
"11": 2763,
"12": 2655,
"13": 2287,
"14": 1966,
"15": 1522,
"16": 1224,
"17": 961,
"18": 3759
"10": 2534,
"11": 2772,
"12": 2636,
"13": 2268,
"14": 1989,
"15": 1542,
"16": 1234,
"17": 973,
"18": 3752
}
}
}
13 changes: 12 additions & 1 deletion src/analysis/analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ def total_syllables(text):
return sum(count_syllables(word) for word in words)


def average_word_length(text):
    """Return the mean token length of *text*, rounded to two decimals.

    The text is split with ``word_tokenize`` and every resulting token
    contributes its character count to the mean. Returns 0 when the
    tokenizer yields no tokens, so empty input does not divide by zero.
    """
    # NOTE(review): if word_tokenize is NLTK's, punctuation marks are
    # emitted as separate tokens and count toward the average -- confirm
    # that this is intended.
    tokens = word_tokenize(text)
    token_count = len(tokens)
    if token_count == 0:
        return 0
    total_chars = sum(len(token) for token in tokens)
    return round(total_chars / token_count, 2)


def average_word_count(word_count, sentence_count):
if sentence_count == 0:
return 0
Expand Down Expand Up @@ -62,9 +70,11 @@ def __init__(self, text):
self.word_count = len(self.words)
self.sentence_count = count_sentences(text)
self.syllable_count = total_syllables(text)
self.average_word_length = average_word_length(text)

def analyze(self):
return {
"average_word_length": self.average_word_length,
"average_word_count": average_word_count(self.word_count, self.sentence_count),
"lexical_diversity": lexical_diversity(self.words),
"flesch_kincaid_grade": flesch_kincaid_grade(self.word_count, self.sentence_count, self.syllable_count)
Expand All @@ -91,7 +101,7 @@ def create_sorted_frequency_ranges(stats, min_val, max_val, range_size):

def process_responses(responses, desc):
analyses = {"lexical_diversity": [],
"average_word_count": [], "flesch_kincaid_grade": []}
"average_word_count": [], "average_word_length": [], "flesch_kincaid_grade": []}
for answer in tqdm(responses, desc=desc):
analysis = TextAnalysis(answer).analyze()
for key, value in analysis.items():
Expand All @@ -107,6 +117,7 @@ def main():
"range_settings": {
"lexical_diversity": {"min_val": 0, "max_val": 1.05, "range_size": 0.05},
"average_word_count": {"min_val": 0, "max_val": 100, "range_size": 5},
"average_word_length": {"min_val": 0, "max_val": 10, "range_size": 0.2},
"flesch_kincaid_grade": {"min_val": 0, "max_val": 18, "range_size": 1},
}
}
Expand Down
Loading

0 comments on commit 2b65eb6

Please sign in to comment.