"""Readability metrics for English and Italian text.

For English texts:
    - Flesch Reading Ease score
    - Flesch-Kincaid Grade Level
    - Gunning Fog Index

For Italian texts:
    - Flesch Reading Ease (adapted for Italian with the Flesch-Vacca formula)
    - Gulpease Index (specifically designed for Italian)
    - Gunning Fog Index

Basic statistics for both:
    - Sentence count
    - Word count
    - Syllable count
    - Complex words (3+ syllables)
    - Average words per sentence
    - Average syllables per word

All syllable and sentence counts are heuristic approximations based on
regular expressions and vowel groups; they are not dictionary based.
"""

import re
from collections import Counter  # noqa: F401 - not used in this module; import kept as-is


class ReadabilityAnalyzer:
    """Analyze text readability for English and Italian.

    The statistics are computed once, in ``__init__``, and stored as plain
    attributes:

    * ``sentences``     - number of non-empty chunks between ``.``/``!``/``?`` runs
    * ``words``         - number of alphabetic tokens
    * ``syllables``     - total estimated syllables over all words
    * ``complex_words`` - number of words estimated at 3+ syllables
    * ``characters``    - number of non-whitespace characters (punctuation included)
    * ``letters``       - number of characters that belong to counted words

    ``language`` selects the syllable heuristic and the set of scores that
    ``get_all_scores`` / ``interpret_scores`` report: ``'it'`` means Italian,
    every other value is treated as English.
    """

    # Alphabetic token: ASCII letters plus the accented vowels listed here.
    _WORD_RE = re.compile(
        r'\b[a-zA-ZàèéìòùÀÈÉÌÒÙáíóúýÁÍÓÚÝâêîôûÂÊÎÔÛäëïöüÄËÏÖÜ]+\b'
    )
    # One or more sentence terminators count as a single sentence boundary.
    _SENTENCE_END_RE = re.compile(r'[.!?]+')

    _ENGLISH_VOWELS = 'aeiouy'
    _ITALIAN_VOWELS = 'aeiouàèéìòùáíóúý'

    # (minimum score, label) pairs, checked top-down; first match wins.
    _FRE_BANDS_IT = (
        (80, "Molto facile (Very easy)"),
        (60, "Facile (Easy)"),
        (50, "Abbastanza facile (Fairly easy)"),
        (40, "Normale (Normal)"),
        (30, "Abbastanza difficile (Fairly difficult)"),
    )
    _FRE_FALLBACK_IT = "Difficile (Difficult)"

    _GULPEASE_BANDS = (
        (80, "Elementare (Elementary school)"),
        (60, "Media inferiore (Middle school)"),
        (40, "Media superiore (High school)"),
    )
    _GULPEASE_FALLBACK = "Universitario (University)"

    _FRE_BANDS_EN = (
        (90, "Very easy (5th grade)"),
        (80, "Easy (6th grade)"),
        (70, "Fairly easy (7th grade)"),
        (60, "Standard (8th-9th grade)"),
        (50, "Fairly difficult (10th-12th grade)"),
        (30, "Difficult (College)"),
    )
    _FRE_FALLBACK_EN = "Very difficult (College graduate)"

    def __init__(self, text, language='en'):
        """Compute and store the basic statistics for *text*.

        Args:
            text: The text to analyze.
            language: ``'it'`` for Italian; any other value selects the
                English rules. Compared case-insensitively.
        """
        self.text = text
        self.language = language.lower()

        # Tokenize once so that every word-based statistic is derived from
        # exactly the same token list.
        tokens = self._tokenize()
        per_word = [self._syllables_in(word) for word in tokens]

        self.sentences = self._count_sentences()
        self.words = len(tokens)
        self.syllables = sum(per_word)
        self.complex_words = sum(1 for count in per_word if count >= 3)
        self.characters = len(re.sub(r'\s', '', text))
        # Letters only (no punctuation/digits): this is what Gulpease needs.
        self.letters = sum(len(word) for word in tokens)

    # ------------------------------------------------------------------
    # Tokenizing and counting helpers
    # ------------------------------------------------------------------

    def _tokenize(self):
        """Return the list of alphabetic words found in ``self.text``."""
        return self._WORD_RE.findall(self.text)

    def _syllables_in(self, word):
        """Estimate the syllables of *word* using the rules for ``self.language``."""
        if self.language == 'it':
            return self._count_syllables_italian(word)
        return self._count_syllables_english(word)

    @staticmethod
    def _vowel_groups(word, vowels):
        """Count maximal runs of consecutive characters of *word* in *vowels*."""
        groups = 0
        previous_was_vowel = False
        for char in word:
            is_vowel = char in vowels
            if is_vowel and not previous_was_vowel:
                groups += 1
            previous_was_vowel = is_vowel
        return groups

    def _count_sentences(self):
        """Count sentences: non-blank chunks between runs of ``.``, ``!``, ``?``."""
        chunks = self._SENTENCE_END_RE.split(self.text)
        return sum(1 for chunk in chunks if chunk.strip())

    def _count_words(self):
        """Count alphabetic words in the text."""
        return len(self._tokenize())

    def _count_syllables(self):
        """Count syllables in the text (approximation for both languages)."""
        return sum(self._syllables_in(word) for word in self._tokenize())

    def _count_syllables_english(self, word):
        """Estimate the number of syllables in an English word.

        Counts vowel groups (``y`` is treated as a vowel) and then drops one
        syllable for a silent final "e". The final "e" is considered silent
        only when it directly follows a consonant and the word does not end
        in consonant + "le"; so "make" -> 1, but "agree" -> 2 and
        "table" -> 2. The result is never less than 1.
        """
        word = word.lower()
        vowels = self._ENGLISH_VOWELS
        syllables = self._vowel_groups(word, vowels)

        if len(word) > 1 and word.endswith('e') and word[-2] not in vowels:
            # The trailing "e" formed its own vowel group. It is pronounced
            # in consonant + "le" endings ("ta-ble"), silent otherwise.
            syllabic_le = (
                len(word) > 2 and word[-2] == 'l' and word[-3] not in vowels
            )
            if not syllabic_le:
                syllables -= 1

        # Ensure at least 1 syllable (e.g. "the", or words with no vowels).
        return max(syllables, 1)

    def _count_syllables_italian(self, word):
        """Estimate the number of syllables in an Italian word.

        Counts vowel groups, treating the accented vowels in
        ``_ITALIAN_VOWELS`` as vowels. The result is never less than 1.
        """
        syllables = self._vowel_groups(word.lower(), self._ITALIAN_VOWELS)
        return max(syllables, 1)

    def _count_complex_words(self):
        """Count words estimated to have 3 or more syllables."""
        return sum(
            1 for word in self._tokenize() if self._syllables_in(word) >= 3
        )

    # ------------------------------------------------------------------
    # Readability formulas
    # ------------------------------------------------------------------

    def flesch_reading_ease(self):
        """Calculate the Flesch Reading Ease score, rounded to 2 decimals.

        Uses the Italian coefficients when ``self.language == 'it'`` and the
        standard English ones otherwise. Returns 0 when the text has no
        words or no sentences.
        """
        if self.words == 0 or self.sentences == 0:
            return 0

        words_per_sentence = self.words / self.sentences
        syllables_per_word = self.syllables / self.words

        if self.language == 'it':
            # Flesch-Vacca formula for Italian.
            # NOTE(review): coefficients kept exactly as originally written;
            # verify against the published Italian adaptation.
            score = 206.835 - 1.3 * words_per_sentence - 60.1 * syllables_per_word
        else:
            # Standard Flesch formula for English.
            score = 206.835 - 1.015 * words_per_sentence - 84.6 * syllables_per_word

        return round(score, 2)

    def flesch_kincaid_grade(self):
        """Calculate the Flesch-Kincaid Grade Level (primarily for English).

        Returns 0 when the text has no words or no sentences.
        """
        if self.words == 0 or self.sentences == 0:
            return 0

        grade = (
            0.39 * (self.words / self.sentences)
            + 11.8 * (self.syllables / self.words)
            - 15.59
        )
        return round(grade, 2)

    def gunning_fog_index(self):
        """Calculate the Gunning Fog Index, rounded to 2 decimals.

        Returns 0 when the text has no words or no sentences.
        """
        if self.words == 0 or self.sentences == 0:
            return 0

        fog = 0.4 * (
            (self.words / self.sentences)
            + 100 * (self.complex_words / self.words)
        )
        return round(fog, 2)

    def gulpease_index(self):
        """Calculate the Gulpease Index (for Italian), rounded to 2 decimals.

        Computed as ``89 + (300 * sentences - 10 * letters) / words``. Only
        letters belonging to counted words are used; punctuation and digits
        are not part of the formula. Returns 0 when the text has no words.
        """
        if self.words == 0:
            return 0

        gulpease = 89 + (300 * self.sentences - 10 * self.letters) / self.words
        return round(gulpease, 2)

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------

    def get_all_scores(self):
        """Return the basic statistics and the scores relevant to the language.

        Returns:
            A dict with two keys: ``'basic_stats'`` (counts and averages) and
            ``'readability_scores'``. For Italian the scores are
            ``flesch_reading_ease_it``, ``gulpease_index`` and
            ``gunning_fog_index``; otherwise ``flesch_reading_ease``,
            ``flesch_kincaid_grade`` and ``gunning_fog_index``.
        """
        if self.sentences > 0:
            avg_words = round(self.words / self.sentences, 2)
        else:
            avg_words = 0

        if self.words > 0:
            avg_syllables = round(self.syllables / self.words, 2)
        else:
            avg_syllables = 0

        # Add appropriate scores based on language.
        if self.language == 'it':
            readability = {
                'flesch_reading_ease_it': self.flesch_reading_ease(),
                'gulpease_index': self.gulpease_index(),
                'gunning_fog_index': self.gunning_fog_index(),
            }
        else:
            readability = {
                'flesch_reading_ease': self.flesch_reading_ease(),
                'flesch_kincaid_grade': self.flesch_kincaid_grade(),
                'gunning_fog_index': self.gunning_fog_index(),
            }

        return {
            'basic_stats': {
                'sentences': self.sentences,
                'words': self.words,
                'syllables': self.syllables,
                'complex_words': self.complex_words,
                'characters': self.characters,
                'avg_words_per_sentence': avg_words,
                'avg_syllables_per_word': avg_syllables,
            },
            'readability_scores': readability,
        }

    @staticmethod
    def _label_for(score, bands, fallback):
        """Return the label of the first band whose minimum *score* reaches."""
        for minimum, label in bands:
            if score >= minimum:
                return label
        return fallback

    def interpret_scores(self):
        """Return a newline-separated, human-readable reading of the scores."""
        scores = self.get_all_scores()['readability_scores']
        interpretation = []

        if self.language == 'it':
            # Flesch Reading Ease (Italian)
            fre = scores['flesch_reading_ease_it']
            label = self._label_for(fre, self._FRE_BANDS_IT, self._FRE_FALLBACK_IT)
            interpretation.append(f"Flesch Reading Ease (IT): {fre} - {label}")

            # Gulpease Index
            gulpease = scores['gulpease_index']
            label = self._label_for(
                gulpease, self._GULPEASE_BANDS, self._GULPEASE_FALLBACK
            )
            interpretation.append(f"Gulpease Index: {gulpease} - {label}")
        else:
            # Flesch Reading Ease (English)
            fre = scores['flesch_reading_ease']
            label = self._label_for(fre, self._FRE_BANDS_EN, self._FRE_FALLBACK_EN)
            interpretation.append(f"Flesch Reading Ease: {fre} - {label}")

            # Flesch-Kincaid Grade
            fkg = scores['flesch_kincaid_grade']
            interpretation.append(f"Flesch-Kincaid Grade: {fkg} (US grade level)")

        # Gunning Fog Index (both languages)
        fog = scores['gunning_fog_index']
        interpretation.append(f"Gunning Fog Index: {fog} (years of education needed)")

        return '\n'.join(interpretation)


def _print_report(title, text, language):
    """Print the statistics, scores and interpretation of *text* under *title*."""
    print(title)
    analyzer = ReadabilityAnalyzer(text, language=language)
    scores = analyzer.get_all_scores()

    print("\nBasic Statistics:")
    for key, value in scores['basic_stats'].items():
        print(f" {key}: {value}")

    print("\nReadability Scores:")
    for key, value in scores['readability_scores'].items():
        print(f" {key}: {value}")

    print("\nInterpretation:")
    print(analyzer.interpret_scores())


def main():
    """Run the English and Italian example analyses."""
    # English example
    english_text = """
    The quick brown fox jumps over the lazy dog. This is a simple sentence.
    However, more complicated sentences with multisyllabic words can significantly
    increase the complexity of the text and make it harder to read.
    """
    _print_report("=== ENGLISH TEXT ANALYSIS ===", english_text, 'en')

    # Italian example
    italian_text = """
    Il veloce cane marrone salta sopra il cane pigro. Questa è una frase semplice.
    Tuttavia, frasi più complicate con parole polisillabiche possono aumentare
    significativamente la complessità del testo e renderlo più difficile da leggere.
    """
    _print_report("\n\n=== ITALIAN TEXT ANALYSIS ===", italian_text, 'it')


# Example usage
if __name__ == "__main__":
    main()