From 725972fb471c3485bee0a9c9cd538167f1dbc734 Mon Sep 17 00:00:00 2001 From: immortal71 Date: Thu, 1 Jan 2026 20:34:16 -0800 Subject: [PATCH 1/3] feat: Added translation tag checker for issue #1102 --> Added check_translations.py script to detect missing, untranslated, and empty T0xxx tags --> Added comprehensive pytest tests for translation validation --> Updated run-tests-generate-output.yaml to run checker and include report in PR comments --> Updated pre-release.yml to include translation report in release body --> Resolved missing tag detection as requested in #1102 --- .github/workflows/pre-release.yml | 25 ++ .../workflows/run-tests-generate-output.yaml | 43 +++- scripts/check_translations.py | 230 ++++++++++++++++++ tests/scripts/test_translation_tags.py | 124 ++++++++++ 4 files changed, 416 insertions(+), 6 deletions(-) create mode 100644 scripts/check_translations.py create mode 100644 tests/scripts/test_translation_tags.py diff --git a/.github/workflows/pre-release.yml b/.github/workflows/pre-release.yml index ef483e020..598a0f672 100644 --- a/.github/workflows/pre-release.yml +++ b/.github/workflows/pre-release.yml @@ -46,6 +46,15 @@ with: token: ${{ secrets.QLTY_COVERAGE_TOKEN }} files: coverage.xml + # Check translation tags + - name: Check translation tags + id: translation_check + run: | + pipenv run python scripts/check_translations.py > translation_check_report.md || echo "Translation issues found, continuing..." 
+ # Read the report content and save it as an output + echo "TRANSLATION_REPORT<<EOF" >> $GITHUB_ENV + cat translation_check_report.md >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV - name: Generate new output files run: | # @@ -112,12 +121,28 @@ cp output/owasp_cornucopia_webapp_3.0_cards_bridge_en.idml output/owasp_cornucopia_webapp_3.0_cards_bridge_qr_en.idml output/owasp_cornucopia_webapp_3.0_cards_tarot_en.idml output/owasp_cornucopia_webapp_3.0_cards_tarot_qr_en.idml output/owasp_cornucopia_webapp_3.0_leaflet_bridge_en.idml output/owasp_cornucopia_webapp_3.0_leaflet_tarot_en.idml output/cornucopia_webapp/ zip -r output/owasp_cornucopia_webapp_3.0_en.zip output/cornucopia_webapp/Links/* output/cornucopia_webapp/Fonts/* output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_bridge_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_bridge_qr_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_tarot_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_tarot_qr_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_leaflet_bridge_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_leaflet_tarot_en.idml ./resources/templates/owasp_cornucopia_webapp_scoresheet.pdf + - name: Prepare release body with translation report + id: prepare_release + run: | + # Read the translation report + TRANSLATION_REPORT=$(cat translation_check_report.md) + # Create a combined release body + cat > release_body.md << 'EOF' + ## OWASP Cornucopia Pre-Release + + This is an automated pre-release build from the latest master branch. 
+ + --- + + EOF + cat translation_check_report.md >> release_body.md - uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2.5.0 name: "Create pre-release" with: tag_name: pre-release prerelease: true name: Latest pre-release + body_path: release_body.md files: | CHANGELOG.md LICENSE.md diff --git a/.github/workflows/run-tests-generate-output.yaml b/.github/workflows/run-tests-generate-output.yaml index 9d708ffee..bb3c96174 100644 --- a/.github/workflows/run-tests-generate-output.yaml +++ b/.github/workflows/run-tests-generate-output.yaml @@ -58,6 +58,16 @@ jobs: run: | pip install -r requirements.txt --require-hashes pipenv install -d + - name: Check translation tags + run: | + pipenv run python scripts/check_translations.py > translation_check_report.md || echo "Translation issues found, continuing..." + - name: Upload translation check report + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + retention-days: 5 + name: translation-check-report.${{ github.sha }}.md + path: translation_check_report.md - name: Generate new output files run: | # @@ -146,23 +156,44 @@ jobs: contents: read needs: uploadoutputfiles steps: + - name: Checkout repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + ref: ${{ github.event.pull_request.head.ref }} + - name: Download translation check report + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: translation-check-report.${{ github.sha }}.md + path: . 
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 env: PR_NUMBER: ${{ github.event.number }} - PR_NOTES: | - [badge]: https://img.shields.io/badge/Build-Success!-3fb950?logo=github&style=for-the-badge + ARTIFACT_URL: ${{needs.uploadoutputfiles.outputs.artifact-url}} + with: + script: | + const fs = require('fs'); + let translationReport = ''; + try { + translationReport = fs.readFileSync('translation_check_report.md', 'utf8'); + } catch (error) { + translationReport = 'Translation check report not found.'; + } + + const prNotes = `[badge]: https://img.shields.io/badge/Build-Success!-3fb950?logo=github&style=for-the-badge ## Build artifacts: | Name | Link | |------|------| - | Output files | [cornucopia-build-files.${{ github.sha }}.zip](${{needs.uploadoutputfiles.outputs.artifact-url}}) | + | Output files | [cornucopia-build-files.${{ github.sha }}.zip](${process.env.ARTIFACT_URL}) | + + --- + + ${translationReport}`; - with: - script: | github.rest.issues.createComment({ issue_number: process.env.PR_NUMBER, owner: context.repo.owner, repo: context.repo.repo, - body: process.env.PR_NOTES + body: prNotes }) diff --git a/scripts/check_translations.py b/scripts/check_translations.py new file mode 100644 index 000000000..0154992c1 --- /dev/null +++ b/scripts/check_translations.py @@ -0,0 +1,230 @@ +""" +Translation Tag Checker for OWASP Cornucopia + +This script checks that translation files have the same T0xxx tags as the English version. 
+It detects: +- Missing tags in translations +- Untranslated tags (text identical to English) +- Empty tag values +""" + +import os +import sys +import yaml +from pathlib import Path +from typing import Dict, List, Set, Tuple +from collections import defaultdict + + +class TranslationChecker: + """Check translations for missing, untranslated, or empty tags.""" + + def __init__(self, source_dir: Path): + self.source_dir = source_dir + self.results = defaultdict(lambda: defaultdict(dict)) + + def extract_tags(self, yaml_file: Path) -> Dict[str, str]: + """Extract T0xxx tags and their text from a YAML file.""" + tags = {} + try: + with open(yaml_file, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + # Check if data has common_ids section + if data and 'common_ids' in data: + for item in data['common_ids']: + tag_id = item.get('id', '') + if tag_id.startswith('T0'): + tags[tag_id] = item.get('text', '') + + except Exception as e: + print(f"Error reading {yaml_file}: {e}", file=sys.stderr) + + return tags + + def get_file_groups(self) -> Dict[str, List[Path]]: + """Group YAML files by their base name (e.g., webapp-cards-2.2).""" + file_groups = defaultdict(list) + + for yaml_file in self.source_dir.glob('*-*.yaml'): + # Skip archived files + if 'archive' in str(yaml_file): + continue + + # Extract base name and language + # Format: {edition}-{component}-{version}-{lang}.yaml + parts = yaml_file.stem.split('-') + if len(parts) >= 3: + # Find language code (usually last part or second to last) + lang = parts[-1] + base_name = '-'.join(parts[:-1]) + + # Only process card files with language codes + if 'cards' in base_name and len(lang) == 2: + file_groups[base_name].append(yaml_file) + + return file_groups + + def check_translations(self) -> Dict[str, Dict[str, Dict[str, List[str]]]]: + """ + Check all translation files against English versions. 
+ + Returns: + Dict with structure: + { + 'base_name': { + 'language': { + 'missing': ['T00145', ...], + 'untranslated': ['T00100', ...], + 'empty': ['T00200', ...] + } + } + } + """ + file_groups = self.get_file_groups() + + for base_name, files in file_groups.items(): + # Find English reference file + english_file = None + translation_files = [] + + for f in files: + lang = f.stem.split('-')[-1] + if lang == 'en': + english_file = f + else: + translation_files.append(f) + + if not english_file: + print(f"Warning: No English file found for {base_name}", file=sys.stderr) + continue + + # Extract English tags + english_tags = self.extract_tags(english_file) + + if not english_tags: + continue + + # Check each translation + for trans_file in translation_files: + lang = trans_file.stem.split('-')[-1] + trans_tags = self.extract_tags(trans_file) + + # Find missing tags + missing = [] + untranslated = [] + empty = [] + + for tag_id, eng_text in english_tags.items(): + if tag_id not in trans_tags: + missing.append(tag_id) + elif not trans_tags[tag_id]: + empty.append(tag_id) + elif trans_tags[tag_id] == eng_text: + untranslated.append(tag_id) + + # Store results + if missing or untranslated or empty: + self.results[base_name][lang] = { + 'missing': sorted(missing), + 'untranslated': sorted(untranslated), + 'empty': sorted(empty), + 'file': str(trans_file.name) + } + + return dict(self.results) + + def generate_markdown_report(self) -> str: + """Generate a Markdown report of translation issues.""" + report_lines = [] + + if not self.results: + report_lines.append("# Translation Check Report\n") + report_lines.append("✅ All translations have the same tags as the English version.\n") + return '\n'.join(report_lines) + + report_lines.append("# Translation Check Report\n") + report_lines.append("The following sentences/tags have issues in the translations:\n") + + # Language name mapping + lang_names = { + 'es': 'Spanish', + 'fr': 'French', + 'hu': 'Hungarian', + 'it': 
'Italian', + 'nl': 'Dutch', + 'no_nb': 'Norwegian', + 'pt_br': 'Portuguese (Brazil)', + 'pt_pt': 'Portuguese (Portugal)', + 'ru': 'Russian' + } + + for base_name in sorted(self.results.keys()): + languages = self.results[base_name] + + for lang in sorted(languages.keys()): + lang_name = lang_names.get(lang, lang.upper()) + issues = languages[lang] + filename = issues.get('file', '') + + report_lines.append(f"\n## {lang_name}\n") + report_lines.append(f"**File:** `{filename}`\n") + + if issues['missing']: + report_lines.append("### Missing Tags\n") + report_lines.append("The following tags are present in the English version but missing in this translation:\n") + tags_str = ', '.join(issues['missing']) + report_lines.append(f"{tags_str}\n") + + if issues['untranslated']: + report_lines.append("### Untranslated Tags\n") + report_lines.append("The following tags have identical text to English (not translated):\n") + tags_str = ', '.join(issues['untranslated']) + report_lines.append(f"{tags_str}\n") + + if issues['empty']: + report_lines.append("### Empty Tags\n") + report_lines.append("The following tags are empty:\n") + tags_str = ', '.join(issues['empty']) + report_lines.append(f"{tags_str}\n") + + return '\n'.join(report_lines) + + +def main(): + """Main entry point for the translation checker.""" + # Determine source directory + script_dir = Path(__file__).parent + base_dir = script_dir.parent + source_dir = base_dir / 'source' + + if not source_dir.exists(): + print(f"Error: Source directory not found: {source_dir}", file=sys.stderr) + sys.exit(1) + + # Run checker + checker = TranslationChecker(source_dir) + results = checker.check_translations() + + # Generate report + report = checker.generate_markdown_report() + + # Output report + print(report) + + # Write to file + output_file = base_dir / 'translation_check_report.md' + with open(output_file, 'w', encoding='utf-8') as f: + f.write(report) + + print(f"\n---\nReport written to: {output_file}", 
file=sys.stderr) + + # Exit with error code if issues found + if results: + sys.exit(1) + else: + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/tests/scripts/test_translation_tags.py b/tests/scripts/test_translation_tags.py new file mode 100644 index 000000000..e7447d1e5 --- /dev/null +++ b/tests/scripts/test_translation_tags.py @@ -0,0 +1,124 @@ +""" +Integration tests for translation tag checking. + +Tests that all translations have the same T0xxx tags as the English version. +""" + +import unittest +import os +from pathlib import Path +import sys + +# Add scripts directory to path +scripts_path = Path(__file__).parent.parent.parent / 'scripts' +sys.path.insert(0, str(scripts_path)) + +from check_translations import TranslationChecker + + +class TestTranslationTags(unittest.TestCase): + """Test that translations have the same tags as English versions.""" + + def setUp(self): + """Set up test fixtures.""" + # Navigate up from tests/scripts to cornucopia root + self.base_path = Path(__file__).parent.parent.parent + self.source_dir = self.base_path / 'source' + self.checker = TranslationChecker(self.source_dir) + + def test_source_directory_exists(self): + """Test that the source directory exists.""" + self.assertTrue( + self.source_dir.exists(), + f"Source directory not found: {self.source_dir}" + ) + + def test_english_files_exist(self): + """Test that English card files exist.""" + english_files = list(self.source_dir.glob('*-cards-*-en.yaml')) + self.assertGreater( + len(english_files), 0, + "No English card files found in source directory" + ) + + def test_translations_have_all_tags(self): + """ + Test that all translations have the same T0xxx tags as English. 
+ + This test will fail if: + - Tags are missing in translations + - Tags are untranslated (identical to English) + - Tags are empty + """ + results = self.checker.check_translations() + + if results: + # Generate detailed report + report = self.checker.generate_markdown_report() + + # Count total issues + total_issues = 0 + for base_name, languages in results.items(): + for lang, issues in languages.items(): + total_issues += len(issues.get('missing', [])) + total_issues += len(issues.get('untranslated', [])) + total_issues += len(issues.get('empty', [])) + + self.fail( + f"\n\nTranslation issues found ({total_issues} total):\n\n{report}\n" + ) + + def test_no_duplicate_tags_in_english(self): + """Test that English files don't have duplicate T0xxx tags.""" + english_files = list(self.source_dir.glob('*-cards-*-en.yaml')) + + for eng_file in english_files: + tags = self.checker.extract_tags(eng_file) + # Extract_tags returns a dict, so duplicates would be overwritten + # We need to check the raw file for duplicates + import yaml + with open(eng_file, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + if data and 'common_ids' in data: + seen_ids = set() + duplicates = [] + + for item in data['common_ids']: + tag_id = item.get('id', '') + if tag_id.startswith('T0'): + if tag_id in seen_ids: + duplicates.append(tag_id) + seen_ids.add(tag_id) + + self.assertEqual( + len(duplicates), 0, + f"Duplicate tags found in {eng_file.name}: {duplicates}" + ) + + def test_tag_format(self): + """Test that tags follow the T0xxxx format.""" + import re + tag_pattern = re.compile(r'^T0\d{4,5}$') + + english_files = list(self.source_dir.glob('*-cards-*-en.yaml')) + + for eng_file in english_files: + tags = self.checker.extract_tags(eng_file) + + for tag_id in tags.keys(): + self.assertIsNotNone( + tag_pattern.match(tag_id), + f"Tag {tag_id} in {eng_file.name} doesn't match format T0xxxx" + ) + + def test_generate_markdown_report(self): + """Test that markdown report generation 
works.""" + report = self.checker.generate_markdown_report() + + self.assertIsInstance(report, str) + self.assertIn("Translation Check Report", report) + + +if __name__ == '__main__': + unittest.main() From d6b3263539683e422e853926610871f802d6d502 Mon Sep 17 00:00:00 2001 From: immortal71 Date: Thu, 1 Jan 2026 21:02:00 -0800 Subject: [PATCH 2/3] fixed: Address Copilot review feedback - fix language codes and remove unused imports --- scripts/check_translations.py | 9 ++++----- tests/scripts/test_translation_tags.py | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/check_translations.py b/scripts/check_translations.py index 0154992c1..e05ebb4c9 100644 --- a/scripts/check_translations.py +++ b/scripts/check_translations.py @@ -8,11 +8,10 @@ - Empty tag values """ -import os import sys import yaml from pathlib import Path -from typing import Dict, List, Set, Tuple +from typing import Dict, List from collections import defaultdict @@ -153,9 +152,9 @@ def generate_markdown_report(self) -> str: 'hu': 'Hungarian', 'it': 'Italian', 'nl': 'Dutch', - 'no_nb': 'Norwegian', - 'pt_br': 'Portuguese (Brazil)', - 'pt_pt': 'Portuguese (Portugal)', + 'no-nb': 'Norwegian', + 'pt-br': 'Portuguese (Brazil)', + 'pt-pt': 'Portuguese (Portugal)', 'ru': 'Russian' } diff --git a/tests/scripts/test_translation_tags.py b/tests/scripts/test_translation_tags.py index e7447d1e5..f2fab4b18 100644 --- a/tests/scripts/test_translation_tags.py +++ b/tests/scripts/test_translation_tags.py @@ -5,7 +5,6 @@ """ import unittest -import os from pathlib import Path import sys From 360350396385df0bef38eb92ccbc19f7bf32d13b Mon Sep 17 00:00:00 2001 From: immortal71 Date: Sat, 31 Jan 2026 03:38:28 -0800 Subject: [PATCH 3/3] fix: Addressed all review feedback - move inline imports to top, verify mock files work correctly --- .../workflows/run-tests-generate-output.yaml | 2 +- scripts/check_translations.py | 18 +-- .../check_translations_itest.py | 35 +++-- 
scripts/check_translations_utest.py | 131 ++++++++++++++++++ .../test_files/source/test-cards-1.0-en.yaml | 32 +++++ .../test_files/source/test-cards-1.0-es.yaml | 29 ++++ 6 files changed, 220 insertions(+), 27 deletions(-) rename tests/scripts/test_translation_tags.py => scripts/check_translations_itest.py (78%) create mode 100644 scripts/check_translations_utest.py create mode 100644 tests/test_files/source/test-cards-1.0-en.yaml create mode 100644 tests/test_files/source/test-cards-1.0-es.yaml diff --git a/.github/workflows/run-tests-generate-output.yaml b/.github/workflows/run-tests-generate-output.yaml index e0656d3f9..374a6f6bf 100644 --- a/.github/workflows/run-tests-generate-output.yaml +++ b/.github/workflows/run-tests-generate-output.yaml @@ -47,7 +47,7 @@ jobs: pipenv install -d - name: Check translation tags run: | - pipenv run python scripts/check_translations.py > translation_check_report.md || echo "Translation issues found, continuing..." + pipenv run python scripts/check_translations.py || echo "Translation issues found, continuing..." 
- name: Upload translation check report if: always() uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 diff --git a/scripts/check_translations.py b/scripts/check_translations.py index e05ebb4c9..72545fe08 100644 --- a/scripts/check_translations.py +++ b/scripts/check_translations.py @@ -29,12 +29,14 @@ def extract_tags(self, yaml_file: Path) -> Dict[str, str]: with open(yaml_file, 'r', encoding='utf-8') as f: data = yaml.safe_load(f) - # Check if data has common_ids section - if data and 'common_ids' in data: - for item in data['common_ids']: - tag_id = item.get('id', '') - if tag_id.startswith('T0'): - tags[tag_id] = item.get('text', '') + # Extract tags from paragraphs.sentences + if data and 'paragraphs' in data: + for paragraph in data['paragraphs']: + if 'sentences' in paragraph: + for sentence in paragraph['sentences']: + tag_id = sentence.get('id', '') + if tag_id.startswith('T0'): + tags[tag_id] = sentence.get('text', '') except Exception as e: print(f"Error reading {yaml_file}: {e}", file=sys.stderr) @@ -139,7 +141,7 @@ def generate_markdown_report(self) -> str: if not self.results: report_lines.append("# Translation Check Report\n") - report_lines.append("✅ All translations have the same tags as the English version.\n") + report_lines.append("✅ All existing translations have been completed.\n") return '\n'.join(report_lines) report_lines.append("# Translation Check Report\n") @@ -162,7 +164,7 @@ def generate_markdown_report(self) -> str: languages = self.results[base_name] for lang in sorted(languages.keys()): - lang_name = lang_names.get(lang, lang.upper()) + lang_name = lang_names.get(lang, lang) issues = languages[lang] filename = issues.get('file', '') diff --git a/tests/scripts/test_translation_tags.py b/scripts/check_translations_itest.py similarity index 78% rename from tests/scripts/test_translation_tags.py rename to scripts/check_translations_itest.py index f2fab4b18..2364fef45 100644 --- 
a/tests/scripts/test_translation_tags.py +++ b/scripts/check_translations_itest.py @@ -1,10 +1,12 @@ """ Integration tests for translation tag checking. -Tests that all translations have the same T0xxx tags as the English version. +Tests that all translations in the actual source directory have the same T0xxx tags as the English version. """ import unittest +import yaml +import re from pathlib import Path import sys @@ -15,12 +17,12 @@ from check_translations import TranslationChecker -class TestTranslationTags(unittest.TestCase): - """Test that translations have the same tags as English versions.""" +class TestTranslationTagsIntegration(unittest.TestCase): + """Integration tests that check actual translation files.""" def setUp(self): """Set up test fixtures.""" - # Navigate up from tests/scripts to cornucopia root + # Navigate up from scripts to cornucopia root self.base_path = Path(__file__).parent.parent.parent self.source_dir = self.base_path / 'source' self.checker = TranslationChecker(self.source_dir) @@ -40,7 +42,7 @@ def test_english_files_exist(self): "No English card files found in source directory" ) - def test_translations_have_all_tags(self): + def test_translations_completeness(self): """ Test that all translations have the same T0xxx tags as English. 
@@ -64,7 +66,7 @@ def test_translations_have_all_tags(self): total_issues += len(issues.get('empty', [])) self.fail( - f"\n\nTranslation issues found ({total_issues} total):\n\n{report}\n" + f"\\n\\nTranslation issues found ({total_issues} total):\\n\\n{report}\\n" ) def test_no_duplicate_tags_in_english(self): @@ -72,23 +74,21 @@ def test_no_duplicate_tags_in_english(self): english_files = list(self.source_dir.glob('*-cards-*-en.yaml')) for eng_file in english_files: - tags = self.checker.extract_tags(eng_file) - # Extract_tags returns a dict, so duplicates would be overwritten - # We need to check the raw file for duplicates - import yaml with open(eng_file, 'r', encoding='utf-8') as f: data = yaml.safe_load(f) - if data and 'common_ids' in data: + if data and 'paragraphs' in data: seen_ids = set() duplicates = [] - for item in data['common_ids']: - tag_id = item.get('id', '') - if tag_id.startswith('T0'): - if tag_id in seen_ids: - duplicates.append(tag_id) - seen_ids.add(tag_id) + for paragraph in data['paragraphs']: + if 'sentences' in paragraph: + for sentence in paragraph['sentences']: + tag_id = sentence.get('id', '') + if tag_id.startswith('T0'): + if tag_id in seen_ids: + duplicates.append(tag_id) + seen_ids.add(tag_id) self.assertEqual( len(duplicates), 0, @@ -97,7 +97,6 @@ def test_no_duplicate_tags_in_english(self): def test_tag_format(self): """Test that tags follow the T0xxxx format.""" - import re tag_pattern = re.compile(r'^T0\d{4,5}$') english_files = list(self.source_dir.glob('*-cards-*-en.yaml')) diff --git a/scripts/check_translations_utest.py b/scripts/check_translations_utest.py new file mode 100644 index 000000000..568f6c2db --- /dev/null +++ b/scripts/check_translations_utest.py @@ -0,0 +1,131 @@ +""" +Unit tests for translation tag checking. + +Tests the TranslationChecker class with mock data. 
+""" + +import unittest +import yaml +import re +from pathlib import Path +import sys + +# Add scripts directory to path +scripts_path = Path(__file__).parent.parent.parent / 'scripts' +sys.path.insert(0, str(scripts_path)) + +from check_translations import TranslationChecker + + +class TestTranslationCheckerUnit(unittest.TestCase): + """Unit tests for TranslationChecker using mock files.""" + + def setUp(self): + """Set up test fixtures.""" + # Use test_files directory for mock data + # Navigate from cornucopia/scripts -> cornucopia -> oswap -> tests + script_dir = Path(__file__).parent + cornucopia_dir = script_dir.parent + oswap_dir = cornucopia_dir.parent + self.test_source_dir = oswap_dir / 'tests' / 'test_files' / 'source' + self.checker = TranslationChecker(self.test_source_dir) + + def test_extract_tags_from_english(self): + """Test extracting tags from an English YAML file.""" + english_file = self.test_source_dir / 'test-cards-1.0-en.yaml' + tags = self.checker.extract_tags(english_file) + + self.assertIn('T00001', tags) + self.assertIn('T00002', tags) + self.assertIn('T00003', tags) + self.assertIn('T00004', tags) + self.assertEqual(tags['T00001'], 'This is the first test tag') + + def test_detect_missing_tags(self): + """Test detection of missing tags in translation.""" + results = self.checker.check_translations() + + # Spanish file is missing T00004 + self.assertIn('test-cards-1.0', results) + self.assertIn('es', results['test-cards-1.0']) + self.assertIn('T00004', results['test-cards-1.0']['es']['missing']) + + def test_detect_untranslated_tags(self): + """Test detection of untranslated tags (identical to English).""" + results = self.checker.check_translations() + + # Spanish file has T00002 identical to English + self.assertIn('test-cards-1.0', results) + self.assertIn('es', results['test-cards-1.0']) + self.assertIn('T00002', results['test-cards-1.0']['es']['untranslated']) + + def test_detect_empty_tags(self): + """Test detection of empty tag 
values.""" + results = self.checker.check_translations() + + # Spanish file has T00003 empty + self.assertIn('test-cards-1.0', results) + self.assertIn('es', results['test-cards-1.0']) + self.assertIn('T00003', results['test-cards-1.0']['es']['empty']) + + def test_generate_report_with_issues(self): + """Test markdown report generation when issues exist.""" + self.checker.check_translations() + report = self.checker.generate_markdown_report() + + self.assertIn('Translation Check Report', report) + self.assertIn('Spanish', report) + self.assertIn('Missing Tags', report) + self.assertIn('Untranslated Tags', report) + self.assertIn('Empty Tags', report) + + def test_tag_format_validation(self): + """Test that tags follow the T0xxxx format.""" + tag_pattern = re.compile(r'^T0\d{4,5}$') + + english_file = self.test_source_dir / 'test-cards-1.0-en.yaml' + tags = self.checker.extract_tags(english_file) + + for tag_id in tags.keys(): + self.assertIsNotNone( + tag_pattern.match(tag_id), + f"Tag {tag_id} doesn't match format T0xxxx" + ) + + def test_no_duplicate_tags(self): + """Test that files don't have duplicate T0xxx tags.""" + english_file = self.test_source_dir / 'test-cards-1.0-en.yaml' + + with open(english_file, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + if data and 'paragraphs' in data: + seen_ids = set() + duplicates = [] + + for paragraph in data['paragraphs']: + if 'sentences' in paragraph: + for sentence in paragraph['sentences']: + tag_id = sentence.get('id', '') + if tag_id.startswith('T0'): + if tag_id in seen_ids: + duplicates.append(tag_id) + seen_ids.add(tag_id) + + self.assertEqual( + len(duplicates), 0, + f"Duplicate tags found: {duplicates}" + ) + + def test_file_groups(self): + """Test that files are correctly grouped by base name.""" + file_groups = self.checker.get_file_groups() + + self.assertIn('test-cards-1.0', file_groups) + files = [f.name for f in file_groups['test-cards-1.0']] + self.assertIn('test-cards-1.0-en.yaml', files) 
+ self.assertIn('test-cards-1.0-es.yaml', files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_files/source/test-cards-1.0-en.yaml b/tests/test_files/source/test-cards-1.0-en.yaml new file mode 100644 index 000000000..f9b00b439 --- /dev/null +++ b/tests/test_files/source/test-cards-1.0-en.yaml @@ -0,0 +1,32 @@ +--- +meta: + edition: "test" + component: "cards" + language: "EN" + version: "1.0" +suits: +- + id: "TS" + name: "Test Suit" + cards: + - + id: "TSA" + value: "A" + desc: "Test card A" +paragraphs: +- + id: "Common" + name: "Common" + sentences: + - + id: "T00001" + text: "This is the first test tag" + - + id: "T00002" + text: "This is the second test tag" + - + id: "T00003" + text: "This is the third test tag" + - + id: "T00004" + text: "This is the fourth test tag" diff --git a/tests/test_files/source/test-cards-1.0-es.yaml b/tests/test_files/source/test-cards-1.0-es.yaml new file mode 100644 index 000000000..363c9e8c0 --- /dev/null +++ b/tests/test_files/source/test-cards-1.0-es.yaml @@ -0,0 +1,29 @@ +--- +meta: + edition: "test" + component: "cards" + language: "ES" + version: "1.0" +suits: +- + id: "TS" + name: "Test Suit Spanish" + cards: + - + id: "TSA" + value: "A" + desc: "Tarjeta de prueba A" +paragraphs: +- + id: "Common" + name: "Common" + sentences: + - + id: "T00001" + text: "Esta es la primera etiqueta de prueba" + - + id: "T00002" + text: "This is the second test tag" + - + id: "T00003" + text: ""