diff --git a/.gitignore b/.gitignore index 095c9a8..20e68ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ thoughts/ +data/testuser/collection.anki2-wal diff --git a/Makefile b/Makefile index 686276f..2bcc1f2 100644 --- a/Makefile +++ b/Makefile @@ -37,6 +37,15 @@ init-env: ## init-env test: ## Run all tests (unit, integration, and doc tests) with debug logging pushd $(pkg_src) && RUST_LOG=INFO cargo test --all-features --all-targets -- --test-threads=1 #--nocapture +.PHONY: refresh-test-fixture +refresh-test-fixture: ## Refresh test fixture from golden dataset + @echo "Refreshing test fixture from golden dataset..." + ./ankiview/tests/fixtures/copy_golden_dataset.sh + +.PHONY: test-verbose +test-verbose: ## Run tests with verbose logging + pushd $(pkg_src) && RUST_LOG=debug cargo test --all-features --all-targets -- --test-threads=1 --nocapture + ################################################################################ # Building, Deploying \ BUILDING: ## ################################################################## diff --git a/ankiview/Cargo.toml b/ankiview/Cargo.toml index cb6a71c..8591f44 100644 --- a/ankiview/Cargo.toml +++ b/ankiview/Cargo.toml @@ -27,6 +27,9 @@ thiserror = "2.0.11" tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } +[[bin]] +name = "build_test_collection" +path = "tests/fixtures/build_test_collection.rs" [profile.release] codegen-units = 1 diff --git a/ankiview/tests/fixtures/README.md b/ankiview/tests/fixtures/README.md new file mode 100644 index 0000000..832f3e0 --- /dev/null +++ b/ankiview/tests/fixtures/README.md @@ -0,0 +1,64 @@ +# Test Fixtures + +## Golden Test Dataset + +**Source**: `/Users/Q187392/dev/s/private/ankiview/data/testuser/` +**Fixture Location**: `test_collection/` + +**IMPORTANT**: The golden dataset in the source location is READ-ONLY. Never modify it. All tests work with copies. 
+ +### Structure +- 15 notes with real-world content +- Basic card type (front/back) +- 4 media files (PNG images) +- Collection size: ~1MB +- Media directory: ~140KB + +### Content Coverage +- Data structures (DAG, Tree, DFS) +- Algorithms and complexity +- Data science metrics (F1, accuracy) +- Database concepts (star schema) +- Embeddings and ML concepts +- Geographic reference systems + +### Media Files +- `dag.png` (37KB) - Referenced by note 1695797540370 +- `star-schema.png` (16KB) - Referenced by note 1713763428669 +- `mercator.png` (24KB) - Referenced by note 1737647330399 +- `wsg-enu2.png` (58KB) - Referenced by note 1737647330399 + +### Refreshing Fixture from Golden Dataset + +If the golden dataset is updated, refresh the fixture: + +```bash +chmod +x ankiview/tests/fixtures/copy_golden_dataset.sh +./ankiview/tests/fixtures/copy_golden_dataset.sh +``` + +### Note IDs for Testing + +Use these note IDs in integration tests: + +```rust +pub mod test_notes { + // Notes with images + pub const DAG_NOTE: i64 = 1695797540370; + pub const STAR_SCHEMA: i64 = 1713763428669; + pub const MERCATOR: i64 = 1737647330399; + + // Text-heavy notes + pub const TREE: i64 = 1695797540371; + pub const RECURSIVE_DFS: i64 = 1695797540372; + pub const TAIL_RECURSION: i64 = 1698125272387; + + // Data science notes + pub const F1_SCORE: i64 = 1714489634039; + pub const ACCURACY: i64 = 1714489634040; + pub const COLBERT: i64 = 1715928977633; + + // For testing errors + pub const NONEXISTENT: i64 = 999999999; +} +``` diff --git a/ankiview/tests/fixtures/build_test_collection.rs b/ankiview/tests/fixtures/build_test_collection.rs new file mode 100644 index 0000000..d588f17 --- /dev/null +++ b/ankiview/tests/fixtures/build_test_collection.rs @@ -0,0 +1,120 @@ +// Build script to create test collection fixture +// Run manually: cargo run --bin build_test_collection +// +// This script creates a minimal Anki collection using the Anki library. 
+// Due to the complexity and version-specific nature of the Anki API, +// an alternative approach is to manually create the collection in Anki desktop +// and copy it here. This script serves as documentation of what the collection should contain. + +use anki::collection::CollectionBuilder; +use std::path::PathBuf; + +fn main() -> anyhow::Result<()> { + println!("Creating test collection...\n"); + println!("Note: Due to Anki API complexity, this script creates an empty collection."); + println!("You should add notes manually using Anki desktop, then copy the collection here.\n"); + + let fixture_dir = PathBuf::from("tests/fixtures/test_collection"); + + // Remove old collection if exists + if fixture_dir.exists() { + std::fs::remove_dir_all(&fixture_dir)?; + } + std::fs::create_dir_all(&fixture_dir)?; + + let collection_path = fixture_dir.join("collection.anki2"); + let col = CollectionBuilder::new(&collection_path).build()?; + + println!("Created empty collection at: {:?}", collection_path); + + // Close collection + col.close(None)?; + + // Create media directory + let media_dir = fixture_dir.join("collection.media"); + std::fs::create_dir_all(&media_dir)?; + + // Create test images + create_test_media(&media_dir)?; + + println!("\n=============================================="); + println!("MANUAL STEPS REQUIRED:"); + println!("==============================================\n"); + println!("1. Open Anki desktop application"); + println!("2. Create a new profile or use existing one"); + println!("3. Add the following 8 notes with Basic card type:\n"); + println!(" Note 1:"); + println!(" Front: What is Rust?"); + println!(" Back: A systems programming language\n"); + println!(" Note 2:"); + println!(" Front: What is the quadratic formula?"); + println!(r#" Back:
$x = \frac{{-b \pm \sqrt{{b^2 - 4ac}}}}{{2a}}$
"#); + println!(); + println!(" Note 3:"); + println!(" Front: How to create a vector in Rust?"); + println!(r#" Back:
let v: Vec<i32> = vec![1, 2, 3];
"#); + println!(); + println!(" Note 4:"); + println!(" Front: Rust logo"); + println!(r#" Back: Rust logo"#); + println!(); + println!(" Note 5:"); + println!(" Front: External image test"); + println!(r#" Back: External"#); + println!(); + println!(" Note 6:"); + println!(" Front: HTML entities test"); + println!(" Back: Less than: < Greater than: > Ampersand: &"); + println!(); + println!(" Note 7:"); + println!(" Front: Question with no answer"); + println!(" Back: (leave empty)"); + println!(); + println!(" Note 8:"); + println!(" Front: Tagged question"); + println!(" Back: Tagged answer"); + println!(" Tags: test rust programming"); + println!(); + println!("4. Close Anki"); + println!("5. Copy the collection.anki2 file to:"); + println!(" {}", collection_path.display()); + println!("6. Copy media files from profile's collection.media/ to:"); + println!(" {}", media_dir.display()); + println!("7. Note the IDs of the created notes (use SQLite browser or query)"); + println!("8. Update tests/helpers/mod.rs with the actual note IDs\n"); + println!("==============================================\n"); + + Ok(()) +} + +fn create_test_media(media_dir: &std::path::Path) -> anyhow::Result<()> { + // Create a simple 1x1 PNG file (rust-logo.png) + // PNG signature + IHDR chunk for 1x1 red pixel + let rust_logo_png = [ + 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature + 0x00, 0x00, 0x00, 0x0D, // IHDR length + 0x49, 0x48, 0x44, 0x52, // IHDR + 0x00, 0x00, 0x00, 0x01, // width: 1 + 0x00, 0x00, 0x00, 0x01, // height: 1 + 0x08, 0x02, 0x00, 0x00, 0x00, // bit depth, color type, compression, filter, interlace + 0x90, 0x77, 0x53, 0xDE, // CRC + 0x00, 0x00, 0x00, 0x0C, // IDAT length + 0x49, 0x44, 0x41, 0x54, // IDAT + 0x08, 0xD7, 0x63, 0xF8, 0xCF, 0xC0, 0x00, 0x00, 0x03, 0x01, 0x01, 0x00, + 0x18, 0xDD, 0x8D, 0xB4, // CRC + 0x00, 0x00, 0x00, 0x00, // IEND length + 0x49, 0x45, 0x4E, 0x44, // IEND + 0xAE, 0x42, 0x60, 0x82, // CRC + ]; + + let rust_logo_path = 
media_dir.join("rust-logo.png"); + std::fs::write(&rust_logo_path, &rust_logo_png)?; + println!("Created test image: {:?}", rust_logo_path); + + // Create another simple PNG (sample.jpg - actually a PNG despite the name) + let sample_path = media_dir.join("sample.jpg"); + std::fs::write(&sample_path, &rust_logo_png)?; + println!("Created test image: {:?}", sample_path); + + Ok(()) +} diff --git a/ankiview/tests/fixtures/copy_golden_dataset.sh b/ankiview/tests/fixtures/copy_golden_dataset.sh new file mode 100755 index 0000000..efc4b56 --- /dev/null +++ b/ankiview/tests/fixtures/copy_golden_dataset.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Copy golden test dataset to fixtures directory +# This script should be run from the repository root + +set -euo pipefail + +GOLDEN_SOURCE="/Users/Q187392/dev/s/private/ankiview/data/testuser" +FIXTURE_TARGET="ankiview/tests/fixtures/test_collection" + +echo "Copying golden dataset to test fixtures..." + +# Remove old fixture if exists +if [ -d "$FIXTURE_TARGET" ]; then + echo "Removing existing fixture at $FIXTURE_TARGET" + rm -rf "$FIXTURE_TARGET" +fi + +# Create fixture directory +mkdir -p "$FIXTURE_TARGET" + +# Copy collection file (close any open SQLite connections first) +echo "Copying collection.anki2..." +cp "$GOLDEN_SOURCE/collection.anki2" "$FIXTURE_TARGET/" + +# Copy media directory +echo "Copying media files..." +cp -r "$GOLDEN_SOURCE/collection.media" "$FIXTURE_TARGET/" + +# Copy media database +echo "Copying media database..." +cp "$GOLDEN_SOURCE/collection.media.db2" "$FIXTURE_TARGET/" + +# Verify files were copied +echo "" +echo "Verification:" +ls -lh "$FIXTURE_TARGET/collection.anki2" +ls -lh "$FIXTURE_TARGET/collection.media.db2" +echo "" +echo "Media files:" +ls -lh "$FIXTURE_TARGET/collection.media/" +echo "" +echo "Golden dataset copied successfully!" +echo "" +echo "IMPORTANT: Do not modify files in $GOLDEN_SOURCE" +echo "Tests will work with copies of this fixture." 
diff --git a/ankiview/tests/fixtures/test_collection/collection.anki2 b/ankiview/tests/fixtures/test_collection/collection.anki2 new file mode 100644 index 0000000..28014ec Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.anki2 differ diff --git a/ankiview/tests/fixtures/test_collection/collection.anki2-shm b/ankiview/tests/fixtures/test_collection/collection.anki2-shm new file mode 100644 index 0000000..fe9ac28 Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.anki2-shm differ diff --git a/ankiview/tests/fixtures/test_collection/collection.anki2-wal b/ankiview/tests/fixtures/test_collection/collection.anki2-wal new file mode 100644 index 0000000..e69de29 diff --git a/ankiview/tests/fixtures/test_collection/collection.media.db2 b/ankiview/tests/fixtures/test_collection/collection.media.db2 new file mode 100644 index 0000000..2410091 Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.media.db2 differ diff --git a/ankiview/tests/fixtures/test_collection/collection.media/dag.png b/ankiview/tests/fixtures/test_collection/collection.media/dag.png new file mode 100644 index 0000000..c37a120 Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.media/dag.png differ diff --git a/ankiview/tests/fixtures/test_collection/collection.media/mercator.png b/ankiview/tests/fixtures/test_collection/collection.media/mercator.png new file mode 100644 index 0000000..21f7181 Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.media/mercator.png differ diff --git a/ankiview/tests/fixtures/test_collection/collection.media/star-schema.png b/ankiview/tests/fixtures/test_collection/collection.media/star-schema.png new file mode 100644 index 0000000..ee14737 Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.media/star-schema.png differ diff --git a/ankiview/tests/fixtures/test_collection/collection.media/wsg-enu2.png 
b/ankiview/tests/fixtures/test_collection/collection.media/wsg-enu2.png new file mode 100644 index 0000000..9ac799d Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.media/wsg-enu2.png differ diff --git a/ankiview/tests/helpers/mod.rs b/ankiview/tests/helpers/mod.rs new file mode 100644 index 0000000..f1869ce --- /dev/null +++ b/ankiview/tests/helpers/mod.rs @@ -0,0 +1,102 @@ +use ankiview::infrastructure::AnkiRepository; +use anyhow::{Context, Result}; +use std::path::{Path, PathBuf}; +use tempfile::TempDir; + +/// Test fixture for working with temporary Anki collections +#[allow(dead_code)] +pub struct TestCollection { + _temp_dir: TempDir, + pub collection_path: PathBuf, + pub media_dir: PathBuf, +} + +impl TestCollection { + /// Create a new test collection by copying the fixture + pub fn new() -> Result { + let temp_dir = tempfile::tempdir() + .context("Failed to create temporary directory")?; + + let fixture_path = Self::fixture_collection_path(); + let collection_path = temp_dir.path().join("collection.anki2"); + + // Copy fixture collection to temp location + std::fs::copy(&fixture_path, &collection_path) + .context("Failed to copy test collection fixture")?; + + // Copy media directory + let fixture_media = fixture_path.parent().unwrap().join("collection.media"); + let media_dir = temp_dir.path().join("collection.media"); + + if fixture_media.exists() { + copy_dir_all(&fixture_media, &media_dir) + .context("Failed to copy media directory")?; + } else { + std::fs::create_dir_all(&media_dir) + .context("Failed to create media directory")?; + } + + Ok(Self { + _temp_dir: temp_dir, + collection_path, + media_dir, + }) + } + + /// Get path to the fixture collection + fn fixture_collection_path() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests/fixtures/test_collection/collection.anki2") + } + + /// Open repository for this test collection + pub fn open_repository(&self) -> Result { + 
AnkiRepository::new(&self.collection_path) + } +} + +/// Recursively copy directory contents +fn copy_dir_all(src: &Path, dst: &Path) -> Result<()> { + std::fs::create_dir_all(dst)?; + for entry in std::fs::read_dir(src)? { + let entry = entry?; + let file_type = entry.file_type()?; + let dst_path = dst.join(entry.file_name()); + + if file_type.is_dir() { + copy_dir_all(&entry.path(), &dst_path)?; + } else { + std::fs::copy(entry.path(), &dst_path)?; + } + } + Ok(()) +} + +/// Known test note IDs from golden dataset +#[allow(dead_code)] +pub mod test_notes { + // Notes with images - good for testing media path resolution + pub const DAG_NOTE: i64 = 1695797540370; // Has dag.png image + pub const STAR_SCHEMA: i64 = 1713763428669; // Has star-schema.png image + pub const MERCATOR: i64 = 1737647330399; // Has mercator.png and wsg-enu2.png images + + // Text-heavy notes - good for testing content rendering + pub const TREE: i64 = 1695797540371; + pub const RECURSIVE_DFS: i64 = 1695797540372; + pub const TAIL_RECURSION: i64 = 1698125272387; + pub const BIG_O: i64 = 1713934919822; + + // Data science notes - good for testing HTML formatting + pub const F1_SCORE: i64 = 1714489634039; + pub const ACCURACY: i64 = 1714489634040; + pub const COLBERT: i64 = 1715928977633; + + // Additional notes + pub const SCHEMA_REASONING: i64 = 1726838512787; + pub const RRF: i64 = 1727071084388; + pub const AGENT: i64 = 1748163225945; + pub const IMBALANCED: i64 = 1748169001421; + + // For testing error cases + pub const NONEXISTENT: i64 = 999999999; +} diff --git a/ankiview/tests/test_anki.rs b/ankiview/tests/test_anki.rs index 8a5b317..a662ecd 100644 --- a/ankiview/tests/test_anki.rs +++ b/ankiview/tests/test_anki.rs @@ -1,29 +1,144 @@ +mod helpers; + use ankiview::application::NoteRepository; -use ankiview::infrastructure::AnkiRepository; +use ankiview::domain::DomainError; use anyhow::Result; -use tempfile::TempDir; +use helpers::{TestCollection, test_notes}; -fn 
setup_test_repository() -> Result<(TempDir, AnkiRepository)> { - let temp_dir = tempfile::tempdir()?; - let collection_path = temp_dir.path().join("collection.anki2"); +// Existing test (now un-ignored) +#[test] +fn given_nonexistent_note_when_getting_note_then_returns_error() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; - // TODO: Create a proper test collection using anki-core - std::fs::write(&collection_path, vec![0; 100])?; + // Act + let result = repo.get_note(test_notes::NONEXISTENT); - let repo = AnkiRepository::new(&collection_path)?; - Ok((temp_dir, repo)) + // Assert + assert!(result.is_err()); + match result.unwrap_err() { + DomainError::NoteNotFound(id) => assert_eq!(id, test_notes::NONEXISTENT), + _ => panic!("Expected NoteNotFound error"), + } + Ok(()) } #[test] -#[ignore = "TODO: Not implemented"] -fn given_nonexistent_note_when_getting_note_then_returns_error() -> Result<()> { +fn given_dag_note_when_getting_note_then_returns_note_with_image() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; + + // Act + let note = repo.get_note(test_notes::DAG_NOTE)?; + + // Assert + assert_eq!(note.id, test_notes::DAG_NOTE); + assert!(note.front.contains("DAG")); + assert!(note.back.contains("dag.png")); // Has image reference + assert!(!note.model_name.is_empty()); // Has a model name + Ok(()) +} + +#[test] +fn given_tree_note_when_getting_note_then_returns_note() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; + + // Act + let note = repo.get_note(test_notes::TREE)?; + + // Assert + assert_eq!(note.id, test_notes::TREE); + assert!(note.front.contains("Tree")); + assert!(!note.back.is_empty()); + Ok(()) +} + +#[test] +fn given_star_schema_note_when_getting_note_then_returns_html_content() -> Result<()> { + // Arrange + 
let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; + + // Act + let note = repo.get_note(test_notes::STAR_SCHEMA)?; + + // Assert + assert!(note.back.contains("

")); // Has HTML heading + assert!(note.back.contains("star-schema.png")); // Has image + assert!(note.back.contains("Fact Table")); + Ok(()) +} + +#[test] +fn given_f1_score_note_when_getting_note_then_returns_data_science_content() -> Result<()> { // Arrange - let (_temp_dir, mut repo) = setup_test_repository()?; + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; // Act - let result = repo.get_note(999999); + let note = repo.get_note(test_notes::F1_SCORE)?; // Assert + assert_eq!(note.id, test_notes::F1_SCORE); + assert!(note.front.contains("F1 score")); + Ok(()) +} + +#[test] +fn given_existing_note_when_deleting_then_removes_note() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; + + // Verify note exists first + let _ = repo.get_note(test_notes::TREE)?; + + // Act + let deleted_cards = repo.delete_note(test_notes::TREE)?; + + // Assert + assert!(deleted_cards > 0); + + // Verify note is gone + let result = repo.get_note(test_notes::TREE); assert!(result.is_err()); Ok(()) } + +#[test] +fn given_nonexistent_note_when_deleting_then_returns_error() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; + + // Act + let result = repo.delete_note(test_notes::NONEXISTENT); + + // Assert + assert!(result.is_err()); + match result.unwrap_err() { + DomainError::NoteNotFound(id) => assert_eq!(id, test_notes::NONEXISTENT), + _ => panic!("Expected NoteNotFound error"), + } + Ok(()) +} + +#[test] +fn given_repository_when_accessing_media_dir_then_returns_valid_path() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let repo = test_collection.open_repository()?; + + // Act + let media_dir = repo.media_dir(); + + // Assert + assert!(media_dir.exists()); + assert!(media_dir.is_dir()); + assert!(media_dir.ends_with("collection.media")); + Ok(()) 
+} diff --git a/ankiview/tests/test_html_presenter.rs b/ankiview/tests/test_html_presenter.rs new file mode 100644 index 0000000..87a4137 --- /dev/null +++ b/ankiview/tests/test_html_presenter.rs @@ -0,0 +1,105 @@ +mod helpers; + +use ankiview::application::NoteRepository; +use ankiview::ports::HtmlPresenter; +use anyhow::Result; +use helpers::{TestCollection, test_notes}; + +#[test] +fn given_dag_note_when_rendering_with_media_dir_then_converts_to_file_uri() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; + let note = repo.get_note(test_notes::DAG_NOTE)?; + + let media_dir = test_collection.media_dir.clone(); + let presenter = HtmlPresenter::with_media_dir(&media_dir); + + // Act + let html = presenter.render(¬e); + + // Assert + assert!(html.contains("file://")); + assert!(html.contains("dag.png")); + assert!(html.contains(&media_dir.to_string_lossy().to_string())); + Ok(()) +} + +#[test] +fn given_star_schema_note_when_rendering_then_processes_html_content() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; + let note = repo.get_note(test_notes::STAR_SCHEMA)?; + + let media_dir = test_collection.media_dir.clone(); + let presenter = HtmlPresenter::with_media_dir(&media_dir); + + // Act + let html = presenter.render(¬e); + + // Assert + assert!(html.contains("file://")); // Image converted to file URI + assert!(html.contains("star-schema.png")); + assert!(html.contains("

")); // HTML structure preserved + assert!(html.contains("Fact Table")); + Ok(()) +} + +#[test] +fn given_mercator_note_when_rendering_then_converts_multiple_images() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; + let note = repo.get_note(test_notes::MERCATOR)?; + + let media_dir = test_collection.media_dir.clone(); + let presenter = HtmlPresenter::with_media_dir(&media_dir); + + // Act + let html = presenter.render(¬e); + + // Assert + assert!(html.contains("mercator.png")); + assert!(html.contains("wsg-enu2.png")); + assert!(html.contains("file://")); + Ok(()) +} + +#[test] +fn given_f1_score_note_when_rendering_then_includes_syntax_highlighting() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; + let note = repo.get_note(test_notes::F1_SCORE)?; + + let presenter = HtmlPresenter::new(); + + // Act + let html = presenter.render(¬e); + + // Assert + assert!(html.contains("")); + assert!(html.contains("F1 score")); + // highlight.js should be included for potential code blocks + assert!(html.contains("highlight.js")); + Ok(()) +} + +#[test] +fn given_recursive_dfs_note_when_rendering_then_handles_code_content() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; + let note = repo.get_note(test_notes::RECURSIVE_DFS)?; + + let presenter = HtmlPresenter::new(); + + // Act + let html = presenter.render(¬e); + + // Assert - should not crash, should have valid HTML structure + assert!(html.contains("")); + assert!(html.contains("DFS") || html.contains("recursive")); + Ok(()) +} diff --git a/ankiview/tests/test_note_deleter.rs b/ankiview/tests/test_note_deleter.rs new file mode 100644 index 0000000..40bd574 --- /dev/null +++ b/ankiview/tests/test_note_deleter.rs @@ -0,0 +1,62 @@ +mod helpers; + +use 
ankiview::application::{NoteDeleter, NoteRepository}; +use ankiview::domain::DomainError; +use anyhow::Result; +use helpers::{TestCollection, test_notes}; + +#[test] +fn given_existing_note_when_deleting_then_removes_note_and_cards() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let mut repo = test_collection.open_repository()?; + + // Verify note exists first + let _ = repo.get_note(test_notes::RECURSIVE_DFS)?; + + let mut deleter = NoteDeleter::new(repo); + + // Act + let deleted_cards = deleter.delete_note(test_notes::RECURSIVE_DFS)?; + + // Assert + assert!(deleted_cards > 0); + + // Note: We can't verify deletion by reopening the collection due to SQLite lock. + // The successful deletion with deleted_cards > 0 is sufficient verification. + Ok(()) +} + +#[test] +fn given_nonexistent_note_when_deleting_then_returns_not_found_error() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let repo = test_collection.open_repository()?; + let mut deleter = NoteDeleter::new(repo); + + // Act + let result = deleter.delete_note(test_notes::NONEXISTENT); + + // Assert + assert!(result.is_err()); + match result.unwrap_err() { + DomainError::NoteNotFound(id) => assert_eq!(id, test_notes::NONEXISTENT), + _ => panic!("Expected NoteNotFound error"), + } + Ok(()) +} + +#[test] +fn given_note_with_image_when_deleting_then_removes_all_cards() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let repo = test_collection.open_repository()?; + let mut deleter = NoteDeleter::new(repo); + + // Act - delete note that has image + let deleted_cards = deleter.delete_note(test_notes::DAG_NOTE)?; + + // Assert - at least one card deleted + assert!(deleted_cards >= 1); + Ok(()) +} diff --git a/ankiview/tests/test_note_viewer.rs b/ankiview/tests/test_note_viewer.rs new file mode 100644 index 0000000..514d2f4 --- /dev/null +++ b/ankiview/tests/test_note_viewer.rs @@ -0,0 +1,79 @@ +mod helpers; + +use 
ankiview::application::NoteViewer; +use ankiview::ports::HtmlPresenter; +use anyhow::Result; +use helpers::{TestCollection, test_notes}; + +#[test] +fn given_valid_note_id_when_viewing_note_then_returns_note() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let repo = test_collection.open_repository()?; + let mut viewer = NoteViewer::new(repo); + + // Act + let note = viewer.view_note(test_notes::TREE)?; + + // Assert + assert_eq!(note.id, test_notes::TREE); + assert!(!note.front.is_empty()); + assert!(!note.back.is_empty()); + Ok(()) +} + +#[test] +fn given_nonexistent_note_id_when_viewing_note_then_returns_error() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let repo = test_collection.open_repository()?; + let mut viewer = NoteViewer::new(repo); + + // Act + let result = viewer.view_note(test_notes::NONEXISTENT); + + // Assert + assert!(result.is_err()); + Ok(()) +} + +#[test] +fn given_dag_note_when_viewing_and_rendering_then_produces_valid_html_with_image() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let repo = test_collection.open_repository()?; + let media_dir = test_collection.media_dir.clone(); + + let mut viewer = NoteViewer::new(repo); + let presenter = HtmlPresenter::with_media_dir(&media_dir); + + // Act + let note = viewer.view_note(test_notes::DAG_NOTE)?; + let html = presenter.render(¬e); + + // Assert + assert!(html.contains("")); + assert!(html.contains("file://")); + assert!(html.contains("dag.png")); + Ok(()) +} + +#[test] +fn given_star_schema_note_when_viewing_and_rendering_then_resolves_media_paths() -> Result<()> { + // Arrange + let test_collection = TestCollection::new()?; + let repo = test_collection.open_repository()?; + let media_dir = test_collection.media_dir.clone(); + + let mut viewer = NoteViewer::new(repo); + let presenter = HtmlPresenter::with_media_dir(&media_dir); + + // Act + let note = viewer.view_note(test_notes::STAR_SCHEMA)?; 
+ let html = presenter.render(¬e); + + // Assert + assert!(html.contains("file://")); + assert!(html.contains("star-schema.png")); + Ok(()) +} diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..e3aaad4 --- /dev/null +++ b/data/README.md @@ -0,0 +1 @@ +# anki diff --git a/data/README.txt b/data/README.txt new file mode 100644 index 0000000..21a6d2b --- /dev/null +++ b/data/README.txt @@ -0,0 +1,5 @@ +This folder stores all of your Anki data in a single location, +to make backups easy. To tell Anki to use a different location, +please see: + +http://ankisrs.net/docs/manual.html#startupopts diff --git a/data/prefs21.db b/data/prefs21.db new file mode 100644 index 0000000..45a60b0 Binary files /dev/null and b/data/prefs21.db differ diff --git a/data/prefs21.db.backup b/data/prefs21.db.backup new file mode 100644 index 0000000..2ff8e11 Binary files /dev/null and b/data/prefs21.db.backup differ diff --git a/data/testuser/collection.anki2 b/data/testuser/collection.anki2 new file mode 100644 index 0000000..28014ec Binary files /dev/null and b/data/testuser/collection.anki2 differ diff --git a/data/testuser/collection.anki2-shm b/data/testuser/collection.anki2-shm new file mode 100644 index 0000000..fe9ac28 Binary files /dev/null and b/data/testuser/collection.anki2-shm differ diff --git a/data/testuser/collection.media.db2 b/data/testuser/collection.media.db2 new file mode 100644 index 0000000..2410091 Binary files /dev/null and b/data/testuser/collection.media.db2 differ diff --git a/data/testuser/collection.media.db2-shm b/data/testuser/collection.media.db2-shm new file mode 100644 index 0000000..fe9ac28 Binary files /dev/null and b/data/testuser/collection.media.db2-shm differ diff --git a/data/testuser/collection.media.db2-wal b/data/testuser/collection.media.db2-wal new file mode 100644 index 0000000..e69de29 diff --git a/data/testuser/collection.media/dag.png b/data/testuser/collection.media/dag.png new file mode 100644 index 
0000000..c37a120 Binary files /dev/null and b/data/testuser/collection.media/dag.png differ diff --git a/data/testuser/collection.media/mercator.png b/data/testuser/collection.media/mercator.png new file mode 100644 index 0000000..21f7181 Binary files /dev/null and b/data/testuser/collection.media/mercator.png differ diff --git a/data/testuser/collection.media/star-schema.png b/data/testuser/collection.media/star-schema.png new file mode 100644 index 0000000..ee14737 Binary files /dev/null and b/data/testuser/collection.media/star-schema.png differ diff --git a/data/testuser/collection.media/wsg-enu2.png b/data/testuser/collection.media/wsg-enu2.png new file mode 100644 index 0000000..9ac799d Binary files /dev/null and b/data/testuser/collection.media/wsg-enu2.png differ diff --git a/data/testuser/deleted.txt b/data/testuser/deleted.txt new file mode 100644 index 0000000..2ec6922 --- /dev/null +++ b/data/testuser/deleted.txt @@ -0,0 +1,1252 @@ +nid mid fields +1695797540373 1686497988937

Explain stack-based DFS algorithm for tree.

Stack-Based Depth-First Search (DFS) for Tree Traversal

eliminates the need for recursion by using an explicit stack data structure to keep track of nodes to visit. +This is particularly useful in scenarios where recursion is expensive in terms of memory or computational overhead.

Algorithm Steps:

  1. Create an empty stack and push the root node onto the stack.
  2. While the stack is not empty:
    1. Pop the top node from the stack and process it (e.g., print its value).
    2. Push the right child of the popped node onto the stack if it exists.
    3. Push the left child of the popped node onto the stack if it exists.
class Node:
+    def __init__(self, value):
+        self.value = value
+        self.left = None
+        self.right = None
+# Pre-Order
+def dfs_tree_stack(root):
+    if root is None:
+        return
+    stack = [root]
+    while stack:
+        current_node = stack.pop()
+        print(current_node.value)  # Process the current node
+        if current_node.right:
+            stack.append(current_node.right)  # Push right child if exists
+        if current_node.left:
+            stack.append(current_node.left)  # Push left child if exists
+# In-Order
+def dfs_in_order_stack(root):
+    if root is None:
+        return
+    stack = []
+    current = root
+    while stack or current:
+        while current:
+            stack.append(current)  # Push current node
+            current = current.left  # Move to left child
+        current = stack.pop()  # Pop the top item
+        print(current.value)  # Process the current node
+        current = current.right  # Move to right child
+# Post-Order
+def dfs_post_order_stack(root):
+    if root is None:
+        return
+    stack = []
+    # ensure that a node's right subtree is processed before the node itself.
+    # The node is only processed (popped and printed) when either it has no right child, or its right child has already been processed
+    last_node_visited = None
+    current = root
+    while stack or current:
+        if current:
+            stack.append(current)  # Push current node
+            current = current.left  # Move to left child
+        else:
+            peek_node = stack[-1]
+            # Check if right child exists and is unvisited
+            if peek_node.right and last_node_visited != peek_node.right:
+                current = peek_node.right
+            else:
+                last_node_visited = stack.pop()  # Pop the top item
+                print(last_node_visited.value)  # Process the current node

File: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md

+1695797540374 1686497988937

Explain pre-order vs. in-order vs. post-order in tree traversal.

In binary trees, there are three common methods of traversal: pre-order, in-order, and post-order

  1. Pre-order Traversal: Visit the current node before its child nodes. The order is: Node, Left, Right.
  2. In-order Traversal: Visit the left child, then the current node, and finally the right child. The order is: Left, Node, Right.
  3. Post-order Traversal: Visit the child nodes before the current node. The order is: Left, Right, Node.
class Node:
+    def __init__(self, value):
+        self.value = value
+        self.left = None
+        self.right = None
+
+def preorder_traversal(node):
+    if node:
+        print(node.value, end=' ')
+        preorder_traversal(node.left)
+        preorder_traversal(node.right)
+
+def inorder_traversal(node):
+    if node:
+        inorder_traversal(node.left)
+        print(node.value, end=' ')
+        inorder_traversal(node.right)
+
+def postorder_traversal(node):
+    if node:
+        postorder_traversal(node.left)
+        postorder_traversal(node.right)
+        print(node.value, end=' ')
+
+# Example tree
+#     1
+#    / \
+#   2   3
+#  / \
+# 4   5
+
+root = Node(1)
+root.left = Node(2)
+root.right = Node(3)
+root.left.left = Node(4)
+root.left.right = Node(5)
+
+print("Pre-order Traversal: ")
+preorder_traversal(root)  # Output: 1 2 4 5 3
+
+print("\nIn-order Traversal: ")
+inorder_traversal(root)  # Output: 4 2 5 1 3
+
+print("\nPost-order Traversal: ")
+postorder_traversal(root)  # Output: 4 5 2 3 1

File: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md

+1699599187787 1686497988937

What is Polymorphism?

File: /Users/tw/dev/s/private/vimwiki/dev/OOP.md

+1700896496025 1686497988937

Explain Change Data Capture (CDC):

is a software design pattern used to efficiently track changes in data in a database system.

  1. Purpose: CDC aims to identify and capture changes made to the data in a database, such as inserts, updates, and deletes.

  2. Process:

    • Capture Changes: CDC systems monitor data sources (like databases) for changes.
    • Record Changes: These changes are then recorded and stored, often in a change log or a separate data store.
    • Propagate Changes: The recorded changes can be used to update data warehouses, analytics systems, or other databases.
  3. Techniques:

    • Triggers: Database triggers are used to log changes in real-time.
    • Log-Based: Reading database logs (like transaction logs) to extract changes.
    • Polling/Snapshot: Regularly querying the database to detect changes.
  4. Benefits:

    • Real-Time Data Processing: Allows for near real-time data integration and analysis.
    • Minimizes Load on Source Systems: Reduces the need for frequent, heavy queries.
    • Data Consistency and Accuracy: Ensures that downstream systems are synchronized with source data.
    • Can Capture Deletes: polling will not allow you to identify any records that have been deleted since the last poll
  5. Use Cases:

    • Data Warehousing: Updating data warehouses with the latest data.
    • Data Replication: Synchronizing data across different systems.
    • Real-Time Analytics: Providing up-to-date data for analytics.

CDC is a key component in modern data architectures, particularly in systems that require high levels of data freshness and accuracy for real-time decision-making. Five Advantages of Log-Based Change Data Capture

File: /Users/Q187392/dev/s/private/vimwiki/dev/event_driven.md

+1705384410662 1686497988937

Explain Ring Buffer:

Key Characteristics:

  1. Fixed Size:
  2. Sequential Storage: Data is stored in a sequential manner, typically in an array.
  3. Circular Wrapping: When the end of the buffer is reached, new data wraps back to the beginning of the buffer.
  4. Two Pointers/Index: Typically, there are two pointers or indices used in a ring buffer: one for reading data and one for writing data.

Working Mechanism:

difference to fifo queue

class RingBuffer:
+    def __init__(self, size):
+        self.size = size
+        self.buffer = [None] * size
+        self.write_pos = 0
+        self.read_pos = 0
+
+    def is_full(self):
+        next_write_pos = (self.write_pos + 1) % self.size
+        return next_write_pos == self.read_pos
+
+    def is_empty(self):
+        return self.write_pos == self.read_pos
+
+    def write(self, item):
+        self.buffer[self.write_pos] = item
+        self.write_pos = (self.write_pos + 1) % self.size
+
+    def read(self):
+        item = self.buffer[self.read_pos]
+        self.read_pos = (self.read_pos + 1) % self.size
+        return item
+
+# Example usage
+buffer = RingBuffer(5)
+
+# Write data to the buffer
+for i in range(1, 6):
+    buffer.write(i)
+
+# Read data from the buffer
+for _ in range(5):
+    print(buffer.read())

File: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md

+1706855598821 1686497988937

Explain "at-least-once" semantic of queue:

If I have a message for you, I will read it to you, and keep doing so again and again until you acknowledge it. +when you receive a message from the queue and don't delete/acknowledge it, you will receive it again in the future, and will keep receiving it until you explicitly delete/acknowledge it. +If the queuing system restarts before it can properly keep track of what's been sent to you, the message will be sent again. +This simple remedy of sending the message again in case of any problem on any side is what makes this guarantee so reliable.

File: /Users/Q187392/dev/s/private/vimwiki/dev/queues.md

+1711518761236 1686497988937

Explain pointers:

myvar = SOMETHING;
+mypointer = get_address_of(myvar);
+print(get_value_via_pointer(mypointer));
+## output is SOMETHING
int myvar = 17;
+int *mypointer = &myvar;  // declares mypointer as a pointer to an int (C)
+print_int(*mypointer);      // outputs 17

pointers must be declared with the * syntax (int *pointerName), and they store memory addresses. +to access the value stored at the address a pointer is pointing to, use dereference operator * again (e.g., *pointerName).

References (&T and &mut T)

fn main() {
+    let myvar: i32 = 17;
+    let mypointer: &i32 = &myvar;  // declares mypointer as a reference to an int
+
+    println!("{}", *mypointer);
+}

File: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md

+1715673437739 1686497988937

What is an algebraic type?

An algebraic type is a composite type that is formed by combining other types. An Algebraic Data Type (ADT) is a type formed by combining other types (just a bundle of data)

  1. Product Types: These are types formed by combining multiple values from other types. An example in many programming languages is a struct or a class, which can contain multiple fields of different types. In a product type, the total number of possible values is the product of the number of possible values of its constituent types.

    • combines types by AND (all types must be present).
    • dataclass is a good example of a product type.
    • It's a class where the data members are simply other types +A Python class is a more general concept than a product ADT. While a product ADT is used specifically to bundle multiple values together, a class in Python can have methods (functions), properties, inheritance, and other features. A class can be used to create complex objects with behavior, not just data.
  2. Sum Types (Union Types): These are types where a value can be one of several types but not simultaneously. They are called "sum types" because the total number of possible values is the sum of the number of possible values from its constituent types. The term "union" in union types is often used in languages like C and Rust, where it represents a type that may hold data from different types, but only one type at a time.

    • combines types by OR (one of the types must be present).
    • bool is the quintessential sum type in Python, being logically equivalent to a union of two singletons, True and False.
    • Barring bool, Python sum types come in the form of enums, literals or unions.

In summary, a union is considered an algebraic type (specifically a sum type) because it is formed by combining multiple types in a way that the resulting type can take a value that is one of its constituent types. The algebraic nature of these types comes from the way they are formed using operations analogous to those in algebra (sum for sum types, product for product types).

File: /Users/Q187392/dev/s/private/vimwiki/help/mypy.md

+1724482677282 1686497988937

Explain RAII:

Resource Acquisition Is Initialization (RAII) in Rust

Example where a file is opened and closed using RAII:

use std::fs::File;
+use std::io::{self, Write, Read};
+
+struct FileWrapper {
+    file: File,
+}
+
+impl FileWrapper {
+    fn new(filename: &str) -> io::Result<FileWrapper> {
+        let file = File::open(filename)?;
+        Ok(FileWrapper { file })
+    }
+}
+
+impl Drop for FileWrapper {
+    fn drop(&mut self) {
+        // The file will be automatically closed when the struct goes out of scope
+        // No explicit cleanup is needed in this case because the File type implements Drop
+        println!("File is being closed automatically");
+    }
+}
+
+fn process_file(filename: &str) -> io::Result<()> {
+    let mut file_wrapper = FileWrapper::new(filename)?;
+
+    let mut contents = String::new();
+    file_wrapper.file.read_to_string(&mut contents)?;
+    println!("File content: {}", contents);
+
+    // The file is automatically closed here when file_wrapper goes out of scope
+    Ok(())
+}
+
+fn main() {
+    if let Err(e) = process_file("example.txt") {
+        eprintln!("An error occurred: {}", e);
+    }
+}

Why is RAII Beneficial?

  1. Automatic Resource Management:

    • RAII ensures that resources are automatically released when they are no longer needed, reducing the risk of resource leaks (e.g., memory leaks, open file descriptors).
    • reduces boilerplate code for resource management, making the code cleaner and easier to maintain.
  2. Exception Safety (Panic Safety in Rust):

    • If a panic happens, Rust's ownership model ensures that destructors (via the Drop trait) are still called, ensuring that resources are released properly.
    • This makes code more robust, as resources are not leaked even when errors or panics occur.
  3. Improved Reliability and Safety:

    • By centralizing resource management in constructors and destructors, RAII reduces bugs related to improper resource handling, such as double-free errors or use-after-free errors.

Python:

Python's with statement and context managers provide a similar mechanism for managing resources automatically.

Java

The closest concept to RAII in Java is try-with-resources statement

Example:

import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+
+public class FileReaderExample {
+    public static void main(String[] args) {
+        try (BufferedReader reader = new BufferedReader(new FileReader("example.txt"))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                System.out.println(line);
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+        // No need to explicitly close the reader; it's done automatically.
+    }
+}

Comparison with RAII in Rust

explicit in Java (through syntax), whereas in Rust, it's implicit via the Drop trait and ownership model. +Both Rust (via RAII and the Drop trait) and Java (via try-with-resources) ensure that resources are cleaned up even if an error occurs.

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust-reference.md

+1746867058129 1686497988937

Explain Float Precision

float typically follows the IEEE 754 32-bit single-precision standard.

f = float("0.1")
+print(f"{f:.17f}")
+
+0.10000000149011612

Even 0.1 is already rounded in binary.

Details (IEEE 754 float):

A 32-bit float has:

That’s about 7.22 decimal digits (log10(2^24) ≈ 7.22).

1. Binary vs Decimal Precision

Example (Python):

f = float(1.123456789)
+print(f)
+
+1.123456789  # may appear unchanged, but internal precision is ~7 digits

For Higher Precision:

Significant digits are the digits that carry meaning in a number -- they represent its precision.

f = float("0.1234567")
+print(f"{f:.17f}")
+
+0.12345670163631439

Notice the inexact representation — even though you gave exactly 7 decimal digits, the float cannot represent 0.1234567 precisely.

Example 2: 1.0000001

f = float("1.0000001")
+print(f"{f:.17f}")
+1.00000011920928955

float-example

File: /Users/Q187392/dev/s/private/vimwiki/dev/development.md

+1748163225597 1686497988937

When to use Graph versus DAG?

Graph (General)

A graph is a collection of nodes (vertices) connected by edges (which can be directed or undirected).

DAG (Directed Acyclic Graph)

A DAG is a directed graph with no cycles.

Problem Classes Solvable with DAGs but Not with General Graphs

DAGs solve problems where dependency order matters and cycles must be avoided:

These problems fail in a general graph because:

Problem Classes Solvable with Graphs (Including Cycles) but Not with DAGs

Graphs with cycles represent problems where feedback or mutual relationships are essential:

Summary Table

Feature DAG General Graph
Cycles Forbidden Allowed
Topological ordering Possible Not possible (if cycles)
Suitable for dependencies Yes No (cycles create deadlocks)
Suitable for mutual relationships No Yes

File: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md

+1748163225601 1686497988937

Why is it easier to determine whether dependencies are met with a DAG than with a general graph?

Why is determining "dependencies are met" easier with a DAG than with a general graph?

2. Determining Dependencies in a DAG

With a DAG, you can:

This method is safe and efficient because a DAG guarantees:

3. In a General Graph (with Cycles)

This can lead to:

To handle this, you'd need:

Aspect DAG General Graph
Cycles Forbidden Allowed (possible mutual dependencies)
Topological order Always possible Impossible if cycles exist
Dependency tracking In-degree approach, simple and efficient Needs cycle detection and complex logic
Risk of deadlocks None High, if cycles exist
Execution ordering Deterministic Unclear if cycles exist

Conclusion

DAGs make it easy to determine if a node's dependencies are satisfied because:

File: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md

+1732086221265 1686497988937

What does a Kafka message look like?

Kafka Message Structure

Field Description
Key An optional key associated with the message, used for partitioning.
Value The actual data or payload of the message.
Timestamp The timestamp associated with the message.
Headers Optional key-value pairs for additional metadata.
Partition The partition where the message resides.
Offset The position of the message in the partition.
Topic The name of the topic to which the message belongs.

Timestamp Details

Example Kafka Message

Here’s an example JSON representation of a Kafka message (though messages are in binary format by default):

{
+  "topic": "example-topic",
+  "partition": 0,
+  "offset": 12345,
+  "timestamp": 1678901234567,
+  "key": "user123",
+  "value": {
+    "eventType": "login",
+    "timestamp": "2023-11-19T12:34:56Z"
+  },
+  "headers": {
+    "traceId": "abcd-1234"
+  }
+}
  1. From the Producer: If CreateTime is used, the producer sets the timestamp when sending the message.
  2. From the Broker: If LogAppendTime is used, the broker overwrites the timestamp with the time it appends the message to the partition.

File: /Users/Q187392/dev/s/private/vimwiki/dev/kafka.md

+1734591704184 1686497988937

how does map work in Rust Result context?

    env::var("HOME")
+        .map(|home| format!("{}/xxx/rs-cg", home))
+        .unwrap_or_else(|_| "/tmp/xxx/rs-cg".to_string())

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust-concepts.md

+1734591704413 1686497988937

Explain derived traits for structs:

use derive_builder::Builder;
+use serde::{Serialize, Deserialize};
+use getset::{Getters, Setters};
+use derive_more::{Display, From};
+use std::fmt;
+
+#[derive(
+    Debug,          // Allows `{:?}` formatting for debugging.
+    Clone,          // Enables `.clone()` for creating a duplicate.
+    PartialEq, Eq,  // Enables `==` and `!=` for equality comparison.
+    PartialOrd, Ord, // Enables `<`, `>`, `<=`, `>=` for ordering.
+    Hash,           // Allows hashing for use in `HashMap` or `HashSet`.
+    Default,        // Provides a default value with `T::default()`.
+    Serialize,      // Enables serialization to formats like JSON, YAML, etc.
+    Deserialize,    // Enables deserialization from those formats.
+    Builder,        // Generates a builder for the struct.
+    Getters,        // Creates getter methods for struct fields.
+    Setters,        // Creates setter methods for struct fields.
+    Display,        // Enables user-facing string representation.
+    From            // Enables conversion from tuple or compatible structs.
+)]
+pub struct Person {
+    #[getset(get = "pub", set = "pub")]
+    name: String,
+    #[getset(get = "pub", set = "pub")]
+    age: u8,
+    #[getset(get = "pub", set = "pub")]
+    email: String,
+}
+
+fn main() {
+    // Default instance
+    let default_person = Person::default();
+    println!("{:?}", default_person);
+
+    // Using the builder
+    let builder_person = PersonBuilder::default()
+        .name("Alice".to_string())
+        .age(30)
+        .email("alice@example.com".to_string())
+        .build()
+        .unwrap();
+    println!("{}", builder_person); // Uses Display trait
+
+    // Using Clone
+    let cloned_person = builder_person.clone();
+    assert_eq!(builder_person, cloned_person); // Uses PartialEq
+}

Generated Capabilities

  1. Default Instance:

    • Person::default() creates an instance with default field values (e.g., empty strings, 0 for integers).
  2. Builder Pattern:

    • Construct instances fluently:
      PersonBuilder::default()
      +    .name("Alice".to_string())
      +    .age(30)
      +    .email("alice@example.com".to_string())
      +    .build()
      +    .unwrap();
  3. Serialization/Deserialization:

    • Convert the struct to/from formats like JSON or YAML:
      let json = serde_json::to_string(&person).unwrap();
      +let deserialized: Person = serde_json::from_str(&json).unwrap();
  4. Equality and Ordering:

    • Compare structs:
      assert_eq!(person1, person2);
      +let ordered = vec![person1, person2].sort();
  5. Getters and Setters:

    • Access or modify fields:
      println!("{}", person.name());
      +person.set_name("Bob".to_string());
  6. Display Formatting:

    • Use user-friendly string representations:
      println!("{}", person); // Customizable with `Display`

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/annotations.md

+1734591704479 1686497988937

Explain logging with tracing:

1. Add Dependencies

set RUST_LOG ! +Ensure tracing and tracing-subscriber are added to your Cargo.toml:

[dependencies]
+tracing = "0.1"
+tracing-subscriber = "0.3"

2. Setup Tracing Subscriber

Initialize the tracing-subscriber with a formatter that includes span information:

use tracing_subscriber::fmt;
+
+fn main() {
+    // Initialize a subscriber with span context enabled
+    tracing_subscriber::fmt()
+        .with_env_filter("info") // Set log level
+        .with_target(true)       // Include module path
+        .with_thread_names(true) // Include thread names (optional)
+        .init();
+
+    example_function();
+}

3. Use #[instrument] to Capture Method Names

Annotate functions with the #[instrument] macro to capture their name in the logs:

use tracing::{info, instrument};
+
+#[instrument]
+fn example_function() {
+    info!("This is a log message from the method.");
+}

Output:

INFO tracing_example::example_function: This is a log message from the method.

4. Log Method Arguments

You can also include method arguments in the logs by leveraging the #[instrument] macro. It automatically logs the values of the arguments:

use tracing::{info, instrument};
+
+#[instrument]
+fn calculate_sum(a: i32, b: i32) {
+    let result = a + b;
+    info!("Sum calculated: {}", result);
+}
+
+fn main() {
+    tracing_subscriber::fmt().with_env_filter("info").init();
+    calculate_sum(5, 7);
+}

Output:

INFO tracing_example::calculate_sum{a=5, b=7}: Sum calculated: 12

5. Customizing the Subscriber

You can customize the format to explicitly include the span information in your log output:

use tracing_subscriber::fmt::format::FmtSpan;
+
+fn main() {
+    tracing_subscriber::fmt()
+        .with_env_filter("info")
+        .with_span_events(FmtSpan::ACTIVE) // Log span entry/exit
+        .init();
+}

6. Manually Adding Spans (Optional)

If you want finer control without the #[instrument] macro, you can manually create spans using tracing::span!:

use tracing::{info, span, Level};
+
+fn main() {
+    tracing_subscriber::fmt().init();
+
+    let span = span!(Level::INFO, "custom_span", method = "main_function");
+    let _enter = span.enter(); // Enter the span
+    info!("Logging within the custom span");
+}

Output:

INFO custom_span{method=main_function}: Logging within the custom span

Summary

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/logging.md

+1734591766331 1686497988937

Popular crates for struct traits:

In addition to the built-in traits external crates provide procedural macros to derive commonly needed traits for specific use cases.

Popular External Derived Struct Traits

Crate Trait Purpose
derive_builder Builder Generates a builder pattern for constructing complex structs.
serde Serialize, Deserialize Provides (de)serialization support for structs and enums to/from formats like JSON or YAML.
thiserror Error Simplifies error handling by deriving implementations of the std::error::Error trait.
num-derive FromPrimitive, ToPrimitive Derives conversions between enums and primitive types.
strum EnumString, Display, AsRefStr, EnumIter Enhances enums with string conversions, iteration, and more.
getset Getters, Setters Auto-generates getter and setter methods for struct fields.
async-trait async_trait Allows traits to contain async functions, resolving lifetime and complexity issues.
derive_more From, Into, Display, etc. Provides convenient derives for common conversion and formatting traits.
enum-as-inner EnumAsInner Provides safe accessors for enums with single-value variants.
bitflags BitFlags Easily define and manipulate bitflags.
smart-default SmartDefault Extends Default with custom defaults for individual fields.

1. derive_builder - Builder Pattern

Used to generate a builder for constructing complex structs.

use derive_builder::Builder;
+
+#[derive(Builder, Debug)]
+struct Config {
+    host: String,
+    port: u16,
+    use_tls: bool,
+}
+
+fn main() {
+    let config = ConfigBuilder::default()
+        .host("localhost".to_string())
+        .port(8080)
+        .use_tls(true)
+        .build()
+        .unwrap();
+
+    println!("{:?}", config); // Config { host: "localhost", port: 8080, use_tls: true }
+}

2. serde - Serialization/Deserialization

Converts structs or enums to/from formats like JSON or YAML.

use serde::{Serialize, Deserialize};
+
+#[derive(Serialize, Deserialize, Debug)]
+struct User {
+    id: u64,
+    name: String,
+}
+
+fn main() {
+    let user = User { id: 1, name: "Alice".to_string() };
+    let json = serde_json::to_string(&user).unwrap();
+    println!("{}", json); // {"id":1,"name":"Alice"}
+
+    let deserialized: User = serde_json::from_str(&json).unwrap();
+    println!("{:?}", deserialized); // User { id: 1, name: "Alice" }
+}

3. thiserror - Error Handling

Simplifies creating custom error types.

use thiserror::Error;
+
+#[derive(Error, Debug)]
+enum MyError {
+    #[error("Invalid input: {0}")]
+    InvalidInput(String),
+    #[error("Database error")]
+    DatabaseError,
+}
+
+fn main() {
+    let err = MyError::InvalidInput("missing field".to_string());
+    println!("{}", err); // Invalid input: missing field
+}

4. strum - Enum Enhancements

Adds utilities for enums, such as string conversions or iteration.

use strum_macros::{EnumString, Display, EnumIter};
+
+#[derive(EnumString, Display, EnumIter, Debug)]
+enum Color {
+    #[strum(serialize = "red")]
+    Red,
+    #[strum(serialize = "green")]
+    Green,
+    #[strum(serialize = "blue")]
+    Blue,
+}
+
+fn main() {
+    let color: Color = "red".parse().unwrap();
+    println!("{}", color); // Red
+}

5. getset - Getters and Setters

Auto-generates getters and setters for fields.

use getset::{Getters, Setters};
+
+#[derive(Getters, Setters, Debug)]
+struct Person {
+    #[getset(get = "pub", set = "pub")]
+    name: String,
+    #[getset(get = "pub")]
+    age: u8,
+}
+
+fn main() {
+    let mut person = Person { name: "Alice".to_string(), age: 30 };
+    person.set_name("Bob".to_string());
+    println!("{}", person.name()); // Bob
+}

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/annotations.md

+1734591766497 1686497988937

Most common traits of stdlib?

1. Core Traits

Trait Purpose Key Methods
Clone For creating duplicate values. clone()
Copy For simple, bitwise copyable types (e.g., integers, floats). Implicit (=)
Default For creating a default value for a type. default()
Debug For formatting values for debugging. fmt() (used with {:?})
PartialEq/Eq For comparing values for equality. ==, !=
PartialOrd/Ord For ordering and comparisons. <, >, cmp()

2. Iterator and Collection Traits

Trait Purpose Key Methods
Iterator For iterating over a sequence of items. next(), map(), filter()
IntoIterator Converts a type into an Iterator. into_iter()
Extend Extends a collection by adding items from an Iterator. extend()
FromIterator Creates a collection from an Iterator. from_iter()

3. Conversion and Formatting Traits

Trait Purpose Key Methods
From Converts one type into another. from()
Into Converts a type into another type. into()
AsRef/AsMut Provides references to types (&T or &mut T). as_ref(), as_mut()
ToString Converts a type into a String. to_string()
Display Formats a value for user-facing output. fmt() (used with {})

From

pub trait From<T>: Sized {
+    fn from(value: T) -> Self;
+}
+
+let s = String::from("hello"); // &str → String
+let n = i32::from(42u8);       // u8 → i32

From and Into relationship: They’re directly linked:

impl<T, U> Into<U> for T
+where
+    U: From<T>,
+{
+    fn into(self) -> U {
+        U::from(self)
+    }
+}

So if you implement From<T> for U, you automatically get Into<U> for free.

Use From when:

Examples:

AsRef

pub trait AsRef<T: ?Sized> {
+    fn as_ref(&self) -> &T;
+}
+
+let s = String::from("hello");
+let r: &str = s.as_ref(); // &String -> &str

You rarely see AsRef in callsites because:

It’s used in function bounds, not in code bodies.

Library authors use it in generic APIs to accept multiple input types. +Callers just pass whatever type they already have.

fn read_file<P: AsRef<std::path::Path>>(path: P) {
+    let path_ref: &std::path::Path = path.as_ref();
+    // ...
+}

Caller side:

read_file("config.yaml");        // &str
+read_file(String::from("a.txt")); // String
+read_file(Path::new("b.txt"));    // &Path

You don’t write as_ref() manually — the compiler infers it via generics.

Where you do see it

impl File {
+    pub fn open<P: AsRef<Path>>(path: P) -> io::Result<File> {
+        // internally: path.as_ref()
+    }
+}

Callers can use any type that can reference a Path — that’s the power of AsRef.

Use AsRef when you write APIs that take “anything that can be referenced as …”, e.g.:

fn print_uppercase<S: AsRef<str>>(input: S) {
+    println!("{}", input.as_ref().to_uppercase());
+}

You can now call:

print_uppercase("hello");
+print_uppercase(String::from("world"));
Why you rarely see it Explanation
It’s used in trait bounds, not directly in calls Most developers are callers, not implementors
It enables flexible APIs to accept multiple reference types Common in library design
It’s often implicit — no need to call .as_ref() yourself Rust’s type inference handles it

AsMut is the mutable companion to AsRef.

pub trait AsMut<T: ?Sized> {
+    fn as_mut(&mut self) -> &mut T;
+}
// Example: generic API that can mutate through many wrappers
+fn zero_out<U>(mut x: U)
+where
+    U: AsMut<u32>,
+{
+    *x.as_mut() = 0;
+}
+
+fn main() {
+    let mut a: u32 = 5;
+    zero_out(&mut a);            // &mut T implements AsMut<T>
+    let mut b = Box::new(7u32);
+    zero_out(&mut b);            // Box<u32> → &mut u32
+}

Pitfall:

4. Error Handling Traits

Trait Purpose Key Methods
Error Represents error types with optional descriptions or sources. description(), source()
Result Common enum for error handling (Ok or Err). unwrap(), expect(), map()
Option Represents an optional value (Some or None). unwrap(), map(), is_some()

5. Functional Programming Traits

Trait Purpose Key Methods
Fn, FnMut, FnOnce For closures and callable objects. call(), call_mut(), call_once()

6. Smart Pointer and Ownership Traits

Trait Purpose Key Methods
Deref Overloads the dereference operator (*). deref()
Drop Custom cleanup logic when a value goes out of scope. drop() (called automatically)
Borrow/BorrowMut Provides immutable/mutable borrowing of values. borrow(), borrow_mut()

7. Marker Traits

Trait Purpose Key Notes
Sized Indicates types with a known size at compile-time (automatically applied). Generic type parameters are Sized by default (opt out with ?Sized).
Send Allows types to be transferred between threads. Needed for thread safety.
Sync Allows shared references to be shared between threads. Needed for concurrency.

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/traits.md

+1734944783891 1686497988937

Explain Smart Pointers:

Why Vec<T> is a smart pointer

A Vec<T> is implemented as a struct that contains:

So the Vec value itself lives on the stack, but it points to elements stored on the heap.

When the Vec is dropped:

That automatic heap memory management makes it a smart pointer.

Compare with String

Example:

fn main() {
+    let mut v = Vec::new();
+    v.push(10);
+    v.push(20);
+
+    println!("Length: {}", v.len());   // metadata (len)
+    println!("First: {}", v[0]);       // dereference into heap memory
+}

Vec<T> is a smart pointer, but also a collection type. +It owns heap memory, manages it automatically, and provides higher-level APIs on top.

🔍 Rust’s Deref Trait in Smart Pointers

trait Deref {
+    type Target: ?Sized;  // Target can be Sized (like `String`) or unsized (like `[u8]`)
+    fn deref(&self) -> &Self::Target;
+}
+let x: Box<T> = Box::new(...);
+let y: &T = &*x;
+let y: &T = &x;  // auto-deref happens here

Rust automatically calls deref() behind the scenes, yielding &T.

🧠 Why Is This Important?

let s = Rc::new(String::from("hello"));
+println!("{}", s.len());  // s is auto-dereferenced to &String

1. Transparent Access to Underlying Data

Deref defines how a smart pointer behaves when the * operator is used.

let x = 5;
+let y = Box::new(x);
+
+assert_eq!(5, *y); // works because Box<T> implements Deref<Target=T>
+
+*(y.deref())  // y.deref() -> &i32, then *&i32 -> i32

So *y is really:

*(y.deref())  // y.deref() -> &i32, then *&i32 -> i32

Confusion comes from how Rust automatically applies Deref coercions in certain contexts +(like method calls). But the raw * operator itself is precise:

fn main() {
+    let x = 5;
+    let y = Box::new(x);
+
+    // *y calls Deref -> &i32 -> i32
+    assert_eq!(5, *y);
+
+    // &y is &Box<i32>
+    let r: &Box<i32> = &y;
+
+    // *r removes the & -> gives back the Box<i32>
+    let b: Box<i32> = *r; // moves it out!
+    assert_eq!(5, *b);
+}

2. Polymorphism with Regular References

Functions often take references (&T) as arguments. For smart pointers to work with them, they must be convertible to regular references. The Deref trait enables this by defining how the smart pointer can be dereferenced to a &T.

fn greet(name: &str) {
+    println!("Hello, {}!", name);
+}
+
+let my_name = Box::new(String::from("Alice"));
+greet(&my_name); // Deref coercion converts `&Box<String>` to `&String` and then to `&str`.

This is known as Deref coercion, where the compiler automatically calls the Deref implementation to transform a smart pointer into a compatible reference.

3. Customizing Dereference Behavior

By implementing the Deref trait, you can define custom dereference behavior for your own types to seamlessly expose their inner values.

use std::ops::Deref;
+
+struct MyBox<T>(T);
+
+impl<T> MyBox<T> {
+    fn new(value: T) -> MyBox<T> {
+        MyBox(value)
+    }
+}
+
+impl<T> Deref for MyBox<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0 // Returns a reference to the inner value
+    }
+}
+
+let my_box = MyBox::new(42);
+assert_eq!(42, *my_box); // Deref trait enables dereferencing to the inner value

Treating a Type Like a Reference by Implementing the Deref Trait: *sp is replaced with *(sp.deref()):

Why Use Smart Pointers?

  1. Memory Safety: Prevent memory leaks, dangling pointers, and double frees.
  2. Ownership Management: Facilitate single or shared ownership of data.
  3. Abstractions: Provide ergonomic APIs for resource management (e.g., dynamic allocation, reference counting).

Key Smart Pointer Types in Rust

Smart Pointer Purpose Key Features Example Use Cases
Box<T> Allocates data on the heap. Single ownership; Deref to value. Large structs, recursive types.
Rc<T> Reference-counted pointer for shared ownership. Immutable shared ownership. Shared immutable data (e.g., graphs, trees).
Arc<T> Like Rc<T>, but thread-safe (atomic reference counting). Thread-safe shared RO ownership. Shared data across threads in concurrent code.
RefCell<T> Allows mutable borrows checked at runtime. Interior mutability; not thread-safe. Mutating data in shared ownership contexts.
Cell<T> Similar to RefCell, but for Copy types with no borrows. Interior mutability for Copy types. Mutating small values like integers.
Mutex<T> Provides mutual exclusion for data in multithreaded code. Thread-safe interior mutability. Protecting data shared across threads.
RwLock<T> A read-write lock for multithreaded code. Multiple readers or a single writer. High-performance shared mutable state.
Cow<T> A clone-on-write pointer. Avoids cloning unless necessary. Efficient handling of borrowed or owned data.
Weak<T> Non-owning reference for use with Rc or Arc. Does not add to the strong reference count; does not own the data. Prevents circular refs in ref-counted structures (trees/graphs).

1. Box<T> - Heap Allocation

Box is used to allocate data on the heap, providing ownership and a stable address.

fn main() {
+    let b = Box::new(42); // Allocate on the heap
+    println!("Boxed value: {}", b);
+
+    // Useful for recursive types:
+    enum List {
+        Cons(i32, Box<List>),
+        Nil,
+    }
+
+    let _list = List::Cons(1, Box::new(List::Cons(2, Box::new(List::Nil))));
+}

2. Rc<T> - Reference Counting

Rc enables multiple owners for the same data, with immutable access.

use std::rc::Rc;
+
+fn main() {
+    let data = Rc::new("Hello, Rc!".to_string());
+
+    let a = Rc::clone(&data); // Clone the reference (not the data).
+    let b = Rc::clone(&data);
+
+    println!("Reference count: {}", Rc::strong_count(&data)); // 3
+    println!("{}, {}", a, b);
+}

3. Arc<T> - Thread-Safe Reference Counting

Arc is like Rc, but for concurrent scenarios.

use std::sync::Arc;
+use std::thread;
+
+fn main() {
+    let data = Arc::new("Hello, Arc!".to_string());
+
+    let handles: Vec<_> = (0..3)
+        .map(|_| {
+            let data = Arc::clone(&data);
+            thread::spawn(move || println!("{}", data))
+        })
+        .collect();
+
+    for handle in handles {
+        handle.join().unwrap();
+    }
+}

4. RefCell<T> - Interior Mutability

RefCell allows mutable borrowing even if the RefCell itself is immutable.

use std::cell::RefCell;
+
+fn main() {
+    let data = RefCell::new(42);
+
+    *data.borrow_mut() += 1; // Runtime-checked mutable borrow
+    println!("Updated value: {}", *data.borrow());
+}

5. Mutex<T> - Mutual Exclusion

Mutex ensures exclusive access to data in multithreaded scenarios.

use std::sync::Mutex;
+
+fn main() {
+    let data = Mutex::new(42);
+
+    {
+        let mut locked = data.lock().unwrap();
+        *locked += 1;
+    }
+
+    println!("Updated value: {}", *data.lock().unwrap());
+}

6. RwLock<T> - Read-Write Lock

RwLock allows multiple readers or one writer.

use std::sync::RwLock;
+
+fn main() {
+    let data = RwLock::new(42);
+
+    {
+        let read1 = data.read().unwrap();
+        let read2 = data.read().unwrap();
+        println!("Readers: {}, {}", read1, read2);
+    }
+
+    {
+        let mut write = data.write().unwrap();
+        *write += 1;
+    }
+
+    println!("Updated value: {}", *data.read().unwrap());
+}

7. Cow<T> - Clone-on-Write

Problem: Avoid Unnecessary Copies

Cow means “clone-on-write” — use a borrowed reference by default, but automatically “upgrade” +to owned data when you must modify it.

// Without `Cow`
+fn to_upper_always_owned(s: &str) -> String {
+    s.to_uppercase() // allocates every time
+}

This is wasteful if the string is already uppercase.

We can borrow when possible, clone only if necessary.

// With `Cow`
+use std::borrow::Cow;
+
+fn ensure_uppercase<'a>(input: &'a str) -> Cow<'a, str> {
+    if input.chars().all(|c| !c.is_lowercase()) {
+        // already uppercase — no allocation
+        Cow::Borrowed(input)
+    } else {
+        // needs change — allocate a new String
+        Cow::Owned(input.to_uppercase())
+    }
+}
+
+fn main() {
+    let s1 = "HELLO";
+    let s2 = "Hello";
+
+    let r1 = ensure_uppercase(s1);
+    let r2 = ensure_uppercase(s2);
+
+    println!("r1 = {}", r1); // Borrowed(&str)
+    println!("r2 = {}", r2); // Owned(String)
+}
Input Result type Allocates? Result value
"HELLO" Cow::Borrowed(&str) No "HELLO"
"Hello" Cow::Owned(String) Yes "HELLO"

Cow lets you return borrowed data when no change is needed, avoiding a clone — but owned +data when modification is necessary.

Where this is useful:

8. Weak<T>

Weak<T> is a non-owning handle to data in an Rc<T> (or Arc<T>). +It’s used to break reference cycles and observe shared data without keeping it alive.

Background: Rc<T> creates shared ownership

Rc<T> = “reference-counted” pointer.

use std::rc::Rc;
+
+let a = Rc::new(5);
+let b = Rc::clone(&a);
+
+println!("{}", Rc::strong_count(&a)); // 2

The problem — reference cycles:

a → b → a

Neither’s count ever reaches zero → memory leak.

The solution — Weak<T>

Weak<T> is a non-owning reference to data managed by an Rc<T>.

// Example: Preventing a cycle
+use std::rc::{Rc, Weak};
+use std::cell::RefCell;
+
+struct Node {
+    value: i32,
+    parent: RefCell<Weak<Node>>,      // weak reference to parent
+    children: RefCell<Vec<Rc<Node>>>, // strong references to children
+}
+
+fn main() {
+    let parent = Rc::new(Node {
+        value: 1,
+        parent: RefCell::new(Weak::new()),
+        children: RefCell::new(Vec::new()),
+    });
+
+    let child = Rc::new(Node {
+        value: 2,
+        parent: RefCell::new(Rc::downgrade(&parent)), // weak ref to parent
+        children: RefCell::new(Vec::new()),
+    });
+
+    parent.children.borrow_mut().push(Rc::clone(&child));
+
+    println!(
+        "strong = {}, weak = {}",
+        Rc::strong_count(&parent),
+        Rc::weak_count(&parent)
+    );
+
+    // Try to upgrade the weak reference
+    if let Some(parent_rc) = child.parent.borrow().upgrade() {
+        println!("Parent value = {}", parent_rc.value);
+    } // upgrade() returns None if parent was dropped
+}
Count type Incremented by Keeps data alive? Example
Strong (Rc::strong_count) Rc::clone() Yes parent holds child strongly
Weak (Rc::weak_count) Rc::downgrade() No child holds parent weakly

Use Weak when you want to:

Key Differences

Smart Pointer Heap Allocation Shared Ownership Thread Safety Interior Mutability
Box<T> ✅ ❌ ✅ (if T is Send) ❌
Rc<T> ✅ ✅ ❌ ❌
Arc<T> ✅ ✅ ✅ ❌
RefCell<T> ❌ ❌ ❌ ✅ (runtime checked)
Mutex<T> ❌ ❌ ✅ ✅ (runtime checked)

When to Use What

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust-reference.md

+1735811879776 1686497988937

Why do I need the "&" operator on m? It is a smart pointer, which already behaves as a reference, doesn't it?

fn hello(string: &str) { }
+
+let m = Box::new(String::from("Rust"));
+hello(&m);

Key Distinction: Smart Pointers vs References

Even though Box<T> implements the Deref trait to dereference into T, it is still an owned type, not a reference. +Deref coercion works on references to the smart pointer, not the smart pointer itself.

Why &m is Necessary

The function hello expects a &str. However:

  1. m is of type Box<String>, which means it owns the String.
  2. To call hello, Rust needs a &str. The compiler must:
    • Borrow m as &Box<String> (using the & operator).
    • Apply deref coercion on &Box<String> to get &String.
    • Apply deref coercion again on &String to get &str (String implements Deref<Target = str>).

Without the &, there's no reference for deref coercion to work with. Smart pointers like Box don't automatically behave as references when passed to functions.

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust-reference.md

+1735811880108 1686497988937

Explain the Law of Demeter

The Law of Demeter states that a given object should only interact with:

  1. Immediate collaborators (its own direct dependencies).
  2. Objects it creates.
  3. Objects passed to it as arguments.

Violation

public class CustomerService {
+    public String getCustomerAddress(Order order) {
+        return order.getCustomer().getAddress().toString(); // Chaining calls: Order -> Customer -> Address
+    }
+}

Adhering to the Law of Demeter

public class Order {
+    public String getCustomerAddress() {
+        return customer.getAddress().toString(); // Delegation
+    }
+}
+
+public class CustomerService {
+    public String getCustomerAddress(Order order) {
+        return order.getCustomerAddress(); // Only interacts with Order
+    }
+}
  1. Reduced Coupling: Minimizes dependencies between classes, making the system easier to understand and maintain.
  2. Increased Encapsulation: Protects the internal structure of objects.
  3. Improved Maintainability: Changes to one class have minimal impact on others.
  4. Enhanced Testability: Reduces the need to mock or simulate deep object graphs.

When to Relax the Rule


File: /Users/Q187392/dev/s/private/vimwiki/dev/solid.md

+1736449127090 1686497988937

Explain @Jacksonized:

File: /Users/Q187392/dev/s/private/vimwiki/help/java/lombok.md

+1736584024689 1686497988937

In OpenAPI, why is using a base schema with discriminator preferred over anyOf for handling polymorphic types?

anyOf Approach (Less Preferred)

items:
+  anyOf:
+    - $ref: "#/components/schemas/TypeA"
+    - $ref: "#/components/schemas/TypeB"
+
+components:
+  schemas:
+    TypeA:
+      type: object
+      properties:
+        fieldA:
+          type: string
+
+    TypeB:
+      type: object
+      properties:
+        fieldB:
+          type: integer

🔴 Downside: Harder to determine object type, requires additional validation logic.

✅ Base Schema with Discriminator and Mapping (Preferred)

items:
+  $ref: "#/components/schemas/BaseItem"
+
+components:
+  schemas:
+    BaseItem:
+      type: object
+      required: [type]
+      properties:
+        type:
+          type: string
+          enum: [TypeA, TypeB]
+      discriminator:
+        propertyName: type
+        mapping:
+          TypeA: "#/components/schemas/TypeA"
+          TypeB: "#/components/schemas/TypeB"
+
+    TypeA:
+      allOf:  # commonly used when extending a base schema
+        - $ref: "#/components/schemas/BaseItem"
+        - type: object
+          properties:
+            fieldA:
+              type: string
+
+    TypeB:
+      allOf:
+        - $ref: "#/components/schemas/BaseItem"
+        - type: object
+          properties:
+            fieldB:
+              type: integer

Advantages:

Example

Open http://localhost:8080/swagger-ui.html to interact with the API.

// Base Interface & Subclasses
+package com.example.openapi.model;
+
+import com.fasterxml.jackson.annotation.*;
+import io.swagger.v3.oas.annotations.media.DiscriminatorMapping;
+import io.swagger.v3.oas.annotations.media.Schema;
+
+@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
+@JsonSubTypes({
+    @JsonSubTypes.Type(value = TypeA.class, name = "TypeA"),
+    @JsonSubTypes.Type(value = TypeB.class, name = "TypeB")
+})
+@Schema(
+    description = "BaseItem",
+    discriminatorProperty = "type",
+    oneOf = {TypeA.class, TypeB.class},
+    discriminatorMapping = {
+        @DiscriminatorMapping(schema = TypeA.class, value = "TypeA"),
+        @DiscriminatorMapping(schema = TypeB.class, value = "TypeB")
+    }
+)
+public abstract class BaseItem {
+    @Schema(description = "Type of item", required = true, example = "TypeA")
+    public String type;
+}
+
+@Schema(description = "TypeA extends BaseItem")
+class TypeA extends BaseItem {
+    @Schema(description = "Field specific to TypeA", example = "some text")
+    public String fieldA;
+}
+
+@Schema(description = "TypeB extends BaseItem")
+class TypeB extends BaseItem {
+    @Schema(description = "Field specific to TypeB", example = "42")
+    public int fieldB;
+}
+
+// REST Controller to Handle OpenAPI Requests
+package com.example.openapi.controller;
+
+import com.example.openapi.model.BaseItem;
+import io.swagger.v3.oas.annotations.Operation;
+import io.swagger.v3.oas.annotations.tags.Tag;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.*;
+
+import java.util.List;
+
+@RestController
+@RequestMapping("/api/items")
+@Tag(name = "Items", description = "API for handling polymorphic OpenAPI requests")
+public class ItemController {
+
+    @PostMapping
+    @Operation(summary = "Create an item", description = "Accepts a polymorphic item with discriminator-based deserialization")
+    public ResponseEntity<BaseItem> createItem(@RequestBody BaseItem item) {
+        return ResponseEntity.ok(item);
+    }
+
+    @GetMapping
+    @Operation(summary = "Get sample items", description = "Returns a list of different item types")
+    public ResponseEntity<List<BaseItem>> getItems() {
+        // Example response
+        return ResponseEntity.ok(List.of(
+                new TypeA() {{ type = "TypeA"; fieldA = "Example A"; }},
+                new TypeB() {{ type = "TypeB"; fieldB = 123; }}
+        ));
+    }
+}

File: /Users/Q187392/dev/s/private/vimwiki/dev/json_schema.md

+1737182757446 1686497988937

How to find when a term/bug was introduced?

# -S 'search-term': (also called "pickaxe") filters commits to those that introduce or remove the given term.
+git log -S 'search-term'

File: /Users/Q187392/dev/s/private/vimwiki/help/git.md

+1737791849637 1686497988937

Explain Send trait:

  1. Ownership Transfer Across Threads:

    • A Send type can be moved from one thread to another. For example, a Send type stored in a std::thread::spawn closure will be safely transferred to the new thread.
  2. Automatically Implemented:

    • Most types in Rust are Send by default if they don’t contain non-Send types.
    • For instance, primitive types like i32, f64, and thread-safe smart pointers like Arc and Box are all Send.
  3. Non-Send Types:

    • A type will not implement Send if it contains data that is inherently not thread-safe, like:
      • Rc<T>: A reference-counted smart pointer that isn’t thread-safe, transferring it to +another thread would lead to race conditions.
      • *const T / *mut T: Raw pointers, because they can lead to undefined behavior if accessed +across threads without synchronization.
    • These types must be wrapped in thread-safe abstractions like Arc or Mutex to be sent +between threads.
  4. Zero-Cost Abstraction:

    • The Send trait is purely a marker trait. It has no runtime overhead or extra functionality — +it's used by the compiler to enforce safety guarantees at compile time.

Reasons Why a Type Might Not Be Send

A type is not Send when it contains non-thread-safe data or enforces single-threaded usage.

  1. Shared Mutable State: Types that allow shared access to mutable data without proper +synchronization are not Send.
  2. Non-Thread-Safe API: Types that are inherently tied to the thread in which they were +created, such as thread-local storage or types bound to OS-specific resources, may not be Send.

Example std::cell::RefCell:

use std::cell::RefCell;
+use std::thread;
+
+fn main() {
+    let data = RefCell::new(42);
+
+    let handle = thread::spawn(move || {
+        // NOTE: this actually compiles — RefCell<i32> is Send (it is only !Sync); use Rc<RefCell<_>> to get a real non-Send compile error
+        let mut value = data.borrow_mut();
+        *value += 1;
+    });
+
+    handle.join().unwrap();
+}

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/concurrency.md

+1739771956551 1686497988937

What are "object-safe" traits?

  1. Trait objects are fat pointers - they contain both a data pointer and a vtable pointer
  2. Vtables have fixed layouts - the compiler can determine method offsets statically
  3. Object safety prevents vtable chaos - no generics means predictable vtable structure
  4. Runtime dispatch works - the same vtable layout works for all implementing types

The vtable makes trait objects work - it's a compile-time-generated jump table that enables +runtime polymorphism without the overhead of type checking at every method call.

The vtable contains all the information needed to work with the concrete type at +runtime, even though the compile-time type information has been "erased" by the trait object.

Trait Object: Box<dyn Draw>
+┌─────────────────┐
+│  data_ptr   ────┼──┐
+├─────────────────┤  │
+│  vtable_ptr ────┼──┼──┐
+└─────────────────┘  │  │
+                     │  │
+                     │  │
+┌────────────────────┘  │
+│                       │
+▼ ACTUAL DATA           │
+┌─────────────────┐     │
+│   Circle {      │     │
+│     radius: 5   │     │
+│   }             │     │
+└─────────────────┘     │
+                        │
+┌───────────────────────┘
+│
+▼ VTABLE (Static, Non-Generic)
+┌──────────────────────────┐
+│ Drop function ptr        │ ← Compiler-generated
+├──────────────────────────┤
+│ Size of concrete type    │ ← Circle: 8 bytes
+├──────────────────────────┤
+│ Alignment requirements   │ ← Circle: 8 byte align
+├──────────────────────────┤
+│ draw() method ptr   ────┼──┐
+└──────────────────────────┘  │
+                              │
+┌─────────────────────────────┘
+│
+▼ CONCRETE IMPLEMENTATION
+fn circle_draw(self: &Circle) {
+    println!("Drawing circle with radius {}", self.radius);
+}
+
+MULTIPLE CONCRETE TYPES, SAME VTABLE LAYOUT
+============================================
+
+Circle vtable:          Rectangle vtable:       Triangle vtable:
+┌─────────────┐        ┌─────────────┐         ┌─────────────┐
+│ drop_circle │        │ drop_rect   │         │ drop_tri    │
+├─────────────┤        ├─────────────┤         ├─────────────┤
+│ size: 8     │        │ size: 16    │         │ size: 24    │
+├─────────────┤        ├─────────────┤         ├─────────────┤
+│ align: 8    │        │ align: 8    │         │ align: 8    │
+├─────────────┤        ├─────────────┤         ├─────────────┤
+│ circle_draw │        │ rect_draw   │         │ tri_draw    │
+├─────────────┤        ├─────────────┤         ├─────────────┤
+│ circle_area │        │ rect_area   │         │ tri_area    │
+└─────────────┘        └─────────────┘         └─────────────┘
+
+
+MEMORY SIZE BREAKDOWN:
+======================
+
+struct Circle {
+    radius: f64,    // 8 bytes
+}
+Total: 8 bytes
+
+struct Rectangle {
+    width: f64,     // 8 bytes
+    height: f64,    // 8 bytes
+}
+Total: 16 bytes
+
+struct Triangle {
+    a: f64,         // 8 bytes
+    b: f64,         // 8 bytes
+    c: f64,         // 8 bytes
+}
+Total: 24 bytes
+     ▲                      ▲                      ▲
+     │                      │                      │
+     └──────────────────────┼──────────────────────┘
+                            │
+                    SAME LAYOUT STRUCTURE
+                    (different function ptrs)
+
+struct Circle {
+    radius: f64,     // 8 bytes
+    color: u32,      // 4 bytes
+    // + 4 bytes padding for alignment
+}
+
+TRAIT OBJECT CREATION & DISPATCH
+=================================
+
+1. COMPILE TIME:
+   let circle = Circle { radius: 5 };
+   let shape: Box<dyn Draw> = Box::new(circle);
+
+   Compiler generates vtable for Circle + Draw:
+   ┌──────────────────┐
+   │ Circle's vtable  │ ← Created once per type
+   └──────────────────┘
+
+2. RUNTIME:
+   shape.draw();
+
+   Assembly-like pseudocode:
+   mov rax, [shape + 8]     ; Load vtable_ptr
+   mov rbx, [rax + 24]      ; Load draw() from vtable[3]
+   mov rdi, [shape]         ; Load data_ptr as &self
+   call rbx                 ; Call the function

To understand why certain traits are "object-safe" in Rust, and what that means, you need to understand how trait objects work under the hood.

What is a Trait Object?

A trait object is a value of a type like &dyn Trait or Box<dyn Trait> — it allows dynamic dispatch of +methods via a vtable (virtual method table), rather than through compile-time monomorphization like with generics.

fn process(shape: &dyn Shape) {
+    shape.draw(); // dynamic dispatch
+}

But not all traits can be turned into trait objects — only object-safe traits can.

Object-Safe Trait Requirements

  1. Methods do not return Self.
  2. Methods do not use generic type parameters.

❌ Why return Self is not object-safe

trait Cloneable {
+    fn clone(&self) -> Self;
+}

🔧 Solution: use Box<dyn Trait> if you want to return trait objects:

trait Cloneable {
+    fn clone_box(&self) -> Box<dyn Cloneable>;
+}
+// associated implementation
+impl<T> Cloneable for T
+where
+    T: 'static + Clone + Cloneable,
+{
+    fn clone_box(&self) -> Box<dyn Cloneable> {
+        Box::new(self.clone())
+    }
+}

Now it's object-safe because the return type is not Self, and is dynamically dispatchable.

❌ Why generic methods are not object-safe

trait Saver {
+    fn save<T: Serialize>(&self, data: &T);
+}

🔧 Solution: move the generic type out of the method:

trait Saver {
+    fn save(&self, data: &dyn Serialize); // now object-safe
+}

🧠 Mental Model

Think of a trait object as a pointer to data + pointer to a vtable. That vtable must be fixed and non-generic.

Object safety ensures that:

If a trait violates these rules, it means you need compile-time monomorphization, not runtime polymorphism.

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/traits.md

+1746458994880 1686497988937

When does a KTable emit an event?

A KTable emits events whenever there is an update to its underlying state:

If a KTable is built from an input topic, it will only emit an event when there is an actual change to the state of the KTable. +This means that if an event from the input topic does not result in a change (e.g., the new value for a key is identical to the existing value in the KTable), the KTable will not emit an event.

This optimization helps reduce unnecessary downstream processing and ensures that only meaningful changes are propagated.

✅ A KTable emits an event:

  1. When the source topic receives a new record with:

    • a key already present in the KTable, and
    • the value is different (by default, determined via equals()).
  2. When a new key is added to the KTable.

  3. When a key is deleted, i.e., a null value is written for that key — this is interpreted as a tombstone and emitted.

❌ A KTable does not emit:

KTable<String, String> users = builder.table("users");

Input Topic (users):

Key Value Emitted from KTable?
A Alice ✅ Yes
A Alice ❌ No (same value)
A Alicia ✅ Yes (value change)
A null ✅ Yes (tombstone)

What I said earlier:

A KTable emits an update when its value changes (i.e., a new value is computed at a key).

This is true in the context of internal processing, particularly when using .aggregate(), .mapValues(), or other transformations — Kafka Streams will avoid forwarding an update if the value didn't change (based on Objects.equals() by default).

But in your test:

staticInputTopic.pipeInput(RECORD_KEY, chargingPoiEvent1);
+staticInputTopic.pipeInput(RECORD_KEY, chargingPoiEvent2);

Even though chargingPoiEvent1.equals(chargingPoiEvent2):

Why? Because:

  1. KTables do not suppress identical records unless explicitly told to via .suppress(Suppressed.untilChanged()).
  2. Joins are recomputed on every side input change, and the result is emitted unless suppressed.

So, to reconcile:

Scenario Emits? Explanation
.aggregate() with same value ❌ No Internally skips re-emitting unless result differs.
.table() re-processing same value ✅ Yes Still triggers downstream joins or subscriptions.
.join() on KTable with same right side ✅ Yes Recomputes and emits result unless .suppress() is used.

📘 Official Documentation on KTable Emission Behavior

1. KTable as a Changelog Stream

The Kafka Streams documentation describes a KTable as an abstraction of a changelog stream, where each data record represents an update:([Confluent Documentation][1])

"A KTable is an abstraction of a changelog stream, where each data record represents an update. More precisely, the value in a data record is interpreted as an 'UPDATE' of the last value for the same record key, if any (if a corresponding key doesn’t exist yet, the update will be considered an INSERT)."([Confluent Documentation][1])

This means that every new record, even if it has the same value as the previous one for a given key, is treated as an update and thus can trigger downstream emissions.

2. Emission on Every Update

In the KTable JavaDoc, it's noted:([Apache Kafka][2])

"Each record in this changelog stream is an update on the primary-keyed table with the record key as the primary key."([Apache Kafka][2])

File: /Users/Q187392/dev/s/private/vimwiki/dev/kafka-stream.md

+1746803169861 1686497988937

Explain CDLS usage of OpenSearch synthetic source:

Traditionally, Elasticsearch stores the original JSON document in the _source field. +While convenient for retrieval, this can consume significant storage and impact performance.

With Synthetic Source, the _source is no longer stored but is reconstructed from indexed fields on demand. This approach brings:

Most users will not notice any difference in Kibana or dashboards. However, here are a few things to be aware of:

File: /Users/Q187392/dev/s/private/vimwiki/help/opensearch.md

+1748353212051 1686497988937

Explain:

dependency "los_caps" {
+  config_path = "../los-caps"
+  mock_outputs = {
+    tg_arn_suffix = "REAL_VALUE_KNOWN_DURING_APPLY"
+  }
+}
  1. Dependency Block:

    dependency "los_caps" {
    +  config_path = "../los-caps"
    +}
    • dependency "los_caps": allows you to reference outputs from other modules or configurations.
  2. mock_outputs:

    mock_outputs = {
    +  tg_arn_suffix = "REAL_VALUE_KNOWN_DURING_APPLY"
    +}
    • mock_outputs: This is a way to define expected outputs from the dependency that may not be available at the time of planning or applying the configuration. It allows you to specify placeholder values for outputs that will be resolved during the actual apply phase.
    • tg_arn_suffix = "REAL_VALUE_KNOWN_DURING_APPLY": "REAL_VALUE_KNOWN_DURING_APPLY" is a placeholder indicating that the actual value will be determined when the configuration is applied.

Purpose of mock_outputs

The mock_outputs feature is particularly useful in scenarios where:

File: /Users/Q187392/dev/s/private/vimwiki/help/terragrunt.md

+1756965820894 1686497988937

Explain Primitives versus Wrappers:

Rule of thumb: +Use primitives for calculations. Use wrappers when you need null, collections, or object APIs.

File: /Users/Q187392/dev/s/private/vimwiki/help/java/java.md

+1756965821034 1686497988937

Explain AWS metrics heartbeat pattern:

This pattern is robust for “must-happen-once-per-day” signals and scales well to “at least N per +day” by adjusting only the threshold.

# Example inputs
+locals {
+  powertools_namespace = "MyApp/Powertools"
+  pipeline_prefix      = "pcv-pipeline"
+}
+
+resource "aws_sns_topic" "pcv_team_topic" {
+  name = "pcv-team-alerts"
+}
+
+resource "aws_cloudwatch_metric_alarm" "daily_pcv_publish_heartbeat" {
+  alarm_name          = "pcv-file-published--daily-heartbeat"
+  alarm_description   = "Alarms if no pcv-file-published metric was emitted in the last UTC day."
+  comparison_operator = "LessThanThreshold"
+  evaluation_periods  = 1
+  threshold           = 1
+  treat_missing_data  = "ignore"
+  alarm_actions       = [aws_sns_topic.pcv_team_topic.arn]
+  ok_actions          = [aws_sns_topic.pcv_team_topic.arn]
+
+  # 1) Source metric (not directly evaluated)
+  metric_query {
+    id           = "m1"
+    return_data  = false
+    metric {
+      namespace   = local.powertools_namespace
+      metric_name = "pcv-file-published"
+      # The 1-day rollup. Buckets are aligned to UTC day boundaries.
+      period      = 86400
+      stat        = "Sum"
+      unit        = "Count"
+      dimensions = {
+        service = local.pipeline_prefix
+      }
+    }
+  }
+
+  # 2) Math series: force a value each day (0 when missing)
+  metric_query {
+    id          = "e1"
+    expression  = "FILL(m1, 0)"
+    label       = "pcv-file-published--daily-sum-filled"
+    return_data = true
+  }
+}

Why two metric_query blocks?

When does FILL(..., 0) produce 0?

Operational notes & gotchas

File: /Users/Q187392/dev/s/private/vimwiki/help/aws/cloudwatch.md

+1757309867700 1686497988937

Explain deref coercion in method resolution:

This is one of the most ergonomic parts of Deref + DerefMut: they hook into method call +resolution.

When you call a method like:

my_box.some_method()

the compiler checks:

  1. Does MyBox itself implement some_method?

    • If yes → call it.
  2. If not, and MyBox: Deref<Target = U>, check if U implements some_method.

    • If yes → implicitly insert (*my_box).some_method().
  3. If still not found and you have DerefMut, the same applies for mutable methods.

This is called deref coercion in method resolution.

use std::ops::{Deref, DerefMut};

/// Minimal smart-pointer-like wrapper used to demonstrate how `Deref` and
/// `DerefMut` take part in method resolution.
struct MyBox<T>(T);

impl<T> Deref for MyBox<T> {
    type Target = T;

    /// Gives shared access to the wrapped value.
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T> DerefMut for MyBox<T> {
    /// Gives exclusive access to the wrapped value.
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

fn main() {
    let mut b = MyBox(String::from("Hello"));

    // Calls String::len via Deref
    println!("Length = {}", b.len());

    // Calls String::push_str via DerefMut
    b.push_str(" World");

    // Formatting macros need the argument type itself to implement `Display`;
    // no deref coercion happens for format arguments, so `MyBox<String>` must
    // be dereferenced explicitly to reach the inner `String`.
    println!("{}", *b);
}

Output:

Length = 5
+Hello World

What happened?

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust-concepts.md

+1757313441504 1686497988937

Explain Interaction Tests:

Interaction tests are a type of unit test that verify how a class collaborates with its +dependencies, usually with mocks and verify(). Instead of checking outputs or state, they check +that the right methods are called on collaborators.

verify(paymentService).charge(order.getTotal());

Why use sparingly:

  1. Brittle – They fail when you refactor implementation details, even if behavior hasn’t changed.

    • Example: switching from paymentService.charge() to billingService.processPayment() breaks +the test, though the outcome (customer charged) is the same.
  2. Coupled to implementation – They tie tests to how work is done, not what the system +achieves. This makes refactoring painful.

  3. Noise – Too many verify() calls add little business value and create failing tests for +non-functional reasons.

When they make sense:

Better default: Use state-based tests where possible — assert on returned values or persisted state.

Order order = service.placeOrder("item1");
+assertEquals(OrderStatus.PAID, order.getStatus());

Example

class OrderService {
+    private final PaymentService paymentService;
+    private final EmailService emailService;
+
+    OrderService(PaymentService paymentService, EmailService emailService) {
+        this.paymentService = paymentService;
+        this.emailService = emailService;
+    }
+
+    void placeOrder(Order order) {
+        paymentService.charge(order.getTotal());
+        emailService.sendConfirmation(order.getId());
+    }
+}
import org.junit.jupiter.api.Test;
+
+import static org.mockito.Mockito.*;
+
+class OrderServiceTest {
+
+    @Test
+    void placeOrder_ChargesCustomerAndSendsEmail() {
+        // Arrange
+        PaymentService paymentService = mock(PaymentService.class);
+        EmailService emailService = mock(EmailService.class);
+        OrderService orderService = new OrderService(paymentService, emailService);
+        Order order = new Order("123", 50.0);
+
+        // Act
+        orderService.placeOrder(order);
+
+        // Assert
+        verify(paymentService).charge(50.0);
+        verify(emailService).sendConfirmation("123");
+        verifyNoMoreInteractions(paymentService, emailService);
+    }
+}

Breakdown:

This shows clearly:

File: /Users/Q187392/dev/s/private/vimwiki/dev/tdd.md

+1757482783526 1686497988937

Explain truststore versus keystore:

Usage in SSL/TLS Communication

During an SSL handshake (e.g., HTTPS):

1. Keystore

A single certificate (e.g., your server’s certificate) is not sufficient to prove your identity +to clients.

                 [ Root CA Certificate ]
+                 (Trusted by clients)
+                          │
+                          ▼
+             [ Intermediate CA Certificate ]
+                  (Issued by Root CA)
+                          │
+                          ▼
+             [ Server Certificate + Private Key ]
+                   (Issued by Intermediate CA)

The client needs the full chain (except the root)

Keystore
+│
+├─ Private Key (server.key)
+├─ Server Certificate (server.crt)
+└─ Intermediate CA Certificate(s)

If the server only sent its leaf certificate, the client would fail validation because it +wouldn’t know how to link it back to a trusted root.

The keystore needs to contain:

2. Truststore

File: /Users/Q187392/dev/s/private/vimwiki/help/certificates.md

+1758358945793 1686497988937

Explain Python EntryPoints in context of Plugin architecture:

The main application defines a named entry point group (e.g. "myapp.plugins") as part of +its plugin API contract. That group name is essentially the “namespace” under which plugins +register themselves.

So the workflow is:

  1. Main app: chooses a group name and looks up entry_points(group="myapp.plugins").
  2. Plugins: advertise themselves under that exact group name.
  3. Discovery: at runtime, the app asks for that group and loads whatever is installed.

Concept: Entry points are a packaging-metadata mechanism (introduced by setuptools) for declaring and discovering pluggable components. Packages advertise objects (classes, functions, factories) under named groups in their metadata. Other code can then query these groups and load the objects dynamically.

How it works

  1. Declaration – myplugin package declares entry points in its pyproject.toml:

    [project]
    +name = "myplugin"
    +version = "0.1"
    +
    +[project.entry-points."myapp.plugins"]
    +hello = "myplugin.hello:HelloPlugin"

    Here, myplugin registers HelloPlugin under the myapp.plugins group.

  2. Installation metadata – When installed, this information is written into the package’s .dist-info/entry_points.txt.

  3. Discovery – At runtime, importlib.metadata.entry_points scans all installed distributions +on sys.path, reads their entry point metadata, and returns matches for a given group.

    from importlib.metadata import entry_points
    +
    +for ep in entry_points(group="myapp.plugins"):
    +    plugin_cls = ep.load()   # Import object
    +    plugin_cls().run()

Why useful for plugins

Illustrative example

Core app: myapp

myapp/main.py

from importlib.metadata import entry_points
+
+def load_plugins():
+    for ep in entry_points(group="myapp.plugins"):
+        plugin = ep.load()()
+        plugin.run()
+
+if __name__ == "__main__":
+    load_plugins()

pyproject.toml (core app doesn’t usually declare entry points, only defines itself):

[project]
+name = "myapp"
+version = "0.1"
+dependencies = []

Plugin: myplugin

myplugin/hello.py

class HelloPlugin:
    """Example plugin exposed through the ``myapp.plugins`` entry-point group."""

    def run(self):
        """Print a greeting to standard output."""
        print("Hello from plugin!")

pyproject.toml

[project]
+name = "myplugin"
+version = "0.1"
+dependencies = []
+
+[project.entry-points."myapp.plugins"]
+hello = "myplugin.hello:HelloPlugin"

With both installed in the same environment, running python -m myapp.main will discover the entry +point defined by myplugin and execute it, without any explicit import in myapp.

File: /Users/Q187392/dev/s/private/vimwiki/help/python-packaging.md

+1758696231518 1686497988937

Explain Kafka Tombstone:

Within Kafka itself, a tombstone is just a record with:

Kafka does not assign any "delete" semantics beyond this. Its only built-in behavior is:

Outside of that, Kafka does nothing special — the broker just stores and delivers tombstone +records like any other.

It’s up to applications to decide how to interpret them:

1. Produce a tombstone

Application sends a record with a non-null key and a null value.

kafkaTemplate.send("users", "user-123", null);

Kafka stores it in the topic log just like any other record.

2. Consume the tombstone

Consumers see it immediately.

@KafkaListener(topics = "users")
+public void listen(ConsumerRecord<String, User> record) {
+    if (record.value() == null) {
+        // tombstone → remove key from state
+        userCache.remove(record.key());
+    }
+}

Applications decide how to react. Kafka itself doesn’t enforce “delete.”

3. Log compaction

This ensures:

4. Resulting state

So, the lifecycle is: produce → consume → compact → expire → gone.

File: /Users/Q187392/dev/s/private/vimwiki/dev/kafka.md

+1758696231519 1686497988937

Explain tombstone handling in streams versus regular Kafka:

Streams assigns delete semantics to tombstones for table-like abstractions and state stores.

KTable / GlobalKTable

KStream

Joins and Aggregations (table semantics)

Internal topics

Practical tips

File: /Users/Q187392/dev/s/private/vimwiki/dev/kafka.md

+1759085796792 1686497988937

Explain functions vs function pointers vs lambdas:

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/datatypes.md

+1759136110647 1686497988937

Explain HMAC

HMAC uses two passes of hash computation:

A typical use is uploading to and downloading from S3: you generate a pre-signed upload or download URL whose signature is computed with HMAC. The URL only works for the given operation, performed on your behalf, without making the bucket publicly accessible.

Algorithm

Algorithm Steps

Let:

  1. If K is longer than B, hash it to shorten it (K = H(K)). Then, if K is shorter than B, pad it with zeros to length B.

  2. Compute:

    inner = H((K ⊕ ipad) || M)  # concatenation
    +outer = H((K ⊕ opad) || inner)
  3. Result = outer, which is the HMAC.

Let’s break the inner part of the HMAC algorithm down:

\[H((K \oplus ipad) \; || \; M)\]
  1. Key preparation:

    • Make sure K is exactly the hash block size (B bytes, 64 for SHA-256).
    • If shorter → zero-pad.
    • If longer → hash it down.
  2. XOR with ipad:

    Ki = K ⊕ ipad

    This mixes the key with a fixed constant (ipad = 0x36...36) to prevent certain attacks.

  3. Concatenate with message:

    Ki || M

    Append the message to the modified key.

  4. Hash the result:

    inner = H(Ki || M)

    This produces the inner hash used in the next step of HMAC.

Why do this?

If you only did H(K || M), the key and message could interact in insecure ways (length extension +attacks). The ipad/opad trick forces the hash to start from two different, unrelated internal +states, strengthening security.

So H((K ⊕ ipad) || M) = the inner hash of HMAC, a secure mix of the padded key, the fixed +constant, and the message.

Security Properties

File: /Users/Q187392/dev/s/private/vimwiki/dev/security/hmac.md

+1760001711585 1686497988937

Explain Authorization Code Flow with PKCE

pkce-flow

The PKCE (Proof Key for Code Exchange) OAuth 2.0 flow is an extension of the Authorization +Code flow, designed to make it secure for public clients (e.g., mobile apps, single-page +apps) that cannot safely store a client secret.

  1. Generate a Code Verifier

    • A random, high-entropy string (43–128 characters).
    • Example: q8nT1_Xx9vwP5fO-L3z7x0U...
  2. Create a Code Challenge

    • Apply SHA256 + Base64URL-encoding to the verifier.
    • Example: code_challenge = BASE64URL(SHA256(code_verifier))
  3. Authorization Request

    • The client sends the user to the Authorization Server with:

      • response_type=code
      • client_id
      • redirect_uri
      • code_challenge
      • code_challenge_method=S256
    • The user logs in and authorizes.

  4. Authorization Server Redirect

    • After login, the server redirects back with an authorization code.
  5. Token Request

    • The client exchanges the code for tokens (access/refresh).

    • Instead of a client secret, it sends:

      • code
      • redirect_uri
      • code_verifier
  6. Token Response

    • The Authorization Server hashes the code_verifier, compares the result with the original code_challenge, and if they match, issues tokens.

Even if an attacker intercepts the authorization code, they cannot redeem it without the +original code_verifier. Since the verifier never left the client, only the legitimate app can +complete the flow.

File: /Users/Q187392/dev/s/private/vimwiki/dev/security/oauth.md

+1760338853334 1686497988937

question

answer

File: /Users/Q187392/dev/s/private/vimwiki/ai/claude-code.md

+1760419554790 1686497988937

xxxxx

xxxxx

File: /Users/Q187392/dev/s/private/vimwiki/help/inka2.md

+1760509284607 1686497988937

Explain Borrow vs AsRef

AsRef / AsMut vs Borrow / BorrowMut (crisp)

Why not always Borrow? It imposes the stronger key-equivalence contract, which you often don’t need and can’t always satisfy. Prefer AsRef/AsMut for general input flexibility; reserve Borrow/BorrowMut for key semantics.

How lookups use Borrow: Collections call borrow() internally. Example:

use std::borrow::Borrow;
use std::collections::HashMap;

// Signature of `HashMap::get` in the standard library, abridged for reference
// (it cannot be redefined in user code):
//
//     impl<K, V, S> HashMap<K, V, S> {
//         pub fn get<Q: ?Sized>(&self, k: &Q) -> Option<&V>
//         where
//             K: std::borrow::Borrow<Q>, // <-- key type K must Borrow<Q>
//             Q: std::hash::Hash + Eq,
//         { /* uses K::borrow() internally */ }
//     }

/// Shows that a `HashMap<String, _>` can be queried with a plain `&str`.
fn main() {
    let mut m: HashMap<String, i32> = HashMap::new();
    m.insert("alice".to_string(), 1);

    // Works because `String: Borrow<str>`: no `String` has to be allocated
    // just to perform the lookup.
    assert_eq!(m.get("alice"), Some(&1));

    // Borrowing an owned key explicitly yields the same `&str` lookup key.
    let owned = String::from("alice");
    let key: &str = owned.borrow();
    assert_eq!(m.get("alice"), m.get(key)); // same result
}

HashMap::get accepts &Q and requires K: Borrow<Q>, so it borrows each String key as &str to compare/hash.

File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/traits.md

+1709395514436 1686497988937

Should embeddings be normalized for clustering?

Normalizing embeddings before clustering is generally recommended, especially when the algorithm uses a magnitude-sensitive metric such as Euclidean distance (cosine similarity itself already ignores vector magnitude). Making the distance between vectors purely a function of the angle is important for these reasons:

  1. Magnitude Independence: Normalization removes the influence of the vector's magnitude, focusing the comparison on the direction (or angle) of the vectors. This is particularly useful when the magnitude does not carry meaningful information for the analysis.

  2. Improved Clustering Quality: For algorithms that rely on distance metrics, such as k-means or hierarchical clustering, normalization can lead to more meaningful clusters. It ensures that the clustering process is based on the shape of the data distribution rather than the scale of the data points.

  3. Consistency: Normalizing embeddings ensures consistency across different vectors, making them comparable on the same scale. This is crucial when embeddings come from different sources or when they represent different types of entities.

  4. Enhanced Computational Efficiency: Some clustering algorithms can compute distances more efficiently when vectors are normalized, as certain optimizations can be applied when vectors have a unit norm.

File: /Users/Q187392/dev/s/private/vimwiki/ai/ml.md

+1709395514438 1686497988937

How to ensure the kmeans clustering uses cosine distance?

you cannot directly use default KMeans implementation from libraries like scikit-learn, as it is designed to work with Euclidean distances.

Approximation: +Normalize Data Before Clustering: This way, Euclidean distance in the normalized space relates closely to cosine similarity, but it's not the same. +After normalization, you could use the standard KMeans algorithm, keeping in mind that this approach approximates cosine similarity by minimizing squared Euclidean distance on normalized data.

FAISS provides Clustering class for clustering vectors. It uses k-means by default. Note that the actual clustering process in FAISS does not directly consider cosine similarity; it's primarily designed for L2 distances. However, by normalizing your vectors and using IndexFlatIP, the clustering will align closely with cosine similarity principles.

File: /Users/Q187392/dev/s/private/vimwiki/ai/ml.md

+1709395515407 1686497988937

What are stop_sequences in LLM parameters?

# Claude - Body Syntax
+body = json.dumps({
+    "prompt": prompt_data,
+    "max_tokens_to_sample": 200,
+    "temperature": 0.0,
+    "top_k": 250,
+    "top_p": 0.5,
+    "stop_sequences": ["\n\nHuman:"]
+})

Why Are They Needed?

Language models (like Claude, GPT, etc.) are trained to predict the next most likely token, without inherent knowledge of:

Without explicit guidance, the model might:

For example, if the prompt includes:

Human: What's the capital of France?
+Assistant:

the model might have seen many examples where after the assistant’s reply, the transcript continues with another "Human:" marker. +So, it tries to "complete the transcript" by adding what it thinks comes next, which is the next turn marker. +Without guidance, it continues generating:

The capital of France is Paris.
+Human: What's the population?

Boundary Control

Task-Specific Use: Essential for applications needing:

Why does it work without stop sequences on chat models?

Trained on Turn-Based Dialogue.

Models like ChatGPT are trained on instruction-following datasets, where:

Best Practices for Choosing Effective stop_sequences

Scenario Example stop_sequences
Chatbot turn boundary "\n\nHuman:", "\nUser:"
Email generation "--END OF EMAIL--", "\nThanks,"
JSON response termination "}", "]" (for ensuring JSON closure)
Web page generation "</html>", "</body>"
Custom task-specific marker "### END", "<<STOP>>"
SQL query generation ";" (ensures end of query)

File: /Users/Q187392/dev/s/private/vimwiki/help/aws/bedrock.md

diff --git a/data/testuser/first.md b/data/testuser/first.md
new file mode 100644
index 0000000..80e16e0
--- /dev/null
+++ b/data/testuser/first.md
@@ -0,0 +1,15 @@
+---
+
+Deck: Life Questions
+
+Tags: learning life-questions
+
+
+1. What is the answer to the Ultimate Question of Life, the Universe, and Everything?
+
+> 42
+
+
+2. If it {{c1::looks like a duck, swims like a duck, and quacks like a duck}}, then it is a {{c2::duck}}.
+
+---
diff --git a/scripts/clean_media.py b/scripts/clean_media.py
new file mode 100755
index 0000000..ea9cd2e
--- /dev/null
+++ b/scripts/clean_media.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+"""
+Clean unused media files from Anki collection.
+
+This script:
+1. Scans all notes in collection.anki2 to find media references
+2. Compares with actual files in collection.media/
+3. Deletes unreferenced files
+4. Updates collection.media.db2 to match
+"""
+
+import sqlite3
+import re
+import os
+import sys
+import argparse
+from pathlib import Path
+from typing import Set
+
+def extract_media_from_html(html: str) -> Set[str]:
+    """Extract all media filenames referenced in HTML content."""
+    media_files = set()
+
+    # Pattern 1: <img src="filename">
+    img_pattern = r'<img[^>]+src=["\']([^"\']+)["\']'
+    for match in re.finditer(img_pattern, html, re.IGNORECASE):
+        filename = match.group(1)
+        # Skip external URLs
+        if not filename.startswith(('http://', 'https://', '//', 'data:')):
+            media_files.add(filename)
+
+    # Pattern 2: [sound:filename]
+    sound_pattern = r'\[sound:([^\]]+)\]'
+    for match in re.finditer(sound_pattern, html, re.IGNORECASE):
+        media_files.add(match.group(1))
+
+    # Pattern 3: Background images in style attributes
+    bg_pattern = r'background-image:\s*url\(["\']?([^"\')\s]+)["\']?\)'
+    for match in re.finditer(bg_pattern, html, re.IGNORECASE):
+        filename = match.group(1)
+        if not filename.startswith(('http://', 'https://', '//', 'data:')):
+            media_files.add(filename)
+
+    return media_files
+
+def get_referenced_media(collection_path: Path) -> Set[str]:
+    """Get all media files referenced in notes."""
+    print(f"Analyzing notes in {collection_path}...")
+    conn = sqlite3.connect(collection_path)
+    cursor = conn.cursor()
+
+    # Get all note fields
+    cursor.execute("SELECT flds FROM notes")
+
+    referenced = set()
+    note_count = 0
+    for (flds,) in cursor:
+        note_count += 1
+        # Fields are separated by \x1f
+        media = extract_media_from_html(flds)
+        referenced.update(media)
+
+    conn.close()
+    print(f" Found {note_count} notes")
+    print(f" Found {len(referenced)} unique media references")
+
+    return referenced
+
+def get_actual_media_files(media_dir: Path) -> Set[str]:
+    """Get all actual files in the media directory."""
+    print(f"\nScanning media directory: {media_dir}...")
+    files = set()
+
+    for item in media_dir.iterdir():
+        if item.is_file():
+            files.add(item.name)
+
+    print(f" Found {len(files)} files")
+    return files
+
+def clean_media_database(db_path: Path, referenced_files: Set[str]):
+    """Update media database to remove unreferenced entries."""
+    print(f"\nUpdating media database: {db_path}...")
+    conn = sqlite3.connect(db_path)
+    cursor = conn.cursor()
+
+    # Get current entries
+    cursor.execute("SELECT fname FROM media WHERE csum IS NOT NULL")
+    db_entries = {row[0] for row in cursor}
+
+    # Find entries to remove
+    to_remove = db_entries - referenced_files
+
+    if to_remove:
+        print(f" Removing {len(to_remove)} entries from media database")
+        for fname in to_remove:
+            cursor.execute("DELETE FROM media WHERE fname = ?", (fname,))
+        conn.commit()
+    else:
+        print(" No database entries to remove")
+
+    conn.close()
+
+def main():
+    # Parse arguments
+    parser = argparse.ArgumentParser(
+        description="Clean unused media files from Anki collection"
+    )
+    parser.add_argument(
+        "--yes", "-y",
+        action="store_true",
+        help="Skip confirmation prompt and proceed with deletion"
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Show what would be deleted without actually deleting"
+    )
+    args = parser.parse_args()
+
+    # Paths
+    base_dir = Path("/Users/Q187392/dev/s/private/ankiview/data/testuser")
+    collection_path = base_dir / "collection.anki2"
+    media_dir = base_dir / "collection.media"
+    media_db_path = base_dir / "collection.media.db2"
+
+    print("=" * 70)
+    print("Anki Media Cleanup")
+    print("=" * 70)
+
+    # Step 1: Find referenced media
+    referenced = get_referenced_media(collection_path)
+
+    # Step 2: Find actual files
+    actual_files = get_actual_media_files(media_dir)
+
+    # Step 3: Identify unreferenced files
+    unreferenced = actual_files - referenced
+
+    print("\n" + "=" * 70)
+    print(f"Summary:")
+    print(f" Referenced media files: {len(referenced)}")
+    print(f" Actual media files: {len(actual_files)}")
+    print(f" Unreferenced files: {len(unreferenced)}")
+    print("=" * 70)
+
+    if not unreferenced:
+        print("\n✓ No unreferenced files to delete!")
+        return
+
+    # Show first 20 files to be deleted
+    print("\nFiles to be deleted:")
+    for i, filename in enumerate(sorted(unreferenced), 1):
+        if i <= 20:
+            print(f" {filename}")
+        elif i == 21:
+            print(f" ... and {len(unreferenced) - 20} more")
+            break
+
+    # Dry run mode - exit early
+    if args.dry_run:
+        print("\n✓ Dry run mode - no files deleted")
+        return
+
+    # Confirm deletion
+    if not args.yes:
+        try:
+            response = input("\nProceed with deletion? (yes/no): ")
+            if response.lower() != 'yes':
+                print("Aborted.")
+                return
+        except (EOFError, KeyboardInterrupt):
+            print("\nAborted.")
+            return
+    else:
+        print("\nAuto-confirming deletion (--yes flag used)")
+
+    # Step 4: Delete unreferenced files
+    print("\nDeleting unreferenced files...")
+    deleted_count = 0
+    for filename in unreferenced:
+        file_path = media_dir / filename
+        try:
+            file_path.unlink()
+            deleted_count += 1
+        except Exception as e:
+            print(f" Error deleting {filename}: {e}")
+
+    print(f" Deleted {deleted_count} files")
+
+    # Step 5: Update media database
+    clean_media_database(media_db_path, referenced)
+
+    print("\n✓ Cleanup completed successfully!")
+    print(f" Disk space freed: ~{deleted_count} files")
+
+if __name__ == "__main__":
+    main()