diff --git a/.gitignore b/.gitignore index 095c9a8..20e68ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ thoughts/ +data/testuser/collection.anki2-wal diff --git a/Makefile b/Makefile index 686276f..2bcc1f2 100644 --- a/Makefile +++ b/Makefile @@ -37,6 +37,15 @@ init-env: ## init-env test: ## Run all tests (unit, integration, and doc tests) with debug logging pushd $(pkg_src) && RUST_LOG=INFO cargo test --all-features --all-targets -- --test-threads=1 #--nocapture +.PHONY: refresh-test-fixture +refresh-test-fixture: ## Refresh test fixture from golden dataset + @echo "Refreshing test fixture from golden dataset..." + ./ankiview/tests/fixtures/copy_golden_dataset.sh + +.PHONY: test-verbose +test-verbose: ## Run tests with verbose logging + pushd $(pkg_src) && RUST_LOG=debug cargo test --all-features --all-targets -- --test-threads=1 --nocapture + ################################################################################ # Building, Deploying \ BUILDING: ## ################################################################## diff --git a/ankiview/Cargo.toml b/ankiview/Cargo.toml index cb6a71c..8591f44 100644 --- a/ankiview/Cargo.toml +++ b/ankiview/Cargo.toml @@ -27,6 +27,9 @@ thiserror = "2.0.11" tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } +[[bin]] +name = "build_test_collection" +path = "tests/fixtures/build_test_collection.rs" [profile.release] codegen-units = 1 diff --git a/ankiview/tests/fixtures/README.md b/ankiview/tests/fixtures/README.md new file mode 100644 index 0000000..832f3e0 --- /dev/null +++ b/ankiview/tests/fixtures/README.md @@ -0,0 +1,64 @@ +# Test Fixtures + +## Golden Test Dataset + +**Source**: `/Users/Q187392/dev/s/private/ankiview/data/testuser/` +**Fixture Location**: `test_collection/` + +**IMPORTANT**: The golden dataset in the source location is READ-ONLY. Never modify it. All tests work with copies. 
+ +### Structure +- 15 notes with real-world content +- Basic card type (front/back) +- 4 media files (PNG images) +- Collection size: ~1MB +- Media directory: ~140KB + +### Content Coverage +- Data structures (DAG, Tree, DFS) +- Algorithms and complexity +- Data science metrics (F1, accuracy) +- Database concepts (star schema) +- Embeddings and ML concepts +- Geographic reference systems + +### Media Files +- `dag.png` (37KB) - Referenced by note 1695797540370 +- `star-schema.png` (16KB) - Referenced by note 1713763428669 +- `mercator.png` (24KB) - Referenced by note 1737647330399 +- `wsg-enu2.png` (58KB) - Referenced by note 1737647330399 + +### Refreshing Fixture from Golden Dataset + +If the golden dataset is updated, refresh the fixture: + +```bash +chmod +x ankiview/tests/fixtures/copy_golden_dataset.sh +./ankiview/tests/fixtures/copy_golden_dataset.sh +``` + +### Note IDs for Testing + +Use these note IDs in integration tests: + +```rust +pub mod test_notes { + // Notes with images + pub const DAG_NOTE: i64 = 1695797540370; + pub const STAR_SCHEMA: i64 = 1713763428669; + pub const MERCATOR: i64 = 1737647330399; + + // Text-heavy notes + pub const TREE: i64 = 1695797540371; + pub const RECURSIVE_DFS: i64 = 1695797540372; + pub const TAIL_RECURSION: i64 = 1698125272387; + + // Data science notes + pub const F1_SCORE: i64 = 1714489634039; + pub const ACCURACY: i64 = 1714489634040; + pub const COLBERT: i64 = 1715928977633; + + // For testing errors + pub const NONEXISTENT: i64 = 999999999; +} +``` diff --git a/ankiview/tests/fixtures/build_test_collection.rs b/ankiview/tests/fixtures/build_test_collection.rs new file mode 100644 index 0000000..d588f17 --- /dev/null +++ b/ankiview/tests/fixtures/build_test_collection.rs @@ -0,0 +1,120 @@ +// Build script to create test collection fixture +// Run manually: cargo run --bin build_test_collection +// +// This script creates a minimal Anki collection using the Anki library. 
+// Due to the complexity and version-specific nature of the Anki API, +// an alternative approach is to manually create the collection in Anki desktop +// and copy it here. This script serves as documentation of what the collection should contain. + +use anki::collection::CollectionBuilder; +use std::path::PathBuf; + +fn main() -> anyhow::Result<()> { + println!("Creating test collection...\n"); + println!("Note: Due to Anki API complexity, this script creates an empty collection."); + println!("You should add notes manually using Anki desktop, then copy the collection here.\n"); + + let fixture_dir = PathBuf::from("tests/fixtures/test_collection"); + + // Remove old collection if exists + if fixture_dir.exists() { + std::fs::remove_dir_all(&fixture_dir)?; + } + std::fs::create_dir_all(&fixture_dir)?; + + let collection_path = fixture_dir.join("collection.anki2"); + let col = CollectionBuilder::new(&collection_path).build()?; + + println!("Created empty collection at: {:?}", collection_path); + + // Close collection + col.close(None)?; + + // Create media directory + let media_dir = fixture_dir.join("collection.media"); + std::fs::create_dir_all(&media_dir)?; + + // Create test images + create_test_media(&media_dir)?; + + println!("\n=============================================="); + println!("MANUAL STEPS REQUIRED:"); + println!("==============================================\n"); + println!("1. Open Anki desktop application"); + println!("2. Create a new profile or use existing one"); + println!("3. Add the following 8 notes with Basic card type:\n"); + println!(" Note 1:"); + println!(" Front: What is Rust?"); + println!(" Back: A systems programming language\n"); + println!(" Note 2:"); + println!(" Front: What is the quadratic formula?"); + println!(r#" Back:
$x = \frac{{-b \pm \sqrt{{b^2 - 4ac}}}}{{2a}}$"#);
+ println!();
+ println!(" Note 3:");
+ println!(" Front: How to create a vector in Rust?");
+ println!(r#" Back: let v: Vec = vec![1, 2, 3]; "#);
+ println!();
+ println!(" Note 4:");
+ println!(" Front: Rust logo");
+ println!(r#" Back:
"#);
+ println!();
+ println!(" Note 5:");
+ println!(" Front: External image test");
+ println!(r#" Back:
"#);
+ println!();
+ println!(" Note 6:");
+ println!(" Front: HTML entities test");
+ println!(" Back: Less than: < Greater than: > Ampersand: &");
+ println!();
+ println!(" Note 7:");
+ println!(" Front: Question with no answer");
+ println!(" Back: (leave empty)");
+ println!();
+ println!(" Note 8:");
+ println!(" Front: Tagged question");
+ println!(" Back: Tagged answer");
+ println!(" Tags: test rust programming");
+ println!();
+ println!("4. Close Anki");
+ println!("5. Copy the collection.anki2 file to:");
+ println!(" {}", collection_path.display());
+ println!("6. Copy media files from profile's collection.media/ to:");
+ println!(" {}", media_dir.display());
+ println!("7. Note the IDs of the created notes (use SQLite browser or query)");
+ println!("8. Update tests/helpers/mod.rs with the actual note IDs\n");
+ println!("==============================================\n");
+
+ Ok(())
+}
+
+/// Writes the placeholder media files that accompany the manually built test collection.
+///
+/// `rust-logo.png` and `sample.jpg` both receive the same bytes, so `sample.jpg` holds
+/// PNG data under a `.jpg` name. The path of each file is printed once it has been written.
+///
+/// # Errors
+/// Returns the first I/O error raised while writing either file.
+fn create_test_media(media_dir: &std::path::Path) -> anyhow::Result<()> {
+    // Bytes of a minimal 1x1 PNG: the signature followed by IHDR, IDAT and IEND chunks.
+    let png_bytes: &[u8] = &[
+        // PNG signature
+        0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A,
+        // IHDR chunk: length, type, width (1), height (1),
+        // bit depth / color type / compression / filter / interlace, CRC
+        0x00, 0x00, 0x00, 0x0D,
+        0x49, 0x48, 0x44, 0x52,
+        0x00, 0x00, 0x00, 0x01,
+        0x00, 0x00, 0x00, 0x01,
+        0x08, 0x02, 0x00, 0x00, 0x00,
+        0x90, 0x77, 0x53, 0xDE,
+        // IDAT chunk: length, type, compressed data, CRC
+        0x00, 0x00, 0x00, 0x0C,
+        0x49, 0x44, 0x41, 0x54,
+        0x08, 0xD7, 0x63, 0xF8, 0xCF, 0xC0, 0x00, 0x00, 0x03, 0x01, 0x01, 0x00,
+        0x18, 0xDD, 0x8D, 0xB4,
+        // IEND chunk: length, type, CRC
+        0x00, 0x00, 0x00, 0x00,
+        0x49, 0x45, 0x4E, 0x44,
+        0xAE, 0x42, 0x60, 0x82,
+    ];
+
+    // The logo is written first, then the sample that is PNG data despite its name.
+    for file_name in &["rust-logo.png", "sample.jpg"] {
+        let target = media_dir.join(file_name);
+        std::fs::write(&target, png_bytes)?;
+        println!("Created test image: {:?}", target);
+    }
+
+    Ok(())
+}
diff --git a/ankiview/tests/fixtures/copy_golden_dataset.sh b/ankiview/tests/fixtures/copy_golden_dataset.sh
new file mode 100755
index 0000000..efc4b56
--- /dev/null
+++ b/ankiview/tests/fixtures/copy_golden_dataset.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Copy golden test dataset to fixtures directory
+# This script should be run from the repository root
+
+set -euo pipefail
+
+GOLDEN_SOURCE="/Users/Q187392/dev/s/private/ankiview/data/testuser"
+FIXTURE_TARGET="ankiview/tests/fixtures/test_collection"
+
+echo "Copying golden dataset to test fixtures..."
+
+# Remove old fixture if exists
+if [ -d "$FIXTURE_TARGET" ]; then
+ echo "Removing existing fixture at $FIXTURE_TARGET"
+ rm -rf "$FIXTURE_TARGET"
+fi
+
+# Create fixture directory
+mkdir -p "$FIXTURE_TARGET"
+
+# Copy collection file (close any open SQLite connections first)
+echo "Copying collection.anki2..."
+cp "$GOLDEN_SOURCE/collection.anki2" "$FIXTURE_TARGET/"
+
+# Copy media directory
+echo "Copying media files..."
+cp -r "$GOLDEN_SOURCE/collection.media" "$FIXTURE_TARGET/"
+
+# Copy media database
+echo "Copying media database..."
+cp "$GOLDEN_SOURCE/collection.media.db2" "$FIXTURE_TARGET/"
+
+# Verify files were copied
+echo ""
+echo "Verification:"
+ls -lh "$FIXTURE_TARGET/collection.anki2"
+ls -lh "$FIXTURE_TARGET/collection.media.db2"
+echo ""
+echo "Media files:"
+ls -lh "$FIXTURE_TARGET/collection.media/"
+echo ""
+echo "Golden dataset copied successfully!"
+echo ""
+echo "IMPORTANT: Do not modify files in $GOLDEN_SOURCE"
+echo "Tests will work with copies of this fixture."
diff --git a/ankiview/tests/fixtures/test_collection/collection.anki2 b/ankiview/tests/fixtures/test_collection/collection.anki2
new file mode 100644
index 0000000..28014ec
Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.anki2 differ
diff --git a/ankiview/tests/fixtures/test_collection/collection.anki2-shm b/ankiview/tests/fixtures/test_collection/collection.anki2-shm
new file mode 100644
index 0000000..fe9ac28
Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.anki2-shm differ
diff --git a/ankiview/tests/fixtures/test_collection/collection.anki2-wal b/ankiview/tests/fixtures/test_collection/collection.anki2-wal
new file mode 100644
index 0000000..e69de29
diff --git a/ankiview/tests/fixtures/test_collection/collection.media.db2 b/ankiview/tests/fixtures/test_collection/collection.media.db2
new file mode 100644
index 0000000..2410091
Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.media.db2 differ
diff --git a/ankiview/tests/fixtures/test_collection/collection.media/dag.png b/ankiview/tests/fixtures/test_collection/collection.media/dag.png
new file mode 100644
index 0000000..c37a120
Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.media/dag.png differ
diff --git a/ankiview/tests/fixtures/test_collection/collection.media/mercator.png b/ankiview/tests/fixtures/test_collection/collection.media/mercator.png
new file mode 100644
index 0000000..21f7181
Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.media/mercator.png differ
diff --git a/ankiview/tests/fixtures/test_collection/collection.media/star-schema.png b/ankiview/tests/fixtures/test_collection/collection.media/star-schema.png
new file mode 100644
index 0000000..ee14737
Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.media/star-schema.png differ
diff --git a/ankiview/tests/fixtures/test_collection/collection.media/wsg-enu2.png b/ankiview/tests/fixtures/test_collection/collection.media/wsg-enu2.png
new file mode 100644
index 0000000..9ac799d
Binary files /dev/null and b/ankiview/tests/fixtures/test_collection/collection.media/wsg-enu2.png differ
diff --git a/ankiview/tests/helpers/mod.rs b/ankiview/tests/helpers/mod.rs
new file mode 100644
index 0000000..f1869ce
--- /dev/null
+++ b/ankiview/tests/helpers/mod.rs
@@ -0,0 +1,102 @@
+use ankiview::infrastructure::AnkiRepository;
+use anyhow::{Context, Result};
+use std::path::{Path, PathBuf};
+use tempfile::TempDir;
+
+/// Test fixture for working with temporary Anki collections
+///
+/// Bundles a temporary directory with the two paths tests read from it.
+// NOTE(review): `dead_code` is presumably allowed because not every test uses every field — confirm.
+#[allow(dead_code)]
+pub struct TestCollection {
+ // Handle to the temporary directory. Presumably held only so the directory
+ // outlives the paths below — confirm against `new`.
+ _temp_dir: TempDir,
+ // Path of the collection file used by the test (assigned in `new`).
+ pub collection_path: PathBuf,
+ // Path of the media directory used by the test (assigned in `new`).
+ pub media_dir: PathBuf,
+}
+
+impl TestCollection {
+ /// Create a new test collection by copying the fixture
+ pub fn new() -> ResultExplain stack-based DFS algorithm for tree.
eliminates the need for recursion by using an explicit stack data structure to keep track of nodes to visit. +This is particularly useful in scenarios where recursion is expensive in terms of memory or computational overhead.
class Node:
+ def __init__(self, value):
+ self.value = value
+ self.left = None
+ self.right = None
+# Pre-Order
+def dfs_tree_stack(root):
+ if root is None:
+ return
+ stack = [root]
+ while stack:
+ current_node = stack.pop()
+ print(current_node.value) # Process the current node
+ if current_node.right:
+ stack.append(current_node.right) # Push right child if exists
+ if current_node.left:
+ stack.append(current_node.left) # Push left child if exists
+# In-Order
+def dfs_in_order_stack(root):
+ if root is None:
+ return
+ stack = []
+ current = root
+ while stack or current:
+ while current:
+ stack.append(current) # Push current node
+ current = current.left # Move to left child
+ current = stack.pop() # Pop the top item
+ print(current.value) # Process the current node
+ current = current.right # Move to right child
+# Post-Order
+def dfs_post_order_stack(root):
+ if root is None:
+ return
+ stack = []
+ # ensure that a node's right subtree is processed before the node itself.
+ # The node is only processed (popped and printed) when either it has no right child, or its right child has already been processed
+ last_node_visited = None
+ current = root
+ while stack or current:
+ if current:
+ stack.append(current) # Push current node
+ current = current.left # Move to left child
+ else:
+ peek_node = stack[-1]
+ # Check if right child exists and is unvisited
+ if peek_node.right and last_node_visited != peek_node.right:
+ current = peek_node.right
+ else:
+ last_node_visited = stack.pop() # Pop the top item
+ print(last_node_visited.value) # Process the current nodeFile: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md
+1695797540374 1686497988937Explain pre-order vs. in-order vs. post-order in tree traversal.
In binary trees, there are three common methods of traversal: pre-order, in-order, and post-order
class Node:
+ def __init__(self, value):
+ self.value = value
+ self.left = None
+ self.right = None
+
+def preorder_traversal(node):
+ if node:
+ print(node.value, end=' ')
+ preorder_traversal(node.left)
+ preorder_traversal(node.right)
+
+def inorder_traversal(node):
+ if node:
+ inorder_traversal(node.left)
+ print(node.value, end=' ')
+ inorder_traversal(node.right)
+
+def postorder_traversal(node):
+ if node:
+ postorder_traversal(node.left)
+ postorder_traversal(node.right)
+ print(node.value, end=' ')
+
+# Example tree
+# 1
+# / \
+# 2 3
+# / \
+# 4 5
+
+root = Node(1)
+root.left = Node(2)
+root.right = Node(3)
+root.left.left = Node(4)
+root.left.right = Node(5)
+
+print("Pre-order Traversal: ")
+preorder_traversal(root) # Output: 1 2 4 5 3
+
+print("\nIn-order Traversal: ")
+inorder_traversal(root) # Output: 4 2 5 1 3
+
+print("\nPost-order Traversal: ")
+postorder_traversal(root) # Output: 4 5 2 3 1File: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md
+1699599187787 1686497988937What is Polymorphism?
objects of different classes can be treated as objects of a common superclass, allowing for a single interface to control access to the different underlying forms of those objects
the notion that you can define a single interface with multiple underlying implementations.
foundation of dependency inversion
File: /Users/tw/dev/s/private/vimwiki/dev/OOP.md
+1700896496025 1686497988937Explain Change Data Capture (CDC):
is a software design pattern used to efficiently track changes in data in a database system.
Purpose: CDC aims to identify and capture changes made to the data in a database, such as inserts, updates, and deletes.
Process:
Techniques:
Benefits:
Use Cases:
CDC is a key component in modern data architectures, particularly in systems that require high levels of data freshness and accuracy for real-time decision-making.Five Advantages of Log-Based Change Data Capture
File: /Users/Q187392/dev/s/private/vimwiki/dev/event_driven.md
+1705384410662 1686497988937Explain Ring Buffer:
class RingBuffer:
+ def __init__(self, size):
+ self.size = size
+ self.buffer = [None] * size
+ self.write_pos = 0
+ self.read_pos = 0
+
+ def is_full(self):
+ next_write_pos = (self.write_pos + 1) % self.size
+ return next_write_pos == self.read_pos
+
+ def is_empty(self):
+ return self.write_pos == self.read_pos
+
+ def write(self, item):
+ self.buffer[self.write_pos] = item
+ self.write_pos = (self.write_pos + 1) % self.size
+
+ def read(self):
+ item = self.buffer[self.read_pos]
+ self.read_pos = (self.read_pos + 1) % self.size
+ return item
+
+# Example usage
+buffer = RingBuffer(5)
+
+# Write data to the buffer
+for i in range(1, 6):
+ buffer.write(i)
+
+# Read data from the buffer
+for _ in range(5):
+ print(buffer.read())File: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md
+1706855598821 1686497988937Explain "at-least-once" semantic of queue:
If I have a message for you, I will read it to you, and keep doing so again and again until you acknowledge it. +when you receive a message from the queue and don't delete/acknowledge it, you will receive it again in the future, and will keep receiving it until you explicitly delete/acknowledge it. +If the queuing system restarts before it can properly keep track of what's been sent to you, the message will be sent again. +This simple remedy of sending the message again in case of any problem on any side is what makes this guarantee so reliable.
File: /Users/Q187392/dev/s/private/vimwiki/dev/queues.md
+1711518761236 1686497988937Explain pointers:
myvar = SOMETHING;
+mypointer = get_address_of(myvar);
+print(get_value_via_pointer(mypointer));
+## output is SOMETHINGint myvar = 17;
+int *mypointer = &myvar; // declares mypointer as a pointer to an int (C)
+print_int(*mypointer); // outputs 17pointers must be declared with the * syntax (int *pointerName), and they store memory addresses.
+to access the value stored at the address a pointer is pointing to, use dereference operator * again (e.g., *pointerName).
&T and &mut T)Safety: References are always safe to use because borrow checker ensures that they point to valid memory, preventing dangling references and ensuring that either multiple immutable references or a single mutable reference can exist at the same time, but not both.
Lifetimes: References in Rust have lifetimes, which are compile-time annotations that specify the scope for which the reference is valid. Lifetimes prevent dangling references by ensuring that references do not outlive the data they point to.
Syntax and Usage: & to borrow a value, * to dereference
fn main() {
+ let myvar: i32 = 17;
+ let mypointer: &i32 = &myvar; // declares mypointer as a reference to an int
+
+ println!("{}", *mypointer);
+}File: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md
+1715673437739 1686497988937What is an algebraic type?
An algebraic type is a composite type that is formed by combining other types. An Algebraic Data Type (ADT) is a type formed by combining other types (just a bundle of data)
Product Types: These are types formed by combining multiple values from other types. An example in many programming languages is a struct or a class, which can contain multiple fields of different types. In a product type, the total number of possible values is the product of the number of possible values of its constituent types.
Sum Types (Union Types): These are types where a value can be one of several types but not simultaneously. They are called "sum types" because the total number of possible values is the sum of the number of possible values from its constituent types. The term "union" in union types is often used in languages like C and Rust, where it represents a type that may hold data from different types, but only one type at a time.
In summary, a union is considered an algebraic type (specifically a sum type) because it is formed by combining multiple types in a way that the resulting type can take a value that is one of its constituent types. The algebraic nature of these types comes from the way they are formed using operations analogous to those in algebra (sum for sum types, product for product types).
File: /Users/Q187392/dev/s/private/vimwiki/help/mypy.md
+1724482677282 1686497988937Explain RAII:
Drop trait) automatically releases those resources.Example where a file is opened and closed using RAII:
use std::fs::File;
+use std::io::{self, Write, Read};
+
+struct FileWrapper {
+ file: File,
+}
+
+impl FileWrapper {
+ fn new(filename: &str) -> io::Result<FileWrapper> {
+ let file = File::open(filename)?;
+ Ok(FileWrapper { file })
+ }
+}
+
+impl Drop for FileWrapper {
+ fn drop(&mut self) {
+ // The file will be automatically closed when the struct goes out of scope
+ // No explicit cleanup is needed in this case because the File type implements Drop
+ println!("File is being closed automatically");
+ }
+}
+
+fn process_file(filename: &str) -> io::Result<()> {
+ let mut file_wrapper = FileWrapper::new(filename)?;
+
+ let mut contents = String::new();
+ file_wrapper.file.read_to_string(&mut contents)?;
+ println!("File content: {}", contents);
+
+ // The file is automatically closed here when file_wrapper goes out of scope
+ Ok(())
+}
+
+fn main() {
+ if let Err(e) = process_file("example.txt") {
+ eprintln!("An error occurred: {}", e);
+ }
+}Automatic Resource Management:
Exception Safety (Panic Safety in Rust):
Drop trait) are still called, ensuring that resources are released properly.Improved Reliability and Safety:
Python's with statement and context managers provide a similar mechanism for managing resources automatically.
The closest concept to RAII in Java is try-with-resources statement
AutoCloseable interface can be declared in a try-with-resources statement.close() method of the resource is automatically called, ensuring that the resource is released.import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+
+public class FileReaderExample {
+ public static void main(String[] args) {
+ try (BufferedReader reader = new BufferedReader(new FileReader("example.txt"))) {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ System.out.println(line);
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ // No need to explicitly close the reader; it's done automatically.
+ }
+}explicit in Java (through syntax), whereas in Rust, it's implicit via the Drop trait and ownership model.
+Both Rust (via RAII and the Drop trait) and Java (via try-with-resources) ensure that resources are cleaned up even if an error occurs.
File: /Users/Q187392/dev/s/private/vimwiki/dev/rust-reference.md
+1746867058129 1686497988937Explain Float Precision
float typically follows the IEEE 754 32-bit single-precision standard.
f = float("0.1")
+print(f"{f:.17f}")
+
+0.10000000149011612Even 0.1 is already rounded in binary.
A 32-bit float has:
That’s exactly about 7.22 decimal digits (log10(2^24) ≈ 7.22).
f = float(1.123456789)
+print(f)
+
+1.123456789 # may appear unchanged, but internal precision is ~7 digitsdouble (64-bit, ~15-17 decimal digits)decimal.Decimal in Python (arbitrary precision)BigDecimal in JavaSignificant digits are the digits that carry meaning in a number -- they represent its precision.
123456 -> 6 significant digits0.00123456 -> also 6 significant digits (leading zeros don't count)123456000 -> may still be 6 significant digits, depending on how many trailing zeros are meaningfulf = float("0.1234567")
+print(f"{f:.17f}")
+
+0.12345670163631439Notice the inexact representation — even though you gave exactly 7 decimal digits, the float cannot represent 0.1234567 precisely.
1.0000001f = float("1.0000001")
+print(f"{f:.17f}")
+1.00000011920928955
File: /Users/Q187392/dev/s/private/vimwiki/dev/development.md
+1748163225597 1686497988937When to use Graph versus DAG?
A graph is a collection of nodes (vertices) connected by edges (which can be directed or undirected).
A DAG is a directed graph with no cycles.
DAGs solve problems where dependency order matters and cycles must be avoided:
These problems fail in a general graph because:
Graphs with cycles represent problems where feedback or mutual relationships are essential:
| Feature | DAG | General Graph |
|---|---|---|
| Cycles | Forbidden | Allowed |
| Topological ordering | Possible | Not possible (if cycles) |
| Suitable for dependencies | Yes | No (cycles create deadlocks) |
| Suitable for mutual relationships | No | Yes |
File: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md
+1748163225601 1686497988937why is it easier to determine whether dependencies are met with DAG rather than a Graph?
A DAG (Directed Acyclic Graph) has no cycles. This means:
A general graph can have cycles (loops of dependencies):
With a DAG, you can:
Track each node's in-degree (number of incoming edges, i.e., how many dependencies are left to satisfy).
Start with nodes with in-degree 0 (no unmet dependencies).
As you execute nodes, you decrement the in-degree of their dependents.
When a node's in-degree reaches 0, it's ready to execute.
This approach ensures:
This method is safe and efficient because a DAG guarantees:
This can lead to:
To handle this, you'd need:
| Aspect | DAG | General Graph |
|---|---|---|
| Cycles | Forbidden | Allowed (possible mutual dependencies) |
| Topological order | Always possible | Impossible if cycles exist |
| Dependency tracking | In-degree approach, simple and efficient | Needs cycle detection and complex logic |
| Risk of deadlocks | None | High, if cycles exist |
| Execution ordering | Deterministic | Unclear if cycles exist |
DAGs make it easy to determine if a node's dependencies are satisfied because:
File: /Users/Q187392/dev/s/private/vimwiki/dev/algo/algorithms.md
+1732086221265 1686497988937How does a Kafka message look like?
| Field | Description |
|---|---|
| Key | An optional key associated with the message, used for partitioning. |
| Value | The actual data or payload of the message. |
| Timestamp | The timestamp associated with the message. |
| Headers | Optional key-value pairs for additional metadata. |
| Partition | The partition where the message resides. |
| Offset | The position of the message in the partition. |
| Topic | The name of the topic to which the message belongs. |
Here’s an example JSON representation of a Kafka message (though messages are in binary format by default):
{
+ "topic": "example-topic",
+ "partition": 0,
+ "offset": 12345,
+ "timestamp": 1678901234567,
+ "key": "user123",
+ "value": {
+ "eventType": "login",
+ "timestamp": "2023-11-19T12:34:56Z"
+ },
+ "headers": {
+ "traceId": "abcd-1234"
+ }
+}CreateTime is used, the producer sets the timestamp when sending the message.LogAppendTime is used, the broker overwrites the timestamp with the time it appends the message to the partition.File: /Users/Q187392/dev/s/private/vimwiki/dev/kafka.md
+1734591704184 1686497988937how does map work in Rust Result context?
Ok from Result type, Error passed through: env::var("HOME")
+ .map(|home| format!("{}/xxx/rs-cg", home))
+ .unwrap_or_else(|_| "/tmp/xxx/rs-cg".to_string())File: /Users/Q187392/dev/s/private/vimwiki/dev/rust-concepts.md
+1734591704413 1686497988937Explain derived traits for structs:
use derive_builder::Builder;
+use serde::{Serialize, Deserialize};
+use getset::{Getters, Setters};
+use derive_more::{Display, From};
+use std::fmt;
+
+#[derive(
+ Debug, // Allows `{:?}` formatting for debugging.
+ Clone, // Enables `.clone()` for creating a duplicate.
+ PartialEq, Eq, // Enables `==` and `!=` for equality comparison.
+ PartialOrd, Ord, // Enables `<`, `>`, `<=`, `>=` for ordering.
+ Hash, // Allows hashing for use in `HashMap` or `HashSet`.
+ Default, // Provides a default value with `T::default()`.
+ Serialize, // Enables serialization to formats like JSON, YAML, etc.
+ Deserialize, // Enables deserialization from those formats.
+ Builder, // Generates a builder for the struct.
+ Getters, // Creates getter methods for struct fields.
+ Setters, // Creates setter methods for struct fields.
+ Display, // Enables user-facing string representation.
+ From // Enables conversion from tuple or compatible structs.
+)]
+pub struct Person {
+ #[getset(get = "pub", set = "pub")]
+ name: String,
+ #[getset(get = "pub", set = "pub")]
+ age: u8,
+ #[getset(get = "pub", set = "pub")]
+ email: String,
+}
+
+fn main() {
+ // Default instance
+ let default_person = Person::default();
+ println!("{:?}", default_person);
+
+ // Using the builder
+ let builder_person = PersonBuilder::default()
+ .name("Alice".to_string())
+ .age(30)
+ .email("alice@example.com".to_string())
+ .build()
+ .unwrap();
+ println!("{}", builder_person); // Uses Display trait
+
+ // Using Clone
+ let cloned_person = builder_person.clone();
+ assert_eq!(builder_person, cloned_person); // Uses PartialEq
+}Default Instance:
Person::default() creates an instance with default field values (e.g., empty strings, 0 for integers).Builder Pattern:
PersonBuilder::default()
+ .name("Alice".to_string())
+ .age(30)
+ .email("alice@example.com".to_string())
+ .build()
+ .unwrap();Serialization/Deserialization:
let json = serde_json::to_string(&person).unwrap();
+let deserialized: Person = serde_json::from_str(&json).unwrap();Equality and Ordering:
assert_eq!(person1, person2);
+let ordered = vec![person1, person2].sort();Getters and Setters:
println!("{}", person.name());
+person.set_name("Bob".to_string());Display Formatting:
println!("{}", person); // Customizable with `Display`File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/annotations.md
+1734591704479 1686497988937Explaing logging with tracing:
set RUST_LOG !
+Ensure tracing and tracing-subscriber are added to your Cargo.toml:
[dependencies]
+tracing = "0.1"
+tracing-subscriber = "0.3"Initialize the tracing-subscriber with a formatter that includes span information:
use tracing_subscriber::fmt;
+
+fn main() {
+ // Initialize a subscriber with span context enabled
+ tracing_subscriber::fmt()
+ .with_env_filter("info") // Set log level
+ .with_target(true) // Include module path
+ .with_thread_names(true) // Include thread names (optional)
+ .init();
+
+ example_function();
+}#[instrument] to Capture Method NamesAnnotate functions with the #[instrument] macro to capture their name in the logs:
use tracing::{info, instrument};
+
+#[instrument]
+fn example_function() {
+ info!("This is a log message from the method.");
+}Output:
INFO tracing_example::example_function: This is a log message from the method.example_function is the name of the method logged automatically by #[instrument].You can also include method arguments in the logs by leveraging the #[instrument] macro. It automatically logs the values of the arguments:
use tracing::{info, instrument};
+
+#[instrument]
+fn calculate_sum(a: i32, b: i32) {
+ let result = a + b;
+ info!("Sum calculated: {}", result);
+}
+
+fn main() {
+ tracing_subscriber::fmt().with_env_filter("info").init();
+ calculate_sum(5, 7);
+}Output:
INFO tracing_example::calculate_sum{a=5, b=7}: Sum calculated: 12You can customize the format to explicitly include the span information in your log output:
use tracing_subscriber::fmt::format::FmtSpan;
+
+fn main() {
+ tracing_subscriber::fmt()
+ .with_env_filter("info")
+ .with_span_events(FmtSpan::ACTIVE) // Log span entry/exit
+ .init();
+}If you want finer control without the #[instrument] macro, you can manually create spans using tracing::span!:
use tracing::{info, span, Level};
+
+fn main() {
+ tracing_subscriber::fmt().init();
+
+ let span = span!(Level::INFO, "custom_span", method = "main_function");
+ let _enter = span.enter(); // Enter the span
+ info!("Logging within the custom span");
+}Output:
INFO custom_span{method=main_function}: Logging within the custom span#[instrument] to automatically log method names and arguments.tracing-subscriber to format logs with span information.span! to manually define spans.File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/logging.md
+1734591766331 1686497988937Popular crates for struct traits:
In addition to the built-in traits external crates provide procedural macros to derive commonly needed traits for specific use cases.
| Crate | Trait | Purpose |
|---|---|---|
derive_builder | Builder | Generates a builder pattern for constructing complex structs. |
serde | Serialize, Deserialize | Provides (de)serialization support for structs and enums to/from formats like JSON or YAML. |
thiserror | Error | Simplifies error handling by deriving implementations of the std::error::Error trait. |
num-derive | FromPrimitive, ToPrimitive | Derives conversions between enums and primitive types. |
strum | EnumString, Display, AsRefStr, EnumIter | Enhances enums with string conversions, iteration, and more. |
getset | Getters, Setters | Auto-generates getter and setter methods for struct fields. |
async-trait | async_trait | Allows traits to contain async functions, resolving lifetime and complexity issues. |
derive_more | From, Into, Display, etc. | Provides convenient derives for common conversion and formatting traits. |
enum-as-inner | EnumAsInner | Provides safe accessors for enums with single-value variants. |
bitflags | BitFlags | Easily define and manipulate bitflags. |
smart-default | SmartDefault | Extends Default with custom defaults for individual fields. |
derive_builder - Builder PatternUsed to generate a builder for constructing complex structs.
use derive_builder::Builder;
+
+#[derive(Builder, Debug)]
+struct Config {
+ host: String,
+ port: u16,
+ use_tls: bool,
+}
+
+fn main() {
+ let config = ConfigBuilder::default()
+ .host("localhost".to_string())
+ .port(8080)
+ .use_tls(true)
+ .build()
+ .unwrap();
+
+ println!("{:?}", config); // Config { host: "localhost", port: 8080, use_tls: true }
+}serde - Serialization/DeserializationConverts structs or enums to/from formats like JSON or YAML.
use serde::{Serialize, Deserialize};
+
+#[derive(Serialize, Deserialize, Debug)]
+struct User {
+ id: u64,
+ name: String,
+}
+
+fn main() {
+ let user = User { id: 1, name: "Alice".to_string() };
+ let json = serde_json::to_string(&user).unwrap();
+ println!("{}", json); // {"id":1,"name":"Alice"}
+
+ let deserialized: User = serde_json::from_str(&json).unwrap();
+ println!("{:?}", deserialized); // User { id: 1, name: "Alice" }
+}thiserror - Error HandlingSimplifies creating custom error types.
use thiserror::Error;
+
+#[derive(Error, Debug)]
+enum MyError {
+ #[error("Invalid input: {0}")]
+ InvalidInput(String),
+ #[error("Database error")]
+ DatabaseError,
+}
+
+fn main() {
+ let err = MyError::InvalidInput("missing field".to_string());
+ println!("{}", err); // Invalid input: missing field
+}strum - Enum EnhancementsAdds utilities for enums, such as string conversions or iteration.
use strum_macros::{EnumString, Display, EnumIter};
+
+#[derive(EnumString, Display, EnumIter, Debug)]
+enum Color {
+ #[strum(serialize = "red")]
+ Red,
+ #[strum(serialize = "green")]
+ Green,
+ #[strum(serialize = "blue")]
+ Blue,
+}
+
+fn main() {
+ let color: Color = "red".parse().unwrap();
+ println!("{}", color); // Red
+}getset - Getters and SettersAuto-generates getters and setters for fields.
use getset::{Getters, Setters};
+
+#[derive(Getters, Setters, Debug)]
+struct Person {
+ #[getset(get = "pub", set = "pub")]
+ name: String,
+ #[getset(get = "pub")]
+ age: u8,
+}
+
+fn main() {
+ let mut person = Person { name: "Alice".to_string(), age: 30 };
+ person.set_name("Bob".to_string());
+ println!("{}", person.name()); // Bob
+}File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/annotations.md
+1734591766497 1686497988937Most common traits of stdlib?
| Trait | Purpose | Key Methods |
|---|---|---|
Clone | For creating duplicate values. | clone() |
Copy | For simple, bitwise copyable types (e.g., integers, floats). | Implicit (=) |
Default | For creating a default value for a type. | default() |
Debug | For formatting values for debugging. | fmt() (used with {:?}) |
PartialEq/Eq | For comparing values for equality. | ==, != |
PartialOrd/Ord | For ordering and comparisons. | <, >, cmp() |
| Trait | Purpose | Key Methods |
|---|---|---|
Iterator | For iterating over a sequence of items. | next(), map(), filter() |
IntoIterator | Converts a type into an Iterator. | into_iter() |
Extend | Extends a collection by adding items from an Iterator. | extend() |
FromIterator | Creates a collection from an Iterator. | from_iter() |
| Trait | Purpose | Key Methods |
|---|---|---|
From | Converts one type into another. | from() |
Into | Converts a type into another type. | into() |
AsRef/AsMut | Provides references to types (&T or &mut T). | as_ref(), as_mut() |
ToString | Converts a type into a String. | to_string() |
Display | Formats a value for user-facing output. | fmt() (used with {}) |
FromInto and TryFrom).pub trait From<T>: Sized {
+ fn from(value: T) -> Self;
+}
+
+let s = String::from("hello"); // &str → String
+let n = i32::from(42u8); // u8 → i32From and Into relationship: They’re directly linked:
impl<T, U> Into<U> for T
+where
+ U: From<T>,
+{
+ fn into(self) -> U {
+ U::from(self)
+ }
+}So if you implement From<T> for U, you automatically get Into<U> for free.
Use From when:
Examples:
String::from(&str)Vec::from(&[T])IpAddr::from(Ipv4Addr)AsRefAsRef<T> is a standard conversion trait that allows to borrow a reference from another type.pub trait AsRef<T: ?Sized> {
+ fn as_ref(&self) -> &T;
+}
+
+let s = String::from("hello");
+let r: &str = s.as_ref(); // &String -> &strYou rarely see AsRef in callsites because:
It’s used in function bounds, not in code bodies.
Library authors use it in generic APIs to accept multiple input types. +Callers just pass whatever type they already have.
fn read_file<P: AsRef<std::path::Path>>(path: P) {
+ let path_ref: &std::path::Path = path.as_ref();
+ // ...
+}Caller side:
read_file("config.yaml"); // &str
+read_file(String::from("a.txt")); // String
+read_file(Path::new("b.txt")); // &PathYou don’t write as_ref() manually — the compiler infers it via generics.
Where you do see it
std, tokio, reqwest, serde, etc.).&str, String, Path, &Path, etc.impl File {
+ pub fn open<P: AsRef<Path>>(path: P) -> io::Result<File> {
+ // internally: path.as_ref()
+ }
+}Callers can use any type that can reference a Path — that’s the power of AsRef.
Use AsRef when you write APIs that take “anything that can be referenced as …”, e.g.:
fn print_uppercase<S: AsRef<str>>(input: S) {
+ println!("{}", input.as_ref().to_uppercase());
+}You can now call:
print_uppercase("hello");
+print_uppercase(String::from("world"));| Why you rarely see it | Explanation |
|---|---|
| It’s used in trait bounds, not directly in calls | Most developers are callers, not implementors |
| It enables flexible APIs to accept multiple reference types | Common in library design |
It’s often implicit — no need to call .as_ref() yourself | Rust’s type inference handles it |
AsMut is the mutable companion to AsRef.&mut T from another type, cheaply and without allocation.pub trait AsMut<T: ?Sized> {
+ fn as_mut(&mut self) -> &mut T;
+}Takes &mut self and returns &mut T.
Used for zero-cost, ref-to-ref mutable conversions.
Common impls:
impl<T> AsMut<T> for T → x.as_mut() gives &mut TBox<T> : AsMut<T>Vec<T> : AsMut<[T]> (mutable slice)String : AsMut<str>PathBuf : AsMut<Path>, OsString : AsMut<OsStr>, etc.// Example: generic API that can mutate through many wrappers
+fn zero_out<U>(mut x: U)
+where
+ U: AsMut<u32>,
+{
+ *x.as_mut() = 0;
+}
+
+fn main() {
+ let mut a: u32 = 5;
+ zero_out(&mut a); // &mut T implements AsMut<T>
+ let mut b = Box::new(7u32);
+ zero_out(&mut b); // Box<u32> → &mut u32
+}Pitfall:
&mut T from &T via AsMut (it requires &mut self), which preserves Rust’s
+aliasing rules.| Trait | Purpose | Key Methods |
|---|---|---|
Error | Represents error types with optional descriptions or sources. | description(), source() |
Result | Common enum for error handling (Ok or Err). | unwrap(), expect(), map() |
Option | Represents an optional value (Some or None). | unwrap(), map(), is_some() |
| Trait | Purpose | Key Methods |
|---|---|---|
Fn, FnMut, FnOnce | For closures and callable objects. | call(), call_mut(), call_once() |
| Trait | Purpose | Key Methods |
|---|---|---|
Deref | Overloads the dereference operator (*). | deref() |
Drop | Custom cleanup logic when a value goes out of scope. | drop() (called automatically) |
Borrow/BorrowMut | Provides immutable/mutable borrowing of values. | borrow(), borrow_mut() |
| Trait | Purpose | Key Notes |
|---|---|---|
Sized | Indicates types with a known size at compile-time (automatically applied). | Type parameters are Sized by default (opt out with ?Sized); str, [T] and dyn Trait are unsized. |
Send | Allows types to be transferred between threads. | Needed for thread safety. |
Sync | Allows shared references to be shared between threads. | Needed for concurrency. |
File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/traits.md
+1734944783891 1686497988937Explain Smart Pointers:
Deref, DropVec<T> is a smart pointerA Vec<T> is implemented as a struct that contains:
*mut T),len),cap).So the Vec value itself lives on the stack, but it points to elements stored on the heap.
When the Vec is dropped:
That automatic heap memory management makes it a smart pointer.
StringString is essentially a Vec<u8> with the invariant that its bytes are valid UTF-8.String and Vec<T> are growable, heap-allocated collections.fn main() {
+ let mut v = Vec::new();
+ v.push(10);
+ v.push(20);
+
+ println!("Length: {}", v.len()); // metadata (len)
+ println!("First: {}", v[0]); // dereference into heap memory
+}v itself is a small struct (3 machine words: pointer, len, cap).10 and 20 live in heap memory.v[0]) means following the pointer + indexing.Vec<T> is a smart pointer**, but also a collection type.
+It owns heap memory, manages it automatically, and provides higher-level APIs on top.
Deref Trait in Smart PointersDeref allows smart pointers (like Box<T>, Rc<T>, Arc<T>) to behave like references to T.* operator.trait Deref {
+ type Target: ?Sized; // Target can be Sized (like `String`) or unsized (like `[u8]`)
+ fn deref(&self) -> &Self::Target;
+}
+let x: Box<T> = Box::new(...);
+let y: &T = &*x;
+let y: &T = &x; // auto-deref happens hereRust automatically calls deref() behind the scenes, yielding &T.
&T).let s = Rc::new(String::from("hello"));
+println!("{}", s.len()); // s is auto-dereferenced to &StringDeref defines how smart pointer behaves when * operator is used.
let x = 5;
+let y = Box::new(x);
+
+assert_eq!(5, *y); // works because Box<T> implements Deref<Target=T>
+
+*(y.deref()) // y.deref() -> &i32, then *&i32 -> i32*y calls Deref::deref(&y), which returns &i32.&i32 to i32 (because * was applied).So *y is really:
*(y.deref()) // y.deref() -> &i32, then *&i32 -> i32Confusion comes from how Rust automatically applies Deref coercions in certain contexts
+(like method calls). But the raw * operator itself is precise:
*reference_to_box → gets the Box back.*box → calls Deref, gets the inner value.fn main() {
+ let x = 5;
+ let y = Box::new(x);
+
+ // *y calls Deref -> &i32 -> i32
+ assert_eq!(5, *y);
+
+ // &y is &Box<i32>
+ let r: &Box<i32> = &y;
+
+ // *r removes the & -> gives back the Box<i32>
+ let b: Box<i32> = *r; // moves it out!
+ assert_eq!(5, *b);
+}functions take references (&T) as arguments. For smart pointers to work, they must be convertible to regular references. The Deref trait enables this by defining how the smart pointer can be dereferenced to a &T.
fn greet(name: &str) {
+ println!("Hello, {}!", name);
+}
+
+let my_name = Box::new(String::from("Alice"));
+greet(&my_name); // Deref coercion converts `&Box<String>` to `&String` and then to `&str`.This is known as Deref coercion, where the compiler automatically calls the Deref implementation to transform a smart pointer into a compatible reference.
By implementing the Deref trait, you can define custom dereference behavior for your own types to seamlessly expose their inner values.
use std::ops::Deref;
+
+struct MyBox<T>(T);
+
+impl<T> MyBox<T> {
+ fn new(value: T) -> MyBox<T> {
+ MyBox(value)
+ }
+}
+
+impl<T> Deref for MyBox<T> {
+ type Target = T;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0 // Returns a reference to the inner value
+ }
+}
+
+let my_box = MyBox::new(42);
+assert_eq!(42, *my_box); // Deref trait enables dereferencing to the inner valueTreating a Type Like a Reference by Implementing the Deref Trait:*sp is replaced with *(sp.deref()):
deref() method returns a reference, which can be dereferenced with *MyBox<T>| Smart Pointer | Purpose | Key Features | Example Use Cases |
|---|---|---|---|
Box<T> | Allocates data on the heap. | Single ownership; Deref to value. | Large structs, recursive types. |
Rc<T> | Reference-counted pointer for shared ownership. | Immutable shared ownership. | Shared immutable data (e.g., graphs, trees). |
Arc<T> | Like Rc<T>, but thread-safe (atomic reference counting). | Thread-safe shared RO ownership. | Shared data across threads in concurrent code. |
RefCell<T> | Allows mutable borrows checked at runtime. | Interior mutability; not thread-safe. | Mutating data in shared ownership contexts. |
Cell<T> | Similar to RefCell, but for Copy types with no borrows. | Interior mutability for Copy types. | Mutating small values like integers. |
Mutex<T> | Provides mutual exclusion for data in multithreaded code. | Thread-safe interior mutability. | Protecting data shared across threads. |
RwLock<T> | A read-write lock for multithreaded code. | Multiple readers or a single writer. | High-performance shared mutable state. |
Cow<T> | A clone-on-write pointer. | Avoids cloning unless necessary. | Efficient handling of borrowed or owned data. |
Weak<T> | non-owning reference for use with Rc or Arc. | does not add to reference count, no owning. | Prevents circular refs in ref-counted structures trees/graphs. |
Box<T> - Heap AllocationBox is used to allocate data on the heap, providing ownership and a stable address.
fn main() {
+ let b = Box::new(42); // Allocate on the heap
+ println!("Boxed value: {}", b);
+
+ // Useful for recursive types:
+ enum List {
+ Cons(i32, Box<List>),
+ Nil,
+ }
+
+ let _list = List::Cons(1, Box::new(List::Cons(2, Box::new(List::Nil))));
+}Rc<T> - Reference CountingRc enables multiple owners for the same data, with immutable access.
use std::rc::Rc;
+
+fn main() {
+ let data = Rc::new("Hello, Rc!".to_string());
+
+ let a = Rc::clone(&data); // Clone the reference (not the data).
+ let b = Rc::clone(&data);
+
+ println!("Reference count: {}", Rc::strong_count(&data)); // 3
+ println!("{}, {}", a, b);
+}Arc<T> - Thread-Safe Reference CountingArc is like Rc, but for concurrent scenarios.
use std::sync::Arc;
+use std::thread;
+
+fn main() {
+ let data = Arc::new("Hello, Arc!".to_string());
+
+ let handles: Vec<_> = (0..3)
+ .map(|_| {
+ let data = Arc::clone(&data);
+ thread::spawn(move || println!("{}", data))
+ })
+ .collect();
+
+ for handle in handles {
+ handle.join().unwrap();
+ }
+}RefCell<T> - Interior MutabilityRefCell allows mutable borrowing even if the RefCell itself is immutable.
use std::cell::RefCell;
+
+fn main() {
+ let data = RefCell::new(42);
+
+ *data.borrow_mut() += 1; // Runtime-checked mutable borrow
+ println!("Updated value: {}", *data.borrow());
+}Mutex<T> - Mutual ExclusionMutex ensures exclusive access to data in multithreaded scenarios.
use std::sync::Mutex;
+
+fn main() {
+ let data = Mutex::new(42);
+
+ {
+ let mut locked = data.lock().unwrap();
+ *locked += 1;
+ }
+
+ println!("Updated value: {}", *data.lock().unwrap());
+}RwLock<T> - Read-Write LockRwLock allows multiple readers or one writer.
use std::sync::RwLock;
+
+fn main() {
+ let data = RwLock::new(42);
+
+ {
+ let read1 = data.read().unwrap();
+ let read2 = data.read().unwrap();
+ println!("Readers: {}, {}", read1, read2);
+ }
+
+ {
+ let mut write = data.write().unwrap();
+ *write += 1;
+ }
+
+ println!("Updated value: {}", *data.read().unwrap());
+}Cow<T> - Clone-on-WriteProblem: Avoid Unnecessary Copies
Cow means “clone-on-write” — use a borrowed reference by default, but automatically “upgrade”
+to owned data when you must modify it.
// Without `Cow`
+fn to_upper_always_owned(s: &str) -> String {
+ s.to_uppercase() // allocates every time
+}This is wasteful if the string is already uppercase.
We can borrow when possible, clone only if necessary.
// With `Cow`
+use std::borrow::Cow;
+
+fn ensure_uppercase<'a>(input: &'a str) -> Cow<'a, str> {
+ if input.chars().all(|c| !c.is_lowercase()) {
+ // already uppercase — no allocation
+ Cow::Borrowed(input)
+ } else {
+ // needs change — allocate a new String
+ Cow::Owned(input.to_uppercase())
+ }
+}
+
+fn main() {
+ let s1 = "HELLO";
+ let s2 = "Hello";
+
+ let r1 = ensure_uppercase(s1);
+ let r2 = ensure_uppercase(s2);
+
+ println!("r1 = {}", r1); // Borrowed(&str)
+ println!("r2 = {}", r2); // Owned(String)
+}| Input | Result type | Allocates? | Result value |
|---|---|---|---|
"HELLO" | Cow::Borrowed(&str) | No | "HELLO" |
"Hello" | Cow::Owned(String) | Yes | "HELLO" |
Cow lets you return borrowed data when no change is needed, avoiding a clone — but owned
+data when modification is necessary.
Where this is useful:
Weak<T>Weak<T> is a non-owning handle to data in an Rc<T> (or Arc<T>).
+It’s used to break reference cycles and observe shared data without keeping it alive.
Background: Rc<T> creates shared ownership
Rc<T> = “reference-counted” pointer.
Rc<T>s can share ownership of the same data.use std::rc::Rc;
+
+let a = Rc::new(5);
+let b = Rc::clone(&a);
+
+println!("{}", Rc::strong_count(&a)); // 2The problem — reference cycles:
Rcs refer to each other, they form a cycle:a → b → aNeither’s count ever reaches zero → memory leak.
The solution — Weak<T>
Weak<T> is a non-owning reference to data managed by an Rc<T>.
Rc<T> if the data is still alive.// Example: Preventing a cycle
+use std::rc::{Rc, Weak};
+use std::cell::RefCell;
+
+struct Node {
+ value: i32,
+ parent: RefCell<Weak<Node>>, // weak reference to parent
+ children: RefCell<Vec<Rc<Node>>>, // strong references to children
+}
+
+fn main() {
+ let parent = Rc::new(Node {
+ value: 1,
+ parent: RefCell::new(Weak::new()),
+ children: RefCell::new(Vec::new()),
+ });
+
+ let child = Rc::new(Node {
+ value: 2,
+ parent: RefCell::new(Rc::downgrade(&parent)), // weak ref to parent
+ children: RefCell::new(Vec::new()),
+ });
+
+ parent.children.borrow_mut().push(Rc::clone(&child));
+
+ println!(
+ "strong = {}, weak = {}",
+ Rc::strong_count(&parent),
+ Rc::weak_count(&parent)
+ );
+
+ // Try to upgrade the weak reference
+ if let Some(parent_rc) = child.parent.borrow().upgrade() {
+ println!("Parent value = {}", parent_rc.value);
+ } // upgrade() returns None if parent was dropped
+}| Count type | Incremented by | Keeps data alive? | Example |
|---|---|---|---|
Strong (Rc::strong_count) | Rc::clone() | Yes | parent holds child strongly |
Weak (Rc::weak_count) | Rc::downgrade() | No | child holds parent weakly |
Use Weak when you want to:
| Smart Pointer | Heap Allocation | Shared Ownership | Thread Safety | Interior Mutability |
|---|---|---|---|---|
Box<T> | ? | ? | ? | ? |
Rc<T> | ? | ? | ? | ? |
Arc<T> | ? | ? | ? | ? |
RefCell<T> | ? | ? | ? | ? (runtime checked) |
Mutex<T> | ? | ? | ? | ? (runtime checked) |
Box<T>: When you need heap allocation or recursive types.Rc<T>: Shared ownership in single-threaded contexts.Arc<T>: Shared ownership in multithreaded contexts.RefCell<T>: Mutable access in single-threaded scenarios.Mutex<T>/RwLock<T>: For thread-safe interior mutability.File: /Users/Q187392/dev/s/private/vimwiki/dev/rust-reference.md
+1735811879776 1686497988937why do I need the "&" operator on m. It is a smart pointer which behaves already as a reference, isn't it?
fn hello(string: &str) { }
+
+let m = Box::new(String::from("Rust"));
+hello(&m); Box<T> provides ownership over the value it points to. It behaves like a pointer but is not the same as a Rust reference.&T) is non-owning pointer used to borrow data temporarily.Even though Box<T> implements the Deref trait to dereference into T, it is still an owned type, not a reference.
+Deref coercion works on references to the smart pointer, not the smart pointer itself.
&m is NecessaryThe function hello expects a &str. However:
m is of type Box<String>, which means it owns the String.hello, Rust needs a &str. The compiler must:m as &Box<String> (using the & operator).&Box<String> to get &String.String to get &str.Without the &, there's no reference for deref coercion to work with. Smart pointers like Box don't automatically behave as references when passed to functions.
File: /Users/Q187392/dev/s/private/vimwiki/dev/rust-reference.md
+1735811880108 1686497988937Explain the Law of Demeter
states that a given object should only interact with its:
public class CustomerService {
+ public String getCustomerAddress(Order order) {
+ return order.getCustomer().getAddress().toString(); // Chaining calls: Order -> Customer -> Address
+ }
+}Customer and Address objects, which are not its immediate collaborators. This creates tight coupling.public class Order {
+ public String getCustomerAddress() {
+ return customer.getAddress().toString(); // Delegation
+ }
+}
+
+public class CustomerService {
+ public String getCustomerAddress(Order order) {
+ return order.getCustomerAddress(); // Only interacts with Order
+ }
+}Order encapsulates the details of the Customer and Address, adhering to the Law of Demeter.File: /Users/Q187392/dev/s/private/vimwiki/dev/solid.md
+1736449127090 1686497988937Explain @Jacksonized:
@Jacksonized used to integrate Lombok's generated classes with the Jackson library@Builder works seamlessly with Jackson@Builder, these are not generated.ChargingPoi class using Jackson, even though the class is immutable and uses a builder pattern.File: /Users/Q187392/dev/s/private/vimwiki/help/java/lombok.md
+1736584024689 1686497988937In OpenAPI, why is using a base schema with discriminator preferred over anyOf for handling polymorphic types?
anyOf Approach (Less Preferred)items:
+ anyOf:
+ - $ref: "#/components/schemas/TypeA"
+ - $ref: "#/components/schemas/TypeB"
+
+components:
+ schemas:
+ TypeA:
+ type: object
+ properties:
+ fieldA:
+ type: string
+
+ TypeB:
+ type: object
+ properties:
+ fieldB:
+ type: integer🔴 Downside: Harder to determine object type, requires additional validation logic.
items:
+ $ref: "#/components/schemas/BaseItem"
+
+components:
+ schemas:
+ BaseItem:
+ type: object
+ required: [type]
+ properties:
+ type:
+ type: string
+ enum: [TypeA, TypeB]
+ discriminator:
+ propertyName: type
+ mapping:
+ TypeA: "#/components/schemas/TypeA"
+ TypeB: "#/components/schemas/TypeB"
+
+ TypeA:
+ allOf: // commonly used when extending a base schema
+ - $ref: "#/components/schemas/BaseItem"
+ - type: object
+ properties:
+ fieldA:
+ type: string
+
+ TypeB:
+ allOf:
+ - $ref: "#/components/schemas/BaseItem"
+ - type: object
+ properties:
+ fieldB:
+ type: integer✅ Advantages:
type.Open http://localhost:8080/swagger-ui.html to interact with the API.
// Base Interface & Subclasses
+package com.example.openapi.model;
+
+import com.fasterxml.jackson.annotation.*;
+import io.swagger.v3.oas.annotations.media.DiscriminatorMapping;
+import io.swagger.v3.oas.annotations.media.Schema;
+
+@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
+@JsonSubTypes({
+ @JsonSubTypes.Type(value = TypeA.class, name = "TypeA"),
+ @JsonSubTypes.Type(value = TypeB.class, name = "TypeB")
+})
+@Schema(
+ description = "BaseItem",
+ discriminatorProperty = "type",
+ oneOf = {TypeA.class, TypeB.class},
+ discriminatorMapping = {
+ @DiscriminatorMapping(schema = TypeA.class, value = "TypeA"),
+ @DiscriminatorMapping(schema = TypeB.class, value = "TypeB")
+ }
+)
+public abstract class BaseItem {
+ @Schema(description = "Type of item", required = true, example = "TypeA")
+ public String type;
+}
+
+@Schema(description = "TypeA extends BaseItem")
+class TypeA extends BaseItem {
+ @Schema(description = "Field specific to TypeA", example = "some text")
+ public String fieldA;
+}
+
+@Schema(description = "TypeB extends BaseItem")
+class TypeB extends BaseItem {
+ @Schema(description = "Field specific to TypeB", example = "42")
+ public int fieldB;
+}
+
+// REST Controller to Handle OpenAPI Requests
+package com.example.openapi.controller;
+
+import com.example.openapi.model.BaseItem;
+import io.swagger.v3.oas.annotations.Operation;
+import io.swagger.v3.oas.annotations.tags.Tag;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.*;
+
+import java.util.List;
+
+@RestController
+@RequestMapping("/api/items")
+@Tag(name = "Items", description = "API for handling polymorphic OpenAPI requests")
+public class ItemController {
+
+ @PostMapping
+ @Operation(summary = "Create an item", description = "Accepts a polymorphic item with discriminator-based deserialization")
+ public ResponseEntity<BaseItem> createItem(@RequestBody BaseItem item) {
+ return ResponseEntity.ok(item);
+ }
+
+ @GetMapping
+ @Operation(summary = "Get sample items", description = "Returns a list of different item types")
+ public ResponseEntity<List<BaseItem>> getItems() {
+ // Example response
+ return ResponseEntity.ok(List.of(
+ new TypeA() {{ type = "TypeA"; fieldA = "Example A"; }},
+ new TypeB() {{ type = "TypeB"; fieldB = 123; }}
+ ));
+ }
+}File: /Users/Q187392/dev/s/private/vimwiki/dev/json_schema.md
+1737182757446 1686497988937How to find when a term/bug was introduced?
# -S 'search-term': (also called "pickaxe") filters commits to those that introduce or remove the given term.
+git log -S 'search-term'File: /Users/Q187392/dev/s/private/vimwiki/help/git.md
+1737791849637 1686497988937Explain Send trait:
Send ensures safe ownership transfer of a type between threads.Send can safely be moved across thread boundaries.Send unless they explicitly involve non-thread-safe structures.Ownership Transfer Across Threads:
Send type can be moved from one thread to another. For example, a Send type stored in a std::thread::spawn closure will be safely transferred to the new thread.Automatically Implemented:
Send by default if they don’t contain non-Send types.i32, f64, and thread-safe smart pointers like Arc and Box are all Send.Non-Send Types:
Send if it contains data that is inherently not thread-safe, like:Rc<T>: A reference-counted smart pointer that isn’t thread-safe, transferring it to
+another thread would lead to race conditions.*const T / *mut T: Raw pointers, because they can lead to undefined behavior if accessed
+across threads without synchronization.Arc or Mutex to be sent
+between threads.Zero-Cost Abstraction:
Send trait is purely a marker trait. It has no runtime overhead or extra functionality —
+it's used by the compiler to enforce safety guarantees at compile time.SendA type is not Send when it contains non-thread-safe data or enforces single-threaded usage.
Send.Send.std::cell::RefCell:use std::cell::RefCell;
+use std::thread;
+
+fn main() {
+ let data = RefCell::new(42);
+
+ let handle = thread::spawn(move || {
+ // NOTE: this actually compiles: RefCell<T> is Send when T: Send (it is only !Sync). Use Rc<T> to demonstrate a non-Send compile error.
+ let mut value = data.borrow_mut();
+ *value += 1;
+ });
+
+ handle.join().unwrap();
+}File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/concurrency.md
+1739771956551 1686497988937What are "object-safe" traits?
The vtable makes trait objects work - it's a compile-time-generated jump table that enables +runtime polymorphism without the overhead of type checking at every method call.
The vtable contains all the information needed to work with the concrete type at +runtime, even though the compile-time type information has been "erased" by the trait object.
Trait Object: Box<dyn Draw>
+┌─────────────────┐
+│ data_ptr ────┼──┐
+├─────────────────┤ │
+│ vtable_ptr ────┼──┼──┐
+└─────────────────┘ │ │
+ │ │
+ │ │
+┌────────────────────┘ │
+│ │
+▼ ACTUAL DATA │
+┌─────────────────┐ │
+│ Circle { │ │
+│ radius: 5 │ │
+│ } │ │
+└─────────────────┘ │
+ │
+┌───────────────────────┘
+│
+▼ VTABLE (Static, Non-Generic)
+┌──────────────────────────┐
+│ Drop function ptr │ ← Compiler-generated
+├──────────────────────────┤
+│ Size of concrete type │ ← Circle: 8 bytes
+├──────────────────────────┤
+│ Alignment requirements │ ← Circle: 8 byte align
+├──────────────────────────┤
+│ draw() method ptr ────┼──┐
+└──────────────────────────┘ │
+ │
+┌─────────────────────────────┘
+│
+▼ CONCRETE IMPLEMENTATION
+fn circle_draw(self: &Circle) {
+ println!("Drawing circle with radius {}", self.radius);
+}
+
+MULTIPLE CONCRETE TYPES, SAME VTABLE LAYOUT
+============================================
+
+Circle vtable: Rectangle vtable: Triangle vtable:
+┌─────────────┐ ┌─────────────┐ ┌─────────────┐
+│ drop_circle │ │ drop_rect │ │ drop_tri │
+├─────────────┤ ├─────────────┤ ├─────────────┤
+│ size: 8 │ │ size: 16 │ │ size: 24 │
+├─────────────┤ ├─────────────┤ ├─────────────┤
+│ align: 8 │ │ align: 8 │ │ align: 8 │
+├─────────────┤ ├─────────────┤ ├─────────────┤
+│ circle_draw │ │ rect_draw │ │ tri_draw │
+├─────────────┤ ├─────────────┤ ├─────────────┤
+│ circle_area │ │ rect_area │ │ tri_area │
+└─────────────┘ └─────────────┘ └─────────────┘
+
+
+MEMORY SIZE BREAKDOWN:
+======================
+
+struct Circle {
+ radius: f64, // 8 bytes
+}
+Total: 8 bytes
+
+struct Rectangle {
+ width: f64, // 8 bytes
+ height: f64, // 8 bytes
+}
+Total: 16 bytes
+
+struct Triangle {
+ a: f64, // 8 bytes
+ b: f64, // 8 bytes
+ c: f64, // 8 bytes
+}
+Total: 24 bytes
+ ▲ ▲ ▲
+ │ │ │
+ └──────────────────────┼──────────────────────┘
+ │
+ SAME LAYOUT STRUCTURE
+ (different function ptrs)
+
+struct Circle {
+ radius: f64, // 8 bytes
+ color: u32, // 4 bytes
+ // + 4 bytes padding for alignment
+}
+
+TRAIT OBJECT CREATION & DISPATCH
+=================================
+
+1. COMPILE TIME:
+ let circle = Circle { radius: 5 };
+ let shape: Box<dyn Draw> = Box::new(circle);
+
+ Compiler generates vtable for Circle + Draw:
+ ┌──────────────────┐
+ │ Circle's vtable │ ← Created once per type
+ └──────────────────┘
+
+2. RUNTIME:
+ shape.draw();
+
+ Assembly-like pseudocode:
+ mov rax, [shape + 8] ; Load vtable_ptr
+ mov rbx, [rax + 24] ; Load draw() from vtable[3]
+ mov rdi, [shape] ; Load data_ptr as &self
+ call rbx ; Call the functionTo understand why certain traits are "object-safe" in Rust, and what that means, you need to understand how trait objects work under the hood.
A trait object is a value of a type like &dyn Trait or Box<dyn Trait> — it allows dynamic dispatch of
+methods via a vtable (virtual method table), rather than through compile-time monomorphization like with generics.
fn process(shape: &dyn Shape) {
+ shape.draw(); // dynamic dispatch
+}But not all traits can be turned into trait objects — only object-safe traits can.
Self.return Self is not object-safetrait Cloneable {
+ fn clone(&self) -> Self;
+}Self means "the concrete type implementing the trait."&dyn Cloneable, the concrete type is erased — we don’t know what Self is.🔧 Solution: use Box<dyn Trait> if you want to return trait objects:
trait Cloneable {
+ fn clone_box(&self) -> Box<dyn Cloneable>;
+}
+// associated implementation
+impl<T> Cloneable for T
+where
+ T: 'static + Clone,
+{
+ fn clone_box(&self) -> Box<dyn Cloneable> {
+ Box::new(self.clone())
+ }
+}Now it's object-safe because the return type is not Self, and is dynamically dispatchable.
trait Saver {
+ fn save<T: Serialize>(&self, data: &T);
+}&dyn Saver must have a fixed vtable at runtime.save<T> is a generic method — it could compile into many different versions depending on T.T.🔧 Solution: move the generic type out of the method:
trait Saver {
+ fn save(&self, data: &dyn Serialize); // now object-safe
+}Think of a trait object as a pointer to data + pointer to a vtable. That vtable must be fixed and non-generic.
Object safety ensures that:
If a trait violates these rules, it means you need compile-time monomorphization, not runtime polymorphism.
File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/traits.md
+1746458994880 1686497988937When does a KTable emit an event?
A KTable emits events whenever there is an update to its underlying state:
If a KTable is built from an input topic, it will only emit an event when there is an actual change to the state of the KTable. +This means that if an event from the input topic does not result in a change (e.g., the new value for a key is identical to the existing value in the KTable), the KTable will not emit an event.
This optimization helps reduce unnecessary downstream processing and ensures that only meaningful changes are propagated.
KTable emits an event:When the source topic receives a new record with:
KTable, andequals()).When a new key is added to the KTable.
When a key is deleted, i.e., a null value is written for that key — this is interpreted as a tombstone and emitted.
KTable does not emit:equals()).ValueTransformer or aggregation function does not produce a change.KTable<String, String> users = builder.table("users");users):| Key | Value | Emitted from KTable? |
|---|---|---|
| A | Alice | ✅ Yes |
| A | Alice | ❌ No (same value) |
| A | Alicia | ✅ Yes (value change) |
| A | null | ✅ Yes (tombstone) |
A KTable emits an update when its value changes (i.e., a new value is computed at a key).
This is true in the context of internal processing, particularly when using .aggregate(), .mapValues(), or other transformations — Kafka Streams will avoid forwarding an update if the value didn't change (based on Objects.equals() by default).
staticInputTopic.pipeInput(RECORD_KEY, chargingPoiEvent1);
+staticInputTopic.pipeInput(RECORD_KEY, chargingPoiEvent2);Even though chargingPoiEvent1.equals(chargingPoiEvent2):
join() recomputes the result and emits it, even if the join result is equal to the previous one..suppress(Suppressed.untilChanged()).| Scenario | Emits? | Explanation |
|---|---|---|
.aggregate() with same value | ❌ No | Internally skips re-emitting unless result differs. |
.table() re-processing same value | ✅ Yes | Still triggers downstream joins or subscriptions. |
.join() on KTable with same right side | ✅ Yes | Recomputes and emits result unless .suppress() is used. |
.suppress() for that behavior.KTable Emission BehaviorThe Kafka Streams documentation describes a KTable as an abstraction of a changelog stream, where each data record represents an update:([Confluent Documentation][1])
"A KTable is an abstraction of a changelog stream, where each data record represents an update. More precisely, the value in a data record is interpreted as an 'UPDATE' of the last value for the same record key, if any (if a corresponding key doesn’t exist yet, the update will be considered an INSERT)."([Confluent Documentation][1])
This means that every new record, even if it has the same value as the previous one for a given key, is treated as an update and thus can trigger downstream emissions.
In the KTable JavaDoc, it's noted:([Apache Kafka][2])
"Each record in this changelog stream is an update on the primary-keyed table with the record key as the primary key."([Apache Kafka][2])
File: /Users/Q187392/dev/s/private/vimwiki/dev/kafka-stream.md
+1746803169861 1686497988937Explain CDLS usage of OpenSearch synthetic source:
Traditionally, Elasticsearch stores the original JSON document in the _source field.
+While convenient for retrieval, this can consume significant storage and impact performance.
With Synthetic Source, the _source is no longer stored but is reconstructed from indexed fields on demand. This approach brings:
Most users will not notice any difference in Kibana or dashboards. However, here are a few things to be aware of:
_source._source as any operation relying on them will be impossibleFile: /Users/Q187392/dev/s/private/vimwiki/help/opensearch.md
+1748353212051 1686497988937Explain:
dependency "los_caps" {
+ config_path = "../los-caps"
+ mock_outputs = {
+ tg_arn_suffix = "REAL_VALUE_KNOWN_DURING_APPLY"
+ }
+} Dependency Block:
dependency "los_caps" {
+ config_path = "../los-caps"
+}mock_outputs:
mock_outputs = {
+ tg_arn_suffix = "REAL_VALUE_KNOWN_DURING_APPLY"
+}"REAL_VALUE_KNOWN_DURING_APPLY" is a placeholder indicating that the actual value will be determined when the configuration is applied.mock_outputsThe mock_outputs feature is particularly useful in scenarios where:
File: /Users/Q187392/dev/s/private/vimwiki/help/terragrunt.md
+1756965820894 1686497988937Explain Primitives versus Wrappers:
Primitive (int):
null (default = 0).Wrapper (Integer):
null.List<Integer>), nullable values, and APIs expecting objects.parseInt, toString, etc.).Boxing: primitive → wrapper (int → Integer).
+moving a primitive value into a heap-allocated object (wrapper).
Unboxing: wrapper → primitive (Integer → int).
+extracting the raw primitive from that object.
Autoboxing/unboxing: automatic conversion by the compiler.
Rule of thumb:
+Use primitives for calculations. Use wrappers when you need null, collections, or object APIs.
File: /Users/Q187392/dev/s/private/vimwiki/help/java/java.md
+1756965821034 1686497988937Explain AWS metrics heartbeat pattern:
This pattern is robust for “must-happen-once-per-day” signals and scales well to “at least N per +day” by adjusting only the threshold.
Source metric (m1): your counter/heartbeat metric, aggregated with Sum over a 1-day period (86 400s).
Math series (e1): FILL(m1, 0) converts MISSING daily buckets (no datapoints that day) to 0.
Alarm logic: LessThanThreshold with threshold = 1.
e1 = 0 → ALARMreturn_data: mark only the math series as true so the alarm evaluates that series.
treat_missing_data = "ignore": safe because the math series always yields a number.
# Example inputs
+locals {
+ powertools_namespace = "MyApp/Powertools"
+ pipeline_prefix = "pcv-pipeline"
+}
+
+resource "aws_sns_topic" "pcv_team_topic" {
+ name = "pcv-team-alerts"
+}
+
+resource "aws_cloudwatch_metric_alarm" "daily_pcv_publish_heartbeat" {
+ alarm_name = "pcv-file-published--daily-heartbeat"
+ alarm_description = "Alarms if no pcv-file-published metric was emitted in the last UTC day."
+ comparison_operator = "LessThanThreshold"
+ evaluation_periods = 1
+ threshold = 1
+ treat_missing_data = "ignore"
+ alarm_actions = [aws_sns_topic.pcv_team_topic.arn]
+ ok_actions = [aws_sns_topic.pcv_team_topic.arn]
+
+ # 1) Source metric (not directly evaluated)
+ metric_query {
+ id = "m1"
+ return_data = false
+ metric {
+ namespace = local.powertools_namespace
+ metric_name = "pcv-file-published"
+ # The 1-day rollup. Buckets are aligned to UTC day boundaries.
+ period = 86400
+ stat = "Sum"
+ unit = "Count"
+ dimensions = {
+ service = local.pipeline_prefix
+ }
+ }
+ }
+
+ # 2) Math series: force a value each day (0 when missing)
+ metric_query {
+ id = "e1"
+ expression = "FILL(m1, 0)"
+ label = "pcv-file-published--daily-sum-filled"
+ return_data = true
+ }
+}metric_query blocks?m1: defines the real metric and how to aggregate it (Sum over 1 day). return_data = false⇒ helper only.e1: defines what the alarm actually evaluates. FILL(m1, 0) yields a numeric value even
+when m1 is missing. return_data = true ⇒ alarm uses this series.FILL(..., 0) produce 0?m1 is MISSING → FILL(m1, 0) returns 0.m1 equals the Sum of that day → FILL returns that sum unchanged.INSUFFICIENT_DATA.unit = "Count" if that’s how you publish; mismatched units can make graphs
+confusing.threshold. Example: require ≥3 publishes/day ⇒threshold = 3.period = 86400, evaluation_periods = 7, datapoints_to_alarm = 1 (alarm if any of the last 7 days is zero) or (b) sum a wider
+range via additional math if you need a true rolling sum.File: /Users/Q187392/dev/s/private/vimwiki/help/aws/cloudwatch.md
+1757309867700 1686497988937Explain deref coercion in method resolution:
This is one of the most ergonomic parts of Deref + DerefMut: they hook into method call
+resolution.
When you call a method like:
my_box.some_method()the compiler checks:
Does MyBox itself implement some_method?
If not, and MyBox: Deref<Target = U>, check if U implements some_method.
(*my_box).some_method().If still not found and you have DerefMut, the same applies for mutable methods.
This is called deref coercion in method resolution.
use std::ops::{Deref, DerefMut};
+
+struct MyBox<T>(T);
+
+impl<T> Deref for MyBox<T> {
+ type Target = T;
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl<T> DerefMut for MyBox<T> {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.0
+ }
+}
+
+fn main() {
+ let mut b = MyBox(String::from("Hello"));
+
+ // Calls String::len via Deref
+ println!("Length = {}", b.len());
+
+ // Calls String::push_str via DerefMut
+ b.push_str(" World");
+
+ println!("{b}");
+}Output:
Length = 5
+Hello WorldWhat happened?
b.len() → MyBox doesn’t have len().
+Rust sees MyBox: Deref<Target=String>, so it tries String::len(&*b).b.push_str(" World") → mutable method, so Rust uses DerefMut to get &mut String, then calls String::push_str.File: /Users/Q187392/dev/s/private/vimwiki/dev/rust-concepts.md
+1757313441504 1686497988937Explain Interaction Tests:
Interaction tests are a type of unit test that verify how a class collaborates with its
+dependencies, usually with mocks and verify(). Instead of checking outputs or state, they check
+that the right methods are called on collaborators.
verify(paymentService).charge(order.getTotal());Why use sparingly:
Brittle – They fail when you refactor implementation details, even if behavior hasn’t changed.
paymentService.charge() to billingService.processPayment() breaks
+the test, though the outcome (customer charged) is the same.Coupled to implementation – They tie tests to how work is done, not what the system +achieves. This makes refactoring painful.
Noise – Too many verify() calls add little business value and create failing tests for
+non-functional reasons.
When they make sense:
For orchestration code where behavior is the interaction.
OrderService.placeOrder() must call paymentService.charge() and emailService.sendConfirmation().When a unit has no meaningful output or state except its calls to collaborators.
To ensure side effects happen (e.g., one database write, one message sent).
Better default:Use state-based tests where possible — assert on returned values or persisted state.
Order order = service.placeOrder("item1");
+assertEquals(OrderStatus.PAID, order.getStatus());class OrderService {
+ private final PaymentService paymentService;
+ private final EmailService emailService;
+
+ OrderService(PaymentService paymentService, EmailService emailService) {
+ this.paymentService = paymentService;
+ this.emailService = emailService;
+ }
+
+ void placeOrder(Order order) {
+ paymentService.charge(order.getTotal());
+ emailService.sendConfirmation(order.getId());
+ }
+}import org.junit.jupiter.api.Test;
+
+import static org.mockito.Mockito.*;
+
+class OrderServiceTest {
+
+ @Test
+ void placeOrder_ChargesCustomerAndSendsEmail() {
+ // Arrange
+ PaymentService paymentService = mock(PaymentService.class);
+ EmailService emailService = mock(EmailService.class);
+ OrderService orderService = new OrderService(paymentService, emailService);
+ Order order = new Order("123", 50.0);
+
+ // Act
+ orderService.placeOrder(order);
+
+ // Assert
+ verify(paymentService).charge(50.0);
+ verify(emailService).sendConfirmation("123");
+ verifyNoMoreInteractions(paymentService, emailService);
+ }
+}Breakdown:
OrderServicePaymentService and EmailService (dependencies).OrderService instance.placeOrder(order).charge() and sendConfirmation() were invoked with the right arguments.This shows clearly:
PaymentService or EmailService (they’re mocked).OrderService orchestrates its collaborators correctly.File: /Users/Q187392/dev/s/private/vimwiki/dev/tdd.md
+1757482783526 1686497988937Explain truststore versus keystore:
During an SSL handshake (e.g., HTTPS):
Server uses its keystore:
javax.net.ssl.keyStore, ...keyStorePassword, and optionally ...keyStoreType.Client uses its truststore:
javax.net.ssl.trustStore, ...trustStorePassword, and optionally ...trustStoreType.In mutual (two-way) SSL/TLS, both parties use keystore and truststore:
Although both stores technically share the same file formats (e.g., JKS or PKCS12), it’s +recommended to keep them separate for clarity and better security hygiene.
A single certificate (e.g., your server’s certificate) is not sufficient to prove your identity +to clients.
[ Root CA Certificate ]
+ (Trusted by clients)
+ │
+ ▼
+ [ Intermediate CA Certificate ]
+ (Issued by Root CA)
+ │
+ ▼
+ [ Server Certificate + Private Key ]
+ (Issued by Intermediate CA)The client needs the full chain (except the root)
it walks the chain:
Keystore
+│
+├─ Private Key (server.key)
+├─ Server Certificate (server.crt)
+└─ Intermediate CA Certificate(s)If the server only sent its leaf certificate, the client would fail validation because it +wouldn’t know how to link it back to a trusted root.
The keystore needs to contain:
cacerts) containing certificates
+of common CAs, located in the JRE/JDK’s lib/security directory.File: /Users/Q187392/dev/s/private/vimwiki/help/certificates.md
+1758358945793 1686497988937Explain Python EntryPoints in context of Plugin architecture:
The main application defines a named entry point group (e.g. "myapp.plugins") as part of
+its plugin API contract. That group name is essentially the “namespace” under which plugins
+register themselves.
pyproject.toml; it only needs to decide
+on and consistently use the group name in its discovery code.pyproject.toml.So the workflow is:
entry_points(group="myapp.plugins").ConceptEntry points are a setuptools mechanism for declaring and discovering pluggable components. +Packages advertise objects (classes, functions, factories) under named groups in their +metadata. Other code can then query these groups and load the objects dynamically.
How it works
Declaration – myplugin package declares entry points in its pyproject.toml:
[project]
+name = "myplugin"
+version = "0.1"
+
+[project.entry-points."myapp.plugins"]
+hello = "myplugin.hello:HelloPlugin"Here, myplugin registers HelloPlugin under the myapp.plugins group.
Installation metadata – When installed, this information is written into the package’s .dist-info/entry_points.txt.
Discovery – At runtime, importlib.metadata.entry_points scans all installed distributions
+on sys.path, reads their entry point metadata, and returns matches for a given group.
from importlib.metadata import entry_points
+
+for ep in entry_points(group="myapp.plugins"):
+ plugin_cls = ep.load() # Import object
+ plugin_cls().run()Why useful for plugins
myappmyapp/main.py
from importlib.metadata import entry_points
+
+def load_plugins():
+ for ep in entry_points(group="myapp.plugins"):
+ plugin = ep.load()()
+ plugin.run()
+
+if __name__ == "__main__":
+ load_plugins()pyproject.toml (core app doesn’t usually declare entry points, only defines itself):
[project]
+name = "myapp"
+version = "0.1"
+dependencies = []mypluginmyplugin/hello.py
class HelloPlugin:
+ def run(self):
+ print("Hello from plugin!")pyproject.toml
[project]
+name = "myplugin"
+version = "0.1"
+dependencies = []
+
+[project.entry-points."myapp.plugins"]
+hello = "myplugin.hello:HelloPlugin"With both installed in the same environment, running python -m myapp.main will discover the entry
+point defined by myplugin and execute it, without any explicit import in myapp.
File: /Users/Q187392/dev/s/private/vimwiki/help/python-packaging.md
+1758696231518 1686497988937Explain Kafka Tombstone:
Within Kafka itself, a tombstone is just a record with:
nullKafka does not assign any "delete" semantics beyond this. Its only built-in behavior is:
value = null), Kafka will eventually drop the
+key entirely during compaction.Outside of that, Kafka does nothing special — the broker just stores and delivers tombstone +records like any other.
It’s up to applications to decide how to interpret them:
Application sends a record with a non-null key and a null value.
kafkaTemplate.send("users", "user-123", null);Kafka stores it in the topic log just like any other record.
Consumers see it immediately.
@KafkaListener(topics = "users")
+public void listen(ConsumerRecord<String, User> record) {
+ if (record.value() == null) {
+ // tombstone → remove key from state
+ userCache.remove(record.key());
+ }
+}Applications decide how to react. Kafka itself doesn’t enforce “delete.”
Periodically, Kafka compacts the log.
For each key, only the latest record is retained.
If the latest record is a tombstone:
log.cleanup.policy=compact, delete.retention.ms).After that retention period, the tombstone is discarded, and the key disappears from the log.
This ensures:
Before compaction:
("user-123", {...})
+("user-123", null) ← tombstoneAfter compaction + tombstone retention expiry:
(no record for "user-123")So, the lifecycle is: produce → consume → compact → expire → gone.
File: /Users/Q187392/dev/s/private/vimwiki/dev/kafka.md
+1758696231519 1686497988937Explain tombstone handling in streams versus regular Kafka:
null values;KTable treats tombstones as first-class deletes in its state and propagation.Streams assigns delete semantics to tombstones for table-like abstractions and state stores.
Reading a compacted topic as a KTable:
value == null ⇒ delete key from the underlying state store (e.g., RocksDB).State restoration:
KStream simply forwards the record (including null values). No state is deleted unless you materialize and define that behavior.KTable–KTable join: a tombstone on either side removes the joined result for that key
+(downstream receives a tombstone).
Aggregations on KTable (e.g., groupBy().aggregate()):
null), Streams
+deletes the key from the result table’s store and emits a tombstone.Windowed aggregations (TimeWindows, SessionWindows):
KTable/changelogs should be compacted; retention settings determine how long
+tombstones are kept for catch-up consumers.null. Most built-ins do; custom SerDes should handle null defensively.File: /Users/Q187392/dev/s/private/vimwiki/dev/kafka.md
+1759085796792 1686497988937Explain functions vs function pointers vs lambdas:
FnOnce moves
+captured variables, FnMut mutates them, and Fn only reads them. The compiler chooses the least
+intrusive capture mode possible.File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/datatypes.md
+1759136110647 1686497988937Explain HMAC
HMAC uses two passes of hash computation:
The secret key is first used to derive two keys – inner and outer.
The first pass of the algorithm produces an internal hash derived from the message and the inner key.
The second pass produces the final HMAC code derived from the inner hash result and the outer key.
Someone who intercepts this message won't even be able to guess at its length.
The work renders the message contents absolutely useless to anyone without a key or a code.
Once the server receives the request and regenerates its own unique HMAC, it compares the two HMACs. +If they're equal, the client is trusted and the request is executed. This process is often called +a secret handshake.
Since the signature is irreversible and does not expose the secret key, it is safe to share, +e.g. REST query string
A typical usage of this is uploading and downloading from and to S3. You generate a pre-signed S3 +upload or download URL. This URL will only work to perform the given operation on your behalf +without making the bucket publicly accessible
Let:
H = cryptographic hash function (e.g., SHA-256)K = secret key (padded to block size of H)M = messageB = block size of H (64 bytes for SHA-256)⊕ = bitwise XORipad = byte 0x36 repeated B timesopad = byte 0x5c repeated B timesIf K is longer than B, hash it to shorten.
+If shorter, pad with zeros to length B.
Compute:
inner = H((K ⊕ ipad) || M) # concatenation
+outer = H((K ⊕ opad) || inner)Result = outer, which is the HMAC.
Let’s break the inner part of the HMAC algorithm down:
\[H((K \oplus ipad) \; || \; M)\]H = cryptographic hash function (e.g., SHA-256)K = secret key (padded or hashed to the block size of H)ipad = inner padding (a block of the hash’s block size, each byte = 0x36)⊕ = XOR (bitwise exclusive OR)|| = concatenationM = messageKey preparation:
K is exactly the hash block size (B bytes, 64 for SHA-256).XOR with ipad:
Ki = K ⊕ ipadThis mixes the key with a fixed constant (ipad = 0x36...36) to prevent certain attacks.
Concatenate with message:
Ki || MAppend the message to the modified key.
Hash the result:
inner = H(Ki || M)This produces the inner hash used in the next step of HMAC.
If you only did H(K || M), the key and message could interact in insecure ways (length extension
+attacks). The ipad/opad trick forces the hash to start from two different, unrelated internal
+states, strengthening security.
So H((K ⊕ ipad) || M) = the inner hash of HMAC, a secure mix of the padded key, the fixed
+constant, and the message.
H(K || M)) due to structural weaknesses.File: /Users/Q187392/dev/s/private/vimwiki/dev/security/hmac.md
+1760001711585 1686497988937Explain Authorization Code Flow with PKCE

The PKCE (Proof Key for Code Exchange) OAuth 2.0 flow is an extension of the Authorization +Code flow, designed to make it secure for public clients (e.g., mobile apps, single-page +apps) that cannot safely store a client secret.
Generate a Code Verifier
q8nT1_Xx9vwP5fO-L3z7x0U...Create a Code Challenge
code_challenge = BASE64URL(SHA256(code_verifier))Authorization Request
The client sends the user to the Authorization Server with:
response_type=codeclient_idredirect_uricode_challengecode_challenge_method=S256The user logs in and authorizes.
Authorization Server Redirect
Token Request
The client exchanges the code for tokens (access/refresh).
Instead of a client secret, it sends:
coderedirect_uricode_verifierToken Response
code_verifier, compares with the original code_challenge, and if valid, issues tokens.Even if an attacker intercepts the authorization code, they cannot redeem it without the
+original code_verifier. Since the verifier never left the client, only the legitimate app can
+complete the flow.
File: /Users/Q187392/dev/s/private/vimwiki/dev/security/oauth.md
+1760338853334 1686497988937question
answer
File: /Users/Q187392/dev/s/private/vimwiki/ai/claude-code.md
+1760419554790 1686497988937xxxxx
xxxxx
File: /Users/Q187392/dev/s/private/vimwiki/help/inka2.md
+1760509284607 1686497988937Explain Borrow vs AsRef
AsRef / AsMut vs Borrow / BorrowMut (crisp)
AsRefas_ref(&self) -> &T, as_mut(&mut self) -> &mut T.
+Use when: you just need a &T/&mut T from many “X-like” inputs (e.g., PathBuf→Path, Vec<T>→[T]).
+Guarantees: none beyond producing the (mutable) reference.
Borrow / BorrowMut
Purpose: view conversion with key-equivalence.
+API: borrow(&self) -> &Q, borrow_mut(&mut self) -> &mut Q.
+Guarantees: Eq, Ord, and Hash of the owner and the borrowed form must match.
+Use when: collections/lookup semantics must treat borrowed and owned keys identically.
Why not always Borrow?It imposes the stronger key-equivalence contract, which you often don’t need and can’t always satisfy. Prefer AsRef/AsMut for general input flexibility; reserve Borrow/BorrowMut for key semantics.
How lookups use BorrowCollections call borrow() internally. Example:
let mut m: std::collections::HashMap<String, i32> = Default::default();
+m.insert("alice".to_string(), 1);
+assert_eq!(m.get("alice"), Some(&1)); // works because String: Borrow<str>
+
+impl<K, V, S> HashMap<K, V, S> {
+ pub fn get<Q: ?Sized>(&self, k: &Q) -> Option<&V>
+ where
+ K: std::borrow::Borrow<Q>, // <-- key type K must Borrow<Q>
+ Q: std::hash::Hash + Eq,
+ { /* uses K::borrow() internally */ }
+}
+assert_eq!(m.get("alice"), m.get(("alice").borrow())); // same resultHashMap::get accepts &Q and requires K: Borrow<Q>, so it borrows each String key as &str to compare/hash.
File: /Users/Q187392/dev/s/private/vimwiki/dev/rust/core/traits.md
+1709395514436 1686497988937Should embeddings be normalized for clustering?
Normalizing embeddings before clustering is generally recommended, especially when using distance metrics like Euclidean distance, which (unlike cosine similarity) are sensitive to the magnitude of the vectors. +making the distance between vectors purely a function of the angle is important:
Magnitude Independence: Normalization removes the influence of the vector's magnitude, focusing the comparison on the direction (or angle) of the vectors. This is particularly useful when the magnitude does not carry meaningful information for the analysis.
Improved Clustering Quality: For algorithms that rely on distance metrics, such as k-means or hierarchical clustering, normalization can lead to more meaningful clusters. It ensures that the clustering process is based on the shape of the data distribution rather than the scale of the data points.
Consistency: Normalizing embeddings ensures consistency across different vectors, making them comparable on the same scale. This is crucial when embeddings come from different sources or when they represent different types of entities.
Enhanced Computational Efficiency: Some clustering algorithms can compute distances more efficiently when vectors are normalized, as certain optimizations can be applied when vectors have a unit norm.
File: /Users/Q187392/dev/s/private/vimwiki/ai/ml.md
+1709395514438 1686497988937How to ensure the kmeans clustering uses cosine distance?
you cannot directly use default KMeans implementation from libraries like scikit-learn, as it is designed to work with Euclidean distances.
Approximation: +Normalize Data Before Clustering: This way, Euclidean distance in the normalized space relates closely to cosine similarity, but it's not the same. +After normalization, you could use the standard KMeans algorithm, keeping in mind that this approach approximates cosine similarity by minimizing squared Euclidean distance on normalized data.
FAISS provides Clustering class for clustering vectors. It uses k-means by default. Note that the actual clustering process in FAISS does not directly consider cosine similarity; it's primarily designed for L2 distances. However, by normalizing your vectors and using IndexFlatIP, the clustering will align closely with cosine similarity principles.
File: /Users/Q187392/dev/s/private/vimwiki/ai/ml.md
+1709395515407 1686497988937What are stop_sequences in LLM parameters?
# Claude - Body Syntax
+body = json.dumps({
+ "prompt": prompt_data,
+ "max_tokens_to_sample": 200,
+ "temperature": 0.0,
+ "top_k": 250,
+ "top_p": 0.5,
+ "stop_sequences": ["\n\nHuman:"]
+})stop_sequences specific sequences of characters where the model should stop generating further text.stop_sequences during its generation process, it treats it as a signal to end the output, effectively stopping the generation.stop_sequences might include tokens or phrases that signify the end of a user's input or the beginning of the system's response, such as "\n\nHuman:" in your example.Language models (like Claude, GPT, etc.) are trained to predict the next most likely token, without inherent knowledge of:
Without explicit guidance, the model might:
For example, if the prompt includes:
Human: What's the capital of France?
+Assistant:the model might have seen many examples where after the assistant’s reply, the transcript continues with another "Human:" marker. +So, it tries to "complete the transcript" by adding what it thinks comes next, which is the next turn marker. +Without guidance, it continues generating:
The capital of France is Paris.
+Human: What's the population?Boundary Control
"Human:", "Assistant:" markers).\n### END).Task-Specific UseEssential for applications needing:
"</html>" for HTML, "END_OF_ANSWER" for completions).Trained on Turn-Based Dialogue.
Models like ChatGPT are trained on instruction-following datasets, where:
stop_sequencesUse a stop sequence that naturally fits your prompt format.
"Human: " and "Assistant: ", use "\n\nHuman:" or "\n\nAssistant:" as the stop signal.Pick a sequence unlikely to occur naturally in the generated text.
"the", "and").If generating structured formats (e.g., JSON, XML, code), choose a sequence that matches the closing token:
"}", "]"."</html>", "</body>".Ensure the stop sequence isn’t part of valid outputs.
";" if generating complex C code with semicolons.Tweak the sequence if the model:
Short, clear stop sequences (e.g., "\n\nHuman:") are faster to detect and reduce the model's output size.
| Scenario | Example stop_sequences |
|---|---|
| Chatbot turn boundary | "\n\nHuman:", "\nUser:" |
| Email generation | "--END OF EMAIL--", "\nThanks," |
| JSON response termination | "}", "]" (for ensuring JSON closure) |
| Web page generation | "</html>", "</body>" |
| Custom task-specific marker | "### END", "<<STOP>>" |
| SQL query generation | ";" (ensures end of query) |
File: /Users/Q187392/dev/s/private/vimwiki/help/aws/bedrock.md
diff --git a/data/testuser/first.md b/data/testuser/first.md new file mode 100644 index 0000000..80e16e0 --- /dev/null +++ b/data/testuser/first.md @@ -0,0 +1,15 @@ +--- + +Deck: Life Questions + +Tags: learning life-questions + + +1. What is the answer to the Ultimate Question of Life, the Universe, and Everything? + +> 42 + + +2. If it {{c1::looks like a duck, swims like a duck, and quacks like a duck}}, then it is a {{c2::duck}}. + +--- diff --git a/scripts/clean_media.py b/scripts/clean_media.py new file mode 100755 index 0000000..ea9cd2e --- /dev/null +++ b/scripts/clean_media.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +""" +Clean unused media files from Anki collection. + +This script: +1. Scans all notes in collection.anki2 to find media references +2. Compares with actual files in collection.media/ +3. Deletes unreferenced files +4. Updates collection.media.db2 to match +""" + +import sqlite3 +import re +import os +import sys +import argparse +from pathlib import Path +from typing import Set + +def extract_media_from_html(html: str) -> Set[str]: + """Extract all media filenames referenced in HTML content.""" + media_files = set() + + # Pattern 1: