Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ members = [
"crates/storage",
"crates/distributed-storage",
"crates/challenge-sdk",
"crates/challenge-registry",
"crates/epoch",
"crates/bittensor-integration",
"crates/subnet-manager",
Expand All @@ -20,6 +21,10 @@ members = [
# Note: WASM runtime removed - updates via git, version checked at handshake
# Note: P2P-only architecture - no centralized platform-server

# Challenge crates can be added here or as optional path/git dependencies
# Example:
# "challenges/example-challenge",

[workspace.package]
version = "0.1.0"
edition = "2021"
Expand Down Expand Up @@ -95,3 +100,11 @@ type_complexity = "allow"
await_holding_lock = "warn" # TODO: Fix async lock issues properly
collapsible_match = "allow"
collapsible_if = "allow"

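# Workspace-level metadata describing challenge integration capabilities.
# Cargo itself ignores `[workspace.metadata]`; this table is informational only.
# Crates that want dynamic loading declare the feature in their own Cargo.toml:
# [features]
# dynamic-challenges = ["libloading"]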
[workspace.metadata.challenge-features]
# Enable dynamic challenge loading (crates opt-in via features)
dynamic-loading-available = true
126 changes: 124 additions & 2 deletions bins/validator-node/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,108 @@ use platform_bittensor::{
sync_metagraph, BittensorClient, BlockSync, BlockSyncConfig, BlockSyncEvent, Metagraph,
Subtensor, SubtensorClient,
};
use platform_core::{Hotkey, Keypair, SUDO_KEY_SS58};
use platform_core::{
checkpoint::{
CheckpointData, CheckpointManager, CompletedEvaluationState, PendingEvaluationState,
WeightVoteState,
},
Hotkey, Keypair, SUDO_KEY_SS58,
};
use platform_distributed_storage::{
DistributedStoreExt, LocalStorage, LocalStorageBuilder, StorageKey,
};
use platform_p2p_consensus::{
ChainState, ConsensusEngine, NetworkEvent, P2PConfig, P2PMessage, P2PNetwork, StateManager,
ValidatorRecord, ValidatorSet,
};
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use tracing::{debug, error, info, warn};

/// Storage key for persisted chain state
const STATE_STORAGE_KEY: &str = "chain_state";

// ==================== Shutdown Handler ====================

/// Handles graceful shutdown with state persistence.
///
/// Bundles everything `create_checkpoint` needs: a checkpoint manager to
/// persist the data, a shared handle to the state manager to snapshot the
/// current state, and the `netuid` recorded in each checkpoint. The main loop
/// uses the same handler for periodic checkpoints as well as for the final
/// checkpoint on Ctrl+C.
struct ShutdownHandler {
// Persists checkpoints; created in `new` under `<checkpoint_dir>/checkpoints`.
checkpoint_manager: CheckpointManager,
// Shared handle; `snapshot()` is called on it every time a checkpoint is built.
state_manager: Arc<StateManager>,
// Network UID passed to `CheckpointData::new` for every checkpoint.
netuid: u16,
}

impl ShutdownHandler {
    /// Builds a handler that stores its checkpoints in the `checkpoints`
    /// subdirectory of `checkpoint_dir`.
    ///
    /// # Errors
    /// Propagates any error returned by `CheckpointManager::new`.
    fn new(checkpoint_dir: &Path, state_manager: Arc<StateManager>, netuid: u16) -> Result<Self> {
        let checkpoints_path = checkpoint_dir.join("checkpoints");
        // NOTE(review): the literal 10 is presumably the number of checkpoints
        // to retain — confirm against `CheckpointManager::new`.
        let handler = Self {
            checkpoint_manager: CheckpointManager::new(checkpoints_path, 10)?,
            state_manager,
            netuid,
        };
        Ok(handler)
    }

    /// Snapshots the current state and persists it as a checkpoint.
    ///
    /// The checkpoint carries every pending evaluation, the scored completed
    /// evaluations of the snapshot's current epoch, the weight votes (when
    /// present) and the Bittensor block recorded in the snapshot.
    ///
    /// # Errors
    /// Propagates any error returned by `CheckpointManager::create_checkpoint`.
    fn create_checkpoint(&mut self) -> Result<()> {
        let snapshot = self.state_manager.snapshot();
        let mut data = CheckpointData::new(snapshot.sequence, snapshot.epoch, self.netuid);

        // Pending evaluations: keep only the per-key score of each evaluation.
        for (submission_id, record) in &snapshot.pending_evaluations {
            let scores = record
                .evaluations
                .iter()
                .map(|(validator, evaluation)| (validator.clone(), evaluation.score))
                .collect();
            data.add_pending(PendingEvaluationState {
                submission_id: submission_id.clone(),
                challenge_id: record.challenge_id,
                miner: record.miner.clone(),
                submission_hash: record.agent_hash.clone(),
                scores,
                created_at: record.created_at,
                finalizing: record.finalized,
            });
        }

        // Completed evaluations: current epoch only, and only records that
        // already have an aggregated score.
        if let Some(finished) = snapshot.completed_evaluations.get(&snapshot.epoch) {
            for record in finished {
                match record.aggregated_score {
                    Some(final_score) => {
                        data.add_completed(CompletedEvaluationState {
                            submission_id: record.submission_id.clone(),
                            challenge_id: record.challenge_id,
                            final_score,
                            epoch: snapshot.epoch,
                            // Fall back to the creation time when no
                            // finalization time was recorded.
                            completed_at: record.finalized_at.unwrap_or(record.created_at),
                        });
                    }
                    None => continue,
                }
            }
        }

        // Weight votes are copied only when the snapshot has them; otherwise
        // the value set by `CheckpointData::new` is left untouched.
        if let Some(votes) = &snapshot.weight_votes {
            let vote_state = WeightVoteState {
                epoch: votes.epoch,
                netuid: votes.netuid,
                votes: votes.votes.clone(),
                finalized: votes.finalized,
                final_weights: votes.final_weights.clone(),
            };
            data.weight_votes = Some(vote_state);
        }

        data.bittensor_block = snapshot.bittensor_block;

        self.checkpoint_manager.create_checkpoint(&data)?;
        info!("Shutdown checkpoint created at sequence {}", snapshot.sequence);

        Ok(())
    }
}

// ==================== CLI ====================

#[derive(Parser, Debug)]
Expand Down Expand Up @@ -252,6 +338,22 @@ async fn main() -> Result<()> {
bittensor_client_for_metagraph = None;
}

// Initialize shutdown handler for graceful checkpoint persistence
let mut shutdown_handler =
match ShutdownHandler::new(&data_dir, state_manager.clone(), args.netuid) {
Ok(handler) => {
info!("Shutdown handler initialized with checkpoint directory");
Some(handler)
}
Err(e) => {
warn!(
"Failed to initialize shutdown handler: {}. Checkpoints disabled.",
e
);
None
}
};

info!("Decentralized validator running. Press Ctrl+C to stop.");

let netuid = args.netuid;
Expand All @@ -260,6 +362,7 @@ async fn main() -> Result<()> {
let mut metagraph_interval = tokio::time::interval(Duration::from_secs(300));
let mut stale_check_interval = tokio::time::interval(Duration::from_secs(60));
let mut state_persist_interval = tokio::time::interval(Duration::from_secs(60));
let mut checkpoint_interval = tokio::time::interval(Duration::from_secs(300)); // 5 minutes

loop {
tokio::select! {
Expand Down Expand Up @@ -335,8 +438,27 @@ async fn main() -> Result<()> {
debug!("Active validators: {}", validator_set.active_count());
}

// Periodic checkpoint
_ = checkpoint_interval.tick() => {
if let Some(handler) = shutdown_handler.as_mut() {
if let Err(e) = handler.create_checkpoint() {
warn!("Failed to create periodic checkpoint: {}", e);
} else {
debug!("Periodic checkpoint created");
}
}
}

// Ctrl+C
_ = tokio::signal::ctrl_c() => {
info!("Received shutdown signal, creating final checkpoint...");
if let Some(handler) = shutdown_handler.as_mut() {
if let Err(e) = handler.create_checkpoint() {
error!("Failed to create shutdown checkpoint: {}", e);
} else {
info!("Shutdown checkpoint saved successfully");
}
}
info!("Shutting down...");
break;
}
Expand Down
1 change: 1 addition & 0 deletions challenges/.gitkeep
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

41 changes: 41 additions & 0 deletions challenges/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Platform Challenge Crates

This directory contains challenge crates that can be integrated with the Platform validator network.

## Directory Structure

```text
challenges/
├── README.md # This file
├── example-challenge/ # Example challenge template (future)
└── [your-challenge]/ # Your custom challenge crate
```
Comment on lines +7 to +12
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Add a language tag to the fenced block.
Markdownlint MD040 flags fenced blocks without a language; use text here.

Proposed fix
-```
+```text
 challenges/
 ├── README.md           # This file
 ├── example-challenge/  # Example challenge template (future)
 └── [your-challenge]/   # Your custom challenge crate
</details>

<details>
<summary>🤖 Prompt for AI Agents</summary>

In @challenges/README.md around lines 7 - 12, The fenced code block in README.md
that displays the directory tree (the block starting with the triple backticks
followed by the tree: "challenges/ ├── README.md ...") is missing a language
tag; update that fenced block to include the language tag "text" (i.e., change
the opening ``` to ```text) so Markdownlint MD040 is satisfied and the tree is
treated as plain text.


</details>

<!-- fingerprinting:phantom:poseidon:eagle -->

<!-- This is an auto-generated comment by CodeRabbit -->


## Adding a New Challenge Crate

1. Create your challenge crate in this directory or reference it as a git dependency
2. Implement the `Challenge` trait from `platform-challenge-sdk`
3. Register your challenge in the challenge registry
4. Update the workspace `Cargo.toml` if adding locally

## External Challenge Crates

Challenge crates can also be external (like term-challenge). They should:
- Import `platform-challenge-sdk` as a dependency
- Implement the `ServerChallenge` trait
- Provide Docker configuration for evaluation

## Challenge Crate Requirements

- Must implement `platform_challenge_sdk::ServerChallenge` (from the `platform-challenge-sdk` crate)
- Must provide `/evaluate` HTTP endpoint
- Must handle graceful shutdown signals
- Must support state persistence for hot-reload

## Example

See [term-challenge](https://github.com/PlatformNetwork/term-challenge) for a complete example.

## Documentation

For detailed integration instructions, see the [Challenge Integration Guide](../docs/challenge-integration.md).
42 changes: 42 additions & 0 deletions crates/challenge-registry/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
[package]
name = "platform-challenge-registry"
version.workspace = true
edition.workspace = true
description = "Challenge registry and lifecycle management for Platform Network"

[dependencies]
platform-core = { path = "../core" }
platform-challenge-sdk = { path = "../challenge-sdk" }
platform-storage = { path = "../storage" }

# Async
tokio = { workspace = true }
async-trait = { workspace = true }
futures = { workspace = true }

# Serialization
serde = { workspace = true }
serde_json = { workspace = true }
bincode = { workspace = true }

# Utils
tracing = { workspace = true }
thiserror = { workspace = true }
anyhow = { workspace = true }
chrono = { workspace = true }
parking_lot = { workspace = true }
uuid = { workspace = true }

# Crypto for checksums
sha2 = { workspace = true }
hex = { workspace = true }

# Versioning
semver = "1.0"

# Health checks
reqwest = { workspace = true }

[dev-dependencies]
tempfile = { workspace = true }
tokio-test = { workspace = true }
Loading