# Configuration Reference
Complete reference for all FluxConfig parameters.
## FluxConfig (Top-Level)

```yaml
# Model
model_path: Qwen/Qwen3-8B      # Required: Model path or HF ID
model_type: llama              # Model architecture
output_dir: ./outputs          # Output directory

# Training
learning_rate: 1.0e-6          # Learning rate
batch_size: 32                 # Batch size
gradient_accumulation_steps: 4
num_steps: 10000               # Total training steps
warmup_steps: 100              # LR warmup steps
weight_decay: 0.0              # Weight decay

# Adam optimizer
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-8

# Logging
log_interval: 10               # Steps between logs
checkpoint_interval: 1000      # Steps between checkpoints
eval_interval: 500             # Steps between evals

seed: 42                       # Random seed

# W&B (optional)
wandb_project: null            # W&B project name
wandb_run_name: null           # W&B run name
```
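These keys load as plain YAML scalars. Note the dotted exponent spellings (`1.0e-6`, not `1e-6`): PyYAML's YAML 1.1 resolver reads the bare `1e-6` as a string, so the dotted form is deliberate. A minimal loading sketch with PyYAML (the `flux_config.yaml` filename is an example; how Flux turns the dict into a `FluxConfig` object is not shown here):

```python
# Minimal sketch: load the reference file above with PyYAML and inspect it.
import yaml

with open("flux_config.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["model_path"])     # Qwen/Qwen3-8B
print(cfg["learning_rate"])  # 1e-06, parsed as a float thanks to "1.0e-6"
```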
## adaptive_async

```yaml
adaptive_async:
  target_staleness: 0.15       # Target staleness in [0, 1]
  tolerance: 0.05              # Acceptable deviation
  min_async_ratio: 0.1         # Never fully sync
  max_async_ratio: 0.9         # Never fully async

  # PID controller
  kp: 0.1                      # Proportional gain
  ki: 0.01                     # Integral gain
  kd: 0.05                     # Derivative gain

  # Staleness computation
  kl_normalizer: 0.1
  iw_normalizer: 2.0
  max_version_gap: 5
  kl_weight: 0.4               # kl_weight + iw_weight + version_weight
  iw_weight: 0.3               # must sum to 1.0
  version_weight: 0.3

  # Sync control
  max_steps_without_sync: 50
  ema_alpha: 0.1
```
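A sketch of one plausible way these knobs fit together (illustrative only, not Flux's actual implementation): each staleness signal is normalized into [0, 1], blended with the three weights, smoothed with `ema_alpha`, and fed to a PID loop that nudges the async ratio toward `target_staleness`:

```python
# Illustrative sketch of the parameters above; not Flux's actual code.
class AdaptiveAsyncController:
    def __init__(self, cfg):
        self.cfg = cfg
        self.ema = 0.0          # EMA-smoothed staleness
        self.integral = 0.0     # PID integral term
        self.prev_error = 0.0   # for the derivative term

    def staleness(self, kl, iw_var, version_gap):
        c = self.cfg
        # Normalize each signal into [0, 1], then blend with the weights
        # (kl_weight + iw_weight + version_weight == 1.0).
        s = (c["kl_weight"] * min(kl / c["kl_normalizer"], 1.0)
             + c["iw_weight"] * min(iw_var / c["iw_normalizer"], 1.0)
             + c["version_weight"] * min(version_gap / c["max_version_gap"], 1.0))
        self.ema = c["ema_alpha"] * s + (1 - c["ema_alpha"]) * self.ema
        return self.ema

    def update_ratio(self, staleness, async_ratio):
        c = self.cfg
        error = staleness - c["target_staleness"]
        if abs(error) <= c["tolerance"]:
            return async_ratio  # within tolerance: hold the ratio steady
        self.integral += error
        derivative = error - self.prev_error
        self.prev_error = error
        delta = c["kp"] * error + c["ki"] * self.integral + c["kd"] * derivative
        # Too stale -> reduce asynchrony; clamp so training is never fully
        # sync (min_async_ratio) or fully async (max_async_ratio).
        return max(c["min_async_ratio"],
                   min(c["max_async_ratio"], async_ratio - delta))
```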
## rollout

```yaml
rollout:
  max_tokens: 2048             # Max tokens per response
  temperature: 1.0             # Sampling temperature
  top_p: 1.0                   # Nucleus sampling
  top_k: -1                    # Top-k sampling (-1 = disabled)

  # APRIL strategy
  oversample_ratio: 1.5        # Oversample factor
  min_yield_size: 8            # Min completed rollouts before yielding
  batch_timeout: 30.0          # Timeout (seconds)

  # Partial reuse
  use_length_prediction: true
  partial_reuse_threshold: 0.5
  partial_buffer_max_factor: 2.0
```
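The APRIL-style settings suggest an oversample-then-yield loop: launch `oversample_ratio` times more generations than the batch needs, return as soon as a full batch completes, and once `batch_timeout` expires yield whatever has landed as long as it reaches `min_yield_size`. A hedged asyncio sketch, where `generate_async` is a hypothetical stand-in for the real inference call:

```python
# Hedged sketch of oversample-then-yield; generate_async is a hypothetical
# stand-in for the actual inference client call.
import asyncio
import math

async def collect_batch(prompts, generate_async, cfg, batch_size):
    n = math.ceil(batch_size * cfg["oversample_ratio"])   # oversample
    pending = {asyncio.create_task(generate_async(p)) for p in prompts[:n]}
    done = []
    loop = asyncio.get_running_loop()
    deadline = loop.time() + cfg["batch_timeout"]
    while pending and len(done) < batch_size:
        remaining = deadline - loop.time()
        if remaining <= 0 and len(done) >= cfg["min_yield_size"]:
            break  # timed out, but we have enough rollouts to yield
        finished, pending = await asyncio.wait(
            pending,
            timeout=remaining if remaining > 0 else None,
            return_when=asyncio.FIRST_COMPLETED)
        done.extend(t.result() for t in finished)
    for task in pending:
        task.cancel()  # stragglers; partial reuse could recycle these instead
    return done[:batch_size]
```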
## algorithm

```yaml
algorithm:
  name: grpo                   # ppo, grpo, dpo, reinforce, dapo, rloo
  clip_range: 0.2              # PPO clip range
  clip_range_vf: null          # Value function clip (optional)
  entropy_coef: 0.01           # Entropy bonus
  value_coef: 0.5              # Value loss coefficient
  max_grad_norm: 1.0           # Gradient clipping
  gamma: 1.0                   # Discount factor
  gae_lambda: 0.95             # GAE lambda
  normalize_advantages: true
  kl_coef: 0.0                 # KL penalty
  kl_target: null              # Adaptive KL target
  adv_estimator: null          # Override advantage estimator
  policy_loss: null            # Override policy loss
```
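For the PPO-family algorithms, these knobs enter the objective roughly as in the textbook clipped surrogate loss. A PyTorch sketch (standard PPO for illustration, not necessarily Flux's exact code):

```python
# Standard PPO-style loss for illustration; not necessarily Flux's exact code.
import torch
import torch.nn.functional as F

def ppo_loss(logp_new, logp_old, advantages, values, returns, cfg):
    if cfg["normalize_advantages"]:
        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
    ratio = torch.exp(logp_new - logp_old)
    # Clipped surrogate: bound the update to ratio in [1 - eps, 1 + eps].
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio,
                          1.0 - cfg["clip_range"],
                          1.0 + cfg["clip_range"]) * advantages
    policy_loss = -torch.min(unclipped, clipped).mean()
    value_loss = F.mse_loss(values, returns)       # weighted by value_coef
    entropy = -(logp_new.exp() * logp_new).mean()  # rough token-level proxy
    return (policy_loss
            + cfg["value_coef"] * value_loss
            - cfg["entropy_coef"] * entropy)
```

`max_grad_norm` is applied separately at the optimizer step, typically via `torch.nn.utils.clip_grad_norm_`.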
## reward

```yaml
reward:
  reward_type: rule_based      # rule_based, model_based, hybrid
  reward_model_path: null      # Path to reward model
  rule_functions: []           # List of rule function names
  reward_scale: 1.0            # Scale factor
  reward_clip: 10.0            # Clip to [-reward_clip, reward_clip]
  baseline_type: mean          # none, mean, per_token
  kl_penalty_coef: 0.0         # KL penalty applied inside the reward
```
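One plausible composition of these fields at reward time (the ordering is illustrative; the actual pipeline may differ): scale, clip, subtract the KL penalty, then subtract the baseline:

```python
# Illustrative reward post-processing; the actual order of operations in
# Flux may differ.
import numpy as np

def shape_rewards(raw, kl_per_sample, cfg):
    r = np.asarray(raw, dtype=np.float64) * cfg["reward_scale"]
    r = np.clip(r, -cfg["reward_clip"], cfg["reward_clip"])
    if cfg["kl_penalty_coef"] > 0.0:
        r = r - cfg["kl_penalty_coef"] * np.asarray(kl_per_sample)
    if cfg["baseline_type"] == "mean":
        r = r - r.mean()  # group-mean baseline (GRPO/RLOO-style centering)
    return r
```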
## sglang

```yaml
sglang:
  base_url: http://localhost:8000
  num_servers: 1               # Number of server instances
  server_urls: null            # Explicit URL list (overrides base_url/num_servers)
  timeout: 60.0                # Request timeout (seconds)
  max_retries: 3               # Retry count
  use_streaming: true          # Streaming responses
```
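When `server_urls` is null, a natural client pattern is to derive `num_servers` consecutive ports from `base_url` and round-robin requests with retries. A sketch using `requests` against SGLang's OpenAI-compatible `/v1/completions` endpoint (the port layout and payload are assumptions, not Flux's client):

```python
# Round-robin client sketch; port layout and request payload are assumptions.
import itertools
import requests

num_servers = 2
urls = [f"http://localhost:{8000 + i}" for i in range(num_servers)]
server_cycle = itertools.cycle(urls)

def generate(prompt, cfg):
    for attempt in range(cfg["max_retries"] + 1):
        base = next(server_cycle)  # spread load across server instances
        try:
            resp = requests.post(
                f"{base}/v1/completions",
                json={"model": "default", "prompt": prompt, "max_tokens": 256},
                timeout=cfg["timeout"],
            )
            resp.raise_for_status()
            return resp.json()
        except requests.RequestException:
            if attempt == cfg["max_retries"]:
                raise  # out of retries
```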
## megatron

```yaml
megatron:
  tp_size: 1                   # Tensor parallelism
  pp_size: 1                   # Pipeline parallelism
  dp_size: 1                   # Data parallelism
  sequence_parallel: false
  activation_checkpointing: true
  fp16: false
  bf16: true
  use_flash_attention: true
  accumulate_allreduce_grads_in_fp32: true
```
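One constraint worth checking, assuming Flux follows the standard Megatron-LM convention: the three parallelism degrees must factorize the training world size, i.e. `tp_size * pp_size * dp_size == WORLD_SIZE`:

```python
# Sanity check under the standard Megatron-LM convention (assumed here):
# every training GPU gets exactly one (tp, pp, dp) coordinate.
import os

tp_size, pp_size, dp_size = 1, 1, 1                  # values from above
world_size = int(os.environ.get("WORLD_SIZE", "1"))  # set by torchrun
assert tp_size * pp_size * dp_size == world_size, (
    f"tp*pp*dp = {tp_size * pp_size * dp_size}, expected {world_size}")
```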
## weight_sync

```yaml
weight_sync:
  method: delta                # full, delta, per_tensor
  sync_interval: 1             # Steps between syncs
  use_cuda_ipc: true           # CUDA IPC for same node
  sparsity_threshold: 1.0e-6
  sparsity_target: 0.3
  snapshot_interval: 10
  max_snapshots: 5
  quantize: false
  quantize_bits: 16            # 8 or 16
```
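The `delta` method suggests shipping parameter differences against a periodic snapshot, with near-zero changes dropped. A hedged sketch of what that could look like (`sparsity_threshold` filters tiny entries; whether a tensor is packed sparsely depends on how dense its delta turned out, here compared against `sparsity_target`):

```python
# Hedged sketch of delta-style weight packing; not Flux's actual wire format.
import torch

def pack_delta(new_state, snapshot_state, cfg):
    packed = {}
    for name, new in new_state.items():
        delta = new - snapshot_state[name]
        mask = delta.abs() > cfg["sparsity_threshold"]  # drop near-zero entries
        density = mask.float().mean().item()
        if density <= cfg["sparsity_target"]:
            # Few entries changed: indices + values are cheaper than dense.
            packed[name] = ("sparse", mask.nonzero(as_tuple=False), delta[mask])
        else:
            packed[name] = ("dense", delta)
    return packed
```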
## batch_composer

```yaml
batch_composer:
  use_length_bucketing: true
  use_staleness_balancing: true
  use_curriculum: true
  length_bucket_boundaries: [512, 1024, 2048]
  staleness_strata: 4
  curriculum_randomness_decay: 1.0
```
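Length bucketing groups rollouts of similar token counts so each micro-batch pads to a similar maximum. A sketch using the boundaries above (the `tokens` field name is a placeholder, not Flux's schema):

```python
# Length-bucketing sketch; the "tokens" field name is a placeholder.
from bisect import bisect_left
from collections import defaultdict

boundaries = [512, 1024, 2048]

def bucket_by_length(samples):
    buckets = defaultdict(list)
    for sample in samples:
        # bisect_left maps length <= 512 to bucket 0, <= 1024 to bucket 1,
        # <= 2048 to bucket 2, and anything longer to overflow bucket 3.
        idx = bisect_left(boundaries, len(sample["tokens"]))
        buckets[idx].append(sample)
    return buckets
```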