majiayu000 · majiayu000 · Apr 27, 2026 · Apr 21, 2026 · Apr 23, 2026
diff --git a/Cargo.toml b/Cargo.toml
@@ -3,10 +3,10 @@ name = "ccstats"
 version = "0.2.61"
 edition = "2024"
 rust-version = "1.85"
-description = "Fast Claude Code token usage statistics CLI"
+description = "Fast token and cost usage statistics CLI for Claude Code and OpenAI Codex"
 license = "MIT"
 repository = "https://github.com/majiayu000/ccstats"
-keywords = ["claude", "anthropic", "token", "usage", "cli"]
+keywords = ["claude", "codex", "openai", "token", "usage"]
 categories = ["command-line-utilities"]
 
 [package.metadata.binstall]

diff --git a/README.md b/README.md
@@ -194,7 +194,12 @@ See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for:
 - Adding new data sources
 - Data flow and processing pipeline
 - Caching mechanism
-- Deduplication algorithm
+- Architecture and module boundaries
+
+See [docs/algorithm/authoritative-token-accounting.md](docs/algorithm/authoritative-token-accounting.md) for:
+- Token accounting rules
+- Source-specific normalization
+- Deduplication semantics
 
 ## License
 

diff --git a/docs/algorithm/authoritative-token-accounting.md b/docs/algorithm/authoritative-token-accounting.md
@@ -83,12 +83,14 @@ Anthropic 的字段天然互不重叠，parser 直接映射即可。
 
 Claude Code 的流式响应会为同一个 `message.id` 写入多条日志（每个 chunk 都可能更新 usage）。去重规则：
 
-1. 以 `message.id` 为全局去重键（跨主文件和 subagent 文件）
-2. 同一 `message.id` 的多条记录，选择规则：
-   - 优先选有 `stop_reason` 的（表示完成），取最早的一条
+1. 以“源日志文件 + `message.id`”作为去重键
+2. 同一去重键的多条记录，选择规则：
+   - 优先选有 `stop_reason` 的（表示完成），取最晚的一条
    - 若都没有 `stop_reason`，取最晚的一条（最佳近似）
 3. 没有 `message.id` 的条目：仅当有 `stop_reason` 时才计入
 
+这样可以避免不同日志文件中碰巧复用同一 `message.id` 时发生误去重，同时仍然保留同一文件内流式 chunk 的合并行为。
+
 ### 模型名归一化
 
 ```

diff --git a/src/cli/args.rs b/src/cli/args.rs
@@ -41,7 +41,10 @@ pub(crate) enum CostMode {
 
 #[derive(Parser)]
 #[command(name = "ccstats")]
-#[command(about = "Fast Claude Code token usage statistics", version)]
+#[command(
+    about = "Fast token and cost usage statistics for Claude Code and OpenAI Codex",
+    version
+)]
 #[allow(clippy::struct_excessive_bools)]
 pub(crate) struct Cli {
     #[command(subcommand)]

diff --git a/src/core/aggregator.rs b/src/core/aggregator.rs
@@ -23,6 +23,8 @@ pub(crate) fn aggregate_daily(entries: Vec<RawEntry>) -> HashMap<String, DayStat
 /// Session accumulator for building session stats
 #[derive(Debug, Default)]
 struct SessionAccumulator {
+    session_key: String,
+    session_id: String,
     project_path: String,
     first_timestamp: String,
     last_timestamp: String,
@@ -33,8 +35,16 @@ struct SessionAccumulator {
 }
 
 impl SessionAccumulator {
-    fn new(project_path: String, timestamp: &str, timestamp_ms: i64) -> Self {
+    fn new(
+        session_key: String,
+        session_id: String,
+        project_path: String,
+        timestamp: &str,
+        timestamp_ms: i64,
+    ) -> Self {
         SessionAccumulator {
+            session_key,
+            session_id,
             project_path,
             first_timestamp: timestamp.to_string(),
             last_timestamp: timestamp.to_string(),
@@ -86,29 +96,39 @@ impl SessionAccumulator {
             self.last_timestamp_ms = timestamp_ms;
         }
     }
+
+    fn into_session_stats(self) -> SessionStats {
+        SessionStats {
+            session_key: self.session_key,
+            session_id: self.session_id,
+            project_path: self.project_path,
+            first_timestamp: self.first_timestamp,
+            last_timestamp: self.last_timestamp,
+            stats: self.stats,
+            models: self.models,
+        }
+    }
 }
 
 /// Aggregate entries by session (consumes entries to avoid cloning)
 pub(crate) fn aggregate_sessions(entries: Vec<RawEntry>) -> Vec<SessionStats> {
-    let session_map = aggregate_sessions_map(entries);
-    session_map
-        .into_iter()
-        .map(|(session_id, mut session)| {
-            session.session_id = session_id;
-            session
-        })
-        .collect()
+    aggregate_sessions_map(entries).into_values().collect()
 }
 
-/// Aggregate entries by session into a map keyed by session ID.
-/// Values intentionally omit `session_id` to avoid duplicate storage.
+/// Aggregate entries into a map keyed by session key (or `session_id` if the key is empty).
 pub(crate) fn aggregate_sessions_map(entries: Vec<RawEntry>) -> HashMap<String, SessionStats> {
     let mut sessions: HashMap<String, SessionAccumulator> = HashMap::with_capacity(entries.len());
 
-    for mut entry in entries {
-        let session_id = std::mem::take(&mut entry.session_id);
-        let session = sessions.entry(session_id).or_insert_with(|| {
+    for entry in entries {
+        let session_key = if entry.session_key.is_empty() {
+            entry.session_id.clone()
+        } else {
+            entry.session_key.clone()
+        };
+        let session = sessions.entry(session_key.clone()).or_insert_with(|| {
             SessionAccumulator::new(
+                session_key,
+                entry.session_id.clone(),
                 entry.project_path.clone(),
                 &entry.timestamp,
                 entry.timestamp_ms,
@@ -119,19 +139,7 @@ pub(crate) fn aggregate_sessions_map(entries: Vec<RawEntry>) -> HashMap<String,
 
     sessions
         .into_iter()
-        .map(|(session_id, acc)| {
-            (
-                session_id,
-                SessionStats {
-                    session_id: String::new(),
-                    project_path: acc.project_path,
-                    first_timestamp: acc.first_timestamp,
-                    last_timestamp: acc.last_timestamp,
-                    stats: acc.stats,
-                    models: acc.models,
-                },
-            )
-        })
+        .map(|(session_key, acc)| (session_key, acc.into_session_stats()))
         .collect()
 }
 
@@ -243,6 +251,7 @@ mod tests {
             timestamp_ms: ts_ms,
             date_str: date.to_string(),
             message_id: None,
+            session_key: session.to_string(),
             session_id: session.to_string(),
             project_path: project.to_string(),
             model: model.to_string(),
@@ -396,6 +405,7 @@ mod tests {
                 timestamp_ms: 5000,
                 date_str: "2025-01-01".to_string(),
                 message_id: None,
+                session_key: "s1".to_string(),
                 session_id: "s1".to_string(),
                 project_path: "p1".to_string(),
                 model: "claude".to_string(),
@@ -411,6 +421,7 @@ mod tests {
                 timestamp_ms: 1000,
                 date_str: "2025-01-01".to_string(),
                 message_id: None,
+                session_key: "s1".to_string(),
                 session_id: "s1".to_string(),
                 project_path: "p1".to_string(),
                 model: "claude".to_string(),
@@ -426,6 +437,7 @@ mod tests {
                 timestamp_ms: 9000,
                 date_str: "2025-01-01".to_string(),
                 message_id: None,
+                session_key: "s1".to_string(),
                 session_id: "s1".to_string(),
                 project_path: "p1".to_string(),
                 model: "claude".to_string(),
@@ -475,6 +487,7 @@ mod tests {
     #[test]
     fn aggregate_projects_single_project() {
         let sessions = vec![SessionStats {
+            session_key: "s1".to_string(),
             session_id: "s1".to_string(),
             project_path: "/Users/john/myapp".to_string(),
             first_timestamp: "t1".to_string(),

diff --git a/src/core/dedup.rs b/src/core/dedup.rs
@@ -11,6 +11,9 @@ pub(crate) trait Deduplicatable {
     fn timestamp_ms(&self) -> i64;
     fn has_stop_reason(&self) -> bool;
     fn message_id(&self) -> Option<&str>;
+    fn dedup_scope(&self) -> Option<&str> {
+        None
+    }
 }
 
 impl Deduplicatable for RawEntry {
@@ -25,6 +28,10 @@ impl Deduplicatable for RawEntry {
     fn message_id(&self) -> Option<&str> {
         self.message_id.as_deref()
     }
+
+    fn dedup_scope(&self) -> Option<&str> {
+        Some(&self.session_key)
+    }
 }
 
 /// State machine for tracking best candidate entry for a message ID
@@ -114,7 +121,7 @@ impl<T: Deduplicatable> CandidateState<T> {
 /// Incremental dedup accumulator for chunked/parallel loading.
 #[derive(Debug)]
 pub(crate) struct DedupAccumulator<T: Deduplicatable> {
-    message_map: HashMap<String, CandidateState<T>>,
+    message_map: HashMap<(String, String), CandidateState<T>>,
     no_id_entries: Vec<T>,
     total_with_id: i64,
 }
@@ -137,11 +144,14 @@ impl<T: Deduplicatable> DedupAccumulator<T> {
     pub(crate) fn push(&mut self, entry: T) {
         if let Some(id) = entry.message_id() {
             self.total_with_id += 1;
-            match self.message_map.get_mut(id) {
+            let key = (
+                entry.dedup_scope().unwrap_or_default().to_string(),
+                id.to_string(),
+            );
+            match self.message_map.get_mut(&key) {
                 Some(state) => state.update(entry),
                 None => {
-                    self.message_map
-                        .insert(id.to_string(), CandidateState::new(entry));
+                    self.message_map.insert(key, CandidateState::new(entry));
                 }
             }
         } else if entry.has_stop_reason() {
@@ -162,11 +172,11 @@ impl<T: Deduplicatable> DedupAccumulator<T> {
         self.total_with_id += other.total_with_id;
         self.no_id_entries.extend(other.no_id_entries);
 
-        for (id, state) in other.message_map {
-            match self.message_map.get_mut(&id) {
+        for (key, state) in other.message_map {
+            match self.message_map.get_mut(&key) {
                 Some(existing) => existing.merge(state),
                 None => {
-                    self.message_map.insert(id, state);
+                    self.message_map.insert(key, state);
                 }
             }
         }

diff --git a/src/core/types.rs b/src/core/types.rs
@@ -56,6 +56,7 @@ impl DayStats {
 /// Session statistics
 #[derive(Debug, Default, Clone)]
 pub(crate) struct SessionStats {
+    pub(crate) session_key: String,
     pub(crate) session_id: String,
     pub(crate) project_path: String,
     pub(crate) first_timestamp: String,
@@ -95,6 +96,9 @@ pub(crate) struct RawEntry {
     pub(crate) date_str: String,
     /// Message ID for deduplication (optional)
     pub(crate) message_id: Option<String>,
+    /// Stable internal session identity for aggregation/dedup; not (de)serialized, so may be empty.
+    #[serde(skip_serializing, skip_deserializing, default)]
+    pub(crate) session_key: String,
     /// Session ID
     pub(crate) session_id: String,
     /// Project path (may be empty for some sources)
@@ -296,6 +300,7 @@ mod tests {
             timestamp_ms: 0,
             date_str: String::new(),
             message_id: None,
+            session_key: String::new(),
             session_id: String::new(),
             project_path: String::new(),
             model: String::new(),

diff --git a/src/main.rs b/src/main.rs
@@ -198,11 +198,21 @@ fn main() {
     // Initialize currency converter if requested
     let currency_converter = if show_cost {
         cli.currency.as_ref().map(|code| {
-            let Some(conv) = CurrencyConverter::load(code, cli.offline) else {
-                eprintln!("Error: failed to load exchange rate for '{code}'");
-                std::process::exit(1);
+            let conv = if let Some(conv) = CurrencyConverter::load(code, cli.offline) {
+                conv
+            } else {
+                if !is_statusline {
+                    eprintln!(
+                        "Warning: failed to load exchange rate for '{code}', showing USD costs."
+                    );
+                }
+                let Some(conv) = CurrencyConverter::load("USD", true) else {
+                    eprintln!("Error: failed to initialize USD currency converter");
+                    std::process::exit(1);
+                };
+                conv
             };
-            if !is_statusline {
+            if !is_statusline && conv.currency_code() != "USD" {
                 eprintln!(
                     "Converting costs to {} (rate: displayed as {})",
                     conv.currency_code(),

diff --git a/src/output/csv.rs b/src/output/csv.rs
@@ -462,6 +462,7 @@ mod tests {
     #[test]
     fn session_csv_structure() {
         let sessions = vec![SessionStats {
+            session_key: "abc-123".to_string(),
             session_id: "abc-123".to_string(),
             project_path: "/home/user/project".to_string(),
             first_timestamp: "2025-01-01T00:00:00Z".to_string(),
@@ -489,6 +490,7 @@ mod tests {
     #[test]
     fn session_csv_includes_reasoning_and_cache_tokens() {
         let sessions = vec![SessionStats {
+            session_key: "reasoning".to_string(),
             session_id: "reasoning".to_string(),
             project_path: String::new(),
             first_timestamp: "2025-01-01T00:00:00Z".to_string(),

diff --git a/src/output/session.rs b/src/output/session.rs
@@ -440,6 +440,7 @@ mod tests {
 
     fn make_session(id: &str, last_ts: &str, input: i64, output: i64) -> SessionStats {
         SessionStats {
+            session_key: id.to_string(),
             session_id: id.to_string(),
             project_path: "/home/user/project".to_string(),
             first_timestamp: "2026-02-12T08:00:00Z".to_string(),
@@ -515,6 +516,7 @@ mod tests {
         models.insert("haiku".to_string(), Stats::default());
 
         let sessions = vec![SessionStats {
+            session_key: "s1".to_string(),
             session_id: "s1".to_string(),
             models,
             ..Default::default()

diff --git a/src/output/tools.rs b/src/output/tools.rs
@@ -1,8 +1,9 @@
 //! Output formatters for tool usage statistics
 
-use comfy_table::CellAlignment;
 use std::fmt::Write;
 
+use comfy_table::CellAlignment;
+
 use crate::core::ToolSummary;
 
 use super::format::{create_styled_table, header_cell, right_cell};

diff --git a/src/pricing/resolver.rs b/src/pricing/resolver.rs
@@ -143,6 +143,12 @@ pub(super) fn fallback_pricing(model: &str) -> ModelPricing {
             cache_create: 1e-6,
             cache_read: 0.08e-6,
         }
+    } else if model_lower.contains("gpt-5.4-mini") {
+        openai_pricing(0.75e-6, 4.5e-6, 0.075e-6)
+    } else if model_lower.contains("gpt-5.4-nano") {
+        openai_pricing(0.2e-6, 1.25e-6, 0.02e-6)
+    } else if model_lower.contains("gpt-5.4") {
+        openai_pricing(2.5e-6, 15e-6, 0.25e-6)
     } else if model_lower.contains("gpt-5.1-codex-mini") {
         openai_pricing(0.25e-6, 2e-6, 0.025e-6)
     } else if model_lower.contains("gpt-5.2-codex") || model_lower.contains("gpt-5.3-codex") {
@@ -389,6 +395,14 @@ mod tests {
         assert_eq!(p.cache_read, 0.025e-6);
     }
 
+    #[test]
+    fn test_fallback_gpt5_4_mini() {
+        let p = fallback_pricing("gpt-5.4-mini");
+        assert_eq!(p.input, 0.75e-6);
+        assert_eq!(p.output, 4.5e-6);
+        assert_eq!(p.cache_read, 0.075e-6);
+    }
+
     #[test]
     fn test_fallback_gpt5_2_codex() {
         let p = fallback_pricing("gpt-5.2-codex");