{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://trenchsignals.io/api/dataset/schema.json",
  "title": "Trench Eval Dataset record",
  "description": "One paired (intel context, model reasoning, probability, market price, eventual outcome) row. See https://trenchsignals.io/dataset/schema for prose.",
  "type": "object",
  "additionalProperties": false,
  "properties": {
    "sample_id": {
      "description": "Deterministic identifier derived from made_at + ticker (sha256[:8] suffix). Stable across re-exports of the same source row.",
      "type": "string",
      "x-stability": "stable"
    },
    "schema_version": {
      "description": "Schema version of this record. Semver; breaking changes bump the major.",
      "type": "string",
      "enum": ["0.1.0"],
      "x-stability": "stable"
    },
    "question": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "text": {
          "description": "Exact wording of the market question, as published by the exchange.",
          "type": "string",
          "x-stability": "stable"
        },
        "type": {
          "description": "v0.1.0 ships binary only. Numeric and categorical land in v0.2.",
          "type": "string",
          "enum": ["binary"],
          "x-stability": "stable"
        },
        "exchange": {
          "description": "Where the market was listed and where the outcome was settled.",
          "type": "string",
          "enum": ["kalshi", "polymarket"],
          "x-stability": "stable"
        },
        "ticker": {
          "description": "Exchange-side market identifier. Stable, queryable on the exchange's public API.",
          "type": "string",
          "x-stability": "stable"
        },
        "theater": {
          "description": "Geopolitical theater tag. Null for markets the live tagger did not classify. v0.2 will backfill tags retroactively.",
          "type": ["string", "null"],
          "enum": [
            "iran",
            "israel",
            "ukraine",
            "russia",
            "taiwan",
            "china",
            "lebanon",
            "middle_east",
            "gaza",
            "syria",
            "yemen",
            "red_sea",
            "africa",
            "sahel",
            null
          ],
          "x-stability": "experimental"
        },
        "event_type": {
          "description": "Sub-event tag where the bot's analyzer set one (e.g. direct_strike, nuclear_deal). Often null in v0.1.0.",
          "type": ["string", "null"],
          "x-stability": "experimental"
        },
        "resolves_at": {
          "description": "Exchange-stated resolution deadline at the time of prediction. May differ from resolution.resolved_at if the market settled early/late.",
          "type": "string",
          "format": "date-time",
          "x-stability": "stable"
        }
      }
    },
    "prediction": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "made_at": {
          "description": "Wall-clock UTC timestamp the model produced this record. Microsecond precision. The anchor pipeline timestamps every bundle within the same analyzer call.",
          "type": "string",
          "format": "date-time",
          "x-stability": "stable"
        },
        "model": {
          "description": "Model identifier. Always claude-sonnet-4-6 in v0.1.0; v0.2 adds ensemble shadow records from OpenAI + Gemini.",
          "type": "string",
          "enum": ["claude-sonnet-4-6", "gpt-4o", "gemini-1.5-pro"],
          "x-stability": "stable"
        },
        "side": {
          "description": "The side the bot actually traded, or null if the bot scored the market but skipped the trade. The reasoning trace stands on its own regardless.",
          "type": ["string", "null"],
          "enum": ["YES", "NO", null],
          "x-stability": "stable"
        },
        "probability_yes": {
          "description": "Claude's estimated probability that the market resolves YES. resolution.brier_score is computed against this field.",
          "type": "number",
          "minimum": 0.0,
          "maximum": 1.0,
          "x-stability": "stable"
        },
        "confidence": {
          "description": "Claude's self-reported confidence in the directional call. Empirically not the same as 1-sigma; measures conviction. Useful as an eval-time feature.",
          "type": "number",
          "minimum": 0.0,
          "maximum": 1.0,
          "x-stability": "stable"
        },
        "direction": {
          "description": "The signal-level call. Distinct from side: a model can score ESCALATE but trade NO if the market is over-pricing.",
          "type": "string",
          "enum": ["ESCALATE", "DEESCALATE", "YES", "NO", "SKIP"],
          "x-stability": "stable"
        },
        "reasoning": {
          "description": "The full Claude reasoning text, verbatim. ~500–2000 chars typical, occasionally up to ~4000. This is the field that makes the dataset valuable for reasoning-eval work.",
          "type": "string",
          "x-stability": "stable"
        }
      }
    },
    "context": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "market_yes_price": {
          "description": "Mid-price of YES at prediction time. Edge = prediction.probability_yes − this.",
          "type": ["number", "null"],
          "minimum": 0.0,
          "maximum": 1.0,
          "x-stability": "stable"
        },
        "market_yes_bid": {
          "description": "Best-bid of YES, where available. Combined with ask/spread, useful for execution-quality analysis.",
          "type": ["number", "null"],
          "minimum": 0.0,
          "maximum": 1.0,
          "x-stability": "stable"
        },
        "market_yes_ask": {
          "description": "Best-ask of YES, where available.",
          "type": ["number", "null"],
          "minimum": 0.0,
          "maximum": 1.0,
          "x-stability": "stable"
        },
        "spread": {
          "description": "ask − bid. Large spreads (>0.10) are common on thin markets and may bias realized P&L away from eval-time edge.",
          "type": ["number", "null"],
          "minimum": 0.0,
          "maximum": 1.0,
          "x-stability": "stable"
        },
        "replay_bundle_id": {
          "description": "Pointer to the full input intel snapshot. GET /api/replay-bundle/{id} returns the bundle + a sha256 that matches anchor.bundle_sha256. Null for records made before 2026-05-07.",
          "type": ["string", "null"],
          "x-stability": "nullable-pre-anchor"
        },
        "intel_summary": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "num_items": {
              "description": "Total count of intel items in the replay bundle (RSS + tweets + Telegram + financial indicators + ontology activations).",
              "type": ["integer", "null"],
              "minimum": 0,
              "x-stability": "nullable-pre-anchor"
            },
            "by_source_type": {
              "description": "Histogram of intel items by source category (english_media, iranian_media, financial_indicator, …). Use to filter records by input distribution.",
              "type": ["object", "null"],
              "additionalProperties": {
                "type": "integer",
                "minimum": 0
              },
              "x-stability": "nullable-pre-anchor"
            },
            "languages": {
              "description": "Distinct natural languages present in the input intel (ISO 639-1 codes: en, fa, he, ru, zh, …).",
              "type": ["array", "null"],
              "items": {
                "type": "string"
              },
              "x-stability": "nullable-pre-anchor"
            }
          }
        }
      }
    },
    "resolution": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "resolved_at": {
          "description": "When the market actually resolved, sourced from the exchange's settlement event.",
          "type": "string",
          "format": "date-time",
          "x-stability": "stable"
        },
        "outcome": {
          "description": "Realized outcome. Derived from settlement_yes_price >= 0.5. Binary markets normally settle at exactly 0 or 1; see settlement_yes_price for the rare intermediate settlements.",
          "type": "string",
          "enum": ["YES", "NO"],
          "x-stability": "stable"
        },
        "settlement_yes_price": {
          "description": "Final settlement value for YES. Almost always 0.0 or 1.0; some Kalshi markets settle at intermediate values when contracts split.",
          "type": "number",
          "minimum": 0.0,
          "maximum": 1.0,
          "x-stability": "stable"
        },
        "days_to_resolution": {
          "description": "Wall-clock days between made_at and resolved_at. Used in time-decay analyses at /methodology.",
          "type": "number",
          "minimum": 0.0,
          "x-stability": "stable"
        },
        "brier_score": {
          "description": "Standard binary Brier: (probability_yes − outcome_yes)^2. Recomputable byte-identical from other fields.",
          "type": "number",
          "minimum": 0.0,
          "maximum": 1.0,
          "x-stability": "stable"
        }
      }
    },
    "anchor": {
      "oneOf": [
        {
          "type": "null"
        },
        {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "registry_date": {
              "description": "YYYY-MM-DD of the day-file in the public registry chain that contains this bundle's hash. Same as made_at[0:10].",
              "type": "string",
              "format": "date",
              "x-stability": "nullable-pre-anchor"
            },
            "registry_url": {
              "description": "Direct link to that day's hash chain. Fetching it and grepping for bundle_sha256 is step 3 of the verification protocol.",
              "type": "string",
              "format": "uri",
              "x-stability": "nullable-pre-anchor"
            },
            "bundle_sha256": {
              "description": "SHA-256 of canonical_json(bundle) — sorted keys, no whitespace, ASCII-safe. Reference impl in trench-core on PyPI.",
              "type": "string",
              "pattern": "^[0-9a-f]{64}$",
              "x-stability": "nullable-pre-anchor"
            },
            "wayback_url": {
              "description": "Internet Archive capture URL for the registry day-file. Verifier confirms capture timestamp < resolution.resolved_at — proof the prediction predated the outcome.",
              "type": "string",
              "format": "uri",
              "x-stability": "nullable-pre-anchor"
            }
          },
          "required": ["registry_date", "registry_url", "bundle_sha256", "wayback_url"]
        }
      ],
      "description": "Hash-anchor proof block. Null for predictions made before the replay-bundle pipeline launched (2026-05-07). When non-null, every sub-field is required."
    }
  },
  "required": ["sample_id", "schema_version", "question", "prediction", "context", "resolution"]
}