def estimate(
self,
docs: list[Document],
strategies: list[str],
param_grid: dict[str, list[dict]],
embedding_model: str,
*,
generate_dataset: bool = True,
avg_chunks_per_doc: float = 3.0,
llm_tokens_per_query: int = 400,
) -> CostEstimate:
"""Heuristic token counts × strategy configs; USD from litellm when available."""
doc_tokens = _sum_doc_tokens(docs)
# One config per (strategy, param-set) pair; a strategy missing from the
# grid still counts once via the [{}] default.
n_configs = sum(len(param_grid.get(s, [{}])) for s in strategies)
if n_configs == 0:
n_configs = len(strategies)
est_chunks_total = len(docs) * avg_chunks_per_doc * n_configs
# Heuristic: avg chunk ≈ 64 tokens (covers the fixed_tokens/recursive
# defaults of 256–1024 tokens split across ~6 chunks), plus 32
# query-embedding tokens per doc per config (2 queries/doc × 16
# tokens/query on average). The constants are rough; the max() floor
# below guards the cases where they under-estimate.
embed_tokens = int(est_chunks_total * 64) + int(len(docs) * 32 * n_configs)
embed_tokens = max(embed_tokens, doc_tokens * n_configs // 4)
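# Worked example (hypothetical sizes): 100 docs × 4 configs × 3.0
# chunks/doc → 1,200 chunks → 1,200×64 + 100×32×4 = 89,600 embed tokens,
# floored at doc_tokens×4//4 = doc_tokens.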
embed_cost = _embedding_cost_usd(embedding_model, embed_tokens)
llm_cost = 0.0
if generate_dataset:
# Dataset generation: two synthetic queries per doc (floor of one),
# each priced at llm_tokens_per_query tokens.
q = max(1, len(docs) * 2)
llm_cost = _llm_cost_usd(q * llm_tokens_per_query)
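# e.g. (hypothetical sizes) 100 docs → q = 200 queries → 200 × 400 =
# 80,000 LLM tokens priced.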
# Wall time covers embedding throughput only; LLM calls are not modeled.
wall = embed_tokens / THROUGHPUT_API_TOKENS_PER_MIN
return CostEstimate(
total_tokens=embed_tokens,
embedding_cost_usd=round(embed_cost, 6),
llm_cost_usd=round(llm_cost, 6),
estimated_wall_time_min=round(wall, 3),
strategy_configs=n_configs,
)
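
# For reference, one plausible shape of the `_embedding_cost_usd` helper
# used above (a sketch, not this module's actual implementation; it
# assumes litellm's `cost_per_token`, which returns a
# (prompt_cost, completion_cost) pair in USD):
#
#     def _embedding_cost_usd(model: str, tokens: int) -> float:
#         try:
#             import litellm
#             prompt_usd, _ = litellm.cost_per_token(
#                 model=model, prompt_tokens=tokens, completion_tokens=0
#             )
#             return prompt_usd
#         except Exception:
#             return 0.0  # unknown model / litellm absent → price as $0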