Config

DatasetConfig pydantic-model

Bases: BaseModel

Configuration of the dataset to be evaluated

Fields:

  • name (str)
  • split (str | None)
  • hf_subset (str | None)
Source code in evals_hub/config.py
class DatasetConfig(BaseModel):
    """Configuration of the dataset to be evaluated"""

    name: str
    split: str | None = None
    hf_subset: str | None = None
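
A minimal usage sketch (the dataset name and split below are illustrative, not values shipped with the project):

from evals_hub.config import DatasetConfig

# Only `name` is required; `split` and `hf_subset` default to None.
dataset_cfg = DatasetConfig(name="example-org/example-retrieval-dataset", split="test")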

EvaluationConfig pydantic-model

Bases: BaseModel

Evaluation configuration

Fields:

  • top_k (int)
  • batch_size (int)
  • seed (int | None)
  • max_length (int | None)
  • prompt_name_query (str | None)
  • prompt_name_doc (str | None)
  • samples_per_label (int | None)
  • n_experiments (int)
  • num_patents_per_query (int | None)
  • patents_per_search (int | None)
  • starting_stage (Annotated[str, Field(None, description='Stage to start from when running the benchmark')])
  • langfuse_version_tag (Annotated[str | None, Field(None, description='A string used to identify the current evaluation run')])
  • max_concurrency (Annotated[int | None, Field(None, description='Max concurrency to be used for evaluation')])
Source code in evals_hub/config.py
class EvaluationConfig(BaseModel):
    """Evaluation configuration"""

    top_k: int = 10
    batch_size: int = 16
    seed: int | None = 42
    max_length: int | None = None
    prompt_name_query: str | None = None
    prompt_name_doc: str | None = None
    samples_per_label: int | None = None
    n_experiments: int = 10
    num_patents_per_query: int | None = None
    patents_per_search: int | None = None
    starting_stage: Annotated[
        str,
        Field(
            None,
            description="Stage to start for running the benchmark",
        ),
    ]
    langfuse_version_tag: Annotated[
        str | None,
        Field(None, description="A string used to identify the current evaluation run"),
    ]
    max_concurrency: Annotated[
        int | None, Field(None, description="Max concurrency to be used for evaluation")
    ]
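
A short sketch of overriding a few of the defaults above; the stage name and version tag are placeholders, since their expected values are defined elsewhere in the project:

from evals_hub.config import EvaluationConfig

# Fields left unset keep their defaults (e.g. top_k=10, batch_size=16, seed=42).
eval_cfg = EvaluationConfig(
    top_k=20,
    batch_size=32,
    starting_stage="retrieval",          # placeholder stage name
    langfuse_version_tag="run-2024-01",  # placeholder run identifier
    max_concurrency=8,
)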

MetricsConfig pydantic-model

Bases: BaseModel

Fields:

  • map (str | None)
  • mrr (str | None)
  • ndcg (str | None)
  • recall (str | None)
  • precision (str | None)
  • micro_avg_f1 (str | None)
  • macro_avg_f1 (str | None)
  • accuracy (str | None)
Source code in evals_hub/config.py
class MetricsConfig(BaseModel):
    map: str | None = Field(None, description="Identifier for MAP metric")
    mrr: str | None = Field(None, description="Identifier for MRR metric")
    ndcg: str | None = Field(None, description="Identifier for NDCG metric")
    recall: str | None = Field(None, description="Identifier for Recall metric")
    precision: str | None = Field(None, description="Identifier for Precision metric")
    micro_avg_f1: str | None = Field(
        None, description="Identifier for micro average F1 metric"
    )
    macro_avg_f1: str | None = Field(
        None, description="Identifier for macro average F1 metric"
    )
    accuracy: str | None = Field(None, description="Identifier for accuracy metric")

accuracy = None pydantic-field

Identifier for accuracy metric

macro_avg_f1 = None pydantic-field

Identifier for macro average F1 metric

map = None pydantic-field

Identifier for MAP metric

micro_avg_f1 = None pydantic-field

Identifier for micro average F1 metric

mrr = None pydantic-field

Identifier for MRR metric

ndcg = None pydantic-field

Identifier for NDCG metric

precision = None pydantic-field

Identifier for Precision metric

recall = None pydantic-field

Identifier for Recall metric
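
A sketch of enabling only a subset of metrics; the identifier strings are placeholders, since their expected format is defined elsewhere in the project:

from evals_hub.config import MetricsConfig

# Metrics left as None are simply not reported.
metrics_cfg = MetricsConfig(
    ndcg="ndcg_at_10",       # placeholder identifier
    recall="recall_at_100",  # placeholder identifier
)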

ModelConfig pydantic-model

Bases: BaseModel

Model config. One and only one of checkpoint or import_path must be provided.

Fields:

  • checkpoint (Annotated[str | None, Field(None, description='Model checkpoint, if using a Transformers/SentenceTransformers model')])
  • device (Annotated[Literal['cpu', 'cuda', 'mps', 'auto'], Field(auto, description="Device to run the model on. Options: 'auto', 'cpu', 'cuda', 'mps'. Auto selects the best available device.")])
  • system_prompt_path (Annotated[Path | None, Field(None, description='System prompt path')])
  • user_prompt_path (Annotated[Path | None, Field(None, description='User prompt path')])
  • import_path (Annotated[str | None, Field(None, description="Import path for a callable which runs a model, in the form 'a.b.c:<function>'")])
  • model_settings (Annotated[dict[str, Any], Field(default_factory=dict)])
  • reranking_method (Annotated[Literal['api', 'embedding'], Field(embedding, description="Reranking method: 'api' for API-based, 'embedding' for local embedding-based. Defaults to 'embedding' if not provided.")])
  • api_config (Annotated[dict[str, Any] | None, Field(None, description='API configuration for external reranking services (base_url, api_key, etc.)')])

Validators:

  • resolve_device → device
  • ensure_checkpoint_or_import_path
  • hydrate_api_config_defaults
Source code in evals_hub/config.py
class ModelConfig(BaseModel):
    """Model config. One and only one of `checkpoint` or `import_path` must be provided."""

    checkpoint: Annotated[
        str | None,
        Field(
            None,
            description="Model checkpoint, if using a Transformers/SentenceTransformers model",
        ),
    ]
    device: Annotated[
        Literal["cpu", "cuda", "mps", "auto"],
        Field(
            "auto",
            description="Device to run the model on. Options: 'auto', 'cpu', 'cuda', 'mps'. Auto selects the best available device.",
        ),
    ]

    @field_validator("device")
    @classmethod
    def resolve_device(cls, v: str) -> str:
        """Automatically resolve device when set to 'auto'."""
        if v == "auto":
            device = get_device()
            return device.type
        return v

    system_prompt_path: Annotated[
        Path | None, Field(None, description="System prompt path")
    ]
    user_prompt_path: Annotated[
        Path | None, Field(None, description="User prompt path")
    ]
    import_path: Annotated[
        str | None,
        Field(
            None,
            description="Import path for a callable which runs a model, in the form 'a.b.c:<function>",
        ),
    ]
    model_settings: Annotated[dict[str, Any], Field(default_factory=dict)]

    # Reranking-specific configuration
    reranking_method: Annotated[
        Literal["api", "embedding"],
        Field(
            "embedding",
            description="Reranking method: 'api' for API-based, 'embedding' for local embedding-based. Defaults to 'embedding' if not provided.",
        ),
    ]
    api_config: Annotated[
        dict[str, Any] | None,
        Field(
            None,
            description="API configuration for external reranking services (base_url, api_key, etc.)",
        ),
    ]

    @model_validator(mode="after")
    def ensure_checkpoint_or_import_path(self) -> "ModelConfig":
        if all([self.checkpoint, self.import_path]) or not any(
            [self.checkpoint, self.import_path]
        ):
            msg = "One (and only one) of `checkpoint` or import path should be provided"
            raise ValueError(msg)
        return self

    @model_validator(mode="after")
    def hydrate_api_config_defaults(self) -> "ModelConfig":
        cfg = self.api_config or {}
        # Default base URL and API key (can be overridden in YAML)
        cfg.setdefault(
            "base_url",
            os.getenv("BASE_URL"),
        )
        if os.getenv("COHERE_API_KEY") and not cfg.get("api_key"):
            cfg["api_key"] = os.getenv("COHERE_API_KEY")
        self.api_config = cfg
        return self

resolve_device(v) pydantic-validator

Automatically resolve device when set to 'auto'.

Source code in evals_hub/config.py
@field_validator("device")
@classmethod
def resolve_device(cls, v: str) -> str:
    """Automatically resolve device when set to 'auto'."""
    if v == "auto":
        device = get_device()
        return device.type
    return v
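
Two sketches of valid configurations, illustrating the `checkpoint`/`import_path` constraint and device auto-resolution; the checkpoint, import path, and API values are placeholders:

from evals_hub.config import ModelConfig

# Checkpoint-based model: passing device="auto" lets the `resolve_device`
# validator replace it with a concrete device ("cpu", "cuda", or "mps").
embedding_model = ModelConfig(
    checkpoint="sentence-transformers/all-MiniLM-L6-v2",  # placeholder checkpoint
    device="auto",
)

# Callable-based model: `import_path` instead of `checkpoint`; providing both
# (or neither) raises a ValueError from `ensure_checkpoint_or_import_path`.
custom_model = ModelConfig(
    import_path="my_package.models:run_model",  # placeholder import path
    reranking_method="api",
    api_config={"base_url": "https://api.example.com"},  # api_key may be filled from COHERE_API_KEY
)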

OutputConfig pydantic-model

Bases: BaseModel

Output file configuration

Fields:

  • results_file (Path)
Source code in evals_hub/config.py
class OutputConfig(BaseModel):
    """Output file configuration"""

    results_file: Path
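
A brief sketch, assuming the config is typically populated from a YAML-derived dict (the file path is illustrative):

from pathlib import Path
from evals_hub.config import OutputConfig

# Direct construction...
output_cfg = OutputConfig(results_file=Path("results/eval_results.json"))

# ...or validation from a plain dict, e.g. a parsed YAML section.
output_cfg = OutputConfig.model_validate({"results_file": "results/eval_results.json"})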