| | |
"""
Script to get version information for all models in the dataset.

Usage:
    export CBORG_API_KEY=...
    python get_all_model_versions.py
"""
| | import os |
| | import sys |
| | import pandas as pd |
| | from openai import OpenAI |
| |
|
def test_model_version(client, model_id):
    """Send a minimal chat request to *model_id* and report which model answered.

    Args:
        client: Object exposing ``chat.completions.create(...)`` (the script
            passes an ``openai.OpenAI`` instance).
        model_id: Model name or alias to send in the request.

    Returns:
        The ``model`` value of the response on success. On any failure, a
        string beginning with ``"ERROR: "``: either the exception text
        truncated to 150 characters, or a note that the response carried no
        model name. Callers detect failure with ``startswith('ERROR')``, so a
        string is always returned.
    """
    try:
        response = client.chat.completions.create(
            model=model_id,
            messages=[{"role": "user", "content": "Hi"}],
            max_tokens=5,
        )
    except Exception as exc:
        # Deliberately broad: one failing model must not abort the whole scan.
        return f"ERROR: {str(exc)[:150]}"

    model_name = getattr(response, "model", None)
    if not isinstance(model_name, str) or not model_name:
        # Keep the "always a string" contract so callers can safely call
        # .startswith() on the result.
        return "ERROR: response did not include a model name"
    return model_name


# Despite its name this is a helper, not a unit test; the flag keeps pytest
# from collecting it (and failing on the unknown "client" fixture) whenever
# the function is visible in a test module's namespace.
test_model_version.__test__ = False
| |
|
def main(csv_path='/global/cfs/projectdirs/atlas/joshua/llm4hep/results_summary.csv',
         output_file='model_version_mappings.txt'):
    """Probe every model named in the results CSV and record what it resolves to.

    Reads the ``supervisor`` and ``coder`` columns of *csv_path*, sends one
    request per unique model through ``test_model_version``, prints a
    progress line and a summary table, and writes the successful mappings to
    *output_file*. Models whose probe failed are shown in the printed table
    but are not written to the file.

    Args:
        csv_path: CSV file with ``supervisor`` and ``coder`` columns; lines
            starting with ``#`` are treated as comments.
        output_file: Path of the text report to write (overwritten).

    Exits with status 1 if ``CBORG_API_KEY`` is not set in the environment.
    """
    # Local import: only the report header needs it, and it keeps this
    # function self-contained with respect to the file's import block.
    from datetime import date

    # Status glyphs spelled as escapes so the source stays pure ASCII and the
    # success / failure markers cannot collapse into the same character.
    ok_mark = "\u2713"    # check mark
    fail_mark = "\u2717"  # ballot X
    arrow = "\u2192"      # rightwards arrow

    api_key = os.environ.get('CBORG_API_KEY')
    if not api_key:
        print("Error: CBORG_API_KEY environment variable not set.")
        sys.exit(1)

    client = OpenAI(
        api_key=api_key,
        base_url="https://api.cborg.lbl.gov"
    )

    # Rows missing either role are dropped before collecting model names.
    df = pd.read_csv(csv_path, comment='#')
    df = df.dropna(subset=['supervisor', 'coder'])

    # Union of both roles, de-duplicated and sorted for stable output.
    all_models = sorted(set(df['supervisor'].unique()) | set(df['coder'].unique()))

    print("=" * 100)
    print("TESTING ALL MODELS IN DATASET FOR VERSION INFORMATION")
    print("=" * 100)
    print(f"\nFound {len(all_models)} unique models in the dataset")
    print()

    results = {}

    for idx, model in enumerate(all_models, 1):
        # flush so the model name is visible while the request is in flight
        print(f"[{idx}/{len(all_models)}] Testing {model:<45}", end=" ", flush=True)
        underlying = test_model_version(client, model)
        results[model] = underlying
        print(fail_mark if underlying.startswith('ERROR') else ok_mark)

    print("\n" + "=" * 100)
    print("RESULTS: MODEL MAPPINGS")
    print("=" * 100)

    for model in sorted(results):
        underlying = results[model]
        if underlying.startswith('ERROR'):
            print(f"{fail_mark} {model:<45} {underlying[:50]}")
        elif model == underlying:
            print(f"{ok_mark} {model:<45} (no alias)")
        else:
            print(f"{ok_mark} {model:<45} {arrow} {underlying}")

    today = date.today()
    # Explicit UTF-8: the report contains a non-ASCII arrow and must not
    # depend on the platform's default encoding.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("MODEL VERSION MAPPINGS\n")
        f.write("=" * 100 + "\n")
        # Stamp the actual run date instead of a fixed one.
        f.write(f"Discovered on: {today:%B} {today.day}, {today.year}\n")
        f.write(f"Total models tested: {len(results)}\n\n")

        for model in sorted(results):
            underlying = results[model]
            if underlying.startswith('ERROR'):
                continue  # failed probes are left out of the saved mapping
            if model == underlying:
                f.write(f"{model} (no alias)\n")
            else:
                f.write(f"{model} {arrow} {underlying}\n")

    print(f"\n{ok_mark} Results saved to {output_file}")
    print("=" * 100)
| |
|
# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
| |
|