| """ |
| OpenAI SDK Integration Example for HallucinationGuard-Env. |
| |
| This example demonstrates how to evaluate OpenAI models |
| (GPT-4, GPT-4o, GPT-3.5) using the HallucinationGuard environment. |
| |
| Requirements: |
| pip install openai requests |
| """ |
|
|
| import os |
| from typing import Optional |
| import requests |
|
|
| |
| try: |
| from openai import OpenAI |
| except ImportError: |
| print("Install OpenAI SDK: pip install openai") |
| raise |
|
|
|
|
class HallucinationGuardEvaluator:
    """
    Evaluate OpenAI chat models for hallucination resistance.

    The evaluator talks to a HallucinationGuard-Env server over HTTP
    (``POST /reset`` and ``POST /step``) and asks an OpenAI chat model to
    answer each question strictly from the context the server supplies.

    Features:
    - Works with any chat model that accepts ``response_format`` JSON mode
    - Falls back to a low-confidence refusal when a model call fails
    - Tracks average reward, hallucination rate and grounding-based accuracy
    """

    def __init__(
        self,
        env_base_url: str = "https://huggingface.co/proxy/samsankar-hallucination-guard-env.hf.space",
        openai_api_key: Optional[str] = None,
        model: str = "gpt-4o-mini",
        request_timeout: float = 30.0,
    ):
        """
        Initialize evaluator.

        Args:
            env_base_url: HallucinationGuard-Env server URL
            openai_api_key: OpenAI API key (or set OPENAI_API_KEY env var)
            model: OpenAI model name
            request_timeout: Seconds to wait for each environment HTTP call
                before giving up (``None`` disables the timeout)
        """
        self.env_base_url = env_base_url.rstrip('/')
        self.model = model
        self.request_timeout = request_timeout
        self.client = OpenAI(api_key=openai_api_key or os.environ.get("OPENAI_API_KEY"))

        # NOTE(review): session_id is never assigned anywhere in this class and
        # episode_id is stored but not sent back on /step - confirm whether the
        # server needs either of them to route requests.
        self.session_id = None
        self.episode_id = None

    def reset_environment(self, difficulty: str = "intermediate") -> dict:
        """
        Start a new evaluation episode.

        Args:
            difficulty: Starting difficulty (beginner, intermediate, advanced)

        Returns:
            Initial observation with question and context

        Raises:
            requests.HTTPError: If the server answers with an error status
            requests.Timeout: If the server does not answer in time
        """
        response = requests.post(
            f"{self.env_base_url}/reset",
            json={"difficulty": difficulty},
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        data = response.json()

        self.episode_id = data.get("episode_id")
        return data

    @staticmethod
    def _coerce_confidence(value, default: float = 0.5) -> float:
        """Turn a model-reported confidence into a float within [0.0, 1.0].

        Missing or non-numeric values yield ``default``; out-of-range values
        are clamped instead of being rejected.
        """
        try:
            confidence = float(value)
        except (TypeError, ValueError):
            return default
        # NaN collapses to 0.0 here because every comparison against it is
        # false, so max() keeps its first argument.
        return min(1.0, max(0.0, confidence))

    @staticmethod
    def _as_text(value) -> str:
        """Return ``value`` as a string, mapping ``None`` to the empty string."""
        return "" if value is None else str(value)

    def generate_answer(self, question: str, context: str) -> dict:
        """
        Generate an answer using OpenAI model.

        Prompts the model to:
        1. Answer ONLY from the provided context
        2. Provide a confidence score
        3. Cite the source quote

        Args:
            question: The question to answer
            context: The source context

        Returns:
            dict with answer (str), confidence (float in [0.0, 1.0]) and
            source_quote (str). When the API call fails or the reply is not
            a JSON object, a refusal with confidence 0.3 is returned instead
            of raising.
        """
        import json

        prompt = f"""Answer the following question using ONLY the provided context.

IMPORTANT RULES:
1. Answer ONLY from the context - do not use outside knowledge
2. If the answer is not in the context, say "I cannot answer from the provided context"
3. Provide your confidence level (0.0-1.0)
4. Quote the exact passage from the context that supports your answer

CONTEXT:
{context}

QUESTION:
{question}

Respond in JSON format:
{{
"answer": "your answer here",
"confidence": 0.85,
"source_quote": "exact quote from context"
}}

JSON Response:"""

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a precise QA assistant. Always respond in valid JSON format."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.1,
                max_tokens=500,
                response_format={"type": "json_object"}
            )

            content = response.choices[0].message.content
            result = json.loads(content)
            if not isinstance(result, dict):
                raise ValueError("model reply is not a JSON object")

        except Exception as e:
            # Deliberate best-effort boundary: API failures and malformed
            # replies must not abort the episode, so submit a refusal.
            print(f"Error generating answer: {e}")
            return {
                "answer": "I cannot answer from the provided context.",
                "confidence": 0.3,
                "source_quote": ""
            }

        # Normalise each field separately so that a bad confidence value no
        # longer throws away an otherwise usable answer.
        return {
            "answer": self._as_text(result.get("answer")),
            "confidence": self._coerce_confidence(result.get("confidence")),
            "source_quote": self._as_text(result.get("source_quote"))
        }

    def step(self, answer: str, confidence: float, source_quote: str = "") -> dict:
        """
        Submit an answer to the environment.

        Args:
            answer: The answer text
            confidence: Confidence level (0.0-1.0)
            source_quote: Verbatim quote from context

        Returns:
            Observation with reward and feedback

        Raises:
            requests.HTTPError: If the server answers with an error status
            requests.Timeout: If the server does not answer in time
        """
        response = requests.post(
            f"{self.env_base_url}/step",
            json={
                "answer": answer,
                "confidence": confidence,
                "source_quote": source_quote
            },
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        return response.json()

    def evaluate_episode(
        self,
        num_questions: int = 10,
        difficulty: str = "intermediate"
    ) -> dict:
        """
        Run a complete evaluation episode.

        Args:
            num_questions: Maximum number of questions to evaluate; the
                episode stops earlier when the server reports ``done``
            difficulty: Starting difficulty level

        Returns:
            Episode statistics: avg_reward, hallucination_rate, accuracy
            (share of steps with grounding_score above 0.7) and total_steps.
            All rates are 0 when no step was taken.
        """
        obs = self.reset_environment(difficulty=difficulty)

        total_reward = 0.0
        hallucinations = 0
        correct = 0
        # Counted explicitly so the statistics stay defined when the loop body
        # never runs (num_questions <= 0).
        steps_taken = 0

        for step_num in range(num_questions):
            question = obs.get("question") or ""
            context = obs.get("context") or ""

            print(f"\n--- Question {step_num + 1}/{num_questions} ---")
            print(f"Q: {question[:100]}...")

            answer_data = self.generate_answer(question, context)
            print(f"A: {answer_data['answer'][:100]}...")
            print(f"Confidence: {answer_data['confidence']:.2f}")

            # The step response doubles as the observation for the next turn.
            obs = self.step(
                answer=answer_data["answer"],
                confidence=answer_data["confidence"],
                source_quote=answer_data["source_quote"]
            )
            steps_taken += 1

            # `or` fallbacks cover keys that are present but null.
            reward = obs.get("reward") or 0.0
            total_reward += reward
            if obs.get("is_hallucination", False):
                hallucinations += 1
            if (obs.get("grounding_score") or 0) > 0.7:
                correct += 1

            print(f"Reward: {reward:.3f}")
            print(f"Feedback: {str(obs.get('feedback') or '')[:100]}...")

            if obs.get("done", False):
                break

        denominator = max(1, steps_taken)
        avg_reward = total_reward / denominator
        hallucination_rate = hallucinations / denominator
        accuracy = correct / denominator

        print("\n=== Episode Complete ===")
        print(f"Average Reward: {avg_reward:.3f}")
        print(f"Hallucination Rate: {hallucination_rate:.1%}")
        print(f"Accuracy: {accuracy:.1%}")

        return {
            "avg_reward": avg_reward,
            "hallucination_rate": hallucination_rate,
            "accuracy": accuracy,
            "total_steps": steps_taken
        }
|
|
|
|
def main(argv: Optional[list] = None) -> int:
    """
    Run evaluation demo from the command line.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the behaviour when the
            script is run directly.

    Returns:
        Process exit status: 0 after a completed episode, 1 when the
        OPENAI_API_KEY environment variable is not set.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(description="Evaluate OpenAI models for hallucination resistance")
    parser.add_argument("--model", default="gpt-4o-mini", help="OpenAI model name")
    parser.add_argument("--difficulty", default="intermediate", help="Difficulty level")
    parser.add_argument("--num-questions", type=int, default=5, help="Number of questions")
    parser.add_argument("--env-url", default="https://huggingface.co/proxy/samsankar-hallucination-guard-env.hf.space",
                        help="Environment server URL")

    args = parser.parse_args(argv)

    # Fail before any network traffic, and report the failure through the
    # exit status so shells and CI jobs can detect it.
    if not os.environ.get("OPENAI_API_KEY"):
        print("Error: Set OPENAI_API_KEY environment variable", file=sys.stderr)
        return 1

    evaluator = HallucinationGuardEvaluator(
        env_base_url=args.env_url,
        model=args.model
    )

    results = evaluator.evaluate_episode(
        num_questions=args.num_questions,
        difficulty=args.difficulty
    )

    print(f"\nFinal Results: {results}")
    return 0
|
|
|
|
if __name__ == "__main__":
    # Pass whatever main() returns to the interpreter as the exit status;
    # a None return is treated as success, the same as 0.
    raise SystemExit(main())