{
"title": "Reinforcement Learning Basics Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions covering the fundamental concepts of Reinforcement Learning, including agent, environment, state, action, reward, policy, value functions, and exploration vs. exploitation.",
"questions": [
{
"id": 1,
"questionText": "In reinforcement learning, what is an agent?",
"options": [
"The reward signal",
"The entity that interacts with the environment and learns from feedback",
"A state of the system",
"The environment itself"
],
"correctAnswerIndex": 1,
"explanation": "An agent is the learner or decision maker that takes actions in an environment and receives feedback (rewards) to improve its behavior."
},
{
"id": 2,
"questionText": "What does the environment represent in reinforcement learning?",
"options": [
"The reward signal only",
"The agent itself",
"Everything the agent interacts with, including states and rules",
"The action space only"
],
"correctAnswerIndex": 2,
"explanation": "The environment includes everything external to the agent, defining possible states, transitions, and feedback."
},
{
"id": 3,
"questionText": "What is a 'state' in the context of reinforcement learning?",
"options": [
"A description of the current situation of the environment",
"The agent's action choice",
"The policy function",
"The reward received"
],
"correctAnswerIndex": 0,
"explanation": "A state captures all relevant information about the environment at a specific time, which the agent can use to decide its next action."
},
{
"id": 4,
"questionText": "Which of the following is true about the interaction between agent and environment?",
"options": [
"Agent observes state, takes action, environment returns next state and reward",
"Agent only observes rewards",
"Environment takes action, agent returns state",
"Environment updates agent’s policy directly"
],
"correctAnswerIndex": 0,
"explanation": "The standard reinforcement learning loop: the agent observes a state, acts, and receives a reward and new state from the environment."
},
{
"id": 5,
"questionText": "What is the purpose of the reward signal?",
"options": [
"To provide feedback to the agent on the quality of its action",
"To define the environment",
"To choose the next action automatically",
"To represent the current state"
],
"correctAnswerIndex": 0,
"explanation": "The reward signal guides the agent, helping it learn which actions are beneficial or harmful."
},
{
"id": 6,
"questionText": "What does a fully observable environment mean?",
"options": [
"The agent knows only past rewards",
"The agent cannot observe the state",
"The agent has complete information about the current state",
"The environment chooses the agent's actions"
],
"correctAnswerIndex": 2,
"explanation": "In a fully observable environment, the agent can see all relevant aspects of the current state to make optimal decisions."
},
{
"id": 7,
"questionText": "What is a partially observable environment?",
"options": [
"The agent has incomplete information about the current state",
"Rewards are constant",
"The agent observes everything",
"The environment changes randomly without states"
],
"correctAnswerIndex": 0,
"explanation": "In partially observable environments, the agent must act based on incomplete or noisy observations."
},
{
"id": 8,
"questionText": "Which of the following defines the agent's behavior?",
"options": [
"Reward",
"Policy",
"State",
"Environment"
],
"correctAnswerIndex": 1,
"explanation": "The policy is a mapping from states to actions that defines the agent’s behavior."
},
{
"id": 9,
"questionText": "What is the action space?",
"options": [
"The environment dynamics",
"All possible states in the environment",
"The set of reward values",
"The set of all actions an agent can take in a given state"
],
"correctAnswerIndex": 3,
"explanation": "The action space contains all the actions available to the agent at any point in time."
},
{
"id": 10,
"questionText": "What is the difference between deterministic and stochastic environments?",
"options": [
"There is no difference",
"Deterministic: no rewards; Stochastic: rewards exist",
"Deterministic: random actions; Stochastic: fixed actions",
"Deterministic: same action leads to same outcome; Stochastic: action may lead to different outcomes"
],
"correctAnswerIndex": 3,
"explanation": "In deterministic environments, actions have predictable results. In stochastic environments, outcomes may vary probabilistically."
},
{
"id": 11,
"questionText": "Which of the following is an example of an agent in real life?",
"options": [
"The maze itself",
"The state of the environment",
"A robot navigating a maze",
"A reward signal"
],
"correctAnswerIndex": 2,
"explanation": "The agent is the decision-making entity that interacts with the environment, e.g., a robot in a maze."
},
{
"id": 12,
"questionText": "Which of the following is an example of a state in a grid-world environment?",
"options": [
"The reward function",
"Agent’s current position on the grid",
"The policy",
"The set of all possible actions"
],
"correctAnswerIndex": 1,
"explanation": "A state represents the agent’s current location and situation in the environment."
},
{
"id": 13,
"questionText": "In reinforcement learning, what is the transition function?",
"options": [
"Specifies the policy",
"Defines probability of moving from one state to another given an action",
"Defines the reward only",
"Specifies the agent"
],
"correctAnswerIndex": 1,
"explanation": "The transition function determines how the environment evolves in response to the agent's actions."
},
{
"id": 14,
"questionText": "What does a Markov Decision Process (MDP) assume about states?",
"options": [
"States are not related",
"The future depends on all past states",
"Rewards are ignored",
"The future state depends only on the current state and action"
],
"correctAnswerIndex": 3,
"explanation": "The Markov property assumes that the next state depends only on the current state and action."
},
{
"id": 15,
"questionText": "Which component is NOT part of a standard reinforcement learning framework?",
"options": [
"Agent",
"Environment",
"Loss function (not always explicit)",
"Reward signal"
],
"correctAnswerIndex": 2,
"explanation": "The loss function is part of optimization in some algorithms, but not a fundamental RL component."
},
{
"id": 16,
"questionText": "What is the observation in reinforcement learning?",
"options": [
"The action chosen by the agent",
"The policy itself",
"The reward only",
"The information received by the agent from the environment"
],
"correctAnswerIndex": 3,
"explanation": "Observation is what the agent perceives about the environment at each time step."
},
{
"id": 17,
"questionText": "In an episodic task, what marks the end of an episode?",
"options": [
"The agent stops observing",
"A terminal state is reached",
"The reward is zero",
"The environment resets automatically"
],
"correctAnswerIndex": 1,
"explanation": "An episode ends when the agent reaches a terminal state, after which learning may continue in a new episode."
},
{
"id": 18,
"questionText": "Which of the following best describes the policy function π(s)?",
"options": [
"A mapping from actions to rewards",
"The transition function",
"A mapping from states to probabilities of actions",
"The environment model"
],
"correctAnswerIndex": 2,
"explanation": "The policy π(s) defines the probability distribution over actions for each state."
},
{
"id": 19,
"questionText": "What is a terminal state?",
"options": [
"The starting state",
"A state with no available actions only",
"Any state with a negative reward",
"A state where the episode ends and no further actions are taken"
],
"correctAnswerIndex": 3,
"explanation": "Terminal states signal the end of an episode in reinforcement learning."
},
{
"id": 20,
"questionText": "Which component captures the dynamics of how actions affect the environment?",
"options": [
"Reward function only",
"Agent",
"Policy function",
"Transition function"
],
"correctAnswerIndex": 3,
"explanation": "The transition function defines how the environment responds to the agent's actions."
},
{
"id": 21,
"questionText": "In a stochastic environment, taking the same action twice may result in:",
"options": [
"Different next states and rewards",
"Deterministic rewards only",
"No change in environment",
"Always the same next state"
],
"correctAnswerIndex": 0,
"explanation": "Stochastic environments have probabilistic outcomes, so the same action may lead to different states and rewards."
},
{
"id": 22,
"questionText": "Which of the following is an example of an environment in reinforcement learning?",
"options": [
"The policy function",
"The robot itself",
"A chessboard on which a robot plays",
"The reward function only"
],
"correctAnswerIndex": 2,
"explanation": "The environment includes the world the agent interacts with, such as the chessboard in a robot chess task."
},
{
"id": 23,
"questionText": "Which type of environment requires the agent to infer hidden states?",
"options": [
"Deterministic environment",
"Fully observable environment",
"Partially observable environment",
"Episodic environment"
],
"correctAnswerIndex": 2,
"explanation": "In partially observable environments, the agent cannot directly observe the true state and must infer it from observations."
},
{
"id": 24,
"questionText": "Which of the following is a key challenge for agents in large state spaces?",
"options": [
"Small reward",
"Fully observable environment",
"Curse of dimensionality",
"Terminal states"
],
"correctAnswerIndex": 2,
"explanation": "Large state spaces make it difficult for agents to explore and learn efficiently, a problem known as the curse of dimensionality."
},
{
"id": 25,
"questionText": "Which signal tells the agent how good an action was?",
"options": [
"Action",
"Policy",
"Reward",
"State"
],
"correctAnswerIndex": 2,
"explanation": "The reward is feedback from the environment that indicates the quality of an action."
},
{
"id": 26,
"questionText": "Which of the following is true for a Markov environment?",
"options": [
"The future depends only on the current state and action",
"The state space is continuous only",
"Rewards are always zero",
"The future depends on all past actions"
],
"correctAnswerIndex": 0,
"explanation": "Markov environments satisfy the Markov property, where the next state depends only on the current state and action."
},
{
"id": 27,
"questionText": "Which component allows the agent to explore and learn optimal behavior?",
"options": [
"Policy and reward feedback",
"Transition function only",
"State only",
"Environment only"
],
"correctAnswerIndex": 0,
"explanation": "The agent uses its policy and reward feedback to explore and improve actions."
},
{
"id": 28,
"questionText": "In an RL problem, what is an episodic task?",
"options": [
"Task without any terminal state",
"Task with only one action",
"Task with clearly defined episodes ending in terminal states",
"Task without rewards"
],
"correctAnswerIndex": 2,
"explanation": "Episodic tasks consist of episodes with start and terminal states; each episode is independent."
},
{
"id": 29,
"questionText": "What is the role of exploration in reinforcement learning?",
"options": [
"Observing states without acting",
"Always exploiting known rewards",
"Ignoring the environment",
"Trying new actions to discover their effects"
],
"correctAnswerIndex": 3,
"explanation": "Exploration allows the agent to learn about unknown states and actions, balancing exploration and exploitation."
},
{
"id": 30,
"questionText": "Which statement describes the agent-environment loop?",
"options": [
"Agent observes reward only",
"Environment observes agent → acts → gives state",
"Agent observes state → takes action → receives reward → observes next state",
"Environment updates policy directly"
],
"correctAnswerIndex": 2,
"explanation": "The agent-environment loop is the core of reinforcement learning, where the agent interacts with the environment through actions and feedback."
},
{
"id": 31,
"questionText": "What distinguishes a deterministic policy from a stochastic policy?",
"options": [
"Deterministic policy has rewards; stochastic does not",
"Deterministic policy selects a specific action for a state; stochastic policy assigns probabilities to actions",
"Deterministic policy assigns probabilities; stochastic policy selects a fixed action",
"Both are the same"
],
"correctAnswerIndex": 1,
"explanation": "Deterministic policies always select a specific action for a state, whereas stochastic policies assign a probability distribution over possible actions."
},
{
"id": 32,
"questionText": "In a partially observable environment, what does the agent often maintain to act effectively?",
"options": [
"Fixed action list",
"Reward history only",
"Transition function only",
"Belief state representing probability distribution over possible true states"
],
"correctAnswerIndex": 3,
"explanation": "Belief states allow the agent to infer the true state when observations are incomplete or noisy."
},
{
"id": 33,
"questionText": "Which type of environment is an agent guaranteed to learn an optimal policy with enough exploration?",
"options": [
"Non-Markov environment",
"Continuous reward environment only",
"Partially observable environment",
"Markov environment"
],
"correctAnswerIndex": 3,
"explanation": "In Markov environments, the future depends only on the current state and action, making learning optimal policies feasible."
},
{
"id": 34,
"questionText": "What is the advantage of modeling an environment as an MDP?",
"options": [
"Removes need for rewards",
"Reduces the state space automatically",
"Guarantees deterministic transitions",
"Enables mathematical analysis and algorithmic solution for optimal policies"
],
"correctAnswerIndex": 3,
"explanation": "MDPs provide a formal framework to model the agent-environment interaction mathematically, facilitating policy optimization."
},
{
"id": 35,
"questionText": "Which of the following best describes a reward function R(s,a,s')?",
"options": [
"Indicates terminal state only",
"Predicts the next state",
"Defines the policy",
"Gives the immediate reward for transitioning from state s to s' using action a"
],
"correctAnswerIndex": 3,
"explanation": "The reward function assigns a scalar value indicating the immediate benefit of taking an action in a state and transitioning to the next state."
},
{
"id": 36,
"questionText": "Which of the following is true about terminal states in episodic tasks?",
"options": [
"The agent restarts immediately without learning",
"Terminal states do not exist in episodic tasks",
"Rewards become infinite",
"Once reached, no further actions are taken in the episode"
],
"correctAnswerIndex": 3,
"explanation": "Terminal states mark the end of an episode; learning continues by updating values or policies before starting a new episode."
},
{
"id": 37,
"questionText": "Which is an example of continuous state space?",
"options": [
"Number of books on a shelf",
"Robot’s position in a 2D plane with real coordinates",
"A traffic light with three colors",
"A chessboard with 64 squares"
],
"correctAnswerIndex": 1,
"explanation": "Continuous state spaces have infinitely many possible states, such as positions represented by real numbers."
},
{
"id": 38,
"questionText": "What does the exploration-exploitation trade-off mean?",
"options": [
"Choosing between reward and punishment",
"Choosing between trying new actions (exploration) and using known best actions (exploitation)",
"Choosing between deterministic and stochastic policies",
"Choosing between discrete and continuous states"
],
"correctAnswerIndex": 1,
"explanation": "Agents must balance exploring new actions to learn about the environment and exploiting known actions to maximize rewards."
},
{
"id": 39,
"questionText": "Which term describes a mapping from a state to an action probability distribution?",
"options": [
"Transition function",
"Deterministic policy",
"Reward function",
"Stochastic policy"
],
"correctAnswerIndex": 3,
"explanation": "A stochastic policy assigns probabilities to possible actions in a given state."
},
{
"id": 40,
"questionText": "Which statement is true about the state transition function P(s'|s,a)?",
"options": [
"It defines the policy",
"It measures agent’s performance",
"It defines the probability of reaching state s' from state s using action a",
"It gives the immediate reward only"
],
"correctAnswerIndex": 2,
"explanation": "The transition function models the environment’s dynamics probabilistically, specifying how actions lead to next states."
},
{
"id": 41,
"questionText": "Which of the following is NOT a property of Markov Decision Processes?",
"options": [
"Transition probabilities exist for all actions",
"Rewards depend only on current state and action",
"Next state depends only on current state and action",
"Agent history is needed to determine next state"
],
"correctAnswerIndex": 3,
"explanation": "MDPs satisfy the Markov property: the next state depends only on the current state and action, not the full history."
},
{
"id": 42,
"questionText": "In reinforcement learning, what is a value function?",
"options": [
"Determines the next action directly",
"Gives immediate reward only",
"Estimates expected cumulative reward from a state or state-action pair",
"Represents environment transitions"
],
"correctAnswerIndex": 2,
"explanation": "Value functions estimate how good it is to be in a state or take an action, guiding the agent toward optimal behavior."
},
{
"id": 43,
"questionText": "What is Q-learning primarily used for?",
"options": [
"Learning optimal action-value function without model of environment",
"Defining transition probabilities",
"Calculating rewards directly",
"Predicting future states only"
],
"correctAnswerIndex": 0,
"explanation": "Q-learning is a model-free reinforcement learning algorithm that learns the expected cumulative reward (Q-values) for state-action pairs."
},
{
"id": 44,
"questionText": "Which of the following is a characteristic of a reward signal?",
"options": [
"A policy mapping",
"A transition probability",
"Scalar feedback indicating desirability of action outcome",
"Vector describing all states"
],
"correctAnswerIndex": 2,
"explanation": "Rewards provide scalar feedback to guide the agent toward favorable actions."
},
{
"id": 45,
"questionText": "Which type of environment requires the agent to maintain memory of past actions to perform well?",
"options": [
"Episodic tasks",
"Partially observable environments",
"Discrete environments only",
"Fully observable deterministic environments"
],
"correctAnswerIndex": 1,
"explanation": "In partially observable environments, the agent may need to use past actions or observations to infer the current state."
},
{
"id": 46,
"questionText": "Which of the following represents a stochastic reward?",
"options": [
"The reward is always zero",
"The reward is always 1",
"The reward depends deterministically on the state only",
"The reward received for the same action may vary probabilistically"
],
"correctAnswerIndex": 3,
"explanation": "Stochastic rewards introduce randomness, making the same action yield different outcomes in different trials."
},
{
"id": 47,
"questionText": "What is the main challenge in large continuous state spaces?",
"options": [
"Rewards are absent",
"Terminal states do not exist",
"Policies are deterministic only",
"Efficiently representing and learning value or policy functions"
],
"correctAnswerIndex": 3,
"explanation": "Continuous and high-dimensional states require function approximation techniques to represent policies and value functions efficiently."
},
{
"id": 48,
"questionText": "Which statement describes a fully observable episodic environment?",
"options": [
"Environment never resets",
"Agent can see entire state and each episode ends at a terminal state",
"Agent receives no rewards",
"Agent cannot observe state"
],
"correctAnswerIndex": 1,
"explanation": "Fully observable episodic environments allow the agent to see complete states and episodes terminate at terminal states."
},
{
"id": 49,
"questionText": "Which is a common approach to handle partial observability?",
"options": [
"Using deterministic rewards only",
"Maintaining a belief state or memory of observations",
"Always resetting the environment",
"Ignoring the missing information"
],
"correctAnswerIndex": 1,
"explanation": "Agents can maintain belief states or use history to make decisions in partially observable environments."
},
{
"id": 50,
"questionText": "Which of the following is an example of an action in a reinforcement learning problem?",
"options": [
"Moving a robot forward",
"The robot’s position",
"The environment itself",
"The reward value"
],
"correctAnswerIndex": 0,
"explanation": "Actions are choices made by the agent to interact with the environment."
},
{
"id": 51,
"questionText": "What is the purpose of a policy evaluation step in reinforcement learning?",
"options": [
"To estimate the value function for a given policy",
"To reset the environment",
"To define the transition function",
"To select the next action directly"
],
"correctAnswerIndex": 0,
"explanation": "Policy evaluation calculates how good a given policy is by computing expected cumulative rewards (value function) for states or state-action pairs."
},
{
"id": 52,
"questionText": "Which of the following is true for model-free reinforcement learning?",
"options": [
"States are fully deterministic",
"Agent learns value or policy without knowing environment transitions",
"Agent knows all transition probabilities in advance",
"Rewards are not used"
],
"correctAnswerIndex": 1,
"explanation": "Model-free methods learn from interaction and feedback without requiring a model of the environment’s dynamics."
},
{
"id": 53,
"questionText": "Which of the following best defines the state-value function V(s)?",
"options": [
"Expected cumulative reward from state s following a given policy",
"Immediate reward of state s",
"Set of possible actions",
"Probability of transitioning to next state"
],
"correctAnswerIndex": 0,
"explanation": "V(s) estimates how good it is to be in a particular state under a specific policy."
},
{
"id": 54,
"questionText": "Which best defines the action-value function Q(s,a)?",
"options": [
"Expected cumulative reward from taking action a in state s following a given policy",
"Immediate reward for state s only",
"Policy mapping for s only",
"Probability of reaching terminal state"
],
"correctAnswerIndex": 0,
"explanation": "Q(s,a) estimates the expected return for taking an action in a state and following the policy thereafter."
},
{
"id": 55,
"questionText": "In a stochastic environment, the optimal policy may be:",
"options": [
"Independent of rewards",
"Always deterministic",
"Stochastic",
"Always random"
],
"correctAnswerIndex": 2,
"explanation": "In stochastic environments, an optimal policy may assign probabilities to actions to maximize expected returns."
},
{
"id": 56,
"questionText": "Which term describes the probability distribution over next states given current state and action?",
"options": [
"Reward function R(s,a)",
"Policy π(s)",
"Transition function P(s'|s,a)",
"Value function V(s)"
],
"correctAnswerIndex": 2,
"explanation": "The transition function defines the environment dynamics in terms of probabilities for next states."
},
{
"id": 57,
"questionText": "Which type of task has no terminal state?",
"options": [
"Episodic task",
"Fully observable task",
"Partially observable task",
"Continuing task"
],
"correctAnswerIndex": 3,
"explanation": "In continuing tasks, the agent interacts with the environment indefinitely without episodes ending in terminal states."
},
{
"id": 58,
"questionText": "Which of the following statements is true about discounted rewards?",
"options": [
"Rewards are ignored",
"Future rewards are multiplied by a discount factor gamma (0 ≤ γ ≤ 1)",
"Only immediate rewards are considered",
"All rewards are summed without discount"
],
"correctAnswerIndex": 1,
"explanation": "Discounted rewards give less weight to future rewards, balancing immediate and long-term gains."
},
{
"id": 59,
"questionText": "Which approach can handle large or continuous state spaces?",
"options": [
"Terminal states only",
"Function approximation (like neural networks)",
"Ignoring rewards",
"Tabular methods only"
],
"correctAnswerIndex": 1,
"explanation": "Function approximation allows RL algorithms to scale to large or continuous states by generalizing across similar states."
},
{
"id": 60,
"questionText": "Which statement is true about on-policy learning?",
"options": [
"Agent learns using a different policy than the one it follows",
"Policy is deterministic only",
"Agent learns value or policy using the same policy it follows to act",
"Agent does not consider rewards"
],
"correctAnswerIndex": 2,
"explanation": "On-policy methods update value or policy estimates based on the policy currently being followed by the agent."
},
{
"id": 61,
"questionText": "Which statement describes off-policy learning?",
"options": [
"Requires fully observable environment only",
"Learning only immediate rewards",
"Learning about one policy while following another policy",
"Ignoring actions"
],
"correctAnswerIndex": 2,
"explanation": "Off-policy methods allow the agent to evaluate or improve a policy different from the behavior policy used to generate data."
},
{
"id": 62,
"questionText": "Which of the following can improve exploration in RL?",
"options": [
"Reducing state space only",
"Adding randomness to action selection (e.g., ε-greedy)",
"Always selecting the best-known action",
"Ignoring rewards"
],
"correctAnswerIndex": 1,
"explanation": "Exploration strategies like ε-greedy help the agent discover better actions instead of always exploiting known ones."
},
{
"id": 63,
"questionText": "Which scenario represents a partially observable environment?",
"options": [
"Moving pieces on a completely known board",
"A fixed deterministic reward system",
"A robot navigating a room with occluded areas",
"A chess game with full board visible"
],
"correctAnswerIndex": 2,
"explanation": "Partial observability occurs when the agent cannot directly perceive the full state of the environment."
},
{
"id": 64,
"questionText": "Which of the following is a key benefit of defining MDPs?",
"options": [
"Eliminates the need for rewards",
"Provides a formal structure for planning and learning optimal policies",
"Reduces state space automatically",
"Removes stochasticity"
],
"correctAnswerIndex": 1,
"explanation": "MDPs formalize the problem so agents can use algorithms like dynamic programming, Monte Carlo, or Q-learning to optimize policies."
},
{
"id": 65,
"questionText": "Which of the following represents a terminal reward?",
"options": [
"Random reward at any state",
"Reward at every step",
"A reward received upon reaching a terminal state",
"Reward ignored in final step"
],
"correctAnswerIndex": 2,
"explanation": "Terminal rewards are obtained only when the agent reaches a terminal state, signaling the end of an episode."
},
{
"id": 66,
"questionText": "Which of the following is true about value iteration?",
"options": [
"Estimates reward only",
"Computes optimal value function iteratively using Bellman optimality equation",
"Ignores transition probabilities",
"Updates policy randomly"
],
"correctAnswerIndex": 1,
"explanation": "Value iteration is a dynamic programming algorithm that iteratively updates state values to converge to the optimal value function."
},
{
"id": 67,
"questionText": "Which approach can solve RL problems without knowing the transition function?",
"options": [
"Value iteration with model",
"Policy evaluation with model",
"Model-free reinforcement learning",
"Dynamic programming"
],
"correctAnswerIndex": 2,
"explanation": "Model-free RL methods learn directly from experience without requiring knowledge of environment dynamics."
},
{
"id": 68,
"questionText": "Which component of RL represents the agent’s knowledge about the environment?",
"options": [
"Terminal state",
"State only",
"Value function or Q-function",
"Action space only"
],
"correctAnswerIndex": 2,
"explanation": "Value functions or Q-functions encode the agent's learned knowledge about which states or actions are desirable."
},
{
"id": 69,
"questionText": "Which of the following best describes a reward shaping technique?",
"options": [
"Altering the action space only",
"Ignoring exploration",
"Modifying the reward function to accelerate learning",
"Changing the state space"
],
"correctAnswerIndex": 2,
"explanation": "Reward shaping adds additional rewards to guide the agent more efficiently without changing the optimal policy."
},
{
"id": 70,
"questionText": "Which type of RL problem is suitable for continuing tasks?",
"options": [
"Tasks with ongoing interactions and no terminal state",
"Episodic tasks only",
"Tasks with fixed-length episodes only",
"Tasks with stochastic rewards ignored"
],
"correctAnswerIndex": 0,
"explanation": "Continuing tasks involve ongoing interaction with no defined terminal states, such as monitoring a robot continuously."
},
{
"id": 71,
"questionText": "A robot is navigating a warehouse with moving obstacles. Which type of environment does it face?",
"options": [
"Fully observable and stochastic",
"Partially observable and deterministic",
"Partially observable and stochastic",
"Fully observable and deterministic"
],
"correctAnswerIndex": 2,
"explanation": "The robot cannot always see all obstacles (partial observability) and the movement of obstacles is unpredictable (stochastic)."
},
{
"id": 72,
"questionText": "An agent receives rewards only at the end of a maze. Which learning challenge is most significant?",
"options": [
"Partial observability",
"Continuous state representation",
"Delayed reward problem",
"Exploration-exploitation trade-off"
],
"correctAnswerIndex": 2,
"explanation": "When rewards are delayed, the agent must figure out which actions contributed to eventual success, making learning harder."
},
{
"id": 73,
"questionText": "A drone must decide its path to maximize battery life and avoid collisions. Which components are crucial for its RL model?",
"options": [
"Reward function only",
"Terminal state only",
"State (position, velocity), actions (move directions), rewards (safety + battery efficiency)",
"Policy only"
],
"correctAnswerIndex": 2,
"explanation": "The drone’s model needs full state representation, available actions, and a reward function capturing multiple objectives."
},
{
"id": 74,
"questionText": "A game AI observes opponent moves but cannot see hidden cards. Which property describes its environment?",
"options": [
"Partially observable",
"Deterministic",
"Fully observable",
"Episodic only"
],
"correctAnswerIndex": 0,
"explanation": "The agent has incomplete information due to hidden cards, making the environment partially observable."
},
{
"id": 75,
"questionText": "In a stock trading simulation, the agent sees only past prices and indicators. What RL challenge is this?",
"options": [
"Deterministic policy",
"Partial observability and delayed reward",
"Terminal state only",
"Immediate rewards only"
],
"correctAnswerIndex": 1,
"explanation": "The agent must infer the hidden market state from past observations and often receives profit/loss rewards after some delay."
},
{
"id": 76,
"questionText": "A robot has sensors with noise. Which approach can help it act reliably?",
"options": [
"Use deterministic rewards only",
"Act randomly always",
"Maintain belief state or use filtering techniques",
"Ignore sensor readings"
],
"correctAnswerIndex": 2,
"explanation": "Belief states or filters help the agent estimate the true state from noisy observations."
},
{
"id": 77,
"questionText": "In a maze with traps, the agent must minimize risk while reaching the goal. Which reward design is best?",
"options": [
"Positive reward for reaching goal, negative reward for traps",
"Rewards for every step only",
"Random rewards everywhere",
"No reward at all"
],
"correctAnswerIndex": 0,
"explanation": "Assigning penalties for traps encourages safe paths while rewarding goal completion guides the agent."
},
{
"id": 78,
"questionText": "A robot learns by observing a human performing a task first. Which type of RL method does this resemble?",
"options": [
"Value iteration only",
"Random exploration only",
"Model-free Q-learning only",
"Imitation learning / guided RL"
],
"correctAnswerIndex": 3,
"explanation": "Imitation learning leverages demonstrations to guide the agent before independent reinforcement learning."
},
{
"id": 79,
"questionText": "In an autonomous car simulation, the agent sees other cars’ positions and velocities. Which component represents this information?",
"options": [
"State",
"Policy",
"Action",
"Reward"
],
"correctAnswerIndex": 0,
"explanation": "The state captures the current situation the agent observes, including positions and velocities of other cars."
},
{
"id": 80,
"questionText": "A reinforcement learning agent must act in a changing environment. Which feature is critical?",
"options": [
"Terminal states only",
"Fixed rewards only",
"Adaptive policy that can update over time",
"Static value function only"
],
"correctAnswerIndex": 2,
"explanation": "In non-stationary environments, the agent must adapt its policy based on new observations and feedback."
},
{
"id": 81,
"questionText": "In a game where rewards are sparse, which strategy improves learning efficiency?",
"options": [
"Always taking the first available action",
"Ignoring delayed rewards",
"Reward shaping to provide intermediate feedback",
"Reducing state space arbitrarily"
],
"correctAnswerIndex": 2,
"explanation": "Reward shaping provides intermediate signals, helping the agent learn even when main rewards are rare."
},
{
"id": 82,
"questionText": "A delivery drone must optimize routes while avoiding bad weather. What is a suitable reward structure?",
"options": [
"Random reward for actions",
"Positive for deliveries, negative for entering bad weather zones",
"Reward only for flight time",
"Reward constant at all states"
],
"correctAnswerIndex": 1,
"explanation": "The reward structure guides the agent to balance speed and safety."
},
{
"id": 83,
"questionText": "Which type of environment is represented by a multi-agent traffic simulation?",
"options": [
"Episodic only",
"Single-agent deterministic",
"Fully observable and deterministic",
"Partially observable and stochastic"
],
"correctAnswerIndex": 3,
"explanation": "Each agent sees limited information and other agents behave unpredictably, making the environment partially observable and stochastic."
},
{
"id": 84,
"questionText": "An agent in a factory can perform actions that affect multiple machines. Which is critical for modeling this RL problem?",
"options": [
"State should include all relevant machine statuses",
"Terminal state alone is sufficient",
"Only the reward matters",
"Policy is unnecessary"
],
"correctAnswerIndex": 0,
"explanation": "To make informed decisions, the agent must have a state representation capturing all relevant aspects of the environment."
},
{
"id": 85,
"questionText": "Which challenge arises in partially observable environments with stochastic transitions?",
"options": [
"Terminal states disappear",
"Actions are deterministic only",
"The agent must infer hidden states and handle uncertainty simultaneously",
"Rewards are always zero"
],
"correctAnswerIndex": 2,
"explanation": "Partial observability and stochasticity require advanced strategies like belief state tracking to make good decisions."
},
{
"id": 86,
"questionText": "A robot arm must assemble objects and avoid collisions. Which learning strategy helps?",
"options": [
"Random movements only",
"Ignoring collisions",
"Combining reward penalties for collisions with positive rewards for assembly",
"Reward only at end of task"
],
"correctAnswerIndex": 2,
"explanation": "Carefully designed rewards for positive and negative outcomes guide the agent toward safe and efficient behavior."
},
{
"id": 87,
"questionText": "In reinforcement learning, what is the primary difficulty in environments with delayed rewards?",
"options": [
"Terminal states are not defined",
"Actions are deterministic only",
"States are fully observable",
"Credit assignment problem (figuring out which actions caused the reward)"
],
"correctAnswerIndex": 3,
"explanation": "The agent must determine which past actions contributed to the reward, which is the credit assignment problem."
},
{
"id": 88,
"questionText": "Which technique helps an agent handle large continuous action spaces?",
"options": [
"Ignoring actions",
"Tabular Q-learning only",
"Function approximation or actor-critic methods",
"Random exploration without guidance"
],
"correctAnswerIndex": 2,
"explanation": "Function approximation methods allow the agent to represent policies and value functions efficiently for continuous spaces."
},
{
"id": 89,
"questionText": "An agent receives multiple objectives (speed, energy efficiency, safety). Which RL approach is suitable?",
"options": [
"Terminal states only",
"Ignoring rewards",
"Single-objective Q-learning only",
"Multi-objective reinforcement learning"
],
"correctAnswerIndex": 3,
"explanation": "Multi-objective RL allows the agent to optimize across several competing criteria simultaneously."
},
{
"id": 90,
"questionText": "Which scenario exemplifies an agent learning in a non-stationary environment?",
"options": [
"Chess with fixed rules",
"A fixed maze with stationary obstacles",
"A deterministic robot path",
"Stock market prices changing over time"
],
"correctAnswerIndex": 3,
"explanation": "Non-stationary environments change over time, requiring the agent to adapt continuously."
},
{
"id": 91,
"questionText": "In a warehouse robot problem, how can partial observability be mitigated?",
"options": [
"Using sensors, memory, or belief states",
"Terminal state modifications",
"Ignoring hidden objects",
"Reward shaping only"
],
"correctAnswerIndex": 0,
"explanation": "Advanced sensors and state estimation techniques help infer hidden or occluded parts of the environment."
},
{
"id": 92,
"questionText": "An agent must choose actions under uncertainty. Which property of the RL model is critical?",
"options": [
"Terminal states only",
"Fixed policy only",
"Deterministic rewards only",
"Transition probabilities (stochastic model)"
],
"correctAnswerIndex": 3,
"explanation": "Knowing or estimating transition probabilities is essential when dealing with uncertainty in the environment."
},
{
"id": 93,
"questionText": "In a robot soccer game, which component captures positions, velocities, and ball possession?",
"options": [
"Reward",
"Action",
"Policy",
"State"
],
"correctAnswerIndex": 3,
"explanation": "The state must represent all relevant information needed to make decisions."
},
{
"id": 94,
"questionText": "Which scenario demonstrates the credit assignment problem?",
"options": [
"Terminal state immediately reached",
"Agent receives a reward at the end of a long sequence of actions",
"Deterministic transitions only",
"Reward at every step"
],
"correctAnswerIndex": 1,
"explanation": "When rewards are delayed, the agent must determine which earlier actions were responsible."
},
{
"id": 95,
"questionText": "A reinforcement learning agent uses both exploration and exploitation. Which method represents this?",
"options": [
"Terminal state selection",
"Random actions only",
"Deterministic policy only",
"ε-greedy policy"
],
"correctAnswerIndex": 3,
"explanation": "An ε-greedy policy allows the agent to explore with probability ε and exploit the best-known action otherwise."
},
{
"id": 96,
"questionText": "An agent must operate in real-time with changing conditions. Which design consideration is essential?",
"options": [
"Delayed reward only",
"Fixed value function only",
"Adaptive policy updates",
"Ignoring stochasticity"
],
"correctAnswerIndex": 2,
"explanation": "Real-time adaptation is necessary for performance in dynamic, changing environments."
},
{
"id": 97,
"questionText": "A robot navigating in fog receives uncertain sensor readings. Which approach is appropriate?",
"options": [
"Terminal rewards only",
"Use belief states or filtering methods to estimate the true state",
"Use deterministic policy only",
"Ignore sensor noise"
],
"correctAnswerIndex": 1,
"explanation": "Filtering techniques help the agent estimate the true state from noisy or partial observations."
},
{
"id": 98,
"questionText": "An agent must balance speed and energy usage in a task. Which RL concept helps design its behavior?",
"options": [
"State space reduction only",
"Random actions only",
"Terminal states only",
"Reward function that combines multiple objectives"
],
"correctAnswerIndex": 3,
"explanation": "Designing a reward function that incorporates multiple objectives guides the agent to optimal trade-offs."
},
{
"id": 99,
"questionText": "Which situation requires an agent to maintain memory of previous states?",
"options": [
"Terminal states only",
"Deterministic rewards only",
"Fully observable deterministic environments",
"Partially observable environments"
],
"correctAnswerIndex": 3,
"explanation": "Memory or belief states are needed when the agent cannot directly observe the full environment."
},
{
"id": 100,
"questionText": "A reinforcement learning agent must adapt policies as other agents change strategies. Which environment type is this?",
"options": [
"Multi-agent and non-stationary",
"Terminal state only",
"Single-agent deterministic",
"Episodic only"
],
"correctAnswerIndex": 0,
"explanation": "The agent interacts with other adaptive agents, making the environment non-stationary and multi-agent."
}
]
}
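
The schema above is simple: a title, a description, and a questions array whose entries each carry an id, questionText, options, correctAnswerIndex, and explanation. As a minimal sketch of how such a file might be consumed, the standard-library Python below loads it and sanity-checks every entry; the local filename Agent_Environment_State.json is an assumption taken from the repository path and can be changed to wherever the file is saved.

# Minimal sketch (assumption: the JSON above is saved locally as
# "Agent_Environment_State.json"; only the standard library is used).
import json

with open("Agent_Environment_State.json", encoding="utf-8") as f:
    quiz = json.load(f)

print(quiz["title"])                        # dataset title
print(len(quiz["questions"]), "questions")  # expected: 100

for q in quiz["questions"]:
    # Each correctAnswerIndex must point at one of the listed options,
    # and each question should ship with a non-empty explanation.
    assert 0 <= q["correctAnswerIndex"] < len(q["options"]), q["id"]
    assert q["explanation"].strip(), q["id"]

# Example: show one question together with its keyed answer.
first = quiz["questions"][0]
print(first["questionText"])
print("Answer:", first["options"][first["correctAnswerIndex"]])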