{
"title": "Decision Tree Regression Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions designed to teach and test your understanding of Decision Tree Regression, covering basic concepts, splitting criteria, pruning, overfitting, hyperparameter tuning, and real-world scenarios.",
"questions": [
{
"id": 1,
"questionText": "What is the main goal of Decision Tree Regression?",
"options": [
"Reduce dimensionality",
"Predict continuous target values using a tree structure",
"Classify data into categories",
"Cluster data points"
],
"correctAnswerIndex": 1,
"explanation": "Decision Tree Regression predicts continuous values by splitting the data into subsets based on feature thresholds."
},
{
"id": 2,
"questionText": "Which criterion is commonly used to decide splits in Decision Tree Regression?",
"options": [
"Mean Squared Error (MSE)",
"Gini Index",
"Entropy",
"Silhouette Score"
],
"correctAnswerIndex": 0,
"explanation": "MSE is commonly used to minimize the variance of the target variable in the child nodes."
},
{
"id": 3,
"questionText": "What is overfitting in Decision Tree Regression?",
"options": [
"When the tree is too shallow",
"When predictions are always zero",
"When data is not standardized",
"When the tree captures noise in the training data"
],
"correctAnswerIndex": 3,
"explanation": "Overfitting occurs when a tree becomes too complex, capturing noise instead of general patterns."
},
{
"id": 4,
"questionText": "Which hyperparameter helps control the maximum depth of a Decision Tree?",
"options": [
"max_depth",
"criterion",
"min_samples_split",
"gamma"
],
"correctAnswerIndex": 0,
"explanation": "max_depth limits how deep the tree can grow, preventing overfitting."
},
{
"id": 5,
"questionText": "Scenario: A Decision Tree is trained with max_depth=None on a small dataset. Observation?",
"options": [
"Tree ignores features",
"Tree likely overfits",
"Tree underfits",
"Predictions are zero"
],
"correctAnswerIndex": 1,
"explanation": "Without depth limitation, the tree can grow too complex and overfit small datasets."
},
{
"id": 6,
"questionText": "What is min_samples_split in Decision Tree Regression?",
"options": [
"Minimum samples required to split an internal node",
"Maximum number of features used",
"Maximum depth of the tree",
"Minimum samples required at a leaf node"
],
"correctAnswerIndex": 0,
"explanation": "min_samples_split controls when a node can be split, helping to regularize the tree."
},
{
"id": 7,
"questionText": "Scenario: Decision Tree with very high min_samples_split. Observation?",
"options": [
"Tree underfits, predictions may be too coarse",
"Features ignored",
"Tree overfits",
"Predictions perfect"
],
"correctAnswerIndex": 0,
"explanation": "High min_samples_split prevents many splits, making the tree simpler and possibly underfitting."
},
{
"id": 8,
"questionText": "Which method can be used to reduce overfitting in Decision Tree Regression?",
"options": [
"Using all features without restriction",
"Pruning",
"Reducing training data",
"Increasing max_depth without limit"
],
"correctAnswerIndex": 1,
"explanation": "Pruning removes unnecessary splits to improve generalization and prevent overfitting."
},
{
"id": 9,
"questionText": "What is the role of a leaf node in Decision Tree Regression?",
"options": [
"Counts the number of features",
"Measures feature importance",
"Contains the predicted value",
"Decides feature splits"
],
"correctAnswerIndex": 2,
"explanation": "Leaf nodes hold the predicted output value for the observations in that node."
},
{
"id": 10,
"questionText": "Scenario: Tree is very deep and training error is near zero but test error is high. Observation?",
"options": [
"Tree ignores features",
"Tree is overfitting",
"Tree predictions are unbiased",
"Tree is underfitting"
],
"correctAnswerIndex": 1,
"explanation": "A very deep tree may perfectly fit the training data but fail to generalize, causing overfitting."
},
{
"id": 11,
"questionText": "Which splitting strategy minimizes variance in Decision Tree Regression?",
"options": [
"Maximizing entropy",
"Maximizing Gini index",
"Choosing splits that minimize MSE in child nodes",
"Random splits"
],
"correctAnswerIndex": 2,
"explanation": "Splits are chosen to minimize the mean squared error of target values in child nodes."
},
{
"id": 12,
"questionText": "Scenario: Dataset has categorical features with many levels. Decision Tree Observation?",
"options": [
"Tree may create too many splits, overfitting possible",
"Tree ignores categorical features",
"Tree always underfits",
"Tree cannot handle categorical data"
],
"correctAnswerIndex": 0,
"explanation": "High-cardinality categorical features can lead to many splits, increasing overfitting risk."
},
{
"id": 13,
"questionText": "What does min_samples_leaf control?",
"options": [
"Minimum number of samples required in a leaf node",
"Kernel choice",
"Maximum depth of the tree",
"Minimum split threshold"
],
"correctAnswerIndex": 0,
"explanation": "min_samples_leaf ensures leaf nodes have a minimum number of samples, preventing overfitting to very small subsets."
},
{
"id": 14,
"questionText": "Scenario: Decision Tree with min_samples_leaf=10 on small dataset. Observation?",
"options": [
"Tree may underfit, predictions coarser",
"Leaf nodes empty",
"Tree ignores features",
"Tree overfits"
],
"correctAnswerIndex": 0,
"explanation": "High min_samples_leaf prevents fine splits, which may underfit small datasets."
},
{
"id": 15,
"questionText": "Which metric is used to measure feature importance in Decision Tree Regression?",
"options": [
"Silhouette Score",
"Entropy",
"Reduction in MSE due to splits on the feature",
"Gini Index"
],
"correctAnswerIndex": 2,
"explanation": "Features contributing more to variance reduction are considered more important."
},
{
"id": 16,
"questionText": "Scenario: Feature importance shows some features are zero. Observation?",
"options": [
"They are most important",
"Tree ignored all features",
"Those features do not contribute to splits",
"Training error is zero"
],
"correctAnswerIndex": 2,
"explanation": "Features with zero importance do not improve splits in the tree."
},
{
"id": 17,
"questionText": "Which is a disadvantage of Decision Tree Regression?",
"options": [
"Always underfits",
"Cannot handle categorical data",
"Prone to overfitting if not regularized",
"Cannot handle continuous data"
],
"correctAnswerIndex": 2,
"explanation": "Decision trees can overfit training data without depth or sample restrictions."
},
{
"id": 18,
"questionText": "Scenario: Decision Tree applied to noisy dataset without restrictions. Observation?",
"options": [
"Tree ignores features",
"Tree underfits",
"Tree overfits noise",
"Training error high"
],
"correctAnswerIndex": 2,
"explanation": "Unrestricted trees fit all variations including noise, causing overfitting."
},
{
"id": 19,
"questionText": "Scenario: Decision Tree applied with max_features='sqrt'. Observation?",
"options": [
"All features ignored",
"Training error zero",
"Random subset of features considered for splits, reduces overfitting",
"Tree depth unlimited"
],
"correctAnswerIndex": 2,
"explanation": "Limiting features per split reduces variance and improves generalization."
},
{
"id": 20,
"questionText": "Scenario: Tree with very small max_leaf_nodes. Observation?",
"options": [
"Tree ignores features",
"Tree overfits",
"Tree underfits due to limited leaves",
"Training error zero"
],
"correctAnswerIndex": 2,
"explanation": "Restricting leaves reduces tree complexity, which may lead to underfitting."
},
{
"id": 21,
"questionText": "Scenario: Tree applied to dataset with continuous features. Observation: splits based on thresholds. Why?",
"options": [
"Tree uses Gini",
"Decision Tree Regression splits continuous features using thresholds to minimize variance",
"Tree uses entropy",
"Tree ignores continuous features"
],
"correctAnswerIndex": 1,
"explanation": "Continuous features are split at values that minimize MSE in child nodes."
},
{
"id": 22,
"questionText": "Scenario: Tree applied with min_impurity_decrease=0.01. Observation?",
"options": [
"Tree always overfits",
"Tree ignores features",
"Only splits that reduce impurity by 0.01 or more are made, helps prevent overfitting",
"Training error zero"
],
"correctAnswerIndex": 2,
"explanation": "Setting a minimum impurity decrease prevents unnecessary splits, regularizing the tree."
},
{
"id": 23,
"questionText": "Scenario: Tree applied to dataset with correlated features. Observation?",
"options": [
"Tree fails",
"Tree may select one correlated feature for splits, ignoring others",
"Tree overfits all features",
"Tree underfits"
],
"correctAnswerIndex": 1,
"explanation": "Decision trees choose splits greedily; correlated features may be ignored once one is used."
},
{
"id": 24,
"questionText": "Scenario: Decision Tree applied to dataset with outliers. Observation?",
"options": [
"Tree ignores outliers",
"Tree may overfit outliers if not pruned",
"Tree underfits",
"Training error zero"
],
"correctAnswerIndex": 1,
"explanation": "Extreme values can cause splits that focus too much on outliers, overfitting the model."
},
{
"id": 25,
"questionText": "Scenario: Decision Tree applied with random_state set. Observation?",
"options": [
"Tree overfits",
"Tree underfits",
"Randomly ignores features",
"Ensures reproducible results"
],
"correctAnswerIndex": 3,
"explanation": "Setting random_state makes the tree building process deterministic, allowing reproducibility."
},
{
"id": 26,
"questionText": "Scenario: Decision Tree applied to housing dataset with max_depth=3. Observation?",
"options": [
"Tree overfits",
"Training error zero",
"Tree may underfit due to shallow depth",
"Tree ignores features"
],
"correctAnswerIndex": 2,
"explanation": "Shallow trees may fail to capture complex patterns, leading to underfitting."
},
{
"id": 27,
"questionText": "Scenario: Tree trained on noisy stock prices with max_depth=None. Observation?",
"options": [
"Tree underfits",
"Tree overfits noise, poor generalization",
"Features ignored",
"Predictions smooth"
],
"correctAnswerIndex": 1,
"explanation": "Unlimited depth allows the tree to capture every fluctuation, including noise."
},
{
"id": 28,
"questionText": "Scenario: Tree applied to dataset with outliers. Observation: pruning applied. Effect?",
"options": [
"Tree ignores all data",
"Tree underfits entirely",
"Reduces overfitting to outliers, better generalization",
"Training error zero"
],
"correctAnswerIndex": 2,
"explanation": "Pruning removes unnecessary splits caused by outliers, improving generalization."
},
{
"id": 29,
"questionText": "Scenario: Decision Tree applied with min_samples_split=50 on a dataset of 500 samples. Observation?",
"options": [
"Tree overfits",
"Tree is simpler, may underfit local patterns",
"Tree ignores features",
"Training error zero"
],
"correctAnswerIndex": 1,
"explanation": "Requiring 50 samples to split reduces the number of splits and may miss finer patterns."
},
{
"id": 30,
"questionText": "Scenario: Tree applied with min_samples_leaf=20 on small dataset. Observation?",
"options": [
"Tree underfits, coarse predictions",
"Training error zero",
"Tree overfits",
"Leaf nodes empty"
],
"correctAnswerIndex": 0,
"explanation": "Minimum leaf size prevents small leaves, simplifying the tree and possibly underfitting."
},
{
"id": 31,
"questionText": "Scenario: Tree applied to dataset with 1000 features. Observation: max_features='sqrt'. Effect?",
"options": [
"Training error zero",
"Tree ignores features",
"Tree depth unlimited",
"Random subset considered for splits, reduces overfitting"
],
"correctAnswerIndex": 3,
"explanation": "Using only a subset of features per split helps control variance and prevents overfitting."
},
{
"id": 32,
"questionText": "Scenario: Tree applied with max_leaf_nodes=10. Observation?",
"options": [
"Tree underfits due to limited complexity",
"Tree ignores features",
"Tree overfits",
"Training error zero"
],
"correctAnswerIndex": 0,
"explanation": "Limiting the number of leaves reduces tree complexity, potentially causing underfitting."
},
{
"id": 33,
"questionText": "Scenario: Tree applied to dataset with highly correlated features. Observation?",
"options": [
"Tree uses all features equally",
"Tree underfits",
"Tree fails to train",
"Tree may favor one correlated feature, ignoring others"
],
"correctAnswerIndex": 3,
"explanation": "Greedy splitting selects one feature and ignores redundant correlated features."
},
{
"id": 34,
"questionText": "Scenario: Decision Tree applied with min_impurity_decrease=0.05. Observation?",
"options": [
"Tree ignores features",
"Training error zero",
"Tree always overfits",
"Only splits reducing impurity ≥0.05 are made, regularizes tree"
],
"correctAnswerIndex": 3,
"explanation": "Setting minimum impurity decrease prevents unnecessary splits, improving generalization."
},
{
"id": 35,
"questionText": "Scenario: Tree applied to dataset with categorical features of high cardinality. Observation?",
"options": [
"Tree may overfit due to many splits",
"Tree underfits",
"Tree fails to train",
"Tree ignores categories"
],
"correctAnswerIndex": 0,
"explanation": "High-cardinality categorical features can lead to over-complex splits and overfitting."
},
{
"id": 36,
"questionText": "Scenario: Decision Tree applied to time-series dataset without feature engineering. Observation?",
"options": [
"Tree ignores features",
"Training error zero",
"Tree overfits",
"Tree may not capture temporal patterns"
],
"correctAnswerIndex": 3,
"explanation": "Decision trees cannot inherently capture sequential patterns without engineered features."
},
{
"id": 37,
"questionText": "Scenario: Tree applied to dataset with extreme outliers. Observation?",
"options": [
"Tree ignores outliers automatically",
"Tree underfits",
"Tree may overfit to extreme values if not regularized",
"Training error zero"
],
"correctAnswerIndex": 2,
"explanation": "Outliers can create splits that distort predictions, overfitting the model."
},
{
"id": 38,
"questionText": "Scenario: Decision Tree applied with random_state set. Observation?",
"options": [
"Results are reproducible",
"Training error zero",
"Tree underfits",
"Tree ignores features"
],
"correctAnswerIndex": 0,
"explanation": "Setting random_state ensures deterministic tree construction, making results reproducible."
},
{
"id": 39,
"questionText": "Scenario: Tree applied with max_depth=5 and high noise. Observation?",
"options": [
"Tree ignores features",
"Tree perfectly fits data",
"May underfit some patterns, partially overfit noise",
"Training error zero"
],
"correctAnswerIndex": 2,
"explanation": "Limited depth may underfit trends but still capture some noise, leading to mixed performance."
},
{
"id": 40,
"questionText": "Scenario: Decision Tree applied to dataset with missing values. Observation?",
"options": [
"Tree underfits",
"Tree ignores missing values automatically",
"Tree cannot handle missing values directly; preprocessing needed",
"Tree overfits"
],
"correctAnswerIndex": 2,
"explanation": "Decision trees require complete data or proper imputation before training."
},
{
"id": 41,
"questionText": "Scenario: Decision Tree applied to dataset with highly skewed target. Observation?",
"options": [
"Tree underfits",
"Tree ignores skew",
"Tree predictions may be biased toward dominant target values",
"Tree perfectly predicts"
],
"correctAnswerIndex": 2,
"explanation": "Skewed targets can cause trees to favor majority values, reducing accuracy on rare cases."
},
{
"id": 42,
"questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?",
"options": [
"Tree ignores continuous features",
"Tree fails",
"Tree ignores categorical features",
"Tree can handle both, splits on thresholds for continuous and categories for categorical"
],
"correctAnswerIndex": 3,
"explanation": "Decision trees can split both types, using appropriate thresholds or categories."
},
{
"id": 43,
"questionText": "Scenario: Decision Tree applied with criterion='mse'. Observation?",
"options": [
"Tree ignores criterion",
"Splits minimize mean squared error in child nodes",
"Splits maximize entropy",
"Splits maximize Gini index"
],
"correctAnswerIndex": 1,
"explanation": "MSE is used to reduce variance and improve regression accuracy at splits."
},
{
"id": 44,
"questionText": "Scenario: Tree applied with very small min_samples_split. Observation?",
"options": [
"Tree underfits",
"Tree may overfit by creating many small splits",
"Training error zero",
"Tree ignores features"
],
"correctAnswerIndex": 1,
"explanation": "Small min_samples_split allows splits on tiny subsets, causing overfitting."
},
{
"id": 45,
"questionText": "Scenario: Tree applied with max_features=None on high-dimensional data. Observation?",
"options": [
"Tree ignores some features",
"Tree underfits",
"Tree considers all features at each split, may overfit",
"Training error zero"
],
"correctAnswerIndex": 2,
"explanation": "Using all features increases variance and can lead to overfitting."
},
{
"id": 46,
"questionText": "Scenario: Tree applied to dataset with many redundant features. Observation?",
"options": [
"Tree underfits",
"Tree fails",
"Tree uses all features equally",
"Tree may ignore redundant features after selecting one correlated feature"
],
"correctAnswerIndex": 3,
"explanation": "Greedy splitting selects the most informative feature and ignores others."
},
{
"id": 47,
"questionText": "Scenario: Decision Tree applied to small dataset. Observation: high variance in predictions across different train/test splits. Reason?",
"options": [
"Tree always underfits",
"Training error zero",
"Tree ignores features",
"Trees are sensitive to small data changes, high variance"
],
"correctAnswerIndex": 3,
"explanation": "Small datasets cause instability in tree splits, resulting in high variance."
},
{
"id": 48,
"questionText": "Scenario: Tree applied to a dataset with uniform target values. Observation?",
"options": [
"Tree overfits",
"Tree will have shallow depth; predictions equal to uniform target",
"Tree underfits",
"Training error zero"
],
"correctAnswerIndex": 1,
"explanation": "If target values are uniform, splits do not reduce variance; tree remains shallow."
},
{
"id": 49,
"questionText": "Scenario: Tree applied to dataset with features scaled differently. Observation?",
"options": [
"Tree underfits",
"Training error zero",
"Decision Tree unaffected by feature scaling",
"Tree overfits"
],
"correctAnswerIndex": 2,
"explanation": "Decision Trees are scale-invariant; feature scaling does not affect splits."
},
{
"id": 50,
"questionText": "Scenario: Decision Tree applied with max_depth=10, min_samples_leaf=5. Observation?",
"options": [
"Tree balances depth and leaf size, reducing overfitting",
"Tree ignores features",
"Tree overfits",
"Tree underfits"
],
"correctAnswerIndex": 0,
"explanation": "Limiting depth and minimum leaf samples helps regularize the tree and improve generalization."
},
{
"id": 51,
"questionText": "Scenario: Decision Tree applied to financial dataset with max_depth=None and min_samples_split=2. Observation?",
"options": [
"Tree underfits",
"Tree likely overfits due to unlimited depth and small splits",
"Tree ignores features",
"Predictions smooth"
],
"correctAnswerIndex": 1,
"explanation": "Unlimited depth and tiny splits allow the tree to capture all noise, causing overfitting."
},
{
"id": 52,
"questionText": "Scenario: Tree applied to dataset with extreme outliers. Observation: min_samples_leaf=10 applied. Effect?",
"options": [
"Tree ignores features",
"Tree overfits outliers",
"Tree becomes more robust to outliers",
"Training error zero"
],
"correctAnswerIndex": 2,
"explanation": "Minimum leaf size prevents leaves from fitting extreme individual outliers, improving robustness."
},
{
"id": 53,
"questionText": "Scenario: Tree applied to housing dataset. Observation: max_features='sqrt' used. Benefit?",
"options": [
"Tree ignores most features",
"Tree depth unlimited",
"Reduces variance and prevents overfitting by using feature subsets at each split",
"Tree underfits"
],
"correctAnswerIndex": 2,
"explanation": "Random subsets per split regularize the tree, balancing bias and variance."
},
{
"id": 54,
"questionText": "Scenario: Decision Tree applied with criterion='friedman_mse'. Observation?",
"options": [
"Tree ignores MSE",
"Tree underfits",
"Tree fails",
"Optimized for boosting algorithms, may improve split selection"
],
"correctAnswerIndex": 3,
"explanation": "Friedman MSE is designed for boosting, accounting for residuals in regression tasks."
},
{
"id": 55,
"questionText": "Scenario: Tree applied to small dataset with many features. Observation: high variance in predictions. Reason?",
"options": [
"Tree ignores features",
"Training error zero",
"Greedy splits sensitive to small changes, causing high variance",
"Tree underfits"
],
"correctAnswerIndex": 2,
"explanation": "Small datasets are prone to unstable splits, resulting in varying predictions."
},
{
"id": 56,
"questionText": "Scenario: Tree applied with max_leaf_nodes=15. Observation?",
"options": [
"Tree overfits",
"Limits complexity, helps prevent overfitting",
"Tree ignores features",
"Tree underfits completely"
],
"correctAnswerIndex": 1,
"explanation": "Restricting leaves reduces tree complexity, acting as regularization."
},
{
"id": 57,
"questionText": "Scenario: Tree applied to dataset with categorical features of high cardinality. Observation?",
"options": [
"Tree ignores categorical features",
"Tree may overfit due to numerous splits on categories",
"Tree fails",
"Tree underfits"
],
"correctAnswerIndex": 1,
"explanation": "Many categories can produce too many splits, increasing risk of overfitting."
},
{
"id": 58,
"questionText": "Scenario: Decision Tree applied to time-series dataset without lag features. Observation?",
"options": [
"Tree underfits",
"Tree may not capture temporal dependencies",
"Training error zero",
"Tree overfits"
],
"correctAnswerIndex": 1,
"explanation": "Decision Trees cannot inherently model sequences; engineered features like lagged variables are needed."
},
{
"id": 59,
"questionText": "Scenario: Tree applied with min_impurity_decrease=0.02. Observation?",
"options": [
"Tree ignores features",
"Tree always overfits",
"Only splits that reduce impurity ≥0.02 are made, preventing overfitting",
"Training error zero"
],
"correctAnswerIndex": 2,
"explanation": "Minimum impurity decrease restricts splits, acting as a regularization technique."
},
{
"id": 60,
"questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=5 on noisy dataset. Observation?",
"options": [
"Tree overfits",
"Tree balances depth and leaf size, reducing overfitting",
"Tree ignores features",
"Tree underfits completely"
],
"correctAnswerIndex": 1,
"explanation": "Combining depth limit and minimum leaf size regularizes the tree for better generalization."
},
{
"id": 61,
"questionText": "Scenario: Tree applied to dataset with uniform target values. Observation?",
"options": [
"Tree remains shallow, predicts uniform target",
"Training error zero",
"Tree overfits",
"Tree underfits"
],
"correctAnswerIndex": 0,
"explanation": "Uniform targets do not create variance; tree does not split further."
},
{
"id": 62,
"questionText": "Scenario: Tree applied with max_depth=5 on dataset with strong non-linear patterns. Observation?",
"options": [
"Tree may underfit due to limited depth",
"Tree ignores features",
"Tree overfits",
"Training error zero"
],
"correctAnswerIndex": 0,
"explanation": "Shallow trees cannot capture complex patterns, causing underfitting."
},
{
"id": 63,
"questionText": "Scenario: Decision Tree applied to financial dataset with high outlier presence. Observation?",
"options": [
"Tree underfits",
"Tree splits may overfit outliers without pruning or min_samples_leaf",
"Tree ignores outliers automatically",
"Training error zero"
],
"correctAnswerIndex": 1,
"explanation": "Extreme values can cause splits that focus too much on outliers."
},
{
"id": 64,
"questionText": "Scenario: Tree applied to dataset with missing values. Observation?",
"options": [
"Tree ignores missing values automatically",
"Tree overfits",
"Tree requires imputation; cannot handle missing values directly",
"Tree underfits"
],
"correctAnswerIndex": 2,
"explanation": "Decision Trees need complete data or preprocessing to handle missing values."
},
{
"id": 65,
"questionText": "Scenario: Tree applied with max_features=None on high-dimensional dataset. Observation?",
"options": [
"Tree ignores some features",
"Tree underfits",
"Training error zero",
"Tree considers all features per split, may overfit"
],
"correctAnswerIndex": 3,
"explanation": "Using all features at each split increases variance and risk of overfitting."
},
{
"id": 66,
"questionText": "Scenario: Tree applied with very small min_samples_split. Observation?",
"options": [
"Tree may overfit due to tiny splits",
"Tree ignores features",
"Tree underfits",
"Training error zero"
],
"correctAnswerIndex": 0,
"explanation": "Small min_samples_split allows splitting on tiny subsets, increasing overfitting risk."
},
{
"id": 67,
"questionText": "Scenario: Tree applied to dataset with skewed target distribution. Observation?",
"options": [
"Tree underfits",
"Tree ignores skew",
"Tree may bias predictions toward dominant target values",
"Tree perfectly predicts"
],
"correctAnswerIndex": 2,
"explanation": "Skewed targets can bias splits toward majority values, reducing accuracy for minority ranges."
},
{
"id": 68,
"questionText": "Scenario: Tree applied with random_state set. Observation?",
"options": [
"Training error zero",
"Tree ignores features",
"Ensures reproducible results",
"Tree underfits"
],
"correctAnswerIndex": 2,
"explanation": "Setting random_state ensures deterministic tree construction."
},
{
"id": 69,
"questionText": "Scenario: Tree applied with max_depth=10 and min_samples_leaf=2 on noisy dataset. Observation?",
"options": [
"Tree underfits",
"May overfit noise despite some leaf constraints",
"Tree ignores features",
"Training error zero"
],
"correctAnswerIndex": 1,
"explanation": "Even with min_samples_leaf=2, deep trees can still overfit noisy data."
},
{
"id": 70,
"questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?",
"options": [
"Tree can handle both; splits thresholds for continuous and categories for categorical",
"Tree fails",
"Tree ignores categorical features",
"Tree ignores continuous features"
],
"correctAnswerIndex": 0,
"explanation": "Decision Trees split both types appropriately."
},
{
"id": 71,
"questionText": "Scenario: Tree applied to dataset with correlated features. Observation?",
"options": [
"Tree fails",
"Tree underfits",
"Tree uses all features equally",
"Tree may select one correlated feature for split, ignoring others"
],
"correctAnswerIndex": 3,
"explanation": "Greedy splits select the most informative feature and ignore redundant correlated features."
},
{
"id": 72,
"questionText": "Scenario: Tree applied to small dataset. Observation: prediction varies with different train/test splits. Reason?",
"options": [
"High variance due to sensitivity to small data changes",
"Tree always underfits",
"Training error zero",
"Tree ignores features"
],
"correctAnswerIndex": 0,
"explanation": "Small datasets cause unstable splits, leading to high variance."
},
{
"id": 73,
"questionText": "Scenario: Tree applied with max_depth=6 on dataset with complex patterns. Observation?",
"options": [
"Tree may underfit due to limited depth",
"Training error zero",
"Tree overfits",
"Tree ignores features"
],
"correctAnswerIndex": 0,
"explanation": "Limited depth restricts tree complexity and may underfit complex relationships."
},
{
"id": 74,
"questionText": "Scenario: Tree applied to dataset with features scaled differently. Observation?",
"options": [
"Tree overfits",
"Tree underfits",
"Training error zero",
"Decision Tree is scale-invariant; scaling has no effect"
],
"correctAnswerIndex": 3,
"explanation": "Decision Trees do not rely on feature magnitudes; scaling does not affect splits."
},
{
"id": 75,
"questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=4. Observation?",
"options": [
"Tree overfits",
"Tree underfits",
"Tree ignores features",
"Tree balances depth and leaf size, reducing overfitting"
],
"correctAnswerIndex": 3,
"explanation": "Combination of depth and leaf constraints helps tree generalize better."
},
{
"id": 76,
"questionText": "Scenario: Tree applied to very small dataset with max_depth=None. Observation?",
"options": [
"Tree underfits",
"Training error high",
"Tree ignores features",
"Tree highly overfits, predictions unstable"
],
"correctAnswerIndex": 3,
"explanation": "Unlimited depth on small data leads to capturing noise, causing overfitting and instability."
},
{
"id": 77,
"questionText": "Scenario: Decision Tree applied with min_samples_split very high. Observation?",
"options": [
"Tree underfits due to few splits",
"Tree ignores features",
"Tree overfits",
"Training error zero"
],
"correctAnswerIndex": 0,
"explanation": "High min_samples_split prevents many splits, simplifying the tree and possibly underfitting."
},
{
"id": 78,
"questionText": "Scenario: Tree applied with max_features=1 on dataset with 50 features. Observation?",
"options": [
"Tree overfits",
"Tree underfits completely",
"Tree uses all features equally",
"Tree uses only one feature per split, reduces overfitting but may underfit"
],
"correctAnswerIndex": 3,
"explanation": "Limiting to one feature per split introduces randomness, reducing variance but may increase bias."
},
{
"id": 79,
"questionText": "Scenario: Tree applied to dataset with extreme outliers in target. Observation?",
"options": [
"Tree may create leaves specifically fitting outliers, overfitting",
"Tree ignores outliers automatically",
"Training error zero",
"Tree underfits"
],
"correctAnswerIndex": 0,
"explanation": "Decision Trees can focus on extreme values, creating splits that overfit outliers."
},
{
"id": 80,
"questionText": "Scenario: Tree applied to dataset with skewed categorical features. Observation?",
"options": [
"Tree may bias splits toward frequent categories",
"Tree ignores categories",
"Tree underfits",
"Tree perfectly predicts"
],
"correctAnswerIndex": 0,
"explanation": "Highly imbalanced categories influence the tree to favor majority categories in splits."
},
{
"id": 81,
"questionText": "Scenario: Tree applied to dataset with missing values. Observation?",
"options": [
"Tree overfits",
"Tree requires preprocessing; cannot handle missing values directly",
"Tree ignores missing values automatically",
"Tree underfits"
],
"correctAnswerIndex": 1,
"explanation": "Decision Trees need complete data or imputation before training."
},
{
"id": 82,
"questionText": "Scenario: Tree applied with criterion='poisson'. Observation?",
"options": [
"Tree ignores criterion",
"Tree overfits",
"Optimized for count data, splits minimize Poisson deviance",
"Tree underfits"
],
"correctAnswerIndex": 2,
"explanation": "Poisson criterion is used for regression tasks with count targets."
},
{
"id": 83,
"questionText": "Scenario: Tree applied to dataset with both continuous and categorical features. Observation?",
"options": [
"Tree handles both; continuous via thresholds, categorical via category splits",
"Tree ignores continuous features",
"Tree ignores categorical features",
"Tree fails"
],
"correctAnswerIndex": 0,
"explanation": "Decision Trees split both types appropriately."
},
{
"id": 84,
"questionText": "Scenario: Tree applied to time-series dataset without feature engineering. Observation?",
"options": [
"Tree overfits",
"Tree underfits",
"Training error zero",
"Tree may not capture temporal dependencies"
],
"correctAnswerIndex": 3,
"explanation": "Decision Trees require features like lag variables to capture temporal patterns."
},
{
"id": 85,
"questionText": "Scenario: Tree applied with max_depth=3 on dataset with complex non-linear relationships. Observation?",
"options": [
"Tree underfits due to shallow depth",
"Training error zero",
"Tree ignores features",
"Tree overfits"
],
"correctAnswerIndex": 0,
"explanation": "Shallow depth limits the ability to capture complex patterns, leading to underfitting."
},
{
"id": 86,
"questionText": "Scenario: Tree applied with min_impurity_decrease=0.1. Observation?",
"options": [
"Tree ignores features",
"Training error zero",
"Only splits reducing impurity ≥0.1 are allowed, regularizing tree",
"Tree overfits"
],
"correctAnswerIndex": 2,
"explanation": "Minimum impurity decrease prevents unnecessary splits, controlling complexity."
},
{
"id": 87,
"questionText": "Scenario: Tree applied to dataset with skewed target values. Observation?",
"options": [
"Tree underfits",
"Tree ignores skew",
"Tree perfectly predicts",
"Tree may bias predictions toward dominant target values"
],
"correctAnswerIndex": 3,
"explanation": "Skewed targets can bias splits toward majority values, reducing accuracy for minority ranges."
},
{
"id": 88,
"questionText": "Scenario: Tree applied with very small min_samples_leaf. Observation?",
"options": [
"Tree ignores features",
"Tree may overfit by creating tiny leaves",
"Training error zero",
"Tree underfits"
],
"correctAnswerIndex": 1,
"explanation": "Small leaves can cause overfitting to minor fluctuations or noise."
},
{
"id": 89,
"questionText": "Scenario: Tree applied with max_leaf_nodes=5. Observation?",
"options": [
"Tree underfits due to limited leaf complexity",
"Tree ignores features",
"Training error zero",
"Tree overfits"
],
"correctAnswerIndex": 0,
"explanation": "Restricting leaves limits the tree's ability to capture detailed patterns."
},
{
"id": 90,
"questionText": "Scenario: Tree applied with max_features=None on high-dimensional data. Observation?",
"options": [
"Tree ignores some features",
"Tree considers all features per split, may overfit",
"Training error zero",
"Tree underfits"
],
"correctAnswerIndex": 1,
"explanation": "Using all features increases variance, risking overfitting."
},
{
"id": 91,
"questionText": "Scenario: Tree applied to dataset with correlated features. Observation?",
"options": [
"Tree may select one correlated feature, ignoring others",
"Tree underfits",
"Tree uses all features equally",
"Tree fails"
],
"correctAnswerIndex": 0,
"explanation": "Greedy splits pick the most informative feature and ignore redundant ones."
},
{
"id": 92,
"questionText": "Scenario: Tree applied to dataset with small sample size. Observation?",
"options": [
"Training error zero",
"Tree underfits",
"Predictions are unstable across train/test splits due to high variance",
"Tree ignores features"
],
"correctAnswerIndex": 2,
"explanation": "Small datasets cause instability in splits, producing high variance predictions."
},
{
"id": 93,
"questionText": "Scenario: Tree applied to dataset with extreme noise. Observation?",
"options": [
"Tree ignores features",
"Training error zero",
"Tree underfits",
"Tree may overfit noise without regularization"
],
"correctAnswerIndex": 3,
"explanation": "Unrestricted trees capture noise, reducing generalization performance."
},
{
"id": 94,
"questionText": "Scenario: Tree applied with max_depth=6, min_samples_leaf=5 on noisy dataset. Observation?",
"options": [
"Tree balances complexity and leaf constraints, better generalization",
"Tree underfits",
"Tree ignores features",
"Tree overfits"
],
"correctAnswerIndex": 0,
"explanation": "Combining depth and leaf constraints regularizes the tree for improved generalization."
},
{
"id": 95,
"questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?",
"options": [
"Tree ignores categorical features",
"Tree fails",
"Tree ignores continuous features",
"Tree can handle both; splits thresholds for continuous and categories for categorical"
],
"correctAnswerIndex": 3,
"explanation": "Decision Trees split both feature types appropriately."
},
{
"id": 96,
"questionText": "Scenario: Tree applied with random_state set. Observation?",
"options": [
"Training error zero",
"Results reproducible across runs",
"Tree underfits",
"Tree ignores features"
],
"correctAnswerIndex": 1,
"explanation": "Setting random_state ensures deterministic tree construction."
},
{
"id": 97,
"questionText": "Scenario: Tree applied to dataset with highly imbalanced categorical features. Observation?",
"options": [
"Tree underfits",
"Tree perfectly predicts",
"Splits biased toward frequent categories, may reduce accuracy for rare categories",
"Tree ignores categories"
],
"correctAnswerIndex": 2,
"explanation": "Imbalanced categories influence split decisions, potentially causing bias."
},
{
"id": 98,
"questionText": "Scenario: Tree applied to dataset with missing values. Observation?",
"options": [
"Tree underfits",
"Requires imputation or preprocessing",
"Tree overfits",
"Tree ignores missing values automatically"
],
"correctAnswerIndex": 1,
"explanation": "Decision Trees cannot handle missing values directly; preprocessing is required."
},
{
"id": 99,
"questionText": "Scenario: Tree applied to high-dimensional data with max_depth=None. Observation?",
"options": [
"Tree may overfit due to unlimited depth and many features",
"Tree ignores features",
"Tree underfits",
"Training error zero"
],
"correctAnswerIndex": 0,
"explanation": "Unlimited depth with high-dimensional features leads to over-complex splits and overfitting."
},
{
"id": 100,
"questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=4, max_features='sqrt'. Observation?",
"options": [
"Tree underfits",
"Tree overfits",
"Tree ignores features",
"Tree balances depth, leaf size, and feature selection for improved generalization"
],
"correctAnswerIndex": 3,
"explanation": "Combining depth limit, leaf constraint, and feature subset selection regularizes the tree effectively."
}
]
}
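
The questions above repeatedly contrast unrestricted trees with regularized ones (max_depth, min_samples_split, min_samples_leaf, min_impurity_decrease, random_state). The sketch below is a minimal illustration, assuming scikit-learn's DecisionTreeRegressor and a synthetic noisy-sine dataset that is not part of the quiz data; it shows the typical pattern where the unrestricted tree drives training MSE toward zero while the regularized tree usually generalizes better.

```python
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Illustrative synthetic data: a noisy sine wave.
rng = np.random.default_rng(0)
X = rng.uniform(0.0, 6.0, size=(400, 1))
y = np.sin(X).ravel() + rng.normal(scale=0.3, size=400)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Unrestricted tree: max_depth=None and min_samples_split=2 let it grow until
# leaves are (nearly) pure, so it tends to memorize training noise.
deep = DecisionTreeRegressor(max_depth=None, min_samples_split=2, random_state=0)
deep.fit(X_train, y_train)

# Regularized tree: the depth, split, and leaf constraints discussed in the
# questions keep the tree simpler and usually improve generalization.
regularized = DecisionTreeRegressor(
    max_depth=8,
    min_samples_split=20,
    min_samples_leaf=5,
    min_impurity_decrease=0.01,
    random_state=0,  # deterministic tree construction, reproducible results
)
regularized.fit(X_train, y_train)

for name, model in [("unrestricted", deep), ("regularized", regularized)]:
    train_mse = mean_squared_error(y_train, model.predict(X_train))
    test_mse = mean_squared_error(y_test, model.predict(X_test))
    print(f"{name:>13}: train MSE={train_mse:.3f}  test MSE={test_mse:.3f}  "
          f"depth={model.get_depth()}  leaves={model.get_n_leaves()}")
```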
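
Several questions name pruning as a remedy for overfitting. The following is a minimal sketch of scikit-learn's minimal cost-complexity pruning (ccp_alpha), again on illustrative synthetic data; a real workflow would select ccp_alpha by cross-validation rather than on the test split, which is done here only for brevity.

```python
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Same style of illustrative data as the previous sketch.
rng = np.random.default_rng(1)
X = rng.uniform(0.0, 6.0, size=(400, 1))
y = np.sin(X).ravel() + rng.normal(scale=0.3, size=400)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Compute the effective alphas of minimal cost-complexity pruning for a fully
# grown tree; larger ccp_alpha values prune away more of the tree.
path = DecisionTreeRegressor(random_state=1).cost_complexity_pruning_path(
    X_train, y_train
)

best_alpha, best_mse = 0.0, np.inf
# Clip guards against tiny negative alphas from floating-point round-off.
for alpha in np.clip(path.ccp_alphas, 0.0, None):
    pruned = DecisionTreeRegressor(random_state=1, ccp_alpha=alpha)
    pruned.fit(X_train, y_train)
    mse = mean_squared_error(y_test, pruned.predict(X_test))
    if mse < best_mse:
        best_alpha, best_mse = alpha, mse

print(f"best ccp_alpha={best_alpha:.4f}  test MSE={best_mse:.3f}")
```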
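
Feature importance (questions 15 and 16) is the normalized total reduction in MSE attributable to splits on each feature, so a feature never used in any split gets an importance of exactly zero. Below is a minimal sketch using scikit-learn's make_friedman1 generator, where only the first five of ten features influence the target; the noise features typically receive zero or near-zero importance.

```python
from sklearn.datasets import make_friedman1
from sklearn.tree import DecisionTreeRegressor

# make_friedman1: the target depends only on the first five of the ten
# features; the remaining five are pure noise.
X, y = make_friedman1(n_samples=500, n_features=10, noise=0.5, random_state=2)

tree = DecisionTreeRegressor(max_depth=6, random_state=2)
tree.fit(X, y)

# feature_importances_ holds the normalized total MSE reduction per feature;
# features that never appear in a split have importance exactly 0.
for i, importance in enumerate(tree.feature_importances_):
    print(f"feature {i}: importance = {importance:.3f}")
```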