{
  "title": "Decision Tree Regression Mastery: 100 MCQs",
  "description": "A comprehensive set of 100 multiple-choice questions designed to teach and test your understanding of Decision Tree Regression, covering basic concepts, splitting criteria, pruning, overfitting, hyperparameter tuning, and real-world scenarios.",
  "questions": [
    {
      "id": 1,
      "questionText": "What is the main goal of Decision Tree Regression?",
      "options": [
        "Reduce dimensionality",
        "Predict continuous target values using a tree structure",
        "Classify data into categories",
        "Cluster data points"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Decision Tree Regression predicts continuous values by splitting the data into subsets based on feature thresholds.",
    },
    {
      "id": 2,
      "questionText": "Which criterion is commonly used to decide splits in Decision Tree Regression?",
      "options": [
        "Mean Squared Error (MSE)",
        "Gini Index",
        "Entropy",
        "Silhouette Score"
      ],
      "correctAnswerIndex": 0,
      "explanation": "MSE is commonly used to minimize the variance of the target variable in the child nodes.",
    },
    {
      "id": 3,
      "questionText": "What is overfitting in Decision Tree Regression?",
      "options": [
        "When the tree is too shallow",
        "When predictions are always zero",
        "When data is not standardized",
        "When the tree captures noise in the training data"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Overfitting occurs when a tree becomes too complex, capturing noise instead of general patterns."
    },
    {
      "id": 4,
      "questionText": "Which hyperparameter helps control the maximum depth of a Decision Tree?",
      "options": [
        "max_depth",
        "criterion",
        "min_samples_split",
        "gamma"
      ],
      "correctAnswerIndex": 0,
      "explanation": "max_depth limits how deep the tree can grow, preventing overfitting.",
    },
    {
      "id": 5,
      "questionText": "Scenario: A Decision Tree is trained with max_depth=None on a small dataset. Observation?",
      "options": [
        "Tree ignores features",
        "Tree likely overfits",
        "Tree underfits",
        "Predictions are zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Without depth limitation, the tree can grow too complex and overfit small datasets."
    },
    {
      "id": 6,
      "questionText": "What is min_samples_split in Decision Tree Regression?",
      "options": [
        "Minimum samples required to split an internal node",
        "Maximum number of features used",
        "Maximum depth of the tree",
        "Minimum samples required at a leaf node"
      ],
      "correctAnswerIndex": 0,
      "explanation": "min_samples_split controls when a node can be split, helping to regularize the tree.",
    },
    {
      "id": 7,
      "questionText": "Scenario: Decision Tree with very high min_samples_split. Observation?",
      "options": [
        "Tree underfits, predictions may be too coarse",
        "Features ignored",
        "Tree overfits",
        "Predictions perfect"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High min_samples_split prevents many splits, making the tree simpler and possibly underfitting."
    },
    {
      "id": 8,
      "questionText": "Which method can be used to reduce overfitting in Decision Tree Regression?",
      "options": [
        "Using all features without restriction",
        "Pruning",
        "Reducing training data",
        "Increasing max_depth without limit"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Pruning removes unnecessary splits to improve generalization and prevent overfitting.",
    },
    {
      "id": 9,
      "questionText": "What is the role of a leaf node in Decision Tree Regression?",
      "options": [
        "Counts the number of features",
        "Measures feature importance",
        "Contains the predicted value",
        "Decides feature splits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Leaf nodes hold the predicted output value for the observations in that node."
    },
    {
      "id": 10,
      "questionText": "Scenario: Tree is very deep and training error is near zero but test error is high. Observation?",
      "options": [
        "Tree ignores features",
        "Tree is overfitting",
        "Tree predictions are unbiased",
        "Tree is underfitting"
      ],
      "correctAnswerIndex": 1,
      "explanation": "A very deep tree may perfectly fit the training data but fail to generalize, causing overfitting."
    },
    {
      "id": 11,
      "questionText": "Which splitting strategy minimizes variance in Decision Tree Regression?",
      "options": [
        "Maximizing entropy",
        "Maximizing Gini index",
        "Choosing splits that minimize MSE in child nodes",
        "Random splits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Splits are chosen to minimize the mean squared error of target values in child nodes."
    },
    {
      "id": 12,
      "questionText": "Scenario: Dataset has categorical features with many levels. Observation?",
      "options": [
        "Tree may create too many splits, overfitting possible",
        "Tree ignores categorical features",
        "Tree always underfits",
        "Tree cannot handle categorical data"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High-cardinality categorical features can lead to many splits, increasing overfitting risk."
    },
    {
      "id": 13,
      "questionText": "What does min_samples_leaf control?",
      "options": [
        "Minimum number of samples required in a leaf node",
        "Kernel choice",
        "Maximum depth of the tree",
        "Minimum split threshold"
      ],
      "correctAnswerIndex": 0,
      "explanation": "min_samples_leaf ensures leaf nodes have a minimum number of samples, preventing overfitting to very small subsets.",
    },
    {
      "id": 14,
      "questionText": "Scenario: Decision Tree with min_samples_leaf=10 on small dataset. Observation?",
      "options": [
        "Tree may underfit, predictions coarser",
        "Leaf nodes empty",
        "Tree ignores features",
        "Tree overfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High min_samples_leaf prevents fine splits, which may underfit small datasets."
    },
    {
      "id": 15,
      "questionText": "Which metric is used to measure feature importance in Decision Tree Regression?",
      "options": [
        "Silhouette Score",
        "Entropy",
        "Reduction in MSE due to splits on the feature",
        "Gini Index"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Features contributing more to variance reduction are considered more important.",
    },
    {
      "id": 16,
      "questionText": "Scenario: Feature importance shows some features are zero. Observation?",
      "options": [
        "They are most important",
        "Tree ignored all features",
        "Those features do not contribute to splits",
        "Training error is zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Features with zero importance do not improve splits in the tree."
    },
    {
      "id": 17,
      "questionText": "Which is a disadvantage of Decision Tree Regression?",
      "options": [
        "Always underfits",
        "Cannot handle categorical data",
        "Prone to overfitting if not regularized",
        "Cannot handle continuous data"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Decision trees can overfit training data without depth or sample restrictions."
    },
    {
      "id": 18,
      "questionText": "Scenario: Decision Tree applied to noisy dataset without restrictions. Observation?",
      "options": [
        "Tree ignores features",
        "Tree underfits",
        "Tree overfits noise",
        "Training error high"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Unrestricted trees fit all variations including noise, causing overfitting."
    },
    {
      "id": 19,
      "questionText": "Scenario: Decision Tree applied with max_features='sqrt'. Observation?",
      "options": [
        "All features ignored",
        "Training error zero",
        "Random subset of features considered for splits, reduces overfitting",
        "Tree depth unlimited"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Limiting features per split reduces variance and improves generalization.",
    },
    {
      "id": 20,
      "questionText": "Scenario: Tree with very small max_leaf_nodes. Observation?",
      "options": [
        "Tree ignores features",
        "Tree overfits",
        "Tree underfits due to limited leaves",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Restricting leaves reduces tree complexity, which may lead to underfitting."
    },
    {
      "id": 21,
      "questionText": "Scenario: Tree applied to dataset with continuous features. Observation: splits based on thresholds. Why?",
      "options": [
        "Tree uses Gini",
        "Decision Tree Regression splits continuous features using thresholds to minimize variance",
        "Tree uses entropy",
        "Tree ignores continuous features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Continuous features are split at values that minimize MSE in child nodes."
    },
    {
      "id": 22,
      "questionText": "Scenario: Tree applied with min_impurity_decrease=0.01. Observation?",
      "options": [
        "Tree always overfits",
        "Tree ignores features",
        "Only splits that reduce impurity by 0.01 or more are made, helps prevent overfitting",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Setting a minimum impurity decrease prevents unnecessary splits, regularizing the tree.",
    },
    {
      "id": 23,
      "questionText": "Scenario: Tree applied to dataset with correlated features. Observation?",
      "options": [
        "Tree fails",
        "Tree may select one correlated feature for splits, ignoring others",
        "Tree overfits all features",
        "Tree underfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Decision trees choose splits greedily; correlated features may be ignored once one is used."
    },
    {
      "id": 24,
      "questionText": "Scenario: Decision Tree applied to dataset with outliers. Observation?",
      "options": [
        "Tree ignores outliers",
        "Tree may overfit outliers if not pruned",
        "Tree underfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Extreme values can cause splits that focus too much on outliers, overfitting the model."
    },
    {
      "id": 25,
      "questionText": "Scenario: Decision Tree applied with random_state set. Observation?",
      "options": [
        "Tree overfits",
        "Tree underfits",
        "Randomly ignores features",
        "Ensures reproducible results"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Setting random_state makes the tree building process deterministic, allowing reproducibility.",
    },
    {
      "id": 26,
      "questionText": "Scenario: Decision Tree applied to housing dataset with max_depth=3. Observation?",
      "options": [
        "Tree overfits",
        "Training error zero",
        "Tree may underfit due to shallow depth",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Shallow trees may fail to capture complex patterns, leading to underfitting."
    },
    {
      "id": 27,
      "questionText": "Scenario: Tree trained on noisy stock prices with max_depth=None. Observation?",
      "options": [
        "Tree underfits",
        "Tree overfits noise, poor generalization",
        "Features ignored",
        "Predictions smooth"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Unlimited depth allows the tree to capture every fluctuation, including noise."
    },
    {
      "id": 28,
      "questionText": "Scenario: Tree applied to dataset with outliers. Observation: pruning applied. Effect?",
      "options": [
        "Tree ignores all data",
        "Tree underfits entirely",
        "Reduces overfitting to outliers, better generalization",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Pruning removes unnecessary splits caused by outliers, improving generalization."
    },
    {
      "id": 29,
      "questionText": "Scenario: Decision Tree applied with min_samples_split=50 on a dataset of 500 samples. Observation?",
      "options": [
        "Tree overfits",
        "Tree is simpler, may underfit local patterns",
        "Tree ignores features",
        "Training error zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Requiring 50 samples to split reduces the number of splits and may miss finer patterns."
    },
    {
      "id": 30,
      "questionText": "Scenario: Tree applied with min_samples_leaf=20 on small dataset. Observation?",
      "options": [
        "Tree underfits, coarse predictions",
        "Training error zero",
        "Tree overfits",
        "Leaf nodes empty"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Minimum leaf size prevents small leaves, simplifying the tree and possibly underfitting."
    },
    {
      "id": 31,
      "questionText": "Scenario: Tree applied to dataset with 1000 features. Observation: max_features='sqrt'. Effect?",
      "options": [
        "Training error zero",
        "Tree ignores features",
        "Tree depth unlimited",
        "Random subset considered for splits, reduces overfitting"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Using only a subset of features per split helps control variance and prevents overfitting."
    },
    {
      "id": 32,
      "questionText": "Scenario: Tree applied with max_leaf_nodes=10. Observation?",
      "options": [
        "Tree underfits due to limited complexity",
        "Tree ignores features",
        "Tree overfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Limiting the number of leaves reduces tree complexity, potentially causing underfitting.",
    },
    {
      "id": 33,
      "questionText": "Scenario: Tree applied to dataset with highly correlated features. Observation?",
      "options": [
        "Tree uses all features equally",
        "Tree underfits",
        "Tree fails to train",
        "Tree may favor one correlated feature, ignoring others"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Greedy splitting selects one feature and ignores redundant correlated features."
    },
    {
      "id": 34,
      "questionText": "Scenario: Decision Tree applied with min_impurity_decrease=0.05. Observation?",
      "options": [
        "Tree ignores features",
        "Training error zero",
        "Tree always overfits",
        "Only splits reducing impurity ≥0.05 are made, regularizes tree"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Setting minimum impurity decrease prevents unnecessary splits, improving generalization."
    },
    {
      "id": 35,
      "questionText": "Scenario: Tree applied to dataset with categorical features of high cardinality. Observation?",
      "options": [
        "Tree may overfit due to many splits",
        "Tree underfits",
        "Tree fails to train",
        "Tree ignores categories"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High-cardinality categorical features can lead to over-complex splits and overfitting."
    },
    {
      "id": 36,
      "questionText": "Scenario: Decision Tree applied to time-series dataset without feature engineering. Observation?",
      "options": [
        "Tree ignores features",
        "Training error zero",
        "Tree overfits",
        "Tree may not capture temporal patterns"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Decision trees cannot inherently capture sequential patterns without engineered features."
    },
    {
      "id": 37,
      "questionText": "Scenario: Tree applied to dataset with extreme outliers. Observation?",
      "options": [
        "Tree ignores outliers automatically",
        "Tree underfits",
        "Tree may overfit to extreme values if not regularized",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Outliers can create splits that distort predictions, overfitting the model."
    },
    {
      "id": 38,
      "questionText": "Scenario: Decision Tree applied with random_state set. Observation?",
      "options": [
        "Results are reproducible",
        "Training error zero",
        "Tree underfits",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Setting random_state ensures deterministic tree construction, making results reproducible."
    },
    {
      "id": 39,
      "questionText": "Scenario: Tree applied with max_depth=5 and high noise. Observation?",
      "options": [
        "Tree ignores features",
        "Tree perfectly fits data",
        "May underfit some patterns, partially overfit noise",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Limited depth may underfit trends but still capture some noise, leading to mixed performance."
    },
    {
      "id": 40,
      "questionText": "Scenario: Decision Tree applied to dataset with missing values. Observation?",
      "options": [
        "Tree underfits",
        "Tree ignores missing values automatically",
        "Tree cannot handle missing values directly; preprocessing needed",
        "Tree overfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Most standard implementations (including older scikit-learn releases) require complete data or proper imputation before training.",
    },
    {
      "id": 41,
      "questionText": "Scenario: Decision Tree applied to dataset with highly skewed target. Observation?",
      "options": [
        "Tree underfits",
        "Tree ignores skew",
        "Tree predictions may be biased toward dominant target values",
        "Tree perfectly predicts"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Skewed targets can cause trees to favor majority values, reducing accuracy on rare cases."
    },
    {
      "id": 42,
      "questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?",
      "options": [
        "Tree ignores continuous features",
        "Tree fails",
        "Tree ignores categorical features",
        "Tree can handle both, splitting on thresholds for continuous features and on categories for categorical ones"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Decision trees can split on both types, though some libraries require categorical features to be encoded first."
    },
    {
      "id": 43,
      "questionText": "Scenario: Decision Tree applied with criterion='squared_error' (named 'mse' in older scikit-learn versions). Observation?",
      "options": [
        "Tree ignores criterion",
        "Splits minimize mean squared error in child nodes",
        "Splits maximize entropy",
        "Splits maximize Gini index"
      ],
      "correctAnswerIndex": 1,
      "explanation": "The squared-error (MSE) criterion reduces variance in the child nodes, improving regression accuracy at each split."
    },
    {
      "id": 44,
      "questionText": "Scenario: Tree applied with very small min_samples_split. Observation?",
      "options": [
        "Tree underfits",
        "Tree may overfit by creating many small splits",
        "Training error zero",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Small min_samples_split allows splits on tiny subsets, causing overfitting."
    },
    {
      "id": 45,
      "questionText": "Scenario: Tree applied with max_features=None on high-dimensional data. Observation?",
      "options": [
        "Tree ignores some features",
        "Tree underfits",
        "Tree considers all features at each split, may overfit",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Using all features increases variance and can lead to overfitting."
    },
    {
      "id": 46,
      "questionText": "Scenario: Tree applied to dataset with many redundant features. Observation?",
      "options": [
        "Tree underfits",
        "Tree fails",
        "Tree uses all features equally",
        "Tree may ignore redundant features after selecting one correlated feature"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Greedy splitting selects the most informative feature and ignores others."
    },
    {
      "id": 47,
      "questionText": "Scenario: Decision Tree applied to small dataset. Observation: high variance in predictions across different train/test splits. Reason?",
      "options": [
        "Tree always underfits",
        "Training error zero",
        "Tree ignores features",
        "Trees are sensitive to small data changes, high variance"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Small datasets cause instability in tree splits, resulting in high variance."
    },
    {
      "id": 48,
      "questionText": "Scenario: Tree applied to a dataset with uniform target values. Observation?",
      "options": [
        "Tree overfits",
        "Tree will have shallow depth; predictions equal to uniform target",
        "Tree underfits",
        "Tree grows very deep"
      ],
      "correctAnswerIndex": 1,
      "explanation": "If target values are uniform, splits do not reduce variance; tree remains shallow."
    },
    {
      "id": 49,
      "questionText": "Scenario: Tree applied to dataset with features scaled differently. Observation?",
      "options": [
        "Tree underfits",
        "Training error zero",
        "Decision Tree unaffected by feature scaling",
        "Tree overfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Decision Trees are scale-invariant; feature scaling does not affect splits."
    },
    {
      "id": 50,
      "questionText": "Scenario: Decision Tree applied with max_depth=10, min_samples_leaf=5. Observation?",
      "options": [
        "Tree balances depth and leaf size, reducing overfitting",
        "Tree ignores features",
        "Tree overfits",
        "Tree underfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Limiting depth and minimum leaf samples helps regularize the tree and improve generalization."
    },
    {
      "id": 51,
      "questionText": "Scenario: Decision Tree applied to financial dataset with max_depth=None and min_samples_split=2. Observation?",
      "options": [
        "Tree underfits",
        "Tree likely overfits due to unlimited depth and small splits",
        "Tree ignores features",
        "Predictions smooth"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Unlimited depth and tiny splits allow the tree to capture all noise, causing overfitting."
    },
    {
      "id": 52,
      "questionText": "Scenario: Tree applied to dataset with extreme outliers. Observation: min_samples_leaf=10 applied. Effect?",
      "options": [
        "Tree ignores features",
        "Tree overfits outliers",
        "Tree becomes more robust to outliers",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Minimum leaf size prevents leaves from fitting extreme individual outliers, improving robustness."
    },
    {
      "id": 53,
      "questionText": "Scenario: Tree applied to housing dataset. Observation: max_features='sqrt' used. Benefit?",
      "options": [
        "Tree ignores most features",
        "Tree depth unlimited",
        "Reduces variance and prevents overfitting by using feature subsets at each split",
        "Tree underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Random subsets per split regularize the tree, balancing bias and variance."
    },
    {
      "id": 54,
      "questionText": "Scenario: Decision Tree applied with criterion='friedman_mse'. Observation?",
      "options": [
        "Tree ignores MSE",
        "Tree underfits",
        "Tree fails",
        "Optimized for boosting algorithms, may improve split selection"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Friedman's improvement score was designed for gradient boosting; it weighs the difference between child-node means when scoring candidate splits.",
    },
    {
      "id": 55,
      "questionText": "Scenario: Tree applied to small dataset with many features. Observation: high variance in predictions. Reason?",
      "options": [
        "Tree ignores features",
        "Training error zero",
        "Greedy splits sensitive to small changes, causing high variance",
        "Tree underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Small datasets are prone to unstable splits, resulting in varying predictions."
    },
    {
      "id": 56,
      "questionText": "Scenario: Tree applied with max_leaf_nodes=15. Observation?",
      "options": [
        "Tree overfits",
        "Limits complexity, helps prevent overfitting",
        "Tree ignores features",
        "Tree underfits completely"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Restricting leaves reduces tree complexity, acting as regularization."
    },
    {
      "id": 57,
      "questionText": "Scenario: Tree applied to dataset with categorical features of high cardinality. Observation?",
      "options": [
        "Tree ignores categorical features",
        "Tree may overfit due to numerous splits on categories",
        "Tree fails",
        "Tree underfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Many categories can produce too many splits, increasing risk of overfitting."
    },
    {
      "id": 58,
      "questionText": "Scenario: Decision Tree applied to time-series dataset without lag features. Observation?",
      "options": [
        "Tree underfits",
        "Tree may not capture temporal dependencies",
        "Training error zero",
        "Tree overfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Decision Trees cannot inherently model sequences; engineered features like lagged variables are needed.",
    },
    {
      "id": 59,
      "questionText": "Scenario: Tree applied with min_impurity_decrease=0.02. Observation?",
      "options": [
        "Tree ignores features",
        "Tree always overfits",
        "Only splits that reduce impurity ≥0.02 are made, preventing overfitting",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Minimum impurity decrease restricts splits, acting as a regularization technique."
    },
    {
      "id": 60,
      "questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=5 on noisy dataset. Observation?",
      "options": [
        "Tree overfits",
        "Tree balances depth and leaf size, reducing overfitting",
        "Tree ignores features",
        "Tree underfits completely"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Combining depth limit and minimum leaf size regularizes the tree for better generalization."
    },
    {
      "id": 61,
      "questionText": "Scenario: Tree applied to dataset with uniform target values. Observation?",
      "options": [
        "Tree remains shallow, predicts uniform target",
        "Tree grows very deep",
        "Tree overfits",
        "Tree underfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Uniform targets leave no variance to reduce, so the tree does not split further."
    },
    {
      "id": 62,
      "questionText": "Scenario: Tree applied with max_depth=5 on dataset with strong non-linear patterns. Observation?",
      "options": [
        "Tree may underfit due to limited depth",
        "Tree ignores features",
        "Tree overfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Shallow trees cannot capture complex patterns, causing underfitting."
    },
    {
      "id": 63,
      "questionText": "Scenario: Decision Tree applied to financial dataset with high outlier presence. Observation?",
      "options": [
        "Tree underfits",
        "Tree splits may overfit outliers without pruning or min_samples_leaf",
        "Tree ignores outliers automatically",
        "Training error zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Extreme values can cause splits that focus too much on outliers."
    },
    {
      "id": 64,
      "questionText": "Scenario: Tree applied to dataset with missing values. Observation?",
      "options": [
        "Tree ignores missing values automatically",
        "Tree overfits",
        "Tree requires imputation; cannot handle missing values directly",
        "Tree underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Standard implementations need complete data or imputation before training."
    },
    {
      "id": 65,
      "questionText": "Scenario: Tree applied with max_features=None on high-dimensional dataset. Observation?",
      "options": [
        "Tree ignores some features",
        "Tree underfits",
        "Training error zero",
        "Tree considers all features per split, may overfit"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Using all features at each split increases variance and risk of overfitting."
    },
    {
      "id": 66,
      "questionText": "Scenario: Tree applied with very small min_samples_split. Observation?",
      "options": [
        "Tree may overfit due to tiny splits",
        "Tree ignores features",
        "Tree underfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Small min_samples_split allows splitting on tiny subsets, increasing overfitting risk."
    },
    {
      "id": 67,
      "questionText": "Scenario: Tree applied to dataset with skewed target distribution. Observation?",
      "options": [
        "Tree underfits",
        "Tree ignores skew",
        "Tree may bias predictions toward dominant target values",
        "Tree perfectly predicts"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Skewed targets can bias splits toward majority values, reducing accuracy for minority ranges."
    },
    {
      "id": 68,
      "questionText": "Scenario: Tree applied with random_state set. Observation?",
      "options": [
        "Training error zero",
        "Tree ignores features",
        "Ensures reproducible results",
        "Tree underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Setting random_state ensures deterministic tree construction."
    },
    {
      "id": 69,
      "questionText": "Scenario: Tree applied with max_depth=10 and min_samples_leaf=2 on noisy dataset. Observation?",
      "options": [
        "Tree underfits",
        "May overfit noise despite some leaf constraints",
        "Tree ignores features",
        "Training error zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Even with min_samples_leaf=2, deep trees can still overfit noisy data."
    },
    {
      "id": 70,
      "questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?",
      "options": [
        "Tree can handle both; splits on thresholds for continuous features and on categories for categorical ones",
        "Tree fails",
        "Tree ignores categorical features",
        "Tree ignores continuous features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Decision Trees split both feature types appropriately."
    },
    {
      "id": 71,
      "questionText": "Scenario: Tree applied to dataset with correlated features. Observation?",
      "options": [
        "Tree fails",
        "Tree underfits",
        "Tree uses all features equally",
        "Tree may select one correlated feature for split, ignoring others"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Greedy splits select the most informative feature and ignore redundant correlated features."
    },
    {
      "id": 72,
      "questionText": "Scenario: Tree applied to small dataset. Observation: prediction varies with different train/test splits. Reason?",
      "options": [
        "High variance due to sensitivity to small data changes",
        "Tree always underfits",
        "Training error zero",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Small datasets cause unstable splits, leading to high variance."
    },
    {
      "id": 73,
      "questionText": "Scenario: Tree applied with max_depth=6 on dataset with complex patterns. Observation?",
      "options": [
        "Tree may underfit due to limited depth",
        "Training error zero",
        "Tree overfits",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Limited depth restricts tree complexity and may underfit complex relationships."
    },
    {
      "id": 74,
      "questionText": "Scenario: Tree applied to dataset with features scaled differently. Observation?",
      "options": [
        "Tree overfits",
        "Tree underfits",
        "Training error zero",
        "Decision Tree is scale-invariant; scaling has no effect"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Decision Trees do not rely on feature magnitudes; scaling does not affect splits."
    },
    {
      "id": 75,
      "questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=4. Observation?",
      "options": [
        "Tree overfits",
        "Tree underfits",
        "Tree ignores features",
        "Tree balances depth and leaf size, reducing overfitting"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Combination of depth and leaf constraints helps tree generalize better."
    },
    {
      "id": 76,
      "questionText": "Scenario: Tree applied to very small dataset with max_depth=None. Observation?",
      "options": [
        "Tree underfits",
        "Training error high",
        "Tree ignores features",
        "Tree highly overfits, predictions unstable"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Unlimited depth on small data leads to capturing noise, causing overfitting and instability."
    },
    {
      "id": 77,
      "questionText": "Scenario: Decision Tree applied with min_samples_split very high. Observation?",
      "options": [
        "Tree underfits due to few splits",
        "Tree ignores features",
        "Tree overfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High min_samples_split prevents many splits, simplifying the tree and possibly underfitting."
    },
    {
      "id": 78,
      "questionText": "Scenario: Tree applied with max_features=1 on dataset with 50 features. Observation?",
      "options": [
        "Tree overfits",
        "Tree underfits completely",
        "Tree uses all features equally",
        "Tree uses only one feature per split, reduces overfitting but may underfit"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Limiting to one feature per split introduces randomness, reducing variance but may increase bias."
    },
    {
      "id": 79,
      "questionText": "Scenario: Tree applied to dataset with extreme outliers in target. Observation?",
      "options": [
        "Tree may create leaves specifically fitting outliers, overfitting",
        "Tree ignores outliers automatically",
        "Training error zero",
        "Tree underfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Decision Trees can focus on extreme values, creating splits that overfit outliers."
    },
    {
      "id": 80,
      "questionText": "Scenario: Tree applied to dataset with skewed categorical features. Observation?",
      "options": [
        "Tree may bias splits toward frequent categories",
        "Tree ignores categories",
        "Tree underfits",
        "Tree perfectly predicts"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Highly imbalanced categories influence the tree to favor majority categories in splits."
    },
    {
      "id": 81,
      "questionText": "Scenario: Tree applied to dataset with missing values. Observation?",
      "options": [
        "Tree overfits",
        "Tree requires preprocessing; cannot handle missing values directly",
        "Tree ignores missing values automatically",
        "Tree underfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Most implementations need complete data or imputation before training."
    },
    {
      "id": 82,
      "questionText": "Scenario: Tree applied with criterion='poisson'. Observation?",
      "options": [
        "Tree ignores criterion",
        "Tree overfits",
        "Optimized for count data, splits minimize Poisson deviance",
        "Tree underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Poisson criterion is used for regression tasks with count targets.",
    },
    {
      "id": 83,
      "questionText": "Scenario: Tree applied to dataset with both continuous and categorical features. Observation?",
      "options": [
        "Tree handles both; continuous via thresholds, categorical via category splits",
        "Tree ignores continuous features",
        "Tree ignores categorical features",
        "Tree fails"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Decision Trees split both types appropriately."
    },
    {
      "id": 84,
      "questionText": "Scenario: Tree applied to time-series dataset without feature engineering. Observation?",
      "options": [
        "Tree overfits",
        "Tree underfits",
        "Training error zero",
        "Tree may not capture temporal dependencies"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Decision Trees require features like lag variables to capture temporal patterns."
    },
    {
      "id": 85,
      "questionText": "Scenario: Tree applied with max_depth=3 on dataset with complex non-linear relationships. Observation?",
      "options": [
        "Tree underfits due to shallow depth",
        "Training error zero",
        "Tree ignores features",
        "Tree overfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Shallow depth limits the ability to capture complex patterns, leading to underfitting."
    },
    {
      "id": 86,
      "questionText": "Scenario: Tree applied with min_impurity_decrease=0.1. Observation?",
      "options": [
        "Tree ignores features",
        "Training error zero",
        "Only splits reducing impurity ≥0.1 are allowed, regularizing tree",
        "Tree overfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Minimum impurity decrease prevents unnecessary splits, controlling complexity."
    },
    {
      "id": 87,
      "questionText": "Scenario: Tree applied to dataset with skewed target values. Observation?",
      "options": [
        "Tree underfits",
        "Tree ignores skew",
        "Tree perfectly predicts",
        "Tree may bias predictions toward dominant target values"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Skewed targets can bias splits toward majority values, reducing accuracy for minority ranges."
    },
    {
      "id": 88,
      "questionText": "Scenario: Tree applied with very small min_samples_leaf. Observation?",
      "options": [
        "Tree ignores features",
        "Tree may overfit by creating tiny leaves",
        "Training error zero",
        "Tree underfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Small leaves can cause overfitting to minor fluctuations or noise."
    },
    {
      "id": 89,
      "questionText": "Scenario: Tree applied with max_leaf_nodes=5. Observation?",
      "options": [
        "Tree underfits due to limited leaf complexity",
        "Tree ignores features",
        "Training error zero",
        "Tree overfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Restricting leaves limits the tree's ability to capture detailed patterns."
    },
    {
      "id": 90,
      "questionText": "Scenario: Tree applied with max_features=None on high-dimensional data. Observation?",
      "options": [
        "Tree ignores some features",
        "Tree considers all features per split, may overfit",
        "Training error zero",
        "Tree underfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Using all features increases variance, risking overfitting."
    },
    {
      "id": 91,
      "questionText": "Scenario: Tree applied to dataset with correlated features. Observation?",
      "options": [
        "Tree may select one correlated feature, ignoring others",
        "Tree underfits",
        "Tree uses all features equally",
        "Tree fails"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Greedy splits pick the most informative feature and ignore redundant ones."
    },
    {
      "id": 92,
      "questionText": "Scenario: Tree applied to dataset with small sample size. Observation?",
      "options": [
        "Training error zero",
        "Tree underfits",
        "Predictions are unstable across train/test splits due to high variance",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Small datasets cause instability in splits, producing high variance predictions."
    },
    {
      "id": 93,
      "questionText": "Scenario: Tree applied to dataset with extreme noise. Observation?",
      "options": [
        "Tree ignores features",
        "Training error zero",
        "Tree underfits",
        "Tree may overfit noise without regularization"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Unrestricted trees capture noise, reducing generalization performance."
    },
    {
      "id": 94,
      "questionText": "Scenario: Tree applied with max_depth=6, min_samples_leaf=5 on noisy dataset. Observation?",
      "options": [
        "Tree balances complexity and leaf constraints, better generalization",
        "Tree underfits",
        "Tree ignores features",
        "Tree overfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Combining depth and leaf constraints regularizes the tree for improved generalization."
    },
    {
      "id": 95,
      "questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?",
      "options": [
        "Tree ignores categorical features",
        "Tree fails",
        "Tree ignores continuous features",
        "Tree can handle both; splits on thresholds for continuous features and on categories for categorical ones"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Decision Trees split both feature types appropriately."
    },
    {
      "id": 96,
      "questionText": "Scenario: Tree applied with random_state set. Observation?",
      "options": [
        "Training error zero",
        "Results reproducible across runs",
        "Tree underfits",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Setting random_state ensures deterministic tree construction."
    },
    {
      "id": 97,
      "questionText": "Scenario: Tree applied to dataset with highly imbalanced categorical features. Observation?",
      "options": [
        "Tree underfits",
        "Tree perfectly predicts",
        "Splits biased toward frequent categories, may reduce accuracy for rare categories",
        "Tree ignores categories"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Imbalanced categories influence split decisions, potentially causing bias."
    },
    {
      "id": 98,
      "questionText": "Scenario: Tree applied to dataset with missing values. Observation?",
      "options": [
        "Tree underfits",
        "Requires imputation or preprocessing",
        "Tree overfits",
        "Tree ignores missing values automatically"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Most implementations cannot handle missing values directly; preprocessing is required."
    },
    {
      "id": 99,
      "questionText": "Scenario: Tree applied to high-dimensional data with max_depth=None. Observation?",
      "options": [
        "Tree may overfit due to unlimited depth and many features",
        "Tree ignores features",
        "Tree underfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Unlimited depth with high-dimensional features leads to over-complex splits and overfitting."
    },
    {
      "id": 100,
      "questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=4, max_features='sqrt'. Observation?",
      "options": [
        "Tree underfits",
        "Tree overfits",
        "Tree ignores features",
        "Tree balances depth, leaf size, and feature selection for improved generalization"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Combining depth limit, leaf constraint, and feature subset selection regularizes the tree effectively."
    }
  ]
}