{ "title": "Decision Tree Regression Mastery: 100 MCQs", "description": "A comprehensive set of 100 multiple-choice questions designed to teach and test your understanding of Decision Tree Regression, covering basic concepts, splitting criteria, pruning, overfitting, hyperparameter tuning, and real-world scenarios.", "questions": [ { "id": 1, "questionText": "What is the main goal of Decision Tree Regression?", "options": [ "Reduce dimensionality", "Predict continuous target values using a tree structure", "Classify data into categories", "Cluster data points" ], "correctAnswerIndex": 1, "explanation": "Decision Tree Regression predicts continuous values by splitting the data into subsets based on feature thresholds." }, { "id": 2, "questionText": "Which criterion is commonly used to decide splits in Decision Tree Regression?", "options": [ "Mean Squared Error (MSE)", "Gini Index", "Entropy", "Silhouette Score" ], "correctAnswerIndex": 0, "explanation": "MSE is commonly used to minimize the variance of the target variable in the child nodes." }, { "id": 3, "questionText": "What is overfitting in Decision Tree Regression?", "options": [ "When the tree is too shallow", "When predictions are always zero", "When data is not standardized", "When the tree captures noise in the training data" ], "correctAnswerIndex": 3, "explanation": "Overfitting occurs when a tree becomes too complex, capturing noise instead of general patterns." }, { "id": 4, "questionText": "Which hyperparameter helps control the maximum depth of a Decision Tree?", "options": [ "max_depth", "criterion", "min_samples_split", "gamma" ], "correctAnswerIndex": 0, "explanation": "max_depth limits how deep the tree can grow, preventing overfitting." }, { "id": 5, "questionText": "Scenario: A Decision Tree is trained with max_depth=None on a small dataset. Observation?", "options": [ "Tree ignores features", "Tree likely overfits", "Tree underfits", "Predictions are zero" ], "correctAnswerIndex": 1, "explanation": "Without depth limitation, the tree can grow too complex and overfit small datasets." }, { "id": 6, "questionText": "What is min_samples_split in Decision Tree Regression?", "options": [ "Minimum samples required to split an internal node", "Maximum number of features used", "Maximum depth of the tree", "Minimum samples required at a leaf node" ], "correctAnswerIndex": 0, "explanation": "min_samples_split controls when a node can be split, helping to regularize the tree." }, { "id": 7, "questionText": "Scenario: Decision Tree with very high min_samples_split. Observation?", "options": [ "Tree underfits, predictions may be too coarse", "Features ignored", "Tree overfits", "Predictions perfect" ], "correctAnswerIndex": 0, "explanation": "High min_samples_split prevents many splits, making the tree simpler and possibly underfitting." }, { "id": 8, "questionText": "Which method can be used to reduce overfitting in Decision Tree Regression?", "options": [ "Using all features without restriction", "Pruning", "Reducing training data", "Increasing max_depth without limit" ], "correctAnswerIndex": 1, "explanation": "Pruning removes unnecessary splits to improve generalization and prevent overfitting." 
}, { "id": 9, "questionText": "What is the role of a leaf node in Decision Tree Regression?", "options": [ "Counts the number of features", "Measures feature importance", "Contains the predicted value", "Decides feature splits" ], "correctAnswerIndex": 2, "explanation": "Leaf nodes hold the predicted output value for the observations in that node." }, { "id": 10, "questionText": "Scenario: Tree is very deep and training error is near zero but test error is high. Observation?", "options": [ "Tree ignores features", "Tree is overfitting", "Tree predictions are unbiased", "Tree is underfitting" ], "correctAnswerIndex": 1, "explanation": "A very deep tree may perfectly fit the training data but fail to generalize, causing overfitting." }, { "id": 11, "questionText": "Which splitting strategy minimizes variance in Decision Tree Regression?", "options": [ "Maximizing entropy", "Maximizing Gini index", "Choosing splits that minimize MSE in child nodes", "Random splits" ], "correctAnswerIndex": 2, "explanation": "Splits are chosen to minimize the mean squared error of target values in child nodes." }, { "id": 12, "questionText": "Scenario: Dataset has categorical features with many levels. Decision Tree Observation?", "options": [ "Tree may create too many splits, overfitting possible", "Tree ignores categorical features", "Tree always underfits", "Tree cannot handle categorical data" ], "correctAnswerIndex": 0, "explanation": "High-cardinality categorical features can lead to many splits, increasing overfitting risk." }, { "id": 13, "questionText": "What does min_samples_leaf control?", "options": [ "Minimum number of samples required in a leaf node", "Kernel choice", "Maximum depth of the tree", "Minimum split threshold" ], "correctAnswerIndex": 0, "explanation": "min_samples_leaf ensures leaf nodes have a minimum number of samples, preventing overfitting to very small subsets." }, { "id": 14, "questionText": "Scenario: Decision Tree with min_samples_leaf=10 on small dataset. Observation?", "options": [ "Tree may underfit, predictions coarser", "Leaf nodes empty", "Tree ignores features", "Tree overfits" ], "correctAnswerIndex": 0, "explanation": "High min_samples_leaf prevents fine splits, which may underfit small datasets." }, { "id": 15, "questionText": "Which metric is used to measure feature importance in Decision Tree Regression?", "options": [ "Silhouette Score", "Entropy", "Reduction in MSE due to splits on the feature", "Gini Index" ], "correctAnswerIndex": 2, "explanation": "Features contributing more to variance reduction are considered more important." }, { "id": 16, "questionText": "Scenario: Feature importance shows some features are zero. Observation?", "options": [ "They are most important", "Tree ignored all features", "Those features do not contribute to splits", "Training error is zero" ], "correctAnswerIndex": 2, "explanation": "Features with zero importance do not improve splits in the tree." }, { "id": 17, "questionText": "Which is a disadvantage of Decision Tree Regression?", "options": [ "Always underfits", "Cannot handle categorical data", "Prone to overfitting if not regularized", "Cannot handle continuous data" ], "correctAnswerIndex": 2, "explanation": "Decision trees can overfit training data without depth or sample restrictions." }, { "id": 18, "questionText": "Scenario: Decision Tree applied to noisy dataset without restrictions. 
Observation?", "options": [ "Tree ignores features", "Tree underfits", "Tree overfits noise", "Training error high" ], "correctAnswerIndex": 2, "explanation": "Unrestricted trees fit all variations including noise, causing overfitting." }, { "id": 19, "questionText": "Scenario: Decision Tree applied with max_features='sqrt'. Observation?", "options": [ "All features ignored", "Training error zero", "Random subset of features considered for splits, reduces overfitting", "Tree depth unlimited" ], "correctAnswerIndex": 2, "explanation": "Limiting features per split reduces variance and improves generalization." }, { "id": 20, "questionText": "Scenario: Tree with very small max_leaf_nodes. Observation?", "options": [ "Tree ignores features", "Tree overfits", "Tree underfits due to limited leaves", "Training error zero" ], "correctAnswerIndex": 2, "explanation": "Restricting leaves reduces tree complexity, which may lead to underfitting." }, { "id": 21, "questionText": "Scenario: Tree applied to dataset with continuous features. Observation: splits based on thresholds. Why?", "options": [ "Tree uses Gini", "Decision Tree Regression splits continuous features using thresholds to minimize variance", "Tree uses entropy", "Tree ignores continuous features" ], "correctAnswerIndex": 1, "explanation": "Continuous features are split at values that minimize MSE in child nodes." }, { "id": 22, "questionText": "Scenario: Tree applied with min_impurity_decrease=0.01. Observation?", "options": [ "Tree always overfits", "Tree ignores features", "Only splits that reduce impurity by 0.01 or more are made, helps prevent overfitting", "Training error zero" ], "correctAnswerIndex": 2, "explanation": "Setting a minimum impurity decrease prevents unnecessary splits, regularizing the tree." }, { "id": 23, "questionText": "Scenario: Tree applied to dataset with correlated features. Observation?", "options": [ "Tree fails", "Tree may select one correlated feature for splits, ignoring others", "Tree overfits all features", "Tree underfits" ], "correctAnswerIndex": 1, "explanation": "Decision trees choose splits greedily; correlated features may be ignored once one is used." }, { "id": 24, "questionText": "Scenario: Decision Tree applied to dataset with outliers. Observation?", "options": [ "Tree ignores outliers", "Tree may overfit outliers if not pruned", "Tree underfits", "Training error zero" ], "correctAnswerIndex": 1, "explanation": "Extreme values can cause splits that focus too much on outliers, overfitting the model." }, { "id": 25, "questionText": "Scenario: Decision Tree applied with random_state set. Observation?", "options": [ "Tree overfits", "Tree underfits", "Randomly ignores features", "Ensures reproducible results" ], "correctAnswerIndex": 3, "explanation": "Setting random_state makes the tree building process deterministic, allowing reproducibility." }, { "id": 26, "questionText": "Scenario: Decision Tree applied to housing dataset with max_depth=3. Observation?", "options": [ "Tree overfits", "Training error zero", "Tree may underfit due to shallow depth", "Tree ignores features" ], "correctAnswerIndex": 2, "explanation": "Shallow trees may fail to capture complex patterns, leading to underfitting." }, { "id": 27, "questionText": "Scenario: Tree trained on noisy stock prices with max_depth=None. 
Observation?", "options": [ "Tree underfits", "Tree overfits noise, poor generalization", "Features ignored", "Predictions smooth" ], "correctAnswerIndex": 1, "explanation": "Unlimited depth allows the tree to capture every fluctuation, including noise." }, { "id": 28, "questionText": "Scenario: Tree applied to dataset with outliers. Observation: pruning applied. Effect?", "options": [ "Tree ignores all data", "Tree underfits entirely", "Reduces overfitting to outliers, better generalization", "Training error zero" ], "correctAnswerIndex": 2, "explanation": "Pruning removes unnecessary splits caused by outliers, improving generalization." }, { "id": 29, "questionText": "Scenario: Decision Tree applied with min_samples_split=50 on a dataset of 500 samples. Observation?", "options": [ "Tree overfits", "Tree is simpler, may underfit local patterns", "Tree ignores features", "Training error zero" ], "correctAnswerIndex": 1, "explanation": "Requiring 50 samples to split reduces the number of splits and may miss finer patterns." }, { "id": 30, "questionText": "Scenario: Tree applied with min_samples_leaf=20 on small dataset. Observation?", "options": [ "Tree underfits, coarse predictions", "Training error zero", "Tree overfits", "Leaf nodes empty" ], "correctAnswerIndex": 0, "explanation": "Minimum leaf size prevents small leaves, simplifying the tree and possibly underfitting." }, { "id": 31, "questionText": "Scenario: Tree applied to dataset with 1000 features. Observation: max_features='sqrt'. Effect?", "options": [ "Training error zero", "Tree ignores features", "Tree depth unlimited", "Random subset considered for splits, reduces overfitting" ], "correctAnswerIndex": 3, "explanation": "Using only a subset of features per split helps control variance and prevents overfitting." }, { "id": 32, "questionText": "Scenario: Tree applied with max_leaf_nodes=10. Observation?", "options": [ "Tree underfits due to limited complexity", "Tree ignores features", "Tree overfits", "Training error zero" ], "correctAnswerIndex": 0, "explanation": "Limiting the number of leaves reduces tree complexity, potentially causing underfitting." }, { "id": 33, "questionText": "Scenario: Tree applied to dataset with highly correlated features. Observation?", "options": [ "Tree uses all features equally", "Tree underfits", "Tree fails to train", "Tree may favor one correlated feature, ignoring others" ], "correctAnswerIndex": 3, "explanation": "Greedy splitting selects one feature and ignores redundant correlated features." }, { "id": 34, "questionText": "Scenario: Decision Tree applied with min_impurity_decrease=0.05. Observation?", "options": [ "Tree ignores features", "Training error zero", "Tree always overfits", "Only splits reducing impurity ≥0.05 are made, regularizes tree" ], "correctAnswerIndex": 3, "explanation": "Setting minimum impurity decrease prevents unnecessary splits, improving generalization." }, { "id": 35, "questionText": "Scenario: Tree applied to dataset with categorical features of high cardinality. Observation?", "options": [ "Tree may overfit due to many splits", "Tree underfits", "Tree fails to train", "Tree ignores categories" ], "correctAnswerIndex": 0, "explanation": "High-cardinality categorical features can lead to over-complex splits and overfitting." }, { "id": 36, "questionText": "Scenario: Decision Tree applied to time-series dataset without feature engineering. 
Observation?", "options": [ "Tree ignores features", "Training error zero", "Tree overfits", "Tree may not capture temporal patterns" ], "correctAnswerIndex": 3, "explanation": "Decision trees cannot inherently capture sequential patterns without engineered features." }, { "id": 37, "questionText": "Scenario: Tree applied to dataset with extreme outliers. Observation?", "options": [ "Tree ignores outliers automatically", "Tree underfits", "Tree may overfit to extreme values if not regularized", "Training error zero" ], "correctAnswerIndex": 2, "explanation": "Outliers can create splits that distort predictions, overfitting the model." }, { "id": 38, "questionText": "Scenario: Decision Tree applied with random_state set. Observation?", "options": [ "Results are reproducible", "Training error zero", "Tree underfits", "Tree ignores features" ], "correctAnswerIndex": 0, "explanation": "Setting random_state ensures deterministic tree construction, making results reproducible." }, { "id": 39, "questionText": "Scenario: Tree applied with max_depth=5 and high noise. Observation?", "options": [ "Tree ignores features", "Tree perfectly fits data", "May underfit some patterns, partially overfit noise", "Training error zero" ], "correctAnswerIndex": 2, "explanation": "Limited depth may underfit trends but still capture some noise, leading to mixed performance." }, { "id": 40, "questionText": "Scenario: Decision Tree applied to dataset with missing values. Observation?", "options": [ "Tree underfits", "Tree ignores missing values automatically", "Tree cannot handle missing values directly; preprocessing needed", "Tree overfits" ], "correctAnswerIndex": 2, "explanation": "Decision trees require complete data or proper imputation before training." }, { "id": 41, "questionText": "Scenario: Decision Tree applied to dataset with highly skewed target. Observation?", "options": [ "Tree underfits", "Tree ignores skew", "Tree predictions may be biased toward dominant target values", "Tree perfectly predicts" ], "correctAnswerIndex": 2, "explanation": "Skewed targets can cause trees to favor majority values, reducing accuracy on rare cases." }, { "id": 42, "questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?", "options": [ "Tree ignores continuous features", "Tree fails", "Tree ignores categorical features", "Tree can handle both, splits on thresholds for continuous and categories for categorical" ], "correctAnswerIndex": 3, "explanation": "Decision trees can split both types, using appropriate thresholds or categories." }, { "id": 43, "questionText": "Scenario: Decision Tree applied with criterion='mse'. Observation?", "options": [ "Tree ignores criterion", "Splits minimize mean squared error in child nodes", "Splits maximize entropy", "Splits maximize Gini index" ], "correctAnswerIndex": 1, "explanation": "MSE is used to reduce variance and improve regression accuracy at splits." }, { "id": 44, "questionText": "Scenario: Tree applied with very small min_samples_split. Observation?", "options": [ "Tree underfits", "Tree may overfit by creating many small splits", "Training error zero", "Tree ignores features" ], "correctAnswerIndex": 1, "explanation": "Small min_samples_split allows splits on tiny subsets, causing overfitting." }, { "id": 45, "questionText": "Scenario: Tree applied with max_features=None on high-dimensional data. 
Observation?", "options": [ "Tree ignores some features", "Tree underfits", "Tree considers all features at each split, may overfit", "Training error zero" ], "correctAnswerIndex": 2, "explanation": "Using all features increases variance and can lead to overfitting." }, { "id": 46, "questionText": "Scenario: Tree applied to dataset with many redundant features. Observation?", "options": [ "Tree underfits", "Tree fails", "Tree uses all features equally", "Tree may ignore redundant features after selecting one correlated feature" ], "correctAnswerIndex": 3, "explanation": "Greedy splitting selects the most informative feature and ignores others." }, { "id": 47, "questionText": "Scenario: Decision Tree applied to small dataset. Observation: high variance in predictions across different train/test splits. Reason?", "options": [ "Tree always underfits", "Training error zero", "Tree ignores features", "Trees are sensitive to small data changes, high variance" ], "correctAnswerIndex": 3, "explanation": "Small datasets cause instability in tree splits, resulting in high variance." }, { "id": 48, "questionText": "Scenario: Tree applied to a dataset with uniform target values. Observation?", "options": [ "Tree overfits", "Tree will have shallow depth; predictions equal to uniform target", "Tree underfits", "Training error zero" ], "correctAnswerIndex": 1, "explanation": "If target values are uniform, splits do not reduce variance; tree remains shallow." }, { "id": 49, "questionText": "Scenario: Tree applied to dataset with features scaled differently. Observation?", "options": [ "Tree underfits", "Training error zero", "Decision Tree unaffected by feature scaling", "Tree overfits" ], "correctAnswerIndex": 2, "explanation": "Decision Trees are scale-invariant; feature scaling does not affect splits." }, { "id": 50, "questionText": "Scenario: Decision Tree applied with max_depth=10, min_samples_leaf=5. Observation?", "options": [ "Tree balances depth and leaf size, reducing overfitting", "Tree ignores features", "Tree overfits", "Tree underfits" ], "correctAnswerIndex": 0, "explanation": "Limiting depth and minimum leaf samples helps regularize the tree and improve generalization." }, { "id": 51, "questionText": "Scenario: Decision Tree applied to financial dataset with max_depth=None and min_samples_split=2. Observation?", "options": [ "Tree underfits", "Tree likely overfits due to unlimited depth and small splits", "Tree ignores features", "Predictions smooth" ], "correctAnswerIndex": 1, "explanation": "Unlimited depth and tiny splits allow the tree to capture all noise, causing overfitting." }, { "id": 52, "questionText": "Scenario: Tree applied to dataset with extreme outliers. Observation: min_samples_leaf=10 applied. Effect?", "options": [ "Tree ignores features", "Tree overfits outliers", "Tree becomes more robust to outliers", "Training error zero" ], "correctAnswerIndex": 2, "explanation": "Minimum leaf size prevents leaves from fitting extreme individual outliers, improving robustness." }, { "id": 53, "questionText": "Scenario: Tree applied to housing dataset. Observation: max_features='sqrt' used. Benefit?", "options": [ "Tree ignores most features", "Tree depth unlimited", "Reduces variance and prevents overfitting by using feature subsets at each split", "Tree underfits" ], "correctAnswerIndex": 2, "explanation": "Random subsets per split regularize the tree, balancing bias and variance." }, { "id": 54, "questionText": "Scenario: Decision Tree applied with criterion='friedman_mse'. 
Observation?", "options": [ "Tree ignores MSE", "Tree underfits", "Tree fails", "Optimized for boosting algorithms, may improve split selection" ], "correctAnswerIndex": 3, "explanation": "Friedman MSE is designed for boosting, accounting for residuals in regression tasks." }, { "id": 55, "questionText": "Scenario: Tree applied to small dataset with many features. Observation: high variance in predictions. Reason?", "options": [ "Tree ignores features", "Training error zero", "Greedy splits sensitive to small changes, causing high variance", "Tree underfits" ], "correctAnswerIndex": 2, "explanation": "Small datasets are prone to unstable splits, resulting in varying predictions." }, { "id": 56, "questionText": "Scenario: Tree applied with max_leaf_nodes=15. Observation?", "options": [ "Tree overfits", "Limits complexity, helps prevent overfitting", "Tree ignores features", "Tree underfits completely" ], "correctAnswerIndex": 1, "explanation": "Restricting leaves reduces tree complexity, acting as regularization." }, { "id": 57, "questionText": "Scenario: Tree applied to dataset with categorical features of high cardinality. Observation?", "options": [ "Tree ignores categorical features", "Tree may overfit due to numerous splits on categories", "Tree fails", "Tree underfits" ], "correctAnswerIndex": 1, "explanation": "Many categories can produce too many splits, increasing risk of overfitting." }, { "id": 58, "questionText": "Scenario: Decision Tree applied to time-series dataset without lag features. Observation?", "options": [ "Tree underfits", "Tree may not capture temporal dependencies", "Training error zero", "Tree overfits" ], "correctAnswerIndex": 1, "explanation": "Decision Trees cannot inherently model sequences; engineered features like lagged variables are needed." }, { "id": 59, "questionText": "Scenario: Tree applied with min_impurity_decrease=0.02. Observation?", "options": [ "Tree ignores features", "Tree always overfits", "Only splits that reduce impurity ≥0.02 are made, preventing overfitting", "Training error zero" ], "correctAnswerIndex": 2, "explanation": "Minimum impurity decrease restricts splits, acting as a regularization technique." }, { "id": 60, "questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=5 on noisy dataset. Observation?", "options": [ "Tree overfits", "Tree balances depth and leaf size, reducing overfitting", "Tree ignores features", "Tree underfits completely" ], "correctAnswerIndex": 1, "explanation": "Combining depth limit and minimum leaf size regularizes the tree for better generalization." }, { "id": 61, "questionText": "Scenario: Tree applied to dataset with uniform target values. Observation?", "options": [ "Tree remains shallow, predicts uniform target", "Training error zero", "Tree overfits", "Tree underfits" ], "correctAnswerIndex": 0, "explanation": "Uniform targets do not create variance; tree does not split further." }, { "id": 62, "questionText": "Scenario: Tree applied with max_depth=5 on dataset with strong non-linear patterns. Observation?", "options": [ "Tree may underfit due to limited depth", "Tree ignores features", "Tree overfits", "Training error zero" ], "correctAnswerIndex": 0, "explanation": "Shallow trees cannot capture complex patterns, causing underfitting." }, { "id": 63, "questionText": "Scenario: Decision Tree applied to financial dataset with high outlier presence. 
Observation?", "options": [ "Tree underfits", "Tree splits may overfit outliers without pruning or min_samples_leaf", "Tree ignores outliers automatically", "Training error zero" ], "correctAnswerIndex": 1, "explanation": "Extreme values can cause splits that focus too much on outliers." }, { "id": 64, "questionText": "Scenario: Tree applied to dataset with missing values. Observation?", "options": [ "Tree ignores missing values automatically", "Tree overfits", "Tree requires imputation; cannot handle missing values directly", "Tree underfits" ], "correctAnswerIndex": 2, "explanation": "Decision Trees need complete data or preprocessing to handle missing values." }, { "id": 65, "questionText": "Scenario: Tree applied with max_features=None on high-dimensional dataset. Observation?", "options": [ "Tree ignores some features", "Tree underfits", "Training error zero", "Tree considers all features per split, may overfit" ], "correctAnswerIndex": 3, "explanation": "Using all features at each split increases variance and risk of overfitting." }, { "id": 66, "questionText": "Scenario: Tree applied with very small min_samples_split. Observation?", "options": [ "Tree may overfit due to tiny splits", "Tree ignores features", "Tree underfits", "Training error zero" ], "correctAnswerIndex": 0, "explanation": "Small min_samples_split allows splitting on tiny subsets, increasing overfitting risk." }, { "id": 67, "questionText": "Scenario: Tree applied to dataset with skewed target distribution. Observation?", "options": [ "Tree underfits", "Tree ignores skew", "Tree may bias predictions toward dominant target values", "Tree perfectly predicts" ], "correctAnswerIndex": 2, "explanation": "Skewed targets can bias splits toward majority values, reducing accuracy for minority ranges." }, { "id": 68, "questionText": "Scenario: Tree applied with random_state set. Observation?", "options": [ "Training error zero", "Tree ignores features", "Ensures reproducible results", "Tree underfits" ], "correctAnswerIndex": 2, "explanation": "Setting random_state ensures deterministic tree construction." }, { "id": 69, "questionText": "Scenario: Tree applied with max_depth=10 and min_samples_leaf=2 on noisy dataset. Observation?", "options": [ "Tree underfits", "May overfit noise despite some leaf constraints", "Tree ignores features", "Training error zero" ], "correctAnswerIndex": 1, "explanation": "Even with min_samples_leaf=2, deep trees can still overfit noisy data." }, { "id": 70, "questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?", "options": [ "Tree can handle both; splits thresholds for continuous and categories for categorical", "Tree fails", "Tree ignores categorical features", "Tree ignores continuous features" ], "correctAnswerIndex": 0, "explanation": "Decision Trees split both types appropriately." }, { "id": 71, "questionText": "Scenario: Tree applied to dataset with correlated features. Observation?", "options": [ "Tree fails", "Tree underfits", "Tree uses all features equally", "Tree may select one correlated feature for split, ignoring others" ], "correctAnswerIndex": 3, "explanation": "Greedy splits select the most informative feature and ignore redundant correlated features." }, { "id": 72, "questionText": "Scenario: Tree applied to small dataset. Observation: prediction varies with different train/test splits. 
Reason?", "options": [ "High variance due to sensitivity to small data changes", "Tree always underfits", "Training error zero", "Tree ignores features" ], "correctAnswerIndex": 0, "explanation": "Small datasets cause unstable splits, leading to high variance." }, { "id": 73, "questionText": "Scenario: Tree applied with max_depth=6 on dataset with complex patterns. Observation?", "options": [ "Tree may underfit due to limited depth", "Training error zero", "Tree overfits", "Tree ignores features" ], "correctAnswerIndex": 0, "explanation": "Limited depth restricts tree complexity and may underfit complex relationships." }, { "id": 74, "questionText": "Scenario: Tree applied to dataset with features scaled differently. Observation?", "options": [ "Tree overfits", "Tree underfits", "Training error zero", "Decision Tree is scale-invariant; scaling has no effect" ], "correctAnswerIndex": 3, "explanation": "Decision Trees do not rely on feature magnitudes; scaling does not affect splits." }, { "id": 75, "questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=4. Observation?", "options": [ "Tree overfits", "Tree underfits", "Tree ignores features", "Tree balances depth and leaf size, reducing overfitting" ], "correctAnswerIndex": 3, "explanation": "Combination of depth and leaf constraints helps tree generalize better." }, { "id": 76, "questionText": "Scenario: Tree applied to very small dataset with max_depth=None. Observation?", "options": [ "Tree underfits", "Training error high", "Tree ignores features", "Tree highly overfits, predictions unstable" ], "correctAnswerIndex": 3, "explanation": "Unlimited depth on small data leads to capturing noise, causing overfitting and instability." }, { "id": 77, "questionText": "Scenario: Decision Tree applied with min_samples_split very high. Observation?", "options": [ "Tree underfits due to few splits", "Tree ignores features", "Tree overfits", "Training error zero" ], "correctAnswerIndex": 0, "explanation": "High min_samples_split prevents many splits, simplifying the tree and possibly underfitting." }, { "id": 78, "questionText": "Scenario: Tree applied with max_features=1 on dataset with 50 features. Observation?", "options": [ "Tree overfits", "Tree underfits completely", "Tree uses all features equally", "Tree uses only one feature per split, reduces overfitting but may underfit" ], "correctAnswerIndex": 3, "explanation": "Limiting to one feature per split introduces randomness, reducing variance but may increase bias." }, { "id": 79, "questionText": "Scenario: Tree applied to dataset with extreme outliers in target. Observation?", "options": [ "Tree may create leaves specifically fitting outliers, overfitting", "Tree ignores outliers automatically", "Training error zero", "Tree underfits" ], "correctAnswerIndex": 0, "explanation": "Decision Trees can focus on extreme values, creating splits that overfit outliers." }, { "id": 80, "questionText": "Scenario: Tree applied to dataset with skewed categorical features. Observation?", "options": [ "Tree may bias splits toward frequent categories", "Tree ignores categories", "Tree underfits", "Tree perfectly predicts" ], "correctAnswerIndex": 0, "explanation": "Highly imbalanced categories influence the tree to favor majority categories in splits." }, { "id": 81, "questionText": "Scenario: Tree applied to dataset with missing values. 
Observation?", "options": [ "Tree overfits", "Tree requires preprocessing; cannot handle missing values directly", "Tree ignores missing values automatically", "Tree underfits" ], "correctAnswerIndex": 1, "explanation": "Decision Trees need complete data or imputation before training." }, { "id": 82, "questionText": "Scenario: Tree applied with criterion='poisson'. Observation?", "options": [ "Tree ignores criterion", "Tree overfits", "Optimized for count data, splits minimize Poisson deviance", "Tree underfits" ], "correctAnswerIndex": 2, "explanation": "Poisson criterion is used for regression tasks with count targets." }, { "id": 83, "questionText": "Scenario: Tree applied to dataset with both continuous and categorical features. Observation?", "options": [ "Tree handles both; continuous via thresholds, categorical via category splits", "Tree ignores continuous features", "Tree ignores categorical features", "Tree fails" ], "correctAnswerIndex": 0, "explanation": "Decision Trees split both types appropriately." }, { "id": 84, "questionText": "Scenario: Tree applied to time-series dataset without feature engineering. Observation?", "options": [ "Tree overfits", "Tree underfits", "Training error zero", "Tree may not capture temporal dependencies" ], "correctAnswerIndex": 3, "explanation": "Decision Trees require features like lag variables to capture temporal patterns." }, { "id": 85, "questionText": "Scenario: Tree applied with max_depth=3 on dataset with complex non-linear relationships. Observation?", "options": [ "Tree underfits due to shallow depth", "Training error zero", "Tree ignores features", "Tree overfits" ], "correctAnswerIndex": 0, "explanation": "Shallow depth limits the ability to capture complex patterns, leading to underfitting." }, { "id": 86, "questionText": "Scenario: Tree applied with min_impurity_decrease=0.1. Observation?", "options": [ "Tree ignores features", "Training error zero", "Only splits reducing impurity ≥0.1 are allowed, regularizing tree", "Tree overfits" ], "correctAnswerIndex": 2, "explanation": "Minimum impurity decrease prevents unnecessary splits, controlling complexity." }, { "id": 87, "questionText": "Scenario: Tree applied to dataset with skewed target values. Observation?", "options": [ "Tree underfits", "Tree ignores skew", "Tree perfectly predicts", "Tree may bias predictions toward dominant target values" ], "correctAnswerIndex": 3, "explanation": "Skewed targets can bias splits toward majority values, reducing accuracy for minority ranges." }, { "id": 88, "questionText": "Scenario: Tree applied with very small min_samples_leaf. Observation?", "options": [ "Tree ignores features", "Tree may overfit by creating tiny leaves", "Training error zero", "Tree underfits" ], "correctAnswerIndex": 1, "explanation": "Small leaves can cause overfitting to minor fluctuations or noise." }, { "id": 89, "questionText": "Scenario: Tree applied with max_leaf_nodes=5. Observation?", "options": [ "Tree underfits due to limited leaf complexity", "Tree ignores features", "Training error zero", "Tree overfits" ], "correctAnswerIndex": 0, "explanation": "Restricting leaves limits the tree's ability to capture detailed patterns." }, { "id": 90, "questionText": "Scenario: Tree applied with max_features=None on high-dimensional data. 
Observation?", "options": [ "Tree ignores some features", "Tree considers all features per split, may overfit", "Training error zero", "Tree underfits" ], "correctAnswerIndex": 1, "explanation": "Using all features increases variance, risking overfitting." }, { "id": 91, "questionText": "Scenario: Tree applied to dataset with correlated features. Observation?", "options": [ "Tree may select one correlated feature, ignoring others", "Tree underfits", "Tree uses all features equally", "Tree fails" ], "correctAnswerIndex": 0, "explanation": "Greedy splits pick the most informative feature and ignore redundant ones." }, { "id": 92, "questionText": "Scenario: Tree applied to dataset with small sample size. Observation?", "options": [ "Training error zero", "Tree underfits", "Predictions are unstable across train/test splits due to high variance", "Tree ignores features" ], "correctAnswerIndex": 2, "explanation": "Small datasets cause instability in splits, producing high variance predictions." }, { "id": 93, "questionText": "Scenario: Tree applied to dataset with extreme noise. Observation?", "options": [ "Tree ignores features", "Training error zero", "Tree underfits", "Tree may overfit noise without regularization" ], "correctAnswerIndex": 3, "explanation": "Unrestricted trees capture noise, reducing generalization performance." }, { "id": 94, "questionText": "Scenario: Tree applied with max_depth=6, min_samples_leaf=5 on noisy dataset. Observation?", "options": [ "Tree balances complexity and leaf constraints, better generalization", "Tree underfits", "Tree ignores features", "Tree overfits" ], "correctAnswerIndex": 0, "explanation": "Combining depth and leaf constraints regularizes the tree for improved generalization." }, { "id": 95, "questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?", "options": [ "Tree ignores categorical features", "Tree fails", "Tree ignores continuous features", "Tree can handle both; splits thresholds for continuous and categories for categorical" ], "correctAnswerIndex": 3, "explanation": "Decision Trees split both feature types appropriately." }, { "id": 96, "questionText": "Scenario: Tree applied with random_state set. Observation?", "options": [ "Training error zero", "Results reproducible across runs", "Tree underfits", "Tree ignores features" ], "correctAnswerIndex": 1, "explanation": "Setting random_state ensures deterministic tree construction." }, { "id": 97, "questionText": "Scenario: Tree applied to dataset with highly imbalanced categorical features. Observation?", "options": [ "Tree underfits", "Tree perfectly predicts", "Splits biased toward frequent categories, may reduce accuracy for rare categories", "Tree ignores categories" ], "correctAnswerIndex": 2, "explanation": "Imbalanced categories influence split decisions, potentially causing bias." }, { "id": 98, "questionText": "Scenario: Tree applied to dataset with missing values. Observation?", "options": [ "Tree underfits", "Requires imputation or preprocessing", "Tree overfits", "Tree ignores missing values automatically" ], "correctAnswerIndex": 1, "explanation": "Decision Trees cannot handle missing values directly; preprocessing is required." }, { "id": 99, "questionText": "Scenario: Tree applied to high-dimensional data with max_depth=None. 
Observation?", "options": [ "Tree may overfit due to unlimited depth and many features", "Tree ignores features", "Tree underfits", "Training error zero" ], "correctAnswerIndex": 0, "explanation": "Unlimited depth with high-dimensional features leads to over-complex splits and overfitting." }, { "id": 100, "questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=4, max_features='sqrt'. Observation?", "options": [ "Tree underfits", "Tree overfits", "Tree ignores features", "Tree balances depth, leaf size, and feature selection for improved generalization" ], "correctAnswerIndex": 3, "explanation": "Combining depth limit, leaf constraint, and feature subset selection regularizes the tree effectively." } ] }