Spaces:

deedrop1140
/

MachineLearningAlgorithms

Running

File size: 46,859 Bytes

0d00d62

{
  "title": "Decision Tree Regression Mastery: 100 MCQs",
  "description": "A comprehensive set of 100 multiple-choice questions designed to teach and test your understanding of Decision Tree Regression, covering basic concepts, splitting criteria, pruning, overfitting, hyperparameter tuning, and real-world scenarios.",
  "questions": [
    {
      "id": 1,
      "questionText": "What is the main goal of Decision Tree Regression?",
      "options": [
        "Reduce dimensionality",
        "Predict continuous target values using a tree structure",
        "Classify data into categories",
        "Cluster data points"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Decision Tree Regression predicts continuous values by splitting the data into subsets based on feature thresholds."
    },
    {
      "id": 2,
      "questionText": "Which criterion is commonly used to decide splits in Decision Tree Regression?",
      "options": [
        "Mean Squared Error (MSE)",
        "Gini Index",
        "Entropy",
        "Silhouette Score"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Splits are chosen to minimize Mean Squared Error (MSE), which is equivalent to minimizing the variance of the target variable within the child nodes."
    },
    {
      "id": 3,
      "questionText": "What is overfitting in Decision Tree Regression?",
      "options": [
        "When the tree is too shallow",
        "When predictions are always zero",
        "When data is not standardized",
        "When the tree captures noise in the training data"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Overfitting occurs when a tree becomes too complex, capturing noise instead of general patterns."
    },
    {
      "id": 4,
      "questionText": "Which hyperparameter helps control the maximum depth of a Decision Tree?",
      "options": [
        "max_depth",
        "criterion",
        "min_samples_split",
        "gamma"
      ],
      "correctAnswerIndex": 0,
      "explanation": "max_depth limits how deep the tree can grow, preventing overfitting."
    },
    {
      "id": 5,
      "questionText": "Scenario: A Decision Tree is trained with max_depth=None on a small dataset. Observation?",
      "options": [
        "Tree ignores features",
        "Tree likely overfits",
        "Tree underfits",
        "Predictions are zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Without depth limitation, the tree can grow too complex and overfit small datasets."
    },
    {
      "id": 6,
      "questionText": "What is min_samples_split in Decision Tree Regression?",
      "options": [
        "Minimum samples required to split an internal node",
        "Maximum number of features used",
        "Maximum depth of the tree",
        "Minimum samples required at a leaf node"
      ],
      "correctAnswerIndex": 0,
      "explanation": "min_samples_split controls when a node can be split, helping to regularize the tree."
    },
    {
      "id": 7,
      "questionText": "Scenario: Decision Tree with very high min_samples_split. Observation?",
      "options": [
        "Tree underfits, predictions may be too coarse",
        "Features ignored",
        "Tree overfits",
        "Predictions perfect"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High min_samples_split prevents many splits, making the tree simpler and possibly underfitting."
    },
    {
      "id": 8,
      "questionText": "Which method can be used to reduce overfitting in Decision Tree Regression?",
      "options": [
        "Using all features without restriction",
        "Pruning",
        "Reducing training data",
        "Increasing max_depth without limit"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Pruning removes unnecessary splits to improve generalization and prevent overfitting."
    },
    {
      "id": 9,
      "questionText": "What is the role of a leaf node in Decision Tree Regression?",
      "options": [
        "Counts the number of features",
        "Measures feature importance",
        "Contains the predicted value",
        "Decides feature splits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Leaf nodes hold the predicted output value for the observations in that node."
    },
    {
      "id": 10,
      "questionText": "Scenario: Tree is very deep and training error is near zero but test error is high. Observation?",
      "options": [
        "Tree ignores features",
        "Tree is overfitting",
        "Tree predictions are unbiased",
        "Tree is underfitting"
      ],
      "correctAnswerIndex": 1,
      "explanation": "A very deep tree may perfectly fit the training data but fail to generalize, causing overfitting."
    },
    {
      "id": 11,
      "questionText": "Which splitting strategy minimizes variance in Decision Tree Regression?",
      "options": [
        "Maximizing entropy",
        "Maximizing Gini index",
        "Choosing splits that minimize MSE in child nodes",
        "Random splits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Splits are chosen to minimize the mean squared error of target values in child nodes."
    },
    {
      "id": 12,
      "questionText": "Scenario: Dataset has categorical features with many levels. Decision Tree Observation?",
      "options": [
        "Tree may create too many splits, overfitting possible",
        "Tree ignores categorical features",
        "Tree always underfits",
        "Tree cannot handle categorical data"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High-cardinality categorical features can lead to many splits, increasing overfitting risk."
    },
    {
      "id": 13,
      "questionText": "What does min_samples_leaf control?",
      "options": [
        "Minimum number of samples required in a leaf node",
        "Kernel choice",
        "Maximum depth of the tree",
        "Minimum split threshold"
      ],
      "correctAnswerIndex": 0,
      "explanation": "min_samples_leaf ensures leaf nodes have a minimum number of samples, preventing overfitting to very small subsets."
    },
    {
      "id": 14,
      "questionText": "Scenario: Decision Tree with min_samples_leaf=10 on small dataset. Observation?",
      "options": [
        "Tree may underfit, predictions coarser",
        "Leaf nodes empty",
        "Tree ignores features",
        "Tree overfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High min_samples_leaf prevents fine splits, which may underfit small datasets."
    },
    {
      "id": 15,
      "questionText": "Which metric is used to measure feature importance in Decision Tree Regression?",
      "options": [
        "Silhouette Score",
        "Entropy",
        "Reduction in MSE due to splits on the feature",
        "Gini Index"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Features contributing more to variance reduction are considered more important."
    },
    {
      "id": 16,
      "questionText": "Scenario: Feature importance shows some features are zero. Observation?",
      "options": [
        "They are most important",
        "Tree ignored all features",
        "Those features do not contribute to splits",
        "Training error is zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Features with zero importance do not improve splits in the tree."
    },
    {
      "id": 17,
      "questionText": "Which is a disadvantage of Decision Tree Regression?",
      "options": [
        "Always underfits",
        "Cannot handle categorical data",
        "Prone to overfitting if not regularized",
        "Cannot handle continuous data"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Decision trees can overfit training data without depth or sample restrictions."
    },
    {
      "id": 18,
      "questionText": "Scenario: Decision Tree applied to noisy dataset without restrictions. Observation?",
      "options": [
        "Tree ignores features",
        "Tree underfits",
        "Tree overfits noise",
        "Training error high"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Unrestricted trees fit all variations including noise, causing overfitting."
    },
    {
      "id": 19,
      "questionText": "Scenario: Decision Tree applied with max_features='sqrt'. Observation?",
      "options": [
        "All features ignored",
        "Training error zero",
        "Random subset of features considered for splits, reduces overfitting",
        "Tree depth unlimited"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Limiting features per split reduces variance and improves generalization."
    },
    {
      "id": 20,
      "questionText": "Scenario: Tree with very small max_leaf_nodes. Observation?",
      "options": [
        "Tree ignores features",
        "Tree overfits",
        "Tree underfits due to limited leaves",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Restricting leaves reduces tree complexity, which may lead to underfitting."
    },
    {
      "id": 21,
      "questionText": "Scenario: Tree applied to dataset with continuous features. Observation: splits based on thresholds. Why?",
      "options": [
        "Tree uses Gini",
        "Decision Tree Regression splits continuous features using thresholds to minimize variance",
        "Tree uses entropy",
        "Tree ignores continuous features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Continuous features are split at values that minimize MSE in child nodes."
    },
    {
      "id": 22,
      "questionText": "Scenario: Tree applied with min_impurity_decrease=0.01. Observation?",
      "options": [
        "Tree always overfits",
        "Tree ignores features",
        "Only splits that reduce impurity by 0.01 or more are made, helps prevent overfitting",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Setting a minimum impurity decrease prevents unnecessary splits, regularizing the tree."
    },
    {
      "id": 23,
      "questionText": "Scenario: Tree applied to dataset with correlated features. Observation?",
      "options": [
        "Tree fails",
        "Tree may select one correlated feature for splits, ignoring others",
        "Tree overfits all features",
        "Tree underfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Decision trees choose splits greedily; correlated features may be ignored once one is used."
    },
    {
      "id": 24,
      "questionText": "Scenario: Decision Tree applied to dataset with outliers. Observation?",
      "options": [
        "Tree ignores outliers",
        "Tree may overfit outliers if not pruned",
        "Tree underfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Extreme values can cause splits that focus too much on outliers, overfitting the model."
    },
    {
      "id": 25,
      "questionText": "Scenario: Decision Tree applied with random_state set. Observation?",
      "options": [
        "Tree overfits",
        "Tree underfits",
        "Randomly ignores features",
        "Ensures reproducible results"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Setting random_state makes the tree building process deterministic, allowing reproducibility."
    },
    {
      "id": 26,
      "questionText": "Scenario: Decision Tree applied to housing dataset with max_depth=3. Observation?",
      "options": [
        "Tree overfits",
        "Training error zero",
        "Tree may underfit due to shallow depth",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Shallow trees may fail to capture complex patterns, leading to underfitting."
    },
    {
      "id": 27,
      "questionText": "Scenario: Tree trained on noisy stock prices with max_depth=None. Observation?",
      "options": [
        "Tree underfits",
        "Tree overfits noise, poor generalization",
        "Features ignored",
        "Predictions smooth"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Unlimited depth allows the tree to capture every fluctuation, including noise."
    },
    {
      "id": 28,
      "questionText": "Scenario: Tree applied to dataset with outliers. Observation: pruning applied. Effect?",
      "options": [
        "Tree ignores all data",
        "Tree underfits entirely",
        "Reduces overfitting to outliers, better generalization",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Pruning removes unnecessary splits caused by outliers, improving generalization."
    },
    {
      "id": 29,
      "questionText": "Scenario: Decision Tree applied with min_samples_split=50 on a dataset of 500 samples. Observation?",
      "options": [
        "Tree overfits",
        "Tree is simpler, may underfit local patterns",
        "Tree ignores features",
        "Training error zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Requiring 50 samples to split reduces the number of splits and may miss finer patterns."
    },
    {
      "id": 30,
      "questionText": "Scenario: Tree applied with min_samples_leaf=20 on small dataset. Observation?",
      "options": [
        "Tree underfits, coarse predictions",
        "Training error zero",
        "Tree overfits",
        "Leaf nodes empty"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Minimum leaf size prevents small leaves, simplifying the tree and possibly underfitting."
    },
    {
      "id": 31,
      "questionText": "Scenario: Tree applied to dataset with 1000 features. Observation: max_features='sqrt'. Effect?",
      "options": [
        "Training error zero",
        "Tree ignores features",
        "Tree depth unlimited",
        "Random subset considered for splits, reduces overfitting"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Using only a subset of features per split helps control variance and prevents overfitting."
    },
    {
      "id": 32,
      "questionText": "Scenario: Tree applied with max_leaf_nodes=10. Observation?",
      "options": [
        "Tree underfits due to limited complexity",
        "Tree ignores features",
        "Tree overfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Limiting the number of leaves reduces tree complexity, potentially causing underfitting."
    },
    {
      "id": 33,
      "questionText": "Scenario: Tree applied to dataset with highly correlated features. Observation?",
      "options": [
        "Tree uses all features equally",
        "Tree underfits",
        "Tree fails to train",
        "Tree may favor one correlated feature, ignoring others"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Greedy splitting selects one feature and ignores redundant correlated features."
    },
    {
      "id": 34,
      "questionText": "Scenario: Decision Tree applied with min_impurity_decrease=0.05. Observation?",
      "options": [
        "Tree ignores features",
        "Training error zero",
        "Tree always overfits",
        "Only splits reducing impurity ≥0.05 are made, regularizes tree"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Setting minimum impurity decrease prevents unnecessary splits, improving generalization."
    },
    {
      "id": 35,
      "questionText": "Scenario: Tree applied to dataset with categorical features of high cardinality. Observation?",
      "options": [
        "Tree may overfit due to many splits",
        "Tree underfits",
        "Tree fails to train",
        "Tree ignores categories"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High-cardinality categorical features can lead to over-complex splits and overfitting."
    },
    {
      "id": 36,
      "questionText": "Scenario: Decision Tree applied to time-series dataset without feature engineering. Observation?",
      "options": [
        "Tree ignores features",
        "Training error zero",
        "Tree overfits",
        "Tree may not capture temporal patterns"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Decision trees cannot inherently capture sequential patterns without engineered features."
    },
    {
      "id": 37,
      "questionText": "Scenario: Tree applied to dataset with extreme outliers. Observation?",
      "options": [
        "Tree ignores outliers automatically",
        "Tree underfits",
        "Tree may overfit to extreme values if not regularized",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Outliers can create splits that distort predictions, overfitting the model."
    },
    {
      "id": 38,
      "questionText": "Scenario: Decision Tree applied with random_state set. Observation?",
      "options": [
        "Results are reproducible",
        "Training error zero",
        "Tree underfits",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Setting random_state ensures deterministic tree construction, making results reproducible."
    },
    {
      "id": 39,
      "questionText": "Scenario: Tree applied with max_depth=5 and high noise. Observation?",
      "options": [
        "Tree ignores features",
        "Tree perfectly fits data",
        "May underfit some patterns, partially overfit noise",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Limited depth may underfit trends but still capture some noise, leading to mixed performance."
    },
    {
      "id": 40,
      "questionText": "Scenario: Decision Tree applied to dataset with missing values. Observation?",
      "options": [
        "Tree underfits",
        "Tree ignores missing values automatically",
        "Tree cannot handle missing values directly; preprocessing needed",
        "Tree overfits"
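      ],
      "correctAnswerIndex": 2,
      "explanation": "Classic decision tree implementations require complete data, so missing values are usually imputed before training; some newer libraries (e.g., scikit-learn 1.3+) add native missing-value support."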
    },
    {
      "id": 41,
      "questionText": "Scenario: Decision Tree applied to dataset with highly skewed target. Observation?",
      "options": [
        "Tree underfits",
        "Tree ignores skew",
        "Tree predictions may be biased toward dominant target values",
        "Tree perfectly predicts"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Skewed targets can cause trees to favor majority values, reducing accuracy on rare cases."
    },
    {
      "id": 42,
      "questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?",
      "options": [
        "Tree ignores continuous features",
        "Tree fails",
        "Tree ignores categorical features",
        "Tree can handle both, splits on thresholds for continuous and categories for categorical"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Decision trees can split both types, using appropriate thresholds or categories."
    },
    {
      "id": 43,
      "questionText": "Scenario: Decision Tree applied with criterion='squared_error' (called 'mse' in scikit-learn before version 1.0). Observation?",
      "options": [
        "Tree ignores criterion",
        "Splits minimize mean squared error in child nodes",
        "Splits maximize entropy",
        "Splits maximize Gini index"
      ],
      "correctAnswerIndex": 1,
      "explanation": "MSE is used to reduce variance and improve regression accuracy at splits."
    },
    {
      "id": 44,
      "questionText": "Scenario: Tree applied with very small min_samples_split. Observation?",
      "options": [
        "Tree underfits",
        "Tree may overfit by creating many small splits",
        "Training error zero",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Small min_samples_split allows splits on tiny subsets, causing overfitting."
    },
    {
      "id": 45,
      "questionText": "Scenario: Tree applied with max_features=None on high-dimensional data. Observation?",
      "options": [
        "Tree ignores some features",
        "Tree underfits",
        "Tree considers all features at each split, may overfit",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Using all features increases variance and can lead to overfitting."
    },
    {
      "id": 46,
      "questionText": "Scenario: Tree applied to dataset with many redundant features. Observation?",
      "options": [
        "Tree underfits",
        "Tree fails",
        "Tree uses all features equally",
        "Tree may ignore redundant features after selecting one correlated feature"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Greedy splitting selects the most informative feature and ignores others."
    },
    {
      "id": 47,
      "questionText": "Scenario: Decision Tree applied to small dataset. Observation: high variance in predictions across different train/test splits. Reason?",
      "options": [
        "Tree always underfits",
        "Training error zero",
        "Tree ignores features",
        "Trees are sensitive to small data changes, high variance"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Small datasets cause instability in tree splits, resulting in high variance."
    },
    {
      "id": 48,
      "questionText": "Scenario: Tree applied to a dataset with uniform target values. Observation?",
      "options": [
        "Tree overfits",
        "Tree will have shallow depth; predictions equal to uniform target",
        "Tree underfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "If all target values are identical, no split can reduce variance, so the tree stays minimal and predicts that constant value."
    },
    {
      "id": 49,
      "questionText": "Scenario: Tree applied to dataset with features scaled differently. Observation?",
      "options": [
        "Tree underfits",
        "Training error zero",
        "Decision Tree unaffected by feature scaling",
        "Tree overfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Decision Trees are scale-invariant; feature scaling does not affect splits."
    },
    {
      "id": 50,
      "questionText": "Scenario: Decision Tree applied with max_depth=10, min_samples_leaf=5. Observation?",
      "options": [
        "Tree balances depth and leaf size, reducing overfitting",
        "Tree ignores features",
        "Tree overfits",
        "Tree underfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Limiting depth and minimum leaf samples helps regularize the tree and improve generalization."
    },
    {
      "id": 51,
      "questionText": "Scenario: Decision Tree applied to financial dataset with max_depth=None and min_samples_split=2. Observation?",
      "options": [
        "Tree underfits",
        "Tree likely overfits due to unlimited depth and small splits",
        "Tree ignores features",
        "Predictions smooth"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Unlimited depth and tiny splits allow the tree to capture all noise, causing overfitting."
    },
    {
      "id": 52,
      "questionText": "Scenario: Tree applied to dataset with extreme outliers. Observation: min_samples_leaf=10 applied. Effect?",
      "options": [
        "Tree ignores features",
        "Tree overfits outliers",
        "Tree becomes more robust to outliers",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Minimum leaf size prevents leaves from fitting extreme individual outliers, improving robustness."
    },
    {
      "id": 53,
      "questionText": "Scenario: Tree applied to housing dataset. Observation: max_features='sqrt' used. Benefit?",
      "options": [
        "Tree ignores most features",
        "Tree depth unlimited",
        "Reduces variance and prevents overfitting by using feature subsets at each split",
        "Tree underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Random subsets per split regularize the tree, balancing bias and variance."
    },
    {
      "id": 54,
      "questionText": "Scenario: Decision Tree applied with criterion='friedman_mse'. Observation?",
      "options": [
        "Tree ignores MSE",
        "Tree underfits",
        "Tree fails",
        "Optimized for boosting algorithms, may improve split selection"
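      ],
      "correctAnswerIndex": 3,
      "explanation": "Friedman MSE is mean squared error combined with Friedman's improvement score for evaluating candidate splits; it was introduced for gradient boosting, where trees are fit to residuals."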
    },
    {
      "id": 55,
      "questionText": "Scenario: Tree applied to small dataset with many features. Observation: high variance in predictions. Reason?",
      "options": [
        "Tree ignores features",
        "Training error zero",
        "Greedy splits sensitive to small changes, causing high variance",
        "Tree underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Small datasets are prone to unstable splits, resulting in varying predictions."
    },
    {
      "id": 56,
      "questionText": "Scenario: Tree applied with max_leaf_nodes=15. Observation?",
      "options": [
        "Tree overfits",
        "Limits complexity, helps prevent overfitting",
        "Tree ignores features",
        "Tree underfits completely"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Restricting leaves reduces tree complexity, acting as regularization."
    },
    {
      "id": 57,
      "questionText": "Scenario: Tree applied to dataset with categorical features of high cardinality. Observation?",
      "options": [
        "Tree ignores categorical features",
        "Tree may overfit due to numerous splits on categories",
        "Tree fails",
        "Tree underfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Many categories can produce too many splits, increasing risk of overfitting."
    },
    {
      "id": 58,
      "questionText": "Scenario: Decision Tree applied to time-series dataset without lag features. Observation?",
      "options": [
        "Tree underfits",
        "Tree may not capture temporal dependencies",
        "Training error zero",
        "Tree overfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Decision Trees cannot inherently model sequences; engineered features like lagged variables are needed."
    },
    {
      "id": 59,
      "questionText": "Scenario: Tree applied with min_impurity_decrease=0.02. Observation?",
      "options": [
        "Tree ignores features",
        "Tree always overfits",
        "Only splits that reduce impurity ≥0.02 are made, preventing overfitting",
        "Training error zero"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Minimum impurity decrease restricts splits, acting as a regularization technique."
    },
    {
      "id": 60,
      "questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=5 on noisy dataset. Observation?",
      "options": [
        "Tree overfits",
        "Tree balances depth and leaf size, reducing overfitting",
        "Tree ignores features",
        "Tree underfits completely"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Combining depth limit and minimum leaf size regularizes the tree for better generalization."
    },
    {
      "id": 61,
      "questionText": "Scenario: Tree applied to dataset with uniform target values. Observation?",
      "options": [
        "Tree remains shallow, predicts uniform target",
        "Training error zero",
        "Tree overfits",
        "Tree underfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Uniform targets do not create variance; tree does not split further."
    },
    {
      "id": 62,
      "questionText": "Scenario: Tree applied with max_depth=5 on dataset with strong non-linear patterns. Observation?",
      "options": [
        "Tree may underfit due to limited depth",
        "Tree ignores features",
        "Tree overfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Shallow trees cannot capture complex patterns, causing underfitting."
    },
    {
      "id": 63,
      "questionText": "Scenario: Decision Tree applied to financial dataset with high outlier presence. Observation?",
      "options": [
        "Tree underfits",
        "Tree splits may overfit outliers without pruning or min_samples_leaf",
        "Tree ignores outliers automatically",
        "Training error zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Extreme values can cause splits that focus too much on outliers."
    },
    {
      "id": 64,
      "questionText": "Scenario: Tree applied to dataset with missing values. Observation?",
      "options": [
        "Tree ignores missing values automatically",
        "Tree overfits",
        "Tree requires imputation; cannot handle missing values directly",
        "Tree underfits"
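      ],
      "correctAnswerIndex": 2,
      "explanation": "Classic Decision Tree implementations need complete data or imputation to handle missing values; some newer libraries (e.g., scikit-learn 1.3+) add native missing-value support."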
    },
    {
      "id": 65,
      "questionText": "Scenario: Tree applied with max_features=None on high-dimensional dataset. Observation?",
      "options": [
        "Tree ignores some features",
        "Tree underfits",
        "Training error zero",
        "Tree considers all features per split, may overfit"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Using all features at each split increases variance and risk of overfitting."
    },
    {
      "id": 66,
      "questionText": "Scenario: Tree applied with very small min_samples_split. Observation?",
      "options": [
        "Tree may overfit due to tiny splits",
        "Tree ignores features",
        "Tree underfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Small min_samples_split allows splitting on tiny subsets, increasing overfitting risk."
    },
    {
      "id": 67,
      "questionText": "Scenario: Tree applied to dataset with skewed target distribution. Observation?",
      "options": [
        "Tree underfits",
        "Tree ignores skew",
        "Tree may bias predictions toward dominant target values",
        "Tree perfectly predicts"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Skewed targets can bias splits toward majority values, reducing accuracy for minority ranges."
    },
    {
      "id": 68,
      "questionText": "Scenario: Tree applied with random_state set. Observation?",
      "options": [
        "Training error zero",
        "Tree ignores features",
        "Ensures reproducible results",
        "Tree underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Setting random_state ensures deterministic tree construction."
    },
    {
      "id": 69,
      "questionText": "Scenario: Tree applied with max_depth=10 and min_samples_leaf=2 on noisy dataset. Observation?",
      "options": [
        "Tree underfits",
        "May overfit noise despite some leaf constraints",
        "Tree ignores features",
        "Training error zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Even with min_samples_leaf=2, deep trees can still overfit noisy data."
    },
    {
      "id": 70,
      "questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?",
      "options": [
        "Tree can handle both; splits on thresholds for continuous and categories for categorical",
        "Tree fails",
        "Tree ignores categorical features",
        "Tree ignores continuous features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Decision Trees split both types appropriately."
    },
    {
      "id": 71,
      "questionText": "Scenario: Tree applied to dataset with correlated features. Observation?",
      "options": [
        "Tree fails",
        "Tree underfits",
        "Tree uses all features equally",
        "Tree may select one correlated feature for split, ignoring others"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Greedy splits select the most informative feature and ignore redundant correlated features."
    },
    {
      "id": 72,
      "questionText": "Scenario: Tree applied to small dataset. Observation: predictions vary with different train/test splits. Reason?",
      "options": [
        "High variance due to sensitivity to small data changes",
        "Tree always underfits",
        "Training error zero",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Small datasets cause unstable splits, leading to high variance."
    },
    {
      "id": 73,
      "questionText": "Scenario: Tree applied with max_depth=6 on dataset with complex patterns. Observation?",
      "options": [
        "Tree may underfit due to limited depth",
        "Training error zero",
        "Tree overfits",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Limited depth restricts tree complexity and may underfit complex relationships."
    },
    {
      "id": 74,
      "questionText": "Scenario: Tree applied to dataset with features scaled differently. Observation?",
      "options": [
        "Tree overfits",
        "Tree underfits",
        "Training error zero",
        "Decision Tree is scale-invariant; scaling has no effect"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Decision Tree splits depend only on the ordering of values within each feature, not on their magnitudes, so rescaling features does not change how the data is partitioned."
    },
    {
      "id": 75,
      "questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=4. Observation?",
      "options": [
        "Tree overfits",
        "Tree underfits",
        "Tree ignores features",
        "Tree balances depth and leaf size, reducing overfitting"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Combination of depth and leaf constraints helps tree generalize better."
    },
    {
      "id": 76,
      "questionText": "Scenario: Tree applied to very small dataset with max_depth=None. Observation?",
      "options": [
        "Tree underfits",
        "Training error high",
        "Tree ignores features",
        "Tree highly overfits, predictions unstable"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Unlimited depth on small data leads to capturing noise, causing overfitting and instability."
    },
    {
      "id": 77,
      "questionText": "Scenario: Decision Tree applied with min_samples_split very high. Observation?",
      "options": [
        "Tree underfits due to few splits",
        "Tree ignores features",
        "Tree overfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High min_samples_split prevents many splits, simplifying the tree and possibly underfitting."
    },
    {
      "id": 78,
      "questionText": "Scenario: Tree applied with max_features=1 on dataset with 50 features. Observation?",
      "options": [
        "Tree overfits",
        "Tree underfits completely",
        "Tree uses all features equally",
        "Tree uses only one feature per split, reduces overfitting but may underfit"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Considering only one randomly selected feature at each split introduces randomness and restricts the tree's choices, which can reduce variance but may increase bias."
    },
    {
      "id": 79,
      "questionText": "Scenario: Tree applied to dataset with extreme outliers in target. Observation?",
      "options": [
        "Tree may create leaves specifically fitting outliers, overfitting",
        "Tree ignores outliers automatically",
        "Training error zero",
        "Tree underfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Decision Trees can focus on extreme values, creating splits that overfit outliers."
    },
    {
      "id": 80,
      "questionText": "Scenario: Tree applied to dataset with skewed categorical features. Observation?",
      "options": [
        "Tree may bias splits toward frequent categories",
        "Tree ignores categories",
        "Tree underfits",
        "Tree perfectly predicts"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Highly imbalanced categories influence the tree to favor majority categories in splits."
    },
    {
      "id": 81,
      "questionText": "Scenario: Tree applied to dataset with missing values. Observation?",
      "options": [
        "Tree overfits",
        "Tree requires preprocessing; cannot handle missing values directly",
        "Tree ignores missing values automatically",
        "Tree underfits"
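      ],
      "correctAnswerIndex": 1,
      "explanation": "Many Decision Tree implementations need complete data or imputation before training; some newer ones (e.g., scikit-learn 1.3 and later) add native support for missing values."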
    },
    {
      "id": 82,
      "questionText": "Scenario: Tree applied with criterion='poisson'. Observation?",
      "options": [
        "Tree ignores criterion",
        "Tree overfits",
        "Optimized for count data, splits minimize Poisson deviance",
        "Tree underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Poisson criterion is used for regression tasks with count targets."
    },
    {
      "id": 83,
      "questionText": "Scenario: Tree applied to dataset with both continuous and categorical features. Observation?",
      "options": [
        "Tree handles both; continuous via thresholds, categorical via category splits",
        "Tree ignores continuous features",
        "Tree ignores categorical features",
        "Tree fails"
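      ],
      "correctAnswerIndex": 0,
      "explanation": "Decision Trees split continuous features on numeric thresholds and categorical features on category membership; note that some libraries (e.g., scikit-learn) require categorical features to be encoded numerically first."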
    },
    {
      "id": 84,
      "questionText": "Scenario: Tree applied to time-series dataset without feature engineering. Observation?",
      "options": [
        "Tree overfits",
        "Tree underfits",
        "Training error zero",
        "Tree may not capture temporal dependencies"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Decision Trees require features like lag variables to capture temporal patterns."
    },
    {
      "id": 85,
      "questionText": "Scenario: Tree applied with max_depth=3 on dataset with complex non-linear relationships. Observation?",
      "options": [
        "Tree underfits due to shallow depth",
        "Training error zero",
        "Tree ignores features",
        "Tree overfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Shallow depth limits the ability to capture complex patterns, leading to underfitting."
    },
    {
      "id": 86,
      "questionText": "Scenario: Tree applied with min_impurity_decrease=0.1. Observation?",
      "options": [
        "Tree ignores features",
        "Training error zero",
        "Only splits that reduce impurity by ≥0.1 are allowed, regularizing the tree",
        "Tree overfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Minimum impurity decrease prevents unnecessary splits, controlling complexity."
    },
    {
      "id": 87,
      "questionText": "Scenario: Tree applied to dataset with skewed target values. Observation?",
      "options": [
        "Tree underfits",
        "Tree ignores skew",
        "Tree perfectly predicts",
        "Tree may bias predictions toward dominant target values"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Skewed targets can bias splits toward majority values, reducing accuracy for minority ranges."
    },
    {
      "id": 88,
      "questionText": "Scenario: Tree applied with very small min_samples_leaf. Observation?",
      "options": [
        "Tree ignores features",
        "Tree may overfit by creating tiny leaves",
        "Training error zero",
        "Tree underfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Small leaves can cause overfitting to minor fluctuations or noise."
    },
    {
      "id": 89,
      "questionText": "Scenario: Tree applied with max_leaf_nodes=5. Observation?",
      "options": [
        "Tree underfits due to limited leaf complexity",
        "Tree ignores features",
        "Training error zero",
        "Tree overfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Restricting leaves limits the tree's ability to capture detailed patterns."
    },
    {
      "id": 90,
      "questionText": "Scenario: Tree applied with max_features=None on high-dimensional data. Observation?",
      "options": [
        "Tree ignores some features",
        "Tree considers all features per split, may overfit",
        "Training error zero",
        "Tree underfits"
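      ],
      "correctAnswerIndex": 1,
      "explanation": "With every feature available at each split, the tree is more likely to find a feature that fits noise by chance in high-dimensional data, increasing variance and the risk of overfitting."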
    },
    {
      "id": 91,
      "questionText": "Scenario: Tree applied to dataset with correlated features. Observation?",
      "options": [
        "Tree may select one correlated feature, ignoring others",
        "Tree underfits",
        "Tree uses all features equally",
        "Tree fails"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Greedy splits pick the most informative feature and ignore redundant ones."
    },
    {
      "id": 92,
      "questionText": "Scenario: Tree applied to dataset with small sample size. Observation?",
      "options": [
        "Training error zero",
        "Tree underfits",
        "Predictions are unstable across train/test splits due to high variance",
        "Tree ignores features"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Small datasets cause instability in splits, producing high variance predictions."
    },
    {
      "id": 93,
      "questionText": "Scenario: Tree applied to dataset with extreme noise. Observation?",
      "options": [
        "Tree ignores features",
        "Training error zero",
        "Tree underfits",
        "Tree may overfit noise without regularization"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Unrestricted trees capture noise, reducing generalization performance."
    },
    {
      "id": 94,
      "questionText": "Scenario: Tree applied with max_depth=6, min_samples_leaf=5 on noisy dataset. Observation?",
      "options": [
        "Tree balances complexity and leaf constraints, better generalization",
        "Tree underfits",
        "Tree ignores features",
        "Tree overfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Combining depth and leaf constraints regularizes the tree for improved generalization."
    },
    {
      "id": 95,
      "questionText": "Scenario: Tree applied to dataset with continuous and categorical features. Observation?",
      "options": [
        "Tree ignores categorical features",
        "Tree fails",
        "Tree ignores continuous features",
        "Tree can handle both; splits on thresholds for continuous features and on categories for categorical features"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Decision Trees split continuous features on numeric thresholds and categorical features on category membership; note that some libraries (e.g., scikit-learn) require categorical features to be encoded numerically first."
    },
    {
      "id": 96,
      "questionText": "Scenario: Tree applied with random_state set. Observation?",
      "options": [
        "Training error zero",
        "Results reproducible across runs",
        "Tree underfits",
        "Tree ignores features"
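      ],
      "correctAnswerIndex": 1,
      "explanation": "Setting random_state fixes the seed for the tree's internal randomness (such as the order in which features are tried when candidate splits tie), so the same data yields the same tree on every run."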
    },
    {
      "id": 97,
      "questionText": "Scenario: Tree applied to dataset with highly imbalanced categorical features. Observation?",
      "options": [
        "Tree underfits",
        "Tree perfectly predicts",
        "Splits biased toward frequent categories, may reduce accuracy for rare categories",
        "Tree ignores categories"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Imbalanced categories influence split decisions, potentially causing bias."
    },
    {
      "id": 98,
      "questionText": "Scenario: Tree applied to dataset with missing values. Observation?",
      "options": [
        "Tree underfits",
        "Requires imputation or preprocessing",
        "Tree overfits",
        "Tree ignores missing values automatically"
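      ],
      "correctAnswerIndex": 1,
      "explanation": "Many Decision Tree implementations cannot handle missing values directly, so imputation or other preprocessing is required; some newer ones (e.g., scikit-learn 1.3 and later) add native support for missing values."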
    },
    {
      "id": 99,
      "questionText": "Scenario: Tree applied to high-dimensional data with max_depth=None. Observation?",
      "options": [
        "Tree may overfit due to unlimited depth and many features",
        "Tree ignores features",
        "Tree underfits",
        "Training error zero"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Unlimited depth with high-dimensional features leads to over-complex splits and overfitting."
    },
    {
      "id": 100,
      "questionText": "Scenario: Decision Tree applied with max_depth=8, min_samples_leaf=4, max_features='sqrt'. Observation?",
      "options": [
        "Tree underfits",
        "Tree overfits",
        "Tree ignores features",
        "Tree balances depth, leaf size, and feature selection for improved generalization"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Combining depth limit, leaf constraint, and feature subset selection regularizes the tree effectively."
    }
  ]
}