Spaces:

deedrop1140
/

MachineLearningAlgorithms

Running

File size: 42,906 Bytes

0d00d62

{
  "title": "Gaussian Mixture Models (GMM) Mastery: 100 MCQs",
  "description": "A comprehensive set of 100 MCQs covering Gaussian Mixture Models (GMM), from fundamental intuition to the EM algorithm, applications, soft clustering, covariance types, and real-world scenarios.",
  "questions": [
    {
      "id": 1,
      "questionText": "What is the primary goal of a Gaussian Mixture Model (GMM)?",
      "options": [
        "To reduce data dimensionality",
        "To perform supervised classification",
        "To model data as a mixture of multiple Gaussian distributions",
        "To perform hard clustering like K-Means"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM assumes data is generated from a mixture of multiple Gaussian distributions."
    },
    {
      "id": 2,
      "questionText": "GMM is mainly used for:",
      "options": [
        "Unsupervised clustering",
        "Time series forecasting",
        "Supervised learning",
        "Regression problems"
      ],
      "correctAnswerIndex": 0,
      "explanation": "GMM is an unsupervised clustering technique."
    },
    {
      "id": 3,
      "questionText": "GMM provides which type of clustering?",
      "options": [
        "Soft probabilistic clustering",
        "Binary classification",
        "Hard clustering",
        "Feature selection"
      ],
      "correctAnswerIndex": 0,
      "explanation": "GMM assigns each point a probability of belonging to each cluster."
    },
    {
      "id": 4,
      "questionText": "Which algorithm is commonly used to train GMM?",
      "options": [
        "Backpropagation",
        "Expectation-Maximization (EM)",
        "Gradient Descent",
        "Genetic Algorithm"
      ],
      "correctAnswerIndex": 1,
      "explanation": "EM algorithm is used to estimate GMM parameters."
    },
    {
      "id": 5,
      "questionText": "In GMM, each Gaussian distribution is called a:",
      "options": [
        "Component",
        "Kernel",
        "Label",
        "Loss function"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Each Gaussian is a component of the mixture model."
    },
    {
      "id": 6,
      "questionText": "The output of GMM for each data point is:",
      "options": [
        "Single cluster label only",
        "Probability distribution over all clusters",
        "Binary classification output",
        "Feature importance scores"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM gives probability of belonging to each Gaussian cluster."
    },
    {
      "id": 7,
      "questionText": "GMM assumes that data in each cluster follows a:",
      "options": [
        "Uniform distribution",
        "Poisson distribution",
        "Exponential distribution",
        "Gaussian distribution"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Each cluster is modeled as a Normal (Gaussian) distribution."
    },
    {
      "id": 8,
      "questionText": "The number of Gaussian components in GMM must be:",
      "options": [
        "Always equal to number of features",
        "Unlimited by default",
        "Automatically detected always",
        "Predefined manually in most implementations"
      ],
      "correctAnswerIndex": 3,
      "explanation": "We usually define number of components (k) before training."
    },
    {
      "id": 9,
      "questionText": "GMM is a generalization of which algorithm?",
      "options": [
        "Random Forest",
        "K-Means",
        "Naive Bayes",
        "SVM"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM is a probabilistic extension of K-Means that replaces hard assignments with soft ones."
    },
    {
      "id": 10,
      "questionText": "Which of the following does GMM estimate?",
      "options": [
        "Mean only",
        "Variance only",
        "Only class probabilities",
        "Mean, covariance, and mixing weight of each Gaussian"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM learns mean, covariance, and weight of each Gaussian."
    },
    {
      "id": 11,
      "questionText": "What does 'mixture' mean in GMM?",
      "options": [
        "Multiple datasets combined",
        "Adding noise to data",
        "Combination of several Gaussian probability distributions",
        "Blending of supervised and unsupervised learning"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM models the data density as a weighted sum of multiple Gaussian distributions."
    },
    {
      "id": 12,
      "questionText": "GMM can model clusters with:",
      "options": [
        "Only spherical shapes",
        "Elliptical and varying density clusters",
        "Only equal-sized circles",
        "Linear boundaries only"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM supports ellipsoidal clusters due to covariance matrix."
    },
    {
      "id": 13,
      "questionText": "Which type of covariance can GMM use?",
      "options": [
        "Only diagonal",
        "Only identity matrix",
        "Only full",
        "Full, Diagonal, Tied, Spherical"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM allows flexible covariance structure options."
    },
    {
      "id": 14,
      "questionText": "GMM is best suited when clusters are:",
      "options": [
        "Categorical only",
        "Perfectly separated",
        "Non-overlapping and spherical",
        "Overlapping and elliptical"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM is ideal for overlapping soft clusters."
    },
    {
      "id": 15,
      "questionText": "Which step assigns probability to each point belonging to a cluster in EM?",
      "options": [
        "Initialization step",
        "Maximization step",
        "Regularization step",
        "Expectation step"
      ],
      "correctAnswerIndex": 3,
      "explanation": "E-step calculates responsibility of each Gaussian."
    },
    {
      "id": 16,
      "questionText": "Which step updates parameters of Gaussians in EM?",
      "options": [
        "Prediction step",
        "Maximization step",
        "Normalization step",
        "Expectation step"
      ],
      "correctAnswerIndex": 1,
      "explanation": "M-step updates means, covariances, and weights."
    },
    {
      "id": 17,
      "questionText": "In GMM, mixing coefficients must:",
      "options": [
        "Sum to one",
        "Be greater than one",
        "Sum to zero",
        "Be negative"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Mixing weights represent probabilities → must sum to 1."
    },
    {
      "id": 18,
      "questionText": "What does a high responsibility value mean in GMM?",
      "options": [
        "Point is outlier",
        "Point strongly belongs to that Gaussian",
        "Cluster is ignored",
        "Model has failed"
      ],
      "correctAnswerIndex": 1,
      "explanation": "High responsibility = high probability of belonging to that cluster."
    },
    {
      "id": 19,
      "questionText": "GMM belongs to which model category?",
      "options": [
        "Neural network",
        "Discriminative model",
        "Purely geometric model",
        "Generative probabilistic model"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM models probability distribution of data (generative)."
    },
    {
      "id": 20,
      "questionText": "GMM is useful in:",
      "options": [
        "Anomaly detection",
        "Speaker recognition",
        "Image segmentation",
        "All of the above"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM is widely used in all of these real-world probabilistic applications."
    },
    {
      "id": 21,
      "questionText": "What does GMM do better than K-Means?",
      "options": [
        "Handle only linear separability",
        "Ignore feature scale",
        "Model overlapping probabilistic clusters",
        "Assign hard labels only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM outperforms K-Means when clusters overlap."
    },
    {
      "id": 22,
      "questionText": "A drawback of GMM is:",
      "options": [
        "Requires predefined number of clusters",
        "No probabilistic output",
        "Only works with categorical data",
        "Cannot handle continuous data"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Number of components must be specified before fitting."
    },
    {
      "id": 23,
      "questionText": "What initialization is commonly used for GMM?",
      "options": [
        "K-Means centroids",
        "Random labels",
        "Bootstrap resampling",
        "Gradient descent"
      ],
      "correctAnswerIndex": 0,
      "explanation": "K-Means is commonly used to initialize cluster means."
    },
    {
      "id": 24,
      "questionText": "GMM uses which principle to maximize likelihood?",
      "options": [
        "Gradient Descent",
        "Expectation-Maximization",
        "Dropout Regularization",
        "Least Squares Minimization"
      ],
      "correctAnswerIndex": 1,
      "explanation": "EM is a likelihood-based optimization method."
    },
    {
      "id": 25,
      "questionText": "The final decision in GMM assigns a point to the cluster with:",
      "options": [
        "Highest variance",
        "Minimum distance",
        "Maximum probability (responsibility)",
        "Random chance"
      ],
      "correctAnswerIndex": 2,
      "explanation": "A soft assignment is converted to a hard label by selecting the cluster with the highest probability."
    },
    {
      "id": 26,
      "questionText": "What role does covariance play in GMM?",
      "options": [
        "Defines cluster shape and orientation",
        "Sets learning rate",
        "Controls number of clusters",
        "Removes noise features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Covariance allows modeling elliptical and rotated clusters."
    },
    {
      "id": 27,
      "questionText": "What does GMM maximize during training?",
      "options": [
        "Sum of distances",
        "Training accuracy",
        "Entropy of clusters",
        "Total log-likelihood of data"
      ],
      "correctAnswerIndex": 3,
      "explanation": "EM optimizes log-likelihood of observing the data."
    },
    {
      "id": 28,
      "questionText": "What happens if two Gaussian components overlap heavily?",
      "options": [
        "Clusters merge into one automatically",
        "GMM switches to K-Means automatically",
        "GMM handles it with soft probabilities",
        "GMM fails immediately"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM assigns probabilities to each cluster; overlap is handled naturally via soft assignments."
    },
    {
      "id": 29,
      "questionText": "Which metric can be used to choose the number of components in GMM?",
      "options": [
        "Accuracy",
        "F1-score",
        "Learning rate",
        "AIC (Akaike Information Criterion)"
      ],
      "correctAnswerIndex": 3,
      "explanation": "AIC and BIC help balance model fit with complexity to select number of components."
    },
    {
      "id": 30,
      "questionText": "BIC in GMM is used to:",
      "options": [
        "Normalize probabilities",
        "Estimate cluster assignments",
        "Update mean and covariance",
        "Select number of clusters considering model complexity"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Bayesian Information Criterion penalizes overly complex models to avoid overfitting."
    },
    {
      "id": 31,
      "questionText": "Scenario: You have overlapping clusters in 2D data. Which approach is suitable?",
      "options": [
        "K-Means",
        "DBSCAN with minPts=1",
        "Hierarchical clustering",
        "Gaussian Mixture Model"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM handles overlapping clusters probabilistically, unlike K-Means which assigns hard labels."
    },
    {
      "id": 32,
      "questionText": "Which initialization can improve EM convergence?",
      "options": [
        "Using covariance as identity for all clusters",
        "Setting all means to zero",
        "Using K-Means centroids as initial means",
        "Randomly choosing one data point"
      ],
      "correctAnswerIndex": 2,
      "explanation": "K-Means initialization provides better starting points for EM algorithm."
    },
    {
      "id": 33,
      "questionText": "Soft clustering means:",
      "options": [
        "Each point has a probability of belonging to multiple clusters",
        "Clusters are linearly separable",
        "Clusters have equal sizes",
        "Each point is assigned only one cluster"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Soft clustering assigns probabilities rather than hard labels."
    },
    {
      "id": 34,
      "questionText": "What does EM algorithm alternate between?",
      "options": [
        "Expectation (E-step) and Maximization (M-step)",
        "Gradient descent and regularization",
        "Probability normalization and prediction",
        "Clustering and dimensionality reduction"
      ],
      "correctAnswerIndex": 0,
      "explanation": "EM alternates between computing responsibilities and updating parameters."
    },
    {
      "id": 35,
      "questionText": "Scenario: A GMM component has nearly zero weight after EM. Implication?",
      "options": [
        "Component is insignificant; may be removed",
        "Covariance matrix is singular",
        "Model is invalid",
        "Training failed"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Very low weight indicates component contributes little to data representation."
    },
    {
      "id": 36,
      "questionText": "Scenario: Covariance type set to 'spherical'. Effect?",
      "options": [
        "Each cluster is circular (isotropic), with a single variance shared across all directions",
        "EM cannot converge",
        "Clusters can have arbitrary orientation",
        "Covariance ignored"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Spherical covariance assumes isotropic variance for each cluster."
    },
    {
      "id": 37,
      "questionText": "Scenario: Covariance type 'full' in GMM allows:",
      "options": [
        "One-dimensional data only",
        "Elliptical clusters with arbitrary orientation",
        "Only circular clusters",
        "Clusters of equal size"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Full covariance allows each cluster to have a unique covariance matrix."
    },
    {
      "id": 38,
      "questionText": "Scenario: High-dimensional data with GMM. Challenge?",
      "options": [
        "Covariance estimation becomes difficult",
        "EM converges faster",
        "Number of clusters reduces automatically",
        "Probabilities become binary"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Estimating full covariance in high dimensions is prone to overfitting."
    },
    {
      "id": 39,
      "questionText": "Scenario: Using diagonal covariance instead of full. Advantage?",
      "options": [
        "Reduces number of parameters, faster EM",
        "EM fails automatically",
        "Improves cluster overlap",
        "Always increases accuracy"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Diagonal covariance assumes feature independence, reducing parameters."
    },
    {
      "id": 40,
      "questionText": "Scenario: Two clusters have very close means. EM may:",
      "options": [
        "Fail to run",
        "Merge clusters automatically",
        "Assign probabilities reflecting overlap",
        "Ignore one cluster"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Soft assignments reflect uncertainty in overlapping regions."
    },
    {
      "id": 41,
      "questionText": "Scenario: EM initialized with random means may:",
      "options": [
        "Converge to local maxima",
        "Merge clusters automatically",
        "Fail to compute responsibilities",
        "Always find global maximum"
      ],
      "correctAnswerIndex": 0,
      "explanation": "EM is sensitive to initialization; may converge to local optima."
    },
    {
      "id": 42,
      "questionText": "Scenario: GMM applied to anomaly detection. How?",
      "options": [
        "EM ignores outliers automatically",
        "Points with low likelihood under model considered anomalies",
        "Points assigned to smallest cluster are anomalies",
        "Clusters removed, remaining points are anomalies"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM can detect outliers by evaluating likelihood of each point."
    },
    {
      "id": 43,
      "questionText": "Scenario: Overfitting in GMM can occur when:",
      "options": [
        "Dataset is small but clusters well-separated",
        "Covariance type is spherical",
        "Initialization uses K-Means",
        "Too many components relative to data size"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Excessive components may fit noise rather than true structure."
    },
    {
      "id": 44,
      "questionText": "Scenario: Selecting number of components with BIC. Lower BIC means:",
      "options": [
        "EM failed",
        "Overfitting",
        "Better balance between fit and complexity",
        "Worse model"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Lower BIC indicates model explains data well without unnecessary complexity."
    },
    {
      "id": 45,
      "questionText": "Scenario: You normalize features before GMM. Benefit?",
      "options": [
        "EM converges slower",
        "Number of components reduces",
        "Prevents dominance by large-scale features",
        "Covariance becomes singular"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Feature scaling ensures all features contribute equally to Gaussian components."
    },
    {
      "id": 46,
      "questionText": "Scenario: You use too few components in GMM. Likely effect?",
      "options": [
        "Covariance becomes negative",
        "Overfitting",
        "EM fails to converge",
        "Underfitting, poor representation of clusters"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Too few Gaussians cannot capture underlying data structure."
    },
    {
      "id": 47,
      "questionText": "Scenario: Two clusters have different shapes and orientations. Which GMM setting captures this?",
      "options": [
        "Tied covariance",
        "Diagonal covariance only",
        "Full covariance",
        "Spherical covariance"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Full covariance allows different shapes and orientations per cluster."
    },
    {
      "id": 48,
      "questionText": "Scenario: Real-world use of GMM in speaker recognition relies on:",
      "options": [
        "Only frequency features",
        "Decision trees",
        "Modeling probability distribution of feature vectors",
        "Hard cluster labels"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM models the distribution of features for each speaker."
    },
    {
      "id": 49,
      "questionText": "Scenario: Image segmentation with GMM. How?",
      "options": [
        "K-Means replaces EM",
        "Pixels assigned randomly",
        "Only grayscale images",
        "Pixels assigned probabilistically to color clusters"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM can segment images based on color probability distributions."
    },
    {
      "id": 50,
      "questionText": "Scenario: Overlapping Gaussian clusters, hard assignment used. Effect?",
      "options": [
        "Covariance becomes zero",
        "EM improves accuracy",
        "Soft assignment automatically applied",
        "Information loss, may misclassify points"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Hard labels ignore uncertainty and may misrepresent overlapping regions."
    },
    {
      "id": 51,
      "questionText": "Scenario: You have categorical features. GMM suitability?",
      "options": [
        "Perfectly suitable",
        "Requires only diagonal covariance",
        "Not ideal; GMM assumes continuous features",
        "Number of components is irrelevant"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM assumes continuous-valued features for Gaussian distributions."
    },
    {
      "id": 52,
      "questionText": "Scenario: EM converges slowly. Common solutions?",
      "options": [
        "Ignore convergence criteria",
        "Better initialization, feature scaling, or fewer components",
        "Increase number of iterations indefinitely",
        "Switch to K-Means always"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Good initialization and preprocessing improve EM efficiency."
    },
    {
      "id": 53,
      "questionText": "Scenario: GMM for anomaly detection in network traffic. Strategy?",
      "options": [
        "Ignore rare events",
        "Hard assign all points to clusters",
        "Flag low likelihood points as anomalies",
        "Use K-Means to cluster anomalies"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Points with low probability under the model are considered outliers."
    },
    {
      "id": 54,
      "questionText": "Scenario: Repeated EM runs settle on different solutions. Likely reason?",
      "options": [
        "Number of components too small",
        "Poor initialization causing local maxima",
        "Covariance matrix full",
        "Using diagonal covariance"
      ],
      "correctAnswerIndex": 1,
      "explanation": "EM can get stuck in local maxima if initial parameters are suboptimal."
    },
    {
      "id": 55,
      "questionText": "Scenario: Soft clustering probability threshold used to assign points. Advantage?",
      "options": [
        "EM fails automatically",
        "Always misclassifies clusters",
        "Allows filtering uncertain points",
        "Covariance ignored"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Thresholding allows flexible assignment based on confidence."
    },
    {
      "id": 56,
      "questionText": "Scenario: Tied covariance for all components. Effect?",
      "options": [
        "All clusters share same shape/orientation",
        "Covariance ignored",
        "Number of components reduced automatically",
        "EM cannot run"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Tied covariance forces all Gaussians to share same covariance matrix."
    },
    {
      "id": 57,
      "questionText": "Scenario: GMM with diagonal covariance and correlated features. Effect?",
      "options": [
        "EM automatically switches to full",
        "Perfect modeling",
        "Covariance fails to compute",
        "Model may be suboptimal due to ignored correlations"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Diagonal covariance ignores feature correlations, which may reduce accuracy."
    },
    {
      "id": 58,
      "questionText": "Scenario: High-dimensional data, small sample size. Solution for GMM?",
      "options": [
        "Always full covariance",
        "Increase number of components",
        "Ignore dimension scaling",
        "Use diagonal covariance or reduce dimensions with PCA"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Reducing parameters via diagonal covariance or PCA helps prevent overfitting."
    },
    {
      "id": 59,
      "questionText": "Scenario: GMM applied on time-series data. Typical strategy?",
      "options": [
        "Switch to K-Means only",
        "Model features extracted per time window",
        "Use raw timestamps directly",
        "Ignore temporal ordering"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Features are extracted per window to apply GMM effectively."
    },
    {
      "id": 60,
      "questionText": "Scenario: You wish to compare two GMMs with different components. Metric?",
      "options": [
        "Silhouette score",
        "Mean squared error",
        "AIC/BIC",
        "Accuracy"
      ],
      "correctAnswerIndex": 2,
      "explanation": "AIC/BIC compare likelihoods while penalizing complexity."
    },
    {
      "id": 61,
      "questionText": "Scenario: EM stops improving log-likelihood. Action?",
      "options": [
        "Reinitialize covariance",
        "Converged; training can stop",
        "Increase components automatically",
        "Reduce number of iterations"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Plateau in log-likelihood indicates convergence."
    },
    {
      "id": 62,
      "questionText": "Scenario: GMM applied on overlapping clusters. Which is true?",
      "options": [
        "Clusters must be separated manually",
        "GMM fails completely",
        "K-Means always better",
        "Soft assignments handle ambiguity better than hard assignments"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Soft probabilistic assignments capture uncertainty in overlapping regions."
    },
    {
      "id": 63,
      "questionText": "Scenario: EM converges to degenerate covariance. Likely cause?",
      "options": [
        "Full covariance required",
        "Component collapsed to single data point",
        "Too few iterations",
        "Random initialization"
      ],
      "correctAnswerIndex": 1,
      "explanation": "A component may shrink variance toward zero, creating numerical issues."
    },
    {
      "id": 64,
      "questionText": "Scenario: Choosing between GMM and K-Means. Advantage of GMM?",
      "options": [
        "Always faster",
        "Works only on spherical clusters",
        "No parameters needed",
        "Handles overlapping, probabilistic clusters"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM can model overlap using probabilities, unlike K-Means."
    },
    {
      "id": 65,
      "questionText": "Scenario: GMM with full covariance. Drawback?",
      "options": [
        "EM does not converge",
        "Cannot model elliptical clusters",
        "Higher number of parameters; risk of overfitting",
        "Soft assignment ignored"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Full covariance requires estimating many parameters, sensitive to small datasets."
    },
    {
      "id": 66,
      "questionText": "Scenario: GMM for anomaly detection. Threshold selection?",
      "options": [
        "Use hard assignments only",
        "Ignore low-probability points",
        "Based on likelihood distribution of normal data",
        "Random selection"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Threshold is chosen based on typical likelihood values of normal data."
    },
    {
      "id": 67,
      "questionText": "Scenario: EM alternates but log-likelihood decreases. Cause?",
      "options": [
        "Soft assignments ignored",
        "Convergence achieved",
        "Number of components too low",
        "Numerical instability or rounding errors"
      ],
      "correctAnswerIndex": 3,
      "explanation": "In theory EM never decreases the log-likelihood, so a decrease points to numerical issues; covariance regularization may help."
    },
    {
      "id": 68,
      "questionText": "Scenario: GMM on skewed data. Observation?",
      "options": [
        "Clusters automatically corrected",
        "EM converges faster",
        "Gaussian assumption may be violated",
        "Covariance ignored"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM assumes Gaussian distribution; skewed data may reduce accuracy."
    },
    {
      "id": 69,
      "questionText": "Scenario: Using GMM to compress data. How?",
      "options": [
        "Remove clusters randomly",
        "Use only spherical covariance",
        "Switch to K-Means",
        "Represent each point by cluster responsibilities instead of raw features"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Probabilities can serve as a compact representation of original features."
    },
    {
      "id": 70,
      "questionText": "Scenario: GMM applied to multimodal distribution. Advantage?",
      "options": [
        "Models multiple peaks naturally using several Gaussians",
        "Covariance must be diagonal",
        "Cannot handle multimodal data",
        "Requires K-Means preprocessing"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Multiple Gaussian components allow GMM to capture multimodal patterns."
    },
    {
      "id": 71,
      "questionText": "Scenario: You want to model customer segments with GMM. Best approach?",
      "options": [
        "Use K-Means only",
        "Use soft clustering to capture overlapping preferences",
        "Assign each customer randomly",
        "Ignore continuous features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Soft clustering captures overlapping behavior patterns between segments."
    },
    {
      "id": 72,
      "questionText": "Scenario: Data has outliers. How does GMM handle them?",
      "options": [
        "EM fails automatically",
        "Outliers dominate clusters",
        "Outliers get low probabilities; may need special handling",
        "Clusters merge to include outliers"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Outliers have low likelihood under the fitted Gaussian components, but they can still distort the estimated means and covariances, so they may need special handling."
    },
    {
      "id": 73,
      "questionText": "Scenario: Using EM, you notice very slow convergence. Possible fix?",
      "options": [
        "Switch to hierarchical clustering",
        "Randomly assign clusters",
        "Improve initialization, scale features, or reduce number of components",
        "Increase iterations without changes"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Good initialization and preprocessing improve EM efficiency."
    },
    {
      "id": 74,
      "questionText": "Scenario: You apply GMM to cluster text embeddings. Challenge?",
      "options": [
        "High-dimensionality may make full covariance unstable",
        "GMM works perfectly without change",
        "Soft assignments are ignored",
        "Clusters must be one-dimensional"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High-dimensional embeddings require dimensionality reduction or diagonal covariance."
    },
    {
      "id": 75,
      "questionText": "Scenario: You use GMM with tied covariance. Effect?",
      "options": [
        "EM fails automatically",
        "Clusters become one",
        "Each cluster has unique covariance",
        "All clusters share same covariance matrix"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Tied covariance enforces shared shape/orientation for all Gaussians."
    },
    {
      "id": 76,
      "questionText": "Scenario: GMM model seems overfitted. Possible reasons?",
      "options": [
        "Too many components or full covariance on small data",
        "Diagonal covariance used",
        "EM converged perfectly",
        "Spherical covariance used"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Overfitting occurs when model complexity exceeds data size."
    },
    {
      "id": 77,
      "questionText": "Scenario: You want interpretable clusters with GMM. Strategy?",
      "options": [
        "Soft assignments ignored",
        "Use fewer components and diagonal covariance",
        "Random initialization",
        "Use full covariance and many components"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Fewer components and simpler covariance improve interpretability."
    },
    {
      "id": 78,
      "questionText": "Scenario: You combine GMM with PCA. Purpose?",
      "options": [
        "Ignore low-variance features",
        "Increase number of clusters",
        "EM converges automatically",
        "Reduce dimensionality to stabilize covariance estimation"
      ],
      "correctAnswerIndex": 3,
      "explanation": "PCA reduces features, improving parameter estimation in high-dimensional GMM."
    },
    {
      "id": 79,
      "questionText": "Scenario: GMM applied on customer churn probability. Approach?",
      "options": [
        "Use only binary labels",
        "Switch to linear regression",
        "Hard cluster and ignore overlap",
        "Model feature distribution, assign probabilities to segments"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM probabilistically models customer groups for better segmentation."
    },
    {
      "id": 80,
      "questionText": "Scenario: You notice EM stuck in local optimum. Solution?",
      "options": [
        "Use fewer components only",
        "Increase iterations infinitely",
        "Try multiple random initializations",
        "Ignore convergence"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Multiple initializations reduce chance of getting trapped in local maxima."
    },
    {
      "id": 81,
      "questionText": "Scenario: Using GMM for speaker verification. Why suitable?",
      "options": [
        "K-Means performs better",
        "Captures probabilistic feature distributions per speaker",
        "Covariance must be spherical",
        "Hard assignments suffice"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM models variability in speaker features effectively."
    },
    {
      "id": 82,
      "questionText": "Scenario: EM applied to data with small clusters. Challenge?",
      "options": [
        "Soft assignments fail",
        "Small clusters may be ignored or collapse",
        "Covariance ignored",
        "EM always finds them"
      ],
      "correctAnswerIndex": 1,
      "explanation": "EM may assign negligible weight to very small clusters."
    },
    {
      "id": 83,
      "questionText": "Scenario: GMM used in anomaly detection for fraud. Key idea?",
      "options": [
        "Transactions are clustered randomly",
        "High-probability transactions flagged",
        "Transactions with low probability under model are flagged",
        "EM ignores rare patterns"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Low-likelihood points are potential anomalies."
    },
    {
      "id": 84,
      "questionText": "Scenario: GMM applied to multimodal sensor readings. Advantage?",
      "options": [
        "EM fails automatically",
        "Single Gaussian suffices",
        "Spherical covariance required",
        "Multiple peaks captured by several Gaussian components"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Multiple Gaussians allow modeling multimodal distributions."
    },
    {
      "id": 85,
      "questionText": "Scenario: You notice EM log-likelihood plateauing early. Interpretation?",
      "options": [
        "EM converged; model parameters stabilized",
        "Covariance ignored",
        "EM failed",
        "Increase components immediately"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Plateau indicates convergence of EM algorithm."
    },
    {
      "id": 86,
      "questionText": "Scenario: GMM with full covariance on small dataset. Risk?",
      "options": [
        "Overfitting due to too many parameters",
        "EM fails automatically",
        "Better modeling",
        "Clusters ignored"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Full covariance requires estimating many parameters, risky for small data."
    },
    {
      "id": 87,
      "questionText": "Scenario: GMM used for image segmentation. Key step?",
      "options": [
        "Only grayscale images allowed",
        "K-Means replacement",
        "Assign pixels probabilistically to color clusters",
        "Ignore soft assignments"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Soft clustering assigns pixels to Gaussian components representing colors."
    },
    {
      "id": 88,
      "questionText": "Scenario: GMM applied to text clustering with embeddings. Key step?",
      "options": [
        "Increase components arbitrarily",
        "Soft assignments ignored",
        "Use dimensionality reduction to stabilize covariance estimation",
        "EM fails automatically"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Reducing dimensionality prevents overfitting in high-dimensional embeddings."
    },
    {
      "id": 89,
      "questionText": "Scenario: GMM with overlapping clusters. Hard labels used. Effect?",
      "options": [
        "Improves EM convergence",
        "Loss of probabilistic information; misclassification possible",
        "EM fails",
        "Covariance ignored"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Hard labels ignore uncertainty in overlapping regions."
    },
    {
      "id": 90,
      "questionText": "Scenario: EM for GMM shows component collapsing. Solution?",
      "options": [
        "Use diagonal covariance always",
        "Reduce number of components only",
        "Regularize covariance to prevent singularities",
        "Ignore component"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Regularization prevents variances from collapsing to zero."
    },
    {
      "id": 91,
      "questionText": "Scenario: Choosing GMM vs K-Means. Advantage?",
      "options": [
        "K-Means always faster",
        "EM not required",
        "GMM ignores probabilities",
        "Soft assignment, handles overlap and ellipsoidal clusters"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM models overlapping clusters with probabilistic assignments."
    },
    {
      "id": 92,
      "questionText": "Scenario: GMM applied for speech synthesis. Benefit?",
      "options": [
        "Clusters speakers only",
        "Soft assignments ignored",
        "Models probability distribution of acoustic features",
        "Only spherical clusters allowed"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM captures feature distributions needed for realistic speech synthesis."
    },
    {
      "id": 93,
      "questionText": "Scenario: EM fails to converge. Possible reasons?",
      "options": [
        "Too few iterations",
        "Poor initialization, singular covariance, or incompatible data",
        "Full covariance always fails",
        "Soft assignments ignored"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Bad initialization or degenerate covariances can prevent EM convergence."
    },
    {
      "id": 94,
      "questionText": "Scenario: You need probabilistic clustering on 2D sensor data. Choice?",
      "options": [
        "Hierarchical clustering",
        "DBSCAN only",
        "GMM with appropriate covariance type",
        "K-Means only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM provides soft probabilistic clustering for continuous data."
    },
    {
      "id": 95,
      "questionText": "Scenario: GMM used for anomaly detection in machinery. How?",
      "options": [
        "Use hard assignments only",
        "Cluster readings randomly",
        "Flag low-likelihood sensor readings as anomalies",
        "Ignore rare readings"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Points that do not fit any Gaussian component well can indicate anomalies."
    },
    {
      "id": 96,
      "questionText": "Scenario: High-dimensional embeddings, GMM unstable. Solution?",
      "options": [
        "Increase components",
        "Ignore scaling",
        "Reduce dimensions with PCA or use diagonal covariance",
        "Use full covariance only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Reducing parameters prevents overfitting and stabilizes EM."
    },
    {
      "id": 97,
      "questionText": "Scenario: GMM for multimodal customer behavior. Advantage?",
      "options": [
        "K-Means better",
        "Multiple components capture different behavioral modes",
        "Single Gaussian suffices",
        "Covariance must be spherical"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Different modes of behavior can be modeled with multiple Gaussians."
    },
    {
      "id": 98,
      "questionText": "Scenario: EM converges but log-likelihood decreases occasionally. Cause?",
      "options": [
        "Numerical instability; can use regularization",
        "Covariance ignored",
        "EM failed",
        "Increase components"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Small decreases may occur due to rounding errors; regularization helps."
    },
    {
      "id": 99,
      "questionText": "Scenario: Soft assignment threshold applied. Benefit?",
      "options": [
        "Filter uncertain points or highlight ambiguous memberships",
        "EM fails automatically",
        "Covariance ignored",
        "Always misclassifies clusters"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Thresholding helps decide which points are confidently assigned."
    },
    {
      "id": 100,
      "questionText": "Scenario: GMM applied in finance for risk clustering. Advantage?",
      "options": [
        "Requires categorical data only",
        "Models probability distribution of different risk profiles",
        "Soft assignment ignored",
        "Clusters randomly"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM provides probabilistic segmentation of customers or assets by risk levels."
    }
  ]
}