{
  "title": "Principal Component Analysis (PCA) Mastery: 100 MCQs",
  "description": "A comprehensive set of 100 multiple-choice questions designed to test and deepen your understanding of Principal Component Analysis (PCA), from basic concepts to advanced applications in dimensionality reduction and feature engineering.",
  "questions": [
    {
      "id": 1,
      "questionText": "What is the main goal of Principal Component Analysis (PCA)?",
      "options": [
        "To classify data into categories",
        "To generate random features",
        "To cluster data points",
        "To reduce the dimensionality of a dataset while retaining most variance"
      ],
      "correctAnswerIndex": 3,
      "explanation": "PCA aims to reduce the number of variables in a dataset while preserving as much variability as possible."
    },
    {
      "id": 2,
      "questionText": "In PCA, what does a 'principal component' represent?",
      "options": [
        "The cluster center",
        "An original feature in the dataset",
        "A new uncorrelated feature that captures maximum variance",
        "The mean of all features"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Principal components are linear combinations of original features that are uncorrelated and ordered by the amount of variance they capture."
    },
    {
      "id": 3,
      "questionText": "What is the first step before applying PCA?",
      "options": [
        "Standardize or normalize the data",
        "Train a classifier",
        "Apply k-means clustering",
        "Remove outliers only"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Standardization ensures that features with different scales contribute equally to the principal components."
    },
    {
      "id": 4,
      "questionText": "Scenario: You have 10 features, but 90% of the variance is captured in 2 components. What can you do?",
      "options": [
        "Keep all 10 features",
        "Apply dropout",
        "Add more features",
        "Reduce the dataset to 2 principal components"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Reducing to 2 principal components retains most of the information while simplifying the dataset."
    },
    {
      "id": 5,
      "questionText": "Which mathematical technique is commonly used to compute PCA?",
      "options": [
        "Eigen decomposition of the covariance matrix",
        "Linear regression",
        "Gradient descent",
        "k-Nearest Neighbors"
      ],
      "correctAnswerIndex": 0,
      "explanation": "PCA typically involves computing eigenvectors and eigenvalues of the covariance matrix to find principal components."
    },
    {
      "id": 6,
      "questionText": "What property do all principal components have?",
      "options": [
        "They are uncorrelated (orthogonal) to each other",
        "They sum to zero",
        "They are dependent on each other",
        "They always have the same variance"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Principal components are constructed to be orthogonal, ensuring no redundancy in the information they capture."
    },
    {
      "id": 7,
      "questionText": "Scenario: PCA is applied to a dataset with features on different scales. What happens if you don’t standardize?",
      "options": [
        "The first component captures zero variance",
        "PCA fails to compute",
        "Features with larger scales dominate the principal components",
        "Variance is automatically normalized"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Without standardization, features with larger numeric ranges contribute more to the total variance, skewing the PCA results."
    },
    {
      "id": 8,
      "questionText": "Which of the following can PCA NOT do?",
      "options": [
        "Remove correlated features",
        "Reduce dimensionality",
        "Improve classification accuracy directly",
        "Visualize high-dimensional data"
      ],
      "correctAnswerIndex": 2,
      "explanation": "PCA is unsupervised and reduces dimensionality; it does not directly improve classification accuracy."
    },
    {
      "id": 9,
      "questionText": "Scenario: You apply PCA and find that the first principal component explains 70% variance, the second 20%, and the rest 10%. How many components would you keep to retain 90% variance?",
      "options": [
        "All components",
        "One component",
        "Two components",
        "Three components"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Adding the first two components (70% + 20%) captures 90% of the dataset variance."
    },
    {
      "id": 10,
      "questionText": "PCA is an example of which type of learning?",
      "options": [
        "Supervised learning",
        "Unsupervised learning",
        "Reinforcement learning",
        "Semi-supervised learning"
      ],
      "correctAnswerIndex": 1,
      "explanation": "PCA does not use labels; it finds patterns based on feature correlations, making it unsupervised."
    },
    {
      "id": 11,
      "questionText": "Scenario: You perform PCA and transform your 5D data to 3D. What property is guaranteed?",
      "options": [
        "Classification accuracy improves",
        "The 3 components capture the maximum possible variance in 3D",
        "All data points remain equidistant",
        "The original features are perfectly preserved"
      ],
      "correctAnswerIndex": 1,
      "explanation": "PCA selects components capturing maximum variance in the reduced dimensions, not necessarily preserving original distances."
    },
    {
      "id": 12,
      "questionText": "Which PCA component explains the least variance?",
      "options": [
        "All components explain equal variance",
        "Any intermediate component",
        "The last principal component",
        "The first principal component"
      ],
      "correctAnswerIndex": 2,
      "explanation": "PCA orders components from highest to lowest variance."
    },
    {
      "id": 13,
      "questionText": "Scenario: The dataset's features are already perfectly uncorrelated. What effect does PCA have?",
      "options": [
        "PCA fails to compute",
        "Principal components are the same as original features",
        "PCA increases correlation",
        "PCA reduces variance"
      ],
      "correctAnswerIndex": 1,
      "explanation": "If the covariance matrix is already diagonal, its eigenvectors align with the original feature axes, so the principal components are just the original features reordered by variance."
    },
    {
      "id": 14,
      "questionText": "Scenario: You want to visualize high-dimensional data in 2D. PCA is applied. What is the risk?",
      "options": [
        "Data labels change",
        "All variance is retained",
        "Some variance is lost",
        "Original features are unchanged"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Reducing dimensions to 2D inevitably loses some information (variance)."
    },
    {
      "id": 15,
      "questionText": "Which of the following matrices is symmetric and serves as the basis for standard PCA?",
      "options": [
        "Distance matrix",
        "Covariance matrix",
        "Adjacency matrix",
        "Confusion matrix"
      ],
      "correctAnswerIndex": 1,
      "explanation": "The covariance matrix is symmetric and serves as the basis for eigen decomposition in PCA."
    },
    {
      "id": 16,
      "questionText": "Scenario: You have highly correlated features. PCA is applied. Effect?",
      "options": [
        "Normalizes variance",
        "Reduces redundancy by combining correlated features into fewer components",
        "Removes labels",
        "Increases correlation"
      ],
      "correctAnswerIndex": 1,
      "explanation": "PCA transforms correlated features into uncorrelated principal components, reducing redundancy."
    },
    {
      "id": 17,
      "questionText": "Scenario: You want to reduce noise in data. PCA helps by:",
      "options": [
        "Scaling the first component only",
        "Adding more features",
        "Increasing learning rate",
        "Discarding components with low variance"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Low-variance components often represent noise; removing them denoises the dataset."
    },
    {
      "id": 18,
      "questionText": "Scenario: Eigenvalues of a PCA covariance matrix are [4, 2, 1]. Which component explains the most variance?",
      "options": [
        "Second component",
        "All equally",
        "First component",
        "Third component"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Eigenvalues correspond to the variance captured; the largest eigenvalue indicates the component with most variance."
    },
    {
      "id": 19,
      "questionText": "Scenario: You apply PCA to a dataset and plot explained variance ratio. Purpose?",
      "options": [
        "Scale features",
        "Compute correlation matrix",
        "Determine how many components to keep",
        "Train a classifier"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Explained variance ratio helps decide how many components capture sufficient information."
    },
    {
      "id": 20,
      "questionText": "Which components are orthogonal to the first principal component?",
      "options": [
        "None of them",
        "Only the third component",
        "All of the remaining components",
        "Only the last component"
      ],
      "correctAnswerIndex": 2,
      "explanation": "By definition, each principal component is orthogonal to all of the others, so every later component is orthogonal to the first."
    },
    {
      "id": 21,
      "questionText": "Scenario: After PCA, some components have very small variance. Action?",
      "options": [
        "They must be retained",
        "Scale them up",
        "Add noise to them",
        "They can be discarded"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Components with negligible variance contribute little to data representation and can be removed."
    },
    {
      "id": 22,
      "questionText": "Scenario: PCA applied to non-linear data. Limitation?",
      "options": [
        "PCA generates labels",
        "PCA cannot capture non-linear relationships",
        "PCA increases variance",
        "PCA overfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Standard PCA is linear and cannot model complex non-linear structures; kernel PCA may be used instead."
    },
    {
      "id": 23,
      "questionText": "Scenario: PCA reduces features from 5D to 2D. Data reconstruction is approximate. Why?",
      "options": [
        "Labels change",
        "Information is lost in discarded components",
        "PCA adds noise",
        "Variance is increased"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Dimensionality reduction retains only top components, losing some original information."
    },
    {
      "id": 24,
      "questionText": "Which metric is used to measure how much variance is captured by selected components?",
      "options": [
        "Mean squared error",
        "Correlation coefficient",
        "Explained variance ratio",
        "Euclidean distance"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Explained variance ratio shows the proportion of total variance captured by each principal component."
    },
    {
      "id": 25,
      "questionText": "Scenario: PCA on standardized data vs. unstandardized data. Difference?",
      "options": [
        "Standardization reduces variance",
        "Standardized data gives equal weight to all features",
        "Unstandardized data improves variance capture",
        "No difference"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Standardization prevents features with large scales from dominating the PCA components."
    },
    {
      "id": 26,
      "questionText": "Scenario: Two features are perfectly correlated. PCA effect?",
      "options": [
        "Cannot perform PCA",
        "One component captures the shared variance",
        "Both components are kept equally",
        "Variance becomes zero"
      ],
      "correctAnswerIndex": 1,
      "explanation": "PCA combines correlated features into a single principal component."
    },
    {
      "id": 27,
      "questionText": "Scenario: You want to visualize 3D data in 2D. PCA helps by:",
      "options": [
        "Adding more dimensions",
        "Projecting onto top 2 principal components",
        "Scaling features only",
        "Generating new labels"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Projection onto top principal components preserves as much variance as possible in reduced dimensions."
    },
    {
      "id": 28,
      "questionText": "Scenario: After PCA, you notice negative values in transformed features. Meaning?",
      "options": [
        "PCA failed",
        "Original data must be negative",
        "Data must be scaled again",
        "Principal components can have negative and positive values"
      ],
      "correctAnswerIndex": 3,
      "explanation": "PCA components are linear combinations of original features, allowing both negative and positive values."
    },
    {
      "id": 29,
      "questionText": "Scenario: You apply PCA for feature selection. Best approach?",
      "options": [
        "Discard largest components",
        "Select random components",
        "Keep all features",
        "Select top components explaining desired variance"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Top principal components capture most variance and are most informative for feature selection."
    },
    {
      "id": 30,
      "questionText": "Scenario: PCA is applied to a dataset with 100 features. First 10 components explain 95% variance. Next step?",
      "options": [
        "Use all 100 features",
        "Discard the 10 components",
        "Add more features",
        "Use 10 components for reduced dataset"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Using the first 10 components retains 95% of information while reducing dimensionality significantly."
    },
    {
      "id": 31,
      "questionText": "Scenario: You apply PCA but some features dominate due to large variance. Solution?",
      "options": [
        "Standardize the features",
        "Remove features with high variance",
        "Apply k-means clustering",
        "Reduce dataset size"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Standardizing ensures all features contribute equally, preventing dominance of large-scale features."
    },
    {
      "id": 32,
      "questionText": "Scenario: PCA eigenvalues are [5, 2, 0.5, 0.1]. What does the smallest eigenvalue indicate?",
      "options": [
        "Largest variance",
        "Component is most important",
        "Least variance along that component",
        "PCA failed"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Eigenvalues represent variance along principal components; the smallest captures minimal variance."
    },
    {
      "id": 33,
      "questionText": "Scenario: Two components have similar eigenvalues. What to do?",
      "options": [
        "Discard one randomly",
        "Keep both as they explain similar variance",
        "Combine them manually",
        "Always choose the first"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Similar eigenvalues indicate both components carry significant information; both should be retained."
    },
    {
      "id": 34,
      "questionText": "Scenario: You want to analyze which original features contribute to a principal component. Technique?",
      "options": [
        "Check explained variance ratio only",
        "Remove low variance features",
        "Examine component loadings (eigenvectors)",
        "Normalize the dataset"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Loadings show the weight of each original feature in a principal component, indicating contribution."
    },
    {
      "id": 35,
      "questionText": "Scenario: PCA applied on correlation matrix vs covariance matrix. Difference?",
      "options": [
        "Correlation matrix standardizes features; covariance matrix uses original scale",
        "No difference",
        "Covariance matrix reduces variance",
        "Correlation matrix increases variance"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Correlation matrix accounts for differing scales by standardizing variables before computing PCA."
    },
    {
      "id": 36,
      "questionText": "Scenario: PCA is applied to noisy data. Effect of noise?",
      "options": [
        "Noise appears in low-variance components",
        "Noise improves variance",
        "Noise is amplified in all components",
        "Noise disappears automatically"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High-frequency noise often contributes little variance and is captured in later components, which can be discarded."
    },
    {
      "id": 37,
      "questionText": "Scenario: You want to visualize 4D data in 2D using PCA. Which components to use?",
      "options": [
        "Last 2 components",
        "All 4 components",
        "Top 2 principal components",
        "Random 2 features"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Top components retain most variance, providing the best 2D representation of high-dimensional data."
    },
    {
      "id": 38,
      "questionText": "Scenario: Data is mean-centered before applying PCA. Why is mean-centering important?",
      "options": [
        "Normalizes labels",
        "Increases variance artificially",
        "Reduces number of features",
        "Ensures components capture variance about the mean rather than the raw origin"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Mean-centering removes the data's offset so that principal components represent directions of variance relative to the mean."
    },
    {
      "id": 39,
      "questionText": "Scenario: PCA applied but first component explains only 20% variance. Interpretation?",
      "options": [
        "First component is irrelevant",
        "Data variance is spread across many components",
        "Reduce dataset size",
        "PCA failed"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Low variance in first component indicates no single direction dominates; variance is more uniform across features."
    },
    {
      "id": 40,
      "questionText": "Scenario: You perform PCA on features measured in different units. Why important?",
      "options": [
        "To generate labels",
        "To increase explained variance",
        "To remove features",
        "To prevent unit differences from skewing components"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Standardization equalizes units, ensuring PCA reflects intrinsic data structure rather than measurement scale."
    },
    {
      "id": 41,
      "questionText": "Scenario: Projecting data back from reduced PCA components to original space. Effect?",
      "options": [
        "Perfect reconstruction always",
        "Approximate reconstruction with some information loss",
        "Increase variance",
        "Remove correlations"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Dimensionality reduction discards minor components, so reconstruction is approximate."
    },
    {
      "id": 42,
      "questionText": "Scenario: PCA on sparse high-dimensional data. Which method can help?",
      "options": [
        "Sparse PCA",
        "Random projection",
        "Feature scaling",
        "Standard PCA only"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Sparse PCA introduces sparsity constraints to handle high-dimensional data efficiently."
    },
    {
      "id": 43,
      "questionText": "Scenario: Kernel PCA vs standard PCA. Advantage?",
      "options": [
        "Kernel PCA reduces variance",
        "Kernel PCA does not reduce dimensions",
        "Kernel PCA captures non-linear patterns",
        "Standard PCA is better for non-linear data"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Kernel PCA uses kernel functions to capture non-linear relationships, unlike linear PCA."
    },
    {
      "id": 44,
      "questionText": "Scenario: PCA applied to dataset, but covariance matrix is singular. Cause?",
      "options": [
        "Data standardized",
        "Number of features > number of samples",
        "Explained variance too high",
        "Too few features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "A singular covariance matrix occurs when the data matrix has more features than samples, causing linear dependency."
    },
    {
      "id": 45,
      "questionText": "Scenario: After PCA, a component has zero eigenvalue. Meaning?",
      "options": [
        "Data is invalid",
        "No variance along this component",
        "PCA failed",
        "Most important component"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Zero eigenvalue indicates the component captures no variation in the dataset."
    },
    {
      "id": 46,
      "questionText": "Scenario: PCA used for feature reduction in regression. Benefit?",
      "options": [
        "Removes labels",
        "Increases overfitting",
        "Generates noise",
        "Reduces multicollinearity and model complexity"
      ],
      "correctAnswerIndex": 3,
      "explanation": "PCA produces uncorrelated features, mitigating multicollinearity and simplifying models."
    },
    {
      "id": 47,
      "questionText": "Scenario: PCA shows first 3 components explain 85% variance. Choice?",
      "options": [
        "Keep 3 components for reduced dataset",
        "Keep only first",
        "Discard all 3",
        "Keep all original features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Top components capturing majority variance are sufficient for dimensionality reduction."
    },
    {
      "id": 48,
      "questionText": "Scenario: PCA reduces dataset but downstream classifier performs worse. Likely reason?",
      "options": [
        "Important variance discarded in low components",
        "Features were not standardized",
        "Too few principal components retained",
        "All of the above"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Poor classifier performance can result from lost variance, insufficient components, or unstandardized features."
    },
    {
      "id": 49,
      "questionText": "Scenario: PCA on image dataset. First component represents lighting changes. Meaning?",
      "options": [
        "Reduce dataset size",
        "Lighting has no effect",
        "Largest variance is due to lighting, not object content",
        "PCA failed"
      ],
      "correctAnswerIndex": 2,
      "explanation": "PCA captures directions of maximal variance; if lighting dominates, first component encodes lighting."
    },
    {
      "id": 50,
      "questionText": "Scenario: You need interpretability for principal components. Technique?",
      "options": [
        "Examine feature loadings",
        "Discard low variance components",
        "Use explained variance only",
        "Standardize data"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Loadings show contribution of each original feature to principal components, aiding interpretation."
    },
    {
      "id": 51,
      "questionText": "Scenario: PCA applied on time-series features. Issue?",
      "options": [
        "Eigenvalues become negative",
        "Labels are affected",
        "Variance is increased",
        "Temporal structure may be lost"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Standard PCA ignores sequence information; temporal relationships may not be preserved."
    },
    {
      "id": 52,
      "questionText": "Scenario: Dataset contains categorical variables. PCA requirement?",
      "options": [
        "Convert to numerical via encoding",
        "Remove them",
        "No change required",
        "Use labels directly"
      ],
      "correctAnswerIndex": 0,
      "explanation": "PCA requires numeric input; categorical features must be encoded first (e.g., one-hot encoding)."
    },
    {
      "id": 53,
      "questionText": "Scenario: PCA reduces dimensions for clustering. Benefit?",
      "options": [
        "Increases data size",
        "Reduces noise and speeds computation",
        "Removes clusters",
        "Generates labels"
      ],
      "correctAnswerIndex": 1,
      "explanation": "PCA simplifies data, removes redundant features, and accelerates clustering algorithms."
    },
    {
      "id": 54,
      "questionText": "Scenario: After PCA, components are used in regression. Advantage?",
      "options": [
        "Remove labels",
        "Avoid multicollinearity and improve stability",
        "Increases overfitting",
        "Increase computation"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Principal components are uncorrelated, reducing multicollinearity in regression."
    },
    {
      "id": 55,
      "questionText": "Scenario: PCA applied to normalized vs standardized features. Impact?",
      "options": [
        "No impact",
        "Normalized features fail PCA",
        "Standardization is crucial for unequal scales",
        "Variance is reduced"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Features with different scales must be standardized; normalization alone may not equalize contribution."
    },
    {
      "id": 56,
      "questionText": "Scenario: First principal component explains 40% variance, second 25%. What % variance is left?",
      "options": [
        "40%",
        "35%",
        "25%",
        "65%"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Remaining variance = 100% - (40% + 25%) = 35%."
    },
    {
      "id": 57,
      "questionText": "Scenario: PCA used on finance dataset. First component dominated by one stock. Meaning?",
      "options": [
        "Data should be reduced",
        "This stock has highest variance in data",
        "Stock is irrelevant",
        "PCA failed"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Principal components capture directions of maximum variance; one high-variance stock can dominate."
    },
    {
      "id": 58,
      "questionText": "Scenario: PCA applied on small dataset. Risk?",
      "options": [
        "PCA fails",
        "Components become identical",
        "Overfitting and noisy components",
        "Variance increases"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Small datasets may produce unstable covariance estimates, leading to noisy components."
    },
    {
      "id": 59,
      "questionText": "Scenario: You want to project new data using a previously fitted PCA. Method?",
      "options": [
        "Cannot project new data",
        "Recompute PCA",
        "Multiply new data by the learned component matrix",
        "Use labels only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "New data is centered with the training mean (and scaled, if the fit was standardized) and then projected onto the component matrix learned from the training data."
    },
    {
      "id": 60,
      "questionText": "Scenario: PCA shows negative loadings. Meaning?",
      "options": [
        "Error in computation",
        "Variance is negative",
        "Feature removed",
        "Feature negatively correlates with component"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Negative loadings indicate the original feature moves in opposite direction to the component."
    },
    {
      "id": 61,
      "questionText": "Scenario: PCA applied to very high-dimensional genomic data. Challenge?",
      "options": [
        "Variance is too high",
        "Cannot compute eigenvectors",
        "Labels cannot be used",
        "Covariance matrix may be singular or noisy"
      ],
      "correctAnswerIndex": 3,
      "explanation": "High dimensionality with few samples can make the covariance matrix singular and PCA unstable."
    },
    {
      "id": 62,
      "questionText": "Scenario: PCA on dataset with outliers. Effect?",
      "options": [
        "PCA removes outliers",
        "Outliers may distort principal components",
        "Outliers are ignored automatically",
        "Variance increases uniformly"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Outliers can dominate variance, affecting directions of principal components."
    },
    {
      "id": 63,
      "questionText": "Scenario: PCA applied for compression. Target explained variance?",
      "options": [
        "Keep all components",
        "Keep only first component",
        "Choose enough components to capture 90–95% variance",
        "Discard top components"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Selecting components that retain most variance ensures compression without losing significant information."
    },
    {
      "id": 64,
      "questionText": "Scenario: PCA reduces a dataset but the downstream classifier performs worse. Which explanation is NOT plausible?",
      "options": [
        "PCA cannot be applied to numeric data",
        "Too few principal components retained",
        "Features were not standardized",
        "Important variance discarded in low components"
      ],
      "correctAnswerIndex": 0,
      "explanation": "PCA is designed for numeric data, so that explanation is false; lost variance, too few components, or unstandardized features are all genuine causes of degraded performance."
    },
    {
      "id": 65,
      "questionText": "Scenario: PCA used for exploratory analysis. Benefit?",
      "options": [
        "Removes labels",
        "Increases dimensionality",
        "Generates random features",
        "Reveals patterns, clusters, and correlations"
      ],
      "correctAnswerIndex": 3,
      "explanation": "PCA simplifies data and highlights underlying patterns or groupings."
    },
    {
      "id": 66,
      "questionText": "Scenario: PCA reduces dataset from 50 to 10 features. Effect on storage?",
      "options": [
        "Significant reduction in storage and computation",
        "Increases storage",
        "No change",
        "Removes labels"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Fewer features reduce memory usage and accelerate computations."
    },
    {
      "id": 67,
      "questionText": "Scenario: PCA used on correlation matrix. Advantage?",
      "options": [
        "Reduces variance",
        "Generates labels",
        "Equalizes feature scales and emphasizes relative relationships",
        "Removes low-variance features only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Correlation matrix ensures features with different units or scales contribute proportionally to PCA."
    },
    {
      "id": 68,
      "questionText": "Scenario: After PCA, some features have nearly zero loadings across components. Meaning?",
      "options": [
        "Variance is too high",
        "These features contribute little variance and can be discarded",
        "They are most important",
        "PCA failed"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Features with negligible loadings do not influence principal components significantly."
    },
    {
      "id": 69,
      "questionText": "Scenario: PCA applied to dataset with correlated noise. Effect?",
      "options": [
        "All variance captured by noise",
        "Noise may form separate low-variance components",
        "PCA fails",
        "Noise dominates first component"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Correlated noise often appears in later components with low variance."
    },
    {
      "id": 70,
      "questionText": "Scenario: You want to reduce dimensionality without losing much information. PCA strategy?",
      "options": [
        "Use all components",
        "Keep enough components to capture desired variance (e.g., 90–95%)",
        "Keep only first component",
        "Discard components randomly"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Selecting enough principal components ensures dimensionality reduction while retaining most data information."
    },
    {
      "id": 71,
      "questionText": "Scenario: Kernel PCA is used instead of standard PCA. Benefit?",
      "options": [
        "Reduces dimensionality linearly",
        "Captures non-linear relationships in the data",
        "Removes outliers automatically",
        "Generates labels"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Kernel PCA uses kernel functions to map data into higher-dimensional space to capture non-linear patterns."
    },
    {
      "id": 72,
      "questionText": "Scenario: Sparse PCA is applied on high-dimensional genomic data. Advantage?",
      "options": [
        "Improves label prediction automatically",
        "Maximizes variance only",
        "Generates components with few non-zero loadings for interpretability",
        "Removes all correlations"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Sparse PCA introduces sparsity constraints, creating components influenced by fewer original features for easier interpretation."
    },
    {
      "id": 73,
      "questionText": "Scenario: PCA is applied to compress image data. How to measure quality of compression?",
      "options": [
        "Variance ratio only",
        "Correlation of first component with pixels",
        "Reconstruction error (difference between original and reconstructed images)",
        "Number of components kept"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Reconstruction error quantifies information loss during dimensionality reduction, evaluating compression quality."
    },
    {
      "id": 74,
      "questionText": "Scenario: PCA applied to multicollinear financial features. Effect?",
      "options": [
        "Increases collinearity",
        "Reduces multicollinearity by generating uncorrelated components",
        "Removes variance",
        "Generates labels"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Principal components are orthogonal, addressing multicollinearity issues in regression or predictive models."
    },
    {
      "id": 75,
      "questionText": "Scenario: PCA applied on large sparse document-term matrix. Challenge?",
      "options": [
        "PCA cannot be applied",
        "High dimensionality and sparsity require optimized algorithms",
        "All features dominate equally",
        "Variance becomes negative"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Sparse high-dimensional data may need techniques like randomized PCA to efficiently compute components."
    },
    {
      "id": 76,
      "questionText": "Scenario: PCA applied to time-series data. Concern?",
      "options": [
        "Labels are changed",
        "Temporal correlations may be ignored",
        "PCA reduces samples",
        "Variance increases"
      ],
      "correctAnswerIndex": 1,
      "explanation": "PCA does not account for order in sequences; temporal patterns may be lost."
    },
    {
      "id": 77,
      "questionText": "Scenario: PCA reduces 100 features to 5 components. Downstream model performance drops. Likely cause?",
      "options": [
        "Data not standardized",
        "Too few samples",
        "Important low-variance features were discarded",
        "All of the above"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Discarding low-variance features may remove predictive information; other preprocessing issues can also affect performance."
    },
    {
      "id": 78,
      "questionText": "Scenario: PCA is used for anomaly detection. Approach?",
      "options": [
        "Discard all components",
        "Use first component only",
        "Model normal data with top components and examine reconstruction error",
        "Apply PCA on labels"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Anomalies often lie in directions of low variance; reconstruction error from PCA can identify unusual data points."
    },
    {
      "id": 79,
      "questionText": "Scenario: You apply PCA on a dataset with missing values. Best approach?",
      "options": [
        "PCA fills missing values automatically",
        "Ignore missing values",
        "Impute missing values before PCA",
        "Discard rows with missing values"
      ],
      "correctAnswerIndex": 2,
      "explanation": "PCA requires complete numerical data; missing values must be imputed or handled before applying PCA."
    },
    {
      "id": 80,
      "questionText": "Scenario: PCA applied and first component aligns with single feature. Interpretation?",
      "options": [
        "This feature dominates variance in the dataset",
        "Component is irrelevant",
        "All features are equally important",
        "PCA failed"
      ],
      "correctAnswerIndex": 0,
      "explanation": "When a single feature dominates variance, the first principal component aligns closely with that feature."
    },
    {
      "id": 81,
      "questionText": "Scenario: You perform PCA and observe a negative explained variance ratio for a component. Reason?",
      "options": [
        "Numerical round-off or improper mean-centering in the covariance computation",
        "Eigenvectors are invalid",
        "Variance is negative",
        "PCA cannot run on this data"
      ],
      "correctAnswerIndex": 0,
      "explanation": "A correctly computed covariance matrix is positive semi-definite, so true eigenvalues are non-negative; a negative ratio signals numerical round-off or an error such as improper centering in the computation."
    },
    {
      "id": 82,
      "questionText": "Scenario: PCA applied on data with categorical features encoded as one-hot. Concern?",
      "options": [
        "Variance decreases automatically",
        "Labels are affected",
        "Components become identical",
        "High dimensionality may lead to sparse components"
      ],
      "correctAnswerIndex": 3,
      "explanation": "One-hot encoding increases dimensions, producing sparse data; special handling or sparse PCA may be useful."
    },
    {
      "id": 83,
      "questionText": "Scenario: After PCA, you plot a biplot. Purpose?",
      "options": [
        "Scale data",
        "Generate labels",
        "Visualize principal components and feature contributions",
        "Remove low-variance components"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Biplots show both projected data points and how original features contribute to components."
    },
    {
      "id": 84,
      "questionText": "Scenario: PCA applied on standardized vs non-standardized data with different scales. Outcome?",
      "options": [
        "Standardization ensures fair contribution; non-standardized may bias components",
        "Non-standardized data improves variance",
        "No difference",
        "Variance is reduced in standardized data"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Features with large scales dominate components without standardization, skewing PCA results."
    },
    {
      "id": 85,
      "questionText": "Scenario: You want interpretable PCA components. Which approach?",
      "options": [
        "Keep all components",
        "Sparse PCA or rotation methods like varimax",
        "Use first component only",
        "Discard low-variance features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Sparse PCA and rotation techniques improve interpretability by reducing the number of features contributing to each component."
    },
    {
      "id": 86,
      "questionText": "Scenario: PCA reduces dimensions for clustering. Benefit?",
      "options": [
        "Removes noise, reduces computation, highlights clusters",
        "Increases dimensionality",
        "Generates labels",
        "Removes clusters"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Reduced, de-noised features simplify clustering and often improve performance."
    },
    {
      "id": 87,
      "questionText": "Scenario: PCA eigenvectors are not unique. Reason?",
      "options": [
        "PCA failed",
        "Covariance matrix invalid",
        "Eigenvectors are unique up to sign; direction can flip",
        "Variance negative"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Eigenvectors can be multiplied by -1 without changing the subspace, so they are not unique in sign."
    },
    {
      "id": 88,
      "questionText": "Scenario: PCA applied to data where noise dominates variance. Risk?",
      "options": [
        "Components may represent noise rather than signal",
        "Variance reduces",
        "All information preserved",
        "Components become identical"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High-variance noise can dominate principal components, reducing meaningful representation of data."
    },
    {
      "id": 89,
      "questionText": "Scenario: PCA applied on a dataset with many zero-variance features. Effect?",
      "options": [
        "They contribute nothing to the covariance matrix",
        "PCA fails",
        "Variance increases",
        "Components become identical"
      ],
      "correctAnswerIndex": 0,
      "explanation": "A zero-variance feature adds only zero entries to the covariance matrix, yielding a zero eigenvalue and leaving the leading components unaffected."
    },
    {
      "id": 90,
      "questionText": "Scenario: PCA applied to compress hyperspectral image. Key consideration?",
      "options": [
        "Retain components capturing most spectral variance for accurate reconstruction",
        "Discard high-variance components",
        "Keep only first component",
        "Generate labels automatically"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Hyperspectral data has many correlated channels; top components capture essential information while reducing data size."
    },
    {
      "id": 91,
      "questionText": "Scenario: PCA applied and first component is dominated by outlier. Solution?",
      "options": [
        "Keep data as-is",
        "Remove or mitigate outliers before PCA",
        "Scale only first feature",
        "Discard PCA entirely"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Outliers can skew variance and principal directions; handling them ensures meaningful PCA components."
    },
    {
      "id": 92,
      "questionText": "Scenario: You need to project streaming data onto PCA components. Requirement?",
      "options": [
        "Project only first sample",
        "Cannot apply PCA",
        "Recompute PCA each time",
        "Use incremental PCA or precomputed components"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Incremental PCA allows efficient projection of new data without recomputing from scratch."
    },
    {
      "id": 93,
      "questionText": "Scenario: PCA reduces dimensionality but variance explained is too low. Solution?",
      "options": [
        "Remove first component",
        "Normalize data again",
        "Keep more components",
        "Discard components"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Increasing number of components ensures more variance is retained for downstream tasks."
    },
    {
      "id": 94,
      "questionText": "Scenario: PCA applied on correlated features with different scales. Effect if not standardized?",
      "options": [
        "Variance is evenly distributed",
        "Components are orthogonal",
        "PCA fails",
        "Large-scale features dominate components"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Without standardization, features with larger numeric ranges contribute more variance, skewing PCA results."
    },
    {
      "id": 95,
      "questionText": "Scenario: PCA used for dimensionality reduction prior to deep learning. Benefit?",
      "options": [
        "Generates labels",
        "Increases overfitting",
        "Reduces input size and noise, improving training efficiency",
        "Removes all variance"
      ],
      "correctAnswerIndex": 2,
      "explanation": "PCA simplifies input features, removing redundant information and reducing computational load."
    },
    {
      "id": 96,
      "questionText": "Scenario: PCA applied but some components are highly correlated. Why unusual?",
      "options": [
        "Principal components should be orthogonal; correlation indicates an issue",
        "Variance is low",
        "Labels are influencing components",
        "Expected in standard PCA"
      ],
      "correctAnswerIndex": 0,
      "explanation": "PCA produces orthogonal components; correlated components suggest computation or preprocessing errors."
    },
    {
      "id": 97,
      "questionText": "Scenario: PCA applied on multi-class dataset for visualization. Approach?",
      "options": [
        "Discard labels",
        "Keep all features",
        "Project onto top 2 or 3 components and color points by class",
        "Use only first component"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Low-dimensional projection allows visualization of class separation while preserving maximal variance."
    },
    {
      "id": 98,
      "questionText": "Scenario: PCA applied with top components explaining 80% variance. Downstream task requires 95%. Action?",
      "options": [
        "Normalize data again",
        "Use only top components",
        "Include additional components until 95% variance is captured",
        "Discard low components"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Selecting enough components ensures sufficient information is retained for downstream analysis."
    },
    {
      "id": 99,
      "questionText": "Scenario: PCA applied to text embeddings. Challenge?",
      "options": [
        "High dimensionality and sparsity require careful computation",
        "Variance is negative",
        "PCA fails automatically",
        "Components lose meaning entirely"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Text embeddings are often high-dimensional; PCA helps reduce size but may require optimized algorithms."
    },
    {
      "id": 100,
      "questionText": "Scenario: PCA used for feature selection. How to choose components?",
      "options": [
        "Choose random components",
        "Discard high-variance components",
        "Use only first component",
        "Select components explaining desired variance threshold (e.g., 90–95%)"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Selecting top components ensures maximal retained information while reducing dimensionality."
    }
  ]
}