{
  "title": "Support Vector Machines (SVM) Mastery: 100 MCQs",
  "description": "A complete 100-question collection designed to teach and test your understanding of Support Vector Machines — from basic margin intuition to advanced kernel tricks, soft margin optimization, hyperparameter tuning, and real-world scenario applications.",
  "questions": [
    {
      "id": 1,
      "questionText": "What does an SVM aim to find in the feature space?",
      "options": [
        "A random boundary",
        "A centroid of all data points",
        "A hyperplane that maximizes margin",
        "A cluster center"
      ],
      "correctAnswerIndex": 2,
      "explanation": "SVM aims to find the optimal separating hyperplane that maximizes the margin between classes."
    },
    {
      "id": 2,
      "questionText": "Scenario: SVM is trained on perfectly separable data. Which margin type is used?",
      "options": [
        "No margin",
        "Random margin",
        "Soft margin",
        "Hard margin"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Hard margin SVM is used when data is perfectly linearly separable."
    },
    {
      "id": 3,
      "questionText": "Scenario: Data contains overlapping classes. Which SVM variation should be used?",
      "options": [
        "Decision trees",
        "Hard margin",
        "Soft margin",
        "Polynomial kernel only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Soft margin SVM allows some misclassification to handle overlapping data."
    },
    {
      "id": 4,
      "questionText": "What is the primary role of support vectors?",
      "options": [
        "Maximize dataset size",
        "Define the decision boundary",
        "Increase margin penalty",
        "Reduce dimensions"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Support vectors are the critical points that define the position and orientation of the separating hyperplane."
    },
    {
      "id": 5,
      "questionText": "Scenario: Linear SVM trained on non-linear data. What is likely?",
      "options": [
        "Perfect accuracy",
        "Underfitting occurs",
        "Zero training loss",
        "Overfitting occurs"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Linear SVMs cannot model non-linear relationships, leading to underfitting."
    },
    {
      "id": 6,
      "questionText": "Which kernel function maps data to infinite-dimensional space?",
      "options": [
        "Linear",
        "RBF (Gaussian)",
        "Polynomial",
        "Sigmoid"
      ],
      "correctAnswerIndex": 1,
      "explanation": "The RBF kernel maps data into an infinite-dimensional feature space, enabling complex boundaries."
    },
    {
      "id": 7,
      "questionText": "Scenario: SVM with RBF kernel and γ is too large. Effect?",
      "options": [
        "Acts like linear",
        "Overfits training data",
        "Fails to converge",
        "Underfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Large γ makes the model focus too much on each point, overfitting the training set."
    },
    {
      "id": 8,
      "questionText": "Scenario: SVM trained with small C. What happens?",
      "options": [
        "Overfits training data",
        "Allows more misclassifications",
        "Creates zero margin",
        "Fails to train"
      ],
      "correctAnswerIndex": 1,
      "explanation": "A smaller C allows wider margins and tolerates more errors for better generalization."
    },
    {
      "id": 9,
      "questionText": "Scenario: Large C used with noisy data. Effect?",
      "options": [
        "Reduces kernel complexity",
        "Ignores outliers",
        "Overfits noise",
        "Underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "A large C emphasizes classification accuracy, possibly overfitting noisy samples."
    },
    {
      "id": 10,
      "questionText": "Why is feature scaling critical for SVM?",
      "options": [
        "To remove duplicates",
        "To normalize labels",
        "Because SVM depends on distance calculations",
        "To convert categorical data"
      ],
      "correctAnswerIndex": 2,
      "explanation": "SVM uses dot products and distance metrics; scaling prevents feature dominance.",
    },
    {
      "id": 11,
      "questionText": "Scenario: Two features have vastly different ranges. What happens if not scaled?",
      "options": [
        "No impact",
        "Better accuracy",
        "Model bias towards larger scale feature",
        "Faster convergence"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Unscaled features distort margin calculations, biasing the model."
    },
    {
      "id": 12,
      "questionText": "What is the role of the kernel trick?",
      "options": [
        "Reduces features",
        "Improves feature scaling",
        "Maps data to higher dimensions without explicit transformation",
        "Normalizes data"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Kernel trick lets SVM handle non-linear data efficiently without explicit transformation.",
    },
    {
      "id": 13,
      "questionText": "Scenario: SVM applied to high-dimensional text data. Best kernel?",
      "options": [
        "Sigmoid kernel",
        "Linear kernel",
        "RBF kernel",
        "Polynomial kernel"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Linear SVMs perform well for high-dimensional sparse data such as text."
    },
    {
      "id": 14,
      "questionText": "Scenario: Non-linear boundaries observed. Which kernel is best?",
      "options": [
        "No kernel",
        "RBF kernel",
        "Sigmoid kernel only",
        "Linear kernel"
      ],
      "correctAnswerIndex": 1,
      "explanation": "The RBF kernel can model highly non-linear decision boundaries."
    },
    {
      "id": 15,
      "questionText": "What does γ control in an RBF kernel?",
      "options": [
        "Regularization strength",
        "The influence of a single training example",
        "Learning rate",
        "Loss function type"
      ],
      "correctAnswerIndex": 1,
      "explanation": "γ defines how far the influence of a single training example reaches; higher γ means a shorter reach and a more complex boundary.",
    },
    {
      "id": 16,
      "questionText": "Scenario: γ too small in RBF kernel. Effect?",
      "options": [
        "Zero accuracy",
        "Underfits; boundary too smooth",
        "Fails to converge",
        "Overfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Small γ makes the model too smooth, underfitting complex patterns."
    },
    {
      "id": 17,
      "questionText": "What happens if C=∞ in soft-margin SVM?",
      "options": [
        "Ignores support vectors",
        "Always fails",
        "Creates random margins",
        "Behaves like hard-margin SVM"
      ],
      "correctAnswerIndex": 3,
      "explanation": "When C is very large, SVM tries to classify all points correctly like a hard-margin model.",
    },
    {
      "id": 18,
      "questionText": "Scenario: SVM used for regression (SVR). What is optimized?",
      "options": [
        "Epsilon-insensitive loss",
        "Huber loss",
        "Cross-entropy",
        "Hinge loss"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Support Vector Regression uses epsilon-insensitive loss for fitting continuous data.",
    },
    {
      "id": 19,
      "questionText": "What happens if all points lie outside the margin in SVM?",
      "options": [
        "Margin expands",
        "Kernel fails",
        "Model complexity increases",
        "C ignored"
      ],
      "correctAnswerIndex": 0,
      "explanation": "If no point touches the margin, the constraints are not tight: the optimizer widens the margin until the nearest points lie on its boundary and become support vectors."
    },
    {
      "id": 20,
      "questionText": "Scenario: SVM trained with too many features but few samples. Risk?",
      "options": [
        "Overfitting",
        "Perfect generalization",
        "Fast convergence",
        "Underfitting"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High feature-to-sample ratio leads to overfitting."
    },
    {
      "id": 21,
      "questionText": "What does the bias term in SVM represent?",
      "options": [
        "C penalty",
        "The offset of the hyperplane",
        "Learning rate",
        "The variance"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Bias determines how far the decision boundary is from the origin."
    },
    {
      "id": 22,
      "questionText": "Scenario: RBF kernel with optimal γ and large C. Expected result?",
      "options": [
        "Linear decision boundary",
        "Overfit training set",
        "Underfit",
        "Ignore support vectors"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Even with a well-chosen γ, a very large C penalizes margin violations so heavily that the boundary bends to fit individual training points, risking overfitting."
    },
    {
      "id": 23,
      "questionText": "Why does SVM not work well with large datasets?",
      "options": [
        "Cannot handle linear data",
        "Training time increases quadratically",
        "Too few features",
        "Memory always freed"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Kernel SVM training typically scales between O(n²) and O(n³) in the number of samples, so training becomes slow on large datasets."
    },
    {
      "id": 24,
      "questionText": "Scenario: SVM applied with polynomial kernel degree=10. What happens?",
      "options": [
        "Overfits data",
        "Linear boundary",
        "Underfits",
        "No effect"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High-degree polynomial kernels can easily overfit."
    },
    {
      "id": 25,
      "questionText": "What is hinge loss used for?",
      "options": [
        "Hyperparameter tuning",
        "Feature selection",
        "Margin-based classification",
        "Regression"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Hinge loss is used in SVM to measure margin violations."
    },
    {
      "id": 26,
      "questionText": "Scenario: Noisy dataset with overlapping features. Best SVM approach?",
      "options": [
        "Linear only",
        "Soft margin with small C",
        "High γ",
        "Hard margin"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Soft margin and smaller C improve tolerance to noise."
    },
    {
      "id": 27,
      "questionText": "Scenario: Model overfits using RBF kernel. Possible fix?",
      "options": [
        "Remove regularization",
        "Reduce γ",
        "Increase γ",
        "Increase C"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Reducing γ smooths decision boundaries to avoid overfitting."
    },
    {
      "id": 28,
      "questionText": "Scenario: Data not linearly separable but low-dimensional. Efficient kernel?",
      "options": [
        "Polynomial kernel (degree 2)",
        "Linear",
        "RBF kernel",
        "Sigmoid"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Low-degree polynomial kernels can model slight non-linearities efficiently."
    },
    {
      "id": 29,
      "questionText": "Scenario: You use an RBF kernel on data with high dimensionality and little noise. What might happen?",
      "options": [
        "Good fit if parameters tuned",
        "Ignores all kernels",
        "Underfits automatically",
        "Always overfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High-dimensional data can work well with RBF kernels if hyperparameters are well-tuned."
    },
    {
      "id": 30,
      "questionText": "Scenario: Polynomial kernel used with degree=1. What kernel does this mimic?",
      "options": [
        "RBF kernel",
        "No kernel",
        "Sigmoid kernel",
        "Linear kernel"
      ],
      "correctAnswerIndex": 3,
      "explanation": "A polynomial kernel with degree 1 is equivalent to a linear kernel."
    },
    {
      "id": 31,
      "questionText": "Scenario: γ in RBF kernel set to 0.001. What happens?",
      "options": [
        "Acts as linear kernel",
        "Fails to converge",
        "Model overfits",
        "Model underfits; boundary too smooth"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Very small γ makes the RBF behave almost linearly, leading to underfitting."
    },
    {
      "id": 32,
      "questionText": "Scenario: Multiclass classification with SVM. Which strategy is used?",
      "options": [
        "One-vs-Rest or One-vs-One",
        "Naive Bayes",
        "K-Means",
        "Softmax"
      ],
      "correctAnswerIndex": 0,
      "explanation": "SVM handles multiclass via one-vs-rest or one-vs-one strategies.",
    },
    {
      "id": 33,
      "questionText": "Scenario: SVM trained on imbalanced data. What may occur?",
      "options": [
        "Perfect accuracy",
        "Bias toward majority class",
        "Bias toward minority",
        "Uniform decision boundary"
      ],
      "correctAnswerIndex": 1,
      "explanation": "SVM may favor the majority class unless class weights are balanced."
    },
    {
      "id": 34,
      "questionText": "How does SVM handle non-linear separations?",
      "options": [
        "By increasing epochs",
        "By removing bias",
        "By adding dropout",
        "By using kernel functions"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Kernels allow SVMs to map data into higher-dimensional spaces to handle non-linearity."
    },
    {
      "id": 35,
      "questionText": "Scenario: Large C and large γ chosen for RBF kernel. Expected behavior?",
      "options": [
        "Overfitting",
        "Stable model",
        "Underfitting",
        "Fails to train"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Large C and γ can both cause complex decision boundaries, leading to overfitting."
    },
    {
      "id": 36,
      "questionText": "Scenario: You increase C from 1 to 1000. What happens?",
      "options": [
        "Margin widens",
        "Kernel ignored",
        "Margin becomes narrower",
        "Model underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "A larger C penalizes misclassifications more, resulting in a narrower margin."
    },
    {
      "id": 37,
      "questionText": "Which optimization technique does SVM use to find the best hyperplane?",
      "options": [
        "Simulated annealing",
        "Gradient descent",
        "Quadratic programming",
        "Stochastic optimization"
      ],
      "correctAnswerIndex": 2,
      "explanation": "SVMs use quadratic programming to solve the optimization problem."
    },
    {
      "id": 38,
      "questionText": "Scenario: High γ, low C combination. Expected result?",
      "options": [
        "Complex boundary but tolerates errors",
        "Training failure",
        "Linear separation",
        "Perfect fit"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High γ adds complexity, but low C softens penalties, balancing flexibility."
    },
    {
      "id": 39,
      "questionText": "Scenario: SVM fails to converge. Possible reason?",
      "options": [
        "Improper scaling or large C/γ",
        "Too few features",
        "Low variance",
        "Kernel not imported"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Unscaled data or extreme parameter values can cause convergence issues."
    },
    {
      "id": 40,
      "questionText": "Why does SVM perform well in high-dimensional spaces?",
      "options": [
        "It uses PCA internally",
        "It ignores most features",
        "It compresses data",
        "It depends on support vectors, not dimensionality"
      ],
      "correctAnswerIndex": 3,
      "explanation": "SVM focuses on boundary points (support vectors), not the entire space."
    },
    {
      "id": 41,
      "questionText": "Scenario: Features highly correlated. Impact on SVM?",
      "options": [
        "Minimal impact; still works well",
        "Fails to classify",
        "Reduces C",
        "Doubles training time"
      ],
      "correctAnswerIndex": 0,
      "explanation": "SVMs can still work well but may benefit from decorrelation or PCA."
    },
    {
      "id": 42,
      "questionText": "Scenario: RBF kernel underfits training data. Possible fix?",
      "options": [
        "Use linear kernel",
        "Remove kernel",
        "Decrease γ",
        "Increase γ or C"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Higher γ or C increases flexibility and reduces underfitting."
    },
    {
      "id": 43,
      "questionText": "Scenario: Linear kernel chosen for non-linear data. Expected result?",
      "options": [
        "Balanced model",
        "Underfitting",
        "Perfect fit",
        "Overfitting"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Linear kernels cannot capture complex patterns, leading to underfitting."
    },
    {
      "id": 44,
      "questionText": "What happens if all data points are support vectors?",
      "options": [
        "Underfitting",
        "Overfitting",
        "No change",
        "Perfect generalization"
      ],
      "correctAnswerIndex": 1,
      "explanation": "If every point influences the boundary, the model likely overfits."
    },
    {
      "id": 45,
      "questionText": "Scenario: You observe slow SVM training on large dataset. What can help?",
      "options": [
        "Add regularization",
        "Increase γ",
        "Use linear SVM (LinearSVC)",
        "Use higher-degree kernel"
      ],
      "correctAnswerIndex": 2,
      "explanation": "LinearSVC is optimized for large-scale linear problems.",
    },
    {
      "id": 46,
      "questionText": "Scenario: Dataset has noise and outliers. Which SVM parameter to tune?",
      "options": [
        "C (regularization)",
        "Degree",
        "γ",
        "Bias"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Smaller C helps tolerate misclassifications and handle noisy data."
    },
    {
      "id": 47,
      "questionText": "Scenario: SVM used on normalized image features. Kernel to start with?",
      "options": [
        "Polynomial (deg=3)",
        "RBF",
        "Linear",
        "Sigmoid"
      ],
      "correctAnswerIndex": 1,
      "explanation": "RBF kernel often performs well on normalized, moderate-dimensional data."
    },
    {
      "id": 48,
      "questionText": "Scenario: γ=0.0001 and C=1000. Likely effect?",
      "options": [
        "Overfitting",
        "No convergence",
        "Underfitting",
        "Optimal fit"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Extremely low γ makes the decision boundary too simple, so underfitting occurs."
    },
    {
      "id": 49,
      "questionText": "Scenario: Multiclass SVM classification accuracy drops. Fix?",
      "options": [
        "Add dropout",
        "Use balanced class weights",
        "Reduce features",
        "Switch to regression"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Balancing class weights helps when class imbalance causes bias."
    },
    {
      "id": 50,
      "questionText": "Scenario: You test SVM on polynomial kernel degree=6. Observation?",
      "options": [
        "Underfits large datasets",
        "Ignores bias",
        "Linearizes output",
        "Overfits small datasets"
      ],
      "correctAnswerIndex": 3,
      "explanation": "High-degree polynomial kernels often overfit, especially on limited data."
    },
    {
      "id": 51,
      "questionText": "Scenario: SVM trained with sigmoid kernel. What does it resemble?",
      "options": [
        "RBF mapping",
        "Decision tree splitting",
        "Linear regression",
        "Neural network activation function"
      ],
      "correctAnswerIndex": 3,
      "explanation": "The sigmoid (tanh) kernel mimics the activation behavior of a neural network."
    },
    {
      "id": 52,
      "questionText": "What is the dual formulation used for in SVM?",
      "options": [
        "To reduce memory usage",
        "To handle high-dimensional kernels",
        "To remove bias term",
        "To normalize outputs"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Dual formulation helps apply kernel trick efficiently in high-dimensional space."
    },
    {
      "id": 53,
      "questionText": "Scenario: SVM used for spam detection (text data). Best kernel?",
      "options": [
        "Polynomial",
        "RBF",
        "Sigmoid",
        "Linear"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Linear kernels are efficient and effective for sparse text data."
    },
    {
      "id": 54,
      "questionText": "Scenario: Overfitting in SVM model. Which parameter should be reduced?",
      "options": [
        "Loss function",
        "C or γ",
        "Degree",
        "Bias"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Reducing C or γ simplifies the model and improves generalization."
    },
    {
      "id": 55,
      "questionText": "Scenario: Dataset has millions of samples. SVM alternative?",
      "options": [
        "Sigmoid SVM",
        "Stochastic gradient linear classifier",
        "Polynomial SVM",
        "Decision tree"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Large datasets often use SGD-based linear classifiers for scalability."
    },
    {
      "id": 56,
      "questionText": "Scenario: You use a kernel not positive semi-definite. What may occur?",
      "options": [
        "Better accuracy",
        "Optimization fails",
        "Underfitting",
        "Overfitting"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Non-PSD kernels can violate convex optimization requirements."
    },
    {
      "id": 57,
      "questionText": "Scenario: SVM applied for anomaly detection. Variant used?",
      "options": [
        "One-Class SVM",
        "K-Means",
        "SVR",
        "Binary SVM"
      ],
      "correctAnswerIndex": 0,
      "explanation": "One-Class SVM is designed for novelty or anomaly detection tasks.",
    },
    {
      "id": 58,
      "questionText": "Scenario: Data contains many irrelevant features. Approach?",
      "options": [
        "Lower γ",
        "Feature selection before SVM",
        "Add more kernels",
        "Increase C"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Feature selection reduces noise and improves SVM performance."
    },
    {
      "id": 59,
      "questionText": "Scenario: SVM decision boundary oscillates too much. Cause?",
      "options": [
        "Linear kernel",
        "Large γ",
        "Small C",
        "Small γ"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Large γ makes decision boundaries sensitive to individual samples."
    },
    {
      "id": 60,
      "questionText": "Scenario: You tune γ=0.1, C=10 via grid search. Effect?",
      "options": [
        "Guaranteed overfit",
        "Always underfit",
        "Improved generalization",
        "No change"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Grid search helps find the optimal trade-off between bias and variance."
    },
    {
      "id": 61,
      "questionText": "Scenario: SVM used with PCA-transformed features. Benefit?",
      "options": [
        "Faster convergence and less overfitting",
        "No benefit",
        "Worse accuracy",
        "Kernel ignored"
      ],
      "correctAnswerIndex": 0,
      "explanation": "PCA reduces redundancy, improving SVM performance and speed."
    },
    {
      "id": 62,
      "questionText": "Scenario: RBF kernel accuracy drops on test data. Likely reason?",
      "options": [
        "Kernel removed",
        "Overfitting due to high γ",
        "Underfitting",
        "Noisy training data"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Too high γ causes overfitting, reducing test performance."
    },
    {
      "id": 63,
      "questionText": "Scenario: SVM predicts continuous target. Variant?",
      "options": [
        "Kernel Ridge",
        "SVR (Support Vector Regression)",
        "Soft-margin SVM",
        "Linear SVM"
      ],
      "correctAnswerIndex": 1,
      "explanation": "SVR adapts the SVM principle for regression tasks."
    },
    {
      "id": 64,
      "questionText": "Scenario: You combine linear and RBF kernels. Effect?",
      "options": [
        "Error increases",
        "Kernel ignored",
        "Hybrid decision surface",
        "No benefit"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Hybrid kernels can model both linear and non-linear relationships."
    },
    {
      "id": 65,
      "questionText": "Scenario: SVM model gives different results on same data. Cause?",
      "options": [
        "Kernel mismatch",
        "Scaling issue",
        "Different γ",
        "Non-deterministic solver or random state"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Different random seeds or solvers can yield slightly varying solutions."
    },
    {
      "id": 66,
      "questionText": "Scenario: Class imbalance severe (90:10). Recommended?",
      "options": [
        "Use sigmoid kernel",
        "Reduce features",
        "Increase C",
        "Use class_weight='balanced'"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Setting class_weight='balanced' compensates for imbalance.",
    },
    {
      "id": 67,
      "questionText": "Scenario: SVM on dataset with 10M samples. Efficient approach?",
      "options": [
        "Polynomial kernel",
        "Naive Bayes",
        "RBF kernel SVC",
        "LinearSVC or SGDClassifier"
      ],
      "correctAnswerIndex": 3,
      "explanation": "LinearSVC or SGDClassifier scale better for large data."
    },
    {
      "id": 68,
      "questionText": "Scenario: High variance SVM results. Remedy?",
      "options": [
        "Increase γ",
        "Use hard margin",
        "Use cross-validation and parameter tuning",
        "Add noise"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Cross-validation stabilizes and selects optimal hyperparameters."
    },
    {
      "id": 69,
      "questionText": "Scenario: Feature scaling forgotten before training. Effect?",
      "options": [
        "Incorrect margin calculation",
        "Higher recall",
        "Faster training",
        "Better accuracy"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Unscaled data distorts distance-based calculations in SVM."
    },
    {
      "id": 70,
      "questionText": "Scenario: SVM trained with kernel='poly', degree=5. What to expect?",
      "options": [
        "No margin",
        "Underfitting",
        "Overfitting",
        "Perfect generalization"
      ],
      "correctAnswerIndex": 2,
      "explanation": "High-degree polynomial kernels tend to overfit."
    },
    {
      "id": 71,
      "questionText": "Scenario: You visualize the decision boundary and it looks very smooth. Cause?",
      "options": [
        "High degree",
        "Small C",
        "Small γ",
        "Large γ"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Small γ creates smoother, less complex boundaries."
    },
    {
      "id": 72,
      "questionText": "Scenario: You want probabilistic outputs from SVM. How?",
      "options": [
        "Enable probability=True",
        "Use RBF kernel",
        "Disable scaling",
        "Increase C"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Enabling probability=True uses Platt scaling to estimate probabilities.",
    },
    {
      "id": 73,
      "questionText": "Scenario: Training time too high with kernel SVM. Remedy?",
      "options": [
        "Use linear approximation",
        "Add noise",
        "Increase C",
        "Increase γ"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Approximation methods like LinearSVC or kernel approximation speed up training.",
    },
    {
      "id": 74,
      "questionText": "Scenario: SVM applied on binary imbalanced medical dataset. Recommendation?",
      "options": [
        "Drop small class",
        "Hard margin",
        "Random validation",
        "Stratified cross-validation"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Stratified cross-validation preserves class ratio during validation."
    },
    {
      "id": 75,
      "questionText": "Scenario: Model accuracy high on train, low on test. Issue?",
      "options": [
        "Low variance",
        "Overfitting",
        "Scaling error",
        "Underfitting"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Overfitting occurs when SVM learns noise and fails to generalize."
    },
    {
      "id": 76,
      "questionText": "Scenario: Feature correlation high, training unstable. Fix?",
      "options": [
        "Apply PCA before SVM",
        "Use high γ",
        "Reduce support vectors",
        "Increase C"
      ],
      "correctAnswerIndex": 0,
      "explanation": "PCA helps reduce correlation and stabilize training."
    },
    {
      "id": 77,
      "questionText": "Scenario: You want to visualize margin width. Which SVM attribute?",
      "options": [
        "kernel_",
        "n_iter_",
        "coef_ and intercept_",
        "support_"
      ],
      "correctAnswerIndex": 2,
      "explanation": "For a linear SVM the margin width equals 2 / ||coef_||, and coef_ with intercept_ defines the hyperplane itself.",
    },
    {
      "id": 78,
      "questionText": "Scenario: You use polynomial kernel degree=3. What is the effect?",
      "options": [
        "Fail to train",
        "Linear separation",
        "Non-linear decision surface",
        "Underfitting"
      ],
      "correctAnswerIndex": 2,
      "explanation": "A polynomial kernel allows curved decision boundaries."
    },
    {
      "id": 79,
      "questionText": "Scenario: SVM used for text sentiment analysis. Kernel?",
      "options": [
        "Sigmoid",
        "Linear",
        "RBF",
        "Polynomial"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Linear kernel works best for high-dimensional sparse text data."
    },
    {
      "id": 80,
      "questionText": "Scenario: Decision boundary too sensitive to single points. Cause?",
      "options": [
        "Balanced class weights",
        "Small γ",
        "Large γ",
        "Small C"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Large γ focuses too much on nearby data, making boundary sensitive."
    },
    {
      "id": 81,
      "questionText": "Scenario: You combine SVM with bagging ensemble. Benefit?",
      "options": [
        "No change",
        "Overfitting",
        "Higher bias",
        "Reduced variance"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Ensembling multiple SVMs reduces variance and improves generalization."
    },
    {
      "id": 82,
      "questionText": "Scenario: You reduce C drastically. Effect?",
      "options": [
        "Kernel ignored",
        "Perfect accuracy",
        "Wider margin, higher bias",
        "Narrow margin"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Smaller C allows more misclassifications, leading to wider margins."
    },
    {
      "id": 83,
      "questionText": "Scenario: Dataset scaled incorrectly. Decision boundary looks tilted. Reason?",
      "options": [
        "Feature scaling inconsistency",
        "Large γ",
        "Kernel mismatch",
        "Small C"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Inconsistent scaling distorts feature space, altering boundary shape."
    },
    {
      "id": 84,
      "questionText": "Scenario: You use SVM with polynomial kernel on 3D data. Result?",
      "options": [
        "Underfit",
        "Linear separation",
        "Over-generalization",
        "Non-linear surface fit"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Polynomial kernels enable non-linear separation even in 3D."
    },
    {
      "id": 85,
      "questionText": "Scenario: You train SVM on very small dataset. Danger?",
      "options": [
        "High accuracy guaranteed",
        "Underfitting",
        "Overfitting due to few points",
        "Fast convergence"
      ],
      "correctAnswerIndex": 2,
      "explanation": "With very few training points, the boundary can conform to individual samples and fail to generalize."
    },
    {
      "id": 86,
      "questionText": "Scenario: SVM kernel parameter γ=1e5. What happens?",
      "options": [
        "Underfitting",
        "Extreme overfitting",
        "Stable training",
        "No change"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Very high γ makes the model memorize data, leading to overfitting."
    },
    {
      "id": 87,
      "questionText": "Scenario: SVM hyperplane perfectly separates training points. Danger?",
      "options": [
        "No bias",
        "Perfect generalization",
        "Underfitting",
        "Overfitting likely"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Perfect separation may indicate overfitting unless data is clean."
    },
    {
      "id": 88,
      "questionText": "Scenario: You enable shrinking=True in SVC. Effect?",
      "options": [
        "Faster optimization using heuristics",
        "Slower training",
        "Lower accuracy",
        "No difference"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Shrinking heuristic speeds up convergence during optimization."
    },
    {
      "id": 89,
      "questionText": "Scenario: High dimensional dataset (10000 features). Kernel?",
      "options": [
        "RBF",
        "Linear",
        "Polynomial",
        "Sigmoid"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Linear kernel is efficient in very high-dimensional spaces."
    },
    {
      "id": 90,
      "questionText": "Scenario: Decision boundary too smooth, misclassifying nonlinear data. Fix?",
      "options": [
        "Increase γ or use RBF kernel",
        "Reduce C",
        "Change solver",
        "Add dropout"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Increasing γ adds flexibility to handle complex patterns."
    },
    {
      "id": 91,
      "questionText": "Scenario: You visualize few support vectors only. Meaning?",
      "options": [
        "Training failed",
        "Underfits",
        "Model generalizes well",
        "Overfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Fewer support vectors indicate a strong, well-generalized boundary."
    },
    {
      "id": 92,
      "questionText": "Scenario: SVM with RBF kernel used for face recognition. Why suitable?",
      "options": [
        "Less computation",
        "Captures complex non-linear relationships",
        "Linear mapping only",
        "Ignores features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "RBF kernels are effective for non-linear facial feature mapping."
    },
    {
      "id": 93,
      "questionText": "Scenario: Hyperparameter tuning done via grid search. Risk?",
      "options": [
        "Underfitting",
        "Overfitting to validation set",
        "Bias error",
        "Kernel mismatch"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Excessive grid search tuning can overfit to validation data.",
    },
    {
      "id": 94,
      "questionText": "Scenario: You observe many support vectors even after tuning. Cause?",
      "options": [
        "Complex data distribution",
        "Low γ",
        "Simpler boundary",
        "Small C"
      ],
      "correctAnswerIndex": 0,
      "explanation": "More support vectors imply complex class boundaries."
    },
    {
      "id": 95,
      "questionText": "Scenario: Kernel trick purpose?",
      "options": [
        "Speed training",
        "Reduce dimensionality",
        "Compute inner products in higher-dimensional space",
        "Add noise"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Kernel trick implicitly computes high-dimensional mappings efficiently."
    },
    {
      "id": 96,
      "questionText": "Scenario: SVM trained with linear kernel on non-linear XOR data. Outcome?",
      "options": [
        "Good generalization",
        "Overfitting",
        "Underfitting",
        "Perfect accuracy"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Linear kernel cannot separate XOR patterns."
    },
    {
      "id": 97,
      "questionText": "Scenario: You set tol=1e-10 in SVM. Effect?",
      "options": [
        "Overfitting",
        "Underfitting",
        "Faster training",
        "Higher precision but slower convergence"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Smaller tolerance increases precision but slows convergence."
    },
    {
      "id": 98,
      "questionText": "Scenario: C=0.01 and γ=1. What’s the likely behavior?",
      "options": [
        "Perfect fit",
        "Underfitting with soft margin",
        "Overfitting",
        "Fast overgeneralization"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Low C allows a large margin and misclassifications, causing underfitting."
    },
    {
      "id": 99,
      "questionText": "Scenario: RBF kernel used with default params. What’s the effect?",
      "options": [
        "Ignores margin",
        "Fails to train",
        "Depends on feature scaling",
        "Always best choice"
      ],
      "correctAnswerIndex": 2,
      "explanation": "RBF kernel performs well if data is scaled properly."
    },
    {
      "id": 100,
      "questionText": "Scenario: SVM’s decision boundary has maximum margin. Why important?",
      "options": [
        "Increases bias",
        "Improves generalization",
        "Decreases variance",
        "Reduces training speed"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Maximizing margin leads to better generalization and robustness."
    }
  ]
}