DOC Merge plot_svm_margin.py and plot_separating_hyperplane.py into plot_svm_hyperplane_margin.py #31045
base: main
@@ -404,13 +404,14 @@ Tips on Practical Use

 * **Setting C**: ``C`` is ``1`` by default and it's a reasonable default
   choice. If you have a lot of noisy observations you should decrease it:
-  decreasing C corresponds to more regularization.
+  decreasing C corresponds to more regularization (see example below).

   :class:`LinearSVC` and :class:`LinearSVR` are less sensitive to ``C`` when
   it becomes large, and prediction results stop improving after a certain
   threshold. Meanwhile, larger ``C`` values will take more time to train,
   sometimes up to 10 times longer, as shown in [#3]_.

Review comment (suggested change): Let's also leave this unchanged.

 * Support Vector Machine algorithms are not scale invariant, so **it
   is highly recommended to scale your data**. For example, scale each
   attribute on the input vector X to [0,1] or [-1,+1], or standardize it
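To make the regularization effect above concrete, here is a minimal sketch (an illustration, not part of this diff; the blob data and ``C`` values mirror the new example, and ``2 / ||w||`` is the standard margin width of a linear SVM):

import numpy as np

from sklearn.datasets import make_blobs
from sklearn.svm import SVC

X, y = make_blobs(n_samples=40, centers=2, random_state=6)
for C in (0.05, 1, 1000):
    clf = SVC(kernel="linear", C=C).fit(X, y)
    # Smaller C -> stronger regularization -> smaller ||w|| -> wider margin
    print(f"C={C:>6}: margin width = {2 / np.linalg.norm(clf.coef_[0]):.3f}")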
@@ -468,6 +469,9 @@ Tips on Practical Use

 The ``C`` value that yields a "null" model (all weights equal to zero) can
 be calculated using :func:`l1_min_c`.

+.. rubric:: Examples
+
+* :ref:`sphx_glr_auto_examples_svm_plot_svm_hyperplane_margin.py`

Review comment on lines +472 to +474: It would be great to add the reference to the example in the text instead, including a description of what the example is about.
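As a quick aside on :func:`l1_min_c` mentioned in this hunk, a minimal sketch (the iris data is an arbitrary choice for illustration):

from sklearn.datasets import load_iris
from sklearn.svm import l1_min_c

X, y = load_iris(return_X_y=True)
# Smallest C for which an l1-penalized linear model on (X, y) has at
# least one nonzero coefficient; below this value the model is "null"
print(l1_min_c(X, y, loss="squared_hinge"))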
 .. _svm_kernels:
@@ -632,7 +636,11 @@ indicates a perfect prediction. But problems are usually not always perfectly

 separable with a hyperplane, so we allow some samples to be at a distance :math:`\zeta_i` from
 their correct margin boundary. The penalty term `C` controls the strength of
 this penalty, and as a result, acts as an inverse regularization parameter
-(see note below).
+(see the figure below). Also please refer to the note below.

Review comment (suggested change): I would suggest to simply finish the last sentence with a colon and then show the plot directly.

+.. figure:: ../auto_examples/svm/images/sphx_glr_plot_svm_hyperplane_margin_001.png
+   :align: center
+   :scale: 75

Review comment on lines +641 to +643 (suggested change): If we add a target line, the plot serves as a link to the example. I think it should be like this.

 The dual problem to the primal is
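For reference, the hunk is truncated here; the standard soft-margin dual that this sentence introduces (with :math:`e` the all-ones vector and :math:`Q_{ij} \equiv y_i y_j K(x_i, x_j)`) is:

.. math::

   \min_{\alpha} \frac{1}{2} \alpha^T Q \alpha - e^T \alpha

   \textrm{subject to } & y^T \alpha = 0\\
   & 0 \leq \alpha_i \leq C, i = 1, ..., n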
examples/svm/plot_svm_hyperplane_margin.py (new file)
@@ -0,0 +1,108 @@
"""
=========================================================================
SVM: Effect of Regularization (C) on Maximum Margin Separating Hyperplane
=========================================================================

This script demonstrates the concept of a maximum-margin separating
hyperplane in a two-class separable dataset, using a Support Vector Machine
(SVM) with a linear kernel, and shows how different values of `C` influence
the margin width.

Review comment on lines +6 to +8: I would suggest to add a short note on what a margin is to make this more beginner-friendly.

- **Small C (e.g., 0.05)**:
  - Allows some misclassifications, resulting in a wider margin.
- **Moderate C (e.g., 1)**:
  - Balances classification accuracy and margin width.
- **Large C (e.g., 1000)**:
  - Prioritizes classifying all points correctly, leading to a narrower margin.

"""
|
||
# Authors: The scikit-learn developers | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
|
||
# %% | ||
import matplotlib.pyplot as plt | ||
|
||
from sklearn import svm | ||
from sklearn.datasets import make_blobs | ||
from sklearn.inspection import DecisionBoundaryDisplay | ||
|
||
# %% | ||
# Create 40 separable points | ||
X, y = make_blobs(n_samples=40, centers=2, cluster_std=1.5, random_state=6) | ||
|
||
# %% | ||
# Define different values of C to observe its effect on the margin | ||
C_values = [0.05, 1, 1000] | ||
|
||
# %% | ||
# Visualize | ||
plt.figure(figsize=(12, 4)) | ||
for i, C_val in enumerate(C_values, 1): | ||
clf = svm.SVC(kernel="linear", C=C_val) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should add a random state here, so this example looks the same with every run. |
||
clf.fit(X, y) | ||
y_pred = clf.predict(X) | ||
misclassified = y_pred != y | ||
|
||
plt.subplot(1, 3, i) | ||
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired, edgecolors="k") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add axis labels. Something simple like "Feature 1" and "Feature 2" would be enough. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is resolved. (Just the "resolved" button doesn't show here.) |
||
    # highlight misclassified samples
    plt.scatter(
        X[misclassified, 0],
        X[misclassified, 1],
        facecolors="none",
        edgecolors="k",

Review comment: Using the same color (edgecolors="k") for correctly classified and misclassified points doesn't make them stand out as expected. Could you fix this, please?

        s=80,
        linewidths=1.5,
        label="Misclassified",
    )

    # plot the decision function
    ax = plt.gca()
    DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        plot_method="contour",
        colors="k",
        levels=[-1, 0, 1],
        alpha=0.5,
        linestyles=["--", "-", "--"],
        ax=ax,
    )

    # plot support vectors
    ax.scatter(
        clf.support_vectors_[:, 0],
        clf.support_vectors_[:, 1],
        s=120,
        linewidth=1.5,
        facecolors="none",
        edgecolors="r",

Review comment: What do you think about having a different color for misclassified samples?
Reply: This is resolved. (Just the "resolved" button doesn't show here.)

        label="Support Vectors",
    )
|
||
plt.title(f"SVM Decision Boundary (C={C_val})") | ||
plt.xlabel("Feature 1") | ||
plt.ylabel("Feature 2") | ||
plt.legend() | ||
|
||
plt.tight_layout() | ||
plt.show() | ||
|
||
# %% [markdown] | ||
# - **Small `C` (e.g., 0.01, 0.05)**: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be nice to have the representation of |
||
# - Use when: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
# - You expect noisy or overlapping data. | ||
# - You can tolerate some misclassification in training. | ||
# - Your priority is better generalization on unseen data. | ||
# - Note: | ||
# - May underfit if the margin is too lenient. | ||
# - **Moderate `C` (e.g., 1)**: | ||
# - Use when: | ||
# - You're unsure about noise levels. | ||
# - You want good balance between margin width and classification accuracy. | ||
# - **Large `C` (e.g., 1000)**: | ||
# - Use when: | ||
# - The data is clean and linearly separable. | ||
# - You want to avoid any training misclassification. | ||
# - Note: | ||
# - May overfit noisy data by trying to classify all samples correctly. | ||
Comment on lines
+107
to
+108
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's not rendered as part of the |
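One natural extension of this guidance (an illustrative sketch, not part of the PR; it reuses the file's ``X``, ``y`` and ``svm`` import, and the grid of ``C`` values is an arbitrary choice) is to pick ``C`` by cross-validation rather than by eye:

# %%
from sklearn.model_selection import GridSearchCV

param_grid = {"C": [0.01, 0.05, 1, 10, 100, 1000]}
search = GridSearchCV(svm.SVC(kernel="linear"), param_grid, cv=5)
search.fit(X, y)
# Report the C value with the best mean cross-validated accuracy
print(f"Best C by 5-fold cross-validation: {search.best_params_['C']}")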
This file was deleted.

Review comment: I think it's better to leave this unchanged. We add links in suitable spots, but these quasi-links just occupy the reader's attention without adding real value.