# File Name: logistic_regression_classification.R
# Author: Gerard King - www.gerardking.dev
# Title: Logistic Regression for Binary Classification and Visualization
# Description: This R program generates synthetic data for a binary classification problem,
# fits a logistic regression model to the data, and visualizes the decision boundary.
# It demonstrates the use of logistic regression for predicting binary outcomes.
# Use Cases:
# - Predicting binary outcomes (e.g., yes/no, 0/1)
# - Classifying data points into two categories
# - Solving classification problems in various domains (e.g., marketing, medical diagnostics)
# Audience:
# - Data scientists and machine learning practitioners
# - Students learning about classification techniques
# - Researchers working on binary classification problems
# Blue Team Uses:
# - Identifying whether a security event is benign or suspicious
# - Classifying system logs into different categories (e.g., normal vs. attack)
# - Predicting binary outcomes like whether a system will experience a breach or not
# Red Team Uses:
# - Using logistic regression to classify attack attempts based on different features
# - Predicting the likelihood of a successful attack based on various factors
# - Analyzing the likelihood of detecting a breach given certain system metrics
# Current Date: 2025-03-06
# Load necessary library
library(ggplot2)
# Fix the RNG state so every run draws the same synthetic sample
set.seed(404)
# Simulate n observations of two standard-normal features (x1, x2). The binary
# outcome y is 1 when a noisy, equally weighted score of both features is
# positive, and 0 otherwise.
n <- 200
x1 <- rnorm(n)
x2 <- rnorm(n)
y <- as.numeric(0.5 * x1 + 0.5 * x2 + rnorm(n) > 0)
# Collect features and outcome; the outcome is stored as a factor ("0"/"1")
data <- data.frame(x1, x2, y = factor(y))
# Logistic regression of the outcome on both features
logit_model <- glm(y ~ x1 + x2, data = data, family = binomial)
# Fitted probabilities for the training rows (type = "response" returns
# probabilities rather than log-odds)
data$predicted <- predict(logit_model, type = "response")
# Hard class labels: 1 when the fitted probability exceeds 0.5, else 0
data$predicted_class <- with(data, ifelse(predicted > 0.5, 1, 0))
# Scatter plot of the observations, colored by the class the model predicts
# (the observed outcome y is not drawn in this figure).
# The color values are named by class so "0" is always red and "1" is always
# blue, even if the model happens to predict only one of the two classes.
class_plot <- ggplot(
  data,
  aes(x = x1, y = x2, color = as.factor(predicted_class))
) +
  geom_point(size = 3) +
  labs(
    title = "Logistic Regression Classification",
    x = "Feature 1 (x1)", y = "Feature 2 (x2)", color = "Predicted Class"
  ) +
  scale_color_manual(values = c("0" = "red", "1" = "blue")) +
  theme_minimal()
# Print explicitly: top-level auto-printing does not happen under source()
print(class_plot)
# Plot the decision boundary
# A contour needs predictions on a full 2-D grid: expand.grid() crosses the two
# feature ranges (100 x 100 points) instead of pairing them along a diagonal.
boundary_grid <- expand.grid(
  x1 = seq(min(data$x1), max(data$x1), length.out = 100),
  x2 = seq(min(data$x2), max(data$x2), length.out = 100)
)
# Predicted probability of class 1 at every grid point
boundary_grid$prob <- predict(
  logit_model,
  newdata = boundary_grid, type = "response"
)
boundary_plot <- ggplot(data, aes(x = x1, y = x2)) +
  geom_point(aes(color = as.factor(predicted_class)), size = 3) +
  # The decision boundary is the single contour where P(y = 1) equals the 0.5
  # threshold used to assign predicted_class
  geom_contour(
    data = boundary_grid, aes(z = prob),
    breaks = 0.5, color = "black"
  ) +
  labs(
    title = "Decision Boundary of Logistic Regression",
    x = "Feature 1 (x1)", y = "Feature 2 (x2)", color = "Predicted Class"
  ) +
  # Same class-to-color mapping as the classification scatter plot
  scale_color_manual(values = c("0" = "red", "1" = "blue")) +
  theme_minimal()
# Print explicitly: top-level auto-printing does not happen under source()
print(boundary_plot)
# Display the model summary
cat("Logistic Regression Model Summary:\n")
# Print explicitly: a bare summary() call relies on top-level auto-printing,
# which is skipped when the script is run with source()
print(summary(logit_model))
# Print the current date for reference
# cat() ignores the Date class and would emit the underlying day count, so the
# date is converted to its "YYYY-MM-DD" text form first
cat("Date of execution:", format(Sys.Date()), "\n")