# File Name: k_means_clustering.R
# Author: Gerard King - www.gerardking.dev
# Title: K-Means Clustering and Visualization
# Description: This R program generates a synthetic dataset, performs k-means clustering to group
# the data points into clusters, and visualizes the clusters using a scatter plot. The program helps
# understand the concept of clustering and how data points are grouped based on similarity.
# Use Cases:
# - Segmenting customers or users based on behavior
# - Grouping similar data points for exploratory data analysis
# - Clustering data points for pattern recognition and anomaly detection
# Audience:
# - Data scientists and machine learning practitioners
# - Students learning unsupervised learning techniques
# - Analysts and researchers working on data segmentation or pattern recognition
# Blue Team Uses:
# - Grouping similar system behaviors or network traffic for anomaly detection
# - Clustering system logs to detect abnormal patterns or attack behaviors
# - Identifying similar incidents or security events to predict future threats
# Red Team Uses:
# - Identifying clusters of potential attack points or weaknesses in a system
# - Simulating clusters of attack patterns to evaluate security defenses
# - Detecting groups of vulnerabilities or exploits that share common characteristics
# Current Date: 2025-03-06
# Load necessary libraries
library(ggplot2)
# ---- Synthetic data and k-means fit ----
# Fix the RNG state so both the simulated points and the random k-means
# initialisations are reproducible from run to run.
set.seed(202)
# Simulate 100 points with two numeric features: the first 50 rows are drawn
# around (3, 3) and the last 50 around (7, 7), using rnorm()'s default sd of 1.
data <- data.frame(
  x = c(rnorm(50, mean = 3), rnorm(50, mean = 7)),
  y = c(rnorm(50, mean = 3), rnorm(50, mean = 7))
)
# Perform k-means clustering with 2 clusters (k = 2). nstart = 25 runs the
# algorithm from 25 random sets of initial centers and keeps the best fit,
# rather than trusting a single random start that may end in a local optimum.
kmeans_result <- kmeans(data, centers = 2, nstart = 25)
# Add the cluster assignment to the dataset as a factor so it can drive a
# discrete colour scale. The numbering of the clusters (1 vs 2) is arbitrary.
data$cluster <- as.factor(kmeans_result$cluster)
# Create a scatter plot of the data points, colouring each point by its
# assigned cluster (red for the first factor level, blue for the second).
cluster_plot <- ggplot(data, aes(x = x, y = y, color = cluster)) +
  geom_point(size = 3) +
  labs(
    title = "K-Means Clustering Visualization",
    x = "Feature 1 (x)",
    y = "Feature 2 (y)"
  ) +
  scale_color_manual(values = c("red", "blue")) +
  theme_minimal()
# Print explicitly: a ggplot object is only drawn when it is printed, and
# top-level auto-printing does not happen when this file is run via source()
# with its default arguments.
print(cluster_plot)
# Display the cluster centers (one row per cluster, one column per feature)
cat("Cluster Centers:\n")
print(kmeans_result$centers)
# Print the current date for reference. cat() drops the Date class and would
# emit the underlying day count since 1970-01-01, so convert the date to its
# text form first.
cat("Date of execution:", format(Sys.Date()), "\n")