# File Name: customer_segmentation_kmeans.R
# Author: Gerard King - www.gerardking.dev
# Title: Customer Segmentation using K-Means Clustering
# Description: This program uses k-means clustering to segment customers into different groups based on their
# age, income, and spending score. Customer segmentation is widely used in banking to target specific customer
# groups with tailored marketing or financial products.
# Use Cases:
# - Segmenting customers for personalized marketing or product offerings
# - Identifying high-value customers for VIP services
# - Grouping customers based on behavior or financial health
# Audience:
# - Marketing analysts and data scientists working in banking and finance
# - Customer experience teams looking to tailor offerings to different customer segments
# - Business analysts interested in customer grouping for targeted interventions
# Blue Team Uses:
# - Grouping customers based on financial behaviors to target fraud prevention efforts
# - Identifying customers who may exhibit high-risk behavior based on transaction patterns
# - Analyzing customer financial health across different segments to improve services
# Red Team Uses:
# - Segmenting attack data to find potential targets based on vulnerability patterns
# - Analyzing network behavior to cluster types of fraud or anomalous activities
# - Identifying common patterns in customer behavior that could lead to exploitation
# Current Date: 2025-03-06
# Load necessary libraries
library(ggplot2)
library(cluster)
# Set the seed for reproducibility
set.seed(606)

# Generate synthetic customer data (age, income, spending score).
# The random draws are made in the order age -> income -> spending score;
# keep that order so a given seed keeps producing the same data set.
n <- 500 # number of customers
age <- sample(18:70, n, replace = TRUE) # ages from 18 to 70
# rnorm() with mean 50000 and sd 20000 occasionally draws values below zero;
# clamp at 0 so that no synthetic customer ends up with a negative income.
income <- pmax(rnorm(n, mean = 50000, sd = 20000), 0)
spending_score <- sample(1:100, n, replace = TRUE) # spending score from 1 to 100
customer_data <- data.frame(age, income, spending_score)

# Standardize the data (important for k-means clustering): scale() centers each
# column and divides it by its standard deviation, so the large numeric range
# of income does not dominate the distances between customers.
customer_data_scaled <- scale(customer_data)
# Perform k-means clustering with 4 clusters (k = 4) on the standardized data.
# k-means starts from randomly chosen centers and can settle in a poor local
# optimum; nstart = 25 tries 25 random starts and keeps the best solution, so
# the segmentation is far less sensitive to the initial centers.
kmeans_result <- kmeans(customer_data_scaled, centers = 4, nstart = 25)

# Add cluster assignment to the dataset as a factor so that it is treated as a
# categorical label (e.g. a discrete colour scale when plotting).
customer_data$cluster <- as.factor(kmeans_result$cluster)
# Visualize the customer clusters using ggplot2 (Age vs Spending Score).
# The plot is stored and printed explicitly: a bare ggplot expression is only
# drawn through top-level auto-printing, which does not happen when the script
# is run via source(), so without print() no figure would be produced there.
segment_plot <- ggplot(
  customer_data,
  aes(x = age, y = spending_score, color = cluster)
) +
  geom_point(size = 3) +
  labs(
    title = "Customer Segmentation: Age vs Spending Score",
    x = "Age", y = "Spending Score", color = "Customer Segment"
  ) +
  # One colour per cluster; the number of colours must match the k used above.
  scale_color_manual(values = c("red", "blue", "green", "purple")) +
  theme_minimal()
print(segment_plot)
# Display the cluster centers.
# Note: the clustering was fitted on the standardized data, so these centers
# are expressed in standardized units (z-scores), not in years or currency.
cat("Cluster Centers:\n")
print(kmeans_result$centers)

# Print the current date for reference.
# cat() drops the Date class and would print the underlying day count (a bare
# number); format() converts the date to its "YYYY-MM-DD" text form first.
cat("Date of execution:", format(Sys.Date()), "\n")