# File Name: correlation_matrix_heatmap.R
# Author: Gerard King - www.gerardking.dev
# Title: Correlation Matrix and Heatmap Visualization
# Description: This R program generates a synthetic dataset with multiple variables, computes
# the correlation matrix, and visualizes it as a heatmap. This is useful for exploring relationships
# between variables in datasets.
# Use Cases:
# - Identifying correlated features in a dataset
# - Analyzing the strength of relationships between variables
# - Visualizing large datasets to identify patterns and outliers
# Audience:
# - Data scientists and analysts performing exploratory data analysis (EDA)
# - Students learning about correlation and data visualization
# - Researchers identifying key relationships in multivariate datasets
# Blue Team Uses:
# - Analyzing correlations between system metrics (e.g., CPU usage, memory, network traffic)
# - Detecting potential multicollinearity in system performance data
# - Identifying relationships between different security events or logs
# Red Team Uses:
# - Identifying potential dependencies between attack vectors and vulnerabilities
# - Analyzing correlated patterns in simulated attack data
# - Visualizing relationships between different attack strategies or techniques
# Current Date: 2025-03-06
# Load the libraries needed for reshaping and visualization
library(ggplot2)
library(reshape2)
library(RColorBrewer)
# Fix the random number generator state so every run draws the same sample
set.seed(101)
# Build the synthetic dataset: five columns named var1..var5, each filled by
# its own rnorm(100) call, drawn in column order
data <- as.data.frame(
  lapply(
    setNames(nm = paste0("var", seq_len(5))),
    function(column_name) rnorm(100)
  )
)
# Pairwise correlations between every pair of columns in the dataset
# (Pearson is the method cor() applies when none is given; spelled out here)
cor_matrix <- cor(x = data, method = "pearson")
# Reshape the square matrix into long format, one row per matrix cell, so
# ggplot can map each cell to a tile
melted_cor_matrix <- reshape2::melt(cor_matrix)
# Create a heatmap of the correlation matrix: one tile per pair of variables,
# coloured by correlation on a diverging blue-white-red scale centred on zero
heatmap_plot <- ggplot(melted_cor_matrix, aes(Var1, Var2, fill = value)) +
  geom_tile() +
  # Fix the scale to the full correlation range so the legend always spans
  # [-1, 1] instead of being fitted to the values in this particular sample
  scale_fill_gradient2(
    midpoint = 0, low = "blue", high = "red", mid = "white",
    limits = c(-1, 1)
  ) +
  theme_minimal() +
  # Slant the x-axis labels so longer variable names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Correlation Matrix Heatmap",
    x = "Variables",
    y = "Variables",
    fill = "Correlation"
  )
# Print explicitly: a ggplot object is only drawn when it is printed, and
# auto-printing of top-level values does not happen under source()
print(heatmap_plot)
# Print the correlation matrix to the console
cat("Correlation Matrix:\n")
print(cor_matrix)
# Print the current date for reference.
# cat() writes a Date object as its underlying number (days since
# 1970-01-01), so convert it to its "YYYY-MM-DD" text form first.
cat("Date of execution:", format(Sys.Date()), "\n")