R语言与多股票波动性及相关性的可视化
前言
在进行量化投资建模之前,我们常常需要对多只股票的价格走势、收益率序列和波动率等进行分析。下面给出使用 R 语言比较多只股票价格走势的完整方案,涵盖数据获取、清洗、可视化及基础分析的全流程。
数据获取
安装与加载工具包
# 安装必要包(首次运行需取消注释)
# install.packages(c("quantmod",
# "tidyverse",
# "ggplot2",
# "zoo",
# "corrplot"))
library(quantmod) # 获取金融数据
library(tidyverse) # 数据处理
library(ggplot2) # 可视化
library(zoo) # 时间序列处理
定义股票代码与时间范围
# Sample window: from the start of 2023 up to the day the script is run.
start_date <- "2023-01-01"
end_date <- Sys.Date()

# Tickers to compare: Apple, Alphabet (Google), Microsoft, NVIDIA.
# Other markets can be used too; A-share codes need a .SS / .SZ suffix.
stocks <- c("AAPL", "GOOGL", "MSFT", "NVDA")
批量获取股票数据
# Download the price history of every ticker in `stocks` from Yahoo Finance
# for the window [start_date, end_date].
getSymbols(
  Symbols = stocks,
  src = "yahoo",
  from = start_date,
  to = end_date
)
## [1] "AAPL" "GOOGL" "MSFT" "NVDA"
# Reshape every downloaded series into long format (Date, price, symbol)
# and stack the per-ticker tables into a single tibble.
stock_data <- bind_rows(
  lapply(stocks, function(ticker) {
    # Look up the object whose name is the ticker itself.
    quotes <- get(ticker)
    # Pattern for the "<TICKER>." prefix carried by the column names.
    prefix_pattern <- paste0("^", ticker, "\\.")

    as_tibble(quotes) %>%
      mutate(Date = index(quotes)) %>%
      rename_with(function(nm) gsub(prefix_pattern, "", nm)) %>%
      select(Date, price = Close) %>% # keep only the closing price
      mutate(symbol = ticker) # tag each row with its ticker
  })
)

# Peek at the first rows of the combined table.
head(stock_data)
## # A tibble: 6 × 3
## Date price symbol
## <date> <dbl> <chr>
## 1 2023-01-03 125. AAPL
## 2 2023-01-04 126. AAPL
## 3 2023-01-05 125. AAPL
## 4 2023-01-06 130. AAPL
## 5 2023-01-09 130. AAPL
## 6 2023-01-10 131. AAPL
数据清洗
处理缺失值
library(dplyr)
# Count the missing closing prices of each ticker (kept for inspection).
missing_values <- stock_data %>%
  group_by(symbol) %>%
  summarise(missing = sum(is.na(price)))

# Forward-fill gaps within each ticker (last observation carried forward).
# na.rm = FALSE is required here: with zoo's default (na.rm = TRUE) leading
# NAs are dropped, the result is shorter than the group, and mutate() fails
# on the length mismatch. Leading NAs therefore stay NA at this step.
stock_data <- stock_data %>%
  group_by(symbol) %>%
  mutate(price = na.locf(price, na.rm = FALSE)) %>%
  ungroup() # do not leak the grouping into later steps
对齐时间序列
library(dplyr)
# Complete calendar (one row per day) covering the sample window.
full_dates <- tibble(
  Date = seq(as.Date(start_date), as.Date(end_date), by = "day")
)

# Align every ticker to the full calendar.
# The grid must be Date x symbol and the join must use both keys: joining on
# Date alone gives calendar-only days a missing symbol, so they can never be
# filled and are simply dropped again, turning the whole step into a no-op.
# NOTE: non-trading days (weekends, holidays) carry the previous close
# forward, so returns computed from this table are zero on those days.
stock_data <- tidyr::expand_grid(
  Date = full_dates$Date,
  symbol = unique(stock_data$symbol)
) %>%
  left_join(stock_data, by = c("Date", "symbol")) %>%
  arrange(symbol, Date) %>%
  group_by(symbol) %>%
  # "downup": carry the last close forward, then back-fill the days that
  # precede the first observation of a ticker.
  fill(price, .direction = "downup") %>%
  ungroup() %>%
  filter(!is.na(price)) %>% # only hits tickers without any price at all
  select(Date, price, symbol)
价格走势可视化
基础折线图
library(dplyr)
# Closing price of every ticker over time, one coloured line per ticker,
# with a fixed colour assigned to each symbol and the legend above the panel.
ggplot(
  data = stock_data,
  mapping = aes(x = Date, y = price, colour = symbol)
) +
  geom_line(linewidth = 0.8) +
  scale_colour_manual(
    values = c(
      AAPL = "red",
      GOOGL = "blue",
      MSFT = "green",
      NVDA = "purple"
    )
  ) +
  labs(
    title = "多只股票价格走势对比",
    x = "日期",
    y = "收盘价",
    colour = "股票代码"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

对数收益率对比
library(dplyr)
# Daily log return per ticker: log(P_t) - log(P_{t-1}).
# dplyr::lag() is called explicitly because a bare lag() resolves to
# whichever attached package masks last; stats::lag() would return the
# values unshifted and every return would silently become 0.
# order_by = Date makes the lag follow calendar order regardless of how the
# rows happen to be sorted.
return_data <- stock_data %>%
  group_by(symbol) %>%
  mutate(log_return = log(price) - log(dplyr::lag(price, order_by = Date))) %>%
  ungroup() %>%
  drop_na() # the first observation of each ticker has no previous price
# Log-return series of every ticker, one semi-transparent line per ticker.
ggplot(
  data = return_data,
  mapping = aes(x = Date, y = log_return, colour = symbol)
) +
  geom_line(alpha = 0.7) +
  labs(
    title = "对数收益率对比",
    x = "日期",
    y = "对数收益率",
    colour = "股票代码"
  ) +
  theme_minimal() +
  theme(legend.position = "top") # legend sits above the plot panel

绘制对数收益率密度图: