股票

R语言与多股票波动性及相关性的可视化

前言

在进行量化投资建模之前,我们常常需要对多只股票的价格走势、收益率序列和波动率等进行对比分析。下面给出使用 R 语言比较多只股票价格走势的完整解决方案,涵盖数据获取、数据清洗、可视化及基础分析的全流程。

数据获取

安装与加载工具包

# 安装必要包(首次运行需取消注释)
# install.packages(c("quantmod", 
#                    "tidyverse", 
#                    "ggplot2", 
#                    "zoo", 
#                    "corrplot"))

library(quantmod)   # 获取金融数据
library(tidyverse)  # 数据处理
library(ggplot2)    # 可视化
library(zoo)        # 时间序列处理

定义股票代码与时间范围

# Tickers to compare: Apple, Google, Microsoft, NVIDIA.
# Other markets are supported as well; A-share codes need a .SS/.SZ suffix.
stocks <- c(
  "AAPL",
  "GOOGL",
  "MSFT",
  "NVDA"
)
# Sample window: a fixed start date up to the day the script is run.
start_date <- "2023-01-01"
end_date <- Sys.Date()

批量获取股票数据

# Download the quotes from Yahoo for the chosen window. The per-ticker
# results are looked up by ticker name with get() in the next step.
getSymbols(
  stocks,
  src = "yahoo",
  from = start_date,
  to = end_date
)
## [1] "AAPL"  "GOOGL" "MSFT"  "NVDA"
# Reshape every ticker into a long table with columns Date, price, symbol
# and stack the per-ticker tables on top of each other.
stock_data <- bind_rows(
  lapply(stocks, function(x) {
    quotes <- get(x)
    # Column names carry a "<ticker>." prefix; strip it so that the closing
    # price column is simply called Close for every ticker.
    prefix <- paste0("^", x, "\\.")
    as_tibble(quotes) %>%
      rename_with(~ gsub(prefix, "", .x)) %>%
      mutate(Date = index(quotes), symbol = x) %>%
      select(Date, price = Close, symbol)
  })
)

# Preview the first rows
head(stock_data)
## # A tibble: 6 × 3
##   Date       price symbol
##   <date>     <dbl> <chr> 
## 1 2023-01-03  125. AAPL  
## 2 2023-01-04  126. AAPL  
## 3 2023-01-05  125. AAPL  
## 4 2023-01-06  130. AAPL  
## 5 2023-01-09  130. AAPL  
## 6 2023-01-10  131. AAPL

数据清洗

处理缺失值

library(dplyr)
# Count missing closing prices per ticker (kept as a table for inspection).
missing_values <- stock_data %>%
  group_by(symbol) %>%
  summarise(missing = sum(is.na(price)))

# Forward-fill gaps within each ticker.
# na.rm = FALSE keeps leading NAs in place so the filled vector has the same
# length as the group. With zoo's default (na.rm = TRUE) a leading NA is
# dropped, the result is shorter than the group and mutate() stops with a
# size-mismatch error.
# ungroup() keeps the grouping from leaking into later steps.
stock_data <- stock_data %>%
  group_by(symbol) %>%
  mutate(price = na.locf(price, na.rm = FALSE)) %>%
  ungroup()

对齐时间序列

library(dplyr)
# Calendar-day sequence covering the whole sample window.
full_dates <- tibble(Date = seq(as.Date(start_date),
                                as.Date(end_date),
                                by = "day"))

# Build the complete Date x symbol grid before joining.
# Joining the bare date list instead leaves symbol = NA on every date without
# quotes; those rows end up in their own NA group, are never filled and are
# removed again by na.omit(), so no alignment takes place at all.
# NOTE: after this step non-trading days carry the neighbouring close, which
# yields zero returns on those days in any later return calculation.
stock_data <- tidyr::expand_grid(
  Date = full_dates$Date,
  symbol = unique(stock_data$symbol)
) %>%
  left_join(stock_data, by = c("Date", "symbol")) %>%
  group_by(symbol) %>%
  # Rows are in ascending Date order within each symbol, so "down" carries the
  # last close forward and "up" back-fills days before the first quote.
  fill(price, .direction = "downup") %>%
  ungroup() %>%
  na.omit()

价格走势可视化

基础折线图

library(dplyr)
# Closing-price line chart, one colour per ticker, legend above the panel.
stock_data %>%
  ggplot(aes(x = Date, y = price, color = symbol)) +
  geom_line(linewidth = 0.8) +
  scale_color_manual(
    values = c(
      AAPL = "red",
      GOOGL = "blue",
      MSFT = "green",
      NVDA = "purple"
    )
  ) +
  labs(
    title = "多只股票价格走势对比",
    x = "日期",
    y = "收盘价",
    color = "股票代码"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

对数收益率对比

library(dplyr)
# Daily log return per ticker: log(P_t) - log(P_{t-1}).
# dplyr::lag() is namespaced on purpose: a bare lag() only resolves to dplyr
# because of package load order, and stats::lag() on a plain vector does not
# shift the values, which would silently give all-zero returns.
# order_by = Date makes the lag chronological regardless of row order.
return_data <- stock_data %>%
  group_by(symbol) %>%
  mutate(
    log_return = log(price) - log(dplyr::lag(price, order_by = Date))
  ) %>%
  ungroup() %>%
  # The first observation of each ticker has no previous price; drop it.
  na.omit()

# Log-return line chart, one semi-transparent line per ticker.
return_data %>%
  ggplot(aes(x = Date, y = log_return, color = symbol)) +
  geom_line(alpha = 0.7) +
  theme_minimal() +
  # Legend is placed above the panel, matching the price chart.
  theme(legend.position = "top") +
  labs(
    title = "对数收益率对比",
    x = "日期",
    y = "对数收益率",
    color = "股票代码"
  )

绘制对数收益率密度图: