森林图是以统计指标和统计分析方法为基础,用数值运算结果绘制出的图型。它在平面直角坐标系中,以一条垂直的无效线 (横坐标刻度为1或0)为中心,用平行于横轴的多条线描述每个被纳入研究的效应量和可信区间,用一个棱形 (或其它图形)描述多个研究合并的效应量及可信区间,简单直观地描述了Meta分析的统计结果。森林图是展示效应量及其可信区间的重要可视化工具,下面我们详细介绍如何使用ggplot2包创建自定义的森林图。

####################################森林图绘制#################################### # ====================== # 第一部分:准备工作 # ====================== # 清除工作环境 rm(list = ls()) options(stringsAsFactors = FALSE) # 加载必要的包 if(!require(ggplot2)) install.packages("ggplot2")

## 载入需要的程序包:ggplot2

## Warning: 程序包'ggplot2'是用R版本4.4.3 来建造的

if(!require(dplyr)) install.packages("tidyverse")

## 载入需要的程序包:dplyr

##  ## 载入程序包:'dplyr'

## The following objects are masked from 'package:stats': ##  ##     filter, lag

## The following objects are masked from 'package:base': ##  ##     intersect, setdiff, setequal, union

library(ggplot2) library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ── ## ✔ forcats   1.0.0     ✔ stringr   1.5.1 ## ✔ lubridate 1.9.4     ✔ tibble    3.2.1 ## ✔ purrr     1.0.2     ✔ tidyr     1.3.1 ## ✔ readr     2.1.5

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ## ✖ dplyr::filter() masks stats::filter() ## ✖ dplyr::lag()    masks stats::lag() ## ℹ Use the conflicted package ( ) to force all conflicts to become errors

# ====================== # 第二部分:创建模拟数据集 # ====================== # 创建模拟数据 set.seed(123) forest_data <- data.frame(   Varnames = paste("Variable", LETTERS[1:10]),   Factor = rep(c("Group1", "Group2"), each = 5),   OR = c(1.2, 1.5, 0.8, 1.1, 0.9, 1.8, 2.1, 1.4, 0.7, 1.3),   Lower = c(0.9, 1.1, 0.6, 0.8, 0.7, 1.3, 1.5, 1.0, 0.5, 0.9),   Upper = c(1.5, 1.9, 1.0, 1.4, 1.1, 2.3, 2.7, 1.8, 0.9, 1.7),   Sample = sample(50:200, 10) ) %>%   mutate(Varnames = factor(Varnames, levels = Varnames))  # 保持原始顺序 # 创建注释数据 annotation_data <- data.frame(   x = c(0.6, 2.2),   y = c(3, 8),   label = c("Protective Effect", "Risk Effect") ) # ====================== # 第三部分:基础森林图 # ====================== basic_forest <- ggplot(forest_data, aes(x = OR, y = Varnames, color = Factor)) +   geom_point(size = 3) +   geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.2) +   geom_vline(xintercept = 1, linetype = "dashed", color = "red", size = 0.8) +   scale_x_continuous(limits = c(0.5, 3), breaks = seq(0.5, 3, 0.5)) +   labs(x = "Odds Ratio (95% CI)", y = "",         title = "Basic Forest Plot") +   theme_bw() +   theme(legend.position = "top",         plot.title = element_text(hjust = 0.5, face = "bold"),         axis.text.y = element_text(face = "bold"))

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0. ## ℹ Please use `linewidth` instead. ## This warning is displayed once every 8 hours. ## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was ## generated.

print(basic_forest)

# ggsave("basic_forest.png", width = 8, height = 6, dpi = 300) # ====================== # 第四部分:添加注释的森林图 # ====================== annotated_forest <- ggplot(forest_data, aes(x = OR, y = Varnames)) +   geom_point(aes(color = Factor), size = 3) +   geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.2) +   geom_vline(xintercept = 1, linetype = "dashed", color = "red", size = 0.8) +   scale_x_continuous(limits = c(0.5, 3), breaks = seq(0.5, 3, 0.5)) +   labs(x = "Odds Ratio (95% CI)", y = "",         title = "Annotated Forest Plot") +   theme_bw() +   theme(legend.position = "top",         plot.title = element_text(hjust = 0.5, face = "bold"),         panel.grid.major = element_blank(),         panel.grid.minor = element_blank(),         axis.text.y = element_text(face = "bold")) +   geom_text(data = annotation_data,              aes(x = x, y = y, label = label),             color = "blue", fontface = "bold", size = 4) +   annotate("text", x = 1, y = 10.5, label = "Reference Line (OR = 1)",             color = "red", vjust = -0.5) print(annotated_forest)

# ggsave("annotated_forest.png", width = 9, height = 6, dpi = 300) # ====================== # 第五部分:点大小表示样本量的森林图 # ====================== size_forest <- ggplot(forest_data, aes(x = OR, y = Varnames)) +   geom_point(aes(size = Sample, color = Factor), alpha = 0.7) +   geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.2) +   geom_vline(xintercept = 1, linetype = "dashed", color = "red", size = 0.8) +   scale_x_continuous(limits = c(0.5, 3), breaks = seq(0.5, 3, 0.5)) +   scale_size_continuous(range = c(3, 10),                          breaks = c(50, 100, 150, 200),                         labels = c("50", "100", "150", "200")) +   labs(x = "Odds Ratio (95% CI)", y = "",         title = "Forest Plot with Sample Size",        size = "Sample Size", color = "Group") +   theme_bw() +   theme(legend.position = "right",         plot.title = element_text(hjust = 0.5, face = "bold"),         panel.grid.major = element_blank(),         panel.grid.minor = element_blank(),         axis.text.y = element_text(face = "bold"),         legend.box = "vertical") print(size_forest)

# ggsave("size_forest.png", width = 10, height = 6, dpi = 300) # ====================== # 第六部分:高级定制森林图 # ====================== advanced_forest <- ggplot(forest_data, aes(x = OR, y = Varnames)) + # 添加背景区域   annotate("rect", xmin = -Inf, xmax = 1, ymin = -Inf, ymax = Inf,            fill = "lightblue", alpha = 0.05) +   annotate("rect", xmin = 1, xmax = Inf, ymin = -Inf, ymax = Inf,            fill = "pink", alpha = 0.05) + # 添加数据点   geom_point(aes(size = Sample, color = Factor), shape = 18) + # 添加误差线   geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.1, size = 0.8) + # 添加参考线   geom_vline(xintercept = 1, linetype = "dashed", color = "black", size = 1) + # 坐标轴设置   scale_x_continuous(limits = c(0.4, 3), breaks = seq(0.5, 3, 0.5)) +   scale_color_manual(values = c("Group1" = "#1f78b4", "Group2" = "#e31a1c")) +   scale_size_continuous(range = c(3, 10)) + # 标签和标题   labs(x = "\nOdds Ratio with 95% Confidence Interval", y = "",         title = "Advanced Customized Forest Plot",        subtitle = "Effect sizes with sample size indicators",        caption = "Reference line at OR = 1.0\nDashed lines represent 95% CIs",        color = "Study Group", size = "Sample Size") + # 主题设置   theme_minimal() +   theme(     plot.title = element_text(face = "bold", hjust = 0.5, size = 14),     plot.subtitle = element_text(hjust = 0.5, size = 12),     axis.text.y = element_text(face = "bold", size = 10),     axis.text.x = element_text(size = 10),     axis.title.x = element_text(face = "bold", size = 11),     legend.position = "bottom",     legend.box = "horizontal",     panel.grid.major = element_line(color = "gray90"),     panel.grid.minor = element_blank(),     plot.background = element_rect(fill = "white", color = NA)   ) + # 添加注释   geom_text(data = annotation_data,              aes(x = x, y = y, label = label),             color = c("#1f78b4", "#e31a1c"),              fontface = "bold", size = 4.5) + # 添加箭头指示   annotate("segment", x = 0.6, xend = 0.8, y = 3, yend = 3,             arrow = arrow(length = unit(0.2, "cm")), color = "#1f78b4") +   annotate("segment", x = 2.2, xend = 2.0, y = 8, yend = 8,             arrow = arrow(length = unit(0.2, "cm")), color = "#e31a1c") print(advanced_forest)

# ggsave("advanced_forest.png", width = 10, height = 7, dpi = 300) # ====================== # 第七部分:数据说明 # ====================== cat(" 主数据结构说明: - Varnames: 变量名称 (在y轴显示) - Factor: 分组变量 (用于颜色分组) - OR: 效应量 (比值比) - Lower: 置信区间下限 - Upper: 置信区间上限 - Sample: 样本量 (可用于点的大小) 注释数据结构说明: - x: 注释文本的x坐标位置 - y: 注释文本的y坐标位置 - label: 要显示的文本内容 ")

##  ## 主数据结构说明: ## - Varnames: 变量名称 (在y轴显示) ## - Factor: 分组变量 (用于颜色分组) ## - OR: 效应量 (比值比) ## - Lower: 置信区间下限 ## - Upper: 置信区间上限 ## - Sample: 样本量 (可用于点的大小) ##  ## 注释数据结构说明: ## - x: 注释文本的x坐标位置 ## - y: 注释文本的y坐标位置 ## - label: 要显示的文本内容

# ====================== # 第八部分:总结 # ====================== cat(" 使用说明: 1. 基础森林图展示了最基本的森林图元素 2. 注释森林图添加了文字说明和标记 3. 样本量森林图通过点大小反映样本量信息 4. 高级森林图展示了更多定制化选项 使用建议: - 根据实际数据调整坐标轴范围 - 选择合适的颜色方案 - 调整图例位置和样式以适应不同需求 - 使用ggsave()保存高质量图片 ")

##  ## 使用说明: ## 1. 基础森林图展示了最基本的森林图元素 ## 2. 注释森林图添加了文字说明和标记 ## 3. 样本量森林图通过点大小反映样本量信息 ## 4. 高级森林图展示了更多定制化选项 ##  ## 使用建议: ## - 根据实际数据调整坐标轴范围 ## - 选择合适的颜色方案 ## - 调整图例位置和样式以适应不同需求 ## - 使用ggsave()保存高质量图片

#########文献森林图1,饮料消费与遗传风险的森林图############## # 创建数据框 beverage_data <- data.frame(   GeneticRisk = rep(c("Low", "Intermediate", "High"), each = 12),   Consumption = rep(rep(c("0/day", ">0-1/day", ">1-2/day", ">2/day"), 3), each = 3),   BeverageType = rep(c("SSBs", "ASBs", "PJs"), 12),   HR = c(1.00, 0.97, 0.87, 1.48, 1.00, 0.82, 1.28, 1.50, 1.00, 0.85, 1.06, 1.12, 1.00, 0.97, 1.03, 1.17, 1.00, 1.02, 1.20, 1.21, 1.00, 0.93, 1.01, 0.96, 1.00, 0.99, 1.03, 1.15, 1.00, 1.08, 1.16, 1.19, 1.00, 0.89, 0.92, 0.82),   LowerCI = c(NA, 0.84, 0.69, 1.14, NA, 0.67, 1.00, 1.08, NA, 0.75, 0.88, 0.83, NA, 0.90, 0.92, 1.00, NA, 0.92, 1.05, 1.01, NA, 0.87, 0.91, 0.80, NA, 0.92, 0.91, 0.99, NA, 0.98, 1.00, 0.99, NA, 0.84, 0.82, 0.68),   UpperCI = c(NA, 1.12, 1.09, 1.91, NA, 0.99, 1.64, 2.08, NA, 0.96, 1.27, 1.53, NA, 1.05, 1.15, 1.36, NA, 1.12, 1.38, 1.45, NA, 0.99, 1.12, 1.14, NA, 1.08, 1.16, 1.35, NA, 1.19, 1.33, 1.44, NA, 0.96, 1.02, 0.99) ) # 绘制森林图 ggplot(beverage_data, aes(x = HR, y = interaction(Consumption, BeverageType),                            color = BeverageType)) +   geom_point(size = 3, position = position_dodge(width = 0.5)) +   geom_errorbarh(aes(xmin = LowerCI, xmax = UpperCI),                   height = 0.2, position = position_dodge(width = 0.5)) +   geom_vline(xintercept = 1, linetype = "dashed") +   facet_grid(GeneticRisk ~ ., scales = "free_y", space = "free_y") +   scale_color_manual(values = c("SSBs" = "#E41A1C", "ASBs" = "#377EB8", "PJs" = "#4DAF4A")) +   labs(x = "Hazard Ratio (95% CI)", y = "Consumption Level",         title = "Association between beverage consumption and risk by genetic risk level") +   theme_minimal() +   theme(panel.grid.major.y = element_blank(),         panel.grid.minor.y = element_blank(),         strip.text = element_text(face = "bold"))

## Warning: Removed 9 rows containing missing values or values outside the scale range ## (`geom_errorbarh()`).

#########文献森林图2,亚组分析的森林图############## subgroup_data <- data.frame(   Subgroup = c("All patients", "Age ≤72 yr", "Age >72 yr", "Female", "Male",  "Asian", "Black", "White", "Other", "Europe or Saudi Arabia",  "Asia", "Latin America", "North America", "NYHA class II",  "NYHA class III or IV", "LVEF ≤49%", "LVEF 50-59%", "LVEF ≥60%", "NT-proBNP ≤1011 pg/ml", "NT-proBNP >1011 pg/ml", "Not recently hospitalized", "Recently hospitalized", "No diabetes", "Diabetes", "No atrial fibrillation",  "Atrial fibrillation", "BMI <30", "BMI ≥30", "eGFR <60 ml/min", "eGFR ≥60 ml/min", "SBP ≤128 mm Hg", "SBP >128 mm Hg", "No previous LVEF ≤40%", "Previous LVEF ≤40%"),   HR = c(0.82, 0.82, 0.81, 0.81, 0.81, 0.81, 0.83, 0.78, 0.80, 0.84,  0.82, 0.82, 0.71, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81,  0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81),  # 补充4个值使总数达到34   LowerCI = c(0.73, 0.69, 0.67, 0.69, 0.67, 0.67, 0.46, 0.57, 0.65, 0.68,  0.69, 0.72, 0.60, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69,  0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69),  # 补充4个值   UpperCI = c(0.92, 0.96, 0.97, 0.96, 0.97, 0.97, 1.48, 1.07, 0.98, 1.02,  0.96, 0.94, 0.85, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96,  0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96),  # 补充4个值   EventsDapa = c(512, 247, 265, 195, 317, 97, 21, 372, 22, 261, 92, 70, 89,  331, 181, 207, 174, 131, 173, 339, 419, 93, 242, 270, 285,  227, 275, 236, 289, 223, 285, 227, 275, 236),  # 补充4个值   TotalDapa = c(3131, 1545, 1586, 1364, 1767, 630, 81, 2214, 206, 1494, 607,  602, 428, 2314, 817, 1067, 1133, 931, 1555, 1576, 2803, 328,  1730, 1401, 1803, 1327, 1734, 1395, 1516, 1615, 1803, 1327, 1734, 1395),  # 补充4个值   EventsPlac = c(610, 306, 304, 243, 367, 106, 19, 461, 24, 309, 103, 87,  111, 411, 198, 229, 211, 170, 208, 402, 497, 113, 293, 317,  339, 271, 302, 308, 355, 255, 339, 271, 302, 308),  # 补充4个值   TotalPlac = c(3132, 1604, 1528, 1383, 1749, 644, 78, 2225, 185, 1511, 619,  579, 423, 2399, 732, 1049, 1123, 960, 1578, 1553, 2806, 326,  1727, 1405, 1814, 1317, 1736, 1392, 1554, 1577, 1814, 1317, 1736, 1392)  # 补充4个值 ) # 检查数据结构 str(subgroup_data)

## 'data.frame':    34 obs. of  8 variables: ##  $ Subgroup  : chr  "All patients" "Age ≤72 yr" "Age >72 yr" "Female" ... ##  $ HR        : num  0.82 0.82 0.81 0.81 0.81 0.81 0.83 0.78 0.8 0.84 ... ##  $ LowerCI   : num  0.73 0.69 0.67 0.69 0.67 0.67 0.46 0.57 0.65 0.68 ... ##  $ UpperCI   : num  0.92 0.96 0.97 0.96 0.97 0.97 1.48 1.07 0.98 1.02 ... ##  $ EventsDapa: num  512 247 265 195 317 97 21 372 22 261 ... ##  $ TotalDapa : num  3131 1545 1586 1364 1767 ... ##  $ EventsPlac: num  610 306 304 243 367 106 19 461 24 309 ... ##  $ TotalPlac : num  3132 1604 1528 1383 1749 ...

head(subgroup_data)

##       Subgroup   HR LowerCI UpperCI EventsDapa TotalDapa EventsPlac TotalPlac ## 1 All patients 0.82    0.73    0.92        512      3131        610      3132 ## 2   Age ≤72 yr 0.82    0.69    0.96        247      1545        306      1604 ## 3   Age >72 yr 0.81    0.67    0.97        265      1586        304      1528 ## 4       Female 0.81    0.69    0.96        195      1364        243      1383 ## 5         Male 0.81    0.67    0.97        317      1767        367      1749 ## 6        Asian 0.81    0.67    0.97         97       630        106       644

# 绘制森林图 ggplot(subgroup_data, aes(x = HR, y = reorder(Subgroup, desc(Subgroup)))) +   geom_point(size = 2) +   geom_errorbarh(aes(xmin = LowerCI, xmax = UpperCI), height = 0.2) +   geom_vline(xintercept = 1, linetype = "dashed") +   geom_text(aes(label = paste0(EventsDapa, "/", TotalDapa, " vs ",                                 EventsPlac, "/", TotalPlac)),              x = 1.5, hjust = 0, size = 3) +   scale_x_continuous(limits = c(0.4, 1.6), breaks = seq(0.4, 1.6, by = 0.2)) +   labs(x = "Hazard Ratio (95% CI)", y = "",         title = "Subgroup Analysis of Treatment Effect") +   theme_minimal() +   theme(panel.grid.major.y = element_blank(),         panel.grid.minor.y = element_blank())

#################文献森林图3,系统发育多样性的森林图################## # 创建数据框 phylogenetic_data <- data.frame(   Group = c("Acidobacteria", "Actinobacteria", "Bacteroidetes", "Chlamydiae",  "Chloroflexi", "Firmicutes", "Gemmatimonadetes", "Nitrospirae",  "Planctomycetes", "α-Proteobacteria", "β-Proteobacteria",  "γ-Proteobacteria", "δ-Proteobacteria", "Verrucomicrobia",  "Ascomycota", "Basidiomycota", "Mortierellomycota", "AMF",  "Plant pathogen", "Saprotroph", "Cercozoa", "Ciliophora",  "Conosa", "Lobosa", "Ochrophyta", "Consumer", "Phototroph",  "Parasite"),   EffectSize = c(0.2, -0.3, 0.1, -0.4, 0.3, -0.2, 0.5, -0.1, 0.4, -0.3,  0.2, -0.5, 0.1, -0.2, 0.3, -0.4, 0.2, -0.1, 0.4, -0.3,  0.1, -0.2, 0.3, -0.4, 0.2, -0.1, 0.5, -0.3),   Significance = c("P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05",  "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05",  "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05",  "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05",  "P<0.05", "P≥0.05", "P<0.05", "P≥0.05") ) # 绘制森林图 ggplot(phylogenetic_data, aes(x = EffectSize, y = reorder(Group, EffectSize),                                fill = Significance)) +   geom_col(aes(fill = Significance), width = 0.7) +   geom_vline(xintercept = 0, linetype = "solid") +   scale_fill_manual(values = c("P<0.05" = "#E41A1C", "P≥0.05" = "#377EB8")) +   scale_x_continuous(limits = c(-0.6, 0.6), breaks = seq(-0.6, 0.6, by = 0.2)) +   labs(x = "Warming Effect Size", y = "Phylogenetic Group",         title = "Phylogenetic Diversity and Relative Abundance") +   theme_minimal() +   theme(panel.grid.major.y = element_blank(),         panel.grid.minor.y = element_blank(),         legend.position = "top")

#################文献森林图4,临床特征的森林图################## # 创建数据框 clinical_data <- data.frame(   Feature = c("Age", "IDH_status (Mutant)", "MGMT_promoter_status (Wildtype)",  "MGMT_promoter_status (Methylated)", "MGMT_promoter_status (Un-methylated)",  "Stage (WHO II)", "Stage (WHO III)", "Stage (WHO IV)"),   HR = c(1.0, 1.0, 3.0, 1.0, 1.3, 2.2, 2.2, 4.1),   LowerCI = c(1.02, NA, 1.92, NA, 0.95, 1.44, 1.44, 2.39),   UpperCI = c(1.0, NA, 4.7, NA, 1.8, 3.3, 3.3, 6.9),   N = c(674, 428, 237, 477, 163, 248, 265, 160) ) # 绘制森林图 ggplot(clinical_data, aes(x = HR, y = reorder(Feature, desc(Feature)))) +   geom_point(size = 3) +   geom_errorbarh(aes(xmin = LowerCI, xmax = UpperCI), height = 0.2) +   geom_vline(xintercept = 1, linetype = "dashed") +   geom_text(aes(label = paste0("N=", N)), x = 5, hjust = 0, size = 3.5) +   scale_x_log10(limits = c(0.5, 10), breaks = c(0.5, 1, 2, 3, 4, 5, 10)) +   labs(x = "Hazard Ratio (95% CI)", y = "",         title = "Clinical Features and Hazard Ratios",        subtitle = "Events: 214; Global p-value (Log-Rank): 1.0821e-67\nAIC: 2020.23; Concordance Index: 0.86") +   theme_minimal() +   theme(panel.grid.major.y = element_blank(),         panel.grid.minor.y = element_blank(),         plot.subtitle = element_text(size = 10))

## Warning: Removed 2 rows containing missing values or values outside the scale range ## (`geom_errorbarh()`).

以上代码完全是不断向DeepSeek提问、不断纠正生成,供大家参考!