A dot plot with outliers

Posted by Chevy on October 30, 2024

Data Preparation

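Load the dataset and adjust the capped IC50 values. A value of 1000 in the IC50 column stands for "IC50 > 1000", so plotting it as-is would stretch the axis. Replace every such value with a calculated placeholder that sits slightly above the remaining data (1.1 times the next-highest IC50).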

# Read the second worksheet and give the sixth column the fixed name IC50 so
# the rest of the script does not depend on the spreadsheet's header text.
data <- readxl::read_excel("ic50.xlsx", sheet = 2) %>%
  rename(IC50 = colnames(.)[6])

# An IC50 of exactly 1000 is the capped reading (labelled "IC50 > 1000" in the
# plots). The placeholder is placed 10% above the largest value that is NOT the
# cap. Taking the second-largest value overall would return 1000 itself
# whenever two or more rows are capped.
outlier <- local({
  measured <- data$IC50[!is.na(data$IC50) & data$IC50 != 1000]
  if (length(measured) == 0) {
    stop("No IC50 values other than the 1000 cap were found.", call. = FALSE)
  }
  max(measured) * 1.1
})

# which() drops NA comparisons, so missing IC50 values are left untouched.
data$IC50[which(data$IC50 == 1000)] <- outlier

Plot 1: IC50 Distribution by Tissue

We plot the IC50 values grouped by tissue type, using a color gradient with one color per unique tissue. Outliers (values recorded as IC50 > 1000) are displayed as black points with a text label.

# Fill palette: a blue -> white -> red ramp interpolated to one color per
# distinct value in data$Tissue.
col <- local({
  gradient_stops <- c(
    "royalblue3", "deepskyblue3", "white", "lightpink2", "tomato3"
  )
  n_tissues <- length(unique(data$Tissue))
  colorRampPalette(colors = gradient_stops)(n_tissues)
})

# Generate the plot
# Boxes and jittered points are drawn from the rows that do NOT hold the
# `outlier` placeholder; the placeholder rows are added afterwards as black
# points with a text label. Comparing with ==/!= is safe here because the
# placeholder rows were assigned the exact value stored in `outlier`.
ggplot(
  data %>% filter(IC50 != outlier),
  aes(
    # Position is ordered by descending median IC50, while the fill factor is
    # ordered by ascending median, which fixes the order in which the colors
    # in `col` are handed out to the tissues.
    x = reorder(Tissue, -IC50, FUN = median),
    y = IC50,
    fill = reorder(Tissue, IC50, FUN = median)
  )
) +
  # outlier.shape = NA hides the boxplot's own outlier markers; every
  # observation is already shown by geom_jitter().
  geom_boxplot(outlier.shape = NA, alpha = 0.5, color = "black") +
  geom_jitter(width = 0.1, size = 2, shape = 21, color = "black", alpha = 0.9) +
  scale_fill_manual(values = col) +
  scale_y_continuous() +
  labs(
    title = "Distribution of IC50 Values",
    # This plot groups by Tissue, so the subtitle says so.
    subtitle = "102 cancer cell lines grouped by tissue type",
    x = NULL,
    y = "IC50 (nM)"
  ) +
  theme_minimal(base_size = 14) +
  # Element overrides have to be wrapped in theme(); it comes after
  # theme_minimal() so the complete theme does not reset them.
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 18),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    axis.text.x = element_text(
      angle = 0, size = 12, color = "black", face = "bold"
    ),
    axis.text.y = element_text(
      angle = 0, size = 13, hjust = 1, face = "italic", color = "black"
    ),
    axis.title.x = element_text(
      margin = margin(t = 0), face = "bold", size = 15, color = "black"
    ),
    axis.title.y = element_text(margin = margin(r = 0)),
    legend.position = "none"
  ) +
  # Flip so that tissues run down the vertical axis and IC50 is horizontal.
  coord_flip() +
  # Placeholder rows: fixed black fill overrides the per-tissue fill mapping.
  geom_point(
    data = data %>% filter(IC50 == outlier),
    size = 3, shape = 21, fill = "black", color = "gray", alpha = 0.9
  ) +
  ggrepel::geom_text_repel(
    data = data %>% filter(IC50 == outlier),
    label = "Outlier, IC50 > 1000",
    fontface = "italic", color = "brown3", size = 5, nudge_x = 1
  )

Plot 2: IC50 Distribution by Disease Type

This plot displays IC50 values grouped by disease type, with a similar color gradient based on unique disease values.

# Fill palette: a blue -> white -> red ramp interpolated to one color per
# distinct value in data$Disease.
col <- local({
  gradient_stops <- c(
    "royalblue3", "deepskyblue3", "white", "lightpink2", "tomato3"
  )
  n_diseases <- length(unique(data$Disease))
  colorRampPalette(colors = gradient_stops)(n_diseases)
})

# Generate the plot
# Boxes and jittered points are drawn from the rows that do NOT hold the
# `outlier` placeholder; the placeholder rows are added afterwards as black
# points with a text label. Comparing with ==/!= is safe here because the
# placeholder rows were assigned the exact value stored in `outlier`.
ggplot(
  data %>% filter(IC50 != outlier),
  aes(
    # Position and fill share the same ordering (ascending median IC50), so
    # the colors in `col` follow the order of the groups along the axis.
    x = reorder(Disease, IC50, FUN = median),
    y = IC50,
    fill = reorder(Disease, IC50, FUN = median)
  )
) +
  # outlier.shape = NA hides the boxplot's own outlier markers; every
  # observation is already shown by geom_jitter().
  geom_boxplot(outlier.shape = NA, alpha = 0.7, color = "black") +
  geom_jitter(width = 0.1, size = 3, shape = 21, color = "black", alpha = 0.9) +
  scale_fill_manual(values = col) +
  scale_y_continuous() +
  labs(
    title = "Distribution of IC50 Values by Disease Type",
    subtitle = "102 cancer cell lines grouped by disease type",
    x = NULL,
    y = "IC50 (nM)"
  ) +
  theme_minimal(base_size = 14) +
  # Element overrides have to be wrapped in theme(); it comes after
  # theme_minimal() so the complete theme does not reset them.
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 18),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    axis.text.x = element_text(
      angle = 45, size = 12, hjust = 1, color = "black"
    ),
    axis.text.y = element_text(angle = 0, face = "italic", color = "black"),
    axis.title.x = element_text(
      margin = margin(t = 0), face = "bold", size = 15, color = "black"
    ),
    axis.title.y = element_text(margin = margin(r = 0)),
    legend.position = "none"
  ) +
  # Flip so that diseases run down the vertical axis and IC50 is horizontal.
  coord_flip() +
  # Placeholder rows: fixed black fill overrides the per-disease fill mapping.
  geom_point(
    data = data %>% filter(IC50 == outlier),
    size = 3, shape = 21, fill = "black", color = "gray", alpha = 0.9
  ) +
  ggrepel::geom_text_repel(
    data = data %>% filter(IC50 == outlier),
    label = "Outlier, IC50 > 1000",
    fontface = "italic", color = "brown3", size = 5, nudge_y = -10
  )