The dataset for this week's analysis is sourced from Simon Couch's detectors R package, which includes predictions from multiple GPT detectors. In a recent study by Liang et al. (2023), a range of essays were subjected to different GPT detection models. By comparing the predictions of these models for essays written by native and non-native English writers, the authors discovered a notable disparity. Specifically, the GPT detectors exhibited a tendency to misclassify authentic writing by non-native English writers as AI-generated, suggesting potential bias in the detection process.

I decided to make a visualization with three subplots: two heatmaps and one bar chart.

Loading packages, data, and fonts, and defining colors

# Load packages ----
# Every package below is used unqualified somewhere in this script.
library(dplyr)      # %>%, mutate(), count(), group_by(), filter(), if_else()
library(tidyr)      # replace_na()
library(ggplot2)    # plotting
library(ggtext)     # element_markdown()
library(glue)       # glue()
library(patchwork)  # plot composition (/, |, &, plot_layout())
library(showtext)   # attaches sysfonts, which provides font_add()

# Load data ----
# TidyTuesday 2023-07-18 (GPT detectors). The same data ships with the
# `detectors` package as `detectors::detectors`.
# NOTE(review): the path was empty in the original; confirm this URL.
detectors <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-07-18/detectors.csv"
)

# Load necessary fonts ----
# Font Awesome families referenced by name in the caption's <span> styles.
# NOTE(review): these are machine-specific absolute paths; adjust locally.
font_add('fa-reg', 'c:/Users/info/OneDrive/Dokumente/fonts/Font Awesome 6 Free-Regular-400.otf')
font_add('fa-brands', 'c:/Users/info/OneDrive/Dokumente/fonts/Font Awesome 6 Brands-Regular-400.otf')
font_add('fa-solid', 'c:/Users/info/OneDrive/Dokumente/fonts/Font Awesome 6 Free-Solid-900.otf')

# Fonts registered with font_add() are only used for rendering once
# showtext is switched on for the graphics devices.
showtext_auto()

# Base palette: the vector returned by thematic::okabe_ito()
col <- thematic::okabe_ito()

# Colours taken from the palette by position
col1 <- col[1]
col3 <- col[2]
col4 <- col[3]
col5 <- col[6]

# Fixed colours that do not come from the palette
bg <- "white"
col2 <- "darkgrey"

Text generation

# Inline HTML snippets for markdown-rendered text (caption etc.).
# Each icon is a single glyph wrapped in a <span> that selects one of the
# registered Font Awesome families and colours it with `col5`.

# Glyphs drawn with the 'fa-brands' family
twitter <- glue("<span style='color:{col5};font-family:fa-brands;'>&#xf099;</span>")
mastodon <- glue("<span style='color:{col5};font-family:fa-brands;'>&#xf4f6;</span>")

# Glyphs drawn with the 'fa-solid' family
link <- glue("<span style='color:{col5};font-family:fa-solid;'>&#xf0c1;</span>")
data <- glue("<span style='color:{col5};font-family:fa-solid;'>&#xf1c0;</span>")
quote <- glue("<span style='color:{col5};font-family:fa-solid;'>&#xf10d;</span>")
hash <- glue("<span style='color:{col5};font-family:fa-solid;'>&#x23;</span>")

# Spacers: dashes coloured like the background (`bg`), so they take up
# horizontal room without being visible.
space <- glue("<span style='color:{bg}'>-</span>")
space2 <- glue("<span style='color:{bg}'>--</span>")

# Caption for the combined figure: Twitter handle, data source and the
# paper citation, separated by "|". Each part starts with its icon
# (`twitter`, `data`, `quote`), and `space2` pads around the separators.
# The literal spans several lines on purpose; its line breaks and
# indentation are part of what glue() processes, so do not re-wrap it.
cap <- glue("{twitter}{space2}@web_design_fh{space2}|
	{data}{space2}detectors R package by @simonpcouch{space2}|
	{quote}{space2}Liang, W., Yuksekgonul, M., Mao, Y., Wu, E. & Zou, J. (2023).
	GPT detectors are biased against non-native English writers. arXiv (Cornell University).")

# Title and subtitle of subplot 1 (heatmap by essay kind and detector).
subplot1.title <- "Detection Accuracy Comparison: AI vs. Human Essays"
subplot1.subtitle <- "Analyzing Accuracy across Essay Types"

# Title and subtitle of subplot 2 (heatmap for human-written essays only).
subplot2.title <- "Accuracy by Writer Type: Native vs. Non-Native Speakers"
subplot2.subtitle <- "Examining Accuracy of Human-Written Essays"

# Title and subtitle of subplot 3 (bar chart). The subtitle doubles as the
# legend: each writer type is written in the colour used for its bars
# (col3 = AI, col1 = native speakers, col4 = non-native speakers).
subplot3.title <- glue("Detection Accuracy by Writer Type")
subplot3.subtitle <- glue("written by <span style='color:{col3}'>AI</span>, 
	<span style='color:{col1}'>native speakers</span>, 
	and <span style='color:{col4}'>non-native speakers</span>")

# Title and subtitle of the combined figure. The <br> tags are explicit
# line breaks for the markdown-rendered subtitle.
p.title <- "Detection Accuracy and Writer Type Analysis"
p.subtitle <- glue("In a recent study by Liang et al. (2023), a range of essays were subjected to <br>
different GPT detection models. By comparing the predictions of these models for essays written by <br>
native and non-native English writers, the authors discovered a notable disparity. Specifically, the <br>
GPT detectors exhibited a tendency to misclassify authentic writing by non-native English writers as <br>
AI-generated, suggesting potential bias in the detection process.")

Subplot 1

# Subplot 1: heatmap of detection accuracy per essay kind and detector.
# For every (kind, detector) pair, `percent` is the share of essays whose
# predicted class (.pred_class) equals the true kind.
subplot1 <- detectors %>%
  mutate(det.cor = kind == .pred_class) %>%
  count(kind, detector, det.cor) %>%
  group_by(kind, detector) %>%
  mutate(total = sum(n)) %>%       # essays per (kind, detector)
  filter(det.cor) %>%              # keep only the correctly classified rows
  mutate(percent = round(n / total, 2)) %>%
  ungroup() %>%
  ggplot(aes(x = detector, y = kind, fill = percent)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = col1) +
  theme_minimal() +
  coord_fixed() +
  geom_text(aes(label = percent), color = col5, size = 4) +
  # theme() must come after theme_minimal(), otherwise the complete theme
  # would overwrite these settings.
  theme(
    plot.title = element_markdown(color = col5, size = 11),
    plot.subtitle = element_markdown(size = 9),
    plot.caption = element_markdown(size = 8, lineheight = 1.3, color = col2),
    axis.title = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  labs(
    title = subplot1.title,
    subtitle = subplot1.subtitle
  )
Subplot 2

# Subplot 2: heatmap of detection accuracy for human-written essays only,
# split by native vs. non-native writers and detector.
# `percent` is the share of essays in each (native, detector) cell whose
# predicted class equals the true kind.
subplot2 <- detectors %>%
  filter(kind == "Human") %>%
  mutate(
    det.cor = kind == .pred_class,
    native = if_else(native == "Yes", "native", "non-native")
  ) %>%
  count(native, detector, det.cor) %>%
  group_by(native, detector) %>%
  mutate(total = sum(n)) %>%       # essays per (native, detector)
  # Keep only the correctly classified rows. Without this, each cell has a
  # TRUE and a FALSE row and their tiles and labels are drawn on top of
  # each other.
  filter(det.cor) %>%
  mutate(percent = round(n / total, 2)) %>%
  ungroup() %>%
  ggplot(aes(x = detector, y = native, fill = percent)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = col1) +
  theme_minimal() +
  coord_fixed() +
  geom_text(aes(label = percent), color = col5, size = 4) +
  # theme() must come after theme_minimal(), otherwise the complete theme
  # would overwrite these settings.
  theme(
    plot.title = element_markdown(color = col5, size = 11),
    plot.subtitle = element_markdown(size = 9),
    axis.title = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  labs(
    title = subplot2.title,
    subtitle = subplot2.subtitle
  )
Subplot 3

# Subplot 3: number of essays per detector, stacked by writer type and
# facetted by whether the detector's prediction was correct (det.cor).
# NOTE(review): the name of the writer-type column was lost in the original
# text; `writer` is a stand-in and is only used inside this pipeline.
subplot3 <- detectors %>%
  mutate(
    writer = if_else(native == "Yes", "native", "non-native"),
    # Rows with a missing `native` value are labelled "AI".
    writer = replace_na(writer, "AI"),
    det.cor = kind == .pred_class
  ) %>%
  ggplot(aes(x = detector, fill = writer)) +
  geom_bar() +
  facet_grid(det.cor ~ .) +
  # Unnamed values are matched to the fill levels in their sorted order:
  # "AI", "native", "non-native".
  scale_fill_manual(values = c(col3, col1, col4)) +
  theme_minimal() +
  # theme() must come after theme_minimal(), otherwise the complete theme
  # would overwrite these settings.
  theme(
    plot.title = element_markdown(color = col5, size = 11),
    plot.subtitle = element_markdown(size = 9),
    plot.caption = element_markdown(size = 8, lineheight = 1.3, color = col2),
    axis.title = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    legend.position = "none"  # the coloured subtitle acts as the legend
  ) +
  labs(
    title = subplot3.title,
    subtitle = subplot3.subtitle
  )
# Combine the subplots: the two heatmaps stacked on the left, the bar
# chart on the right.
p <- (subplot1 / subplot2) | subplot3

# Add the overall title, subtitle and caption, then print the figure.
# `+` binds tighter than `&`, so the theme below is applied to the fully
# assembled patchwork.
# NOTE(review): `&` adds the theme to every subplot as well as to the
# overall annotation, so these text sizes also replace the sizes set in the
# individual subplot themes. Confirm that this is intended.
p + plot_layout(widths = c(2, 1)) +
  plot_annotation(
    title = p.title,
    subtitle = p.subtitle,
    caption = cap
  ) &
  theme(
    plot.title = element_markdown(color = col5, size = 14),
    plot.subtitle = element_markdown(size = 12),
    plot.caption = element_markdown(color = "black", size = 9, lineheight = 1.3)
  )
Liang, W., Yuksekgonul, M., Mao, Y., Wu, E. & Zou, J. (2023). GPT detectors are biased against non-native English writers. arXiv (Cornell University).

Couch, S. (2023). Prediction Data from GPT Detectors. R-Package version: 0.0.1.

Thomas Mock (2022). Tidy Tuesday: A weekly data project aimed at the R ecosystem.