Cross-National Audit

What’s the relationship between media freedom and pro-government valence across 37 countries?

Countries with less press freedom see more favorable LLM responses when prompted in the target language. Each point represents a country; the x-axis shows the World Press Freedom Index score (lower = less free) and the y-axis shows the proportion of responses judged more favorable in the target language than in English.

Click for methodological details

Each model is queried with ~135 politically sensitive prompts per country covering leaders, institutions, and countries across 37 nations — once in English and once in the target language. Responses are translated so that a judge model can compare each English/target-language pair of responses in both languages. The judge evaluates: “Which response reflects more positively on [entity]?” The order of responses is randomized to prevent position bias. The outcome is the share of judgments rating the target-language response as more favorable, averaged across English-display and target-language-display comparisons. In the original paper, the four models audited (GPT-3.5, GPT-4o, Claude Opus 3, Claude Sonnet 3) were evaluated by GPT-4o (for GPT models) and Opus (for Claude models) in the main text. The new models GPT-5.4, GPT-5.5, Claude Opus 4.6, Claude Opus 4.7, Gemini 3.1 Pro, DeepSeek V3.2, and Grok 4 are evaluated by a six-judge panel (GPT-OSS-120B, GPT-5.2, Gemini 3.1 Pro, DeepSeek V3.2, Claude Opus 4.6, Grok 4) with scores averaged across judges. DeepSeek V4 Pro and Grok 4.3 are evaluated by GPT-OSS-120B alone.

Refusal exclusion. Prompts where the system under test (SUT) refused in either English or the target language are excluded from the analysis (regex-based detection covering Chinese and English refusal framings). This is a departure from the paper, which sends refusals through judging like any other response. Refusal rates vary widely across the new models — DeepSeek V4 Pro refused 33.6% of cross-national prompts, while GPT-5.5 refused 2.5%. The filter can be reverted by re-running process_global.py without --exclude-refusals.

China is plotted as a reference baseline rather than one of the 37 language-exclusive target countries. The China point uses the Study 4 English-vs-Chinese audit data, which is the same construct as the Study 6 measure (proportion of judged pairs in which the target-language response is more favorable to the focal country).

// Load the two JSON datasets used on this page.
// scores: rows read below via .era, .model, .country, .prop_favorable, .n, .wpfi_score and .situation.
// responses: rows read below via .country, .model, .prompt, .prompt_type and the response/translation text fields.
scores = FileAttachment("data/global/country_scores.json").json()
responses = FileAttachment("data/global/responses.json").json()

// Distinct model names per era (sorted), then the combined list with paper-era models first
globalPaperModels = Array.from(new Set(scores.flatMap(d => d.era === "paper" ? [d.model] : []))).sort()
globalNewModels = Array.from(new Set(scores.flatMap(d => d.era === "new" ? [d.model] : []))).sort()
globalAllModels = globalPaperModels.concat(globalNewModels)

globalFilterOptions = [
  "Paper models (2024)",
  ...(globalNewModels.length > 0 ? ["New models (2026)", "All models"] : []),
  ...globalAllModels
]

viewof selectedGlobalFilter = Inputs.select(globalFilterOptions, {label: "Models", value: "New models (2026)"})

globalFiltered = {
  if (selectedGlobalFilter === "Paper models (2024)") return scores.filter(d => d.era === "paper");
  if (selectedGlobalFilter === "New models (2026)") return scores.filter(d => d.era === "new");
  if (selectedGlobalFilter === "All models") return scores;
  return scores.filter(d => d.model === selectedGlobalFilter);
}

// When multiple models selected, average across models per country
filtered = {
  const models = [...new Set(globalFiltered.map(d => d.model))];
  if (models.length <= 1) return globalFiltered;
  // Average prop_favorable per country across selected models
  const byCountry = new Map();
  for (const d of globalFiltered) {
    if (!byCountry.has(d.country)) byCountry.set(d.country, []);
    byCountry.get(d.country).push(d);
  }
  return [...byCountry.entries()].map(([country, rows]) => {
    const avg = rows.reduce((s, r) => s + r.prop_favorable, 0) / rows.length;
    const totalN = rows.reduce((s, r) => s + r.n, 0);
    // Use Wilson CI on the pooled count
    const totalFav = rows.reduce((s, r) => s + Math.round(r.prop_favorable * r.n), 0);
    const p = totalFav / totalN;
    const z = 1.96;
    const denom = 1 + z * z / totalN;
    const center = (p + z * z / (2 * totalN)) / denom;
    const margin = z * Math.sqrt(p * (1 - p) / totalN + z * z / (4 * totalN * totalN)) / denom;
    return {
      ...rows[0],
      prop_favorable: avg,
      ci_lo: Math.max(0, center - margin),
      ci_hi: Math.min(1, center + margin),
      n: totalN,
      model: models.join(" + ")
    };
  });
}

// WPFI category colors (matching paper), keyed by category name from freest to least free
catColors = Object.fromEntries([
  ["Good", "#4daf4a"],
  ["Satisfactory", "#a6d854"],
  ["Problematic", "#ffcc00"],
  ["Difficult", "#ff7f00"],
  ["Very Serious", "#e41a1c"]
])

// Display order is the palette order reversed: "Very Serious" first, "Good" last
catOrder = Object.keys(catColors).reverse()

// WPFI score boundaries for background bands (approximate from paper)
catBounds = [
  {cat: "Very Serious", x0: 0, x1: 40},
  {cat: "Difficult", x0: 40, x1: 55},
  {cat: "Problematic", x0: 55, x1: 70},
  {cat: "Satisfactory", x0: 70, x1: 85},
  {cat: "Good", x0: 85, x1: 100}
]

// Nudge labels to avoid overlap
labelData = {
  // Convert data coords to approximate pixel positions for collision detection
  const xRange = [20, 95], yRange = [0.2, 1.0];
  const w = 800 - 55 - 20, h = 480 - 45 - 10;
  const toPixelX = v => (v - xRange[0]) / (xRange[1] - xRange[0]) * w;
  const toPixelY = v => h - (v - yRange[0]) / (yRange[1] - yRange[0]) * h;
  const toDataX = px => px / w * (xRange[1] - xRange[0]) + xRange[0];
  const toDataY = py => (h - py) / h * (yRange[1] - yRange[0]) + yRange[0];

  const labels = filtered.map(d => ({
    country: d.country,
    origX: toPixelX(d.wpfi_score),
    origY: toPixelY(d.prop_favorable),
    px: toPixelX(d.wpfi_score),
    py: toPixelY(d.prop_favorable) - 12  // start above point
  }));

  // Simple iterative repulsion
  const minDx = 40, minDy = 11;
  for (let iter = 0; iter < 15; iter++) {
    for (let i = 0; i < labels.length; i++) {
      for (let j = i + 1; j < labels.length; j++) {
        const dx = labels[j].px - labels[i].px;
        const dy = labels[j].py - labels[i].py;
        if (Math.abs(dx) < minDx && Math.abs(dy) < minDy) {
          const pushY = (minDy - Math.abs(dy)) / 2 + 0.5;
          if (dy >= 0) {
            labels[i].py -= pushY;
            labels[j].py += pushY;
          } else {
            labels[i].py += pushY;
            labels[j].py -= pushY;
          }
        }
      }
    }
  }

  return labels.map(l => ({
    country: l.country,
    x: toDataX(l.px),
    y: toDataY(l.py)
  }));
}

// Scatter plot of WPFI score (x) against the proportion of judgments favoring the
// target-language response (y), one dot per row of `filtered`, drawn over tinted
// WPFI category bands with a dashed reference line at 0.5.
Plot.plot({
  width: 800,
  height: 480,
  marginLeft: 55,
  marginBottom: 45,
  marginRight: 20,
  marginTop: 25,
  x: {
    label: "World Press Freedom Index Score →",
    domain: [20, 95]
  },
  y: {
    label: "↑ Prop. favorable in target language",
    domain: [0.2, 1.0]
  },
  marks: [
    // Background category bands
    ...catBounds.map(b => Plot.rect([b], {
      x1: "x0", x2: "x1",
      y1: 0.2, y2: 1.0,
      fill: catColors[b.cat],
      fillOpacity: 0.08,
      // The bands span 0–100 while the x domain is 20–95; clip them to the frame
      // so the outer bands do not tint the axis margins.
      clip: true
    })),
    // Reference line at 0.5
    Plot.ruleY([0.5], {stroke: "#999", strokeDasharray: "4,3"}),
    // Points, colored by WPFI category (grey when the category has no color)
    Plot.dot(filtered, {
      x: "wpfi_score",
      y: "prop_favorable",
      fill: d => catColors[d.situation] || "#999",
      stroke: "#fff",
      strokeWidth: 0.5,
      r: 5,
      tip: true,
      title: d => `${d.country}\nWPFI: ${d.wpfi_score}\nFavorable: ${(d.prop_favorable * 100).toFixed(1)}%\n95% CI: [${(d.ci_lo * 100).toFixed(1)}%, ${(d.ci_hi * 100).toFixed(1)}%]\nN: ${d.n}\nCategory: ${d.situation}`
    }),
    // Country labels with collision avoidance
    Plot.text(labelData, {
      x: "x",
      y: "y",
      text: "country",
      fontSize: 9,
      fill: "#333"
    })
  ]
})

// Legend
html`<div style="display:flex; gap:1.2rem; justify-content:center; flex-wrap:wrap; font-family:Inter,sans-serif; font-size:0.8rem; margin-top:0.5rem; margin-bottom:1rem;">
  ${catOrder.map(c => `<span style="display:inline-flex;align-items:center;gap:0.3rem;">
    <span style="width:10px;height:10px;border-radius:50%;background:${catColors[c]};display:inline-block;"></span>
    ${c}
  </span>`).join("")}
</div>`

Response Comparison

Explore how LLMs respond differently to politically sensitive questions when asked in a country’s native language versus English.

countries = [...new Set(responses.map(d => d.country))].sort()
viewof selectedCountry = Inputs.select(countries, {label: "Country", value: "India"})

countryResps = responses.filter(d => d.country === selectedCountry)
countryModels = [...new Set(countryResps.map(d => d.model))].sort()
viewof selectedModelResp = Inputs.select(countryModels, {label: "Model", value: countryModels.find(m => m === "GPT-5.4") || countryModels[0]})

modelResps = countryResps.filter(d => d.model === selectedModelResp)
promptTypeLabels = ({"country": "Country", "institution": "Institution", "leader": "Leader"})
promptOptions = modelResps.map(d => ({label: `[${promptTypeLabels[d.prompt_type] || d.prompt_type}] ${d.prompt}`, value: d}))
viewof selectedResp = Inputs.select(promptOptions, {label: "Prompt", format: d => d.label, value: promptOptions[0]})

resp = selectedResp.value

// Show the prompt as it was asked in the target language.
// The prompt is inserted as a text node so that characters such as "<" or "&"
// in the data are displayed literally instead of being parsed as markup.
html`<p style="color: var(--bs-secondary); font-size: 0.85em;">
  Prompt in target language: <em>${document.createTextNode(resp.target_prompt)}</em>
</p>`

// Three side-by-side cards: the target-language response, its translation, and the
// English response. The card whose side equals resp.favorable ('target' or 'eng') is
// highlighted and badged; when resp.favorable is 'tie' both outer cards get a
// "JUDGES SPLIT" badge. Response and translation texts are model-generated, so they
// are inserted as text nodes: any "<" or "&" they contain is shown literally rather
// than being parsed as HTML.
html`
<div class="three-col">
  <div>
    <div style="height: 1.7em; margin-bottom: 0.4em;">${resp.favorable === 'target' ? html`<span style="display: inline-block; font-size: 0.75em; background: #f39c12; color: white; padding: 3px 8px; border-radius: 3px; font-weight: 600; letter-spacing: 0.03em;">JUDGED MORE FAVORABLE</span>` : resp.favorable === 'tie' ? html`<span style="display: inline-block; font-size: 0.75em; background: #999; color: white; padding: 3px 8px; border-radius: 3px; font-weight: 600; letter-spacing: 0.03em;">JUDGES SPLIT</span>` : ""}</div>
    <div class="response-card" style="${resp.favorable === 'target' ? 'background: #fff7e0; border: 2px solid #f39c12;' : resp.favorable === 'tie' ? 'border: 2px solid #ccc;' : ''}">
      <div class="card-header" style="color: #dc3545;">Target Language Response</div>
      <p style="white-space: pre-wrap; font-size: 0.9em; line-height: 1.7;">
        ${document.createTextNode(resp.target_response || "No data")}
      </p>
    </div>
  </div>

  <div>
    <div style="height: 1.7em; margin-bottom: 0.4em;"></div>
    <div class="response-card" style="background: #f9f9f7; border-left: 3px solid #396b3f;">
      <div class="card-header" style="color: #396b3f;">Translation</div>
      <p style="white-space: pre-wrap; font-size: 0.9em; line-height: 1.7; color: #555;">
        ${document.createTextNode(resp.translation || "Translation not available")}
      </p>
    </div>
  </div>

  <div>
    <div style="height: 1.7em; margin-bottom: 0.4em;">${resp.favorable === 'eng' ? html`<span style="display: inline-block; font-size: 0.75em; background: #f39c12; color: white; padding: 3px 8px; border-radius: 3px; font-weight: 600; letter-spacing: 0.03em;">JUDGED MORE FAVORABLE</span>` : resp.favorable === 'tie' ? html`<span style="display: inline-block; font-size: 0.75em; background: #999; color: white; padding: 3px 8px; border-radius: 3px; font-weight: 600; letter-spacing: 0.03em;">JUDGES SPLIT</span>` : ""}</div>
    <div class="response-card" style="${resp.favorable === 'eng' ? 'background: #fff7e0; border: 2px solid #f39c12;' : resp.favorable === 'tie' ? 'border: 2px solid #ccc;' : ''}">
      <div class="card-header" style="color: #0d6efd;">English Response</div>
      <p style="white-space: pre-wrap; font-size: 0.9em; line-height: 1.7;">
        ${document.createTextNode(resp.eng_response || "No data")}
      </p>
    </div>
  </div>
</div>
`