From 66c93ce316e6fa8a972e95fbdb5ad9f35c381512 Mon Sep 17 00:00:00 2001 From: Lukas Wallrich Date: Thu, 2 Jul 2026 12:22:10 +0200 Subject: [PATCH] Fix leaked Google Docs comment markers on the clusters page Five cluster/sub-cluster descriptions on the live clusters page showed literal bracket artifacts like "process:[ag][ah]" and "feedback[ai][aj], have received". Root cause: parse_clusters_to_sheet.py fetches the source Google Doc via its plain-text export (docs.google.com/.../export?format=txt), and Google Docs' plain-text export represents unresolved inline comments as bracketed reference codes (a, b, ... aa, ab, ...) inserted directly into the body text at the comment's anchor point. The script already had an extract_annotations() helper for exactly this pattern (built for citation/publication text and sub-cluster ToC names), but the code paths that build cluster- and sub-cluster-level *description* text appended the raw, uncleaned line instead - five call sites across parse_cluster_toc(), parse_subclusters_standard(), and parse_cluster_11(). Fixed all five so future re-syncs from the Google Doc won't reintroduce this even if comments are left open. Also directly cleaned the five affected descriptions already baked into the committed data/clusters_v4.json (a minimal, scoped find/replace on just the affected fields - not a full re-sync from the live doc, to avoid pulling in unrelated content drift). Co-Authored-By: Claude Sonnet 5 --- data/clusters_v4.json | 10 +++++----- scripts/parse_clusters_to_sheet.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/data/clusters_v4.json b/data/clusters_v4.json index 08438246203..e858c100467 100644 --- a/data/clusters_v4.json +++ b/data/clusters_v4.json @@ -1241,7 +1241,7 @@ { "number": 3, "name": "Ways of Working", - "description": "Attainment of an understanding of how research is conducted, managed and disseminated. 
There are 12 sub-clusters which aim to further parse the learning and teaching process[s]:", + "description": "Attainment of an understanding of how research is conducted, managed and disseminated. There are 12 sub-clusters which aim to further parse the learning and teaching process:", "sub_clusters": [ { "name": "Qualitative research", @@ -3302,7 +3302,7 @@ { "number": 8, "name": "Replication and meta-research", - "description": "Attainment of a grounding in 'replication research', which takes a variety of forms, each with a different purpose and contribution. Replicable science requires replication research. When teaching, students should understand the purpose and need of replications in its variety of forms and be able to conduct (and join) replication projects. There are 6 sub-clusters which aim to further parse the learning and teaching process:[ag][ah]", + "description": "Attainment of a grounding in 'replication research', which takes a variety of forms, each with a different purpose and contribution. Replicable science requires replication research. When teaching, students should understand the purpose and need of replications in its variety of forms and be able to conduct (and join) replication projects. There are 6 sub-clusters which aim to further parse the learning and teaching process:", "sub_clusters": [ { "name": "Conducting replication studies; challenges, limitations, and comparisons with the original study", @@ -3737,7 +3737,7 @@ }, { "name": "The politics of replicating famous studies", - "description": "Sometimes responses to replication research can be negative. Failed replications of famous work, most notably power posing, ego depletion, stereotype threat, and facial feedback[ai][aj], have received a lot of attention.", + "description": "Sometimes responses to replication research can be negative. 
Failed replications of famous work, most notably power posing, ego depletion, stereotype threat, and facial feedback, have received a lot of attention.", "publications": [ { "doi": "10.1371/journal.pone.0029081", @@ -4980,7 +4980,7 @@ }, { "name": "Sexuality & Gender", - "description": "Sexuality refers to the various aspects of an individual’s being related to their sexual feelings, thoughts, attractions and behaviour. We use “LGBTQ+” as an inclusive term to refer to all sexual identities and orientations which are not heterosexual. This includes but is not limited to lesbian, gay, bisexual, pansexual, asexual, queer, and questioning[ap].", + "description": "Sexuality refers to the various aspects of an individual’s being related to their sexual feelings, thoughts, attractions and behaviour. We use “LGBTQ+” as an inclusive term to refer to all sexual identities and orientations which are not heterosexual. This includes but is not limited to lesbian, gay, bisexual, pansexual, asexual, queer, and questioning.", "publications": [ { "doi": "10.1126/sciadv.abe0933", @@ -6444,7 +6444,7 @@ { "number": 11, "name": "Research Integrity", - "description": "Research Integrity (RI) encompasses the moral and professional standards that ensure research is trustworthy, transparent, and ethical from inception to publication​. Traditionally, RI efforts have centered on preventing misconduct—the blatant fabrication, falsification, or plagiarism (FFP) that betrays the core of science. Modern perspectives, however, advocate a holistic vision: cultivating a culture of responsible, equitable, and open research practices that goes beyond avoiding misconduct to actively promoting excellence and fairness. Supervision, mentorship, and everyday collegial relations have an important part in cultivating a culture of research integrity and shaping what “good practice” looks like in labs and teams to new researchers. This includes how power dynamics, authorship, and credit are negotiated. 
Institutions matter too, policies, incentives, workload, and leadership either enable or erode integrity. RI is intrinsically linked with the Open Science movement. Both strive to make research more transparent and accountable, thereby strengthening credibility and public trust​. Open Science initiatives (e.g. data sharing, preregistration, open access) can make misconduct easier to detect and discourage, while fostering norms of honesty and rigor. Conversely, RI provides the ethical foundation for openness – emphasizing values like honesty, accountability, respect, and fairness that guide how openness is pursued. By making research integrity “possible, easy, normative, and rewarding” (Haven et al., 2022)​, institutions and communities create an environment where ethical, inclusive, and rigorous research thrives. Ultimately, RI is about more than rule-following; it is about embedding integrity as a fundamental ethos of research design, conduct, and dissemination – ensuring science advances knowledge and the public good in tandem.[at]", + "description": "Research Integrity (RI) encompasses the moral and professional standards that ensure research is trustworthy, transparent, and ethical from inception to publication​. Traditionally, RI efforts have centered on preventing misconduct—the blatant fabrication, falsification, or plagiarism (FFP) that betrays the core of science. Modern perspectives, however, advocate a holistic vision: cultivating a culture of responsible, equitable, and open research practices that goes beyond avoiding misconduct to actively promoting excellence and fairness. Supervision, mentorship, and everyday collegial relations have an important part in cultivating a culture of research integrity and shaping what “good practice” looks like in labs and teams to new researchers. This includes how power dynamics, authorship, and credit are negotiated. 
Institutions matter too, policies, incentives, workload, and leadership either enable or erode integrity. RI is intrinsically linked with the Open Science movement. Both strive to make research more transparent and accountable, thereby strengthening credibility and public trust​. Open Science initiatives (e.g. data sharing, preregistration, open access) can make misconduct easier to detect and discourage, while fostering norms of honesty and rigor. Conversely, RI provides the ethical foundation for openness – emphasizing values like honesty, accountability, respect, and fairness that guide how openness is pursued. By making research integrity “possible, easy, normative, and rewarding” (Haven et al., 2022)​, institutions and communities create an environment where ethical, inclusive, and rigorous research thrives. Ultimately, RI is about more than rule-following; it is about embedding integrity as a fundamental ethos of research design, conduct, and dissemination – ensuring science advances knowledge and the public good in tandem.", "sub_clusters": [ { "name": "Principles and Frameworks of Research Integrity", diff --git a/scripts/parse_clusters_to_sheet.py b/scripts/parse_clusters_to_sheet.py index bf375991560..115939e2cad 100644 --- a/scripts/parse_clusters_to_sheet.py +++ b/scripts/parse_clusters_to_sheet.py @@ -204,7 +204,7 @@ def parse_cluster_toc(lines, desc_start, section_end): if toc_names: toc_names[-1] += ' ' + stripped else: - description_parts.append(stripped) + description_parts.append(extract_annotations(stripped)[0]) i += 1 @@ -371,7 +371,7 @@ def save_current(): if current_cites: current_cites[-1] += ' ' + stripped elif current_sc is not None: - current_desc.append(stripped) + current_desc.append(extract_annotations(stripped)[0]) i += 1 continue @@ -426,7 +426,7 @@ def save_current(): # 5. 
Description text (after heading, before citations) if state == 'in_description': - current_desc.append(stripped) + current_desc.append(extract_annotations(stripped)[0]) i += 1 continue @@ -460,7 +460,7 @@ def parse_cluster_11(lines, start, end): if stripped in ('Cross-Cluster Integration and Synergies', 'Possible Additional Sub-clusters?'): break - desc_parts.append(stripped) + desc_parts.append(extract_annotations(stripped)[0]) elif found_desc and not stripped and desc_parts: # Blank line after description content # Check if next non-blank line is a sub-cluster or ToC @@ -575,7 +575,7 @@ def save_current(): # Description continuation if current_sc and not in_key_readings: - current_desc.append(stripped) + current_desc.append(clean_stripped) j += 1