diff --git a/scripts/clean_talks.py b/scripts/clean_talks.py
new file mode 100644
index 0000000..4375c28
--- /dev/null
+++ b/scripts/clean_talks.py
@@ -0,0 +1,60 @@
+from arango_client import arango
+
+
+def clean_talk(text):
+    import re
+
+    # Remove "STYLEREF Kantrubrik \* MERGEFORMAT" from the text
+    if "STYLEREF Kantrubrik * MERGEFORMAT" in text:
+        text = text.replace("STYLEREF Kantrubrik * MERGEFORMAT", "")
+    # Remove "- " from the text when there are text on both sides, eg. till- sammans (this comes from line breaks in Word)
+    text = re.sub(r"(?<=\S)-\s(?=\S)", "", text)
+    # Remove linebreaks in the middle of sentences
+    text = re.sub(r"(?<=[^\s.!?:;])\n(?=[a-zåäö])", " ", text)
+    return text
+
+
+if __name__ == "__main__":
+
+    people = arango.db.aql.execute(
+        "FOR p IN people RETURN {'namn': CONCAT(p.tilltalsnamn, ' ', p.efternamn), '_key': p._key}"
+    )
+    people_dict = {str(p["_key"]): p["namn"] for p in people}
+
+    cursor = arango.db.aql.execute(
+        """FOR t IN talks RETURN {'_id': t._id, 'anforandetext': t.anforandetext, 'avsnittsrubrik': t.avsnittsrubrik, 'parti': t.parti, 'intressent_id': t.intressent_id}""",
+        batch_size=100,
+        count=True,
+    )
+
+    cleaned_talks = []
+    n = 0
+    for talk in cursor:
+        n += 1
+        talk["anforandetext"] = clean_talk(talk.get("anforandetext", ""))
+        talk["avsnittsrubrik"] = clean_talk(talk.get("avsnittsrubrik", ""))
+        if talk.get("intressent_id") in people_dict:
+            talk["talare"] = people_dict[str(talk.get("intressent_id"))]
+        if talk["parti"] == "FP":
+            talk["parti"] = "L"
+        if talk["parti"] == "KDS":
+            talk["parti"] = "KD"
+        if talk["parti"] in [
+            "TALMANNEN",
+            "FÖRSTE VICE TALMANNEN",
+            "ANDRE VICE TALMANNEN",
+            "TREDJE VICE TALMANNEN",
+            "ÅLDERSPRESIDENTEN",
+            "HANS MAJESTÄT KONUNGEN",
+            "TJÄNSTGÖRANDE ÅLDERSPRESIDENTEN",
+        ]:
+            # Make first letter uppercase and rest lowercase
+            talk["parti"] = talk["parti"].title()
+        if talk["parti"] == "":
+            talk["parti"] = "-"
+        cleaned_talks.append(talk)
+        if len(cleaned_talks) >= 100:
+            arango.db.collection("talks").update_many(cleaned_talks, silent=True)
+            print(
+                f"Processed {n} talks", end="\r"), 
+            cleaned_talks = []