// Build Turtle-like RDF statements, one per subject, from the scholarly-articles
// split of the wikidata 20231016 snapshot.
//
// Shape of the output string for each subject:
//   "subject pred1 obj1, obj2;\n\tpred2 obj3 .\n"
// i.e. objects sharing a predicate are joined with ", ", predicate clauses are
// joined with ";\n\t", and each statement is terminated with " .\n".
//
// Only subject/predicate/object are consumed below; the previous version also
// selected `context` and a precomputed `concat(predicate, ' ', object)` column
// that were never used and were dragged through the first shuffle for nothing.
// NOTE(review): if code outside this chunk reads `upstreamReader.context`,
// restore that column — nothing visible here uses it.
// NOTE(review): the huge LIMIT is kept from the original as a "read everything"
// guard, but LIMIT without ORDER BY is nondeterministic in Spark if the table
// ever exceeds it — confirm the table stays below 1e9 rows or drop the LIMIT.
val upstreamReader = spark.sql(
  """select subject, predicate, object
    |from discovery.wikibase_rdf_scholarly_split
    |where snapshot = '20231016'
    |  and wiki = 'wikidata'
    |  and scope = 'scholarly_articles'
    |limit 1000000000""".stripMargin)

// First aggregation level: gather every object sharing a (subject, predicate)
// pair into one array.
val grouped = upstreamReader
  .groupBy("subject", "predicate")
  .agg(collect_list(col("object")).alias("obj_mul"))

// Turtle-style object list: "obj1, obj2, obj3".
val groupedComma = grouped.withColumn("nice_obj", concat_ws(", ", col("obj_mul")))

// One predicate clause per row: "predicate obj1, obj2".
val groupedSplicePredicateAndMultiObject = groupedComma
  .withColumn("spliced_pmo", concat(col("predicate"), lit(" "), col("nice_obj")))

// Second aggregation level: gather all predicate clauses belonging to a subject.
val groupedForPredicates = groupedSplicePredicateAndMultiObject
  .groupBy("subject")
  .agg(collect_list(col("spliced_pmo")).alias("po_mul"))

// Join the clauses Turtle-style with ";\n\t" (semicolon, newline, tab).
val groupedCommaPreFinal = groupedForPredicates
  .withColumn("nice_pred", concat_ws(";\n\t", col("po_mul")))

// Final statement per subject, terminated with " .\n" as in Turtle syntax.
// (Column name is left auto-generated, matching the original select.)
val statements = groupedCommaPreFinal
  .select(concat(col("subject"), lit(" "), col("nice_pred"), lit(" .\n")))