Rework of halfnarp and fullnarp into a self-contained repository. Still WIP

author: erdgeist <erdgeist@erdgeist.org> 2024-12-22 21:53:57 +0100
committer: erdgeist <erdgeist@erdgeist.org> 2024-12-22 21:53:57 +0100
commit: e3481a4a35091b32b6fbee80c1c9ba2b6d7b50d6 (patch)
tree: 58f90b32cbd89599acfaab07377cc0447f1190c1 /scripts/create_corr_array_pretalx.py
1 files changed, 50 insertions, 0 deletions
diff --git a/scripts/create_corr_array_pretalx.py b/scripts/create_corr_array_pretalx.py
new file mode 100644
index 0000000..dd9221d
--- /dev/null
+++ b/scripts/create_corr_array_pretalx.py
@@ -0,0 +1,50 @@
+#!venv/bin/python
+# convert halfnarp's output into the correlation array that can later
+# be served by halfnarp to show clusters of talks by your audience's
+# preferences
+#
+# Run this script with a single parameter, a file containing the output
+# of halfnarp2.py -e and place this script's output in a file that is
+# being served statically, e.g. corr_array_38c3.json
+import json
+import sys
+import itertools
+from collections import defaultdict
+
+
+def count_cooccurrences(preferences):
+    """Count how often each talk, and each pair of talks, was selected.
+
+    preferences is a list with one entry per submitted preference list;
+    each entry is presumably a list of talk id strings (confirm against
+    the output of halfnarp2.py -e).
+
+    Returns a nested mapping x -> y -> count for x <= y, where
+    counts[x][x] is the number of lists containing x and counts[x][y]
+    is the number of lists containing both x and y.
+    """
+    counts = defaultdict(lambda: defaultdict(int))
+    for talks in preferences:
+        # de-duplicate so a talk repeated within one list counts once
+        chosen = sorted(set(talks))
+        for x, y in itertools.combinations_with_replacement(chosen, 2):
+            counts[x][y] += 1
+    return counts
+
+
+def build_corr_array(preferences):
+    """Build the correlation structure that is dumped as JSON.
+
+    Returns a dict with two keys:
+      'event_ids'   - all talk ids, sorted
+      'event_corrs' - one string per talk except the last one; the
+                      string for event_ids[i] holds one digit (0-9) for
+                      every talk that sorts after it, in order
+
+    For a pair (x, y) the digit is
+        4 * N * c_xy^2 * (c_x + c_y) / (c_x^2 * c_y^2)
+    truncated to an integer and capped at 9, where N is the number of
+    preference lists, c_x and c_y are how many lists contain each talk
+    and c_xy is how many contain both.
+    """
+    counts = count_cooccurrences(preferences)
+    total = len(preferences)
+    event_ids = sorted(counts)
+    digits = {x: [] for x in event_ids[:-1]}
+    for x, y in itertools.combinations(event_ids, 2):
+        xyc = counts[x].get(y, 0)
+        xc = counts[x][x]
+        yc = counts[y][y]
+        # xc and yc are at least 1 for every listed talk, so the
+        # division is always defined
+        score = 4.0 * total * xyc * xyc * (xc + yc) / (xc * xc * yc * yc)
+        digits[x].append(str(min(int(score), 9)))
+    return {
+        'event_ids': event_ids,
+        'event_corrs': [''.join(row) for row in digits.values()],
+    }
+
+
+def main(argv):
+    """Read the preference file named in argv[1] and print the JSON."""
+    if len(argv) < 2:
+        print('usage: %s <halfnarp-export.json>' % argv[0], file=sys.stderr)
+        return 1
+    with open(argv[1], encoding='utf-8') as data_file:
+        preferences = json.load(data_file)
+    print(json.dumps(build_corr_array(preferences), separators=(',', ':')))
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
author	erdgeist <erdgeist@erdgeist.org>	2024-12-22 21:53:57 +0100
committer	erdgeist <erdgeist@erdgeist.org>	2024-12-22 21:53:57 +0100
commit	e3481a4a35091b32b6fbee80c1c9ba2b6d7b50d6 (patch)
tree	58f90b32cbd89599acfaab07377cc0447f1190c1 /scripts/create_corr_array_pretalx.py

diff --git a/scripts/create_corr_array_pretalx.py b/scripts/create_corr_array_pretalx.py
new file mode 100644
index 0000000..dd9221d
--- /dev/null
+++ b/scripts/create_corr_array_pretalx.py
@@ -0,0 +1,50 @@
	1	#!venv/bin/python
	2
	3	# convert halfnarp's output into the correlation array that can later
	4	# be served by halfnarp to show clusters of talks by your audience's
	5	# preferences
	6	#
	7	# Run this script with a single parameter, a file containing the output
	8	# of halfnarp2.py -e and place this script's output in a file that is
	9	# being served statically, e.g. corr_array_38c3.json
	10
	11	import json
	12	import sys
	13	import itertools
	14	from collections import defaultdict
	15
	16	# load in list of talk preferences, which is list of strings with talk ids
	17	with open(sys.argv[1]) as data_file:
	18	corr = json.load(data_file)
	19
	20	all_sums = dict(defaultdict())
	21	all_events = {}
	22	l = len(corr)
	23
	24	for arr in corr:
	25
	26	for x in arr:
	27	all_events[x] = 1
	28
	29	for x, y in itertools.combinations_with_replacement(sorted(arr), 2):
	30	if x in all_sums.keys():
	31	all_sums[x][y] = 1 + all_sums[x].get(y, 1)
	32	else:
	33	all_sums[x] = defaultdict()
	34
	35	all_events = sorted(set(all_events))
	36
	37	out_strings = defaultdict(str)
	38
	39	for x, y in itertools.combinations(all_events, 2):
	40	xyc = all_sums[x].get(y, 0)
	41	xc = all_sums[x].get(x, 0)
	42	yc = all_sums[y].get(y, 0)
	43	# print (x, y, both, xcount, ycount)
	44	xy_corr = 4.0 * l * xyc * xyc * (xc + yc) / (xcxcycyc) if xc yc > 0 else 0
	45	if xy_corr > 9:
	46	xy_corr = 9
	47	out_strings[x] += str(int(xy_corr))
	48
	49	out_dict = { 'event_ids': all_events, 'event_corrs': list(out_strings.values()) }
	50	print (json.dumps(out_dict, separators=(',', ':')))