summaryrefslogtreecommitdiff
path: root/scripts/create_corr_array_pretalx.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/create_corr_array_pretalx.py')
-rw-r--r--scripts/create_corr_array_pretalx.py50
1 files changed, 50 insertions, 0 deletions
diff --git a/scripts/create_corr_array_pretalx.py b/scripts/create_corr_array_pretalx.py
new file mode 100644
index 0000000..dd9221d
--- /dev/null
+++ b/scripts/create_corr_array_pretalx.py
@@ -0,0 +1,50 @@
1#!venv/bin/python
2
3# convert halfnarp's output into the correlation array that can later
4# be served by halfnarp to show clusters of talks by your audience's
5# preferences
6#
7# Run this script with a single parameter, a file containing the output
8# of halfnarp2.py -e and place this script's output in a file that is
9# being served statically, e.g. corr_array_38c3.json
10
11import json
12import sys
13import itertools
14from collections import defaultdict
15
def count_cooccurrences(preferences):
    """Count in how many preference lists each event and event pair appears.

    preferences is an iterable of preference lists, each an iterable of
    talk ids.  The result maps a sorted pair (x, y) with x <= y to the number
    of lists containing both ids; the diagonal entry (x, x) is therefore the
    number of lists containing x at all.
    """
    counts = defaultdict(int)
    for picks in preferences:
        # set(): an id repeated inside one list must only be counted once.
        # sorted(): keeps every pair in (smaller, larger) order so that the
        # same two ids always land on the same key.
        for pair in itertools.combinations_with_replacement(sorted(set(picks)), 2):
            # Every occurrence counts, including the very first one for an id.
            counts[pair] += 1
    return counts


def _score(total, both, x_count, y_count):
    """Return the single-digit (0..9) score for one pair of events.

    total is the number of preference lists, both the number of lists
    containing both events, x_count / y_count the number of lists containing
    each event on its own.
    """
    if x_count * y_count <= 0:
        # Guard against division by zero.
        return 0
    value = 4.0 * total * both * both * (x_count + y_count) / (
        x_count * x_count * y_count * y_count)
    # Truncate, then clamp so that each score fits into one character.
    return min(int(value), 9)


def build_corr_array(preferences):
    """Build the dictionary that is dumped as the correlation array.

    Returns {'event_ids': [...], 'event_corrs': [...]}.  event_ids is the
    sorted list of all ids seen.  event_corrs holds one digit string per
    event id except the last one; the string for event_ids[i] has one digit
    for each later id event_ids[i + 1:], in order (upper triangle, without
    the diagonal).
    """
    counts = count_cooccurrences(preferences)
    total = len(preferences)
    # Every id seen has a diagonal entry (x, x), so the first components of
    # the keys enumerate all ids.
    event_ids = sorted({first for first, _second in counts})

    rows = []
    for index, x in enumerate(event_ids[:-1]):
        x_count = counts.get((x, x), 0)
        digits = []
        for y in event_ids[index + 1:]:
            # .get() instead of [] so that lookups do not insert new keys
            # into the defaultdict.
            both = counts.get((x, y), 0)
            y_count = counts.get((y, y), 0)
            digits.append(str(_score(total, both, x_count, y_count)))
        rows.append(''.join(digits))

    return {'event_ids': event_ids, 'event_corrs': rows}


def main(argv=None):
    """Read the preference lists from the file named in argv and print JSON.

    argv defaults to sys.argv[1:]; only its first entry is used.  Returns
    the process exit status: 0 on success, 2 if no file name was given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print('usage: create_corr_array_pretalx.py PREFERENCES_JSON',
              file=sys.stderr)
        return 2

    # load in list of talk preferences, which is list of strings with talk ids
    with open(args[0], encoding='utf-8') as data_file:
        preferences = json.load(data_file)

    print(json.dumps(build_corr_array(preferences), separators=(',', ':')))
    return 0


if __name__ == '__main__':
    sys.exit(main())