1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
|
#!venv/bin/python
# convert halfnarp's output into the correlation array that can later
# be served by halfnarp to show clusters of talks by your audience's
# preferences
#
# Run this script with a single parameter, a file containing the output
# of halfnarp2.py -e and place this script's output in a file that is
# being served statically, e.g. corr_array_38c3.json
import json
import sys
import itertools
from collections import defaultdict
# Read the list of talk preferences (a JSON list whose entries are lists of
# talk ids), count how often talks were picked on their own and together,
# and print the resulting correlation array as compact JSON.


def _count_cooccurrences(preferences):
    """Count how many preference lists contain each talk and each talk pair.

    Returns a nested mapping in which ``counts[x][x]`` is the number of
    preference lists containing talk ``x`` and ``counts[x][y]`` (for
    ``x < y`` in sort order) is the number of lists containing both.
    """
    counts = defaultdict(lambda: defaultdict(int))
    for talks in preferences:
        # De-duplicate so an id repeated inside one list is counted once.
        unique_talks = sorted(set(talks))
        for x, y in itertools.combinations_with_replacement(unique_talks, 2):
            # Count every observation, including the first one for a talk;
            # skipping it would zero out talks that were picked only once
            # and skew pair counts relative to per-talk counts.
            counts[x][y] += 1
    return counts


def _correlation_digit(total, both, x_count, y_count):
    """Return the 0-9 score for one pair of talks.

    ``total`` is the number of preference lists, ``both`` the number of
    lists containing both talks, ``x_count`` / ``y_count`` the number of
    lists containing each talk. The score is truncated to an int and
    capped at 9 so that it always fits in a single character.
    """
    if x_count * y_count <= 0:
        # Guard against division by zero for talks without any count.
        return 0
    score = (4.0 * total * both * both * (x_count + y_count)
             / (x_count * x_count * y_count * y_count))
    return min(int(score), 9)


def build_correlation_array(preferences):
    """Build the correlation structure for a list of preference lists.

    Returns a dict with two keys:

    * ``'event_ids'``: all talk ids seen in ``preferences``, sorted.
    * ``'event_corrs'``: one string per talk except the last; the string at
      index ``i`` holds one digit for every talk that sorts after
      ``event_ids[i]``, in order (i.e. the upper triangle of the matrix).

    Empty input yields two empty lists.
    """
    total = len(preferences)
    counts = _count_cooccurrences(preferences)
    # Every talk has a diagonal entry, so the outer keys are all talk ids.
    event_ids = sorted(counts)

    event_corrs = []
    for index, x in enumerate(event_ids[:-1]):
        x_count = counts[x].get(x, 0)
        digits = [
            str(_correlation_digit(total,
                                   counts[x].get(y, 0),
                                   x_count,
                                   counts[y].get(y, 0)))
            for y in event_ids[index + 1:]
        ]
        event_corrs.append(''.join(digits))

    return {'event_ids': event_ids, 'event_corrs': event_corrs}


def main(argv):
    """Command-line entry point; ``argv[1]`` is the path of the JSON input.

    Prints the correlation array to stdout and returns the exit status.
    """
    if len(argv) < 2:
        sys.stderr.write('usage: %s <halfnarp-export.json>\n' % argv[0])
        return 1
    with open(argv[1]) as data_file:
        preferences = json.load(data_file)
    out_dict = build_correlation_array(preferences)
    print(json.dumps(out_dict, separators=(',', ':')))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|