Sort and show all abstracts after voting

Installation instructions

Once you have downloaded the votes from Qualtrics as a CSV file named votes.csv and saved it in the same folder as submissions.csv, run the code below to show the abstracts in order of approval.

import csv
from collections import defaultdict

import jinja2

from IPython.core.display import HTML
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Read the submissions and do a bit of data cleaning.
# The 'utf-8-sig' codec strips a leading byte-order mark (BOM, '\ufeff') if the
# export starts with one, so the first column is read as plain 'ID' whether or
# not the BOM is present.
with open('submissions.csv', newline='', encoding='utf-8-sig') as csvfile:
    csvreader = csv.DictReader(csvfile, dialect='excel')
    submissions = list(csvreader)
for submission in submissions:
    # Iterate over a snapshot: cleaned keys are added to the same dict below
    for k, v in list(submission.items()):
        # Drop any stray BOM character, trim whitespace and keep only the first
        # line of a multi-line column header
        k_new = k.replace('\ufeff', '').strip().split('\n')[0]
        # Some people use all caps for the title, so we automatically fix that
        if k_new=='Presentation title' and v.upper()==v:
            v = v.title()
        # Some people paste text with newlines for every line which looks ugly:
        # if every line is shorter than 120 characters we treat the newlines as
        # hard wraps and join the lines with spaces
        if k_new=='Abstract (please keep under 300 words)' and max(map(len, v.split('\n')))<120:
            v = v.replace('\n', ' ')
        # The cleaned key (including 'ID') is stored alongside the original one
        submission[k_new] = v

# Read the votes from Qualtrics.
# The encoding is given explicitly so the file is decoded the same way on every
# machine instead of using the platform's locale default; 'utf-8-sig' also
# strips a leading byte-order mark if the export contains one.
with open('votes.csv', newline='', encoding='utf-8-sig') as csvfile:
    csvreader = csv.DictReader(csvfile, dialect='excel')
    votes = list(csvreader)

# Discard the first two rows after the header line.
# NOTE(review): presumably these are the extra descriptive header rows that a
# Qualtrics CSV export places under the column names - confirm against the file.
votes = votes[2:]

print(f'{len(votes)} raw votes')

# Remove votes with duplicate IP addresses.
# First pass: count how many votes came from each address.
ip_address_counts = defaultdict(int)
for vote in votes:
    address = vote['IPAddress']
    ip_address_counts[address] = ip_address_counts[address] + 1

# Second pass: keep a vote only when its address was seen exactly once, i.e.
# every vote from a repeated address is discarded (none of them is kept).
votes = list(filter(lambda ballot: ip_address_counts[ballot['IPAddress']] == 1, votes))

print(f'{len(votes)} votes after duplicate IP address votes removed')

# Check the votes make sense
# (a bare expression that evaluates the first remaining vote row; presumably
# meant to be displayed as notebook cell output - it raises IndexError if no
# votes are left after filtering)
votes[0]

# Check the submissions make sense
# (same idea for the first cleaned submission row; raises IndexError if
# submissions.csv contained no rows)
submissions[0]

# Link these two tables using a dictionary keyed by submission ID.
# Every submission starts with empty tallies that the vote loop below fills in.
id_to_submission = {}
for submission in submissions:
    submission.update(yes_votes=0, no_votes=0, comments=[])
    id_to_submission[submission['ID']] = submission

# Walk over every answered 'abstract...' column of every vote.  After removing
# the 'abstract' prefix, a column ending in 'yesno' carries a Yes/No answer and
# a column ending in 'comment_1' carries free text; what remains of the column
# name is the submission ID.
for vote in votes:
    yes_votes, no_votes = [], []
    vote['yes'] = yes_votes
    vote['no'] = no_votes
    for column, answer in vote.items():
        # Skip unanswered questions and columns that are not about an abstract
        if not answer or not column.startswith('abstract'):
            continue
        field = column.replace('abstract', '')
        if field.endswith('yesno'):
            sub_id = field.replace('yesno', '')
            if answer == 'Yes':
                id_to_submission[sub_id]['yes_votes'] += 1
                yes_votes.append(sub_id)
            elif answer == 'No':
                id_to_submission[sub_id]['no_votes'] += 1
                no_votes.append(sub_id)
        elif field.endswith('comment_1'):
            sub_id = field.replace('comment_1', '')
            id_to_submission[sub_id]['comments'].append(answer)

# Compute approval and sort.
# Approval is the share of yes votes among all yes/no votes; max(..., 1) avoids
# a division by zero, so a submission nobody voted on gets an approval of 0.
for sub in submissions:
    total_votes = sub['yes_votes'] + sub['no_votes']
    sub['approval'] = sub['yes_votes'] / max(total_votes, 1)

# Highest approval first; the sort is stable, so ties keep their existing order
submissions.sort(key=lambda submission: submission['approval'], reverse=True)

# Generate an output HTML file with all the abstracts and their number of votes and comments.
# Titles, author details, abstracts and comments all come from web forms, so
# autoescaping is switched on: characters such as < > & in that text are
# rendered literally instead of being interpreted as markup.
template = jinja2.Template('''
<html><head><title>Submissions</title></head><body>

    {% for sub in submissions %}
    <div style="border: 1px solid grey; margin: 1em; padding: 1em;" >
        <h3><span style="border: 1px solid red; border-radius: 5px; background-color: yellow; color: red; padding: 5px;">{{ loop.index }}</span> {{ sub['Presentation title'] }}</h3>
        <h4>{{ sub['Presentation authors'] }}</h4>
        <h4>Corresponding author: <a href="mailto:{{ sub['Corresponding author email address'] }}">{{ sub['Corresponding author name'] }}</a></h4>
        {% for para in sub['Abstract (please keep under 300 words)'].splitlines() %}
            {% if para.strip() %}
                <p>
                    {{ para }}
                </p>
            {% endif %}
        {% endfor %}
        <p>
            <span style="background: lightgreen; border-radius: 10px; padding: 10px; display: inline-block; margin: 1px;">
                👍 <b>{{ sub['yes_votes'] }}</b> yes
            </span>
            <span style="background: lightpink; border-radius: 10px; padding: 10px; display: inline-block; margin: 1px;">
                👎 <b>{{ sub['no_votes'] }}</b> no
            </span>
            <span style="background: lightblue; border-radius: 10px; padding: 10px; display: inline-block; margin: 1px;">
                <b>{{ int(100*sub['yes_votes']/max(sub['yes_votes']+sub['no_votes'], 1)) }}%</b> positive
            </span>
        </p>
        {% if sub['comments'] %}
        <p>
            Comments:
        </p>
        <ul>
            {% for comment in sub['comments'] %}
                <li>
                    {{ comment }}
                </li>
            {% endfor %}
        </ul>
        {% endif %}
    </div>
    {% endfor %}

</body></html>
''', autoescape=True)

# int and max are passed in because the template calls them to compute the
# percentage of positive votes
submissions_html = template.render(submissions=submissions, int=int, max=max)

# Use a context manager so the file is flushed and closed straight away
with open('submissions_with_votes.html', 'w', encoding='utf-8') as html_file:
    html_file.write(submissions_html)

# Wrap the rendered page in an IPython HTML display object (shown as output
# when this is the last expression of a notebook cell)
HTML(submissions_html)

# Generate a list of the top 8.
# submissions is already ranked by approval, so the first 8 entries become
# talks and everything after them becomes a poster.
all_talks, all_posters = submissions[:8], submissions[8:]
for sub in all_talks:
    sub['talk'] = True
for sub in all_posters:
    sub['talk'] = False

# Talk emails (comma-separated, in ranking order)
', '.join(talk['Corresponding author email address'] for talk in all_talks)

# Poster emails (comma-separated, ordered by corresponding author name;
# only the all_posters copy is re-sorted, submissions keeps its ranking)
all_posters.sort(key=lambda poster: poster['Corresponding author name'])
', '.join(poster['Corresponding author email address'] for poster in all_posters)

# Histogram of ratings.
# The cutoff sits halfway between the approval of the last talk (rank index 7)
# and the approval of the first poster (rank index 8).
boundary = (submissions[7]['approval'] + submissions[8]['approval'])/2
cutoff_percent = round(100*boundary)
print(f'Boundary = {cutoff_percent}%')

# 41 edges -> 40 bins of width 0.1, with one edge exactly on the boundary so
# that no bin mixes values from both sides of the cutoff
binedges = boundary + 0.1*np.arange(-20, 21)

all_approvals = [sub['approval'] for sub in submissions]
talk_approvals = all_approvals[:8]

counts, binedges, _ = plt.hist(all_approvals, bins=binedges, label='All submissions (talk preferred)')
plt.hist(talk_approvals, bins=binedges, label='Accepted for talk')
plt.axvline(boundary, ls='--', c='k', label=f'Cutoff = {cutoff_percent}%')
plt.xlim(0, 1)
plt.xlabel('Fraction interested in seeing submission as talk')
plt.ylabel('Number of submissions')
plt.legend(loc='best')
plt.tight_layout()

# Generate a word cloud of all the abstracts that can be used on social media etc.
# All abstracts are joined into one space-separated text, rendered as a
# 1000x1000 image on a white background and saved as wordcloud.png.
abstract_key = 'Abstract (please keep under 300 words)'
all_abstracts = ' '.join([sub[abstract_key] for sub in submissions])
cloud_options = dict(background_color="white", width=1000, height=1000)
wordcloud = WordCloud(**cloud_options).generate(all_abstracts)
wordcloud.to_file('wordcloud.png')

Code

Generate list of submissions and survey form for Qualtrics