This file was created from the following Jupyter-notebook: docs/preproc_example_pavlov.ipynb
Interactive version: Binder badge

Example: Preprocessing of a full dataset with multiple subjects

This is an example of preprocessing a real dataset from a recent study conducted in our cognitive neuroscience lab at the University of Tromsø.

The notebook cannot be run without the underlying data (which can be found at https://osf.io/7wcej/) but it can be used as an example to tweak for your own studies.

[1]:
import sys
import pypillometry as pp
import pylab as plt
plt.rcParams.update({'figure.max_open_warning': 0}) ## suppress a warning
import pandas as pd
import numpy as np
import os, glob
[3]:
# Load all raw datasets, skipping subjects without usable data.
exclude=["20_pav", "8_pav", "1_pav_pilot"]

# Data is already stored in pypillometry's own format as .pd files;
# datasets end up in a dict keyed by subject number (name is e.g. "12_pav").
datasets={}
for fname in glob.glob("data/eyedata/*.pd"):
    d=pp.PupilData.from_file(fname)
    if d.name in exclude:
        continue
    datasets[d.name.split("_")[0]]=d
[4]:
# Specify pre-processing parameters per subject.
#   min_duration   - minimum duration of a blink
#   min_offset_len - offset-length for blink-detection
#   min_onset_len  - onset-length for blink-detection
#   vel_onset      - velocity threshold for blink onset
#   vel_offset     - velocity threshold for blink offset
#   strategies     - strategies for blink-detection
#   distance       - minimum distance between two blinks
#   margin         - margins for Mahot algorithm
#   cutoff         - lowpass-filter cutoff (Hz)
default_param=dict(
    min_duration=10,
    min_offset_len=2,
    min_onset_len=3,
    vel_onset=-5,
    vel_offset=5,
    strategies=["zero","velocity"],
    distance=100,
    margin=(50,150),
    cutoff=5,
)

# Every subject initially gets an identical (shallow) copy of the defaults;
# per-subject deviations are applied in the next cell.
params={subj:dict(default_param) for subj in datasets}
[5]:
## Fine-tuning of the parameters per subject (done iteratively by inspecting
## the per-subject PDF files generated below and adjusting values here).
## Only deviations from `default_param` are listed; all other parameters keep
## their default values. Using a single overrides-dict plus one update loop
## avoids the error-prone `subj=...` reassignment boilerplate (forgetting one
## reassignment would silently tune the wrong subject).
overrides={
    "4":  {"vel_onset": -10, "min_onset_len": 3, "min_duration": 5},
    "6":  {"strategies": ["zero"]},
    "7":  {"vel_onset": -10},
    "9":  {"vel_onset": -10},
    "10": {"vel_onset": -10},
    "12": {"margin": (50,200), "vel_onset": -10},
    "13": {"vel_onset": -10},
    "14": {"margin": (50,220), "vel_onset": -10},
    "15": {"margin": (50,350), "vel_onset": -10},
    "16": {"margin": (50,300), "vel_onset": -10, "min_onset_len": 3, "min_duration": 5},
    "17": {"vel_onset": -10},
    "21": {"vel_onset": -100, "vel_offset": 100, "min_offset_len": 2, "min_onset_len": 2},
    "23": {"vel_onset": -10, "min_onset_len": 2, "min_duration": 5},
    "24": {"margin": (50,320), "vel_onset": -10, "min_onset_len": 3, "min_duration": 1},
    "25": {"margin": (50,350), "vel_onset": -10},
    "29": {"margin": (100,400), "vel_onset": -10},
    "30": {"margin": (100,300), "vel_onset": -20},
    "31": {"margin": (100,300), "vel_onset": -10},
    "32": {"vel_onset": -10},
    "34": {"margin": (70,350), "vel_onset": -10},
    "35": {"margin": (50,250), "vel_onset": -10, "min_duration": 5},
    "36": {"margin": (100,300), "vel_onset": -10, "min_duration": 5},
    "37": {"margin": (80,250), "vel_onset": -10, "min_duration": 1},
    "39": {"vel_onset": -10, "min_onset_len": 3, "min_duration": 5},
    "40": {"margin": (60,200), "vel_onset": -10},
    "41": {"margin": (60,230), "vel_onset": -20, "min_duration": 3},
    "42": {"vel_onset": -10, "min_duration": 0},
    "43": {"vel_onset": -10, "min_duration": 0},
    "46": {"margin": (80,220), "vel_onset": -20, "min_onset_len": 3, "min_duration": 0},
    "47": {"vel_onset": -10},
    "49": {"vel_onset": -10},
    "50": {"vel_onset": -10, "min_duration": 0},
    "51": {"margin": (50,220), "vel_onset": -10},
    "53": {"vel_onset": -10, "min_duration": 0},
    "54": {"margin": (50,220), "vel_onset": -10},
    "55": {"margin": (50,250), "vel_onset": -10},
    "56": {"vel_onset": -10, "min_duration": 0},
    "57": {"margin": (80,150), "vel_onset": -10, "min_duration": 0},
}
# Apply each subject's deviations on top of the default parameter set.
for subj, tweaks in overrides.items():
    params[subj].update(tweaks)
[6]:
## Run the pre-processing pipeline on all subjects and produce per-subject
## PDFs for visual inspection.
preprocs={}
for subj, d in datasets.items():
    print(d.name)
    pars=params[subj]
    # 1) detect blinks with the subject-specific thresholds
    dp=d.blinks_detect(
        min_duration=pars["min_duration"],
        strategies=pars["strategies"],
        vel_onset=pars["vel_onset"],
        vel_offset=pars["vel_offset"],
        min_onset_len=pars["min_onset_len"],
        min_offset_len=pars["min_offset_len"],
    )
    # 2) merge blinks that are closer together than `distance`
    dp=dp.blinks_merge(distance=pars["distance"])
    # 3) interpolate the blink periods (Mahot algorithm)
    dp=dp.blinks_interp_mahot(
        margin=pars["margin"],
        vel_onset=pars["vel_onset"],
        vel_offset=pars["vel_offset"],
    )
    # 4) smooth the signal with a lowpass filter
    dp=dp.lowpass_filter(cutoff=pars["cutoff"])
    # overlay the preprocessed signal on the raw one in a per-subject PDF
    d.plot_segments(overlay=dp, pdffile="pics/%s.pdf"%d.name,
                    ylim=(dp.sy.min(), dp.sy.max()))
    preprocs[subj]=dp
4_pav
> Writing PDF file 'pics/4_pav.pdf'
39_pav
> Writing PDF file 'pics/39_pav.pdf'
44_pav
> Writing PDF file 'pics/44_pav.pdf'
27_pav
> Writing PDF file 'pics/27_pav.pdf'
56_pav
> Writing PDF file 'pics/56_pav.pdf'
35_pav
> Writing PDF file 'pics/35_pav.pdf'
11_pav
> Writing PDF file 'pics/11_pav.pdf'
37_pav
> Writing PDF file 'pics/37_pav.pdf'
29_pav
> Writing PDF file 'pics/29_pav.pdf'
54_pav
> Writing PDF file 'pics/54_pav.pdf'
25_pav
> Writing PDF file 'pics/25_pav.pdf'
46_pav
> Writing PDF file 'pics/46_pav.pdf'
13_pav
> Writing PDF file 'pics/13_pav.pdf'
6_pav
> Writing PDF file 'pics/6_pav.pdf'
17_pav
> Writing PDF file 'pics/17_pav.pdf'
21_pav
> Writing PDF file 'pics/21_pav.pdf'
42_pav
> Writing PDF file 'pics/42_pav.pdf'
33_pav
> Writing PDF file 'pics/33_pav.pdf'
50_pav
> Writing PDF file 'pics/50_pav.pdf'
2_pav
> Writing PDF file 'pics/2_pav.pdf'
15_pav
> Writing PDF file 'pics/15_pav.pdf'
19_pav
> Writing PDF file 'pics/19_pav.pdf'
52_pav
> Writing PDF file 'pics/52_pav.pdf'
31_pav
> Writing PDF file 'pics/31_pav.pdf'
40_pav
> Writing PDF file 'pics/40_pav.pdf'
23_pav
> Writing PDF file 'pics/23_pav.pdf'
10_pav
> Writing PDF file 'pics/10_pav.pdf'
26_pav
> Writing PDF file 'pics/26_pav.pdf'
45_pav
> Writing PDF file 'pics/45_pav.pdf'
49_pav
> Writing PDF file 'pics/49_pav.pdf'
34_pav
> Writing PDF file 'pics/34_pav.pdf'
57_pav
> Writing PDF file 'pics/57_pav.pdf'
5_pav
> Writing PDF file 'pics/5_pav.pdf'
9_pav
> Writing PDF file 'pics/9_pav.pdf'
7_pav
> Writing PDF file 'pics/7_pav.pdf'
12_pav
> Writing PDF file 'pics/12_pav.pdf'
55_pav
> Writing PDF file 'pics/55_pav.pdf'
36_pav
> Writing PDF file 'pics/36_pav.pdf'
47_pav
> Writing PDF file 'pics/47_pav.pdf'
24_pav
> Writing PDF file 'pics/24_pav.pdf'
3_pav
> Writing PDF file 'pics/3_pav.pdf'
43_pav
> Writing PDF file 'pics/43_pav.pdf'
51_pav
> Writing PDF file 'pics/51_pav.pdf'
32_pav
> Writing PDF file 'pics/32_pav.pdf'
16_pav
> Writing PDF file 'pics/16_pav.pdf'
30_pav
> Writing PDF file 'pics/30_pav.pdf'
53_pav
> Writing PDF file 'pics/53_pav.pdf'
22_pav
> Writing PDF file 'pics/22_pav.pdf'
41_pav
> Writing PDF file 'pics/41_pav.pdf'
14_pav
> Writing PDF file 'pics/14_pav.pdf'
1_pav
> Writing PDF file 'pics/1_pav.pdf'
[24]:
# write down notes about each subject when going through the preprocs
# (keys are subject numbers as strings; values are free-text quality notes;
# this dict is written to preproc_notes.csv and merged into the summary
# table below)
notes={
    '1':"last 5 mins missing; else quite ok, a few semi-blinks not properly detected",
    "2":"many blinks after min 17; but reconstruction works very well",
    "3":"fine",
    "4":"fine (after tuning); a couple of short blinks that do not go all the way down to zero",
    "5":"an awful lot of blinks; reconstruction seems to work but subj should probably be excluded",
    "6":"fine (after tuning); very nice data with few blinks",
    "7":"ok after tuning; weird stuff min 32-33",
    "8":"missing data",
    "9":"fine (after tuning); worse data in second half but ok",
    "10":"many blinks, probably exclude",
    "11":"usable but quite a few blinks (especially in the last part from min 33)",
    "12":"fine after tuning but many blinks in second half",
    "13":"usable but quite a few blinks; some segments are bad but should be ok due to exclusion crit",
    "14":"fine",
    "15":"exclude, many blinks and long opening-period after blinks",
    "16":"fine; very nice data with few blinks",
    "17":"usable but quite a few blinks",
    "18":"missing",
    "19":"usable (borderline), many blinks but nice reconstruction",
    "20":"missing",
    "21":"fine; except from min 37; incredibly fast transients!",
    "22":"exclude; way to many blinks to do anything with this",
    "23":"fine",
    "24":"fine; very slow opening-transient",
    "25":"borderline; a lot of blinks (more towards end) and a few weird artifacts in the second half",
    "26":"fine; great, clean data!",
    "27":"exclude",
    "28":"missing",
    "29":"usable; slow opening-transient",
    "30":"fine",
    "31":"fine",
    "32":"fine",
    "33":"usable; many blinks in second half",
    "34":"fine",
    "35":"borderline; lots of blinks during later segments",
    "36":"borderline; esp near the end",
    "37":"borderline; blinks consist of many consecutive, short-blinks",
    "38":"missing",
    "39":"fine (after tuning)",
    "40":"borderline; many blinks throughout",
    "41":"fine",
    "42":"fine; has some of the weird triple-blinks",
    "43":"fine; missing from min 34 (last block?)",
    "44":"fine",
    "45":"exclude; many blinks throughout",
    "46":"usable",
    "47":"fine",
    "48":"missing",
    "49":"fine",
    "50":"fine",
    "51":"fine",
    "52":"fine",
    "53":"fine",
    "54":"fine",
    "55":"usable; borderline towards the end",
    "56":"usable, worse towards end",
    "57":"exclude"
}
# subjects whose preprocessed data was judged unusable (see notes above);
# these are removed from `preprocs` before the final export below
exclude_preproc=["5","10", "15", "22", "27","45","57"]
[8]:
## Save a table with the preprocessing parameters per subject.
df = (
    pd.DataFrame(params)                           # subjects as columns
    .T                                             # -> one row per subject
    .rename_axis("subj")                           # index holds the subject id
    .reset_index()                                 # turn it into a regular column
    .assign(subj=lambda t: pd.to_numeric(t.subj))  # numeric for proper sorting
    .sort_values("subj")
)
df.to_csv("preproc_params.csv")
[9]:
## Save the per-subject notes as a two-column table (subj, notes).
pd.Series(notes, name="notes").rename_axis("subj").reset_index().to_csv("preproc_notes.csv")
[26]:
## Drop excluded subjects and write the final preprocessed data for analysis.
excluded=set(exclude_preproc)
preprocs={s: d for s, d in preprocs.items() if s not in excluded}
pp.pd_write_pickle(preprocs, "data/export/eye_preproc.pd")
[29]:
## Save a table with the final summary of the preprocessed datasets.
def _count_events(d, prefix):
    """Number of event labels of dataset `d` starting with `prefix`."""
    return sum(1 for lab in d.event_labels if lab.startswith(prefix))

rows=[]
for d in preprocs.values():
    subj_id=d.name.split("_")[0]
    row=dict(d.summary())
    row.update(ncues=_count_events(d, "C_"),
               nfb=_count_events(d, "F_"),
               nresp=_count_events(d, "R_"),
               subj=subj_id,
               note=notes[subj_id])
    rows.append(row)
tab=pd.DataFrame(rows)

# percentage of samples replaced by interpolation
tab["perc_interpolated"]=tab.ninterpolated/tab.n*100.
tab["subj"]=pd.to_numeric(tab["subj"])

tab=tab.sort_values("subj")
scols=["subj","perc_miss","perc_interpolated","nevents",
       "blinks_per_min","duration_minutes","note"]
tab[scols].to_csv("preproc_summary.csv")
This file was created from the following Jupyter-notebook: docs/preproc_example_pavlov.ipynb
Interactive version: Binder badge