{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example: Preprocessing of a full dataset with multiple subjects\n",
    "\n",
    "This is an example of a real dataset of a recent study from our cognitive neuroscience lab at the University of Tromsø.\n",
    "\n",
    "The notebook cannot be run without the underlying data (TODO: add a link to the dataset), but it can be used as an example to tweak for your own studies."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import pypillometry as pp\n",
    "import pylab as plt\n",
    "plt.rcParams.update({'figure.max_open_warning': 0}) ## suppress a warning\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os, glob"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the raw data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load all raw datasets\n",
    "exclude=[\"20_pav\", \"8_pav\", \"1_pav_pilot\"] ## these subjects did not have usable data\n",
    "\n",
    "# here, data is already stored in pypillometry's own format as .pd files\n",
    "# (sorted, because glob returns files in arbitrary order and we want a reproducible run)\n",
    "pdfiles=sorted(glob.glob(\"data/eyedata/*.pd\"))\n",
    "raw_datasets=[pp.PupilData.from_file(fname) for fname in pdfiles]\n",
    "\n",
    "# datasets are stored in a dict structure; the key is the part of the\n",
    "# dataset name before the first underscore (e.g. \"4_pav\" -> \"4\")\n",
    "datasets={d.name.split(\"_\")[0]:d for d in raw_datasets if d.name not in exclude}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preprocessing parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# specify pre-processing parameters per subj\n",
    "default_param={\"min_duration\":10,    # min duration of a blink\n",
    "               \"min_offset_len\":2,   # offset/onset-length for blink-detection\n",
    "               \"min_onset_len\":3,\n",
    "               \"vel_onset\":-5,       # velocity thresholds for onset and offset\n",
    "               \"vel_offset\":5,\n",
    "               \"strategies\":[\"zero\",\"velocity\"],  # strategies for blink-detection\n",
    "               \"distance\":100,       # minimum distance between two blinks\n",
    "               \"margin\":(50,150),    # margins for Mahot algorithm\n",
    "               \"cutoff\":5}           # lowpass-filter cutoff (Hz)\n",
    "\n",
    "# create dict with parameters per subject\n",
    "# all subjects get the same set of default parameters initially\n",
    "# (the strategies-list is copied so that no two subjects share one mutable list)\n",
    "params={subj:{**default_param, \"strategies\":list(default_param[\"strategies\"])}\n",
    "        for subj in datasets.keys()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## fine-tuning of the parameters per subject (this is done iteratively by\n",
    "## inspecting the per-subject PDF-files generated below and changing parameters)\n",
    "## each entry lists the values that are explicitly set for that subject\n",
    "subj_overrides={\n",
    "    \"4\":  {\"vel_onset\":-10, \"min_onset_len\":3, \"min_duration\":5},\n",
    "    \"6\":  {\"strategies\":[\"zero\"]},\n",
    "    \"7\":  {\"vel_onset\":-10},\n",
    "    \"9\":  {\"vel_onset\":-10},\n",
    "    \"10\": {\"vel_onset\":-10},\n",
    "    \"12\": {\"margin\":(50,200), \"vel_onset\":-10},\n",
    "    \"13\": {\"vel_onset\":-10},\n",
    "    \"14\": {\"margin\":(50,220), \"vel_onset\":-10},\n",
    "    \"15\": {\"margin\":(50,350), \"vel_onset\":-10},\n",
    "    \"16\": {\"margin\":(50,300), \"vel_onset\":-10, \"min_onset_len\":3, \"min_duration\":5},\n",
    "    \"17\": {\"vel_onset\":-10},\n",
    "    \"21\": {\"vel_onset\":-100, \"vel_offset\":100, \"min_offset_len\":2, \"min_onset_len\":2},\n",
    "    \"23\": {\"vel_onset\":-10, \"min_onset_len\":2, \"min_duration\":5},\n",
    "    \"24\": {\"margin\":(50,320), \"vel_onset\":-10, \"min_onset_len\":3, \"min_duration\":1},\n",
    "    \"25\": {\"margin\":(50,350), \"vel_onset\":-10},\n",
    "    \"29\": {\"margin\":(100,400), \"vel_onset\":-10},\n",
    "    \"30\": {\"margin\":(100,300), \"vel_onset\":-20},\n",
    "    \"31\": {\"margin\":(100,300), \"vel_onset\":-10},\n",
    "    \"32\": {\"vel_onset\":-10},\n",
    "    \"34\": {\"margin\":(70,350), \"vel_onset\":-10},\n",
    "    \"35\": {\"margin\":(50,250), \"vel_onset\":-10, \"min_duration\":5},\n",
    "    \"36\": {\"margin\":(100,300), \"vel_onset\":-10, \"min_duration\":5},\n",
    "    \"37\": {\"margin\":(80,250), \"vel_onset\":-10, \"min_duration\":1},\n",
    "    \"39\": {\"vel_onset\":-10, \"min_onset_len\":3, \"min_duration\":5},\n",
    "    \"40\": {\"margin\":(60,200), \"vel_onset\":-10},\n",
    "    \"41\": {\"margin\":(60,230), \"vel_onset\":-20, \"min_duration\":3},\n",
    "    \"42\": {\"vel_onset\":-10, \"min_duration\":0},\n",
    "    \"43\": {\"vel_onset\":-10, \"min_duration\":0},\n",
    "    \"46\": {\"margin\":(80,220), \"vel_onset\":-20, \"min_onset_len\":3, \"min_duration\":0},\n",
    "    \"47\": {\"vel_onset\":-10},\n",
    "    \"49\": {\"vel_onset\":-10},\n",
    "    \"50\": {\"vel_onset\":-10, \"min_duration\":0},\n",
    "    \"51\": {\"margin\":(50,220), \"vel_onset\":-10},\n",
    "    \"53\": {\"vel_onset\":-10, \"min_duration\":0},\n",
    "    \"54\": {\"margin\":(50,220), \"vel_onset\":-10},\n",
    "    \"55\": {\"margin\":(50,250), \"vel_onset\":-10},\n",
    "    \"56\": {\"vel_onset\":-10, \"min_duration\":0},\n",
    "    \"57\": {\"margin\":(80,150), \"vel_onset\":-10, \"min_duration\":0},\n",
    "}\n",
    "\n",
    "# params[subj] raises a KeyError if a subject listed above has no dataset,\n",
    "# so a typo in a subject number is noticed immediately\n",
    "for subj,overrides in subj_overrides.items():\n",
    "    params[subj].update(overrides)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run the preprocessing pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4_pav\n", "> Writing PDF file 'pics/4_pav.pdf'\n",
      "39_pav\n", "> Writing PDF file 'pics/39_pav.pdf'\n",
      "44_pav\n", "> Writing PDF file 'pics/44_pav.pdf'\n",
      "27_pav\n", "> Writing PDF file 'pics/27_pav.pdf'\n",
      "56_pav\n", "> Writing PDF file 'pics/56_pav.pdf'\n",
      "35_pav\n", "> Writing PDF file 'pics/35_pav.pdf'\n",
      "11_pav\n", "> Writing PDF file 'pics/11_pav.pdf'\n",
      "37_pav\n", "> Writing PDF file 'pics/37_pav.pdf'\n",
      "29_pav\n", "> Writing PDF file 'pics/29_pav.pdf'\n",
      "54_pav\n", "> Writing PDF file 'pics/54_pav.pdf'\n",
      "25_pav\n", "> Writing PDF file 'pics/25_pav.pdf'\n",
      "46_pav\n", "> Writing PDF file 'pics/46_pav.pdf'\n",
      "13_pav\n", "> Writing PDF file 'pics/13_pav.pdf'\n",
      "6_pav\n", "> Writing PDF file 'pics/6_pav.pdf'\n",
      "17_pav\n", "> Writing PDF file 'pics/17_pav.pdf'\n",
      "21_pav\n", "> Writing PDF file 'pics/21_pav.pdf'\n",
      "42_pav\n", "> Writing PDF file 'pics/42_pav.pdf'\n",
      "33_pav\n", "> Writing PDF file 'pics/33_pav.pdf'\n",
      "50_pav\n", "> Writing PDF file 'pics/50_pav.pdf'\n",
      "2_pav\n", "> Writing PDF file 'pics/2_pav.pdf'\n",
      "15_pav\n", "> Writing PDF file 'pics/15_pav.pdf'\n",
      "19_pav\n", "> Writing PDF file 'pics/19_pav.pdf'\n",
      "52_pav\n", "> Writing PDF file 'pics/52_pav.pdf'\n",
      "31_pav\n", "> Writing PDF file 'pics/31_pav.pdf'\n",
      "40_pav\n", "> Writing PDF file 'pics/40_pav.pdf'\n",
      "23_pav\n", "> Writing PDF file 'pics/23_pav.pdf'\n",
      "10_pav\n", "> Writing PDF file 'pics/10_pav.pdf'\n",
      "26_pav\n", "> Writing PDF file 'pics/26_pav.pdf'\n",
      "45_pav\n", "> Writing PDF file 'pics/45_pav.pdf'\n",
      "49_pav\n", "> Writing PDF file 'pics/49_pav.pdf'\n",
      "34_pav\n", "> Writing PDF file 'pics/34_pav.pdf'\n",
      "57_pav\n", "> Writing PDF file 'pics/57_pav.pdf'\n",
      "5_pav\n", "> Writing PDF file 'pics/5_pav.pdf'\n",
      "9_pav\n", "> Writing PDF file 'pics/9_pav.pdf'\n",
      "7_pav\n", "> Writing PDF file 'pics/7_pav.pdf'\n",
      "12_pav\n", "> Writing PDF file 'pics/12_pav.pdf'\n",
      "55_pav\n", "> Writing PDF file 'pics/55_pav.pdf'\n",
      "36_pav\n", "> Writing PDF file 'pics/36_pav.pdf'\n",
      "47_pav\n", "> Writing PDF file 'pics/47_pav.pdf'\n",
      "24_pav\n", "> Writing PDF file 'pics/24_pav.pdf'\n",
      "3_pav\n", "> Writing PDF file 'pics/3_pav.pdf'\n",
      "43_pav\n", "> Writing PDF file 'pics/43_pav.pdf'\n",
      "51_pav\n", "> Writing PDF file 'pics/51_pav.pdf'\n",
      "32_pav\n", "> Writing PDF file 'pics/32_pav.pdf'\n",
      "16_pav\n", "> Writing PDF file 'pics/16_pav.pdf'\n",
      "30_pav\n", "> Writing PDF file 'pics/30_pav.pdf'\n",
      "53_pav\n", "> Writing PDF file 'pics/53_pav.pdf'\n",
      "22_pav\n", "> Writing PDF file 'pics/22_pav.pdf'\n",
      "41_pav\n", "> Writing PDF file 'pics/41_pav.pdf'\n",
      "14_pav\n", "> Writing PDF file 'pics/14_pav.pdf'\n",
      "1_pav\n", "> Writing PDF file 'pics/1_pav.pdf'\n"
     ]
    }
   ],
   "source": [
    "## run pre-proc pipeline on all subjects and produce PDFs for inspection\n",
    "os.makedirs(\"pics\", exist_ok=True) # folder the per-subject PDFs are written to\n",
    "preprocs={}\n",
    "for subj,d in datasets.items():\n",
    "    print(d.name)\n",
    "    pars=params[subj]\n",
    "    # detect blinks -> merge close blinks -> interpolate blinks -> lowpass-filter\n",
    "    dp=(d.blinks_detect(min_duration=pars[\"min_duration\"], strategies=pars[\"strategies\"],\n",
    "                        vel_onset=pars[\"vel_onset\"], vel_offset=pars[\"vel_offset\"],\n",
    "                        min_onset_len=pars[\"min_onset_len\"], min_offset_len=pars[\"min_offset_len\"])\n",
    "         .blinks_merge(distance=pars[\"distance\"])\n",
    "         .blinks_interp_mahot(margin=pars[\"margin\"], vel_onset=pars[\"vel_onset\"], vel_offset=pars[\"vel_offset\"])\n",
    "         .lowpass_filter(cutoff=pars[\"cutoff\"]))\n",
    "    # raw signal with the preprocessed signal overlaid, y-axis scaled to the preprocessed data\n",
    "    d.plot_segments(overlay=dp, pdffile=\"pics/%s.pdf\"%d.name, ylim=(dp.sy.min(), dp.sy.max()))\n",
    "    # release the figures once the PDF is written so they do not pile up across subjects\n",
    "    # (a no-op if plot_segments already closed them)\n",
    "    plt.close(\"all\")\n",
    "    preprocs[subj]=dp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Notes, exclusions and export"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# write down notes about each subject when going through the preprocs\n",
    "notes={\n",
    "    '1':\"last 5 mins missing; else quite ok, a few semi-blinks not properly detected\",\n",
    "    \"2\":\"many blinks after min 17; but reconstruction works very well\",\n",
    "    \"3\":\"fine\",\n",
    "    \"4\":\"fine (after tuning); a couple of short blinks that do not go all the way down to zero\",\n",
    "    \"5\":\"an awful lot of blinks; reconstruction seems to work but subj should probably be excluded\",\n",
    "    \"6\":\"fine (after tuning); very nice data with few blinks\",\n",
    "    \"7\":\"ok after tuning; weird stuff min 32-33\",\n",
    "    \"8\":\"missing data\",\n",
    "    \"9\":\"fine (after tuning); worse data in second half but ok\",\n",
    "    \"10\":\"many blinks, probably exclude\",\n",
    "    \"11\":\"usable but quite a few blinks (especially in the last part from min 33)\",\n",
    "    \"12\":\"fine after tuning but many blinks in second half\",\n",
    "    \"13\":\"usable but quite a few blinks; some segments are bad but should be ok due to exclusion crit\",\n",
    "    \"14\":\"fine\",\n",
    "    \"15\":\"exclude, many blinks and long opening-period after blinks\",\n",
    "    \"16\":\"fine; very nice data with few blinks\",\n",
    "    \"17\":\"usable but quite a few blinks\",\n",
    "    \"18\":\"missing\",\n",
    "    \"19\":\"usable (borderline), many blinks but nice reconstruction\",\n",
    "    \"20\":\"missing\",\n",
    "    \"21\":\"fine; except from min 37; incredibly fast transients!\",\n",
    "    \"22\":\"exclude; way to many blinks to do anything with this\",\n",
    "    \"23\":\"fine\",\n",
    "    \"24\":\"fine; very slow opening-transient\",\n",
    "    \"25\":\"borderline; a lot of blinks (more towards end) and a few weird artifacts in the second half\",\n",
    "    \"26\":\"fine; great, clean data!\",\n",
    "    \"27\":\"exclude\",\n",
    "    \"28\":\"missing\",\n",
    "    \"29\":\"usable; slow opening-transient\",\n",
    "    \"30\":\"fine\",\n",
    "    \"31\":\"fine\",\n",
    "    \"32\":\"fine\",\n",
    "    \"33\":\"usable; many blinks in second half\",\n",
    "    \"34\":\"fine\",\n",
    "    \"35\":\"borderline; lots of blinks during later segments\",\n",
    "    \"36\":\"borderline; esp near the end\",\n",
    "    \"37\":\"borderline; blinks consist of many consecutive, short-blinks\",\n",
    "    \"38\":\"missing\",\n",
    "    \"39\":\"fine (after tuning)\",\n",
    "    \"40\":\"borderline; many blinks throughout\",\n",
    "    \"41\":\"fine\",\n",
    "    \"42\":\"fine; has some of the weird triple-blinks\",\n",
    "    \"43\":\"fine; missing from min 34 (last block?)\",\n",
    "    \"44\":\"fine\",\n",
    "    \"45\":\"exclude; many blinks throughout\",\n",
    "    \"46\":\"usable\",\n",
    "    \"47\":\"fine\",\n",
    "    \"48\":\"missing\",\n",
    "    \"49\":\"fine\",\n",
    "    \"50\":\"fine\",\n",
    "    \"51\":\"fine\",\n",
    "    \"52\":\"fine\",\n",
    "    \"53\":\"fine\",\n",
    "    \"54\":\"fine\",\n",
    "    \"55\":\"usable; borderline towards the end\",\n",
    "    \"56\":\"usable, worse towards end\",\n",
    "    \"57\":\"exclude\"\n",
    "}\n",
    "# subjects that are dropped from the final export because of their data quality\n",
    "exclude_preproc=[\"5\",\"10\", \"15\", \"22\", \"27\",\"45\",\"57\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## save table with preproc-parameters per subj\n",
    "## (one row per subject, sorted numerically by subject number)\n",
    "df=pd.DataFrame(params).T\n",
    "df.index.name=\"subj\"\n",
    "df=df.reset_index()\n",
    "df[\"subj\"]=pd.to_numeric(df[\"subj\"])\n",
    "df=df.sort_values(\"subj\")\n",
    "df.to_csv(\"preproc_params.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## save table with notes\n",
    "pd.DataFrame(list(notes.items()), columns=[\"subj\",\"notes\"]).to_csv(\"preproc_notes.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## write final pre-processed files for further analysis\n",
    "## (kept under a separate name so that `preprocs` still holds all subjects)\n",
    "os.makedirs(\"data/export\", exist_ok=True)\n",
    "preprocs_final={subj:d for subj,d in preprocs.items() if subj not in exclude_preproc}\n",
    "pp.pd_write_pickle(preprocs_final, \"data/export/eye_preproc.pd\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## save table with final summary of preprocs\n",
    "def summary_row(d):\n",
    "    \"\"\"Return one row of the summary table for dataset `d`.\n",
    "\n",
    "    The row consists of `d.summary()` plus the number of event labels starting\n",
    "    with \"C_\", \"F_\" and \"R_\", the subject number and the note for that subject.\n",
    "    \"\"\"\n",
    "    def count_labels(prefix):\n",
    "        return len([lab for lab in d.event_labels if lab.startswith(prefix)])\n",
    "    subj=d.name.split(\"_\")[0]\n",
    "    return {**d.summary(),\n",
    "            \"ncues\":count_labels(\"C_\"),\n",
    "            \"nfb\":count_labels(\"F_\"),\n",
    "            \"nresp\":count_labels(\"R_\"),\n",
    "            \"subj\":subj,\n",
    "            \"note\":notes[subj]}\n",
    "\n",
    "tab=pd.DataFrame([summary_row(d) for d in preprocs_final.values()])\n",
    "tab[\"perc_interpolated\"]=tab.ninterpolated/tab.n*100.\n",
    "scols=[\"subj\",\"perc_miss\",\"perc_interpolated\",\"nevents\",\"blinks_per_min\",\"duration_minutes\",\"note\"]\n",
    "tab[\"subj\"]=pd.to_numeric(tab[\"subj\"])\n",
    "\n",
    "tab=tab.sort_values(\"subj\")\n",
    "tab[scols].to_csv(\"preproc_summary.csv\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}