Commit 847fa433 authored by Chanelle Lee's avatar Chanelle Lee
Browse files

Changing the noise set

parent 0f2d9445
......@@ -61,7 +61,7 @@
"outputs": [],
"source": [
"n = 5\n",
"qualities = {i: i/(n+1) for i in range(1, (n+1))}"
"qualities = {i: i/(n+1) for i in range(1, (n+1))} # i is the id of the site with corresponding quality"
]
},
{
......@@ -71,9 +71,27 @@
"outputs": [],
"source": [
"def integrand(x, i, j, s):\n",
"    \"\"\"\n",
"    Evaluate f at x for site i multiplied by F at x for site j.\n",
"\n",
"    x : float\n",
"        point at which to evaluate the integrand\n",
"    i : int\n",
"        id of the first site; qualities[i] is passed to f (the pdf) as its mean\n",
"    j : int\n",
"        id of the second site; qualities[j] is passed to F (the cdf) as its mean\n",
"    s : float\n",
"        sigma value to be tested, passed unchanged to both f and F\n",
"\n",
"    Returns the product of the two evaluations.\n",
"    \"\"\"\n",
"    density_i = f(x, qualities[i], s)\n",
"    cumulative_j = F(x, qualities[j], s)\n",
"    return density_i * cumulative_j\n",
"\n",
"def probConfusion(i, j, s):\n",
"    \"\"\"\n",
"    Integrate integrand(x, i, j, s) over x from -inf to +inf.\n",
"\n",
"    i : int\n",
"        id of the first site to be confused (will normally always be 1)\n",
"    j : int\n",
"        id of the site to be confused with\n",
"    s : float\n",
"        sigma value to be tested\n",
"\n",
"    Returns whatever scipy.integrate.quad returns, unchanged. With quad's\n",
"    default options that is a tuple (integral estimate, absolute error\n",
"    estimate), so the integral value itself is element [0].\n",
"    \"\"\"\n",
"    site_args = (i, j, s)\n",
"    quad_result = scipy.integrate.quad(integrand, -np.inf, np.inf, args=site_args)\n",
"    return quad_result"
]
},
......@@ -97,7 +115,7 @@
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x22e0f4ad240>]"
"[<matplotlib.lines.Line2D at 0x96b7400>]"
]
},
"execution_count": 7,
......@@ -129,7 +147,7 @@
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x22e0f548f28>]"
"[<matplotlib.lines.Line2D at 0x9753e48>]"
]
},
"execution_count": 8,
......@@ -322,1116 +340,550 @@
" <td>7.149700e-70</td>\n",
" <td>3.085665e-124</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sigma q2 q3 q4 q5\n",
"0 1.000000e-07 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00\n",
"1 5.005105e-03 4.104751e-122 0.000000e+00 0.000000e+00 0.000000e+00\n",
"2 1.001011e-02 1.051201e-31 3.577518e-122 7.451264e-274 0.000000e+00\n",
"3 1.501511e-02 5.734691e-15 2.733803e-55 9.343211e-123 2.336542e-219\n",
"4 2.002012e-02 4.075678e-09 6.984412e-32 7.149700e-70 3.085665e-124"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### To get an idea of appropriate sigma values, check the cut-off sigma at which every site within a given range has at least a 10% chance of confusion:"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"testParams_DF = pd.DataFrame(columns=['sigma', 'alpha', 'note'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Range is whole quality space"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sigma</th>\n",
" <th>q2</th>\n",
" <th>q3</th>\n",
" <th>q4</th>\n",
" <th>q5</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>0.370370</td>\n",
" <td>0.375167</td>\n",
" <td>0.262259</td>\n",
" <td>0.169892</td>\n",
" <td>0.101546</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>0.375375</td>\n",
" <td>0.376777</td>\n",
" <td>0.265031</td>\n",
" <td>0.173131</td>\n",
" <td>0.104590</td>\n",
" <th>5</th>\n",
" <td>2.502512e-02</td>\n",
" <td>1.242769e-06</td>\n",
" <td>4.746855e-21</td>\n",
" <td>2.142651e-45</td>\n",
" <td>3.304722e-80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>0.380380</td>\n",
" <td>0.378347</td>\n",
" <td>0.267745</td>\n",
" <td>0.176322</td>\n",
" <td>0.107618</td>\n",
" <th>6</th>\n",
" <td>3.003013e-02</td>\n",
" <td>4.346773e-05</td>\n",
" <td>3.619482e-15</td>\n",
" <td>4.226558e-32</td>\n",
" <td>2.779073e-56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>0.385385</td>\n",
" <td>0.379878</td>\n",
" <td>0.270401</td>\n",
" <td>0.179466</td>\n",
" <td>0.110627</td>\n",
" <th>7</th>\n",
" <td>3.503513e-02</td>\n",
" <td>3.843883e-04</td>\n",
" <td>1.271217e-11</td>\n",
" <td>4.388103e-24</td>\n",
" <td>7.574905e-42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>0.390390</td>\n",
" <td>0.381372</td>\n",
" <td>0.273002</td>\n",
" <td>0.182563</td>\n",
" <td>0.113616</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sigma q2 q3 q4 q5\n",
"74 0.370370 0.375167 0.262259 0.169892 0.101546\n",
"75 0.375375 0.376777 0.265031 0.173131 0.104590\n",
"76 0.380380 0.378347 0.267745 0.176322 0.107618\n",
"77 0.385385 0.379878 0.270401 0.179466 0.110627\n",
"78 0.390390 0.381372 0.273002 0.182563 0.113616"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[(df['q2'] >= 0.1) & (df['q3'] >= 0.1) & (df['q4'] >= 0.1) & (df['q5'] >= 0.1)].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$\\sigma = 0.37$\n",
"\n",
"- q2 : 0.38\n",
"- q3 : 0.26\n",
"- q4 : 0.17\n",
"- q5 : 0.10"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Range is q4 or closer"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sigma</th>\n",
" <th>q2</th>\n",
" <th>q3</th>\n",
" <th>q4</th>\n",
" <th>q5</th>\n",
" <th>8</th>\n",
" <td>4.004014e-02</td>\n",
" <td>1.623538e-03</td>\n",
" <td>2.550270e-09</td>\n",
" <td>7.022133e-19</td>\n",
" <td>1.806725e-32</td>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>0.280280</td>\n",
" <td>0.337069</td>\n",
" <td>0.200187</td>\n",
" <td>0.103577</td>\n",
" <td>0.046294</td>\n",
" <th>9</th>\n",
" <td>4.504514e-02</td>\n",
" <td>4.444560e-03</td>\n",
" <td>8.358074e-08</td>\n",
" <td>2.614756e-15</td>\n",
" <td>4.920100e-26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>0.285285</td>\n",
" <td>0.339767</td>\n",
" <td>0.204346</td>\n",
" <td>0.107618</td>\n",
" <td>0.049227</td>\n",
" <th>10</th>\n",
" <td>5.005015e-02</td>\n",
" <td>9.269807e-03</td>\n",
" <td>1.242712e-06</td>\n",
" <td>9.464277e-13</td>\n",
" <td>1.989406e-21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>0.290290</td>\n",
" <td>0.342380</td>\n",
" <td>0.208409</td>\n",
" <td>0.111625</td>\n",
" <td>0.052198</td>\n",
" <th>11</th>\n",
" <td>5.505515e-02</td>\n",
" <td>1.615310e-02</td>\n",
" <td>9.294307e-06</td>\n",
" <td>7.491474e-11</td>\n",
" <td>5.123919e-18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>0.295295</td>\n",
" <td>0.344911</td>\n",
" <td>0.212380</td>\n",
" <td>0.115597</td>\n",
" <td>0.055202</td>\n",
" <th>12</th>\n",
" <td>6.006016e-02</td>\n",
" <td>2.486855e-02</td>\n",
" <td>4.346655e-05</td>\n",
" <td>2.110726e-09</td>\n",
" <td>2.021150e-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>0.300300</td>\n",
" <td>0.347365</td>\n",
" <td>0.216260</td>\n",
" <td>0.119531</td>\n",
" <td>0.058234</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sigma q2 q3 q4 q5\n",
"56 0.280280 0.337069 0.200187 0.103577 0.046294\n",
"57 0.285285 0.339767 0.204346 0.107618 0.049227\n",
"58 0.290290 0.342380 0.208409 0.111625 0.052198\n",
"59 0.295295 0.344911 0.212380 0.115597 0.055202\n",
"60 0.300300 0.347365 0.216260 0.119531 0.058234"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[(df['q2'] >= 0.1) & (df['q3'] >= 0.1) & (df['q4'] >= 0.1)].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$\\sigma=0.28$\n",
"\n",
"- q2 : 0.34\n",
"- q3 : 0.20\n",
"- q4 : 0.10\n",
"- q5 : 0.05"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Range is q3 or closer"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sigma</th>\n",
" <th>q2</th>\n",
" <th>q3</th>\n",
" <th>q4</th>\n",
" <th>q5</th>\n",
" <th>13</th>\n",
" <td>6.506516e-02</td>\n",
" <td>3.504888e-02</td>\n",
" <td>1.458526e-04</td>\n",
" <td>2.757773e-08</td>\n",
" <td>2.125659e-13</td>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>0.185185</td>\n",
" <td>0.262259</td>\n",
" <td>0.101546</td>\n",
" <td>0.028119</td>\n",
" <td>0.005455</td>\n",
" <th>14</th>\n",
" <td>7.007017e-02</td>\n",
" <td>4.629396e-02</td>\n",
" <td>3.843816e-04</td>\n",
" <td>2.259262e-07</td>\n",
" <td>8.581276e-12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>0.190190</td>\n",
" <td>0.267745</td>\n",
" <td>0.107618</td>\n",
" <td>0.031517</td>\n",
" <td>0.006595</td>\n",
" <th>15</th>\n",
" <td>7.507517e-02</td>\n",
" <td>5.823373e-02</td>\n",
" <td>8.460377e-04</td>\n",
" <td>1.242693e-06</td>\n",
" <td>1.703016e-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>0.195195</td>\n",
" <td>0.273002</td>\n",
" <td>0.113616</td>\n",
" <td>0.035049</td>\n",
" <td>0.007867</td>\n",
" <th>16</th>\n",
" <td>8.008018e-02</td>\n",
" <td>7.055579e-02</td>\n",
" <td>1.623519e-03</td>\n",
" <td>5.050674e-06</td>\n",
" <td>1.974383e-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>0.200200</td>\n",
" <td>0.278043</td>\n",
" <td>0.119531</td>\n",
" <td>0.038698</td>\n",
" <td>0.009270</td>\n",
" <th>17</th>\n",
" <td>8.508518e-02</td>\n",
" <td>8.301159e-02</td>\n",
" <td>2.801166e-03</td>\n",
" <td>1.624395e-05</td>\n",
" <td>1.509052e-08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>0.205205</td>\n",
" <td>0.282880</td>\n",
" <td>0.125357</td>\n",
" <td>0.042451</td>\n",
" <td>0.010803</td>\n",
" <th>18</th>\n",
" <td>9.009019e-02</td>\n",
" <td>9.541188e-02</td>\n",
" <td>4.444522e-03</td>\n",
" <td>4.346616e-05</td>\n",
" <td>8.357811e-08</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sigma q2 q3 q4 q5\n",
"37 0.185185 0.262259 0.101546 0.028119 0.005455\n",
"38 0.190190 0.267745 0.107618 0.031517 0.006595\n",
"39 0.195195 0.273002 0.113616 0.035049 0.007867\n",
"40 0.200200 0.278043 0.119531 0.038698 0.009270\n",
"41 0.205205 0.282880 0.125357 0.042451 0.010803"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[(df['q2'] >= 0.1) & (df['q3'] >= 0.1)].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$\\sigma = 0.19$\n",
"\n",
"- q2 : 0.26\n",
"- q3 : 0.10\n",
"- q4 : 0.03\n",
"- q5 : 0.01"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Range is just q2"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sigma</th>\n",
" <th>q2</th>\n",
" <th>q3</th>\n",
" <th>q4</th>\n",
" <th>q5</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>0.095095</td>\n",
" <td>0.107618</td>\n",
" <td>0.006595</td>\n",
" <td>0.000100</td>\n",
" <td>9.509519e-02</td>\n",
" <td>1.076179e-01</td>\n",
" <td>6.595090e-03</td>\n",
" <td>1.004472e-04</td>\n",
" <td>3.576083e-07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>0.100100</td>\n",
" <td>0.119532</td>\n",
" <td>0.009270</td>\n",
" <td>0.000206</td>\n",
" <td>1.001002e-01</td>\n",
" <td>1.195316e-01</td>\n",
" <td>9.269748e-03</td>\n",
" <td>2.062186e-04</td>\n",
" <td>1.242683e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>0.105105</td>\n",
" <td>0.131087</td>\n",
" <td>0.012463</td>\n",
" <td>0.000384</td>\n",
" <td>1.051052e-01</td>\n",
" <td>1.310868e-01</td>\n",
" <td>1.246336e-02</td>\n",
" <td>3.843794e-04</td>\n",
" <td>3.644447e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>0.110110</td>\n",
" <td>0.142242</td>\n",
" <td>0.016153</td>\n",
" <td>0.000662</td>\n",
" <td>1.101102e-01</td>\n",
" <td>1.422418e-01</td>\n",
" <td>1.615303e-02</td>\n",
" <td>6.615889e-04</td>\n",
" <td>9.294145e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>0.115115</td>\n",
" <td>0.152973</td>\n",
" <td>0.020303</td>\n",
" <td>0.001066</td>\n",
" <td>1.151152e-01</td>\n",
" <td>1.529727e-01</td>\n",
" <td>2.030286e-02</td>\n",
" <td>1.065643e-03</td>\n",
" <td>2.110231e-05</td>\n",
" </tr>\n",