Commit 755f1241 authored by Chanelle Lee's avatar Chanelle Lee
Browse files

Merge branch 'master' of git.brl.ac.uk:c53-lee/BotSimBestOfN

Made some logging changes on desktop but forgot to pull laptop changes
parents 14498068 71213f44
This diff is collapsed.
This diff is collapsed.
......@@ -1247,18 +1247,87 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" sigma alpha note\n",
"paramSet \n",
"1 0.370370 0.375167 Prob_q2\n",
"2 0.280280 0.337069 Prob_q2\n",
"3 0.185185 0.262259 Prob_q2\n",
"4 0.095095 0.107618 Prob_q2\n",
"5 0.370370 0.227216 mean\n",
"6 0.280280 0.171782 mean\n",
"7 0.185185 0.099345 mean\n",
"8 0.095095 0.028578 mean\n",
"9 0.700701 0.433217 Prob_q2\n",
"10 0.525526 0.411280 Prob_q2\n",
"11 0.350350 0.368292 Prob_q2\n",
"12 0.175175 0.250550 Prob_q2\n",
"13 0.700701 0.339747 mean\n",
"14 0.525526 0.293395 mean\n",
"15 0.350350 0.216131 mean\n",
"16 0.175175 0.091280 mean\n"
]
}
],
"source": [
"testParams_DF['paramSet'] = range(1, len(testParams_DF)+1)\n",
"testParams_DF = testParams_DF.set_index('paramSet')\n",
"print(testParams_DF)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"testParams_DF.reset.to_csv('testParams.csv')"
"testParams_DF.to_csv('testParams.csv')"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.37037046296296294"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"testParams_DF.iloc[0]['sigma']"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"set(testParams_DF.index.values) "
]
},
{
......
%% Cell type:code id: tags:
``` python
import numpy as np
import scipy
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
```
%% Cell type:markdown id: tags:
# Analysis of the probability of confusing qualities for different values of sigma
%% Cell type:markdown id: tags:
First need the pdf and cdf of the normal distributions which will represent the noise around each of the quality values
%% Cell type:code id: tags:
``` python
def f(x, mu, s):
    """Return the normal probability density at ``x``.

    The distribution has mean ``mu`` and standard deviation ``s``; this is
    a thin wrapper around ``scipy.stats.norm.pdf``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is loaded on every SciPy version.
    from scipy.stats import norm

    return norm.pdf(x, loc=mu, scale=s)
```
%% Cell type:code id: tags:
``` python
def F(x, mu, s):
    """Return the normal cumulative distribution function at ``x``.

    The distribution has mean ``mu`` and standard deviation ``s``; this is
    a thin wrapper around ``scipy.stats.norm.cdf``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is loaded on every SciPy version.
    from scipy.stats import norm

    return norm.cdf(x, loc=mu, scale=s)
```
%% Cell type:markdown id: tags:
Qualities are set in the interval (0,1) and equally spaced
%% Cell type:code id: tags:
``` python
n = 5
qualities = {i: i/(n+1) for i in range(1, (n+1))}
```
%% Cell type:code id: tags:
``` python
def integrand(x, i, j, s):
    """Integrand used for the confusion probability of sites ``i`` and ``j``.

    Multiplies the noise density around quality ``i`` by the cumulative
    noise distribution around quality ``j``, both evaluated at ``x`` with
    standard deviation ``s``.
    """
    density_i = f(x, qualities[i], s)
    cumulative_j = F(x, qualities[j], s)
    return density_i * cumulative_j
def probConfusion(i, j, s):
    """Integrate ``integrand`` over the whole real line for sites ``i``, ``j``.

    Returns the tuple produced by ``scipy.integrate.quad``: the value of the
    integral followed by an estimate of its absolute error. Callers in this
    notebook take element ``[0]`` as the confusion probability.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.integrate`` is loaded on every SciPy version.
    from scipy.integrate import quad

    return quad(integrand, -np.inf, np.inf, args=(i, j, s))
```
%% Cell type:code id: tags:
``` python
s0 = 0.0000001
sf = 100
ss = np.linspace(s0, sf, 1000)
confusions = [probConfusion(1, 2, s)[0] for s in ss]
```
%% Cell type:code id: tags:
``` python
plt.plot(ss, confusions)
```
%%%% Output: execute_result
[<matplotlib.lines.Line2D at 0x2140dc6d9b0>]
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
ss = np.linspace(0.0000001, 10, 1000)
confusions = [probConfusion(1, 2, s)[0] for s in ss]
plt.plot(ss, confusions)
```
%%%% Output: execute_result
[<matplotlib.lines.Line2D at 0x2140dd105f8>]
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
ss = np.linspace(0.0000001, 10, 1000)
confusions = [[probConfusion(1, j, s)[0] for s in ss] for j in [2, 3, 4, 5]]
for confusion in confusions:
plt.plot(ss, confusion)
```
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
ss = np.linspace(0.0000001, 10, 1000)
confusions = [[probConfusion(1, j, s)[0] for s in ss] for j in [2, 3, 4, 5]]
for confusion in confusions:
plt.plot(ss, confusion)
```
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
ss = np.linspace(0.0000001, 5, 1000)
confusions = [[probConfusion(1, j, s)[0] for s in ss] for j in [2, 3, 4, 5]]
for confusion in confusions:
plt.plot(ss, confusion)
```
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
d = {'sigma':ss, **{'q{}'.format(j): confusions[i] for i, j in enumerate([2, 3, 4, 5])}}
```
%% Cell type:code id: tags:
``` python
df = pd.DataFrame(data=d)
```
%% Cell type:code id: tags:
``` python
df.head()
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
0 1.000000e-07 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
1 5.005105e-03 4.104751e-122 0.000000e+00 0.000000e+00 0.000000e+00
2 1.001011e-02 1.051201e-31 3.577518e-122 7.451264e-274 0.000000e+00
3 1.501511e-02 5.734691e-15 2.733803e-55 9.343211e-123 2.336542e-219
4 2.002012e-02 4.075678e-09 6.984412e-32 7.149700e-70 3.085665e-124
%% Cell type:markdown id: tags:
### To get an idea of appropriate sigma values, find the cut-offs that give at least a 10% chance of confusion for all sites within a range:
%% Cell type:code id: tags:
``` python
testParams_DF = pd.DataFrame(columns=['sigma', 'alpha', 'note'])
```
%% Cell type:markdown id: tags:
Range is whole quality space
%% Cell type:code id: tags:
``` python
df.loc[(df['q2'] >= 0.1) & (df['q3'] >= 0.1) & (df['q4'] >= 0.1) & (df['q5'] >= 0.1)].head()
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
74 0.370370 0.375167 0.262259 0.169892 0.101546
75 0.375375 0.376777 0.265031 0.173131 0.104590
76 0.380380 0.378347 0.267745 0.176322 0.107618
77 0.385385 0.379878 0.270401 0.179466 0.110627
78 0.390390 0.381372 0.273002 0.182563 0.113616
%% Cell type:markdown id: tags:
$\sigma = 0.37$
- q2 : 0.38
- q3 : 0.26
- q4 : 0.17
- q5 : 0.10
%% Cell type:markdown id: tags:
Range is q4 or closer
%% Cell type:code id: tags:
``` python
df.loc[(df['q2'] >= 0.1) & (df['q3'] >= 0.1) & (df['q4'] >= 0.1)].head()
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
56 0.280280 0.337069 0.200187 0.103577 0.046294
57 0.285285 0.339767 0.204346 0.107618 0.049227
58 0.290290 0.342380 0.208409 0.111625 0.052198
59 0.295295 0.344911 0.212380 0.115597 0.055202
60 0.300300 0.347365 0.216260 0.119531 0.058234
%% Cell type:markdown id: tags:
$\sigma=0.28$
- q2 : 0.34
- q3 : 0.20
- q4 : 0.10
- q5 : 0.05
%% Cell type:markdown id: tags:
Range is q3 or closer
%% Cell type:code id: tags:
``` python
df.loc[(df['q2'] >= 0.1) & (df['q3'] >= 0.1)].head()
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
37 0.185185 0.262259 0.101546 0.028119 0.005455
38 0.190190 0.267745 0.107618 0.031517 0.006595
39 0.195195 0.273002 0.113616 0.035049 0.007867
40 0.200200 0.278043 0.119531 0.038698 0.009270
41 0.205205 0.282880 0.125357 0.042451 0.010803
%% Cell type:markdown id: tags:
$\sigma = 0.19$
- q2 : 0.26
- q3 : 0.10
- q4 : 0.03
- q5 : 0.01
%% Cell type:markdown id: tags:
Range is just q2
%% Cell type:code id: tags:
``` python
df.loc[(df['q2'] >= 0.1)].head()
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
19 0.095095 0.107618 0.006595 0.000100 3.576083e-07
20 0.100100 0.119532 0.009270 0.000206 1.242683e-06
21 0.105105 0.131087 0.012463 0.000384 3.644447e-06
22 0.110110 0.142242 0.016153 0.000662 9.294145e-06
23 0.115115 0.152973 0.020303 0.001066 2.110231e-05
%% Cell type:markdown id: tags:
$\sigma = 0.1$
- q2 : 0.11
- q3 : 0.01
- q4 : 0.00
- q5 : 0.00
%% Cell type:code id: tags:
``` python
sns.lineplot(data=df.set_index('sigma'))
```
%%%% Output: execute_result
<matplotlib.axes._subplots.AxesSubplot at 0x2140ed8b860>
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
sns.lineplot(data=df.where(df['sigma'] <= 1.0).set_index('sigma'))
# plotting the values of sigma
plt.axvline(0.1, alpha=0.3, color='grey')
plt.axvline(0.19, alpha=0.3, color='grey')
plt.axvline(0.28, alpha=0.3, color='grey')
plt.axvline(0.37, alpha=0.3, color='grey')
# plotting P = 0.1
plt.axhline(0.1, alpha=0.3, color='grey')
```
%%%% Output: execute_result
<matplotlib.lines.Line2D at 0x2140f13ad30>
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
desProb = 0.1
# Masks: the confusion probability with each site meets the desired threshold.
ok_q2 = df['q2'] >= desProb
ok_q3 = df['q3'] >= desProb
ok_q4 = df['q4'] >= desProb
ok_q5 = df['q5'] >= desProb
# First qualifying row for each progressively narrower range of sites.
range_all = df[ok_q2 & ok_q3 & ok_q4 & ok_q5].iloc[0]
range_q4 = df[ok_q2 & ok_q3 & ok_q4].iloc[0]
range_q3 = df[ok_q2 & ok_q3].iloc[0]
range_q2 = df[ok_q2].iloc[0]
df_1Ranges = pd.DataFrame(data=[range_all, range_q4, range_q3, range_q2])
```
%% Cell type:code id: tags:
``` python
df_1Ranges
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
74 0.370370 0.375167 0.262259 0.169892 1.015460e-01
56 0.280280 0.337069 0.200187 0.103577 4.629378e-02
37 0.185185 0.262259 0.101546 0.028119 5.454770e-03
19 0.095095 0.107618 0.006595 0.000100 3.576083e-07
%% Cell type:code id: tags:
``` python
# DataFrame.append was removed in pandas 2.0, so build a frame of the new
# rows and concatenate it instead. alpha is the q2 confusion probability.
new_rows = pd.DataFrame([{'sigma': row.sigma, 'alpha': row.q2, 'note': 'Prob_q2'}
                         for row in df_1Ranges.itertuples()])
testParams_DF = pd.concat([testParams_DF, new_rows])
```
%% Cell type:code id: tags:
``` python
alphas_1 = [np.mean(row[2:]) for row in df_1Ranges.itertuples()]
alphas_1
```
%%%% Output: execute_result
[0.22721601881269943,
0.17178190793707013,
0.09934474874734342,
0.028578442773621412]
%% Cell type:code id: tags:
``` python
# DataFrame.append was removed in pandas 2.0, so build a frame of the new
# rows and concatenate it instead. alpha is the matching entry of alphas_1.
new_rows = pd.DataFrame([{'sigma': row.sigma, 'alpha': alpha, 'note': 'mean'}
                         for row, alpha in zip(df_1Ranges.itertuples(), alphas_1)])
testParams_DF = pd.concat([testParams_DF, new_rows])
```
%% Cell type:markdown id: tags:
### To get an idea of appropriate sigma values, find the cut-offs that give at least a 25% chance of confusion for all sites within a range:
%% Cell type:code id: tags:
``` python
desProb = 0.25
# Masks: the confusion probability with each site meets the desired threshold.
ok_q2 = df['q2'] >= desProb
ok_q3 = df['q3'] >= desProb
ok_q4 = df['q4'] >= desProb
ok_q5 = df['q5'] >= desProb
# First qualifying row for each progressively narrower range of sites.
range_all = df[ok_q2 & ok_q3 & ok_q4 & ok_q5].iloc[0]
range_q4 = df[ok_q2 & ok_q3 & ok_q4].iloc[0]
range_q3 = df[ok_q2 & ok_q3].iloc[0]
range_q2 = df[ok_q2].iloc[0]
df_25Ranges = pd.DataFrame(data=[range_all, range_q4, range_q3, range_q2])
```
%% Cell type:code id: tags:
``` python
df_25Ranges
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
140 0.700701 0.433217 0.368292 0.306930 0.250550
105 0.525526 0.411280 0.326893 0.250550 0.184855
70 0.350350 0.368292 0.250550 0.156453 0.089228
35 0.175175 0.250550 0.089228 0.021781 0.003561
%% Cell type:code id: tags:
``` python
sns.lineplot(data=df.where(df['sigma'] <= 1.0).set_index('sigma'))
# plotting the values of sigma
plt.axvline(df_25Ranges['sigma'].iloc[0], alpha=0.3, color='grey')
plt.axvline(df_25Ranges['sigma'].iloc[1], alpha=0.3, color='grey')
plt.axvline(df_25Ranges['sigma'].iloc[2], alpha=0.3, color='grey')
plt.axvline(df_25Ranges['sigma'].iloc[3], alpha=0.3, color='grey')
# plotting P = 0.25
plt.axhline(desProb, alpha=0.3, color='grey')
```
%%%% Output: execute_result
<matplotlib.lines.Line2D at 0x2140f873a20>
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
# DataFrame.append was removed in pandas 2.0, so build a frame of the new
# rows and concatenate it instead. alpha is the q2 confusion probability.
new_rows = pd.DataFrame([{'sigma': row.sigma, 'alpha': row.q2, 'note': 'Prob_q2'}
                         for row in df_25Ranges.itertuples()])
testParams_DF = pd.concat([testParams_DF, new_rows])
```
%% Cell type:code id: tags:
``` python
alphas_25 = [np.mean(row[2:]) for row in df_25Ranges.itertuples()]
```
%% Cell type:code id: tags:
``` python
# DataFrame.append was removed in pandas 2.0, so build a frame of the new
# rows and concatenate it instead. alpha is the matching entry of alphas_25.
new_rows = pd.DataFrame([{'sigma': row.sigma, 'alpha': alpha, 'note': 'mean'}
                         for row, alpha in zip(df_25Ranges.itertuples(), alphas_25)])
testParams_DF = pd.concat([testParams_DF, new_rows])
```
%% Cell type:code id: tags:
``` python
print(testParams_DF)
```
%%%% Output: stream
sigma alpha note
0 0.370370 0.375167 Prob_q2
1 0.280280 0.337069 Prob_q2
2 0.185185 0.262259 Prob_q2
3 0.095095 0.107618 Prob_q2
0 0.370370 0.227216 mean
1 0.280280 0.171782 mean
2 0.185185 0.099345 mean
3 0.095095 0.028578 mean
0 0.700701 0.433217 Prob_q2
1 0.525526 0.411280 Prob_q2
2 0.350350 0.368292 Prob_q2
3 0.175175 0.250550 Prob_q2
0 0.700701 0.339747 mean
1 0.525526 0.293395 mean
2 0.350350 0.216131 mean
3 0.175175 0.091280 mean
%% Cell type:code id: tags:
``` python
testParams_DF['paramSet'] = range(1, len(testParams_DF)+1)
testParams_DF = testParams_DF.set_index('paramSet')
print(testParams_DF)
```
%%%% Output: stream
sigma alpha note
paramSet
1 0.370370 0.375167 Prob_q2
2 0.280280 0.337069 Prob_q2
3 0.185185 0.262259 Prob_q2
4 0.095095 0.107618 Prob_q2
5 0.370370 0.227216 mean
6 0.280280 0.171782 mean
7 0.185185 0.099345 mean
8 0.095095 0.028578 mean
9 0.700701 0.433217 Prob_q2
10 0.525526 0.411280 Prob_q2
11 0.350350 0.368292 Prob_q2
12 0.175175 0.250550 Prob_q2
13 0.700701 0.339747 mean
14 0.525526 0.293395 mean
15 0.350350 0.216131 mean
16 0.175175 0.091280 mean
%% Cell type:code id: tags:
``` python
testParams_DF.to_csv('testParams.csv')
```
%% Cell type:code id: tags:
``` python
testParams_DF.iloc[0]['sigma']
```
%%%% Output: execute_result
0.37037046296296294
%% Cell type:code id: tags:
``` python
# DataFrame has no ``reset`` attribute, so ``testParams_DF.reset.to_csv``
# raised AttributeError; call to_csv on the frame directly so the index is
# written as the first CSV column.
testParams_DF.to_csv('testParams.csv')
set(testParams_DF.index.values)
```
%%%% Output: execute_result
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
%% Cell type:code id: tags:
``` python
```
......
This diff is collapsed.
This diff is collapsed.
......@@ -1247,43 +1247,43 @@
},
{
"cell_type": "code",
"execution_count": 85,
"execution_count": 101,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" sigma alpha note\n",
"id \n",
"1 0.370370 0.375167 Prob_q2\n",
"2 0.280280 0.337069 Prob_q2\n",
"3 0.185185 0.262259 Prob_q2\n",
"4 0.095095 0.107618 Prob_q2\n",
"5 0.370370 0.227216 mean\n",
"6 0.280280 0.171782 mean\n",
"7 0.185185 0.099345 mean\n",
"8 0.095095 0.028578 mean\n",
"9 0.700701 0.433217 Prob_q2\n",
"10 0.525526 0.411280 Prob_q2\n",
"11 0.350350 0.368292 Prob_q2\n",
"12 0.175175 0.250550 Prob_q2\n",
"13 0.700701 0.339747 mean\n",
"14 0.525526 0.293395 mean\n",
"15 0.350350 0.216131 mean\n",
"16 0.175175 0.091280 mean\n"
" sigma alpha note\n",
"paramSet \n",
"1 0.370370 0.375167 Prob_q2\n",
"2 0.280280 0.337069 Prob_q2\n",
"3 0.185185 0.262259 Prob_q2\n",
"4 0.095095 0.107618 Prob_q2\n",
"5 0.370370 0.227216 mean\n",
"6 0.280280 0.171782 mean\n",
"7 0.185185 0.099345 mean\n",
"8 0.095095 0.028578 mean\n",
"9 0.700701 0.433217 Prob_q2\n",
"10 0.525526 0.411280 Prob_q2\n",
"11 0.350350 0.368292 Prob_q2\n",
"12 0.175175 0.250550 Prob_q2\n",
"13 0.700701 0.339747 mean\n",
"14 0.525526 0.293395 mean\n",
"15 0.350350 0.216131 mean\n",
"16 0.175175 0.091280 mean\n"
]
}
],
"source": [
"testParams_DF['id'] = range(1, len(testParams_DF)+1)\n",
"testParams_DF = testParams_DF.set_index('id')\n",
"testParams_DF['paramSet'] = range(1, len(testParams_DF)+1)\n",
"testParams_DF = testParams_DF.set_index('paramSet')\n",
"print(testParams_DF)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
......@@ -1292,7 +1292,7 @@
},
{
"cell_type": "code",
"execution_count": 96,
"execution_count": 103,
"metadata": {},
"outputs": [
{
......@@ -1301,7 +1301,7 @@
"0.37037046296296294"
]
},
"execution_count": 96,
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
......@@ -1312,7 +1312,7 @@
},
{
"cell_type": "code",
"execution_count": 99,
"execution_count": 104,
"metadata": {},
"outputs": [
{
......@@ -1321,7 +1321,7 @@
"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}"
]
},
"execution_count": 99,
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
......
%% Cell type:code id: tags:
``` python
import numpy as np
import scipy
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
```
%% Cell type:markdown id: tags:
# Analysis of the probability of confusing qualities for different values of sigma
%% Cell type:markdown id: tags:
First need the pdf and cdf of the normal distributions which will represent the noise around each of the quality values
%% Cell type:code id: tags:
``` python
def f(x, mu, s):
    """Return the normal probability density at ``x``.

    The distribution has mean ``mu`` and standard deviation ``s``; this is
    a thin wrapper around ``scipy.stats.norm.pdf``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is loaded on every SciPy version.
    from scipy.stats import norm

    return norm.pdf(x, loc=mu, scale=s)
```
%% Cell type:code id: tags:
``` python
def F(x, mu, s):
    """Return the normal cumulative distribution function at ``x``.

    The distribution has mean ``mu`` and standard deviation ``s``; this is
    a thin wrapper around ``scipy.stats.norm.cdf``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is loaded on every SciPy version.
    from scipy.stats import norm

    return norm.cdf(x, loc=mu, scale=s)
```
%% Cell type:markdown id: tags:
Qualities are set in the interval (0,1) and equally spaced
%% Cell type:code id: tags:
``` python
n = 5
qualities = {i: i/(n+1) for i in range(1, (n+1))}
```
%% Cell type:code id: tags:
``` python
def integrand(x, i, j, s):
    """Integrand used for the confusion probability of sites ``i`` and ``j``.

    Multiplies the noise density around quality ``i`` by the cumulative
    noise distribution around quality ``j``, both evaluated at ``x`` with
    standard deviation ``s``.
    """
    density_i = f(x, qualities[i], s)
    cumulative_j = F(x, qualities[j], s)
    return density_i * cumulative_j
def probConfusion(i, j, s):
    """Integrate ``integrand`` over the whole real line for sites ``i``, ``j``.

    Returns the tuple produced by ``scipy.integrate.quad``: the value of the
    integral followed by an estimate of its absolute error. Callers in this
    notebook take element ``[0]`` as the confusion probability.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.integrate`` is loaded on every SciPy version.
    from scipy.integrate import quad

    return quad(integrand, -np.inf, np.inf, args=(i, j, s))
```
%% Cell type:code id: tags:
``` python
s0 = 0.0000001
sf = 100
ss = np.linspace(s0, sf, 1000)
confusions = [probConfusion(1, 2, s)[0] for s in ss]
```
%% Cell type:code id: tags:
``` python
plt.plot(ss, confusions)
```
%%%% Output: execute_result
[<matplotlib.lines.Line2D at 0x2140dc6d9b0>]
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
ss = np.linspace(0.0000001, 10, 1000)
confusions = [probConfusion(1, 2, s)[0] for s in ss]
plt.plot(ss, confusions)
```
%%%% Output: execute_result
[<matplotlib.lines.Line2D at 0x2140dd105f8>]
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
ss = np.linspace(0.0000001, 10, 1000)
confusions = [[probConfusion(1, j, s)[0] for s in ss] for j in [2, 3, 4, 5]]
for confusion in confusions:
plt.plot(ss, confusion)
```
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
ss = np.linspace(0.0000001, 10, 1000)
confusions = [[probConfusion(1, j, s)[0] for s in ss] for j in [2, 3, 4, 5]]
for confusion in confusions:
plt.plot(ss, confusion)
```
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
ss = np.linspace(0.0000001, 5, 1000)
confusions = [[probConfusion(1, j, s)[0] for s in ss] for j in [2, 3, 4, 5]]
for confusion in confusions:
plt.plot(ss, confusion)
```
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
d = {'sigma':ss, **{'q{}'.format(j): confusions[i] for i, j in enumerate([2, 3, 4, 5])}}
```
%% Cell type:code id: tags:
``` python
df = pd.DataFrame(data=d)
```
%% Cell type:code id: tags:
``` python
df.head()
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
0 1.000000e-07 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
1 5.005105e-03 4.104751e-122 0.000000e+00 0.000000e+00 0.000000e+00
2 1.001011e-02 1.051201e-31 3.577518e-122 7.451264e-274 0.000000e+00
3 1.501511e-02 5.734691e-15 2.733803e-55 9.343211e-123 2.336542e-219
4 2.002012e-02 4.075678e-09 6.984412e-32 7.149700e-70 3.085665e-124
%% Cell type:markdown id: tags:
### To get an idea of appropriate sigma values, find the cut-offs that give at least a 10% chance of confusion for all sites within a range:
%% Cell type:code id: tags:
``` python
testParams_DF = pd.DataFrame(columns=['sigma', 'alpha', 'note'])
```
%% Cell type:markdown id: tags:
Range is whole quality space
%% Cell type:code id: tags:
``` python
df.loc[(df['q2'] >= 0.1) & (df['q3'] >= 0.1) & (df['q4'] >= 0.1) & (df['q5'] >= 0.1)].head()
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
74 0.370370 0.375167 0.262259 0.169892 0.101546
75 0.375375 0.376777 0.265031 0.173131 0.104590
76 0.380380 0.378347 0.267745 0.176322 0.107618
77 0.385385 0.379878 0.270401 0.179466 0.110627
78 0.390390 0.381372 0.273002 0.182563 0.113616
%% Cell type:markdown id: tags:
$\sigma = 0.37$
- q2 : 0.38
- q3 : 0.26
- q4 : 0.17
- q5 : 0.10
%% Cell type:markdown id: tags:
Range is q4 or closer
%% Cell type:code id: tags:
``` python
df.loc[(df['q2'] >= 0.1) & (df['q3'] >= 0.1) & (df['q4'] >= 0.1)].head()
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
56 0.280280 0.337069 0.200187 0.103577 0.046294
57 0.285285 0.339767 0.204346 0.107618 0.049227
58 0.290290 0.342380 0.208409 0.111625 0.052198
59 0.295295 0.344911 0.212380 0.115597 0.055202
60 0.300300 0.347365 0.216260 0.119531 0.058234
%% Cell type:markdown id: tags:
$\sigma=0.28$
- q2 : 0.34
- q3 : 0.20
- q4 : 0.10
- q5 : 0.05
%% Cell type:markdown id: tags:
Range is q3 or closer
%% Cell type:code id: tags:
``` python
df.loc[(df['q2'] >= 0.1) & (df['q3'] >= 0.1)].head()
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
37 0.185185 0.262259 0.101546 0.028119 0.005455
38 0.190190 0.267745 0.107618 0.031517 0.006595
39 0.195195 0.273002 0.113616 0.035049 0.007867
40 0.200200 0.278043 0.119531 0.038698 0.009270
41 0.205205 0.282880 0.125357 0.042451 0.010803
%% Cell type:markdown id: tags:
$\sigma = 0.19$
- q2 : 0.26
- q3 : 0.10
- q4 : 0.03
- q5 : 0.01
%% Cell type:markdown id: tags:
Range is just q2
%% Cell type:code id: tags:
``` python
df.loc[(df['q2'] >= 0.1)].head()
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
19 0.095095 0.107618 0.006595 0.000100 3.576083e-07
20 0.100100 0.119532 0.009270 0.000206 1.242683e-06
21 0.105105 0.131087 0.012463 0.000384 3.644447e-06
22 0.110110 0.142242 0.016153 0.000662 9.294145e-06
23 0.115115 0.152973 0.020303 0.001066 2.110231e-05
%% Cell type:markdown id: tags:
$\sigma = 0.1$
- q2 : 0.11
- q3 : 0.01
- q4 : 0.00
- q5 : 0.00
%% Cell type:code id: tags:
``` python
sns.lineplot(data=df.set_index('sigma'))
```
%%%% Output: execute_result
<matplotlib.axes._subplots.AxesSubplot at 0x2140ed8b860>
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
sns.lineplot(data=df.where(df['sigma'] <= 1.0).set_index('sigma'))
# plotting the values of sigma
plt.axvline(0.1, alpha=0.3, color='grey')
plt.axvline(0.19, alpha=0.3, color='grey')
plt.axvline(0.28, alpha=0.3, color='grey')
plt.axvline(0.37, alpha=0.3, color='grey')
# plotting P = 0.1
plt.axhline(0.1, alpha=0.3, color='grey')
```
%%%% Output: execute_result
<matplotlib.lines.Line2D at 0x2140f13ad30>
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
desProb = 0.1
# Masks: the confusion probability with each site meets the desired threshold.
ok_q2 = df['q2'] >= desProb
ok_q3 = df['q3'] >= desProb
ok_q4 = df['q4'] >= desProb
ok_q5 = df['q5'] >= desProb
# First qualifying row for each progressively narrower range of sites.
range_all = df[ok_q2 & ok_q3 & ok_q4 & ok_q5].iloc[0]
range_q4 = df[ok_q2 & ok_q3 & ok_q4].iloc[0]
range_q3 = df[ok_q2 & ok_q3].iloc[0]
range_q2 = df[ok_q2].iloc[0]
df_1Ranges = pd.DataFrame(data=[range_all, range_q4, range_q3, range_q2])
```
%% Cell type:code id: tags:
``` python
df_1Ranges
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
74 0.370370 0.375167 0.262259 0.169892 1.015460e-01
56 0.280280 0.337069 0.200187 0.103577 4.629378e-02
37 0.185185 0.262259 0.101546 0.028119 5.454770e-03
19 0.095095 0.107618 0.006595 0.000100 3.576083e-07
%% Cell type:code id: tags:
``` python
# DataFrame.append was removed in pandas 2.0, so build a frame of the new
# rows and concatenate it instead. alpha is the q2 confusion probability.
new_rows = pd.DataFrame([{'sigma': row.sigma, 'alpha': row.q2, 'note': 'Prob_q2'}
                         for row in df_1Ranges.itertuples()])
testParams_DF = pd.concat([testParams_DF, new_rows])
```
%% Cell type:code id: tags:
``` python
alphas_1 = [np.mean(row[2:]) for row in df_1Ranges.itertuples()]
alphas_1
```
%%%% Output: execute_result
[0.22721601881269943,
0.17178190793707013,
0.09934474874734342,
0.028578442773621412]
%% Cell type:code id: tags:
``` python
# DataFrame.append was removed in pandas 2.0, so build a frame of the new
# rows and concatenate it instead. alpha is the matching entry of alphas_1.
new_rows = pd.DataFrame([{'sigma': row.sigma, 'alpha': alpha, 'note': 'mean'}
                         for row, alpha in zip(df_1Ranges.itertuples(), alphas_1)])
testParams_DF = pd.concat([testParams_DF, new_rows])
```
%% Cell type:markdown id: tags:
### To get an idea of appropriate sigma values, find the cut-offs that give at least a 25% chance of confusion for all sites within a range:
%% Cell type:code id: tags:
``` python
desProb = 0.25
# Masks: the confusion probability with each site meets the desired threshold.
ok_q2 = df['q2'] >= desProb
ok_q3 = df['q3'] >= desProb
ok_q4 = df['q4'] >= desProb
ok_q5 = df['q5'] >= desProb
# First qualifying row for each progressively narrower range of sites.
range_all = df[ok_q2 & ok_q3 & ok_q4 & ok_q5].iloc[0]
range_q4 = df[ok_q2 & ok_q3 & ok_q4].iloc[0]
range_q3 = df[ok_q2 & ok_q3].iloc[0]
range_q2 = df[ok_q2].iloc[0]
df_25Ranges = pd.DataFrame(data=[range_all, range_q4, range_q3, range_q2])
```
%% Cell type:code id: tags:
``` python
df_25Ranges
```
%%%% Output: execute_result
sigma q2 q3 q4 q5
140 0.700701 0.433217 0.368292 0.306930 0.250550
105 0.525526 0.411280 0.326893 0.250550 0.184855
70 0.350350 0.368292 0.250550 0.156453 0.089228
35 0.175175 0.250550 0.089228 0.021781 0.003561
%% Cell type:code id: tags:
``` python
sns.lineplot(data=df.where(df['sigma'] <= 1.0).set_index('sigma'))
# plotting the values of sigma
plt.axvline(df_25Ranges['sigma'].iloc[0], alpha=0.3, color='grey')
plt.axvline(df_25Ranges['sigma'].iloc[1], alpha=0.3, color='grey')
plt.axvline(df_25Ranges['sigma'].iloc[2], alpha=0.3, color='grey')
plt.axvline(df_25Ranges['sigma'].iloc[3], alpha=0.3, color='grey')
# plotting P = 0.25
plt.axhline(desProb, alpha=0.3, color='grey')
```
%%%% Output: execute_result
<matplotlib.lines.Line2D at 0x2140f873a20>
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
# DataFrame.append was removed in pandas 2.0, so build a frame of the new
# rows and concatenate it instead. alpha is the q2 confusion probability.
new_rows = pd.DataFrame([{'sigma': row.sigma, 'alpha': row.q2, 'note': 'Prob_q2'}
                         for row in df_25Ranges.itertuples()])
testParams_DF = pd.concat([testParams_DF, new_rows])
```
%% Cell type:code id: tags:
``` python
alphas_25 = [np.mean(row[2:]) for row in df_25Ranges.itertuples()]
```
%% Cell type:code id: tags:
``` python
# DataFrame.append was removed in pandas 2.0, so build a frame of the new
# rows and concatenate it instead. alpha is the matching entry of alphas_25.
new_rows = pd.DataFrame([{'sigma': row.sigma, 'alpha': alpha, 'note': 'mean'}
                         for row, alpha in zip(df_25Ranges.itertuples(), alphas_25)])
testParams_DF = pd.concat([testParams_DF, new_rows])
```
%% Cell type:code id: tags:
``` python
print(testParams_DF)
```
%%%% Output: stream
sigma alpha note
0 0.370370 0.375167 Prob_q2
1 0.280280 0.337069 Prob_q2
2 0.185185 0.262259 Prob_q2
3 0.095095 0.107618 Prob_q2
0 0.370370 0.227216 mean
1 0.280280 0.171782 mean
2 0.185185 0.099345 mean
3 0.095095 0.028578 mean
0 0.700701 0.433217 Prob_q2
1 0.525526 0.411280 Prob_q2
2 0.350350 0.368292 Prob_q2
3 0.175175 0.250550 Prob_q2
0 0.700701 0.339747 mean
1 0.525526 0.293395 mean
2 0.350350 0.216131 mean
3 0.175175 0.091280 mean
%% Cell type:code id: tags:
``` python
# Number the parameter sets 1..N and use that number as the index.
# The earlier intermediate 'id' index was discarded by the second set_index
# call, so only the 'paramSet' index is created here.
testParams_DF['paramSet'] = range(1, len(testParams_DF)+1)
testParams_DF = testParams_DF.set_index('paramSet')
print(testParams_DF)
```
%%%% Output: stream
sigma alpha note
id
1 0.370370 0.375167 Prob_q2
2 0.280280 0.337069 Prob_q2
3 0.185185 0.262259 Prob_q2
4 0.095095 0.107618 Prob_q2
5 0.370370 0.227216 mean
6 0.280280 0.171782 mean
7 0.185185 0.099345 mean
8 0.095095 0.028578 mean
9 0.700701 0.433217 Prob_q2
10 0.525526 0.411280 Prob_q2
11 0.350350 0.368292 Prob_q2
12 0.175175 0.250550 Prob_q2
13 0.700701 0.339747 mean
14 0.525526 0.293395 mean
15 0.350350 0.216131 mean
16 0.175175 0.091280 mean
sigma alpha note
paramSet
1 0.370370 0.375167 Prob_q2
2 0.280280 0.337069 Prob_q2
3 0.185185 0.262259 Prob_q2
4 0.095095 0.107618 Prob_q2
5 0.370370 0.227216 mean
6 0.280280 0.171782 mean
7 0.185185 0.099345 mean
8 0.095095 0.028578 mean
9 0.700701 0.433217 Prob_q2
10 0.525526 0.411280 Prob_q2
11 0.350350 0.368292 Prob_q2
12 0.175175 0.250550 Prob_q2
13 0.700701 0.339747 mean
14 0.525526 0.293395 mean
15 0.350350 0.216131 mean
16 0.175175 0.091280 mean
%% Cell type:code id: tags:
``` python
testParams_DF.to_csv('testParams.csv')
```
%% Cell type:code id: tags:
``` python
testParams_DF.iloc[0]['sigma']
```
%%%% Output: execute_result
0.37037046296296294
%% Cell type:code id: tags:
``` python
set(testParams_DF.index.values)
```
%%%% Output: execute_result
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
%% Cell type:code id: tags:
``` python
```
......
id,sigma,alpha,note
paramSet,sigma,alpha,note
1,0.37037046296296294,0.3751673855040233,Prob_q2
2,0.28028037467467465,0.33706893213176836,Prob_q2
3,0.18518528148148147,0.26225924792573907,Prob_q2
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment