import numpy as np
import pandas as pd
import seaborn as sns
# Load the tab-separated primary table into `df`, the frame every later cell
# plots from, and preview its first three rows.
PRIMARY_TABLE_PATH = "data/primary.tsv"
df = pd.read_csv(PRIMARY_TABLE_PATH, sep="\t")
df.head(n=3)
Ensembl ID | Uniprot IDs | Log10 length | Symbol | Log10 degree | mitochondrion organization | cell adhesion | signal transduction | B cell log10 FPKM | Neuron log10 FPKM | |
---|---|---|---|---|---|---|---|---|---|---|
0 | ENSG00000000003 | A0A087WYV6_HUMAN A0A087WZU5_HUMAN TSN6_HUMAN | 4.178517 | TSPAN6 | 0.698970 | 0 | 0 | 0 | -1.045757 | 1.294687 |
1 | ENSG00000000005 | TNMD_HUMAN | 4.110017 | TNMD | 0.000000 | 0 | 0 | 0 | -2.000000 | -2.000000 |
2 | ENSG00000000419 | H0Y368_HUMAN DPM1_HUMAN Q5QPJ9_HUMAN Q5QPK2_HUMAN | 4.649695 | DPM1 | 1.176091 | 0 | 0 | 0 | 0.980458 | 1.652246 |
import seaborn as sns

# Distribution of neuron expression with seaborn's default look.
# NOTE(review): `distplot` is deprecated in newer seaborn releases; it is kept
# here because the rest of the file targets the older API.
neuron_expr_logs = df["Neuron log10 FPKM"]
sns.distplot(neuron_expr_logs)
<matplotlib.axes._subplots.AxesSubplot at 0x7f8f589abda0>
# Switch the global seaborn theme, then redraw the same distribution so the
# effect of the style and palette is visible.
sns.set_style(style="darkgrid")
sns.set_palette(palette="muted")
sns.distplot(a=df["Neuron log10 FPKM"])
<matplotlib.axes._subplots.AxesSubplot at 0x7f8f59404c18>
import matplotlib as mpl

# Set the default figure size through matplotlib's global rcParams; the
# redrawn distribution below picks the new size up.
width = 10
height = 6
mpl.rcParams.update({'figure.figsize': [width, height]})
sns.distplot(a=df["Neuron log10 FPKM"])
<matplotlib.axes._subplots.AxesSubplot at 0x7f8f55ea87b8>
import matplotlib.pyplot as plt

# Neither `plt` nor `b_cell_expr_logs` is bound anywhere before this cell, so
# both are set up here to keep the cell runnable on its own.
# NOTE(review): the series is assumed to be the "B cell log10 FPKM" column,
# matching the x-axis label set below -- confirm against the original notebook.
b_cell_expr_logs = df["B cell log10 FPKM"]

# Distribution plot with an explicit title and axis labels.
sns.distplot(b_cell_expr_logs)
plt.title("FPKM distribution", size=18)
plt.xlabel("B cell log10 FPKM", size=15)
plt.ylabel("Density", size=15)
Text(0,0.5,'Density')
import matplotlib.pyplot as plt

# Neither `plt` nor `b_cell_expr_logs` is bound anywhere before this cell, so
# both are set up here to keep the cell runnable on its own.
# NOTE(review): the series is assumed to be the "B cell log10 FPKM" column of
# `df` -- confirm against the original notebook.
b_cell_expr_logs = df["B cell log10 FPKM"]

# Same distribution plot, this time enlarging the tick labels on both axes.
sns.distplot(b_cell_expr_logs)
plt.title("FPKM distribution", size=18)
plt.xticks(size=15)
plt.yticks(size=15)
(array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]), <a list of 7 Text yticklabel objects>)
# Joint scatter plot of B-cell versus neuron expression.
# x/y are passed by keyword: newer seaborn releases reject positional x/y
# together with `data=`, while keywords are accepted by old and new versions.
sns.jointplot(
    x="B cell log10 FPKM",
    y="Neuron log10 FPKM",
    data=df,
    kind="scatter",
)
<seaborn.axisgrid.JointGrid at 0x7f8f27b93ac8>
# Same pair of columns, drawn as a kernel density estimate instead of points.
# x/y are passed by keyword: newer seaborn releases reject positional x/y
# together with `data=`, while keywords are accepted by old and new versions.
sns.jointplot(
    x="B cell log10 FPKM",
    y="Neuron log10 FPKM",
    data=df,
    kind="kde",
)
<seaborn.axisgrid.JointGrid at 0x7f8f27a35dd8>
# Scatter variant with mostly transparent markers (alpha=0.1) so that
# overlapping points remain distinguishable.
# x/y are passed by keyword: newer seaborn releases reject positional x/y
# together with `data=`, while keywords are accepted by old and new versions.
sns.jointplot(
    x="B cell log10 FPKM",
    y="Neuron log10 FPKM",
    data=df,
    kind="scatter",
    alpha=0.1,
)
<seaborn.axisgrid.JointGrid at 0x7f8f27653da0>
# Reshape `df` into long form: one row per (gene, cell type) pair.  The B-cell
# rows come first, followed by the neuron rows; both halves keep the original
# row labels of `df`, so every label appears twice in `expr_df`.
expression_column_by_cell_type = [
    ("B cells", "B cell log10 FPKM"),
    ("Neurons", "Neuron log10 FPKM"),
]
expr_df = pd.concat([
    pd.DataFrame({
        "Log10 FPKM": df[expression_column],
        "Cell adhesion": df["cell adhesion"],
        "Cell type": cell_type,
    })
    for cell_type, expression_column in expression_column_by_cell_type
])
expr_df.sample(n=3)
Log10 FPKM | Cell adhesion | Cell type | |
---|---|---|---|
4718 | -2.000000 | 0 | Neurons |
1547 | 1.555699 | 0 | B cells |
1189 | 0.729165 | 0 | B cells |
# Expression per cell type, split by the cell-adhesion annotation.
# x/y are passed by keyword: newer seaborn releases reject positional x/y
# together with `data=`, while keywords are accepted by old and new versions.
sns.boxplot(x="Cell type", y="Log10 FPKM", hue="Cell adhesion", data=expr_df)
<matplotlib.axes._subplots.AxesSubplot at 0x7f8f3f07add8>
# `expr_df` is assembled with only the "Log10 FPKM", "Cell adhesion" and
# "Cell type" columns, so the hue column used below has to be added first.
# `expr_df` stacks `df` twice (B cells, then neurons), hence the annotation is
# stacked twice too; `.values` avoids index alignment on the duplicated labels.
expr_df["Mitochondrion organization"] = pd.concat(
    [df["mitochondrion organization"], df["mitochondrion organization"]]
).values

# Expression per cell type, split by the mitochondrion-organization annotation.
# x/y are passed by keyword: newer seaborn releases reject positional x/y
# together with `data=`, while keywords are accepted by old and new versions.
sns.boxplot(
    x="Cell type",
    y="Log10 FPKM",
    hue="Mitochondrion organization",
    data=expr_df,
)
<matplotlib.axes._subplots.AxesSubplot at 0x7f8f3c984320>
# Heatmap over the two expression columns, one heatmap row per row of `df`.
expression_columns = ["Neuron log10 FPKM", "B cell log10 FPKM"]
sns.heatmap(data=df[expression_columns])
<matplotlib.axes._subplots.AxesSubplot at 0x7f8f3ec80550>
# Hierarchically clustered heatmap of `df_expr`, using the "spring" colormap
# on a 6x5 figure.
# NOTE(review): `df_expr` is not defined anywhere in the visible source;
# presumably it is a (possibly subsampled) frame of the expression columns of
# `df` -- confirm and define it before this call, otherwise it raises NameError.
sns.clustermap(df_expr, cmap="spring", figsize=(6, 5))
<seaborn.matrix.ClusterGrid at 0x7f8f27367dd8>
import matplotlib.pyplot as plt
import numpy as np

# Plain-matplotlib example: a scaled quadratic (red) and the identity line
# (blue) over 0..199.  The legend labels are matched to the curves by the
# order in which they are plotted.
x = np.arange(200)
sqr_x = np.square(x) / 100
for y_values, line_color in ((sqr_x, "red"), (x, "blue")):
    plt.plot(x, y_values, color=line_color)
plt.legend(["quadratic", "linear"], loc="upper center")
<matplotlib.legend.Legend at 0x7f8f250c3710>
import altair as alt

# Histogram of neuron expression: binned values on x, row count per bin on y.
# NOTE(review): `max_rows=20000` presumably lifts Altair's default row limit
# so the whole table can be embedded -- confirm for the installed version.
neuron_expression_bins = alt.X("Neuron log10 FPKM:Q", bin=True)
rows_per_bin = alt.Y("count(*):Q")
chart = alt.Chart(df, max_rows=20000)
chart.mark_bar().encode(x=neuron_expression_bins, y=rows_per_bin)
# Scatter plot of neuron (x) against B-cell (y) expression, drawn with blue,
# mostly transparent circles.
neuron_axis = alt.X("Neuron log10 FPKM")
b_cell_axis = alt.Y("B cell log10 FPKM")
scatter_base = alt.Chart(df, max_rows=20000)
scatter_base.mark_circle(color="blue", fillOpacity=0.2).encode(
    x=neuron_axis,
    y=b_cell_axis,
)
# Bar chart: "Log10 degree" binned into at most 15 bins on x, with the average
# B-cell expression of each bin on y.
degree_bins = alt.X("Log10 degree:Q", bin=alt.Bin(maxbins=15))
mean_b_cell_expression = alt.Y("average(B cell log10 FPKM):Q")
bar_base = alt.Chart(df, max_rows=20000)
bar_base.mark_bar(color="blue").encode(x=degree_bins, y=mean_b_cell_expression)
# Two-dimensional binning: degree (up to 15 bins) against B-cell expression
# (up to 40 bins), with circle size encoding the number of rows in each cell.
degree_bins = alt.X("Log10 degree:Q", bin=alt.Bin(maxbins=15))
b_cell_bins = alt.Y("B cell log10 FPKM:Q", bin=alt.Bin(maxbins=40))
rows_per_cell = alt.Size("count(*):Q")
bubble_base = alt.Chart(df, max_rows=20000)
bubble_base.mark_circle().encode(x=degree_bins, y=b_cell_bins, size=rows_per_cell)
# Binned degree (up to 15 bins) against binned B-cell expression (up to 40
# bins); circle size encodes the row count per cell and circle color encodes
# the average neuron expression of the rows in that cell.
degree_bins = alt.X("Log10 degree:Q", bin=alt.Bin(maxbins=15))
b_cell_bins = alt.Y("B cell log10 FPKM:Q", bin=alt.Bin(maxbins=40))
rows_per_cell = alt.Size("count(*):Q")
mean_neuron_expression = alt.Color("average(Neuron log10 FPKM):Q")
colored_bubble_base = alt.Chart(df, max_rows=20000)
colored_bubble_base.mark_circle().encode(
    x=degree_bins,
    y=b_cell_bins,
    size=rows_per_cell,
    color=mean_neuron_expression,
)