Chart

# The availability of ancestry information among the 114 TCR-Seq studies

import numpy as np
import matplotlib.pyplot as plt

# Counts per availability category (they sum to 114), in the same order as
# the legend entries in `reason` and the wedge colours in `c`.
reply = [21, 21, 7, 65]
reason = [
    'Share in the publication',
    'Share upon request',
    'Not available upon request',
    'Unavailable',
]
c = ['#FF6A6A', '#CD69C9', '#9B30FF', '#FFC0CB']

# Wedges show only their percentage; the category names are left blank here
# and listed in the legend instead.
pie_options = {
    'labels': [''] * len(reply),
    'autopct': '%2.1f%%',
    'colors': c,
}
plt.pie(reply, **pie_options)

# Legend is anchored by its top-centre point to the bottom-centre of the axes.
legend_options = {
    'loc': 'upper center',
    'bbox_to_anchor': (0.5, -0),
    'ncol': 1,
    'fontsize': 15,
}
plt.legend(labels=reason, **legend_options)

plt.title('Ancestry availability')


------------------------------------------------------------------------------------------------------------------------------------

#TCR-Seq Studies reported with Mono-ancestry/Trans-ancestry Information

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Counts for the two categories, in the same order as `reason` and `c`.
reply = [33, 9]
reason = ['Mono-ancestry', 'Trans-ancestry']
c = ['#B2DFEE', '#7D9EC0']

# Global font size; set before drawing so the pie text picks it up.
plt.rcParams.update({'font.size': 15.0})

# Blank wedge labels: only the percentages are drawn on the pie, the
# category names appear in the legend.
pie_options = dict(
    labels=[''] * len(reply),
    labeldistance=1,
    autopct='%2.1f%%',
    startangle=90,
    colors=c,
)
plt.pie(reply, **pie_options)

# Legend is anchored by its top-centre point to the bottom-centre of the axes.
plt.legend(
    labels=reason,
    ncol=1,
    loc='upper center',
    bbox_to_anchor=(0.5, -0),
)

plt.title('TCR-Seq Studies reported with Mono-ancestry/Trans-ancestry Information')


------------------------------------------------------------------------------------------------------------------------------------

# Comparisons of the proportion of study participants’ reported ancestry information among total study participants and total studies

import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt

# One bar per entry of `x`; each dataN holds one stacked segment for both bars.
x = ['Number of Participants', 'Number of Studies']

# Percentages, stacked bottom-to-top in the order of the legend entries below.
data1 = np.array([84.1, 58.9])
data2 = np.array([9.0, 25.0])
data3 = np.array([4.0, 10.7])
data4 = np.array([2.9, 5.4])

fig, ax = plt.subplots(figsize=(8, 6))

# Draw the segments in order, each one starting where the previous stack ended.
# `stack_top` is None for the first segment, i.e. matplotlib's default baseline.
stack_top = None
for segment in (data1, data2, data3, data4):
    plt.bar(x, segment, bottom=stack_top, width=0.5)
    stack_top = segment if stack_top is None else stack_top + segment

#for i in range(len(x)):
    #plt.text(i,data1[i], data1[i], ha= 'center', va='bottom')

plt.ylabel('Percentage')

# Legend entries are matched to the bar segments in drawing order.
ancestry_groups = ["European", "Asian", "African", "Other"]
plt.legend(ancestry_groups, ncol=4, loc='upper center', bbox_to_anchor=(0.5, -0.06))
plt.title('Comparisons of ancestry distribution')


--------------------------------------------------------------------------------------------------------------------------------------------

# The proportion of US-Based TCR-Seq studies of the reported ethnic information

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Counts per ethnicity category, in the same order as `reason` and `colors`.
reply = [1923, 84]
reason = ['Non-Hispanic', 'Hispanic']
colors = ['#FFC125', '#FFF68F']

# Global font size; set before drawing so the pie text picks it up.
plt.rcParams.update({'font.size': 15.0})

# pctdistance > 1 places the percentages outside the wedges. Wedge labels are
# blank because the legend names the categories.
pie_options = dict(
    labels=[''] * len(reply),
    autopct='%2.1f%%',
    pctdistance=1.25,
    labeldistance=1.4,
    startangle=45,
    colors=colors,
)
plt.pie(reply, **pie_options)
plt.legend(
    labels=reason,
    fontsize=15,
    ncol=2,
    loc='upper center',
    bbox_to_anchor=(0.5, 0),
)

#plt.title('Reported Ethnic information')


------------------------------------------------------------------------------------------------------------------------------------

# Boxplot of the number of study participants in each ancestry group among the TCR-Seq studies

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

%matplotlib inline

# Load the data for the box plot. The plotting calls below read two columns
# from this sheet: 'ancestry' (category on the x axis) and 'n' (value on the
# y axis).
df = pd.read_excel('ancestryboxplot222.xlsx')

my_colors = ["#1C86EE", "#FF7F24", "#32CD32"]

# Seaborn theme and palette first, then the font-size override, keeping the
# original statement order.
sns.set(style='whitegrid')
sns.set_palette(my_colors)

plt.rcParams['font.size'] = 15.0

fig, ax = plt.subplots(figsize=(10, 8))

# Draw explicitly on `ax` instead of relying on pyplot's implicit "current
# axes", so the figure stays correct even if another figure is opened between
# these calls.
sns.boxplot(x='ancestry', y='n', data=df, width=0.5, ax=ax)
# Overlay the individual observations as semi-transparent black dots.
sns.stripplot(x='ancestry', y='n', data=df, marker='o', alpha=0.5, color='black', ax=ax)
#sns.violinplot(data=df, width=0.5, inner="point")

#plt.title('Number of study participants')
ax.set_xlabel('Ancestry', fontsize=15)
ax.set_ylabel('Number of study participants', fontsize=15)

# Logarithmic y axis, limited to the range 1 .. 1500.
ax.set_yscale('log')
ax.set_ylim([1, 1.5E3])


------------------------------------------------------------------------------------------------------------------------------------

# Changes in the proportion of ancestry over years

import matplotlib.pyplot as plt
import numpy as  np


# Percentage share of each ancestry group per time period.
times = ['2009-2015', '2016-2018', '2019-2021']
labels = ["European", "Asian", "African", "Other"]
European = [92.55, 83.57, 82.52]
Asian = [5.96, 8.55, 10.68]
African = [1.49, 1.89, 6.37]
Other = [0, 5.99, 0.43]

# Rows follow `labels`, columns follow `times`.
Data = np.array([European, Asian, African, Other])

width = 0.35
# Horizontal shift of the value labels, so they sit just right of each bar
# (the bar's half-width is width / 2).
label_x_offset = 0.2

fig, ax = plt.subplots(figsize=(8, 8))

# Lower edge of every segment = running total of the groups stacked beneath
# it. Computed once and shared by the bars and their value labels.
bottoms = np.vstack([np.zeros(Data.shape[1]), np.cumsum(Data, axis=0)[:-1]])

for heights, base, group in zip(Data, bottoms, labels):
    ax.bar(times, heights, width, label=group, bottom=base)

# Write each value at the mid-height of its segment. Empty segments are
# skipped by value rather than by a hard-coded position, and the loop bounds
# come from the data, so adding a group or a period needs no further edits.
for (group_idx, period_idx), value in np.ndenumerate(Data):
    if value == 0:
        continue
    ax.text(
        period_idx + label_x_offset,
        bottoms[group_idx, period_idx] + value / 2,
        "{:.2f}".format(value),
    )

ax.set_xlim([-0.4, 2.4])
# Head-room above 100 % so the topmost labels are not clipped.
ax.set_ylim([0, 110])
ax.set_ylabel('Percentage')
ax.set_title('Ancestry Distribution over Years')
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=4)


------------------------------------------------------------------------------------------------------------------------------------

# Proportion of overall TCR-Seq Covid patients

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Counts per ancestry group, in the same order as the legend entries in `reason`.
reply = [952, 101, 60, 117]
reason = ['European', 'African', 'Asian', 'Other']

# Global font size; set before drawing so the pie text picks it up.
plt.rcParams.update({'font.size': 15.0})

# pctdistance > 1 places the percentages outside the wedges. Wedge labels are
# blank because the legend names the groups. Colours come from matplotlib's
# default cycle.
pie_options = dict(
    labels=[''] * len(reply),
    autopct='%2.1f%%',
    pctdistance=1.25,
    labeldistance=1.4,
)
plt.pie(reply, **pie_options)
plt.legend(
    labels=reason,
    fontsize=15,
    ncol=2,
    loc='upper center',
    bbox_to_anchor=(0.5, -0.0),
)

#plt.title('Proportion of ancestry of Covid patients' )


------------------------------------------------------------------------------------------------------------------------------------

# Individual cohort's proportion of ancestry

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd
 
# y-axis in bold
#rc('font', , weight='bold')
 
# Percentages per bar (one list entry per bar, five bars in total).
# The four series are stacked bottom-to-top in the order of the legend
# entries at the end of this block: European, Asian, African, Other.
bars1 = [100, 77.1, 0, 100, 76.9]
bars2 = [0, 3.3, 100, 0, 17.9]
bars3 = [0, 9.0, 0, 0, 5.2]
bars4 = [0, 10.6, 0, 0, 0]

# Cumulative heights: `bars` is where the third series starts,
# `barb` is where the fourth series starts.
bars = (np.asarray(bars1) + np.asarray(bars2)).tolist()
barb = (np.asarray(bars) + np.asarray(bars3)).tolist()

# Bar positions on the x axis: 0 .. 4.
r = list(range(len(bars1)))

plt.rcParams.update({'font.size': 15.0})

# Cohort names, presumably in the same order as the bars (not shown on the axis):
#names = ['Schultheiß, C. et al.','Nolan, S. et al. ','Liao, M. et al.','Shomuradova, A. S. et al.','Kusnadi, A. et al. ']
# Width 1 makes neighbouring bars touch; the white edge keeps them visually apart.
barWidth = 1

# Draw the series from the bottom up. `None` as the base of the first series
# means matplotlib's default baseline; every later series starts on top of
# the running total. Colours come from matplotlib's default cycle.
series_and_bases = zip((bars1, bars2, bars3, bars4), (None, bars1, bars, barb))
for heights, base in series_and_bases:
    plt.bar(r, heights, bottom=base, edgecolor='white', width=barWidth)

# Keep a tick at every bar position but draw no tick labels.
plt.xticks(r, [])
#plt.xticks(rotation=45)
#plt.xlabel("Studies")
plt.ylabel('Percentage')

# Legend entries are matched to the series in drawing order.
plt.legend(
    ['European', 'Asian', 'African', 'Other'],
    ncol=2,
    loc='upper center',
    bbox_to_anchor=(0.5, -0.03),
)