%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
from pandas import Series, DataFrame
from matplotlib import pyplot as plt
World Religion Map: https://www.kaggle.com/umichigan/world-religions/downloads/regional.csv/notebook
# Load the global, national and regional tables; the first CSV column of each
# file becomes the index of the resulting frame.
Global, national, regional = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('csv/global.csv', 'csv/national.csv', 'csv/regional.csv')
)

# Preview the first ten rows of every table.
Global.head(n=10)
regional.head(n=10)
national.head(n=10)
アメリカ以外の国も含まれていることがわかる
# Draw ten random rows from the national table.
national.sample(n=10)
国名でグループすることにより、複数の国名を1つにし、count()で各行を数える
# Number of distinct states: one group per 'state' value, then count the groups.
national.groupby(['state']).size().count()
print(national['state'].unique())

# Shinto adherents for each row of the global table, as a bar chart.
Global.shinto_all.plot(kind='bar', title='世界の神道人口/年代', rot=45)

# Shinto adherents as a share of world population.
# Series.plot() only accepts keyword arguments, so `kind` must be named.
(Global['shinto_all'] / Global['world_population']).plot(kind='bar', title='世界人口からみる神道信者数')

# Reload the national table with a reduced set of columns.
national = pd.read_csv(
    'csv/national.csv',
    usecols=[0, 1, 25, 29, 38, 39, 76],  # keep only columns 0, 1, 25, 29, 38, 39 and 76
    index_col=1,  # use the state column as the index
)
national.head(3)
national.head(3)[['year', 'religion_all']]

# Every row labelled 'Japan'. A list lookup through .loc returns a DataFrame
# and, unlike the label slice 'Japan':'Japan', also works when the index
# contains duplicates and is not sorted.
national.loc[['Japan']]
.loc['インデックスの行','列']
# Total religious population for every row labelled 'Japan'.
national.loc['Japan', 'religion_all']

# Share of the religious population that is Shinto or Buddhist.
# The sum is parenthesised so that it is divided as a whole; without the
# parentheses only the Buddhist count would be divided.
japan_shinto_buddhism = national.loc['Japan', 'shinto_all'] + national.loc['Japan', 'buddhism_all']
df_jp = (japan_shinto_buddhism / national.loc['Japan', 'religion_all']).sort_values(ascending=False)

# Series.plot() only accepts keyword arguments, so `kind` must be named.
df_jp.plot(kind='bar', title='日本における全宗教人口と神道+仏教人口との割合', rot=45)
インデックスを国名にして日本を抽出したことにより、インデックスに年代を表示できない
列と行の中の項目を指定することはできないのだろうか....
先生のアドバイスにより、以下のように解決
日本をloc(行ラベル)で指定し、df_jpに代入し、インデックスをset_index()で設定し、年代を指定
# Select the rows labelled 'Japan' and re-index them by year so the year
# appears on the x axis of the chart.
df_jp = national.loc['Japan']
df_jp = df_jp.set_index('year')

# (Shinto + Buddhist) / total religious population. The parentheses are
# required: division binds tighter than addition.
df_jp['shinto_and_buddhism_ratio'] = (
    (df_jp['shinto_all'] + df_jp['buddhism_all']) / df_jp['religion_all']
)

# Series.plot() only accepts keyword arguments, so `kind` must be named.
df_jp['shinto_and_buddhism_ratio'].plot(kind='bar', title='日本における全宗教人口と神道+仏教人口との割合', rot=45)
2005年から他の宗教より神道+仏教人口が上回っていることがわかる
日本の人口から神道+仏教人口を割って比率でみることで、人口増加の要因から離れてみることができる
# Select the rows labelled 'Japan' and re-index them by year so the year
# appears on the x axis of the chart.
df_jp = national.loc['Japan']
df_jp = df_jp.set_index('year')

# (Shinto + Buddhist) / population. The parentheses are required: division
# binds tighter than addition.
df_jp['shinto_and_buddhism_ratio'] = (
    (df_jp['shinto_all'] + df_jp['buddhism_all']) / df_jp['population']
)

# Series.plot() only accepts keyword arguments, so `kind` must be named.
df_jp['shinto_and_buddhism_ratio'].plot(kind='bar', title='日本における全宗教人口と神道+仏教人口との割合', rot=45)
神道+仏教人口が、2005年から人口より上回っている。
神道が文化に溶け込んでいる日本では、神道+他の宗教を支持する人が多いということだろうか。
では、人口数が宗教人口より上回っている国は他にもあるのだろうか...
# Rank all rows by the 'total_percent' column, largest first.
national.total_percent.sort_values(ascending=False)
神道人口数をどう計算したのかは不明だが、日本以外でもCuba、Haitiがあるよう。
TIP: index_col=1を指定するとIndex部を読み込めずエラーが出るので、index_colの指定は削除すること
# Reload the national table without an index column so that 'year' and
# 'state' stay available as ordinary columns for pivot().
national = pd.read_csv('csv/national.csv', usecols=[0, 1, 25, 29, 38, 39])

plt.figure(figsize=(27, 8))
# Rows: year, columns: state, cells: Shinto adherents.
# pivot() arguments are keyword-only in pandas 2.x, so they are named here.
shinto_by_state = national.pivot(index='year', columns='state', values='shinto_all')
sns.heatmap(shinto_by_state, cmap='Blues')
神道は日本のみで栄えていることがわかる
plt.figure(figsize=(27, 8))
# Rows: year, columns: state, cells: Buddhist adherents.
# pivot() arguments are keyword-only in pandas 2.x, so they are named here.
buddhism_by_state = national.pivot(index='year', columns='state', values='buddhism_all')
sns.heatmap(buddhism_by_state, cmap='Greens')
国別だと200の国名を表示しきれず、国を特定できないが、日本以外にも仏教徒がいることがわかる
# Reload the regional table without an index column so that 'year' and
# 'region' are ordinary columns.
regional = pd.read_csv('csv/regional.csv')

# Rows: year, columns: region, cells: Shinto adherents.
# pivot() arguments are keyword-only in pandas 2.x, so they are named here.
sns.heatmap(regional.pivot(index='year', columns='region', values='shinto_all'), cmap='Blues')
神道人口はアジア(日本のみ)で年々急増している > 人口増加による急増
# Rows: year, columns: region, cells: Buddhist adherents.
# pivot() arguments are keyword-only in pandas 2.x, so they are named here.
sns.heatmap(regional.pivot(index='year', columns='region', values='buddhism_all'), cmap='Greens')
仏教人口はアジアのみで年々急増している > 人口増加による急増
# Shinto adherents in Japan per year. Both axes are taken from the same
# Japan-only frame so that x and y have matching rows; pairing the unfiltered
# year column with a filtered y column mixes rows of different countries.
national = pd.read_csv('csv/national.csv', usecols=[0, 1, 25, 29, 38, 39])
japan = national[national.state == 'Japan']
sns.barplot(x=japan['year'], y=japan['shinto_all'], palette='Blues')

# Buddhist adherents in Japan per year (table reloaded as in the original).
national = pd.read_csv('csv/national.csv', usecols=[0, 1, 25, 29, 38, 39])
japan = national[national.state == 'Japan']
sns.barplot(x=japan['year'], y=japan['buddhism_all'], palette='Greens')

# Rows: year, columns: region, cells: the 'world_population' column.
# pivot() arguments are keyword-only in pandas 2.x, so they are named here.
sns.heatmap(regional.pivot(index='year', columns='region', values='world_population'), cmap='Reds')
世界全体で人口が増加していることがわかる
# Rows: year, columns: region, cells: total religious population.
# pivot() arguments are keyword-only in pandas 2.x, so they are named here.
sns.heatmap(regional.pivot(index='year', columns='region', values='religion_all'), cmap='Reds')
宗教人口はなぜかアジアだけ増加していることがわかる
# Reload the global table and keep the year plus the adherent counts and
# percentages of the selected religions.
Global = pd.read_csv('csv/global.csv')
cols = ['year','christianity_all','islam_all','hinduism_all','buddhism_all','noreligion_all','syncretism_all','christianity_percent','hinduism_percent','islam_percent','buddhism_percent','syncretism_percent','noreligion_percent']
Global = Global[cols]

# One line per '*_all' column. The x axis uses the 'year' column; the default
# index of this frame is only the row position, not the year.
for col in Global.columns:
    if '_all' in col:
        plt.plot(Global['year'], Global[col], label=col, linewidth=3)

plt.title('宗教別支持者数')
# Legend is placed below the axes and stretched across three columns.
plt.legend(bbox_to_anchor=(-.2, -.5, 1.4, .5), mode='expand', loc=3, ncol=3, fontsize=14)
plt.xticks(size=14)
plt.yticks(size=14)
クリスチャンが多く、人口増加で右上がり
# Correlation heatmap between the adherent counts of seven belief groups
# in the global table.
cols = ['christianity_all','islam_all','hinduism_all','buddhism_all','shinto_all','noreligion_all','syncretism_all']
Global = pd.read_csv('csv/global.csv')
Global = Global[cols]

correlation = Global.corr()
plt.figure(figsize=(13, 11))
sns.heatmap(correlation, square=True, cmap='Blues', annot=True, fmt="1.1f")
plt.title('世界の宗教相対関係')
4大宗教のキリスト教、イスラム教、ヒンドゥー教、仏教の相関値が濃くなっている。
シンクレティズム(syncretism)は、重層信仰という意味で、日本の神仏習合(神道+仏教)の現れだが、色の変化はイスラム教より顕著でない。
人口、宗教人口、他の宗教も追加
# Same correlation heatmap, extended with population, total religious
# population and the remaining religions.
cols = ['population','religion_all','christianity_all','judaism_all','islam_all','hinduism_all','buddhism_all','shinto_all','zoroastrianism_all','sikhism_all','baha’i_all','jainism_all','animism_all','noreligion_all','syncretism_all']
Global = pd.read_csv('csv/global.csv')
Global = Global[cols]

correlation = Global.corr()
plt.figure(figsize=(13, 11))
sns.heatmap(correlation, square=True, cmap='Blues', annot=True, fmt="1.1f")
plt.title('世界の宗教相対関係 ver.2')
他の記述方法
# Alternative formulation: compute the correlation matrix first, then pick
# its columns in an explicit order before plotting.
order = ['population','religion_all','christianity_all','judaism_all','islam_all','hinduism_all','buddhism_all','shinto_all','zoroastrianism_all','sikhism_all','baha’i_all','jainism_all','animism_all','noreligion_all','syncretism_all']
cols = Global.corr()
cols_ordered = cols.loc[:, order]
cols_ordered

plt.figure(figsize=(13, 11))
sns.heatmap(cols_ordered, square=True, cmap='Blues', annot=True, fmt="1.1f")
plt.title('世界の宗教相対関係 ver.3')
# Regional table restricted to the year, population and per-religion counts.
regional = pd.read_csv('csv/regional.csv')
cols = ['year','population','christianity_all','judaism_all','islam_all','hinduism_all','buddhism_all','shinto_all','zoroastrianism_all','sikhism_all','baha’i_all','jainism_all','animism_all','noreligion_all','syncretism_all']
regionals = regional[cols]
regionals

# Non-religious population: rows are regions, columns are years.
# pivot() arguments are keyword-only in pandas 2.x, so they are named here.
regional = pd.read_csv('csv/regional.csv')
plt.figure(figsize=(13, 8))
sns.heatmap(regional.pivot(index='region', columns='year', values='noreligion_all'), cmap='Purples')
アジアで急激に無宗教人口が多くなっている
# Stacked bars of the non-religious population: one bar per year, one
# segment per region.
colormap = plt.cm.Set2
noreligion_year = regional.groupby(['year', 'region'])['noreligion_all'].sum()
noreligion_by_region = noreligion_year.unstack()
noreligion_by_region.plot(
    kind='bar',
    stacked=True,
    colormap=colormap,
    grid=False,
    figsize=(14, 8),
    legend=False,
    title='無宗教人口の割合の変化',
)
# The legend is drawn separately so it can be stretched below the axes.
plt.legend(bbox_to_anchor=(-.2, -.6, 1.4, .5), mode='expand', loc=1, ncol=5, fontsize=14)
plt.show()
アジア(青)が一番、無宗教人口が増えている
[cols]の中身は[['region'....'syncretism_all']]として[ ]も含めて代入されるため、Dataframeになる
# Indexing with a list of column names yields a DataFrame holding exactly
# those columns.
cols = [
    'year', 'region', 'population',
    'christianity_all', 'judaism_all', 'islam_all', 'hinduism_all',
    'buddhism_all', 'shinto_all', 'zoroastrianism_all', 'sikhism_all',
    'baha’i_all', 'jainism_all', 'animism_all', 'noreligion_all',
    'syncretism_all',
]
regionals = regional[cols]
regionals
代入をしなくとも以下のように書ける
# The same selection written inline, without an intermediate list variable.
regionals = regional[[
    'year', 'region', 'population',
    'christianity_all', 'judaism_all', 'islam_all', 'hinduism_all',
    'buddhism_all', 'shinto_all', 'zoroastrianism_all', 'sikhism_all',
    'baha’i_all', 'jainism_all', 'animism_all', 'noreligion_all',
    'syncretism_all',
]]
regionals

# Reload, drop the 'region' column from the selection and sum every
# remaining column per year.
regional = pd.read_csv('csv/regional.csv')
regionals = regional[[
    'year', 'population',
    'christianity_all', 'judaism_all', 'islam_all', 'hinduism_all',
    'buddhism_all', 'shinto_all', 'zoroastrianism_all', 'sikhism_all',
    'baha’i_all', 'jainism_all', 'animism_all', 'noreligion_all',
    'syncretism_all',
]]
regionals_by_year = regionals.groupby('year').sum()
regionals_by_year
人口で割ってから比率を出すことで人口増加の影響が及ばなくする
# Christian adherents divided by population, row by row.
regionals = regional['christianity_all'].div(regional['population'])
regionals
数値が入っていないとき、lambdaを使う。通常は以下のように関数fooを定義して書くことができる。
def foo(row):
    """Return every value of *row* as a percentage of ``row['population']``."""
    return row / row['population'] * 100


# Pass the function object itself. `foo * 100` would try to multiply a
# function by an integer and raise TypeError, and foo already scales by 100.
df.apply(foo, axis=1)
# Per-year totals of population and every religion.
selected = [
    'year', 'population',
    'christianity_all', 'judaism_all', 'islam_all', 'hinduism_all',
    'buddhism_all', 'shinto_all', 'zoroastrianism_all', 'sikhism_all',
    'baha’i_all', 'jainism_all', 'animism_all', 'noreligion_all',
    'syncretism_all',
]
regional = pd.read_csv('csv/regional.csv')
regionals = regional[selected]
regionals_by_year = regionals.groupby('year').sum()
df = regionals_by_year

# Each column as a percentage of that year's population. The result is only
# evaluated here, not stored; the heatmap below shows the raw yearly totals.
df.div(df['population'], axis=0) * 100

plt.figure(figsize=(13, 8))
sns.heatmap(regionals_by_year, cmap='Purples')
人口は増えているが、宗教人口はそれほど増加していない。キリスト教、イスラム教、ヒンドゥー教、無宗教人口が徐々に増加していることがわかる
各種ライブラリをインポート
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
from pandas import Series, DataFrame
from matplotlib import pyplot as plt
csvファイルの読み込み
# Three heatmaps of "share of population (%) per year", each for a different
# group of religions. Every pass repeats the same steps: select columns, sum
# per year, divide by population, drop the population column, plot transposed.
regional = pd.read_csv('csv/regional.csv')

panels = [
    (['christianity_all', 'islam_all', 'judaism_all', 'hinduism_all', 'buddhism_all',
      'shinto_all', 'animism_all', 'noreligion_all', 'syncretism_all'], (16, 8)),
    (['christianity_all', 'islam_all', 'hinduism_all', 'buddhism_all',
      'animism_all', 'noreligion_all', 'syncretism_all'], (13, 6)),
    (['judaism_all', 'shinto_all', 'zoroastrianism_all', 'sikhism_all',
      'baha’i_all', 'jainism_all'], (13, 5)),
]

for religion_columns, figure_size in panels:
    regionals = regional[['year', 'population'] + religion_columns]
    regionals_by_year = regionals.groupby('year').sum()
    df = regionals_by_year
    # Percentage of the yearly population for every column.
    df_aspect = df.div(df['population'], axis=0) * 100
    # Population relative to itself is always 100, so it is left out.
    df_result = df_aspect.drop(columns='population')
    plt.figure(figsize=figure_size)
    sns.heatmap(df_result.T, square=True, cmap='Purples', annot=True, fmt="1.0f")
年代でグループ化し、'.T'でxy 軸の反転をする。
# Yearly totals per religion, transposed so religions are rows and years
# are columns.
selected = ['year', 'christianity_all', 'islam_all', 'judaism_all', 'hinduism_all',
            'buddhism_all', 'shinto_all', 'animism_all', 'noreligion_all', 'syncretism_all']
regional = pd.read_csv('csv/regional.csv')
regionals = regional[selected]
regionals_by_year = regionals.groupby('year').sum()
regionals_by_year.T

# Same table again, with every row divided by its own 1945 value.
regional = pd.read_csv('csv/regional.csv')
regionals = regional[selected]
regionals_by_year = regionals.groupby('year').sum()
df = regionals_by_year.T
df = df.div(df[1945], axis=0)
df
計算があっているかを確認する
# Manual spot check of one ratio: divide the two totals by hand and print it.
numerator = 6_964_802_102
denominator = 701_350_290
q = numerator / denominator
print(q)
# Heatmap of each religion's yearly total divided by its 1945 total.
selected = ['year', 'christianity_all', 'islam_all', 'judaism_all', 'hinduism_all',
            'buddhism_all', 'shinto_all', 'animism_all', 'noreligion_all', 'syncretism_all']
regional = pd.read_csv('csv/regional.csv')
regionals = regional[selected]
regionals_by_year = regionals.groupby('year').sum()

# Religions as rows, years as columns; every row is divided by its 1945 value.
df = regionals_by_year.T
df = df.div(df[1945], axis=0)

plt.figure(figsize=(16, 8))
sns.heatmap(df, square=True, cmap='Purples', annot=True, fmt="1.0f")
# Three side-by-side stacked bar charts (Christianity, Islam, Judaism):
# one bar per year, one segment per region.
regional = pd.read_csv('csv/regional.csv')
fig, axes = plt.subplots(nrows=1, ncols=3)
colormap = plt.cm.Set2

# Options shared by all three panels.
bar_style = dict(kind='bar', stacked=True, colormap=colormap, grid=False, legend=False)
by_year_region = regional.groupby(['year', 'region'])

christianity_year = by_year_region['christianity_all'].sum()
# figsize on the first panel resizes the whole shared figure.
christianity_year.unstack().plot(ax=axes[0], figsize=(18, 8), **bar_style)
axes[0].set_title('Christianity', y=1.08, size=12)

islam_year = by_year_region['islam_all'].sum()
islam_year.unstack().plot(ax=axes[1], **bar_style)
axes[1].set_title('Islam', y=1.08, size=12)

judaism_year = by_year_region['judaism_all'].sum()
judaism_year.unstack().plot(ax=axes[2], **bar_style)
# A single legend, anchored to the last panel and stretched under the row.
axes[2].legend(
    bbox_to_anchor=(-1.4, -0.2, 2.4, 0.1),
    loc=10,
    prop={'size': 12},
    ncol=5,
    mode="expand",
    borderaxespad=0.,
)
axes[2].set_title('Judaism', y=1.08, size=12)

plt.tight_layout()
plt.show()
# Three side-by-side stacked bar charts (Hinduism, Buddhism, Shinto):
# one bar per year, one segment per region.
# The panels are driven by a (column, title) table, so no temporary ends up
# holding one religion's data under another religion's name.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 8))
colormap = plt.cm.Set2

panels = [
    ('hinduism_all', 'Hinduism'),
    ('buddhism_all', 'Buddhism'),
    ('shinto_all', 'Shinto'),
]
for ax, (column, panel_title) in zip(axes, panels):
    yearly_by_region = regional.groupby(['year', 'region'])[column].sum().unstack()
    yearly_by_region.plot(kind='bar', stacked=True, colormap=colormap, grid=False, ax=ax, legend=False)
    ax.set_title(panel_title, y=1.08, size=12)

# A single legend, anchored to the last panel and stretched under the row.
axes[2].legend(bbox_to_anchor=(-1.4, -0.2, 2.4, 0.1), loc=10, prop={'size': 12},
               ncol=5, mode="expand", borderaxespad=0.)
plt.tight_layout()
plt.show()
# Three side-by-side stacked bar charts (Animism, No religion, Syncretism):
# one bar per year, one segment per region.
# The panels are driven by a (column, title) table, so no temporary ends up
# holding one belief group's data under another religion's name, and every
# panel is drawn with the same options (including legend=False).
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 8))
colormap = plt.cm.Set2

panels = [
    ('animism_all', 'Animism'),
    ('noreligion_all', 'No religion'),
    ('syncretism_all', 'Syncretism'),
]
for ax, (column, panel_title) in zip(axes, panels):
    yearly_by_region = regional.groupby(['year', 'region'])[column].sum().unstack()
    yearly_by_region.plot(kind='bar', stacked=True, colormap=colormap, grid=False, ax=ax, legend=False)
    ax.set_title(panel_title, y=1.08, size=12)

# A single legend, anchored to the last panel and stretched under the row.
axes[2].legend(bbox_to_anchor=(-1.4, -0.2, 2.4, 0.1), loc=10, prop={'size': 12},
               ncol=5, mode="expand", borderaxespad=0.)
plt.tight_layout()
plt.show()
参考資料:
Global Religion 1945-2010: Plotly & Pandas visuals: https://www.kaggle.com/arthurtok/global-religion-1945-2010-plotly-pandas-visuals
pandas.DataFrame の列の抽出(射影)および行の抽出(選択)方法まとめ: https://akiyoko.hatenablog.jp/entry/2017/04/03/081630
seaborn.heatmap: https://seaborn.pydata.org/generated/seaborn.heatmap.html
pandas の loc、iloc、ix の違い – python: http://ailaby.com/lox_iloc_ix/
Seaborn でヒートマップを作成する: https://pythondatascience.plavox.info/seaborn/heatmap
グラフ作成のためのチートシートとPythonによる各種グラフの実装: https://qiita.com/4m1t0/items/76b0033edb545a78cef5
Dbpedia: https://wiki.dbpedia.org/
Ontology: http://mappings.dbpedia.org/server/ontology/classes/
SPARQL Endpoint interface to Python: https://rdflib.github.io/sparqlwrapper/
フェルミ推定(Fermi estimate)とは: 実際に調査するのが難しいようなとらえどころのない量を、いくつかの手掛かりを元に論理的に推論し、短時間で概算すること