main.py
a = """ """
# Paste the column of numbers to analyse (one value per line, e.g. copied
# from Excel) between the triple quotes above.
import collections

# Expected share (in percent) of each leading digit under Benford's law.
ben = {1: 30.1, 2: 17.6, 3: 12.5, 4: 9.7, 5: 7.9, 6: 6.7, 7: 5.8, 8: 5.1, 9: 4.6}


def leading_digits(text):
    """Return the first significant digit of every line of *text*.

    Each line is stripped of surrounding whitespace, then of any leading
    sign, zeros and decimal/thousands separators, so "0.052" yields '5'
    and "-7" yields '7'. Lines that are empty, are zero, or do not start
    with a digit 1-9 after that are skipped, because they have no entry
    in the Benford table.

    Returns a list of one-character strings, in input order.
    """
    heads = []
    for line in text.split("\n"):
        significant = line.strip().lstrip("+-0., \t")
        if significant and significant[0] in "123456789":
            heads.append(significant[0])
    return heads


def digit_rates(counts):
    """Convert ``(digit, count)`` pairs into percentages of the total count.

    The result has one float per pair, in the same order. An empty input
    yields an empty list (no division takes place).
    """
    total = sum(n for _, n in counts)
    return [n * 100 / total for _, n in counts]


def plot_comparison(digits, observed, expected):
    """Show paired bars: observed percentages next to Benford's percentages.

    *digits* are the tick labels; *observed* and *expected* are the bar
    heights, all three in the same order and of the same length.
    """
    # Imported here so the counting helpers above stay usable on machines
    # without matplotlib installed.
    import matplotlib.pyplot as plt

    # Bar positions follow the number of digits actually present instead of
    # assuming that all nine occur in the data.
    positions = range(len(digits))
    # Ticks sit midway between the two 0.3-wide bars of each pair.
    plt.xticks([i + 0.15 for i in positions], digits)
    plt.bar(positions, observed, color='g', width=0.3,
            label='Selected_data', align="center")
    plt.bar([i + 0.3 for i in positions], expected, color='b', width=0.3,
            label='Benford_law', align="center")
    plt.legend(bbox_to_anchor=(0.5, 1), loc=2)
    plt.show()


l_head = leading_digits(a)
# Only digits 1-9 are counted, so at most nine entries come back; they are
# ordered by frequency of appearance, most common first.
l_count = collections.Counter(l_head).most_common(9)
# To order the bars 1..9 instead of by frequency, uncomment:
# l_count.sort(key=lambda x: x[0])
l_rate = digit_rates(l_count)
l_index = tuple(digit for digit, _ in l_count)
# Benford percentages arranged in the same order as the observed digits.
ben_label = [ben[int(digit)] for digit in l_index]

if __name__ == "__main__":
    if l_count:
        plot_comparison(l_index, l_rate, ben_label)
    else:
        print("No values with a leading digit 1-9 found; paste data into `a`.")
Example:
Population by administrative unit in Japan

Area for each administrative unit in Japan

Population density by administrative unit in Japan

Number of households per administrative unit in Japan

All of the numbers from the datasets above, combined

The statistics are taken from figures published by the Statistics Bureau of Japan: https://www.stat.go.jp/data/index.html
Recommended Posts