import sys
sys.path.append('..')
from data.dataset import DATASET, DATASET_PATH
from utils.geo import get_average_1k_population_density,get_place
# Add a 'population_1k_density' column by calling
# get_average_1k_population_density(latitude, longitude) once per row.
DATASET['population_1k_density'] = DATASET.apply(
    lambda city: get_average_1k_population_density(city['latitude'], city['longitude']),
    axis=1,
)
# Inspect both ends of the ascending sort. These are bare expressions: their
# value is only displayed in an interactive session and is discarded when the
# file runs as a script.
# Last 10 rows = highest densities. NaN sorts last by default in pandas
# (na_position='last'), so rows with a NaN density also show up here.
DATASET.sort_values(by='population_1k_density').iloc[-10:][['city', 'country', 'latitude', 'longitude', 'population_1k_density']]
# First 10 rows = lowest densities.
DATASET.sort_values(by='population_1k_density').iloc[:10][['city', 'country', 'latitude', 'longitude', 'population_1k_density']]
# A density that is NaN or lower than 2 people/sqkm is probably a bad measurement.
# Manual curation: Roma, Italy.
# The dataset coordinates (41.5300, 12.2858) are incorrect.
flag = DATASET['city'].eq('Roma') & DATASET['country'].eq('Italy')
DATASET.loc[flag, 'latitude'] = 41.9028
DATASET.loc[flag, 'longitude'] = 12.4964
# Recompute the density of the selected row(s) from the corrected coordinates.
DATASET.loc[flag, 'population_1k_density'] = DATASET[flag].apply(
    lambda row: get_average_1k_population_density(row['latitude'], row['longitude']),
    axis=1,
)
# Bare expression: shows the recomputed value in an interactive session only.
DATASET.loc[flag, 'population_1k_density']
# Manual curation: Natal, Brazil.
# The dataset coordinates (-6.9838, -60.2699) are incorrect.
# Build the row mask once and reuse it, as the other curation sections do.
# It depends only on city/country, which are not modified here, so it selects
# the same rows for every statement below.
flag = (DATASET.city == 'Natal') & (DATASET.country == 'Brazil')
DATASET.loc[flag, 'latitude'] = -5.7793
DATASET.loc[flag, 'longitude'] = -35.2009
# Recompute the density of the selected row(s) from the corrected coordinates.
DATASET.loc[flag, 'population_1k_density'] = DATASET.loc[flag].apply(
    lambda city: get_average_1k_population_density(city['latitude'], city['longitude']),
    axis=1,
)
# Bare expression: shows the recomputed value in an interactive session only.
DATASET.loc[flag, 'population_1k_density']
# Look up the corrected coordinates with get_place; the result is not stored.
# NOTE(review): presumably a sanity check of the new coordinates - confirm.
get_place(-5.7793, -35.2009)
# Manual curation: Anchorage, United States of America.
# The dataset coordinates (61.1508, -149.1091) are incorrect.
flag = DATASET['city'].eq('Anchorage') & DATASET['country'].eq('United States of America')
DATASET.loc[flag, 'latitude'] = 61.2181
DATASET.loc[flag, 'longitude'] = -149.9003
# Recompute the density of the selected row(s) from the corrected coordinates.
DATASET.loc[flag, 'population_1k_density'] = DATASET[flag].apply(
    lambda row: get_average_1k_population_density(row['latitude'], row['longitude']),
    axis=1,
)
# Bare expression: shows the recomputed value in an interactive session only.
DATASET.loc[flag, 'population_1k_density']
# Manual curation: Douglas, Canada.
# The dataset coordinates (46.2819, -66.9420) are incorrect.
flag = DATASET['city'].eq('Douglas') & DATASET['country'].eq('Canada')
DATASET.loc[flag, 'latitude'] = 49.0056
DATASET.loc[flag, 'longitude'] = -122.7452
# Recompute the density of the selected row(s) from the corrected coordinates.
DATASET.loc[flag, 'population_1k_density'] = DATASET[flag].apply(
    lambda row: get_average_1k_population_density(row['latitude'], row['longitude']),
    axis=1,
)
# Bare expression: shows the recomputed value in an interactive session only.
DATASET.loc[flag, 'population_1k_density']
# Manual curation: Santa Cruz, Ecuador.
# The dataset coordinates (-0.5333, -90.3500) are incorrect.
flag = DATASET['city'].eq('Santa Cruz') & DATASET['country'].eq('Ecuador')
DATASET.loc[flag, 'latitude'] = -0.6394
DATASET.loc[flag, 'longitude'] = -90.3372
# Recompute the density of the selected row(s) from the corrected coordinates.
DATASET.loc[flag, 'population_1k_density'] = DATASET[flag].apply(
    lambda row: get_average_1k_population_density(row['latitude'], row['longitude']),
    axis=1,
)
# Bare expression: shows the recomputed value in an interactive session only.
DATASET.loc[flag, 'population_1k_density']
# Manual curation: drop Vhembe, South Africa, as it is a municipality of
# 25597 sqkm.
# Keep every row that is NOT (city == 'Vhembe' AND country == 'South Africa'),
# written here as "city differs OR country differs".
flag = (DATASET['city'] != 'Vhembe') | (DATASET['country'] != 'South Africa')
# Rebind DATASET to an independent copy of the kept rows.
DATASET = DATASET[flag].copy()
# Manual curation: 'JICOSUR', Mexico (19.3400, -104.3800) is an unknown
# location. Relabel the row as 'Cihuatlán'; coordinates and density are left
# untouched.
flag = DATASET['city'].eq('JICOSUR') & DATASET['country'].eq('Mexico')
DATASET.loc[flag, 'city'] = 'Cihuatlán'
# Manual curation: Oristano, Italy.
# The dataset coordinates (39.7207, 8.8980) are incorrect.
flag = DATASET['city'].eq('Oristano') & DATASET['country'].eq('Italy')
DATASET.loc[flag, 'latitude'] = 39.9062
DATASET.loc[flag, 'longitude'] = 8.5884
# Recompute the density of the selected row(s) from the corrected coordinates.
DATASET.loc[flag, 'population_1k_density'] = DATASET[flag].apply(
    lambda row: get_average_1k_population_density(row['latitude'], row['longitude']),
    axis=1,
)
# Bare expression: shows the recomputed value in an interactive session only.
DATASET.loc[flag, 'population_1k_density']
# Re-inspect the 10 lowest-density rows, this time with the 'c40' column
# (bare expression: displayed in an interactive session only).
DATASET.sort_values(by='population_1k_density').iloc[:10][
    ['city', 'country', 'latitude', 'longitude', 'population_1k_density', 'c40']
]
# Manual curation: Georgetown, Australia.
# The dataset coordinates (-18.3000, 143.5500) are incorrect.
flag = DATASET['city'].eq('Georgetown') & DATASET['country'].eq('Australia')
DATASET.loc[flag, 'latitude'] = -18.2470
DATASET.loc[flag, 'longitude'] = 143.0360
# Recompute the density of the selected row(s) from the corrected coordinates.
DATASET.loc[flag, 'population_1k_density'] = DATASET[flag].apply(
    lambda row: get_average_1k_population_density(row['latitude'], row['longitude']),
    axis=1,
)
# Bare expression: shows the recomputed value in an interactive session only.
DATASET.loc[flag, 'population_1k_density']
# Write the curated dataset, including the new density column, to DATASET_PATH.
# NOTE(review): to_csv writes the index as a column by default; confirm that
# the loader in data.dataset expects that.
DATASET.to_csv(DATASET_PATH)