import pandas as pd
import sklearn
from helpers import *
import matplotlib.pyplot as plt
import sys
sys.path.append("..")
from data.unlabeled.raw import edstats_co as df_c, edstats_da as df_d, edstats_se as df_s
import seaborn as sns

..\data\unlabeled\raw\__init__.py:41: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
  aquastat_eah = pd.read_csv(aquastat_eah_path, skipfooter=8)
..\data\unlabeled\raw\__init__.py:42: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
  aquastat_wr = pd.read_csv(aquastat_wr_path, skipfooter=8)
..\data\unlabeled\raw\__init__.py:43: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
  aquastat_wu = pd.read_csv(aquastat_wu_path, skipfooter=8)

Remember to extract Edstats_csv.zip to Edstats_csv¶

# List the column labels of df_d (imported above as edstats_da).
df_d.columns

Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
       '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978',
       '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987',
       '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996',
       '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005',
       '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014',
       '2015', '2016', '2017', '2020', '2025', '2030', '2035', '2040', '2045',
       '2050', '2055', '2060', '2065', '2070', '2075', '2080', '2085', '2090',
       '2095', '2100', 'Unnamed: 69'],
      dtype='object')

# Years whose values are considered when building the single '2020' column.
years = ['2015', '2016', '2017', '2020']

# Keep only the identifier columns plus the selected years.
# .copy() makes stripped_df_d an independent frame: it is modified in place
# further down, and doing that on a bare slice of df_d raises pandas'
# SettingWithCopyWarning and leaves it ambiguous which object is changed.
stripped_df_d = df_d[['Country Code', 'Indicator Code', *years]].copy()

# Baseline: number of non-null entries in '2020' before any filling.
print("Non na values in col 2020:", stripped_df_d['2020'].count())
stripped_df_d

Non na values in col 2020: 51436

Merge columns to remove null values¶

# Every selected year except the last one acts as a source for the '2020' column.
# fill_missing_with_column is a project helper; presumably it fills gaps in the
# first named column from the second one — verify in helpers.py.
source_years = years[:-1]
for year in source_years:
    fill_missing_with_column(stripped_df_d, '2020', year)

# Report how many '2020' entries are populated after filling.
non_null_2020 = stripped_df_d['2020'].count()
print("Non na values in col 2020:", non_null_2020)
stripped_df_d

Non na values in col 2020: 132991
c:\Users\joach\code-projects\WaterSecurity\unlabeled_preprocessing\helpers.py:15: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[into] = df[into].combine_first(df[fro])
C:\Users\joach\.conda\envs\wsenv\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(

# Attach the series metadata from df_s to every data row by matching the
# row's 'Indicator Code' against the metadata's 'Series Code'.
df_d_withseries = stripped_df_d.merge(
    df_s,
    how='left',
    left_on='Indicator Code',
    right_on='Series Code',
)
# The join key from the left side is no longer needed once the merge is done.
df_d_withseries.drop(columns=['Indicator Code'], inplace=True)

# Show the column labels of the merged frame.
df_d_withseries.columns

Index(['Country Code', '2020', 'Series Code', 'Topic', 'Indicator Name',
       'Short definition', 'Long definition', 'Unit of measure', 'Periodicity',
       'Base Period', 'Other notes', 'Aggregation method',
       'Limitations and exceptions', 'Notes from original source',
       'General comments', 'Source', 'Statistical concept and methodology',
       'Development relevance', 'Related source links', 'Other web links',
       'Related indicators', 'License Type', 'Unnamed: 20'],
      dtype='object')

Rearrange the table so that each indicator becomes a column¶

# One row per country code, one column per indicator name, cells taken from
# the '2020' column (pivot_table's default aggregation applies to duplicates).
countryXindicator = df_d_withseries.pivot_table(
    values='2020',
    index=['Country Code'],
    columns='Indicator Name',
)
countryXindicator

Find columns that contain "number", "Projection" or "$"¶

# Collect indicator names by (case-sensitive) substring match.
indicator_names = countryXindicator.columns
projection_col = [name for name in indicator_names if "Projection" in name]
number = [name for name in indicator_names if "number" in name]
dollar = [name for name in indicator_names if "$" in name]

And remove them¶

# Remove all columns matched by the three substring filters in one pass.
columns_to_remove = projection_col + number + dollar
countryXindicator.drop(columns=columns_to_remove, inplace=True)
countryXindicator.shape

(241, 1502)

Remove population data¶

# Drop the 'Population, total' column from the indicator table in place.
countryXindicator.drop(columns=['Population, total'], inplace=True)

See how much of the data is missing and drop columns where more than 50% of the values are null¶

# Print the max/min/mean share of missing values per column before filtering.
# (print_missing_percentages is a project helper; it also appears to return a
# tuple of two of those percentages — verify in helpers.py.)
print_missing_percentages(countryXindicator)

Max, min and mean number of missing values for the columns
Max: 99.5850622406639 %
Min: 3.7344398340248963 %
Mean: 81.31204369977415 %

(3.7344398340248963, 99.5850622406639)

# Remove the sparsely populated indicator columns.
# NOTE(review): the return value is not used, so the helper presumably
# modifies countryXindicator in place — confirm in helpers.py.
dropColumnHalf(countryXindicator)

# Re-check the per-column missing percentages after the sparse columns were dropped.
print_missing_percentages(countryXindicator)

Max, min and mean number of missing values for the columns
Max: 49.79253112033195 %
Min: 3.7344398340248963 %
Mean: 26.60212079299217 %

(3.7344398340248963, 49.79253112033195)

# Show the remaining (rows, columns) dimensions of the indicator table.
countryXindicator.shape

(241, 243)

 # Display the filtered country-by-indicator table.
 # NOTE(review): this line starts with a stray leading space. A notebook cell
 # appears to tolerate it, but a plain .py script would raise IndentationError
 # here — confirm and remove the space.
 countryXindicator

Split dataset into a dataframe containing year-data and a dataframe for the float-percentages-data¶

This is done for better imputation results.

# Columns that find_all_integer_columns (project helper) reports for this table.
int_cols = find_all_integer_columns(countryXindicator)

# Everything that is not one of those columns forms the float frame.
countryXindicator_float = countryXindicator.drop(columns=int_cols)
country_codes = countryXindicator_float.index.to_list()
print(country_codes)

['ABW', 'AFG', 'AGO', 'ALB', 'AND', 'ARB', 'ARE', 'ARG', 'ARM', 'ASM', 'ATG', 'AUS', 'AUT', 'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD', 'BGR', 'BHR', 'BHS', 'BIH', 'BLR', 'BLZ', 'BMU', 'BOL', 'BRA', 'BRB', 'BRN', 'BTN', 'BWA', 'CAF', 'CAN', 'CHE', 'CHI', 'CHL', 'CHN', 'CIV', 'CMR', 'COD', 'COG', 'COL', 'COM', 'CPV', 'CRI', 'CUB', 'CUW', 'CYM', 'CYP', 'CZE', 'DEU', 'DJI', 'DMA', 'DNK', 'DOM', 'DZA', 'EAP', 'EAS', 'ECA', 'ECS', 'ECU', 'EGY', 'EMU', 'ERI', 'ESP', 'EST', 'ETH', 'EUU', 'FIN', 'FJI', 'FRA', 'FRO', 'FSM', 'GAB', 'GBR', 'GEO', 'GHA', 'GIB', 'GIN', 'GMB', 'GNB', 'GNQ', 'GRC', 'GRD', 'GRL', 'GTM', 'GUM', 'GUY', 'HIC', 'HKG', 'HND', 'HPC', 'HRV', 'HTI', 'HUN', 'IDN', 'IMN', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL', 'ISR', 'ITA', 'JAM', 'JOR', 'JPN', 'KAZ', 'KEN', 'KGZ', 'KHM', 'KIR', 'KNA', 'KOR', 'KWT', 'LAC', 'LAO', 'LBN', 'LBR', 'LBY', 'LCA', 'LCN', 'LDC', 'LIC', 'LIE', 'LKA', 'LMC', 'LMY', 'LSO', 'LTU', 'LUX', 'LVA', 'MAC', 'MAR', 'MCO', 'MDA', 'MDG', 'MDV', 'MEA', 'MEX', 'MHL', 'MIC', 'MKD', 'MLI', 'MLT', 'MMR', 'MNA', 'MNE', 'MNG', 'MNP', 'MOZ', 'MRT', 'MUS', 'MWI', 'MYS', 'NAC', 'NAM', 'NCL', 'NER', 'NGA', 'NIC', 'NLD', 'NOR', 'NPL', 'NRU', 'NZL', 'OED', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'POL', 'PRI', 'PRK', 'PRT', 'PRY', 'PSE', 'PYF', 'QAT', 'ROU', 'RUS', 'RWA', 'SAS', 'SAU', 'SDN', 'SEN', 'SGP', 'SLB', 'SLE', 'SLV', 'SMR', 'SOM', 'SRB', 'SSA', 'SSD', 'SSF', 'STP', 'SUR', 'SVK', 'SVN', 'SWE', 'SWZ', 'SXM', 'SYC', 'SYR', 'TCA', 'TCD', 'TGO', 'THA', 'TJK', 'TKM', 'TLS', 'TON', 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UGA', 'UKR', 'UMC', 'URY', 'USA', 'UZB', 'VCT', 'VEN', 'VGB', 'VIR', 'VNM', 'VUT', 'WLD', 'WSM', 'XKX', 'YEM', 'ZAF', 'ZMB', 'ZWE']

# Select the columns listed in int_cols as a separate frame and display it.
countryXindicator_year = countryXindicator[int_cols]
countryXindicator_year

# Keep only the columns whose name contains the substring "years".
year_column_names = [
    name for name in countryXindicator_year.columns if "years" in name
]
countryXindicator_year = countryXindicator_year[year_column_names]
countryXindicator_year

Impute both dataframes¶

# Impute the missing values of the float frame via the project helper impute_df
# (the log it emits comes from scikit-learn's IterativeImputer).
# NOTE(review): the log captured for this call runs all 100 rounds and ends
# with a ConvergenceWarning ("Early stopping criterion not reached"); the last
# reported change (~1.68) is still well above the scaled tolerance (~0.19), so
# the imputed values may not have stabilised — review before relying on them.
imputed_countryXindicator_float = impute_df(countryXindicator_float, max_iter=100, verbose=2)

[IterativeImputer] Change: 2.532294563247444, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 19/100, elapsed time 4.17
[IterativeImputer] Change: 2.2119869217502828, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 20/100, elapsed time 4.38
[IterativeImputer] Change: 1.9202443611257713, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 21/100, elapsed time 4.59
[IterativeImputer] Change: 1.6606990496546086, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 22/100, elapsed time 4.82
[IterativeImputer] Change: 1.6559282826221846, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 23/100, elapsed time 5.04
[IterativeImputer] Change: 1.6520065848505072, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 24/100, elapsed time 5.26
[IterativeImputer] Change: 1.648782063252183, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 25/100, elapsed time 5.48
[IterativeImputer] Change: 1.6461831356141814, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 26/100, elapsed time 5.69
[IterativeImputer] Change: 1.644058054001174, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 27/100, elapsed time 5.90
[IterativeImputer] Change: 1.6423013079998188, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 28/100, elapsed time 6.12
[IterativeImputer] Change: 1.6408741216707456, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 29/100, elapsed time 6.33
[IterativeImputer] Change: 1.6396788922926566, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 30/100, elapsed time 6.55
[IterativeImputer] Change: 1.638655100233116, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 31/100, elapsed time 6.77
[IterativeImputer] Change: 1.6377695134752297, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 32/100, elapsed time 6.98
[IterativeImputer] Change: 1.6369872536064, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 33/100, elapsed time 7.19
[IterativeImputer] Change: 1.636221623251295, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 34/100, elapsed time 7.41
[IterativeImputer] Change: 1.6355083766661642, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 35/100, elapsed time 7.63
[IterativeImputer] Change: 1.634821858203819, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 36/100, elapsed time 7.85
[IterativeImputer] Change: 1.6341370631609828, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 37/100, elapsed time 8.07
[IterativeImputer] Change: 1.6334747186241456, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 38/100, elapsed time 8.28
[IterativeImputer] Change: 1.645652531918897, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 39/100, elapsed time 8.50
[IterativeImputer] Change: 1.652182711035818, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 40/100, elapsed time 8.72
[IterativeImputer] Change: 1.6532662904419548, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 41/100, elapsed time 8.94
[IterativeImputer] Change: 1.649451831600862, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 42/100, elapsed time 9.16
[IterativeImputer] Change: 1.6408222980663139, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 43/100, elapsed time 9.38
[IterativeImputer] Change: 1.6294400496414387, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 44/100, elapsed time 9.60
[IterativeImputer] Change: 1.6287678524729357, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 45/100, elapsed time 9.82
[IterativeImputer] Change: 1.6281047469396985, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 46/100, elapsed time 10.05
[IterativeImputer] Change: 1.6274834308429378, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 47/100, elapsed time 10.26
[IterativeImputer] Change: 1.6270130873718072, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 48/100, elapsed time 10.48
[IterativeImputer] Change: 1.6262940758389055, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 49/100, elapsed time 10.70
[IterativeImputer] Change: 1.6257297763508762, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 50/100, elapsed time 10.92
[IterativeImputer] Change: 1.6252298538321177, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 51/100, elapsed time 11.14
[IterativeImputer] Change: 1.624722930666042, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 52/100, elapsed time 11.35
[IterativeImputer] Change: 1.6242577689697895, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 53/100, elapsed time 11.57
[IterativeImputer] Change: 1.6238212014114874, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 54/100, elapsed time 11.79
[IterativeImputer] Change: 1.6234519318135652, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 55/100, elapsed time 12.01
[IterativeImputer] Change: 1.6231115358163457, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 56/100, elapsed time 12.23
[IterativeImputer] Change: 1.6228249713969374, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 57/100, elapsed time 12.44
[IterativeImputer] Change: 1.6225670613424206, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 58/100, elapsed time 12.66
[IterativeImputer] Change: 1.6222797867936543, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 59/100, elapsed time 12.89
[IterativeImputer] Change: 1.6221342960698018, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 60/100, elapsed time 13.10
[IterativeImputer] Change: 1.6219870713467206, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 61/100, elapsed time 13.31
[IterativeImputer] Change: 1.6218646076142422, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 62/100, elapsed time 13.53
[IterativeImputer] Change: 1.6217730363655758, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 63/100, elapsed time 13.75
[IterativeImputer] Change: 1.621725788928061, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 64/100, elapsed time 14.06
[IterativeImputer] Change: 1.6216920357302036, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 65/100, elapsed time 14.27
[IterativeImputer] Change: 1.621698990708198, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 66/100, elapsed time 14.50
[IterativeImputer] Change: 1.6217532944875095, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 67/100, elapsed time 14.72
[IterativeImputer] Change: 1.6218227224444455, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 68/100, elapsed time 14.93
[IterativeImputer] Change: 1.6218972339192113, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 69/100, elapsed time 15.15
[IterativeImputer] Change: 1.6232902366910664, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 70/100, elapsed time 15.37
[IterativeImputer] Change: 1.6249616723710512, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 71/100, elapsed time 15.59
[IterativeImputer] Change: 1.6261259356463242, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 72/100, elapsed time 15.81
[IterativeImputer] Change: 1.6281859862659571, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 73/100, elapsed time 16.03
[IterativeImputer] Change: 1.63006029323919, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 74/100, elapsed time 16.24
[IterativeImputer] Change: 1.6318311593012644, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 75/100, elapsed time 16.46
[IterativeImputer] Change: 1.6336233225534909, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 76/100, elapsed time 16.68
[IterativeImputer] Change: 1.6353960973093213, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 77/100, elapsed time 16.90
[IterativeImputer] Change: 1.637182751172711, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 78/100, elapsed time 17.12
[IterativeImputer] Change: 1.6389758111697363, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 79/100, elapsed time 17.34
[IterativeImputer] Change: 1.6407763042253765, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 80/100, elapsed time 17.55
[IterativeImputer] Change: 1.6425834310407716, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 81/100, elapsed time 17.77
[IterativeImputer] Change: 1.6443968858747002, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 82/100, elapsed time 17.99
[IterativeImputer] Change: 1.64625348689472, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 83/100, elapsed time 18.21
[IterativeImputer] Change: 1.6481429500945106, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 84/100, elapsed time 18.43
[IterativeImputer] Change: 1.6500518120410161, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 85/100, elapsed time 18.65
[IterativeImputer] Change: 1.6519548194561064, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 86/100, elapsed time 18.87
[IterativeImputer] Change: 1.6538680162736423, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 87/100, elapsed time 19.09
[IterativeImputer] Change: 1.6552679864808504, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 88/100, elapsed time 19.31
[IterativeImputer] Change: 1.6574626185722092, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 89/100, elapsed time 19.52
[IterativeImputer] Change: 1.6594748268725177, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 90/100, elapsed time 19.74
[IterativeImputer] Change: 1.6613749089305434, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 91/100, elapsed time 19.96
[IterativeImputer] Change: 1.6632615294758013, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 92/100, elapsed time 20.18
[IterativeImputer] Change: 1.6651267664591594, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 93/100, elapsed time 20.40
[IterativeImputer] Change: 1.6669733028122995, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 94/100, elapsed time 20.62
[IterativeImputer] Change: 1.6688041142649566, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 95/100, elapsed time 20.84
[IterativeImputer] Change: 1.6706225132059456, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 96/100, elapsed time 21.08
[IterativeImputer] Change: 1.672451264542782, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 97/100, elapsed time 21.30
[IterativeImputer] Change: 1.6742766711767634, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 98/100, elapsed time 21.53
[IterativeImputer] Change: 1.6761039010597747, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 99/100, elapsed time 21.74
[IterativeImputer] Change: 1.6778935337913161, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Ending imputation round 100/100, elapsed time 21.96
[IterativeImputer] Change: 1.679678914280904, scaled tolerance: 0.19410198974609402 
[IterativeImputer] Completing matrix with shape (241, 48)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.01
[IterativeImputer] Ending imputation round 2/100, elapsed time 0.02
[IterativeImputer] Ending imputation round 3/100, elapsed time 0.03
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.04
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.05
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.06
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.07
[IterativeImputer] Ending imputation round 8/100, elapsed time 0.07
[IterativeImputer] Ending imputation round 9/100, elapsed time 0.08
[IterativeImputer] Ending imputation round 10/100, elapsed time 0.09
[IterativeImputer] Ending imputation round 11/100, elapsed time 0.10
[IterativeImputer] Ending imputation round 12/100, elapsed time 0.11
[IterativeImputer] Ending imputation round 13/100, elapsed time 0.12
[IterativeImputer] Ending imputation round 14/100, elapsed time 0.12
[IterativeImputer] Ending imputation round 15/100, elapsed time 0.13
[IterativeImputer] Ending imputation round 16/100, elapsed time 0.14
[IterativeImputer] Ending imputation round 17/100, elapsed time 0.15
[IterativeImputer] Ending imputation round 18/100, elapsed time 0.16
[IterativeImputer] Ending imputation round 19/100, elapsed time 0.17
[IterativeImputer] Ending imputation round 20/100, elapsed time 0.18
[IterativeImputer] Ending imputation round 21/100, elapsed time 0.18
C:\Users\joach\.conda\envs\wsenv\lib\site-packages\sklearn\impute\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.
  warnings.warn("[IterativeImputer] Early stopping criterion not"
[IterativeImputer] Ending imputation round 22/100, elapsed time 0.20
[IterativeImputer] Ending imputation round 23/100, elapsed time 0.20
[IterativeImputer] Ending imputation round 24/100, elapsed time 0.21
[IterativeImputer] Ending imputation round 25/100, elapsed time 0.22
[IterativeImputer] Ending imputation round 26/100, elapsed time 0.23
[IterativeImputer] Ending imputation round 27/100, elapsed time 0.24
[IterativeImputer] Ending imputation round 28/100, elapsed time 0.24
[IterativeImputer] Ending imputation round 29/100, elapsed time 0.25
[IterativeImputer] Ending imputation round 30/100, elapsed time 0.26
[IterativeImputer] Ending imputation round 31/100, elapsed time 0.27
[IterativeImputer] Ending imputation round 32/100, elapsed time 0.28
[IterativeImputer] Ending imputation round 33/100, elapsed time 0.28
[IterativeImputer] Ending imputation round 34/100, elapsed time 0.29
[IterativeImputer] Ending imputation round 35/100, elapsed time 0.30
[IterativeImputer] Ending imputation round 36/100, elapsed time 0.31
[IterativeImputer] Ending imputation round 37/100, elapsed time 0.31
[IterativeImputer] Ending imputation round 38/100, elapsed time 0.32
[IterativeImputer] Ending imputation round 39/100, elapsed time 0.33
[IterativeImputer] Ending imputation round 40/100, elapsed time 0.34
[IterativeImputer] Ending imputation round 41/100, elapsed time 0.34
[IterativeImputer] Ending imputation round 42/100, elapsed time 0.35
[IterativeImputer] Ending imputation round 43/100, elapsed time 0.36
[IterativeImputer] Ending imputation round 44/100, elapsed time 0.37
[IterativeImputer] Ending imputation round 45/100, elapsed time 0.38
[IterativeImputer] Ending imputation round 46/100, elapsed time 0.38
[IterativeImputer] Ending imputation round 47/100, elapsed time 0.39
[IterativeImputer] Ending imputation round 48/100, elapsed time 0.40
[IterativeImputer] Ending imputation round 49/100, elapsed time 0.41
[IterativeImputer] Ending imputation round 50/100, elapsed time 0.42
[IterativeImputer] Ending imputation round 51/100, elapsed time 0.43
[IterativeImputer] Ending imputation round 52/100, elapsed time 0.43
[IterativeImputer] Ending imputation round 53/100, elapsed time 0.44
[IterativeImputer] Ending imputation round 54/100, elapsed time 0.45
[IterativeImputer] Ending imputation round 55/100, elapsed time 0.46
[IterativeImputer] Ending imputation round 56/100, elapsed time 0.47
[IterativeImputer] Ending imputation round 57/100, elapsed time 0.48
[IterativeImputer] Ending imputation round 58/100, elapsed time 0.49
[IterativeImputer] Ending imputation round 59/100, elapsed time 0.49
[IterativeImputer] Ending imputation round 60/100, elapsed time 0.50
[IterativeImputer] Ending imputation round 61/100, elapsed time 0.51
[IterativeImputer] Ending imputation round 62/100, elapsed time 0.52
[IterativeImputer] Ending imputation round 63/100, elapsed time 0.52
[IterativeImputer] Ending imputation round 64/100, elapsed time 0.53
[IterativeImputer] Ending imputation round 65/100, elapsed time 0.54
[IterativeImputer] Ending imputation round 66/100, elapsed time 0.55
[IterativeImputer] Ending imputation round 67/100, elapsed time 0.55
[IterativeImputer] Ending imputation round 68/100, elapsed time 0.56
[IterativeImputer] Ending imputation round 69/100, elapsed time 0.57
[IterativeImputer] Ending imputation round 70/100, elapsed time 0.58
[IterativeImputer] Ending imputation round 71/100, elapsed time 0.58
[IterativeImputer] Ending imputation round 72/100, elapsed time 0.59
[IterativeImputer] Ending imputation round 73/100, elapsed time 0.60
[IterativeImputer] Ending imputation round 74/100, elapsed time 0.61
[IterativeImputer] Ending imputation round 75/100, elapsed time 0.61
[IterativeImputer] Ending imputation round 76/100, elapsed time 0.62
[IterativeImputer] Ending imputation round 77/100, elapsed time 0.63
[IterativeImputer] Ending imputation round 78/100, elapsed time 0.64
[IterativeImputer] Ending imputation round 79/100, elapsed time 0.65
[IterativeImputer] Ending imputation round 80/100, elapsed time 0.66
[IterativeImputer] Ending imputation round 81/100, elapsed time 0.66
[IterativeImputer] Ending imputation round 82/100, elapsed time 0.67
[IterativeImputer] Ending imputation round 83/100, elapsed time 0.68
[IterativeImputer] Ending imputation round 84/100, elapsed time 0.69
[IterativeImputer] Ending imputation round 85/100, elapsed time 0.70
[IterativeImputer] Ending imputation round 86/100, elapsed time 0.70
[IterativeImputer] Ending imputation round 87/100, elapsed time 0.71
[IterativeImputer] Ending imputation round 88/100, elapsed time 0.72
[IterativeImputer] Ending imputation round 89/100, elapsed time 0.73
[IterativeImputer] Ending imputation round 90/100, elapsed time 0.73
[IterativeImputer] Ending imputation round 91/100, elapsed time 0.74
[IterativeImputer] Ending imputation round 92/100, elapsed time 0.75
[IterativeImputer] Ending imputation round 93/100, elapsed time 0.76
[IterativeImputer] Ending imputation round 94/100, elapsed time 0.76
[IterativeImputer] Ending imputation round 95/100, elapsed time 0.77
[IterativeImputer] Ending imputation round 96/100, elapsed time 0.78
[IterativeImputer] Ending imputation round 97/100, elapsed time 0.79
[IterativeImputer] Ending imputation round 98/100, elapsed time 0.79
[IterativeImputer] Ending imputation round 99/100, elapsed time 0.80
[IterativeImputer] Ending imputation round 100/100, elapsed time 0.81

# Impute the missing values of the year frame with the same project helper.
# The log captured for this call shows the early stopping criterion being
# reached in round 4 of the allowed 100.
imputed_countryXindicator_year = impute_df(countryXindicator_year, max_iter=100, verbose=2)

[IterativeImputer] Completing matrix with shape (241, 6)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.01
[IterativeImputer] Change: 1.6443802657682207, scaled tolerance: 0.015 
[IterativeImputer] Ending imputation round 2/100, elapsed time 0.02
[IterativeImputer] Change: 0.0830316620808409, scaled tolerance: 0.015 
[IterativeImputer] Ending imputation round 3/100, elapsed time 0.04
[IterativeImputer] Change: 0.0289166641679115, scaled tolerance: 0.015 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.05
[IterativeImputer] Change: 0.009636184792338298, scaled tolerance: 0.015 
[IterativeImputer] Early stopping criterion reached.
[IterativeImputer] Completing matrix with shape (241, 6)
[IterativeImputer] Ending imputation round 1/4, elapsed time 0.00
[IterativeImputer] Ending imputation round 2/4, elapsed time 0.00
[IterativeImputer] Ending imputation round 3/4, elapsed time 0.00
[IterativeImputer] Ending imputation round 4/4, elapsed time 0.01

Convert years float to int¶

# Round to zero decimals first, then cast, so that the conversion to int
# does not simply truncate the fractional part.
rounded_year_values = imputed_countryXindicator_year.round(0)
imputed_countryXindicator_year = rounded_year_values.astype(int)

Comparison correlation plot float-percentages-data¶

Before Imputation

# Pairwise column correlations of the float frame before imputation,
# drawn on a fixed [-1, 1] colour scale without tick labels.
corr_calc = countryXindicator_float.corr()
heatmap_options = {
    'vmin': -1,
    'vmax': 1,
    'center': 0,
    'xticklabels': False,
    'yticklabels': False,
    'cmap': 'mako',
}
sns.heatmap(corr_calc, **heatmap_options)

<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>

After imputation

# Same plot as for the raw float frame, now on the imputed values.
corr_calc = imputed_countryXindicator_float.corr()
sns.heatmap(
    corr_calc,
    vmin=-1,
    vmax=1,
    center=0,
    xticklabels=False,
    yticklabels=False,
    cmap='mako',
)

<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>

Comparison correlation plot year-data¶

Before imputation

# Pairwise column correlations of the year frame before imputation,
# drawn on a fixed [-1, 1] colour scale (tick labels kept).
corr_calc = countryXindicator_year.corr()
year_heatmap_options = {'vmin': -1, 'vmax': 1, 'center': 0, 'cmap': 'mako'}
sns.heatmap(corr_calc, **year_heatmap_options)

<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>

After imputation

# Same plot as for the raw year frame, now on the imputed values.
corr_calc = imputed_countryXindicator_year.corr()
sns.heatmap(
    corr_calc,
    vmin=-1,
    vmax=1,
    center=0,
    cmap='mako',
)

<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>

Merge imputed dataframes¶

# Recombine the two imputed frames, keeping only the country codes that are
# present in both of them.
imputed_data = imputed_countryXindicator_year.merge(
    imputed_countryXindicator_float,
    how='inner',
    on="Country Code",
)
imputed_data

# Attach the columns of df_c to each row of the imputed data; the left join
# keeps every imputed row even when df_c has no matching "Country Code".
bigtable = imputed_data.merge(
    df_c,
    how='left',
    on="Country Code",
)
bigtable.columns

Index(['Country Code', 'Duration of compulsory education (years)',
       'Official entrance age to lower secondary education (years)',
       'Official entrance age to primary education (years)',
       'Theoretical duration of primary education (years)',
       'Theoretical duration of secondary education (years)',
       'Theoretical duration of upper secondary education (years)',
       'Adjusted net enrolment rate, primary, both sexes (%)',
       'Gross enrolment ratio, lower secondary, both sexes (%)',
       'Gross enrolment ratio, lower secondary, female (%)',
       'Gross enrolment ratio, lower secondary, male (%)',
       'Gross enrolment ratio, pre-primary, both sexes (%)',
       'Gross enrolment ratio, pre-primary, female (%)',
       'Gross enrolment ratio, pre-primary, male (%)',
       'Gross enrolment ratio, primary, both sexes (%)',
       'Gross enrolment ratio, primary, female (%)',
       'Gross enrolment ratio, primary, gender parity index (GPI)',
       'Gross enrolment ratio, primary, male (%)',
       'Gross enrolment ratio, secondary, both sexes (%)',
       'Gross enrolment ratio, secondary, female (%)',
       'Gross enrolment ratio, secondary, gender parity index (GPI)',
       'Gross enrolment ratio, secondary, male (%)',
       'Gross enrolment ratio, upper secondary, both sexes (%)',
       'Gross intake ratio to Grade 1 of primary education, both sexes (%)',
       'Gross intake ratio to Grade 1 of primary education, female (%)',
       'Gross intake ratio to Grade 1 of primary education, male (%)',
       'Internet users (per 100 people)',
       'Labor force, female (% of total labor force)',
       'Mortality rate, under-5 (per 1,000 live births)',
       'Net enrolment rate, primary, both sexes (%)',
       'Percentage of enrolment in pre-primary education in private institutions (%)',
       'Percentage of enrolment in primary education in private institutions (%)',
       'Percentage of enrolment in secondary education in private institutions (%)',
       'Percentage of female students enrolled in primary education who are over-age, female (%)',
       'Percentage of male students enrolled in primary education who are over-age, male (%)',
       'Percentage of repeaters in primary education, all grades, both sexes (%)',
       'Percentage of repeaters in primary education, all grades, female (%)',
       'Percentage of repeaters in primary education, all grades, male (%)',
       'Percentage of students enrolled in primary education who are over-age, both sexes (%)',
       'Percentage of students in pre-primary education who are female (%)',
       'Percentage of students in primary education who are female (%)',
       'Percentage of students in secondary education who are female (%)',
       'Percentage of students in secondary general education who are female (%)',
       'Population ages 0-14 (% of total)',
       'Population ages 15-64 (% of total)', 'Population growth (annual %)',
       'Population, female (% of total)', 'Population, male (% of total)',
       'Prevalence of HIV, total (% of population ages 15-49)',
       'Primary completion rate, both sexes (%)',
       'Primary completion rate, female (%)',
       'Primary completion rate, male (%)',
       'Unemployment, female (% of female labor force) (modeled ILO estimate)',
       'Unemployment, male (% of male labor force) (modeled ILO estimate)',
       'Unemployment, total (% of total labor force) (modeled ILO estimate)',
       'Short Name', 'Table Name', 'Long Name', '2-alpha code',
       'Currency Unit', 'Special Notes', 'Region', 'Income Group', 'WB-2 code',
       'National accounts base year', 'National accounts reference year',
       'SNA price valuation', 'Lending category', 'Other groups',
       'System of National Accounts', 'Alternative conversion factor',
       'PPP survey year', 'Balance of Payments Manual in use',
       'External debt Reporting status', 'System of trade',
       'Government Accounting concept', 'IMF data dissemination standard',
       'Latest population census', 'Latest household survey',
       'Source of most recent Income and expenditure data',
       'Vital registration complete', 'Latest agricultural census',
       'Latest industrial data', 'Latest trade data',
       'Latest water withdrawal data', 'Unnamed: 31'],
      dtype='object')

# Promote the 'Country Code' column to the row index, mutating the existing
# frame so every later cell sees the re-keyed table.
bigtable.set_index(keys='Country Code', inplace=True)

# Bare expression: renders the table when executed as a notebook cell.
bigtable

# Persist the table; with pandas' default index=True the index is written
# as the first CSV column.
bigtable.to_csv(path_or_buf="../data/unlabeled/preprocessed/edstats_preprocessed.csv")

	Country Code	Indicator Code	2015	2016	2017	2020
0	ARB	UIS.NERA.2	NaN	NaN	NaN	NaN
1	ARB	UIS.NERA.2.F	NaN	NaN	NaN	NaN
2	ARB	UIS.NERA.2.GPI	NaN	NaN	NaN	NaN
3	ARB	UIS.NERA.2.M	NaN	NaN	NaN	NaN
4	ARB	SE.PRM.TENR	NaN	NaN	NaN	NaN
...	...	...	...	...	...	...
886925	ZWE	UIS.LP.AG15T24.M	NaN	NaN	NaN	NaN
886926	ZWE	SE.ADT.1524.LT.ZS	NaN	NaN	NaN	NaN
886927	ZWE	SE.ADT.1524.LT.FE.ZS	NaN	NaN	NaN	NaN
886928	ZWE	SE.ADT.1524.LT.FM.ZS	NaN	NaN	NaN	NaN
886929	ZWE	SE.ADT.1524.LT.MA.ZS	NaN	NaN	NaN	NaN

	Country Code	Indicator Code	2020
0	ARB	UIS.NERA.2	NaN
1	ARB	UIS.NERA.2.F	NaN
2	ARB	UIS.NERA.2.GPI	NaN
3	ARB	UIS.NERA.2.M	NaN
4	ARB	SE.PRM.TENR	NaN
...	...	...	...
886925	ZWE	UIS.LP.AG15T24.M	NaN
886926	ZWE	SE.ADT.1524.LT.ZS	NaN
886927	ZWE	SE.ADT.1524.LT.FE.ZS	NaN
886928	ZWE	SE.ADT.1524.LT.FM.ZS	NaN
886929	ZWE	SE.ADT.1524.LT.MA.ZS	NaN

Indicator Name	Adjusted net enrolment rate, lower secondary, both sexes (%)	Adjusted net enrolment rate, lower secondary, female (%)	Adjusted net enrolment rate, lower secondary, gender parity index (GPI)	Adjusted net enrolment rate, lower secondary, male (%)	Adjusted net enrolment rate, primary, both sexes (%)	Adjusted net enrolment rate, primary, female (%)	Adjusted net enrolment rate, primary, gender parity index (GPI)	Adjusted net enrolment rate, primary, male (%)	Adjusted net enrolment rate, upper secondary, both sexes (%)	Adjusted net enrolment rate, upper secondary, female (%)	...	Under-age enrolment ratio in secondary education, female (%)	Under-age enrolment ratio in secondary education, male (%)	Unemployment, female (% of female labor force) (modeled ILO estimate)	Unemployment, male (% of male labor force) (modeled ILO estimate)	Unemployment, total (% of total labor force) (modeled ILO estimate)	Youth illiterate population, 15-24 years, % female	Youth literacy rate, population 15-24 years, both sexes (%)	Youth literacy rate, population 15-24 years, female (%)	Youth literacy rate, population 15-24 years, gender parity index (GPI)	Youth literacy rate, population 15-24 years, male (%)
Country Code
ABW	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
AFG	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	12.700000	7.7	8.600000	NaN	NaN	NaN	NaN	NaN
AGO	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	6.700000	5.7	6.200000	NaN	NaN	NaN	NaN	NaN
ALB	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	17.299999	17.0	17.100000	NaN	NaN	NaN	NaN	NaN
AND	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	47.80890	100.00000	100.00000	1.00000	100.00000
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
XKX	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
YEM	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	32.700001	11.1	16.700001	NaN	NaN	NaN	NaN	NaN
ZAF	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	27.700001	23.1	25.200001	34.34808	98.95578	99.22904	1.00552	98.68459
ZMB	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	8.000000	7.3	7.700000	NaN	NaN	NaN	NaN	NaN
ZWE	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	5.100000	5.1	5.100000	NaN	NaN	NaN	NaN	NaN

Indicator Name	Adjusted net enrolment rate, primary, both sexes (%)	Age population, age 0, female, UNESCO	Age population, age 0, total, UNESCO	Age population, age 01, female, UNESCO	Age population, age 01, total, UNESCO	Age population, age 02, female, UNESCO	Age population, age 02, total, UNESCO	Age population, age 03, female, UNESCO	Age population, age 03, total, UNESCO	Age population, age 04, female, UNESCO	...	Prevalence of HIV, total (% of population ages 15-49)	Primary completion rate, both sexes (%)	Primary completion rate, female (%)	Primary completion rate, male (%)	Theoretical duration of primary education (years)	Theoretical duration of secondary education (years)	Theoretical duration of upper secondary education (years)	Unemployment, female (% of female labor force) (modeled ILO estimate)	Unemployment, male (% of male labor force) (modeled ILO estimate)	Unemployment, total (% of total labor force) (modeled ILO estimate)
Country Code
ABW	NaN	542.0	1112.0	572.0	1170.0	600.0	1224.0	623.0	1269.0	643.0	...	NaN	NaN	NaN	NaN	6.0	5.0	3.0	NaN	NaN	NaN
AFG	NaN	682677.0	1403010.0	650389.0	1338500.0	620880.0	1279403.0	593957.0	1225345.0	569425.0	...	0.1	NaN	NaN	NaN	6.0	6.0	3.0	12.700000	7.7	8.600000
AGO	NaN	414919.0	832093.0	396537.0	793950.0	379705.0	759164.0	364295.0	727455.0	350180.0	...	1.9	NaN	NaN	NaN	6.0	6.0	3.0	6.700000	5.7	6.200000
ALB	NaN	23697.0	49105.0	24026.0	49709.0	24243.0	50104.0	24360.0	50315.0	24397.0	...	0.1	106.367561	104.699371	107.900124	5.0	7.0	3.0	17.299999	17.0	17.100000
AND	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	6.0	6.0	2.0	NaN	NaN	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
XKX	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
YEM	NaN	458859.0	936053.0	452601.0	923017.0	444988.0	907252.0	436223.0	889171.0	426514.0	...	0.1	NaN	NaN	NaN	6.0	6.0	3.0	32.700001	11.1	16.700001
ZAF	NaN	484535.0	980663.0	486901.0	985215.0	489516.0	990194.0	492299.0	995445.0	495165.0	...	18.9	NaN	NaN	NaN	7.0	5.0	3.0	27.700001	23.1	25.200001
ZMB	NaN	226616.0	457329.0	220950.0	445451.0	215890.0	434876.0	211367.0	425454.0	207311.0	...	12.6	NaN	NaN	NaN	7.0	5.0	3.0	8.000000	7.3	7.700000
ZWE	NaN	181422.0	364761.0	179761.0	361101.0	177982.0	357261.0	176130.0	353321.0	174240.0	...	13.9	NaN	NaN	NaN	7.0	6.0	4.0	5.100000	5.1	5.100000

Indicator Name	Age population, age 0, female, UNESCO	Age population, age 0, total, UNESCO	Age population, age 01, female, UNESCO	Age population, age 01, total, UNESCO	Age population, age 02, female, UNESCO	Age population, age 02, total, UNESCO	Age population, age 03, female, UNESCO	Age population, age 03, total, UNESCO	Age population, age 04, female, UNESCO	Age population, age 04, total, UNESCO	...	Population, ages 7-13, male	Population, ages 7-13, total	Population, ages 7-9, female	Population, ages 7-9, male	Population, ages 7-9, total	Population, female	Population, male	Theoretical duration of primary education (years)	Theoretical duration of secondary education (years)	Theoretical duration of upper secondary education (years)
Country Code
ABW	542.0	1112.0	572.0	1170.0	600.0	1224.0	623.0	1269.0	643.0	1310.0	...	5152.0	10124.0	2095.0	2169.0	4264.0	54743.0	49598.0	6.0	5.0	3.0
AFG	682677.0	1403010.0	650389.0	1338500.0	620880.0	1279403.0	593957.0	1225345.0	569425.0	1175957.0	...	3481981.0	6728457.0	1475542.0	1580498.0	3056040.0	16346869.0	17389625.0	6.0	6.0	3.0
AGO	414919.0	832093.0	396537.0	793950.0	379705.0	759164.0	364295.0	727455.0	350180.0	698548.0	...	1988558.0	3995534.0	913109.0	904577.0	1817686.0	14205741.0	13653564.0	6.0	6.0	3.0
ALB	23697.0	49105.0	24026.0	49709.0	24243.0	50104.0	24360.0	50315.0	24397.0	50378.0	...	179160.0	346732.0	72246.0	77107.0	149353.0	1426369.0	1454334.0	5.0	7.0	3.0
AND	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	6.0	6.0	2.0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
XKX	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
YEM	458859.0	936053.0	452601.0	923017.0	444988.0	907252.0	436223.0	889171.0	426514.0	869195.0	...	2614331.0	5137180.0	1147011.0	1189006.0	2336017.0	13315678.0	13600529.0	6.0	6.0	3.0
ZAF	484535.0	980663.0	486901.0	985215.0	489516.0	990194.0	492299.0	995445.0	495165.0	1000810.0	...	3615273.0	7175459.0	1516934.0	1543083.0	3060017.0	27999778.0	27012199.0	7.0	5.0	3.0
ZMB	226616.0	457329.0	220950.0	445451.0	215890.0	434876.0	211367.0	425454.0	207311.0	417041.0	...	1334582.0	2657396.0	583300.0	588617.0	1171917.0	8112243.0	7988344.0	7.0	5.0	3.0
ZWE	181422.0	364761.0	179761.0	361101.0	177982.0	357261.0	176130.0	353321.0	174240.0	349351.0	...	1156372.0	2310111.0	501606.0	502943.0	1004549.0	8099354.0	7678097.0	7.0	6.0	4.0

Indicator Name	Duration of compulsory education (years)	Official entrance age to lower secondary education (years)	Official entrance age to primary education (years)	Theoretical duration of primary education (years)	Theoretical duration of secondary education (years)	Theoretical duration of upper secondary education (years)
Country Code
ABW	13.0	12.0	6.0	6.0	5.0	3.0
AFG	9.0	13.0	7.0	6.0	6.0	3.0
AGO	6.0	12.0	6.0	6.0	6.0	3.0
ALB	9.0	11.0	6.0	5.0	7.0	3.0
AND	10.0	12.0	6.0	6.0	6.0	2.0
...	...	...	...	...	...	...
XKX	NaN	NaN	NaN	NaN	NaN	NaN
YEM	9.0	12.0	6.0	6.0	6.0	3.0
ZAF	9.0	14.0	7.0	7.0	5.0	3.0
ZMB	7.0	14.0	7.0	7.0	5.0	3.0
ZWE	7.0	13.0	6.0	7.0	6.0	4.0

Indicator Name	Duration of compulsory education (years)	Official entrance age to lower secondary education (years)	Official entrance age to primary education (years)	Theoretical duration of primary education (years)	Theoretical duration of secondary education (years)	Theoretical duration of upper secondary education (years)	Adjusted net enrolment rate, primary, both sexes (%)	Gross enrolment ratio, lower secondary, both sexes (%)	Gross enrolment ratio, lower secondary, female (%)	Gross enrolment ratio, lower secondary, male (%)	...	Population growth (annual %)	Population, female (% of total)	Population, male (% of total)	Prevalence of HIV, total (% of population ages 15-49)	Primary completion rate, both sexes (%)	Primary completion rate, female (%)	Primary completion rate, male (%)	Unemployment, female (% of female labor force) (modeled ILO estimate)	Unemployment, male (% of male labor force) (modeled ILO estimate)	Unemployment, total (% of total labor force) (modeled ILO estimate)
Country Code
ABW	13	12	6	6	5	3	96.636541	91.072857	90.689076	91.406452	...	0.524658	52.465521	47.534479	1.416423	91.125743	90.752535	91.425942	10.349564	8.746525	9.324957
AFG	9	13	7	6	6	3	87.416874	67.447617	49.580441	84.329559	...	2.943234	48.454558	51.545442	0.100000	93.312781	88.498878	97.920359	12.700000	7.700000	8.600000
AGO	6	12	6	6	6	3	87.446703	90.903513	90.527003	91.325392	...	3.428021	50.991010	49.008990	1.900000	92.486172	92.930030	92.143730	6.700000	5.700000	6.200000
ALB	9	11	6	5	7	3	99.516937	101.488373	99.889503	102.980438	...	-0.291206	49.514599	50.485401	0.100000	106.367561	104.699371	107.900124	17.299999	17.000000	17.100000
AND	10	12	6	6	6	2	94.592438	91.281081	91.067359	91.467822	...	-1.537836	49.937398	50.062602	-0.301099	90.510694	89.175083	91.671974	14.798726	11.023894	11.875731
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
XKX	10	12	6	6	6	3	95.413339	91.084565	90.721799	91.429833	...	-1.103886	49.937397	50.062603	-0.423033	91.464884	91.250304	91.619722	9.662467	9.205525	9.289344
YEM	9	12	6	6	6	3	86.150744	90.902881	90.439904	91.274501	...	2.520254	49.470856	50.529144	0.100000	92.609481	92.837224	92.326815	32.700001	11.100000	16.700001
ZAF	9	14	7	7	5	3	96.505376	91.062603	90.916364	91.240835	...	1.585325	50.897604	49.102396	18.900000	91.111444	91.824840	90.441719	27.700001	23.100000	25.200001
ZMB	7	14	7	7	5	3	93.181744	91.012954	90.829133	91.267250	...	3.024123	50.384766	49.615234	12.600000	91.406170	91.905970	90.976530	8.000000	7.300000	7.700000
ZWE	7	13	6	7	6	4	94.625610	91.047967	90.927321	91.251754	...	2.345643	51.334997	48.665003	13.900000	91.325104	91.913467	90.806284	5.100000	5.100000	5.100000