1 package org.unicode.cldr.tool; 2 3 import java.util.Set; 4 import org.unicode.cldr.util.CLDRFile; 5 import org.unicode.cldr.util.CLDRPaths; 6 import org.unicode.cldr.util.Counter; 7 import org.unicode.cldr.util.Factory; 8 import org.unicode.cldr.util.Pair; 9 import org.unicode.cldr.util.SupplementalDataInfo; 10 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 11 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 12 13 public class GetLanguageData { 14 SupplementalDataInfo sdata = SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY); 15 Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 16 CLDRFile english = cldrFactory.make("en", true); 17 Set<String> euCountries = sdata.getContained("EU"); 18 Counter<String> languageToGdp = new Counter<>(); 19 Counter<String> languageToPop = new Counter<>(); 20 main(String[] args)21 public static void main(String[] args) { 22 new GetLanguageData().run(); 23 } 24 run()25 private void run() { 26 findSuspectData(); 27 System.out.println("Code\tLang\tLpop\tApprox. Gdp"); 28 for (String language : sdata.getLanguages()) { 29 final long pop = languageToPop.getCount(language); 30 System.out.print(language + "\t" + english.getName(language)); 31 if (pop > 0) { 32 Pair<OfficialStatus, String> status = isOfficialLanguageOfEUCountry(language); 33 System.out.print( 34 "\t" 35 + pop // 36 + "\t" 37 + languageToGdp.getCount(language) // 38 + "\t" 39 + (status.getFirst().isOfficial() ? status.getFirst() : "") // 40 + "\t" 41 + status.getSecond() // 42 ); 43 } 44 System.out.println(); 45 } 46 } 47 findSuspectData()48 private void findSuspectData() { 49 Set<String> territories = sdata.getTerritoriesWithPopulationData(); 50 for (String territory : territories) { 51 double scale = 1.0; 52 final PopulationData populationDataForTerritory = 53 sdata.getPopulationDataForTerritory(territory); 54 final double gdp = populationDataForTerritory.getGdp(); 55 double territoryPop = populationDataForTerritory.getPopulation(); 56 double langPop = 0; 57 double officialLangPop = 0; 58 Set<String> languages = sdata.getLanguagesForTerritoryWithPopulationData(territory); 59 for (String language : languages) { 60 if (language.equals("tl")) continue; 61 PopulationData pop2 = 62 sdata.getLanguageAndTerritoryPopulationData(language, territory); 63 langPop += pop2.getPopulation(); 64 if (pop2.getOfficialStatus().isOfficial()) { 65 officialLangPop += pop2.getPopulation(); 66 } 67 } 68 final double missing = 0.75 * territoryPop - langPop; 69 if (missing > 0) { 70 System.out.println( 71 territory // 72 + "\t" 73 + english.getName("territory", territory) // 74 + "\t" 75 + territoryPop // 76 + "\t" 77 + langPop // 78 + "\t" 79 + gdp // 80 ); 81 scale = 1 + missing / officialLangPop; 82 // scale up the official so that 83 // official + non-official = 70% of total 84 langPop = territoryPop * 0.75; 85 System.out.println("\tScaling " + territory + "\t" + scale * 100 + "%"); 86 } 87 long langUnknown = (long) territoryPop; 88 for (String language : languages) { 89 if (language.equals("tl")) continue; 90 PopulationData pop2 = 91 sdata.getLanguageAndTerritoryPopulationData(language, territory); 92 double langPop2 = pop2.getPopulation(); 93 if (pop2.getOfficialStatus().isOfficial()) { 94 langPop2 *= scale; 95 } 96 languageToGdp.add(language, (long) (gdp * langPop2 / territoryPop)); 97 languageToPop.add(language, (long) (langPop2)); 98 langUnknown -= langPop2; 99 } 100 if (langUnknown > 0) { 101 languageToGdp.add("und", (long) (gdp * langUnknown / territoryPop)); 102 languageToPop.add("und", (langUnknown)); 103 } 104 } 105 } 106 isOfficialLanguageOfEUCountry(String language)107 private Pair<OfficialStatus, String> isOfficialLanguageOfEUCountry(String language) { 108 OfficialStatus bestStatus = OfficialStatus.unknown; 109 String eu = ""; 110 double bestEuPop = 0; 111 Set<String> territories = sdata.getTerritoriesForPopulationData(language); 112 for (String territory : territories) { 113 PopulationData pop = sdata.getLanguageAndTerritoryPopulationData(language, territory); 114 OfficialStatus status = pop.getOfficialStatus(); 115 if (bestStatus.compareTo(status) < 0) { 116 bestStatus = status; 117 } 118 if (status.isMajor() && euCountries.contains(territory)) { 119 if (pop.getLiteratePopulation() > bestEuPop) { 120 bestEuPop = pop.getLiteratePopulation(); 121 eu = territory; 122 } 123 } 124 } 125 return Pair.of(bestStatus, eu); 126 } 127 } 128