xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GetLanguageData.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import java.util.Set;
4 import org.unicode.cldr.util.CLDRFile;
5 import org.unicode.cldr.util.CLDRPaths;
6 import org.unicode.cldr.util.Counter;
7 import org.unicode.cldr.util.Factory;
8 import org.unicode.cldr.util.Pair;
9 import org.unicode.cldr.util.SupplementalDataInfo;
10 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
11 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
12 
13 public class GetLanguageData {
14     SupplementalDataInfo sdata = SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
15     Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
16     CLDRFile english = cldrFactory.make("en", true);
17     Set<String> euCountries = sdata.getContained("EU");
18     Counter<String> languageToGdp = new Counter<>();
19     Counter<String> languageToPop = new Counter<>();
20 
main(String[] args)21     public static void main(String[] args) {
22         new GetLanguageData().run();
23     }
24 
run()25     private void run() {
26         findSuspectData();
27         System.out.println("Code\tLang\tLpop\tApprox. Gdp");
28         for (String language : sdata.getLanguages()) {
29             final long pop = languageToPop.getCount(language);
30             System.out.print(language + "\t" + english.getName(language));
31             if (pop > 0) {
32                 Pair<OfficialStatus, String> status = isOfficialLanguageOfEUCountry(language);
33                 System.out.print(
34                         "\t"
35                                 + pop //
36                                 + "\t"
37                                 + languageToGdp.getCount(language) //
38                                 + "\t"
39                                 + (status.getFirst().isOfficial() ? status.getFirst() : "") //
40                                 + "\t"
41                                 + status.getSecond() //
42                         );
43             }
44             System.out.println();
45         }
46     }
47 
findSuspectData()48     private void findSuspectData() {
49         Set<String> territories = sdata.getTerritoriesWithPopulationData();
50         for (String territory : territories) {
51             double scale = 1.0;
52             final PopulationData populationDataForTerritory =
53                     sdata.getPopulationDataForTerritory(territory);
54             final double gdp = populationDataForTerritory.getGdp();
55             double territoryPop = populationDataForTerritory.getPopulation();
56             double langPop = 0;
57             double officialLangPop = 0;
58             Set<String> languages = sdata.getLanguagesForTerritoryWithPopulationData(territory);
59             for (String language : languages) {
60                 if (language.equals("tl")) continue;
61                 PopulationData pop2 =
62                         sdata.getLanguageAndTerritoryPopulationData(language, territory);
63                 langPop += pop2.getPopulation();
64                 if (pop2.getOfficialStatus().isOfficial()) {
65                     officialLangPop += pop2.getPopulation();
66                 }
67             }
68             final double missing = 0.75 * territoryPop - langPop;
69             if (missing > 0) {
70                 System.out.println(
71                         territory //
72                                 + "\t"
73                                 + english.getName("territory", territory) //
74                                 + "\t"
75                                 + territoryPop //
76                                 + "\t"
77                                 + langPop //
78                                 + "\t"
79                                 + gdp //
80                         );
81                 scale = 1 + missing / officialLangPop;
82                 // scale up the official so that
83                 // official + non-official = 70% of total
84                 langPop = territoryPop * 0.75;
85                 System.out.println("\tScaling " + territory + "\t" + scale * 100 + "%");
86             }
87             long langUnknown = (long) territoryPop;
88             for (String language : languages) {
89                 if (language.equals("tl")) continue;
90                 PopulationData pop2 =
91                         sdata.getLanguageAndTerritoryPopulationData(language, territory);
92                 double langPop2 = pop2.getPopulation();
93                 if (pop2.getOfficialStatus().isOfficial()) {
94                     langPop2 *= scale;
95                 }
96                 languageToGdp.add(language, (long) (gdp * langPop2 / territoryPop));
97                 languageToPop.add(language, (long) (langPop2));
98                 langUnknown -= langPop2;
99             }
100             if (langUnknown > 0) {
101                 languageToGdp.add("und", (long) (gdp * langUnknown / territoryPop));
102                 languageToPop.add("und", (langUnknown));
103             }
104         }
105     }
106 
isOfficialLanguageOfEUCountry(String language)107     private Pair<OfficialStatus, String> isOfficialLanguageOfEUCountry(String language) {
108         OfficialStatus bestStatus = OfficialStatus.unknown;
109         String eu = "";
110         double bestEuPop = 0;
111         Set<String> territories = sdata.getTerritoriesForPopulationData(language);
112         for (String territory : territories) {
113             PopulationData pop = sdata.getLanguageAndTerritoryPopulationData(language, territory);
114             OfficialStatus status = pop.getOfficialStatus();
115             if (bestStatus.compareTo(status) < 0) {
116                 bestStatus = status;
117             }
118             if (status.isMajor() && euCountries.contains(territory)) {
119                 if (pop.getLiteratePopulation() > bestEuPop) {
120                     bestEuPop = pop.getLiteratePopulation();
121                     eu = territory;
122                 }
123             }
124         }
125         return Pair.of(bestStatus, eu);
126     }
127 }
128