From c641f9fb405298ad06bcb71f3d7b0e9cc82b0600 Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Tue, 30 Dec 2025 19:53:11 +0530
Subject: [PATCH 01/12] Support 0 eval results

---
 tools/statvar_importer/property_value_mapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/statvar_importer/property_value_mapper.py b/tools/statvar_importer/property_value_mapper.py
index 76d5209216..8c60f481a3 100644
--- a/tools/statvar_importer/property_value_mapper.py
+++ b/tools/statvar_importer/property_value_mapper.py
@@ -347,7 +347,7 @@ def _process_eval(self, pvs: dict, data_key: str) -> bool:
             self._log_every_n)
         if not eval_prop:
             eval_prop = data_key
-        if eval_data and eval_data != eval_str:
+        if eval_data is not None and eval_data != eval_str:
             pvs[eval_prop] = eval_data
             self._counters.add_counter('processed-eval', 1, eval_str)
             pvs.pop(eval_key)

From f30e89550743d537d0aed8f59e3031cd2452cf19 Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Wed, 25 Mar 2026 16:32:16 +0530
Subject: [PATCH 02/12] Use environment variable for DC API root

---
 util/dc_api_wrapper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/util/dc_api_wrapper.py b/util/dc_api_wrapper.py
index f1a1fee247..98f7209d0e 100644
--- a/util/dc_api_wrapper.py
+++ b/util/dc_api_wrapper.py
@@ -15,7 +15,7 @@
 
 It uses the DataCommonsClient library module for DC APIs and adds support for
 batched requests, retries and HTTP caching.
-DC API requires an environment variable set for DC_API_KEY.
+The DC API key and root default to env variables DC_API_KEY and DC_API_ROOT.
 Please refer to https://docs.datacommons.org/api/python/v2 for more details.
 """
 
@@ -265,7 +265,7 @@ def get_datacommons_client(config: dict = None) -> DataCommonsClient:
     """Returns a DataCommonsClient object initialized using config."""
     config = _validate_v2_config(config)
     api_key = get_dc_api_key(config)
-    dc_instance = config.get('dc_api_root')
+    dc_instance = config.get('dc_api_root', os.environ.get('DC_API_ROOT'))
     url = None
     # Check if API root is a host or url endpoint.
     if dc_instance:

From 2a0e93d8f73aeef6ff2b9b48bf9b5b819ed99182 Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Wed, 25 Mar 2026 22:17:08 +0530
Subject: [PATCH 03/12] cleanup dc_api_root configs

---
 scripts/earthengine/utils.py                                 | 2 --
 scripts/earthengine/utils_test.py                            | 1 -
 .../common/datacommons_api_wrappers/datacommons_wrappers.py  | 3 ---
 .../datacommons_api_wrappers/datacommons_wrappers_test.py    | 2 --
 .../india_rbi_state_statistics/environment_sdg_metadata.csv  | 1 -
 .../india_rbi_state_statistics/infrastructure_metadata.csv   | 1 -
 .../india_rbi_state_statistics/rbi_metadata.csv              | 5 -----
 .../denmark_demographics/denmark_demographics_metadata.csv   | 1 -
 .../fema/flood_insurance_claims/us_flood_nfip_config.py      | 3 ---
 .../state_domestic_product_metadata.csv                      | 1 -
 .../statistics_poland/StatisticsPoland_metadata.csv          | 3 ---
 .../ap_ib_gt_enrollment/config/common_metadata.csv           | 1 -
 .../state/config/SATorACT_Participation_metadata.csv         | 1 -
 tools/statvar_importer/config_flags.py                       | 2 +-
 util/dc_api_wrapper.py                                       | 3 ++-
 15 files changed, 3 insertions(+), 27 deletions(-)

diff --git a/scripts/earthengine/utils.py b/scripts/earthengine/utils.py
index 44dbd71535..7e59202eaa 100644
--- a/scripts/earthengine/utils.py
+++ b/scripts/earthengine/utils.py
@@ -46,7 +46,6 @@
 # Constants
 _MAX_LATITUDE = 90.0
 _MAX_LONGITUDE = 180.0
-_DC_API_ROOT = 'https://api.datacommons.org'
 
 # Utilities for dicts.
 
@@ -372,7 +371,6 @@ def place_id_to_lat_lng(placeid: str,
             {
                 'dc_api_version': 'V2',
                 'dc_api_use_cache': True,
-                'dc_api_root': _DC_API_ROOT,
             },
         )
         node_props = resp.get(placeid) if resp else None
diff --git a/scripts/earthengine/utils_test.py b/scripts/earthengine/utils_test.py
index bfdd347661..f0f8e567cd 100644
--- a/scripts/earthengine/utils_test.py
+++ b/scripts/earthengine/utils_test.py
@@ -394,5 +394,4 @@ def test_place_id_to_lat_lng_dc_api(self):
                 [placeid], ['latitude', 'longitude'], {
                     'dc_api_version': 'V2',
                     'dc_api_use_cache': True,
-                    'dc_api_root': utils._DC_API_ROOT,
                 })
diff --git a/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers.py b/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers.py
index 39c93bc599..70e0936ee5 100644
--- a/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers.py
+++ b/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers.py
@@ -64,9 +64,6 @@ def dc_check_existence(dcid_list: list,
     wrapper_config = {
         'dc_api_batch_size':
             max_items,
-        'dc_api_root':
-            'https://autopush.api.datacommons.org'
-            if use_autopush else 'https://api.datacommons.org'
     }
     return dc_api_is_defined_dcid(dcid_list, wrapper_config)
 
diff --git a/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers_test.py b/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers_test.py
index 5eb9d2a497..e0374c7010 100644
--- a/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers_test.py
+++ b/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers_test.py
@@ -37,14 +37,12 @@ def test_dc_check_existence_mock(self, mock_is_defined):
         mock_is_defined.assert_called_with(
             ['node1'], {
                 'dc_api_batch_size': 450,
-                'dc_api_root': 'https://autopush.api.datacommons.org'
             })
 
         # Test 2: use_autopush=False
         dc_check_existence(['node2'], use_autopush=False, max_items=10)
         mock_is_defined.assert_called_with(['node2'], {
             'dc_api_batch_size': 10,
-            'dc_api_root': 'https://api.datacommons.org'
         })
 
     @mock.patch('datacommons_wrappers.request_post_json')
diff --git a/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/environment_sdg_metadata.csv b/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/environment_sdg_metadata.csv
index 782aa4c330..f11ac0b3c4 100644
--- a/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/environment_sdg_metadata.csv
+++ b/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/environment_sdg_metadata.csv
@@ -2,4 +2,3 @@ parameter,value
 header_rows,3
 output_columns,"observationAbout,observationDate,variableMeasured,value,unit,observationPeriod"
 mapped_rows,3
-dc_api_root,https://api.datacommons.org
diff --git a/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/infrastructure_metadata.csv b/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/infrastructure_metadata.csv
index 475c900919..f5c45d8c4e 100644
--- a/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/infrastructure_metadata.csv
+++ b/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/infrastructure_metadata.csv
@@ -2,4 +2,3 @@ parameter,value
 header_rows,5
 output_columns,"observationAbout,observationDate,variableMeasured,value,unit,observationPeriod"
 mapped_rows,5
-dc_api_root,https://api.datacommons.org
diff --git a/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/rbi_metadata.csv b/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/rbi_metadata.csv
index ad0d50f768..c2042f4fd4 100644
--- a/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/rbi_metadata.csv
+++ b/statvar_imports/database_on_indian_economy/india_rbi_state_statistics/rbi_metadata.csv
@@ -2,8 +2,3 @@ parameter,value
 output_columns,"observationAbout,observationDate,variableMeasured,value,unit,observationPeriod"
 header_rows,4
 mapped_rows,4
-dc_api_root,https://api.datacommons.org
-
-
-
-
diff --git a/statvar_imports/denmark_demographics/denmark_demographics_metadata.csv b/statvar_imports/denmark_demographics/denmark_demographics_metadata.csv
index 41f8f31e37..95d252a541 100644
--- a/statvar_imports/denmark_demographics/denmark_demographics_metadata.csv
+++ b/statvar_imports/denmark_demographics/denmark_demographics_metadata.csv
@@ -1,3 +1,2 @@
 parameter,value
 output_columns,"observationDate,value,observationAbout,variableMeasured"
-dc_api_root,https://api.datacommons.org
diff --git a/statvar_imports/fema/flood_insurance_claims/us_flood_nfip_config.py b/statvar_imports/fema/flood_insurance_claims/us_flood_nfip_config.py
index 90e53db883..082ce8b7e3 100644
--- a/statvar_imports/fema/flood_insurance_claims/us_flood_nfip_config.py
+++ b/statvar_imports/fema/flood_insurance_claims/us_flood_nfip_config.py
@@ -68,7 +68,4 @@
         5,
     'dc_api_use_cache':
         True,
-    #'dc_api_root': 'http://autopush.api.datacommons.org',
-    'dc_api_root':
-        'http://api.datacommons.org',
 }
diff --git a/statvar_imports/india_rbistatedomesticproduct/state_domestic_product_metadata.csv b/statvar_imports/india_rbistatedomesticproduct/state_domestic_product_metadata.csv
index ee630bdff4..0c90bd2702 100644
--- a/statvar_imports/india_rbistatedomesticproduct/state_domestic_product_metadata.csv
+++ b/statvar_imports/india_rbistatedomesticproduct/state_domestic_product_metadata.csv
@@ -11,4 +11,3 @@ comments,
 output_columns,"observationAbout,observationDate,variableMeasured,value,unit,measurementMethod,observationPeriod"
 #header_rows,6
 #mapped_rows,5
-dc_api_root,https://api.datacommons.org
diff --git a/statvar_imports/statistics_poland/StatisticsPoland_metadata.csv b/statvar_imports/statistics_poland/StatisticsPoland_metadata.csv
index b909a13a08..a3a30ec1c2 100644
--- a/statvar_imports/statistics_poland/StatisticsPoland_metadata.csv
+++ b/statvar_imports/statistics_poland/StatisticsPoland_metadata.csv
@@ -9,6 +9,3 @@ places_within,country/POL
 #skip_rows,1
 header_rows,5
 mapped_columns,2
-dc_api_root,https://api.datacommons.org
-
-
diff --git a/statvar_imports/us_urban_school/ap_ib_gt_enrollment/config/common_metadata.csv b/statvar_imports/us_urban_school/ap_ib_gt_enrollment/config/common_metadata.csv
index 41a321a836..2c8f80a15c 100644
--- a/statvar_imports/us_urban_school/ap_ib_gt_enrollment/config/common_metadata.csv
+++ b/statvar_imports/us_urban_school/ap_ib_gt_enrollment/config/common_metadata.csv
@@ -3,4 +3,3 @@ mapped_rows,1
 output_columns,"observationDate,observationAbout,variableMeasured,value"
 #input_rows,10
 mapped_columns,2
-dc_api_root,https://api.datacommons.org
diff --git a/statvar_imports/us_urban_school/sat_act_participation/state/config/SATorACT_Participation_metadata.csv b/statvar_imports/us_urban_school/sat_act_participation/state/config/SATorACT_Participation_metadata.csv
index 4909fa4a53..75997951a4 100644
--- a/statvar_imports/us_urban_school/sat_act_participation/state/config/SATorACT_Participation_metadata.csv
+++ b/statvar_imports/us_urban_school/sat_act_participation/state/config/SATorACT_Participation_metadata.csv
@@ -1,3 +1,2 @@
 parameter,value
 output_columns,"observationAbout,observationDate,value,variableMeasured,unit,scalingFactor"
-dc_api_root,https://api.datacommons.org
diff --git a/tools/statvar_importer/config_flags.py b/tools/statvar_importer/config_flags.py
index d5214a3510..94a162f33c 100644
--- a/tools/statvar_importer/config_flags.py
+++ b/tools/statvar_importer/config_flags.py
@@ -370,7 +370,7 @@ def get_default_config() -> dict:
             True,
         # Settings for DC API.
         'dc_api_root':
-            'http://api.datacommons.org',
+            os.environ.get('DC_API_ROOT', 'http://api.datacommons.org'),
         'dc_api_use_cache':
             False,
         'dc_api_batch_size':
diff --git a/util/dc_api_wrapper.py b/util/dc_api_wrapper.py
index 98f7209d0e..682d4aeeaf 100644
--- a/util/dc_api_wrapper.py
+++ b/util/dc_api_wrapper.py
@@ -520,7 +520,8 @@ def dc_api_resolve_latlng(lat_lngs: list,
     dictionary containing the resolved place information.
   """
     config = _validate_v2_config(config)
-    api_root = config.get('dc_api_root', _DEFAULT_API_ROOT)
+    api_root = config.get('dc_api_root',
+                          os.environ.get('DC_API_ROOT', _DEFAULT_API_ROOT))
     v1_data = {}
     v1_data['coordinates'] = lat_lngs
     num_ids = len(lat_lngs)

From dfad7506200cbf30ee076a9e258e975460c965a2 Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Wed, 25 Mar 2026 23:12:48 +0530
Subject: [PATCH 04/12] lint fix

---
 scripts/earthengine/utils.py                  |  12 +--
 scripts/earthengine/utils_test.py             |  10 +-
 .../datacommons_wrappers.py                   |   3 +-
 .../datacommons_wrappers_test.py              |   7 +-
 tools/statvar_importer/config_flags.py        |   2 +-
 util/dc_api_wrapper.py                        | 100 +++++++++++-------
 6 files changed, 77 insertions(+), 57 deletions(-)

diff --git a/scripts/earthengine/utils.py b/scripts/earthengine/utils.py
index 7e59202eaa..aeb94045ac 100644
--- a/scripts/earthengine/utils.py
+++ b/scripts/earthengine/utils.py
@@ -19,11 +19,11 @@
 from datetime import datetime
 import glob
 import os
+from pathlib import Path
 import pickle
 import re
 import sys
 import tempfile
-from pathlib import Path
 from typing import Union
 
 from absl import logging
@@ -305,8 +305,8 @@ def grid_get_neighbor_ids(grid_id: str) -> list:
             if lat_offset != 0 or lng_offset != 0:
                 neighbour_lat = lat + lat_offset * deg
                 neighbour_lng = lng + lng_offset * deg
-                if abs(neighbour_lat) < _MAX_LATITUDE and abs(
-                        neighbour_lng) < _MAX_LONGITUDE:
+                if (abs(neighbour_lat) < _MAX_LATITUDE and
+                        abs(neighbour_lng) < _MAX_LONGITUDE):
                     neighbours.append(
                         grid_id_from_lat_lng(
                             deg,
@@ -433,7 +433,7 @@ def add_namespace(dcid: str, prefix: str = 'dcid:') -> str:
 
 
 def str_get_numeric_value(
-        value: Union[str, list, int, float]) -> Union[int, float, None]:
+    value: Union[str, list, int, float],) -> Union[int, float, None]:
     """Returns the numeric value from input string or None."""
     if isinstance(value, list):
         value = value[0]
@@ -528,7 +528,7 @@ def date_advance_by_period(date_str: str,
     if not date_str:
         return ''
     dt = datetime.strptime(date_str, date_format)
-    (delta, unit) = date_parse_time_period(time_period)
+    delta, unit = date_parse_time_period(time_period)
     if not delta or not unit:
         logging.error(
             f'Unable to parse time period: {time_period} for date: {date_str}')
@@ -545,7 +545,7 @@ def date_format_by_time_period(date_str: str, time_period: str) -> str:
   """
     if not time_period:
         return date_str
-    (delta, unit) = date_parse_time_period(time_period)
+    delta, unit = date_parse_time_period(time_period)
     date_parts = date_str.split('-')
     if unit == 'years':
         return date_parts[0]
diff --git a/scripts/earthengine/utils_test.py b/scripts/earthengine/utils_test.py
index f0f8e567cd..e93be53fea 100644
--- a/scripts/earthengine/utils_test.py
+++ b/scripts/earthengine/utils_test.py
@@ -390,8 +390,8 @@ def test_place_id_to_lat_lng_dc_api(self):
             lat, lng = utils.place_id_to_lat_lng(placeid, dc_api_lookup=True)
             self.assertAlmostEqual(37.221614, lat)
             self.assertAlmostEqual(-121.68954, lng)
-            mock_get.assert_called_once_with(
-                [placeid], ['latitude', 'longitude'], {
-                    'dc_api_version': 'V2',
-                    'dc_api_use_cache': True,
-                })
+            mock_get.assert_called_once_with([placeid],
+                                             ['latitude', 'longitude'], {
+                                                 'dc_api_version': 'V2',
+                                                 'dc_api_use_cache': True,
+                                             })
diff --git a/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers.py b/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers.py
index 70e0936ee5..eb0e487f2a 100644
--- a/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers.py
+++ b/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers.py
@@ -62,8 +62,7 @@ def dc_check_existence(dcid_list: list,
         Dict object with dcids as key values and boolean values signifying existence as values.
     """
     wrapper_config = {
-        'dc_api_batch_size':
-            max_items,
+        'dc_api_batch_size': max_items,
     }
     return dc_api_is_defined_dcid(dcid_list, wrapper_config)
 
diff --git a/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers_test.py b/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers_test.py
index e0374c7010..e605afb5a9 100644
--- a/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers_test.py
+++ b/scripts/us_census/acs5yr/subject_tables/common/datacommons_api_wrappers/datacommons_wrappers_test.py
@@ -34,10 +34,9 @@ def test_dc_check_existence_mock(self, mock_is_defined):
         # Test 1: Default (use_autopush=True by default in function signature)
         mock_is_defined.return_value = {'node1': True}
         dc_check_existence(['node1'])
-        mock_is_defined.assert_called_with(
-            ['node1'], {
-                'dc_api_batch_size': 450,
-            })
+        mock_is_defined.assert_called_with(['node1'], {
+            'dc_api_batch_size': 450,
+        })
 
         # Test 2: use_autopush=False
         dc_check_existence(['node2'], use_autopush=False, max_items=10)
diff --git a/tools/statvar_importer/config_flags.py b/tools/statvar_importer/config_flags.py
index 94a162f33c..a7449ca032 100644
--- a/tools/statvar_importer/config_flags.py
+++ b/tools/statvar_importer/config_flags.py
@@ -370,7 +370,7 @@ def get_default_config() -> dict:
             True,
         # Settings for DC API.
         'dc_api_root':
-            os.environ.get('DC_API_ROOT', 'http://api.datacommons.org'),
+            os.environ.get('DC_API_ROOT', 'https://api.datacommons.org'),
         'dc_api_use_cache':
             False,
         'dc_api_batch_size':
diff --git a/util/dc_api_wrapper.py b/util/dc_api_wrapper.py
index 682d4aeeaf..1265452f8c 100644
--- a/util/dc_api_wrapper.py
+++ b/util/dc_api_wrapper.py
@@ -21,16 +21,21 @@
 
 import os
 import sys
-import urllib
-import requests
 from typing import Union
+import urllib
 
 from absl import logging
 from datacommons_client.client import DataCommonsClient
 from datacommons_client.utils.error_handling import APIError, DCConnectionError, DCStatusError
+import requests
 import requests_cache
-from tenacity import (RetryCallState, Retrying, retry_if_exception,
-                      stop_after_attempt, wait_fixed)
+from tenacity import (
+    RetryCallState,
+    Retrying,
+    retry_if_exception,
+    stop_after_attempt,
+    wait_fixed,
+)
 
 _SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(_SCRIPT_DIR)
@@ -64,8 +69,14 @@ def _get_exception_status_code(exception):
 
 
 def _should_retry_exception(exception: Exception) -> bool:
-    if isinstance(exception, (DCConnectionError, requests.exceptions.Timeout,
-                              requests.exceptions.ChunkedEncodingError)):
+    if isinstance(
+            exception,
+        (
+            DCConnectionError,
+            requests.exceptions.Timeout,
+            requests.exceptions.ChunkedEncodingError,
+        ),
+    ):
         return True
     if isinstance(exception, (urllib.error.HTTPError, DCStatusError, APIError)):
         status_code = _get_exception_status_code(exception)
@@ -105,6 +116,7 @@ def dc_api_wrapper(
     retries: Maximum number of attempts (including the first attempt).
     retry_sec: Interval in seconds between retries for which caller is blocked.
     use_cache: If True, uses request cache for faster response.
+
   Returns:
     The response from the DataCommons API call.
   """
@@ -147,8 +159,9 @@ def dc_api_wrapper(
             logging.error(f'Got exception for api: {function}, {e}')
             return None
         except Exception as e:
-            e.add_note(f'DC API call failed for {function} with max attempts '
-                       f'{max_attempts}.')
+            e.add_note(
+                f'DC API call failed for {function} with max attempts {max_attempts}.'
+            )
             raise
 
 
@@ -252,11 +265,13 @@ def get_dc_api_key(config: dict = None) -> str:
     api_key = config.get('dc_api_key', os.environ.get('DC_API_KEY'))
     if not api_key:
         logging.log_first_n(
-            logging.WARNING, f'Using default DC API key with limited quota. '
-            'Please set an API key in the environment variable: DC_API_KEY.'
-            'Refer https://docs.datacommons.org/api/python/v2/#authentication '
-            'for more details.',
-            n=1)
+            logging.WARNING,
+            f'Using default DC API key with limited quota. '
+            f'Please set an API key in the environment variable: DC_API_KEY.'
+            f'Refer https://docs.datacommons.org/api/python/v2/#authentication '
+            f'for more details.',
+            n=1,
+        )
         api_key = _DEFAULT_DC_API_KEY
     return api_key
 
@@ -286,6 +301,7 @@ def get_datacommons_client(config: dict = None) -> DataCommonsClient:
 
 def dc_api_is_defined_dcid(dcids: list, config: dict = {}) -> dict:
     """Returns a dictionary with dcids mapped to True/False based on whether
+
   the dcid is defined in the API and has a 'typeOf' property.
      Uses the property_value() DC API to lookup 'typeOf' for each dcid.
      dcids not defined in KG get a value of False.
@@ -301,11 +317,13 @@ def dc_api_is_defined_dcid(dcids: list, config: dict = {}) -> dict:
     # Set parameters for node API.
     client = get_datacommons_client(config)
     api_function = client.node.fetch_property_values
-    api_result = dc_api_batched_wrapper(function=api_function,
-                                        dcids=dcids,
-                                        args={'properties': 'typeOf'},
-                                        dcid_arg_kw='node_dcids',
-                                        config=config)
+    api_result = dc_api_batched_wrapper(
+        function=api_function,
+        dcids=dcids,
+        args={'properties': 'typeOf'},
+        dcid_arg_kw='node_dcids',
+        config=config,
+    )
     response = {}
     for dcid in dcids:
         dcid_stripped = _strip_namespace(dcid)
@@ -348,11 +366,13 @@ def _dc_api_get_node_property_v2(dcids: list,
     api_function = client.node.fetch_property_values
     args = {'properties': prop}
     dcid_arg_kw = 'node_dcids'
-    api_result = dc_api_batched_wrapper(function=api_function,
-                                        dcids=dcids,
-                                        args=args,
-                                        dcid_arg_kw=dcid_arg_kw,
-                                        config=config)
+    api_result = dc_api_batched_wrapper(
+        function=api_function,
+        dcids=dcids,
+        args=args,
+        dcid_arg_kw=dcid_arg_kw,
+        config=config,
+    )
     response = {}
     for dcid in dcids:
         dcid_stripped = _strip_namespace(dcid)
@@ -398,11 +418,13 @@ def dc_api_get_node_property_values(dcids: list, config: dict = {}) -> dict:
     api_function = client.node.fetch
     args = {'expression': '->*'}
     dcid_arg_kw = 'node_dcids'
-    api_result = dc_api_batched_wrapper(function=api_function,
-                                        dcids=dcids,
-                                        args=args,
-                                        dcid_arg_kw=dcid_arg_kw,
-                                        config=config)
+    api_result = dc_api_batched_wrapper(
+        function=api_function,
+        dcids=dcids,
+        args=args,
+        dcid_arg_kw=dcid_arg_kw,
+        config=config,
+    )
     response = {}
     for dcid, arcs in api_result.items():
         pvs = {}
@@ -446,11 +468,13 @@ def dc_api_resolve_placeid(dcids: list,
     api_function = client.resolve.fetch
     args = {'expression': f'<-{in_prop}->dcid'}
     dcid_arg_kw = 'node_ids'
-    api_result = dc_api_batched_wrapper(function=api_function,
-                                        dcids=dcids,
-                                        args=args,
-                                        dcid_arg_kw=dcid_arg_kw,
-                                        config=config)
+    api_result = dc_api_batched_wrapper(
+        function=api_function,
+        dcids=dcids,
+        args=args,
+        dcid_arg_kw=dcid_arg_kw,
+        config=config,
+    )
     results = {}
     if api_result:
         for node in api_result.get('entities', []):
@@ -478,7 +502,7 @@ def dc_api_resolve_latlng(lat_lngs: list,
     }
 
     if return_v1_response is True, a v1 response of this form is returned:
-    
+
     {
       "placeCoordinates": [
           {
@@ -552,8 +576,7 @@ def dc_api_resolve_latlng(lat_lngs: list,
 
 
 def _convert_v2_to_v1_coordinate_response(v2_response: dict) -> dict:
-    """Converts a v2 coordinate resolution response to a v1 response.
-    """
+    """Converts a v2 coordinate resolution response to a v1 response."""
     v1_response = {'placeCoordinates': []}
     for entity in v2_response.get('entities', []):
         node = entity.get('node', '')
@@ -573,15 +596,14 @@ def _convert_v2_to_v1_coordinate_response(v2_response: dict) -> dict:
                 candidate.get('dcid')
                 for candidate in entity.get('candidates', [])
             ],
-            'places': entity.get('candidates', [])
+            'places': entity.get('candidates', []),
         }
         v1_response['placeCoordinates'].append(place_coordinate)
     return v1_response
 
 
 def _convert_v1_to_v2_coordinate_request(v1_request: dict) -> dict:
-    """Converts a v1 coordinate resolution request to a v2 request.
-    """
+    """Converts a v1 coordinate resolution request to a v2 request."""
     v2_request = {'nodes': [], 'property': '<-geoCoordinate->dcid'}
     for coordinate in v1_request.get('coordinates', []):
         lat = coordinate.get('latitude')

From 9951b781977f14554b7a3f144974ed038e74d858 Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Fri, 12 Jun 2026 19:21:12 +0530
Subject: [PATCH 05/12] Add script for UN20 imports

---
 .../schema/statvar_dcid_gen.py                | 197 ++++++++++++++++++
 .../schema/statvar_dcid_gen_test.py           |  71 +++++++
 tools/statvar_importer/stat_var_processor.py  |  10 +-
 3 files changed, 276 insertions(+), 2 deletions(-)
 create mode 100644 tools/statvar_importer/schema/statvar_dcid_gen.py
 create mode 100644 tools/statvar_importer/schema/statvar_dcid_gen_test.py

diff --git a/tools/statvar_importer/schema/statvar_dcid_gen.py b/tools/statvar_importer/schema/statvar_dcid_gen.py
new file mode 100644
index 0000000000..52c345aac2
--- /dev/null
+++ b/tools/statvar_importer/schema/statvar_dcid_gen.py
@@ -0,0 +1,197 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utilities to generate statvar dcid."""
+
+import os
+import re
+import sys
+
+from absl import app
+from absl import flags
+from absl import logging
+
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_DIR)
+sys.path.append(os.path.dirname(_SCRIPT_DIR))
+sys.path.append(os.path.dirname(os.path.dirname(_SCRIPT_DIR)))
+_DATA_DIR = os.path.dirname(os.path.dirname(os.path.dirname(_SCRIPT_DIR)))
+sys.path.append(os.path.join(_DATA_DIR, 'util'))
+
+from counters import Counters
+from dc_api_wrapper import dc_api_get_node_property_values
+from mcf_file_util import strip_namespace, add_namespace, add_mcf_node, is_leaf_object
+
+
+def camel_to_snake(text: str, delim: str = '_') -> str:
+    """Returns text converted from camelCase to lowercase snake_case.
+
+    Args:
+      text: The camelCase string to convert.
+      delim: Separator inserted at each word boundary (default is '_').
+
+    Returns:
+      The lowercased string with delim inserted between the words.
+    """
+    for pat in (r'([a-z0-9])([A-Z])', r'([a-zA-Z])([0-9])',
+                r'([A-Z])([A-Z][a-z])'):
+        text = re.sub(pat, r'\1' + delim + r'\2', text)
+    return text.lower()
+
+
+def get_dcid_name(dcid: str, schema_nodes: dict) -> str:
+    """Looks up the name of a DCID in the schema nodes.
+
+    Args:
+      dcid: The DCID string to look up.
+      schema_nodes: Dictionary of schema nodes keyed by DCID.
+
+    Returns:
+      The node's 'name' with surrounding double quotes and whitespace stripped,
+      the stripped DCID if the node has no name, or None if no node is found.
+    """
+    bare_dcid = strip_namespace(dcid)
+    # Schema keys may or may not carry the namespace, so try both forms.
+    entry = (schema_nodes.get(bare_dcid) or
+             schema_nodes.get(add_namespace(dcid)))
+    if not entry:
+        return None
+    # Fall back to the bare dcid when the node has no usable name.
+    label = entry.get('name') or bare_dcid
+    return label.strip('"').strip()
+
+
+def get_dcid_token(word: str,
+                   upper_case: bool = False,
+                   remove_prefix: str = '') -> str:
+    """Returns the word normalized into a token suitable for a DCID.
+
+    Args:
+      word: The raw string to normalize.
+      upper_case: If True, converts camelCase to uppercase snake_case.
+      remove_prefix: Optional prefix string to remove from the token.
+
+    Returns:
+      The token with its first character upper-cased, or '' if nothing remains.
+    """
+    # Collapse each run of characters other than [A-Za-z0-9_.-] into one '_'.
+    token = re.sub(r'[^A-Za-z0-9_.-]+', '_', word.strip())
+    token = re.sub(r'_+', '_', token).strip('_')
+    if upper_case:
+        # Convert camelCase to snake case
+        token = camel_to_snake(token).upper()
+    if remove_prefix:
+        token = token.removeprefix(remove_prefix)
+    # Slice instead of indexing so that an empty token does not raise.
+    return token[:1].upper() + token[1:]
+
+
+def generate_dcid_for_statvar(pvs: dict,
+                              config: dict,
+                              schema_nodes: dict = None,
+                              counters: Counters = None) -> str:
+    """Returns the generated statistical variable DCID using the configuration.
+
+    Args:
+      pvs: Dictionary of property-value mappings representing the StatVar.
+      config: Configuration dictionary defining DCID generation parameters.
+      schema_nodes: Optional dictionary of loaded schema nodes.
+      counters: Optional Counters object to track statistics.
+
+    Returns:
+      The DCID built by joining one token per property value: the fixed
+      properties first, then the rest sorted by property name, with the
+      configured prefix prepended.
+    """
+    if schema_nodes is None:
+        schema_nodes = dict()
+
+    # Get the order of properties for dcid with ignored values
+    dcid_props = config.get('statvar_dcid_fixed_properties', [
+        'statType<>measuredValue',
+        'measurementQualifier',
+        'measuredProperty',
+        'populationType',
+    ])
+    fixed_props = dict()
+    for prop in dcid_props:
+        val = ''
+        if '<>' in prop:
+            prop, val = prop.split('<>', 1)
+        fixed_props.setdefault(prop, set()).add(val)
+
+    use_value_names = config.get('statvar_dcid_value_name', False)
+    dcid_prefix = config.get('statvar_dcid_prefix', '')
+    ignore_props = config.get('statvar_dcid_ignore_properties', [
+        'description', 'name', 'nameWithLanguage', 'descriptionUrl',
+        'alternateName', 'footnote', 'unCode', 'Node', 'typeOf'
+    ])
+    prop_delim = config.get('statvar_dcid_delimiter', '_')
+    val_delim = config.get('statvar_dcid_value_delimiter', '')
+    upper_case = config.get('statvar_dcid_upper_case', False)
+    remove_prefix = config.get('statvar_dcid_remove_prefix', '')
+
+    # Lookup names for values.
+    lookup_dcids = set()
+    dcid_pvs = dict()
+    for prop, value in pvs.items():
+        if prop not in ignore_props:
+            dcid_pvs[prop] = value
+            if use_value_names and not get_dcid_name(prop, schema_nodes):
+                lookup_dcids.add(prop)
+            if use_value_names and not is_leaf_object(
+                    value) and not get_dcid_name(value, schema_nodes):
+                lookup_dcids.add(value)
+
+    if lookup_dcids:
+        if counters:
+            counters.add_counter('dc_api_lookup_name', len(lookup_dcids))
+        node_names = dc_api_get_node_property_values(list(lookup_dcids))
+        for node_pvs in node_names.values():
+            add_mcf_node(node_pvs, schema_nodes)
+
+    ordered_props = []
+    # Add properties from template followed by constraint props
+    for prop, val in fixed_props.items():
+        prop_val = dcid_pvs.get(prop)
+        if prop_val:
+            if val and prop_val in val:
+                dcid_pvs.pop(prop)
+            else:
+                ordered_props.append(prop)
+    for prop in sorted(dcid_pvs.keys()):
+        if prop not in ordered_props:
+            ordered_props.append(prop)
+
+    # Get ordered list of dcid tokens
+    dcid_tokens = []
+    for prop in ordered_props:
+        prop_value = dcid_pvs.pop(prop, None)
+        if prop_value:
+            value_name = prop_value
+            if use_value_names:
+                # Fall back to the raw value when no schema name is known so
+                # that get_dcid_token() is never called with None.
+                value_name = (get_dcid_name(prop_value, schema_nodes) or
+                              prop_value)
+            value_name = get_dcid_token(value_name, upper_case, remove_prefix)
+            if val_delim and prop not in fixed_props:
+                prop_name = get_dcid_token(prop, upper_case, remove_prefix)
+                value_name = prop_name + val_delim + value_name
+            if upper_case:
+                value_name = value_name.upper()
+            dcid_tokens.append(value_name)
+    dcid = prop_delim.join(dcid_tokens)
+    if dcid_prefix:
+        dcid = dcid_prefix + dcid
+    return dcid
diff --git a/tools/statvar_importer/schema/statvar_dcid_gen_test.py b/tools/statvar_importer/schema/statvar_dcid_gen_test.py
new file mode 100644
index 0000000000..5a1737d6e6
--- /dev/null
+++ b/tools/statvar_importer/schema/statvar_dcid_gen_test.py
@@ -0,0 +1,71 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from statvar_dcid_gen import camel_to_snake
+from statvar_dcid_gen import generate_dcid_for_statvar
+from statvar_dcid_gen import get_dcid_name
+from statvar_dcid_gen import get_dcid_token
+
+
+class TestStatvarDcidGen(unittest.TestCase):
+
+    def test_camel_to_snake(self):
+        self.assertEqual(camel_to_snake('camelCase'), 'camel_case')
+        self.assertEqual(camel_to_snake('CamelCase'), 'camel_case')
+        self.assertEqual(camel_to_snake('CaseACRONYM'), 'case_acronym')
+        self.assertEqual(camel_to_snake('CaseAbc123'), 'case_abc_123')
+        self.assertEqual(camel_to_snake('simple'), 'simple')
+
+    def test_get_dcid_token(self):
+        self.assertEqual(get_dcid_token('Hello World!'), 'Hello_World')
+        self.assertEqual(
+            get_dcid_token('helloWorld', upper_case=True), 'HELLO_WORLD')
+        self.assertEqual(
+            get_dcid_token('prefixWorld', remove_prefix='prefix'), 'World')
+
+    def test_get_dcid_name(self):
+        schema_nodes = {
+            'Person': {
+                'name': '"Human"'
+            },
+            'dcid:Count': {
+                'name': 'TotalCount'
+            },
+        }
+        self.assertEqual(get_dcid_name('Person', schema_nodes), 'Human')
+        self.assertEqual(get_dcid_name('dcid:Person', schema_nodes), 'Human')
+        self.assertEqual(get_dcid_name('Count', schema_nodes), 'TotalCount')
+        self.assertEqual(get_dcid_name('Unknown', schema_nodes), None)
+
+    def test_generate_dcid(self):
+        pvs = {
+            'statType': 'measuredValue',
+            'measuredProperty': 'count',
+            'populationType': 'Person',
+        }
+        dcid = generate_dcid_for_statvar(pvs, {})
+        self.assertEqual(dcid, 'Count_Person')
+
+        pvs2 = {
+            'statType': 'index',
+            'measuredProperty': 'count',
+            'populationType': 'Person',
+        }
+        dcid2 = generate_dcid_for_statvar(pvs2, {})
+        self.assertEqual(dcid2, 'Index_Count_Person')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/statvar_importer/stat_var_processor.py b/tools/statvar_importer/stat_var_processor.py
index 487cce556f..cb4553cdcb 100644
--- a/tools/statvar_importer/stat_var_processor.py
+++ b/tools/statvar_importer/stat_var_processor.py
@@ -89,6 +89,7 @@
 from schema_generator import generate_schema_nodes, generate_statvar_name
 from schema_checker import sanity_check_nodes
 from schema_reconciler import SchemaReconciler
+from statvar_dcid_gen import generate_dcid_for_statvar
 
 # imports from ../../util
 from config_map import ConfigMap, read_py_dict_from_file
@@ -375,14 +376,19 @@ def generate_statvar_dcid(self, pvs: dict) -> str:
             'statvar_dcid_ignore_properties',
             [
                 'description', 'name', 'nameWithLanguage', 'descriptionUrl',
-                'alternateName'
+                'alternateName', 'footnote', 'typeOf', 'Node'
             ],
         )
         if not self._config.get(
                 'schemaless',
                 False) or not self._get_schemaless_statvar_props(pvs):
             try:
-                dcid = get_statvar_dcid(pvs, ignore_props=dcid_ignore_props)
+                if self._config.get('statvar_dcid_fixed_properties'):
+                    # Use the custom statvar dcid generator
+                    dcid = generate_dcid_for_statvar(pvs, self._config,
+                                                     self._counters)
+                else:
+                    dcid = get_statvar_dcid(pvs, ignore_props=dcid_ignore_props)
                 dcid = re.sub(r'[^A-Za-z_0-9/_\.-]+', '_', dcid)
             except TypeError as e:
                 logging.log_every_n(

From 4e305159f6a4a3bfc9e8f02cd53ff885dfee620f Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Fri, 12 Jun 2026 19:21:55 +0530
Subject: [PATCH 06/12] Add script for generating codelist mapping

---
 scripts/un/codes/generate_codelist_map.py | 174 ++++++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 scripts/un/codes/generate_codelist_map.py

diff --git a/scripts/un/codes/generate_codelist_map.py b/scripts/un/codes/generate_codelist_map.py
new file mode 100644
index 0000000000..f7d13597e0
--- /dev/null
+++ b/scripts/un/codes/generate_codelist_map.py
@@ -0,0 +1,174 @@
+"""Script to geenrate codelist mapings for a specific codelist file."""
+
+import os
+import re
+import sys
+
+from absl import app
+from absl import flags
+from absl import logging
+
+import file_util
+import mcf_file_util
+import eval_functions
+
+from counters import Counters
+
+flags.DEFINE_string('input_codelist', '', 'CSV file with codelist.')
+flags.DEFINE_string('output_pvmap', '', 'Output pvmap csv.')
+flags.DEFINE_string('namespace', 'un', 'Namespace prefix for agency')
+flags.DEFINE_integer('logging_level', logging.INFO, 'Logging level.')
+flags.DEFINE_string('pvmap_template', '', 'Python file with pvmap template.')
+
+_FLAGS = flags.FLAGS
+
+_DEFAULT_CODE_PROPS = [
+    'CONCEPT',
+    'CODE',
+    'NAME_EN',
+    'PARENT',
+    'SORT_ORDER',
+    'NAME_FR',
+    'NAME_ES',
+    'DESCRIPTION',
+]
+
+# Map from a code to a pvmap
+_DEFAULT_CODE_PVMAP = {
+    'key': '{CONCEPT}:{CODE}',
+    'UnConceptProp': 'Property',
+    'UnConcept': '"{CONCEPT}"',
+    'UnCodeProp': 'UnCode',
+    'UnCode': '"{CODE}"',
+    'ConstraintProp': 'to_property(CONCEPT)',
+    'ConstraintPropValue': 'to_dcid(NAMESPACE+"_"+CODE)',
+    'ConstraintPropType': 'TypeOf',
+    'ConstraintPropEnum': 'str(ConstraintProp[0].upper() + ConstraintProp[1:]+"Enum")',
+    'NameProp': '{CONCEPT}_name',
+    'ConstraintValueName': '"{NAME_EN}"',
+    'DescriptionProp': '{CONCEPT}_description',
+    'ConstraintValueDescription': '{DESCRIPTION}',
+    'End': 'End',
+    'Dummy': '.',
+}
+
+
+def to_property(concept: str) -> str:
+    """Returns a property for the concept."""
+    c = eval_functions.str_to_camel_case(concept.lower().replace('_', ' '))
+    return c[0].lower() + c[1:]
+
+
+def to_dcid(code: str) -> str:
+    """Replace any non alphanumeric characters with '_'"""
+    value = re.sub(r'[^A-Za-z0-9\.]+', '_', code)
+    return value[0].upper() + value[1:]
+
+
+def clean_value_str(val: str,
+                    regex: str = 'r[^A-Za-z0-9()[]".-]+',
+                    replace: str = '_') -> str:
+    """Returns val stripped, with matches of regex substituted by replace."""
+    val = val.strip()
+    # Slices instead of val[0]/val[-1] so a blank value does not raise.
+    if val[:1] == '"' and val[-1:] == '"':
+        val = '"' + val[1:-1].strip() + '"'
+    return re.sub(regex, replace, val)
+
+
+_EVAL_FUNCTIONS = dict(eval_functions.EVAL_GLOBALS)
+_EVAL_FUNCTIONS.update({
+    'to_property': to_property,
+    'to_dcid': to_dcid,
+    'clean_value_str': clean_value_str,
+})
+
+def get_value(tpl_val: str, input_pvs: dict) -> str:
+  """Retuns a value with the pvs applied."""
+  value = tpl_val
+  if '{' in tpl_val:
+      # Format string
+      try:
+          value = tpl_val.format(**input_pvs)
+      except Exception as e:
+          logging.error(
+              f'Failed to format "{tpl_val}" using dict: {input_pvs}, error:{e}'
+          )
+          value = ''
+  elif '(' in tpl_val:
+      # Evaluate a function
+      try:
+          prop, value = eval_functions.evaluate_statement(
+              tpl_val, input_pvs, _EVAL_FUNCTIONS)
+      except Exception as e:
+          lgging.error(
+              f'Failed to evaluate "{tpl_val}" using dict: {pvs}, error:{e}'
+          )
+          value = ''
+  if value:
+      # Cleanup value
+      value = clean_value_str(value)
+  return value
+
+
+def generate_code_map(code_pvs: dict,
+                      namespace: str = 'un',
+                      template: dict = _DEFAULT_CODE_PVMAP) -> dict:
+    """Returns a pvmap pvs for a single code.
+   A code has keys listed in _DEFAULT_CODE_PROPS
+   It returns a dictionary with the keys in template.
+   """
+    output_pvs = dict()
+    input_pvs = dict(code_pvs)
+    input_pvs.setdefault('namespace', namespace.lower())
+    input_pvs.setdefault('NAMESPACE', namespace.upper())
+    for tpl_prop, tpl_val in template.items():
+        tpl_prop = get_value(tpl_prop, input_pvs)
+        value = get_value(tpl_val, input_pvs)
+        output_pvs[tpl_prop] = value
+        input_pvs[tpl_prop] = value
+        logging.log(2, f'Mapped {tpl_prop} using {tpl_val} to {value}')
+    return output_pvs
+
+
+def generate_codelist_pvmap(cl_file: str,
+                            output: str,
+                            namespace: str = 'un') -> dict:
+    """Generate a pvmap file for a codelist."""
+    counters = Counters()
+
+    input_codes = file_util.file_load_csv_dict(cl_file, key_index=True)
+    logging.info(f'Loaded {len(input_codes)} from codelist: {cl_file}')
+    counters.add_counter('input-codes', len(input_codes))
+
+    output_pvs = {}
+    for index, code_pvs in input_codes.items():
+        pvs = generate_code_map(code_pvs, namespace)
+        output_pvs[index] = pvs
+        logging.debug(f'Mapped {code_pvs} to {pvs}')
+
+    # Write to output file
+    if output:
+        file_util.file_write_csv_dict(output_pvs, output)
+
+    # Get unique counts across output columns
+    unique_counts = dict()
+    for index, pvs in output_pvs.items():
+        for prop, val in pvs.items():
+            if val:
+                unique_counts.setdefault(prop, set()).add(val)
+    for prop, vals in unique_counts.items():
+        counters.add_counter(f'output-unique-{prop}', len(vals))
+
+    counters.add_counter('output-rows', len(output_pvs))
+    counters.print_counters()
+
+
+def main(_):
+    logging.set_verbosity(_FLAGS.logging_level)
+    generate_codelist_pvmap(_FLAGS.input_codelist, _FLAGS.output_pvmap,
+                            _FLAGS.namespace)
+
+
+if __name__ == '__main__':
+    app.run(main)

From 3cae0f3e93ce8b3efe150029f6ac16e3ab2c939e Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Sat, 13 Jun 2026 20:04:40 +0530
Subject: [PATCH 07/12] add dcid generator to flags

---
 tools/statvar_importer/config_flags.py        | 15 +++++++
 .../schema/statvar_dcid_gen_test.py           | 39 +++++++++++++++++--
 2 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/tools/statvar_importer/config_flags.py b/tools/statvar_importer/config_flags.py
index a7449ca032..5b8f05e3cb 100644
--- a/tools/statvar_importer/config_flags.py
+++ b/tools/statvar_importer/config_flags.py
@@ -433,6 +433,21 @@ def get_default_config() -> dict:
             _FLAGS.generate_statvar_name,  # Generate names for StatVars
         'llm_generate_statvar_name':
             _FLAGS.llm_generate_statvar_name,
+
+        # Settings for statvar dcid generator
+        'statvar_dcid_fixed_properties': [],
+        'statvar_dcid_prefix':
+            '',
+        'statvar_dcid_remove_prefix':
+            '',
+        'statvar_dcid_delimiter':
+            '',
+        'statvar_dcid_value_delimiter':
+            '',
+        'statvar_dcid_upper_case':
+            False,
+        'statvar_dcid_remove_prefix':
+            '',
     }
 
 
diff --git a/tools/statvar_importer/schema/statvar_dcid_gen_test.py b/tools/statvar_importer/schema/statvar_dcid_gen_test.py
index 5a1737d6e6..0538e1456e 100644
--- a/tools/statvar_importer/schema/statvar_dcid_gen_test.py
+++ b/tools/statvar_importer/schema/statvar_dcid_gen_test.py
@@ -30,10 +30,10 @@ def test_camel_to_snake(self):
 
     def test_get_dcid_token(self):
         self.assertEqual(get_dcid_token('Hello World!'), 'Hello_World')
-        self.assertEqual(
-            get_dcid_token('helloWorld', upper_case=True), 'HELLO_WORLD')
-        self.assertEqual(
-            get_dcid_token('prefixWorld', remove_prefix='prefix'), 'World')
+        self.assertEqual(get_dcid_token('helloWorld', upper_case=True),
+                         'HELLO_WORLD')
+        self.assertEqual(get_dcid_token('prefixWorld', remove_prefix='prefix'),
+                         'World')
 
     def test_get_dcid_name(self):
         schema_nodes = {
@@ -66,6 +66,37 @@ def test_generate_dcid(self):
         dcid2 = generate_dcid_for_statvar(pvs2, {})
         self.assertEqual(dcid2, 'Index_Count_Person')
 
+    def test_generate_dcid_with_property(self):
+        config = {
+            'statvar_dcid_fixed_properties': [
+                'statType<>measuredValue', 'measuredProperty<>value',
+                'populationType'
+            ],
+            'statvar_dcid_delimiter': '__',
+            'statvar_dcid_value_delimiter': '--',
+            'statvar_dcid_remove_prefix': 'TEST_',
+            'statvar_dcid_upper_case': True,
+            'statvar_dcid_prefix': 'test/',
+        }
+        pvs = {
+            'statType': 'measuredValue',
+            'measuredProperty': 'count',
+            'populationType': 'Person',
+        }
+        dcid = generate_dcid_for_statvar(pvs, config)
+        self.assertEqual(dcid, 'test/COUNT__PERSON')
+
+        pvs2 = {
+            'statType': 'medianValue',
+            'measuredProperty': 'age',
+            'populationType': 'Person',
+            'gender': 'Male',
+            'place': 'TEST_Urban',
+        }
+        dcid2 = generate_dcid_for_statvar(pvs2, config)
+        self.assertEqual(
+            dcid2, 'test/MEDIAN_VALUE__AGE__PERSON__GENDER--MALE__PLACE--URBAN')
+
 
 if __name__ == '__main__':
     unittest.main()

From 258bbdedd2c0086c9fe0b748a44ea9403a557755 Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Tue, 16 Jun 2026 19:15:53 +0530
Subject: [PATCH 08/12] script to generate statvar names

---
 scripts/un/codes/codelist_pvmap_template.py   |  23 +++
 scripts/un/codes/codelist_schema.tmcf         |  17 ++
 scripts/un/codes/dsd_property_pvmap.py        |  22 +++
 .../un/codes/dsd_property_pvmap_template.py   |  39 ++++
 scripts/un/codes/dsd_property_schema.tmcf     |  15 ++
 scripts/un/codes/generate_codelist_map.py     | 138 +++++++++-----
 scripts/un/codes/generate_statvar_name.py     | 169 ++++++++++++++++++
 tools/statvar_importer/config_flags.py        |   5 +-
 .../schema/statvar_dcid_gen.py                |  23 ++-
 .../schema/statvar_dcid_gen_test.py           |  16 +-
 10 files changed, 409 insertions(+), 58 deletions(-)
 create mode 100644 scripts/un/codes/codelist_pvmap_template.py
 create mode 100644 scripts/un/codes/codelist_schema.tmcf
 create mode 100644 scripts/un/codes/dsd_property_pvmap.py
 create mode 100644 scripts/un/codes/dsd_property_pvmap_template.py
 create mode 100644 scripts/un/codes/dsd_property_schema.tmcf
 create mode 100644 scripts/un/codes/generate_statvar_name.py

diff --git a/scripts/un/codes/codelist_pvmap_template.py b/scripts/un/codes/codelist_pvmap_template.py
new file mode 100644
index 0000000000..3a9b0f4196
--- /dev/null
+++ b/scripts/un/codes/codelist_pvmap_template.py
@@ -0,0 +1,23 @@
+# Template for converting UN codelist files to PVMap for statvar processor.
+# The pvmap will have columns to generate statvar constraint property:values
+# and names.
+# The pvmap will also have columns to generate schema MCF with a tMCF.
+{
+    'key': '{CONCEPT}:{CODE}',
+    'UnConceptProp': 'Property',
+    'UnConcept': '"{CONCEPT}"',
+    'UnCodeProp': 'UnCode',
+    'UnCode': '"{CODE}"',
+    'ConstraintProp': '{PROPERTY}',
+    'ConstraintPropValue': 'to_dcid(NAMESPACE+"_"+CONCEPT+"-"+CODE)',
+    'ConstraintPropType': 'TypeOf',
+    'ConstraintPropEnum': 'str(ConstraintProp[0].upper() + ConstraintProp[1:]+"Enum")',
+    'NameProp': 'ValueName_{CONCEPT}',
+    'ConstraintValueName': '"{NAME_EN}"',
+    'DescriptionProp': 'Desc_{CONCEPT}',
+    'ConstraintValueDescription': 'quote(anyascii(DESCRIPTION))',
+    # End of line prop:value when description is empty.
+    '#End': 'End',
+    'Dummy': '.',
+}
+
diff --git a/scripts/un/codes/codelist_schema.tmcf b/scripts/un/codes/codelist_schema.tmcf
new file mode 100644
index 0000000000..038dd3a5d9
--- /dev/null
+++ b/scripts/un/codes/codelist_schema.tmcf
@@ -0,0 +1,17 @@
+Node: E:UN->E0
+typeOf: C:UN->ConstraintPropEnum
+dcid: C:UN->ConstraintPropValue
+unCode: C:UN->UnCode
+name: C:UN->ConstraintValueName
+description: C:UN->ConstraintValueDescription
+
+Node: E:UN->E1
+typeOf: C:UN->UnConceptProp
+dcid: C:UN->ConstraintProp
+unConcept: C:UN->UnConcept
+rangeIncludes: C:UN->ConstraintPropEnum
+
+Node: E:UN->E2
+typeOf: schema:Class
+dcid: C:UN->ConstraintPropEnum
+subClassOf: schema:Enumeration
diff --git a/scripts/un/codes/dsd_property_pvmap.py b/scripts/un/codes/dsd_property_pvmap.py
new file mode 100644
index 0000000000..aa1b9ef914
--- /dev/null
+++ b/scripts/un/codes/dsd_property_pvmap.py
@@ -0,0 +1,22 @@
+# Template for converting UN DSD file with column metadata
+# to PVMap for statvar processor.
+# The pvmap will have columns to generate statvar constraint
+# properties with names.
+# The pvmap will also have columns to generate schema MCF with a tMCF.
+{
+    'key': '{CONCEPT}',
+    'UnCodeProp': 'UnConceptCode',
+    'UnCode': '"{CONCEPT}"',
+    'ConceptProp': 'UnConceptProperty',
+    'ConstraintProp': '{PROPERTY}',
+    'ConstraintPropType': 'Property',
+    'ConstraintPropEnum': 'str(ConstraintProp[0].upper() + ConstraintProp[1:]+"Enum")',
+    'ConceptNameProp': 'PropertyName_{CONCEPT}',
+    'ConceptName': '"{NAME_EN}"',
+    'DescriptionProp': 'PropertyDesc_{CONCEPT}',
+    'ConceptDescription': '{DESCRIPTION}',
+    # End of line prop:value when description is empty.
+    '#End': 'End',
+    'Dummy': '.',
+}
+
diff --git a/scripts/un/codes/dsd_property_pvmap_template.py b/scripts/un/codes/dsd_property_pvmap_template.py
new file mode 100644
index 0000000000..708beac19f
--- /dev/null
+++ b/scripts/un/codes/dsd_property_pvmap_template.py
@@ -0,0 +1,39 @@
+# Template for converting UN DSD file with column metadata
+# to PVMap for statvar processor.
+# The pvmap will have columns to generate statvar constraint
+# properties with names.
+# The pvmap will also have columns to generate schema MCF with a tMCF.
+{
+    'key':
+        '{CONCEPT}',
+    'UnCodeProp':
+        'UnConceptCode',
+    'UnCode':
+        '"{CONCEPT}"',
+    'ConceptProp':
+        'UnConceptProperty',
+    'ConstraintProp':
+        '{PROPERTY}',
+    'ConstraintPropType':
+        'Property',
+    'ConstraintPropEnum':
+        'str(ConstraintProp[0].upper() + ConstraintProp[1:]+"Enum")',
+    'ConceptNameProp':
+        'PropertyName_{CONCEPT}',
+    'ConceptName':
+        '"{NAME_EN}"',
+    'DescriptionProp':
+        'ValueDesc_{CONCEPT}',
+    'ConceptDescription':
+        'quote(anyascii(DESCRIPTION))',
+    # Initialize ValueName for specific codes for a concept to empty string
+    'CodeNameProp':
+        'ValueName_{CONCEPT}',
+    'DefaultName':
+        '""',
+    # End of line prop:value when description is empty.
+    '#End':
+        'End',
+    'Dummy':
+        '.',
+}
diff --git a/scripts/un/codes/dsd_property_schema.tmcf b/scripts/un/codes/dsd_property_schema.tmcf
new file mode 100644
index 0000000000..8491b4b64f
--- /dev/null
+++ b/scripts/un/codes/dsd_property_schema.tmcf
@@ -0,0 +1,15 @@
+Node: E:DSD->E0
+dcid: C:DSD->ConstraintProp
+typeOf: C:DSD->ConstraintPropType
+name: C:DSD->ConstraintProp
+domainIncludes: dcid:UNSeries
+alternateName: C:DSD->ConceptName
+description: C:DSD->ConceptDescription
+rangeIncludes: C:DSD->ConstraintPropEnum
+
+Node: E:DSD->E1
+dcid: C:DSD->ConstraintPropEnum
+typeOf: schema:Class
+subClassOf: schema:Enumeration
+description: C:DSD->ConceptDescription
+
diff --git a/scripts/un/codes/generate_codelist_map.py b/scripts/un/codes/generate_codelist_map.py
index f7d13597e0..c511807379 100644
--- a/scripts/un/codes/generate_codelist_map.py
+++ b/scripts/un/codes/generate_codelist_map.py
@@ -1,12 +1,15 @@
-"""Script to geenrate codelist mapings for a specific codelist file."""
+"""Script to generate codelist mapings for a specific codelist file."""
 
 import os
 import re
 import sys
+import unicodedata
 
 from absl import app
 from absl import flags
 from absl import logging
+from anyascii import anyascii
+from pprint import pprint
 
 import file_util
 import mcf_file_util
@@ -17,8 +20,8 @@
 flags.DEFINE_string('input_codelist', '', 'CSV file with codelist.')
 flags.DEFINE_string('output_pvmap', '', 'Output pvmap csv.')
 flags.DEFINE_string('namespace', 'un', 'Namespace prefix for agency')
-flags.DEFINE_integer('logging_level', logging.INFO, 'Logging level.')
 flags.DEFINE_string('pvmap_template', '', 'Python file with pvmap template.')
+flags.DEFINE_integer('logging_level', logging.INFO, 'Logging level.')
 
 _FLAGS = flags.FLAGS
 
@@ -35,23 +38,49 @@
 
 # Map from a code to a pvmap
 _DEFAULT_CODE_PVMAP = {
-    'key': '{CONCEPT}:{CODE}',
-    'UnConceptProp': 'Property',
-    'UnConcept': '"{CONCEPT}"',
-    'UnCodeProp': 'UnCode',
-    'UnCode': '"{CODE}"',
-    'ConstraintProp': 'to_property(CONCEPT)',
-    'ConstraintPropValue': 'to_dcid(NAMESPACE+"_"+CODE)',
-    'ConstraintPropType': 'TypeOf',
-    'ConstraintPropEnum': 'str(ConstraintProp[0].upper() + ConstraintProp[1:]+"Enum")',
-    'NameProp': '{CONCEPT}_name',
-    'ConstraintValueName': '"{NAME_EN}"',
-    'DescriptionProp': '{CONCEPT}_description',
-    'ConstraintValueDescription': '{DESCRIPTION}',
-    'End': 'End',
-    'Dummy': '.',
+    'key':
+        '{CONCEPT}:{CODE}',
+    'UnConceptProp':
+        'Property',
+    'UnConcept':
+        '"{CONCEPT}"',
+    'UnCodeProp':
+        'UnCode',
+    'UnCode':
+        '"{CODE}"',
+    'ConstraintProp':
+        '{PROPERTY}',
+    'ConstraintPropValue':
+        'to_dcid(NAMESPACE+"_"+CONCEPT+"-"+CODE)',
+    'ConstraintPropType':
+        'TypeOf',
+    'ConstraintPropEnum':
+        'str(ConstraintProp[0].upper() + ConstraintProp[1:]+"Enum")',
+    'NameProp':
+        'ValueName_{CONCEPT}',
+    'ConstraintValueName':
+        '"{NAME_EN}"',
+    'DescriptionProp':
+        'ValueDesc_{CONCEPT}',
+    'ConstraintValueDescription':
+        '{DESCRIPTION}',
+    'End':
+        'End',
+    'Dummy':
+        '.',
+}
+
+# Mapping from concept to properties.
+# If not set in the map, the property is derived from the concept name.
+_DEFAULT_CONCEPT_PROP_MAP = {
+    'SERIES': 'populationType',
+    'UNIT_MEASURE': 'unit',
 }
 
+def quote(value: str) -> str:
+    """Returns value trimmed of spaces and quotes, wrapped in double quotes."""
+    unquoted = value.strip().strip('"').strip()
+    return f'"{unquoted}"'
 
 def to_property(concept: str) -> str:
     """Returns a property for the concept."""
@@ -61,7 +90,7 @@ def to_property(concept: str) -> str:
 
 def to_dcid(code: str) -> str:
     """Replace any non alphanumeric characters with '_'"""
-    value = re.sub(r'[^A-Za-z0-9\.]+', '_', code)
+    value = re.sub(r'[^A-Za-z0-9\._:-]+', '_', code)
     return value[0].upper() + value[1:]
 
 
@@ -81,34 +110,39 @@ def clean_value_str(val: str,
     'to_property': to_property,
     'to_dcid': to_dcid,
     'clean_value_str': clean_value_str,
+    'quote': quote,
+
+    # Additional modules for text manipulations
+    'unicodedata': unicodedata,
+    'anyascii': anyascii,
 })
 
+
 def get_value(tpl_val: str, input_pvs: dict) -> str:
-  """Retuns a value with the pvs applied."""
-  value = tpl_val
-  if '{' in tpl_val:
-      # Format string
-      try:
-          value = tpl_val.format(**input_pvs)
-      except Exception as e:
-          logging.error(
-              f'Failed to format "{tpl_val}" using dict: {input_pvs}, error:{e}'
-          )
-          value = ''
-  elif '(' in tpl_val:
-      # Evaluate a function
-      try:
-          prop, value = eval_functions.evaluate_statement(
-              tpl_val, input_pvs, _EVAL_FUNCTIONS)
-      except Exception as e:
-          lgging.error(
-              f'Failed to evaluate "{tpl_val}" using dict: {pvs}, error:{e}'
-          )
-          value = ''
-  if value:
-      # Cleanup value
-      value = clean_value_str(value)
-  return value
+    """Returns tpl_val with input_pvs applied, or '' on failure."""
+    value = tpl_val
+    if '{' in tpl_val:
+        # Format string, e.g. '{CONCEPT}:{CODE}'
+        try:
+            value = tpl_val.format(**input_pvs)
+        except Exception as e:
+            logging.error(
+                f'Failed to format "{tpl_val}" using dict: {input_pvs}, error:{e}'
+            )
+            value = ''
+    elif '(' in tpl_val:
+        # Evaluate a function call with input_pvs as the variables
+        try:
+            _, value = eval_functions.evaluate_statement(
+                tpl_val, input_pvs, _EVAL_FUNCTIONS)
+        except Exception as e:
+            # Report the failure and fall back to an empty value.
+            logging.error(f'Failed to evaluate "{tpl_val}" using dict: '
+                          f'{input_pvs}, error:{e}')
+            value = ''
+    if value:
+        value = clean_value_str(value)  # Cleanup value
+    return value
 
 
 def generate_code_map(code_pvs: dict,
@@ -122,6 +156,11 @@ def generate_code_map(code_pvs: dict,
     input_pvs = dict(code_pvs)
     input_pvs.setdefault('namespace', namespace.lower())
     input_pvs.setdefault('NAMESPACE', namespace.upper())
+    concept = code_pvs.get('CONCEPT')
+    concept_prop = _DEFAULT_CONCEPT_PROP_MAP.get(concept)
+    if not concept_prop:
+        concept_prop = to_property(concept)
+    input_pvs['PROPERTY'] = concept_prop
     for tpl_prop, tpl_val in template.items():
         tpl_prop = get_value(tpl_prop, input_pvs)
         value = get_value(tpl_val, input_pvs)
@@ -133,7 +172,8 @@ def generate_code_map(code_pvs: dict,
 
 def generate_codelist_pvmap(cl_file: str,
                             output: str,
-                            namespace: str = 'un') -> dict:
+                            namespace: str = 'un',
+                            template_file: str = None) -> dict:
     """Generate a pvmap file for a codelist."""
     counters = Counters()
 
@@ -141,9 +181,15 @@ def generate_codelist_pvmap(cl_file: str,
     logging.info(f'Loaded {len(input_codes)} from codelist: {cl_file}')
     counters.add_counter('input-codes', len(input_codes))
 
+    pvmap_template = _DEFAULT_CODE_PVMAP
+    if template_file:
+        pvmap_template = file_util.file_load_py_dict(template_file)
+    # pprint() prints and returns None; log the dict itself instead.
+    logging.info(f'Using template: {pvmap_template}')
+
     output_pvs = {}
     for index, code_pvs in input_codes.items():
-        pvs = generate_code_map(code_pvs, namespace)
+        pvs = generate_code_map(code_pvs, namespace, pvmap_template)
         output_pvs[index] = pvs
         logging.debug(f'Mapped {code_pvs} to {pvs}')
 
@@ -167,7 +213,7 @@ def generate_codelist_pvmap(cl_file: str,
 def main(_):
     logging.set_verbosity(_FLAGS.logging_level)
     generate_codelist_pvmap(_FLAGS.input_codelist, _FLAGS.output_pvmap,
-                            _FLAGS.namespace)
+                            _FLAGS.namespace, _FLAGS.pvmap_template)
 
 
 if __name__ == '__main__':
diff --git a/scripts/un/codes/generate_statvar_name.py b/scripts/un/codes/generate_statvar_name.py
new file mode 100644
index 0000000000..079e5f096a
--- /dev/null
+++ b/scripts/un/codes/generate_statvar_name.py
@@ -0,0 +1,169 @@
+"""Script to generate statvar names for UN statvars."""
+
+import os
+import re
+import sys
+
+from absl import app
+from absl import flags
+from absl import logging
+
+import file_util
+from mcf_file_util import add_namespace, strip_namespace, get_node_dcid
+from mcf_file_util import load_mcf_nodes, write_mcf_nodes
+
+from config_map import ConfigMap
+from counters import Counters
+
+flags.DEFINE_string('input_statvar_mcf', '', 'MCF files with statvar nodes.')
+flags.DEFINE_string('output_statvar_mcf', '',
+                    'Output MCF files for statvar with names.')
+flags.DEFINE_string('input_schema_mcf', '',
+                    'Schema file with names for propeorties.')
+flags.DEFINE_integer('logging_level', logging.INFO, 'Logging level.')
+
+_FLAGS = flags.FLAGS
+"""Wrapper to generate statvar names for UN StatVars."""
+
+_DEFAULT_IGNORE_PROP = {
+    'Node': '',
+    'dcid': '',
+    'typeOf': '',
+    'memberOf': '',
+    'footnote': '',
+    'description': '',
+    'name': '',
+    'populationType': '',
+    'measuredProperty': 'value',
+    'statType': 'measuredValue',
+}
+
+
+def to_sentence_case(text: str) -> str:
+    """Converts camelCase or snake_case text into a capitalized sentence.
+
+    For example, 'measuredValue' and 'measured_value' are both converted to
+    'Measured value'.
+    """
+    # Insert a space where a lowercase letter or digit precedes an uppercase.
+    words = re.sub(r'(?<=[a-z0-9])(?=[A-Z])', ' ', text)
+    return re.sub(r'[_ ]+', ' ', words).strip().capitalize()
+
+
+class UNStatVarNameGenerator:
+    """Generates names for statvar nodes using names from schema nodes."""
+
+    def __init__(self, config_dict: dict = None, counters: Counters = None):
+        """Initializes with optional config settings and counters."""
+        self._config = ConfigMap()
+        # The default is None as a {} default would be shared across calls.
+        self._config.update_config(config_dict or {})
+        self._counters = counters if counters is not None else Counters()
+        self._schema_nodes = {}
+
+    def load_schema_mcf(self, mcf: str) -> dict:
+        """Loads schema nodes from MCF files and returns all nodes loaded."""
+        load_mcf_nodes(mcf, nodes=self._schema_nodes)
+        self._counters.add_counter('input-schema-nodes',
+                                   len(self._schema_nodes))
+        return self._schema_nodes
+
+    def get_schema_node(self, dcid: str) -> dict:
+        """Returns the schema node for the dcid, or None if not loaded."""
+        if not dcid:
+            return None
+        # Try the dcid stripped of its namespace first, then with a namespace.
+        node = self._schema_nodes.get(strip_namespace(dcid))
+        if not node:
+            node = self._schema_nodes.get(add_namespace(dcid))
+        return node
+
+    def get_schema_name(self, dcid: str) -> str:
+        """Returns the unquoted schema name for the dcid, '' if unknown."""
+        node = self.get_schema_node(dcid)
+        if not node:
+            return ''
+        # Prefer alternateName over name.
+        name = node.get('alternateName') or node.get('name', '')
+        return name.strip('"').strip()
+
+    def generate_statvar_name(self, pvs: dict) -> dict:
+        """Adds a name to a statvar if it doesn't exist already.
+
+        Args:
+          pvs: dictionary of property:values for the statvar, updated in place.
+
+        Returns:
+          pvs with the property 'name' set.
+        """
+        name = pvs.get('name')
+        if name:
+            logging.debug(f'Using existing name for statvar:{name}')
+            self._counters.add_counter('input-existing-name', 1)
+            return pvs
+
+        # Use the name from the schema if it already exists.
+        name = self.get_schema_name(get_node_dcid(pvs))
+        if name:
+            pvs['name'] = '"' + name + '"'
+            self._counters.add_counter('input-schema-name', 1)
+            return pvs
+
+        # Collect names for constraint property:values
+        name_tokens = []
+        for prop, value in pvs.items():
+            prop = strip_namespace(prop)
+            value = strip_namespace(value)
+            if prop in _DEFAULT_IGNORE_PROP:
+                # Skip the property for any value if the ignore value is
+                # empty, else only when it has the ignore value.
+                ignore_val = strip_namespace(_DEFAULT_IGNORE_PROP[prop])
+                if not ignore_val or ignore_val == value:
+                    continue
+            prop_name = self.get_schema_name(prop)
+            if not prop_name:
+                prop_name = to_sentence_case(prop)
+                self._counters.add_counter('property-missing-name', 1)
+            val_name = self.get_schema_name(value)
+            if not val_name:
+                val_name = to_sentence_case(value)
+                self._counters.add_counter('value-missing-name', 1)
+            pv_tokens = [token for token in (prop_name, val_name) if token]
+            if pv_tokens:
+                name_tokens.append('='.join(pv_tokens))
+        # Name is '<populationType name> [<prop>=<value>, ...]'
+        name = self.get_schema_name(pvs.get('populationType'))
+        if name_tokens:
+            self._counters.add_counter('generated-statvar-name-contraints', 1)
+            name += ' [' + ', '.join(name_tokens) + ']'
+        pvs['name'] = f'"{name}"'
+        self._counters.add_counter('generated-statvar-names', 1)
+        return pvs
+
+
+def generate_statvar_names(input_mcf: str, schema_mcf: str, output_mcf: str):
+    """Adds names to the statvars in input_mcf using names from schema_mcf.
+
+    The statvar nodes with names are written to output_mcf, if it is set.
+    """
+    counters = Counters()
+    name_generator = UNStatVarNameGenerator({}, counters)
+    name_generator.load_schema_mcf(schema_mcf)
+
+    statvars = load_mcf_nodes(input_mcf)
+    logging.info(f'Generating statvar names for {len(statvars)}')
+    for statvar_pvs in statvars.values():
+        name_generator.generate_statvar_name(statvar_pvs)
+    if output_mcf:
+        write_mcf_nodes(statvars, output_mcf)
+    counters.print_counters()
+
+
+def main(_):
+    logging.set_verbosity(_FLAGS.logging_level)
+    generate_statvar_names(_FLAGS.input_statvar_mcf, _FLAGS.input_schema_mcf,
+                           _FLAGS.output_statvar_mcf)
+
+
+if __name__ == '__main__':
+    app.run(main)
diff --git a/tools/statvar_importer/config_flags.py b/tools/statvar_importer/config_flags.py
index 5b8f05e3cb..be5088526a 100644
--- a/tools/statvar_importer/config_flags.py
+++ b/tools/statvar_importer/config_flags.py
@@ -169,7 +169,8 @@
                   'Generate names for Statvars.')
 flags.DEFINE_bool('enable_cloud_logging', False,
                   'Enable cloud logging when running on cloud.')
-
+flags.DEFINE_string('statvar_dcid_prefix', '',
+                    'Prefix for statvar dcid.')
 
 def get_default_config() -> dict:
     """Returns the default config as dictionary of config parameters and values."""
@@ -437,7 +438,7 @@ def get_default_config() -> dict:
         # Settings for statvar dcid generator
         'statvar_dcid_fixed_properties': [],
         'statvar_dcid_prefix':
-            '',
+            _FLAGS.statvar_dcid_prefix,
         'statvar_dcid_remove_prefix':
             '',
         'statvar_dcid_delimiter':
diff --git a/tools/statvar_importer/schema/statvar_dcid_gen.py b/tools/statvar_importer/schema/statvar_dcid_gen.py
index 52c345aac2..32d509950f 100644
--- a/tools/statvar_importer/schema/statvar_dcid_gen.py
+++ b/tools/statvar_importer/schema/statvar_dcid_gen.py
@@ -43,10 +43,9 @@ def camel_to_snake(text: str, delim: str = '_') -> str:
     Returns:
       The converted snake_case string in lowercase.
     """
-    s1 = re.sub(r'([a-z0-9])([A-Z])', r'\1' + delim + r'\2', text)
-    s2 = re.sub(r'([a-zA-Z])([0-9])', r'\1' + delim + r'\2', s1)
-    s3 = re.sub(r'([A-Z])([A-Z][a-z])', r'\1' + delim + r'\2', s2)
-    return s3.lower()
+    s1 = re.sub(r'([a-z])([A-Z0-9])', r'\1' + delim + r'\2', text)
+    s2 = re.sub(r'([A-Z])([A-Z][a-z])', r'\1' + delim + r'\2', s1)
+    return s2.lower()
 
 
 def get_dcid_name(dcid: str, schema_nodes: dict) -> str:
@@ -92,7 +91,7 @@ def get_dcid_token(word: str,
         # Convert camelCase to snake case
         token = camel_to_snake(token).upper()
     if remove_prefix:
-        token = token.removeprefix(remove_prefix)
+        token = re.sub(remove_prefix, '', token)
     return token[0].upper() + token[1:]
 
 
@@ -136,6 +135,7 @@ def generate_dcid_for_statvar(pvs: dict,
         'alternateName', 'footnote', 'unCode', 'Node', 'typeOf'
     ])
     prop_delim = config.get('statvar_dcid_delimiter', '_')
+    fixed_prop_delim = config.get('statvar_dcid_fixed_delimiter', '_')
     val_delim = config.get('statvar_dcid_value_delimiter', '')
     upper_case = config.get('statvar_dcid_upper_case', False)
     remove_prefix = config.get('statvar_dcid_remove_prefix', '')
@@ -177,7 +177,8 @@ def generate_dcid_for_statvar(pvs: dict,
             ordered_props.append(prop)
 
     # Get ordered list of dcid tokens
-    dcid_tokens = []
+    dcid_fixed_tokens = []
+    dcid_prop_tokens = []
     for prop in ordered_props:
         prop_value = dcid_pvs.pop(prop, None)
         if prop_value:
@@ -190,8 +191,14 @@ def generate_dcid_for_statvar(pvs: dict,
                 value_name = prop_name + val_delim + value_name
             if upper_case:
                 value_name = value_name.upper()
-            dcid_tokens.append(value_name)
-    dcid = prop_delim.join(dcid_tokens)
+            if prop in fixed_props:
+                dcid_fixed_tokens.append(value_name)
+            else:
+                dcid_prop_tokens.append(value_name)
+    prop_token = prop_delim.join(dcid_prop_tokens)
+    if prop_token:
+        dcid_fixed_tokens.append(prop_token)
+    dcid = fixed_prop_delim.join(dcid_fixed_tokens)
     if dcid_prefix:
         dcid = dcid_prefix + dcid
     return dcid
diff --git a/tools/statvar_importer/schema/statvar_dcid_gen_test.py b/tools/statvar_importer/schema/statvar_dcid_gen_test.py
index 0538e1456e..a74a52f9f6 100644
--- a/tools/statvar_importer/schema/statvar_dcid_gen_test.py
+++ b/tools/statvar_importer/schema/statvar_dcid_gen_test.py
@@ -73,6 +73,7 @@ def test_generate_dcid_with_property(self):
                 'populationType'
             ],
             'statvar_dcid_delimiter': '__',
+            'statvar_dcid_fixed_delimiter': '.',
             'statvar_dcid_value_delimiter': '--',
             'statvar_dcid_remove_prefix': 'TEST_',
             'statvar_dcid_upper_case': True,
@@ -84,7 +85,7 @@ def test_generate_dcid_with_property(self):
             'populationType': 'Person',
         }
         dcid = generate_dcid_for_statvar(pvs, config)
-        self.assertEqual(dcid, 'test/COUNT__PERSON')
+        self.assertEqual(dcid, 'test/COUNT.PERSON')
 
         pvs2 = {
             'statType': 'medianValue',
@@ -95,7 +96,18 @@ def test_generate_dcid_with_property(self):
         }
         dcid2 = generate_dcid_for_statvar(pvs2, config)
         self.assertEqual(
-            dcid2, 'test/MEDIAN_VALUE__AGE__PERSON__GENDER--MALE__PLACE--URBAN')
+            dcid2, 'test/MEDIAN_VALUE.AGE.PERSON.GENDER--MALE__PLACE--URBAN')
+        pvs3 = {
+            'statType': 'measuredValue',
+            'measuredProperty': 'value',
+            'populationType': 'AdultPerson',
+            'gender': 'Male',
+            'place': 'TEST_Urban',
+        }
+        dcid3 = generate_dcid_for_statvar(pvs3, config)
+        self.assertEqual(
+            dcid3, 'test/ADULT_PERSON.GENDER--MALE__PLACE--URBAN')
+
 
 
 if __name__ == '__main__':

From c1c2a676e4034db98fd09138a0292777f1db4d81 Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Tue, 16 Jun 2026 21:23:31 +0530
Subject: [PATCH 09/12] convert large ints to float

---
 import-automation/executor/requirements.txt | 1 +
 tools/statvar_importer/utils.py             | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/import-automation/executor/requirements.txt b/import-automation/executor/requirements.txt
index 7ee91417a2..9868d3656c 100644
--- a/import-automation/executor/requirements.txt
+++ b/import-automation/executor/requirements.txt
@@ -1,6 +1,7 @@
 # Requirements for Python scripts in this repo that have automation enabled!
 
 absl-py
+anyascii
 arcgis2geojson
 beautifulsoup4
 chardet
diff --git a/tools/statvar_importer/utils.py b/tools/statvar_importer/utils.py
index 9c060de0b0..b40982bc9a 100644
--- a/tools/statvar_importer/utils.py
+++ b/tools/statvar_importer/utils.py
@@ -94,7 +94,10 @@ def str_from_number(number: Union[int, float],
         '123.45'
     """
     # Check if number is an integer or float without any decimals.
-    if int(number) == number:
+    if abs(number) > sys.maxsize:
+        # Convert very large ints to float.
+        number = float(number)
+    elif int(number) == number:
         number_int = int(number)
         return f'{number_int}'
     # Return float rounded to precision digits.

From 0d87b47573c5e1c5bf54ac1e1e98177bb8c98f18 Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Tue, 16 Jun 2026 22:31:27 +0530
Subject: [PATCH 10/12] make max_int configurable

---
 tools/statvar_importer/stat_var_processor.py | 1 +
 tools/statvar_importer/utils.py              | 8 +++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/statvar_importer/stat_var_processor.py b/tools/statvar_importer/stat_var_processor.py
index cb4553cdcb..2767a18d02 100644
--- a/tools/statvar_importer/stat_var_processor.py
+++ b/tools/statvar_importer/stat_var_processor.py
@@ -1222,6 +1222,7 @@ def format_svobs(self, svobs: dict) -> dict:
                     numeric_value,
                     precision_digits=self._config.get('output_precision_digits',
                                                       5),
+                    max_int=self._config.get('max_integer', sys.maxsize),
                 )
             elif isinstance(value, str) and value:
                 value = value.strip()
diff --git a/tools/statvar_importer/utils.py b/tools/statvar_importer/utils.py
index b40982bc9a..17c98832a6 100644
--- a/tools/statvar_importer/utils.py
+++ b/tools/statvar_importer/utils.py
@@ -67,7 +67,8 @@ def capitalize_first_char(string: str) -> str:
 
 
 def str_from_number(number: Union[int, float],
-                    precision_digits: Optional[int] = None) -> str:
+                    precision_digits: Optional[int] = None,
+                    max_int: int = sys.maxsize) -> str:
     """Converts a number (int or float) to its string representation.
 
     Integers and floats that are whole numbers (e.g., 10.0) are returned as
@@ -77,6 +78,7 @@ def str_from_number(number: Union[int, float],
     Args:
         number: The number to convert.
         precision_digits: Optional number of decimal places to round a float to.
+        max_int: Numbers larger than this are converted to float
 
     Returns:
         The string representation of the number.
@@ -94,8 +96,8 @@ def str_from_number(number: Union[int, float],
         '123.45'
     """
     # Check if number is an integer or float without any decimals.
-    if abs(number) > sys.maxsize:
-        # Convert very large ints to float.
+    if abs(number) > max_int:
+        # Convert very large ints to float with potential loss of precision
         number = float(number)
     elif int(number) == number:
         number_int = int(number)

From eaecf244d07cf2151b7528198e81e9d27d3765ea Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Tue, 16 Jun 2026 22:31:58 +0530
Subject: [PATCH 11/12] support imports from other folders

---
 scripts/un/codes/generate_codelist_map.py | 9 +++++++++
 scripts/un/codes/generate_statvar_name.py | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/scripts/un/codes/generate_codelist_map.py b/scripts/un/codes/generate_codelist_map.py
index c511807379..88cb3b6bfc 100644
--- a/scripts/un/codes/generate_codelist_map.py
+++ b/scripts/un/codes/generate_codelist_map.py
@@ -11,6 +11,15 @@
 from anyascii import anyascii
 from pprint import pprint
 
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_DIR)
+sys.path.append(os.path.dirname(_SCRIPT_DIR))
+sys.path.append(os.path.dirname(os.path.dirname(_SCRIPT_DIR)))
+_DATA_DIR = os.path.dirname(os.path.dirname(os.path.dirname(_SCRIPT_DIR)))
+sys.path.append(_DATA_DIR)
+sys.path.append(os.path.join(_DATA_DIR, 'util'))
+sys.path.append(os.path.join(_DATA_DIR, 'tools', 'statvar_importer'))
+
 import file_util
 import mcf_file_util
 import eval_functions
diff --git a/scripts/un/codes/generate_statvar_name.py b/scripts/un/codes/generate_statvar_name.py
index 079e5f096a..4f6a075b10 100644
--- a/scripts/un/codes/generate_statvar_name.py
+++ b/scripts/un/codes/generate_statvar_name.py
@@ -8,6 +8,15 @@
 from absl import flags
 from absl import logging
 
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_DIR)
+sys.path.append(os.path.dirname(_SCRIPT_DIR))
+sys.path.append(os.path.dirname(os.path.dirname(_SCRIPT_DIR)))
+_DATA_DIR = os.path.dirname(os.path.dirname(os.path.dirname(_SCRIPT_DIR)))
+sys.path.append(_DATA_DIR)
+sys.path.append(os.path.join(_DATA_DIR, 'util'))
+sys.path.append(os.path.join(_DATA_DIR, 'tools', 'statvar_importer'))
+
 import file_util
 from mcf_file_util import add_namespace, strip_namespace, get_node_dcid
 from mcf_file_util import load_mcf_nodes, write_mcf_nodes

From 87b5b93fe5d26f2758ae1e5a61a991be6f6e09bf Mon Sep 17 00:00:00 2001
From: Ajai Tirumali <ajaits@google.com>
Date: Thu, 18 Jun 2026 01:24:07 +0530
Subject: [PATCH 12/12] add script to generate statvar groups

---
 scripts/un/codes/generate_statvar_groups.py   | 294 ++++++++++++++++++
 scripts/un/codes/generate_statvar_name.py     |  15 +-
 .../schema/statvar_dcid_gen.py                |   6 +-
 3 files changed, 310 insertions(+), 5 deletions(-)
 create mode 100644 scripts/un/codes/generate_statvar_groups.py

diff --git a/scripts/un/codes/generate_statvar_groups.py b/scripts/un/codes/generate_statvar_groups.py
new file mode 100644
index 0000000000..4fa599f804
--- /dev/null
+++ b/scripts/un/codes/generate_statvar_groups.py
@@ -0,0 +1,294 @@
+"""Script to generate statvar groups for UN statvars."""
+
+import itertools
+import os
+import re
+import sys
+
+from absl import app
+from absl import flags
+from absl import logging
+
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_DIR)
+sys.path.append(os.path.dirname(_SCRIPT_DIR))
+sys.path.append(os.path.dirname(os.path.dirname(_SCRIPT_DIR)))
+_DATA_DIR = os.path.dirname(os.path.dirname(os.path.dirname(_SCRIPT_DIR)))
+sys.path.append(_DATA_DIR)
+sys.path.append(os.path.join(_DATA_DIR, 'util'))
+sys.path.append(os.path.join(_DATA_DIR, 'tools', 'statvar_importer'))
+
+import file_util
+from mcf_file_util import add_namespace, strip_namespace, get_node_dcid
+from mcf_file_util import load_mcf_nodes, write_mcf_nodes, add_mcf_node
+
+from config_map import ConfigMap
+from counters import Counters
+
+flags.DEFINE_string('input_statvar_mcf', '', 'MCF files with statvar nodes.')
+flags.DEFINE_string('output_statvar_group_mcf', '',
+                    'Output MCF files for statvar groups.')
+flags.DEFINE_string('input_schema_mcf', '',
+                    'Schema file with names for properties.')
+flags.DEFINE_string('statvar_root', 'dc/g/Root', 'Root for the statvar group.')
+flags.DEFINE_string('statvar_group_prefix', 'custom/g/undata',
+                    'Prefix for the statvar group.')
+flags.DEFINE_string('statvar_dcid_remove_prefix', '',
+                    'Regex pattern removed from dcids when generating names.')
+flags.DEFINE_list('statvar_property_order', ['populationType'],
+                  'Statvar properties ordered by group hierarchy.')
+flags.DEFINE_bool(
+    'statvar_group_permutations', True,
+    'Generate statvar groups for all permutations of properties.')
+flags.DEFINE_integer('logging_level', logging.INFO, 'Logging level.')
+
+_FLAGS = flags.FLAGS
+"""Wrapper to generate statvar groups for UN StatVars."""
+
+_DEFAULT_IGNORE_PROP = {
+    'Node': '',
+    'dcid': '',
+    'typeOf': '',
+    'memberOf': '',
+    'footnote': '',
+    'description': '',
+    'name': '',
+    'measuredProperty': 'value',
+    'statType': 'measuredValue',
+}
+
+
+def get_default_statvar_group_config() -> dict:
+    """Returns the default statvar group config."""
+    return {
+        'svg_root': _FLAGS.statvar_root,
+        'svg_prefix': _FLAGS.statvar_group_prefix,
+        'svg_properties': _FLAGS.statvar_property_order,
+        'statvar_dcid_remove_prefix': _FLAGS.statvar_dcid_remove_prefix,
+        'statvar_group_permutations': _FLAGS.statvar_group_permutations,
+    }
+
+
+def to_snake_case(text: str, delim: str = '_', upper: bool = True) -> str:
+    """Returns text in snake case using delim; uppercased when upper is True."""
+    # insert delim at camelCase boundaries (lower/digit followed by upper)
+    sentence = re.sub(r'(?<=[a-z0-9])(?=[A-Z])', delim, text)
+
+    # collapse runs of '_' and spaces into a single delim
+    sentence = re.sub(r'[_ ]+', delim, sentence)
+    sentence = sentence.strip()
+    if upper:
+        return sentence.upper()
+    return sentence
+
+
+def to_quoted(text: str) -> str:
+    """Returns text double-quoted, with embedded double quotes made single."""
+    if not text:
+        return text
+    text = text.strip().strip('"').strip().replace('"', "'")
+    if text:
+        return '"' + text + '"'
+    return ''
+
+
+class UNStatVarGroupGenerator:
+
+    def __init__(
+        self,
+        config_dict: dict = None,
+        counters: Counters = None,
+    ):
+        """Sets up the config (defaults if None) and counters (new if None)."""
+        self._config = ConfigMap()
+        # Avoid a shared mutable default; None means no config overrides.
+        self._config.update_config(config_dict or {})
+        self._counters = Counters() if counters is None else counters
+        # dictionary of schema nodes keyed by dcid.
+        self._schema_nodes = {}
+        # dictionary of statvar groups created.
+        self._statvar_groups = {}
+
+    def load_schema_mcf(self, mcf: str) -> dict:
+        """Loads schema nodes from MCF files."""
+        load_mcf_nodes(mcf, nodes=self._schema_nodes)
+        self._counters.add_counter('input-schema-nodes',
+                                   len(self._schema_nodes))
+        return self._schema_nodes
+
+    def get_schema_node(self, dcid: str) -> dict:
+        """Returns a schema node for the dcid."""
+        if not dcid:
+            return None
+        node = self._schema_nodes.get(strip_namespace(dcid))
+        if not node:
+            node = self._schema_nodes.get(add_namespace(dcid))
+        return node
+
+    def get_schema_name(self, dcid: str) -> str:
+        """Returns the name for the dcid from the schema, or '' if not found."""
+        node = self.get_schema_node(dcid)
+        if not node:
+            return ''
+        name = node.get('alternateName')
+        if not name:
+            name = node.get('name', '')
+        if not name:
+            # unnamed node: derive a name from the dcid after its first '/'
+            remove_prefix = self._config.get('statvar_dcid_remove_prefix', '')
+            name = re.sub(remove_prefix, '', dcid[dcid.find('/') + 1:])
+            name = to_snake_case(name).capitalize()
+        return name.strip('"').strip()
+
+    def add_statvar_group(self, pvs: dict):
+        """Add a statvar group to schema."""
+        add_mcf_node(pvs, self._schema_nodes)
+        add_mcf_node(pvs, self._statvar_groups)
+
+    def get_statvar_groups(self) -> dict:
+        """Returns the new statvar groups created."""
+        return self._statvar_groups
+
+    def get_statvar_group_node(self, dcid, name, parent) -> dict:
+        return {
+            'Node': add_namespace(dcid),
+            'typeOf': 'dcid:StatVarGroup',
+            'name': to_quoted(name),
+            'specializationOf': add_namespace(parent),
+        }
+
+    def generate_prop_value_svg(self, pvs: dict, grp_props: list,
+                                svg_parent: str, svg_prefix: str):
+        """Generate statvar groups for the property values in the list."""
+        strip_prefix = self._config.get('svg_dcid_remove_prefix', '')
+        depth = 0
+        for prop in grp_props:
+            val = strip_namespace(pvs.get(prop, ''))
+            if not val:
+                continue
+            # Create svg for the property
+            prop_id = re.sub(strip_prefix, '', to_snake_case(prop))
+            svg_dcid = svg_prefix + prop_id
+            svg_name = self.get_schema_name(prop)
+            self.add_statvar_group(
+                self.get_statvar_group_node(svg_dcid, svg_name, svg_parent))
+            depth += 1
+            self._counters.add_counter(
+                f'generated-statvar-groups-depth-{depth}', 1)
+            svg_parent = svg_dcid
+            svg_prefix = svg_dcid + self._config.get(
+                'statvar_dcid_value_delimiter', '--')
+
+            # Generate statvar group for value
+            val_id = re.sub(strip_prefix, '', val)
+            svg_dcid = svg_prefix + val_id
+            svg_name = self.get_schema_name(val)
+            self.add_statvar_group(
+                self.get_statvar_group_node(svg_dcid, svg_name, svg_parent))
+            depth += 1
+            self._counters.add_counter(
+                f'generated-statvar-groups-depth-{depth}', 1)
+            svg_parent = svg_dcid
+            svg_prefix = svg_dcid + self._config.get('statvar_dcid_delimiter',
+                                                     '__')
+        # Add the statvar to the leaf group.
+        sv = {
+            'Node': add_namespace(get_node_dcid(pvs)),
+            'typeOf': 'StatisticalVariable',
+            'memberOf': svg_parent,
+        }
+        self.add_statvar_group(sv)
+        self._counters.add_counter(f'statvar-for-depth-{depth}', 1)
+
+    def generate_groups_for_statvar(self, pvs: dict, svg_parent: str,
+                                    svg_prefix: str):
+        """Generates statvar groups for the hierarchy property:values in pvs."""
+        self._counters.add_counter('input-statvars', 1)
+        # Get the properties for the group
+        grp_props = dict()
+        for prop, value in pvs.items():
+            prop = strip_namespace(prop)
+            value = strip_namespace(value)
+            ignore_val = strip_namespace(_DEFAULT_IGNORE_PROP.get(prop))
+            if ignore_val is not None:
+                if not ignore_val or ignore_val == value:
+                    continue
+            grp_props.setdefault(prop, value)
+
+        # Create nested groups for the configured hierarchy properties first.
+        strip_prefix = self._config.get('svg_dcid_remove_prefix', '')
+        for prop in self._config.get('svg_properties', ['populationType']):
+            val = grp_props.pop(prop, None)
+            if not val:
+                continue
+            val = re.sub(strip_prefix, '', to_snake_case(val))
+            svg_dcid = svg_prefix + val
+            svg_name = self.get_schema_name(val)
+            self.add_statvar_group(
+                self.get_statvar_group_node(svg_dcid, svg_name, svg_parent))
+            self._counters.add_counter(f'generated-statvar-groups-{prop}', 1)
+            svg_parent = svg_dcid
+            svg_prefix = svg_dcid + self._config.get('statvar_dcid_delimiter',
+                                                     '__')
+
+        # Generate groups for every permutation of the remaining properties if
+        # enabled, else one sorted ordering; each entry is a sequence of props.
+        props_perm = [sorted(grp_props.keys())]
+        if self._config.get('statvar_group_permutations', False):
+            props_perm = list(itertools.permutations(grp_props.keys()))
+        for props_list in props_perm:
+            self.generate_prop_value_svg(pvs, props_list, svg_parent,
+                                         svg_prefix)
+
+    def generate_statvar_groups(self, sv_nodes: dict):
+        """Generate statvar groups for given statvar nodes."""
+        svg_prefix = self._config.get('svg_prefix', 'dc/g/')
+        svg_root = self._config.get('svg_root', 'dc/g/Root')
+        self._counters.add_counter('total', len(sv_nodes))
+        for dcid, pvs in sv_nodes.items():
+            self._counters.add_counter('processed', 1)
+            typ = strip_namespace(pvs.get('typeOf', ''))
+            if typ and typ != 'StatisticalVariable':
+                self._counters.add_counter('input-non-statvar-ignored', 1)
+                continue
+            self.generate_groups_for_statvar(pvs, svg_root, svg_prefix)
+
+        # Make the top SVG a child of root
+        if 'Root' not in svg_root and self._config.get(
+                'generate_statvar_group_root', True):
+            name = to_snake_case(svg_root[svg_root.find('/') + 1:], ' ', False)
+            self.add_statvar_group(
+                self.get_statvar_group_node(svg_root, name, 'dc/g/Root'))
+            self._counters.add_counter(f'generated-statvar-groups-root', 1)
+
+
+def generate_statvar_groups(input_mcf: str,
+                            schema_mcf: str,
+                            output_mcf: str,
+                            config: dict = None):
+    """Generate groups for statvars in input_mcf."""
+    counters = Counters()
+    sv_grp_generator = UNStatVarGroupGenerator(config, counters)
+    sv_grp_generator.load_schema_mcf(schema_mcf)
+
+    statvar_nodes = load_mcf_nodes(input_mcf)
+    logging.info(f'Generating statvar groups for {len(statvar_nodes)} nodes')
+    sv_grp_generator.generate_statvar_groups(statvar_nodes)
+
+    sv_grps = sv_grp_generator.get_statvar_groups()
+    if output_mcf and sv_grps:
+        write_mcf_nodes(sv_grps, output_mcf)
+    counters.add_counter('output-nodes', len(sv_grps))
+
+    counters.print_counters()
+
+
+def main(_):
+    logging.set_verbosity(_FLAGS.logging_level)
+    generate_statvar_groups(_FLAGS.input_statvar_mcf, _FLAGS.input_schema_mcf,
+                            _FLAGS.output_statvar_group_mcf,
+                            get_default_statvar_group_config())
+
+
+if __name__ == '__main__':
+    app.run(main)
diff --git a/scripts/un/codes/generate_statvar_name.py b/scripts/un/codes/generate_statvar_name.py
index 4f6a075b10..60fbdec682 100644
--- a/scripts/un/codes/generate_statvar_name.py
+++ b/scripts/un/codes/generate_statvar_name.py
@@ -28,7 +28,7 @@
 flags.DEFINE_string('output_statvar_mcf', '',
                     'Output MCF files for statvar with names.')
 flags.DEFINE_string('input_schema_mcf', '',
-                    'Schema file with names for propeorties.')
+                    'Schema file with names for properties.')
 flags.DEFINE_integer('logging_level', logging.INFO, 'Logging level.')
 
 _FLAGS = flags.FLAGS
@@ -47,6 +47,14 @@
     'statType': 'measuredValue',
 }
 
+def to_quoted(text: str) -> str:
+    """Returns quoted string."""
+    if not text:
+        return text
+    text = text.strip().strip('"').strip().replace('"', "'")
+    if text:
+        return '"' + text + '"'
+    return ''
 
 def to_sentence_case(text: str) -> str:
     """Returns a string in sentence case."""
@@ -105,13 +113,14 @@ def generate_statvar_name(self, pvs: dict) -> dict:
         if name:
             logging.debug(f'Using existing name for statvar:{name}')
             self._counters.add_counter(f'input-existing-name', 1)
+            pvs['name'] = to_quoted(name)
             return pvs
 
         # Use the name from the schema if it already exists.
         dcid = get_node_dcid(pvs)
         name = self.get_schema_name(dcid)
         if name:
-            pvs['name'] = '"' + name + '"'
+            pvs['name'] = to_quoted(name)
             self._counters.add_counter(f'input-schema-name', 1)
             return pvs
 
@@ -146,7 +155,7 @@ def generate_statvar_name(self, pvs: dict) -> dict:
         if name_suffix:
             self._counters.add_counter(f'generated-statvar-name-contraints', 1)
             name = f'{name} [{name_suffix}]'
-        pvs['name'] = f'"{name}"'
+        pvs['name'] = to_quoted(name)
         self._counters.add_counter(f'generated-statvar-names', 1)
 
 
diff --git a/tools/statvar_importer/schema/statvar_dcid_gen.py b/tools/statvar_importer/schema/statvar_dcid_gen.py
index 32d509950f..ad09b8a1cc 100644
--- a/tools/statvar_importer/schema/statvar_dcid_gen.py
+++ b/tools/statvar_importer/schema/statvar_dcid_gen.py
@@ -92,7 +92,9 @@ def get_dcid_token(word: str,
         token = camel_to_snake(token).upper()
     if remove_prefix:
         token = re.sub(remove_prefix, '', token)
-    return token[0].upper() + token[1:]
+    if token:
+      return token[0].upper() + token[1:]
+    return ''
 
 
 def generate_dcid_for_statvar(pvs: dict,
@@ -187,7 +189,7 @@ def generate_dcid_for_statvar(pvs: dict,
                 value_name = get_dcid_name(prop_value, schema_nodes)
             value_name = get_dcid_token(value_name, upper_case, remove_prefix)
             if val_delim and prop not in fixed_props:
-                prop_name = get_dcid_token(prop, upper_case, remove_prefix)
+                prop_name = get_dcid_token(prop, upper_case)
                 value_name = prop_name + val_delim + value_name
             if upper_case:
                 value_name = value_name.upper()