pyscf · sunqm · Mar 10, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
diff --git a/gpu4pyscf/_patch_pyscf.py b/gpu4pyscf/_patch_pyscf.py
@@ -252,22 +252,18 @@ def to_gpu(method, out=None):
 
             from importlib import import_module
             mod = import_module(method.__module__.replace('pyscf', 'gpu4pyscf'))
-            try:
-                cls = getattr(mod, method.__class__.__name__)
-            except AttributeError:
-                if hasattr(cls, 'from_cpu'):
-                    # the customized to_gpu function can be accessed at module
-                    # levelin gpu4pyscf.
-                    return cls.from_cpu(method)
-                raise
-
-            # Allow gpu4pyscf to customize the to_gpu method for PySCF classes.
             if hasattr(mod, 'from_cpu'):
+                # the customized to_gpu function can be accessed at module
+                # levelin gpu4pyscf.
                 return mod.from_cpu(method)
 
             # A temporary GPU instance. This ensures to initialize private
             # attributes that are only available for GPU code.
             cls = getattr(mod, method.__class__.__name__)
+            # Allow gpu4pyscf to customize the to_gpu method for PySCF classes.
+            if hasattr(cls, 'from_cpu'):
+                return cls.from_cpu(method)
+
             out = method.view(cls)
 
         elif hasattr(out, 'from_cpu'):

diff --git a/gpu4pyscf/pbc/df/aft.py b/gpu4pyscf/pbc/df/aft.py
@@ -118,13 +118,15 @@ class AFTDFMixin:
 
     pw_loop = NotImplemented
 
-    def weighted_coulG(mydf, kpt=None, exx=None, mesh=None, omega=None, kpts=None):
+    def weighted_coulG(mydf, kpt=None, exx=None, mesh=None, omega=None, kmesh=None):
         '''Weighted regular Coulomb kernel'''
         cell = mydf.cell
         if mesh is None:
             mesh = mydf.mesh
         Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
-        coulG = get_coulG(cell, kpt, exx, mesh=mesh, Gv=Gv, omega=omega, kpts=kpts)
+        if kmesh is not None:
+            mydf = None
+        coulG = get_coulG(cell, kpt, exx, mesh=mesh, Gv=Gv, omega=omega, kmesh=kmesh)
         coulG *= kws
         return coulG
 

diff --git a/gpu4pyscf/pbc/df/aft_jk.py b/gpu4pyscf/pbc/df/aft_jk.py
@@ -31,6 +31,7 @@
 from gpu4pyscf.pbc.tools.k2gamma import kpts_to_kmesh
 from gpu4pyscf.pbc.df.ft_ao import FTOpt, libpbc
 from gpu4pyscf.pbc.df.fft_jk import _format_dms, _format_jks, _ewald_exxdiv_for_G0
+from gpu4pyscf.pbc.tools import madelung
 from gpu4pyscf.lib.cupy_helper import contract, get_avail_mem, asarray
 from gpu4pyscf.lib import logger
 from gpu4pyscf.scf.jk import SHM_SIZE
@@ -216,7 +217,7 @@ def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=None, kpts_band=None,
     log.debug1('Gblksize = %d', Gblksize)
 
     for group_id, (kpt, ki_idx, kj_idx, self_conj) in enumerate(kpt_iters):
-        vkcoulG = mydf.weighted_coulG(kpt, exxdiv, mesh, kpts=kpts) * weight
+        vkcoulG = mydf.weighted_coulG(kpt, exxdiv, mesh, kmesh=bvk_kmesh) * weight
         for p0, p1 in lib.prange(0, ngrids, Gblksize):
             log.debug3('update_vk [%s:%s]', p0, p1)
             Gpq = ft_kern(Gv[p0:p1], kpt, kpts, kj_idx)
@@ -461,7 +462,7 @@ def get_ek_ip1(mydf, dm, kpts=None, exxdiv=None):
     ek = cp.zeros((cell.natm, 3))
     for group_id, (kp, kp_conj, ki_idx, kj_idx) in enumerate(bvk_kk_adapted_iter(kmesh)):
         kpt = kpts[kp]
-        wcoulG = mydf.weighted_coulG(kpt, exxdiv, mydf.mesh, kpts=kpts)
+        wcoulG = mydf.weighted_coulG(kpt, exxdiv, mydf.mesh, kmesh=kmesh)
         swap_2e = kp != kp_conj
         for p0, p1 in lib.prange(0, ngrids, blksize):
             nGv = p1 - p0
@@ -829,27 +830,24 @@ def get_ek_strain_deriv(mydf, dm, kpts=None, exxdiv=None, omega=None):
     if (exxdiv == 'ewald' and
         (cell.dimension == 3 or
          (cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum'))):
-        from pyscf.pbc.tools.pbc import madelung
         from gpu4pyscf.pbc.gto import int1e
         cell = mydf.cell
         s0 = int1e.int1e_ovlp(cell, kpts, kmesh)
         k_dm = contract('nkpq,kqr->nkpr', dm0, s0)
         k_dm = contract('nkpr,nkrs->kps', k_dm, dm0)
         ek_G0 = .5 * cp.einsum('kij,kji->', s0, k_dm).real.get() / nkpts**2
 
-        scaled_kpts = kpts.dot(cell.lattice_vectors().T)
         ewald_G0 = np.empty((3,3))
         disp = max(1e-5, (cell.precision*.1)**.5)
         for i in range(3):
             for j in range(i+1):
                 cell1, cell2 = rks_stress._finite_diff_cells(cell, i, j, disp)
-                kpts1 = scaled_kpts.dot(cell1.reciprocal_vectors(norm_to=1))
-                kpts2 = scaled_kpts.dot(cell2.reciprocal_vectors(norm_to=1))
-                e1 = nkpts * madelung(cell1, kpts1, omega=omega)
-                e2 = nkpts * madelung(cell2, kpts2, omega=omega)
+                e1 = nkpts * madelung(cell1, omega=omega, kmesh=kmesh)
+                e2 = nkpts * madelung(cell2, omega=omega, kmesh=kmesh)
                 ewald_G0[j,i] = ewald_G0[i,j] = (e1-e2)/(2*disp)
         ewald_G0 *= ek_G0
-        ewald_G0 += int1e.ovlp_strain_deriv(cell, k_dm, kpts) * madelung(cell, kpts, omega=omega)
+        ovlp_strain = int1e.ovlp_strain_deriv(cell, k_dm, kpts)
+        ewald_G0 += ovlp_strain * madelung(cell, omega=omega, kmesh=kmesh)
         sigma += ewald_G0
 
     log.timer_debug1('get_ek_ip1', *cpu0)
@@ -929,7 +927,7 @@ def get_jk(mydf, dm, hermi=1, kpt=np.zeros(3),
         if (exxdiv == 'ewald' and
             (cell.dimension < 2 or  # 0D and 1D are computed with inf_vacuum
              (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))):
-            _ewald_exxdiv_for_G0(cell, kpt, dms, vk)
+            _ewald_exxdiv_for_G0(cell, None, dms, vk)
         if k_real:
             vk = vk.real
         vk = vk.reshape(dm.shape)

diff --git a/gpu4pyscf/pbc/df/fft_jk.py b/gpu4pyscf/pbc/df/fft_jk.py
@@ -29,6 +29,7 @@
 from gpu4pyscf.lib import logger
 from gpu4pyscf.lib.cupy_helper import contract
 from gpu4pyscf.pbc import tools
+from gpu4pyscf.pbc.tools.k2gamma import kpts_to_kmesh
 
 def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None):
     '''Get the Coulomb (J) AO matrix at sampled k-points.
@@ -92,7 +93,7 @@ def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None):
 
     return _format_jks(vj_kpts, dm_kpts, input_band, kpts)
 
-def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None,
+def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=None, kpts_band=None,
                exxdiv=None):
     '''Get the Coulomb (J) and exchange (K) AO matrices at sampled k-points.
 
@@ -130,7 +131,13 @@ def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None,
         mo_coeff = None
 
     ni = mydf._numint
-    kpts = np.asarray(kpts)
+    if kpts is None:
+        kpts = np.zeros((1, 3))
+        kmesh = [1] * 3
+    else:
+        kpts = np.asarray(kpts).reshape(-1, 3)
+        kmesh = kpts_to_kmesh(cell, kpts)
+
     dm_kpts = cp.asarray(dm_kpts, order='C')
     dms = _format_dms(dm_kpts, kpts)
     nset, nkpts, nao = dms.shape[:3]
@@ -174,7 +181,7 @@ def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None,
         for k1, ao1 in enumerate(ao1_kpts):
             ao1T = ao1.T
             kpt1 = kpts_band[k1]
-            coulG = tools.get_coulG(cell, kpt2-kpt1, exxdiv, mesh, kpts=kpts)
+            coulG = tools.get_coulG(cell, kpt2-kpt1, exxdiv, mesh, kmesh=kmesh)
             if is_zero(kpt1-kpt2):
                 expmikr = cp.array(1.)
             else:
@@ -357,14 +364,17 @@ def get_j_e1_kpts(mydf, dm_kpts, kpts=None):
         ej = ej[0]
     return ej
 
-def get_k_e1_kpts(mydf, dm_kpts, kpts=np.zeros((1,3)), exxdiv=None):
+def get_k_e1_kpts(mydf, dm_kpts, kpts=None, exxdiv=None):
     raise NotImplementedError
 
 def _ewald_exxdiv_for_G0(cell, kpts, dms, vk, kpts_band=None):
-    from pyscf.pbc.tools.pbc import madelung
     from gpu4pyscf.pbc.gto.int1e import int1e_ovlp
     s = int1e_ovlp(cell, kpts=kpts)
-    m = madelung(cell, kpts)
+    if kpts is None:
+        kmesh = [1] * 3
+    else:
+        kmesh = kpts_to_kmesh(cell, kpts)
+    m = tools.madelung(cell, kmesh=kmesh)
     if kpts is None:
         for i,dm in enumerate(dms):
             vk[i] += m * s.dot(dm).dot(s)

diff --git a/gpu4pyscf/pbc/df/grad/krhf.py b/gpu4pyscf/pbc/df/grad/krhf.py
@@ -565,9 +565,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, kpts=None, hermi=0, j_factor=1., k_fact
         # cell.omega is not 0, which can lead to incorrect correction for
         # full-range ewald probe charge correction.
         if with_long_range:
-            weighted_coulG_at_G0 = madelung(cell, kpts, omega=0)
+            weighted_coulG_at_G0 = madelung(cell, omega=0, kmesh=bvk_kmesh)
         else:
-            weighted_coulG_at_G0 = madelung(cell, kpts, omega=-omega)
+            weighted_coulG_at_G0 = madelung(cell, omega=-omega, kmesh=bvk_kmesh)
         # The k_factor was previously scaled by 1/nkpts^2. The ewald term
         # requires a factor of 1/nkpts. Rescale k_factor by nkpts
         k_factor *= nkpts

diff --git a/gpu4pyscf/pbc/df/grad/kuhf.py b/gpu4pyscf/pbc/df/grad/kuhf.py
@@ -496,9 +496,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, kpts=None, hermi=0, j_factor=1., k_fact
         # cell.omega is not 0, which can lead to incorrect correction for
         # full-range ewald probe charge correction.
         if with_long_range:
-            weighted_coulG_at_G0 = madelung(cell, kpts, omega=0)
+            weighted_coulG_at_G0 = madelung(cell, omega=0, kmesh=bvk_kmesh)
         else:
-            weighted_coulG_at_G0 = madelung(cell, kpts, omega=-omega)
+            weighted_coulG_at_G0 = madelung(cell, omega=-omega, kmesh=bvk_kmesh)
         # Note the additional minus sign for nabla_A ovlp = -nabla ovlp
         ejk_ewald *= k_factor*nkpts * weighted_coulG_at_G0
         ejk += ejk_ewald

diff --git a/gpu4pyscf/pbc/df/grad/rhf.py b/gpu4pyscf/pbc/df/grad/rhf.py
@@ -411,9 +411,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, hermi=0, j_factor=1., k_factor=1.,
         # cell.omega is not 0, which can lead to incorrect correction for
         # full-range ewald probe charge correction.
         if with_long_range:
-            weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=0)
+            weighted_coulG_at_G0 = madelung(cell, omega=0)
         else:
-            weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=-omega)
+            weighted_coulG_at_G0 = madelung(cell, omega=-omega)
         # Note the additional minus sign for nabla_A ovlp = -nabla ovlp
         ejk_ewald *= .5 * k_factor * weighted_coulG_at_G0
         ejk += ejk_ewald

diff --git a/gpu4pyscf/pbc/df/grad/uhf.py b/gpu4pyscf/pbc/df/grad/uhf.py
@@ -400,9 +400,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, hermi=0, j_factor=1., k_factor=1.,
         # cell.omega is not 0, which can lead to incorrect correction for
         # full-range ewald probe charge correction.
         if with_long_range:
-            weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=0)
+            weighted_coulG_at_G0 = madelung(cell, omega=0)
         else:
-            weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=-omega)
+            weighted_coulG_at_G0 = madelung(cell, omega=-omega)
         # Note the additional minus sign for nabla_A ovlp = -nabla ovlp
         ejk_ewald *= k_factor * weighted_coulG_at_G0
         ejk += ejk_ewald

diff --git a/gpu4pyscf/pbc/df/tests/test_pbc_aft.py b/gpu4pyscf/pbc/df/tests/test_pbc_aft.py
@@ -355,7 +355,7 @@ def test_ek_ip1_gamma_point(self):
             for i in range(cell.natm):
                 p0, p1 = aoslices[i, 2:]
                 ref[i] = np.einsum('xnpq,nqp->x', vk[:,:,p0:p1], dm[:,:,p0:p1])
-            assert abs(ek_ewald - ref).max() < 1e-8
+            assert abs(ek_ewald - ref).max() < 3e-8
 
     @unittest.skipIf(num_devices > 1, '')
     def test_ek_ip1_kpts(self):
@@ -383,7 +383,7 @@ def test_ek_ip1_kpts(self):
 
         cell.precision = 1e-10
         cell.build(0, 0)
-        myfft = fft_cpu.FFTDF(cell)
+        myfft = fft_cpu.FFTDF(cell, kpts=kpts)
         vk = myfft.get_k_e1(dm, kpts=kpts)
         aoslices = cell.aoslice_by_atom()
         ref = np.empty((cell.natm, 3))

diff --git a/gpu4pyscf/pbc/scf/hf.py b/gpu4pyscf/pbc/scf/hf.py
@@ -193,7 +193,7 @@ def dump_flags(self, verbose=None):
         cell = self.cell
         if ((cell.dimension >= 2 and cell.low_dim_ft_type != 'inf_vacuum') and
             isinstance(self.exxdiv, str) and self.exxdiv.lower() == 'ewald'):
-            madelung = tools.pbc.madelung(cell, self.kpt[None])
+            madelung = tools.pbc.madelung(cell)
             log.info('    madelung (= occupied orbital energy shift) = %s', madelung)
             log.info('    Total energy shift due to Ewald probe charge'
                      ' = -1/2 * Nelec*madelung = %.12g',

diff --git a/gpu4pyscf/pbc/scf/j_engine.py b/gpu4pyscf/pbc/scf/j_engine.py
@@ -410,7 +410,7 @@ def get_j(self, dm, hermi=0, kpts=None, kpts_band=None, verbose=None):
         vj += self._get_j_lr(dm, hermi, kpts, kpts_band, verbose=verbose)
         return vj
 
-    def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, kpts=None):
+    def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, **kwargs):
         '''weighted LR Coulomb kernel. Mimic AFTDF.weighted_coulG'''
         if mesh is None:
             mesh = self.mesh

diff --git a/gpu4pyscf/pbc/scf/rsjk.py b/gpu4pyscf/pbc/scf/rsjk.py
@@ -41,7 +41,8 @@
 from gpu4pyscf.pbc.df.fft import _check_kpts
 from gpu4pyscf.pbc.df.fft_jk import _format_dms
 from gpu4pyscf.pbc.dft.multigrid_v2 import _unique_image_pair
-from gpu4pyscf.pbc.tools.pbc import get_coulG, probe_charge_sr_coulomb
+from gpu4pyscf.pbc.tools.pbc import get_coulG, probe_charge_sr_coulomb, madelung
+from gpu4pyscf.pbc.tools.k2gamma import kpts_to_kmesh
 from gpu4pyscf.grad.rhf import _ejk_quartets_scheme
 from gpu4pyscf.pbc.gto import int1e
 
@@ -227,8 +228,10 @@ def _get_k_sr(self, dm, hermi, kpts=None, kpts_band=None, exxdiv=None, verbose=N
 
         if kpts is None:
             kpts = np.zeros((1, 3))
+            kmesh = [1] * 3
         else:
             kpts = kpts.reshape(-1, 3)
+            kmesh = kpts_to_kmesh(cell, kpts, bound_by_supmol=True)
         is_gamma_point = is_zero(kpts)
         if is_gamma_point:
             assert dms.dtype == np.float64
@@ -406,7 +409,7 @@ def proc(dms, dm_cond):
                 # This term rapidly decays to 0 for large k-mesh. In the
                 # FFTDF.get_jk based implementation, this contribution is
                 # included in the short-range part.
-                wcoulG_SR_at_G0 = probe_charge_sr_coulomb(cell, omega, kpts)
+                wcoulG_SR_at_G0 = probe_charge_sr_coulomb(cell, omega, kmesh)
             else:
                 # Remove the G=0 contribution to match the output of FFTDF.get_jk().
                 wcoulG_SR_at_G0 = np.pi / omega**2 / cell.vol
@@ -435,33 +438,34 @@ def _get_k_lr(self, dm, hermi, kpts=None, kpts_band=None, exxdiv=None,
             kpts = kpts[0]
         return get_k_kpts(self, dm, hermi, kpts, kpts_band, exxdiv=exxdiv)
 
-    def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, kpts=None):
+    def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None,
+                       kmesh=None):
         '''weighted LR Coulomb kernel. Mimic AFTDF.weighted_coulG'''
         if mesh is None:
             mesh = self.mesh
         cell = self.cell
         omega = self.omega
         Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
         coulG = get_coulG(cell, kpt, exx=None, mesh=mesh, Gv=Gv,
-                          wrap_around=True, omega=omega, kpts=kpts)
+                          wrap_around=True, omega=omega, kmesh=kmesh)
         coulG *= kws
         if kpt is None or not is_zero(kpt):
             return coulG
 
         if exx == 'ewald':
-            Nk = len(kpts)
+            Nk = np.prod(kmesh)
             # In the full-range Coulomb, the ewald correction for get_k_lr is
-            #     +Nk*pbctools.madelung(cell, kpts) - np.pi / omega**2 * kws - probe_charge_sr_coulomb
+            #     +Nk*madelung(cell, kmesh) - np.pi / omega**2 * kws - probe_charge_sr_coulomb
             # The last two terms are included in the get_k_sr. The second term
             # (np.pi/omega**2) removes the contribution of the SR integrals at G=0.
             #
-            # pbctools.madelung(cell, kpts) includes three terms: -2*ewovrl, -2*ewself and -2*ewg.
+            # madelung(cell, kmesh) includes three terms: -2*ewovrl, -2*ewself and -2*ewg.
             # ewself is the sum of ewself_lr_point_charge and ewself_sr_at_G0.
-            # This correction is identical to madelung(cell, kpts, omega=omega),
+            # This correction is identical to madelung(cell, omega, kmesh),
             # which gives -2*(ewself_lr_point_charges + ewg) .
             # ewself_sr_at_G0 in ewovrl cancels out the second term (np.pi/omega**2);
             # -2*ewovrl cancels out the last term (probe_charge_sr_coulomb).
-            coulG[0] += Nk*pbctools.madelung(cell, kpts, omega=omega)
+            coulG[0] += Nk*madelung(cell, omega=omega, kmesh=kmesh)
         return coulG
 
     def _get_ejk_sr_ip1(self, dm, kpts=None, exxdiv=None,
@@ -716,8 +720,10 @@ def _get_ejk_sr_strain_deriv(self, dm, kpts=None, exxdiv=None,
 
         if kpts is None:
             kpts = np.zeros((1, 3))
+            kmesh = [1] * 3
         else:
             kpts = kpts.reshape(-1, 3)
+            kmesh = kpts_to_kmesh(cell, kpts, bound_by_supmol=True)
         is_gamma_point = is_zero(kpts)
         if is_gamma_point:
             assert dms.dtype == np.float64
@@ -906,18 +912,15 @@ def proc(dms, dm_cond):
             sigma += ej_G0 * np.eye(3)
             sigma -= wcoulG_SR_at_G0 * ek_G0 * np.eye(3)
             if exxdiv == 'ewald':
-                from pyscf.pbc.tools.pbc import madelung
                 from gpu4pyscf.pbc.grad.rks_stress import _finite_diff_cells
                 scaled_kpts = kpts.dot(cell.lattice_vectors().T)
                 ewald_G0_response = np.empty((3,3))
                 disp = max(1e-5, (cell.precision*.1)**.5)
                 for i in range(3):
                     for j in range(i+1):
                         cell1, cell2 = _finite_diff_cells(cell, i, j, disp)
-                        kpts1 = scaled_kpts.dot(cell1.reciprocal_vectors(norm_to=1))
-                        kpts2 = scaled_kpts.dot(cell2.reciprocal_vectors(norm_to=1))
-                        e1 = nkpts * madelung(cell1, kpts1, omega=-omega)
-                        e2 = nkpts * madelung(cell2, kpts2, omega=-omega)
+                        e1 = nkpts * madelung(cell1, omega=-omega, kmesh=kmesh)
+                        e2 = nkpts * madelung(cell2, omega=-omega, kmesh=kmesh)
                         ewald_G0_response[j,i] = ewald_G0_response[i,j] = (e1-e2)/(2*disp)
                 ewald_G0_response *= ek_G0
                 sigma -= ewald_G0_response