Skip to content

Commit 790c9c6

Browse files
committed
Fix boundary marks in Arabic.
1 parent 094deb3 commit 790c9c6

File tree

1 file changed

+35
-33
lines changed

1 file changed

+35
-33
lines changed

scriptshifter/tables/data/arabic.yml

Lines changed: 35 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ roman_to_script:
2121
# Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin
2222
# Scripts Conceptually"
2323
# Updated, 26 March 2025 by Randall K. Barry to reverse truncation marks for ScriptShifter
24+
# Updated, 26 November 2025 by Randall K. Barry to reverse truncation marks again
25+
# xxxx% for end of string, %xxxx for beginning of string
2426

2527

2628
# Punctuation marks:
@@ -137,15 +139,15 @@ roman_to_script:
137139
"ka-": "\u0643"
138140

139141
# Vowels and vowel/consonant combinations - ta-marbutah at end of word
140-
"%ah": "\u0629"
141-
"%at": "\u0629"
142+
"ah%": "\u0629"
143+
"at%": "\u0629"
142144

143145
# tanwin at end of word
144-
"%an": "\u0627"
146+
"an%": "\u0627"
145147

146148
# ayn-alif combo
147-
"%\u02BBa\u0304\u02BE": "\u0639\u0627\u0621"
148-
"%\u02BBa\u0304\u02BC": "\u0639\u0627\u0621"
149+
"\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621"
150+
"\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621"
149151

150152
"\u02BBA\u0304": "\u0639\u0627"
151153
"\u02BBa\u0304": "\u0639\u0627"
@@ -160,24 +162,24 @@ roman_to_script:
160162
"\u02BBU": "\u0639"
161163
"\u02BBu": "\u0639"
162164

163-
"\u02BBA%": "\u0639"
165+
"%\u02BBA": "\u0639"
164166
# "%\u02BBa": "\u0639"
165167

166168
# alif and hamzas for all occasions
167169

168170
# truncation necessary? It seems to work fine with.
169171

170-
"%i\u0304\u02BEah": "\u064A\u0626\u0629"
171-
"%i\u0304\u02BCah": "\u064A\u0626\u0629"
172+
"i\u0304\u02BEah%": "\u064A\u0626\u0629"
173+
"i\u0304\u02BCah%": "\u064A\u0626\u0629"
172174

173-
"%i\u0304\u02BEat": "\u064A\u0626\u0629"
174-
"%i\u0304\u02BCat": "\u064A\u0626\u0629"
175+
"i\u0304\u02BEat%": "\u064A\u0626\u0629"
176+
"i\u0304\u02BCat%": "\u064A\u0626\u0629"
175177

176-
"%i\u02BEa\u0304": "\u0626\u0627"
177-
"%i\u02BCa\u0304": "\u0626\u0627"
178+
"i\u02BEa\u0304%": "\u0626\u0627"
179+
"i\u02BCa\u0304%": "\u0626\u0627"
178180

179-
"%i\u02BE": "\u0626"
180-
"%i\u02BC": "\u0626"
181+
"i\u02BE%": "\u0626"
182+
"i\u02BC%": "\u0626"
181183
"a\u0304\u02BEa\u0304": "\u0627\u0621\u0627"
182184
"a\u0304\u02BCa\u0304": "\u0627\u0621\u0627"
183185

@@ -205,34 +207,34 @@ roman_to_script:
205207
"a\u0304\u02BEi": "\u0627\u0626"
206208
"a\u0304\u02BC": "\u0627\u0621"
207209
"a\u0304\u02BE": "\u0627\u0621"
208-
"A\u0304%": "\u0622"
209-
"a\u0304%": "\u0622"
210+
"%A\u0304": "\u0622"
211+
"%a\u0304": "\u0622"
210212
"A\u0304": "\u0627"
211213
"a\u0304": "\u0627"
212214

213215
# These next two lines were intended to convert to alif-ayn when it is at
214216
# the beginning of a word, definite or indefinite (i.e.
215217
# "al-a[ayn]ma[macron]l" or "[space]a[ayn]ma[macron]l").
216-
"A\u02BB%": "\u0623\u0639"
217-
"a\u02BB%": "\u0623\u0639"
218+
"%A\u02BB": "\u0623\u0639"
219+
"%a\u02BB": "\u0623\u0639"
218220
"a\u02BB": "\u0639"
219221
"A\u0301": "\u0649"
220222
"a\u0301": "\u0649"
221223

222224
"ayy": "\u064A"
223-
"A%": "\u0623"
224-
"a%": "\u0627"
225+
"%A": "\u0623"
226+
"%a": "\u0627"
225227
"A": "\u0623"
226228
"a": ""
227229

228230
# I - Capital I at beginning of word is usually alif hamzah-below.
229231

230-
"i\u0304%": "\u064A"
232+
"%i\u0304": "\u064A"
231233
"i\u0304y": "\u064A"
232234
"iy": "\u064A"
233-
"I\u0304%": "\u0625\u064A"
235+
"%I\u0304": "\u0625\u064A"
234236
"i\u0304": "\u064A"
235-
"\u02BBI%": "\u0639"
237+
"%\u02BBI": "\u0639"
236238

237239
# "i\u02BB": "\u0625\u0639"
238240

@@ -241,26 +243,26 @@ roman_to_script:
241243
"i\u02BE": "\u0626"
242244
"i\u02BC": "\u0627\u0626"
243245

244-
"I%": "\u0625"
245-
"i%": "\u0625"
246+
"%I": "\u0625"
247+
"%i": "\u0625"
246248
"I": "\u0625"
247249
"i": ""
248250

249251
# U
250252

251253
"u\u0304\u02BE": "\u0624"
252254
"u\u0304\u02BC": "\u0624"
253-
"U\u0304w%": "\u0623\u0648"
254-
"u\u0304w%": "\u0623\u0648"
255-
"U\u0304%": "\u0623\u0648"
256-
"u\u0304%": "\u0623\u0648"
255+
"%U\u0304w": "\u0623\u0648"
256+
"%u\u0304w": "\u0623\u0648"
257+
"%U\u0304": "\u0623\u0648"
258+
"%u\u0304": "\u0623\u0648"
257259
"u\u0304w": "\u0648"
258260
"u\u0304": "\u0648"
259261
"u\u02BE": "\u0624"
260262
"u\u02BC": "\u0624"
261263

262-
"U%": "\u0623"
263-
"u%": "\u0623"
264+
"%U": "\u0623"
265+
"%u": "\u0623"
264266
"U": "\u0623"
265267
"u": ""
266268

@@ -361,8 +363,8 @@ roman_to_script:
361363
# hamza - not romanized
362364
# "\u0621"
363365
# hamza (alone in final position)
364-
"%\u02BE": "\u0621"
365-
"%\u02BC": "\u0621"
366+
"\u02BE%": "\u0621"
367+
"\u02BC%": "\u0621"
366368

367369
# Do not know what, if anything, is needed here:
368370
# tatweel:

0 commit comments

Comments
 (0)