aviadrom committed on
Commit
5178dd6
1 Parent(s): f99916c

Create preprocessing.py

Browse files
Files changed (1) hide show
  1. preprocessing.py +45 -0
preprocessing.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Character-level lookup table used by transliterate_arabic_to_hebrew().
#
# Each key is one Arabic character; each value is the Hebrew text that
# replaces it.  Most values are a single Hebrew letter, but some are a
# two-character sequence (a base letter followed by a "׳" or "ֿ" mark), so
# the transliterated text can be longer than the input.  Several Arabic
# characters (the alef/hamza variants) intentionally collapse onto the same
# Hebrew letter, which means the mapping is not reversible.
ARABIC_TO_HEBREW_LETTER_MAP = {
    "ا": "א",
    "ب": "ב",
    "ج": "ג׳",
    "غ": "ג",
    "د": "ד",
    "ذ": "דֿ",
    "ه": "ה",
    "ة": "ה׳",
    "و": "ו",
    "ز": "ז",
    "ح": "ח",
    "ط": "ט",
    "ظ": "ט׳",
    "ي": "י",
    "ك": "כ",
    "خ": "כ׳",
    "ل": "ל",
    "م": "מ",
    "ن": "נ",
    "س": "ס",
    "ع": "ע",
    "ف": "פ",
    "ص": "צ",
    "ض": "צ׳",
    "ق": "ק",
    "ر": "ר",
    "ش": "ש",
    "ت": "ת",
    "ث": "ת׳",
    "ء": "א",
    "ئ": "י",
    "ؤ": "ו",
    "ى": "א",
    "؟": "?",
    "إ": "א",
    "آ": "א",
    "أ": "א",
}


def transliterate_arabic_to_hebrew(text: str) -> str:
    """Replace each mapped Arabic character in *text* with Hebrew script.

    The text is processed one character at a time using
    ``ARABIC_TO_HEBREW_LETTER_MAP``.  Characters that have no entry in the
    table (Latin letters, digits, whitespace, punctuation, ...) are copied
    to the output unchanged.

    Args:
        text: The string to transliterate.  May be empty.

    Returns:
        A new string with every mapped character substituted.  It can be
        longer than *text* because some table values are two characters.

    Raises:
        TypeError: If *text* is not a ``str``.
    """
    # An explicit raise instead of ``assert``: assertions are removed when
    # Python runs with -O, which would let a list/tuple of strings slip
    # through and be silently joined into a string.
    if not isinstance(text, str):
        raise TypeError("Cannot transliterate non-string values")

    # Fall back to the character itself when the table has no entry for it.
    return "".join([ARABIC_TO_HEBREW_LETTER_MAP.get(char, char) for char in text])