@@ -13,10 +13,14 @@ def parse_whatsapp_chat(file):
13
13
timestamp = match .group (1 ).strip ('[]' )
14
14
sender = match .group (2 )
15
15
message = match .group (3 )
16
- # Check if the message is not a placeholder for media
16
+
17
+ # Check if the message is not a placeholder for media; might need to add additional strings for stickers, calls, etc.
17
18
if message not in ["audio omitted" , "image omitted" , "video omitted" ]:
18
-
19
- timestamp = timestamp .replace (' a.m.' , ' AM' ).replace (' p.m.' , ' PM' ).replace ('a.m.' , ' AM' ).replace ('p.m.' , ' PM' )
19
+
20
+ # Normalize different AM/PM representations
21
+ timestamp = timestamp .replace (' a.m.' , ' AM' ).replace (' p.m.' , ' PM' )
22
+ timestamp = timestamp .replace ('a. m.' , ' AM' ).replace ('p. m.' , ' PM' )
23
+ timestamp = timestamp .replace ('a.m.' , ' AM' ).replace ('p.m.' , ' PM' )
20
24
21
25
# List of possible date formats to handle various cases
22
26
date_formats = [
@@ -27,18 +31,27 @@ def parse_whatsapp_chat(file):
27
31
'%d/%m/%y, %I:%M:%S %p' , # 12/08/24, 08:57:27 AM
28
32
'%d/%m/%y, %H:%M:%S' , # 23/05/24, 21:44:49 (24-hour format)
29
33
'%d/%m/%Y, %H:%M:%S' , # 23/05/2024, 21:44:49 (24-hour format with full year)
30
- '%m/%d/%Y, %I:%M:%S %p' # 08/12/2024, 8:57:27 PM
34
+ '%m/%d/%Y, %I:%M:%S %p' , # 08/12/2024, 8:57:27 PM
35
+ '%H:%M, %d/%m/%Y' , # 10:03, 12/3/2024
36
+ '%H:%M, %m/%d/%Y' , # 10:03, 3/12/2024 (US format)
37
+ '%H:%M, %d/%m/%y' , # 10:03, 12/3/24 (short year format)
38
+ '%H:%M, %m/%d/%y' , # 10:03, 3/12/24 (short year, US format)
39
+ '%I:%M %p, %d/%m/%Y' , # 12:28 PM, 22/8/2022 (12-hour AM/PM format with day/month/year)
40
+ '%I:%M %p, %m/%d/%Y' , # 12:28 PM, 8/22/2022 (US format)
41
+ '%I:%M %p, %d/%m/%y' , # 12:28 PM, 22/8/22 (short year format)
42
+ '%I:%M %p, %m/%d/%y' , # 12:28 PM, 8/22/22 (short year, US format)
43
+ '%I:%M %p, %d/%m/%Y' , # 12:28 p.m., 22/8/2022 (matches once 'p.m.' has been normalized to 'PM')
31
44
]
32
45
33
46
for date_format in date_formats :
34
47
try :
35
48
date_time_obj = datetime .strptime (timestamp , date_format )
36
49
break
37
50
except ValueError :
38
- continue
39
- else :
40
- # If no format matched, raise an error or handle it accordingly
41
- raise ValueError ( f"Timestamp format not recognized: { timestamp } " )
51
+ date_time_obj = None
52
+
53
+ if date_time_obj is None :
54
+ continue
42
55
43
56
data .append ([date_time_obj , sender , message ])
44
57
0 commit comments