This repository was archived by the owner on Oct 16, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDelayParsingUtility.java
More file actions
177 lines (157 loc) · 5.5 KB
/
DelayParsingUtility.java
File metadata and controls
177 lines (157 loc) · 5.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
package utility.delay;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Parser for free-form delay strings.
 * <p>
 * The input is normalised (lower-cased, whitespace and a fixed set of symbols removed, then
 * every {@link DelayFixes} substitution applied) and searched for a number followed by
 * {@code m...} (minutes), a number followed by {@code h...} (hours), or a bare number at the
 * end of the string. Strings containing {@code -} or {@code /} are split into parts that are
 * parsed separately and averaged.
 */
public class DelayParsingUtility {
    // regex for minutes delay identification: a number immediately followed by "m"
    private static final String MIN_REGEX = "([0-9]*-*[0-9]+)(m.*)";
    // regex for hours delay identification: a number immediately followed by "h"
    private static final String HR_REGEX = "([0-9]*-*[0-9]+)(h.*)";
    // regex for isolated delay number identification: digits at the very end of the string
    private static final String ISOLATED_VALUE_REGEX = "([0-9]+$)";

    // Pattern instances are immutable, so they are compiled once and shared.
    private static final Pattern MIN_PATTERN = Pattern.compile(MIN_REGEX);
    private static final Pattern HOUR_PATTERN = Pattern.compile(HR_REGEX);
    private static final Pattern ISOLATED_PATTERN = Pattern.compile(ISOLATED_VALUE_REGEX);

    /**
     * Parse a delay string.
     *
     * @param dirtyDelay string containing the delay to parse
     * @return total delay in minutes; when the string holds several parts separated by
     *         {@code -} or {@code /}, the mean of the parts that could be parsed
     * @throws DelayFormatException if the string is {@code null}, contains no delay
     *         information, or contains a number too large to be read as a {@code long}
     */
    public static Double parseDelay(String dirtyDelay) throws DelayFormatException {
        if (dirtyDelay == null) {
            throw new DelayFormatException("Could not find any delay information in string: " + dirtyDelay);
        }

        // clean the original string, then repair known conversion errors
        String cleaned = fixDelay(normalise(dirtyDelay));

        double totalMinutes = 0;
        boolean foundSomething = false;
        try {
            if (cleaned.contains("-") || cleaned.contains("/")) {
                // several values: evaluate every part separately and return their mean
                String[] parts = cleaned.split("[-/]");
                boolean singleAsHours = false;
                int counter = 0;
                // process parts from right to left: the unit usually follows the last value
                for (int i = parts.length - 1; i >= 0; i--) {
                    DelayInfo current = parseCleanDelay(parts[i], singleAsHours);
                    if (current != null) {
                        counter++;
                        totalMinutes += (current.getHours()*60) + current.getMinutes();
                        // if this part carried hours, bare numbers further left are hours too:
                        // in "1/2hours" the leading "1" must be read as one hour
                        singleAsHours = current.hasHoursData();
                    }
                }
                if (counter > 0) {
                    foundSomething = true;
                    totalMinutes = totalMinutes / counter;
                }
            } else {
                // single value of delay in string
                DelayInfo current = parseCleanDelay(cleaned, false);
                if (current != null) {
                    foundSomething = true;
                    totalMinutes = (current.getHours()*60) + current.getMinutes();
                }
            }
        } catch (NumberFormatException e) {
            // A digit run that does not fit in a long must surface as the documented
            // exception type. Only the single-String constructor of DelayFormatException
            // is known here, so the parser's message is embedded instead of chained.
            throw new DelayFormatException("Unparseable delay value in string: " + dirtyDelay
                    + " (" + e.getMessage() + ")");
        }

        if (!foundSomething) {
            throw new DelayFormatException("Could not find any delay information in string: " + dirtyDelay);
        }
        return totalMinutes;
    }

    /**
     * Lower-cases the input and strips whitespace and the symbols {@code + . , -- ? : !}.
     * <p>
     * The removal order is significant: {@code --} is removed before {@code ?}, {@code :}
     * and {@code !}, so two dashes separated only by one of those symbols are not collapsed.
     *
     * @param raw original, non-null delay string
     * @return normalised string
     */
    private static String normalise(String raw) {
        // Locale.ROOT keeps the result independent of the JVM default locale
        return raw.toLowerCase(Locale.ROOT)
                .replaceAll("\\s+", "")
                .replace("+", "")
                .replace(".", "")
                .replace(",", "")
                .replace("--", "")
                .replace("?", "")
                .replace(":", "")
                .replace("!", "");
    }

    /**
     * Parse a single part of a cleaned delay string.
     *
     * @param cleanDelay cleaned input string
     * @param singleAsHours whether a bare trailing number counts as hours ({@code true})
     *        or minutes ({@code false})
     * @return delay information, or {@code null} when neither hours nor minutes were found
     * @throws NumberFormatException if a matched digit run does not fit in a {@code long}
     */
    private static DelayInfo parseCleanDelay(String cleanDelay, boolean singleAsHours) {
        Long minutes = null;
        Long hours = null;

        Matcher minMatcher = MIN_PATTERN.matcher(cleanDelay);
        if (minMatcher.find()) {
            // minutes in string
            minutes = Long.parseLong(minMatcher.group(1));
        }

        Matcher hourMatcher = HOUR_PATTERN.matcher(cleanDelay);
        if (hourMatcher.find()) {
            // hours in string
            hours = Long.parseLong(hourMatcher.group(1));
        }

        // A bare trailing number is used only when the unit it would fill is still unknown,
        // which ignores typos like 15mins0 (for 15 minutes) or 3hr7 (for 3 hours).
        Matcher isolatedMatcher = ISOLATED_PATTERN.matcher(cleanDelay);
        if (isolatedMatcher.find()) {
            if (singleAsHours && hours == null) {
                // isolated value to consider as hour value
                hours = Long.parseLong(isolatedMatcher.group(1));
            } else if (!singleAsHours && minutes == null) {
                // isolated value to consider as minute value
                minutes = Long.parseLong(isolatedMatcher.group(1));
            }
        }

        if (hours == null && minutes == null) {
            return null;
        }
        return new DelayInfo(hours == null ? 0 : hours, minutes == null ? 0 : minutes, hours != null);
    }

    /**
     * Repairs conversion errors in the dataset: for every {@link DelayFixes} constant, each
     * occurrence of the constant's name in the string is replaced by the constant's text.
     *
     * @param unfixedDelay original delay string
     * @return fixed string, or the original one if no problem was found
     */
    private static String fixDelay(String unfixedDelay) {
        String fixed = unfixedDelay;
        for (DelayFixes toFix : DelayFixes.values()) {
            String token = toFix.name();
            if (fixed.contains(token)) {
                // literal replacement: neither the token nor the text is a regex
                fixed = fixed.replace(token, toFix.getText());
            }
        }
        return fixed;
    }
}