You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Found at work while using BEX to parse our JSP files. Below is the fix I found. I'll also add some unit tests.
/**
 * Scans JSP text one character at a time and records which ranges of the text are covered by
 * which {@link ParsingState}: HTML tags, double- and single-quoted literals, JSP comments
 * ({@code <%-- --%>}), HTML comments ({@code <!-- -->}), Java comments inside JSP code, and
 * JSP code blocks ({@code <%= %>}, {@code <%! %>}, {@code <% %>}).
 *
 * <p>Open states are tracked on a stack; two parallel stacks are handed to the push / pop
 * helpers together with the state stack (presumably the start index of each open state and of
 * its parent - confirm against the helpers). Two flags gate the branches: {@code isJava} is set
 * while inside a JSP code block and {@code isTag} while inside an HTML tag.</p>
 *
 * <p>Known limitations (TODO):</p>
 * <ul>
 * <li>This was derived from the Java parser and still needs enhancing for JSP; see
 * https://www.tutorialspoint.com/jsp/jsp_syntax.htm</li>
 * <li>Unicode and other escapes inside string literals are not handled.</li>
 * <li>If several states are still open at the end of the text (which suggests malformed input),
 * only the innermost one is recorded.</li>
 * </ul>
 *
 * @param text the JSP text to scan
 * @return the range map built from every state range recorded during the scan
 */
public static ImmutableIntRangeMap<ParsingState> parseJSPTextStates(final CharSequence text) {
    // TODO: make a RangeMap class that correctly handles nested ranges.
    // Original note: "stuff <%= expression%> more stuff" did not work as expected, because
    // "more stuff" after the expression should be seen as part of the String literal but the
    // last range (the expression) was already over. Idea: end the outer state when entering an
    // inner state, then start a new state based on the outer state when leaving the inner one.
    //
    // When adding a new record, check for overlap using the logic below.
    // An overlap occurs if and only if
    //   a) the added range's start is part of an existing range
    //      * find the existing range in the map and see if the added range's start is in the middle
    //      * BEXUtilities.getEntryInRanges
    //   b) an existing range's start is contained in the new range
    //      * do a subRange check on the existing NavigableMap and see if there are any entries
    // If there's an overlap, handle it by breaking the ranges apart into pieces.
    ImmutableIntRangeMap.Builder<ParsingState> builder = ImmutableIntRangeMap.builder();
    ArrayDeque<ParsingState> stateStack = new ArrayDeque<>();
    ArrayDeque<Integer> startTextInfoStack = new ArrayDeque<>();
    ArrayDeque<Integer> parentStartStack = new ArrayDeque<>();

    // True while inside a JSP code block (<%= ... %>, <%! ... %>, <% ... %>)
    boolean isJava = false;
    // True while inside an HTML tag
    boolean isTag = false;

    // TODO: should this be refactored to track an "expected end" token instead?

    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        ParsingState currentState = unwrapParsingState(stateStack.peek());

        if (currentState == IN_STRING_LITERAL) {
            if (c == '\\') {
                // Escape next character; stop scanning if there is no next character
                if (nextChar(text, i) == '\0') {
                    break;
                }
                i++;
            } else if (c == '"') {
                popParsingState(i, builder, stateStack, startTextInfoStack, parentStartStack);
            } else if (isTag && hasText(text, i, "<%=")) {
                // JSP expression embedded in a double-quoted literal inside a tag
                pushNextLevelParsingState(IN_EXPRESSION_BLOCK, i, builder, stateStack, startTextInfoStack,
                        parentStartStack);
                i += 2;
                isJava = true;
            }
            // Other characters don't matter
            // TODO: handle unicode and other escaping in String literal
        } else if (currentState == IN_SECONDARY_STRING_LITERAL) {
            if (c == '\\') {
                // Escape next character; stop scanning if there is no next character
                if (nextChar(text, i) == '\0') {
                    break;
                }
                i++;
            } else if (c == '\'') {
                popParsingState(i, builder, stateStack, startTextInfoStack, parentStartStack);
            } else if (hasText(text, i, "<%=")) {
                // JSP expression embedded in a single-quoted literal
                pushNextLevelParsingState(IN_EXPRESSION_BLOCK, i, builder, stateStack, startTextInfoStack,
                        parentStartStack);
                i += 2;
                // Must mirror the double-quoted branch: the "%>" branch below is guarded by
                // isJava, so without this the closing "%>" is not recognized and its '>' can be
                // taken for the end of the enclosing tag instead.
                isJava = true;
            }
            // Other characters don't matter
            // TODO: handle unicode and other escaping in String literal
            // TODO: Java comments only valid in <% code block %>
        } else if (isJava && hasText(text, i, "%>")) {
            isJava = false;

            if (currentState != IN_EXPRESSION_BLOCK) {
                // A nested state (such as a line comment) is still open:
                // end it on the prior character
                popParsingState(i - 1, builder, stateStack, startTextInfoStack, parentStartStack);
            }

            // Skip the '>' and end the code block on it
            i++;
            popParsingState(i, builder, stateStack, startTextInfoStack, parentStartStack);
        } else if (isJava && currentState == IN_LINE_COMMENT) {
            if (c == '\n' || c == '\r') {
                // The comment ends on the character before the line terminator
                popParsingState(i - 1, builder, stateStack, startTextInfoStack, parentStartStack);
                i = handleLineTerminator(i, c, text, builder, stateStack, startTextInfoStack, parentStartStack);
            }
            // Other characters don't matter
        } else if (isJava && currentState == IN_MULTILINE_COMMENT) {
            // Java block comment inside JSP code
            if (hasText(text, i, "*/")) {
                i++;
                popParsingState(i, builder, stateStack, startTextInfoStack, parentStartStack);
            }
        } else if (currentState == IN_MULTILINE_COMMENT) {
            // JSP comment
            if (hasText(text, i, "--%>")) {
                i += 3;
                popParsingState(i, builder, stateStack, startTextInfoStack, parentStartStack);
            }
        } else if (currentState == IN_SECONDARY_MULTILINE_COMMENT) {
            // HTML comment
            if (hasText(text, i, "-->")) {
                i += 2;
                popParsingState(i, builder, stateStack, startTextInfoStack, parentStartStack);
            }
        } else if (isJava && c == '/' && nextChar(text, i) == '/') {
            pushNextLevelParsingState(IN_LINE_COMMENT, i, builder, stateStack, startTextInfoStack,
                    parentStartStack);
            i++;
        } else if (isJava && c == '/' && nextChar(text, i) == '*') {
            pushNextLevelParsingState(IN_MULTILINE_COMMENT, i, builder, stateStack, startTextInfoStack,
                    parentStartStack);
            i++;
        } else if (c == '"' && isTag) {
            pushNextLevelParsingState(IN_STRING_LITERAL, i, builder, stateStack, startTextInfoStack,
                    parentStartStack);
        } else if (c == '\'' && isTag) {
            pushNextLevelParsingState(IN_SECONDARY_STRING_LITERAL, i, builder, stateStack,
                    startTextInfoStack, parentStartStack);
        } else if (c == '"' && isJava) {
            pushParsingState(IN_STRING_LITERAL, i, stateStack, startTextInfoStack, parentStartStack);
        } else if (c == '\'' && isJava) {
            pushParsingState(IN_SECONDARY_STRING_LITERAL, i, stateStack, startTextInfoStack, parentStartStack);
        } else if (hasText(text, i, "<%--")) {
            // JSP comment; must be checked before the shorter "<%" prefixes below
            pushParsingState(IN_MULTILINE_COMMENT, i, stateStack, startTextInfoStack, parentStartStack);
            i += 3;
        } else if (hasText(text, i, "<!--")) {
            // HTML comment
            pushParsingState(IN_SECONDARY_MULTILINE_COMMENT, i, stateStack, startTextInfoStack, parentStartStack);
            i += 3;
        } else if (hasText(text, i, "<%=")) {
            // In Java expression
            pushParsingState(IN_EXPRESSION_BLOCK, i, stateStack, startTextInfoStack, parentStartStack);
            i += 2;
            isJava = true;
        } else if (hasText(text, i, "<%!")) {
            // "<%!" block; recorded with the same state as an expression
            pushParsingState(IN_EXPRESSION_BLOCK, i, stateStack, startTextInfoStack, parentStartStack);
            i += 2;
            isJava = true;
        } else if (hasText(text, i, "<%")) {
            // In Java scriptlet
            pushParsingState(IN_EXPRESSION_BLOCK, i, stateStack, startTextInfoStack, parentStartStack);
            i++;
            isJava = true;
        } else if (c == '<' && !isJava && !isTag) {
            pushParsingState(IN_TAG, i, stateStack, startTextInfoStack, parentStartStack);
            isTag = true;
        } else if (c == '>' && isTag && !isJava) {
            isTag = false;
            popParsingState(i, builder, stateStack, startTextInfoStack, parentStartStack);
        } else if (Character.isWhitespace(c)) {
            i = handleWhitespace(i, c, text, builder, stateStack, startTextInfoStack, parentStartStack);
        }
    }

    if (!stateStack.isEmpty()) {
        // A state was still open when the text ended; record it up to the end of the text.
        // TODO: what if there are multiple entries?
        // (this would suggest improperly formatted code)
        int startTextInfo = startTextInfoStack.pop();
        // TODO: does there need to be a parent?

        if (startTextInfo != text.length()) {
            builder.put(IntBEXRange.of(startTextInfo, text.length()), stateStack.pop());
        }
    }

    return builder.build();
}
The text was updated successfully, but these errors were encountered:
Found at work while using BEX to parse our JSP files. Below is the fix I found. I'll also add some unit tests.
The text was updated successfully, but these errors were encountered: