Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OTWO-7093 Add support for powershell #88

Merged
merged 2 commits into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:

jobs:
test:
runs-on: ubuntu-18.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: ruby/setup-ruby@v1
Expand Down
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,16 @@ Contributing
-------------

* Observe an existing PR contribution and follow its pattern. For example, see [this](https://github.com/blackducksoftware/ohcount/pull/76/files).
* Run `./build` to compile the ragel files.
* While writing the **test/expected_dir** files, disable any whitespace/tab replacing options from your editor.
* Ohcount output has tabs in it, so the **test/expected_dir** also needs to contain tab characters.
* Sample format of **test/expected_dir** is as follows. There is a tab character after dart, code & comment:
* Sample format of **test/expected_dir** is as follows. There is a **Tab** character after dart, code & comment:
```
dart code void main() {
dart comment // Line comment
```
* Run tests with `./build tests`.
* Some editors convert **Tab** to spaces. The following steps help ensure that the proper character is added:
  * Open the file in the Vim editor.
  * Run `:set list`. This makes hidden characters such as **Tab** visible.
  * Type *dart*, then press `Ctrl+V` followed by `Tab` to insert a literal tab character.
  * Run the tests to confirm these changes: `./build tests`.
Binary file modified ruby/x86_64-linux_ubuntu/ohcount.so
Binary file not shown.
1 change: 1 addition & 0 deletions src/hash/extensions.gperf
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ pp, DISAMBIGUATE("pp")
ppt, BINARY
pro, DISAMBIGUATE("pro")
ps, LANG_POSTSCRIPT
ps1, LANG_POWERSHELL
py, LANG_PYTHON
qml, LANG_QML
qt, BINARY
Expand Down
1 change: 1 addition & 0 deletions src/hash/languages.gperf
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ perl, LANG_PERL, "Perl", 0
php, LANG_PHP, "PHP", 0
pike, LANG_PIKE, "Pike", 0
postscript, LANG_POSTSCRIPT, "PostScript", 1
powershell, LANG_POWERSHELL, "PowerShell", 0
prolog, LANG_PROLOG, "Prolog", 0
puppet, LANG_PUPPET, "Puppet", 0
python, LANG_PYTHON, "Python", 0
Expand Down
2 changes: 2 additions & 0 deletions src/hash/parsers.gperf
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
#include "../parsers/phphtml.h"
#include "../parsers/pike.h"
#include "../parsers/postscript.h"
#include "../parsers/powershell.h"
#include "../parsers/prolog.h"
#include "../parsers/puppet.h"
#include "../parsers/python.h"
Expand Down Expand Up @@ -198,6 +199,7 @@ perl, parse_perl
php, parse_phtml
pike, parse_pike
postscript, parse_postscript
powershell, parse_powershell
prolog, parse_prolog
puppet, parse_puppet
python, parse_python
Expand Down
1 change: 1 addition & 0 deletions src/languages.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
#define LANG_PHP "php"
#define LANG_PIKE "pike"
#define LANG_POSTSCRIPT "postscript"
#define LANG_POWERSHELL "powershell"
#define LANG_PROLOG "prolog"
#define LANG_PUPPET "puppet"
#define LANG_PYTHON "python"
Expand Down
139 changes: 139 additions & 0 deletions src/parsers/powershell.rl
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/************************* Required for every parser *************************/
#ifndef OHCOUNT_POWERSHELL_PARSER_H
#define OHCOUNT_POWERSHELL_PARSER_H

#include "../parser_macros.h"

// Language name this parser passes to the callback (see powershell_ecallback);
// its value is the LANG_POWERSHELL string.
const char *POWERSHELL_LANG = LANG_POWERSHELL;

// Names of the language's entities. The entity machine indexes this array
// with the POWERSHELL_* constants below, so both lists must stay in the
// same order.
const char *powershell_entities[] = {
"space", "comment", "string", "any"
};

// Constants associated with the entities; each value is the index of the
// matching name in powershell_entities.
enum {
POWERSHELL_SPACE = 0, POWERSHELL_COMMENT, POWERSHELL_STRING, POWERSHELL_ANY
};

/*****************************************************************************/

%%{
machine powershell;
write data;
include common "common.rl";

# Line counting machine

# Action run by the line-counting machine. It dispatches on the current value
# of entity, which the invoking rule sets just before the action fires.
# NOTE(review): ls, code, std_internal_newline and std_newline are not defined
# in this file; presumably they are macros from parser_macros.h - confirm.
action powershell_ccallback {
switch(entity) {
case POWERSHELL_SPACE:
ls
break;
case POWERSHELL_ANY:
code
break;
case INTERNAL_NL:
std_internal_newline(POWERSHELL_LANG)
break;
case NEWLINE:
/* Last case of the switch, so no break is needed. */
std_newline(POWERSHELL_LANG)
}
}

# Line comment: a '#' followed by everything up to the end of the line.
# The comment action fires on the '#' itself.
powershell_line_comment = '#' @comment nonnewline*;
# Block comment: '<#' ... '#>'. Inside the comment:
#  - each newline is reported through powershell_ccallback as INTERNAL_NL;
#  - whitespace is skipped without firing an action;
#  - every other character fires the comment action.
# The :>> operator ends the repetition once the closing '#>' has been matched.
powershell_block_comment =
'<#' @comment (
newline %{ entity = INTERNAL_NL; } %powershell_ccallback
|
ws
|
(nonnewline - ws) @comment
)* :>> '#>';
# Any PowerShell comment, line or block.
powershell_comment = powershell_line_comment | powershell_block_comment;

# String literals.
#
# PowerShell does not use the backslash as an escape character:
#  - single-quoted strings are verbatim. The only special sequence is a
#    doubled single quote, which this machine simply scans as two adjacent
#    strings (the line is counted as code either way);
#  - double-quoted strings use the backtick as the escape character.
# Treating the backslash as an escape would swallow the closing quote of a
# literal that ends in a backslash (a Windows directory path, for example)
# and would count the following lines as string content.
#
# In both forms each embedded newline is reported through powershell_ccallback
# as INTERNAL_NL, whitespace fires no action, and any other character fires
# the code action.
powershell_sq_str =
  '\'' @enqueue @code (
    newline %{ entity = INTERNAL_NL; } %powershell_ccallback
    |
    ws
    |
    [^\r\n\f\t '] @code
  )* '\'' @commit;
powershell_dq_str =
  '"' @enqueue @code (
    newline %{ entity = INTERNAL_NL; } %powershell_ccallback
    |
    '`' newline %{ entity = INTERNAL_NL; } %powershell_ccallback
    |
    ws
    |
    [^\r\n\f\t "`] @code
    |
    '`' nonnewline @code
  )* '"' @commit;
# TODO: here-strings; see the heredoc handling in ruby.rl for details.
powershell_string = powershell_sq_str | powershell_dq_str;

# Scanner used when counting lines (entry point powershell_en_powershell_line).
# Whitespace, newlines and any other non-space character set entity and then
# run powershell_ccallback. The comment and string patterns have no scanner
# action because their definitions already embed the actions they need.
powershell_line := |*
spaces ${ entity = POWERSHELL_SPACE; } => powershell_ccallback;
powershell_comment;
powershell_string;
newline ${ entity = NEWLINE; } => powershell_ccallback;
^space ${ entity = POWERSHELL_ANY; } => powershell_ccallback;
*|;

# Entity machine

# Reports the matched token to the user callback: the language name, the name
# of the current entity, the token start/end computed from ts and te via cint,
# and the caller-supplied userdata.
action powershell_ecallback {
callback(POWERSHELL_LANG, powershell_entities[entity], cint(ts), cint(te), userdata);
}

# Entity-level comment patterns. Unlike the line-counting versions they carry
# no per-character actions; the whole match is reported at once.
powershell_line_comment_entity = '#' nonnewline*;
powershell_block_comment_entity = '<#' any* :>> '#>';
powershell_comment_entity = powershell_line_comment_entity | powershell_block_comment_entity;

# Scanner used when reporting entities (entry point
# powershell_en_powershell_entity). Only whitespace runs and comments are
# reported; any other non-space character is consumed without a callback.
powershell_entity := |*
space+ ${ entity = POWERSHELL_SPACE; } => powershell_ecallback;
powershell_comment_entity ${ entity = POWERSHELL_COMMENT; } => powershell_ecallback;
# TODO: report the remaining entities (the "string" and "any" entries of
# powershell_entities are never emitted by this machine).
^space;
*|;
}%%

/************************* Required for every parser *************************/

/* Parses a string buffer with powershell code.
 *
 * @param *buffer The string to parse.
 * @param length The length of the string to parse.
 * @param count Integer flag specifying whether or not to count lines. If yes,
 *   uses the Ragel machine optimized for counting. Otherwise uses the Ragel
 *   machine optimized for returning entity positions.
 * @param *callback Callback function. If count is set, callback is called for
 *   every line of code, comment, or blank with 'lcode', 'lcomment', and
 *   'lblank' respectively. Otherwise callback is called for each entity found.
 * @param *userdata Opaque pointer; the entity machine passes it unchanged as
 *   the last argument of callback.
 */
void parse_powershell(char *buffer, int length, int count,
void (*callback) (const char *lang, const char *entity, int s,
int e, void *udata),
void *userdata
) {
// NOTE(review): init is not defined in this file; presumably a macro from
// parser_macros.h that declares the parser state used below - confirm.
init

%% write init;
// Override the start state: line-counting scanner when count is non-zero,
// entity scanner otherwise.
cs = (count) ? powershell_en_powershell_line : powershell_en_powershell_entity;
%% write exec;

// if no newline at EOF; callback contents of last line
if (count) { process_last_line(POWERSHELL_LANG) }
}

#endif

/*****************************************************************************/
1 change: 1 addition & 0 deletions test/unit/detector_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ void test_detector_detect_polyglot() {
ASSERT_DETECT(LANG_LIVECODE, "foo.lc");
ASSERT_DETECT(LANG_LIVECODE, "script.utf8");
ASSERT_DETECT(LANG_POSTSCRIPT, "foo.ps");
ASSERT_DETECT(LANG_POWERSHELL, "foo.ps1");
ASSERT_DETECT(LANG_SWIFT, "foo.swift");
ASSERT_DETECT(LANG_UMPLE, "foo.umple");
ASSERT_NODETECT("empty.inc");
Expand Down
2 changes: 2 additions & 0 deletions test/unit/parser_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ void test_parser_verify_entity(SourceFile *sf, const char *entity,
#include "parsers/test_perl.h"
#include "parsers/test_pike.h"
#include "parsers/test_postscript.h"
#include "parsers/test_powershell.h"
#include "parsers/test_puppet.h"
#include "parsers/test_python.h"
#include "parsers/test_qml.h"
Expand Down Expand Up @@ -336,6 +337,7 @@ void all_parser_tests() {
all_perl_tests();
all_pike_tests();
all_postscript_tests();
all_powershell_tests();
all_python_tests();
all_r_tests();
all_racket_tests();
Expand Down
18 changes: 18 additions & 0 deletions test/unit/parsers/test_powershell.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/* Line-counting test: parses " #comment" (a line comment preceded by one
 * space) as powershell and checks the result with test_parser_verify_parse.
 * NOTE(review): the trailing arguments are presumably the expected language,
 * expected code text, expected comment text and expected blank-line count -
 * confirm against the helper's definition in parser_test.h. */
void test_powershell_comments() {
test_parser_verify_parse(
test_parser_sourcefile("powershell", " #comment"),
"powershell", "", "#comment", 0
);
}

/* Entity test: parses " #comment" as powershell and checks, through
 * test_parser_verify_entity, for a "comment" entity with the text "#comment"
 * (the leading space is not part of the expected text). */
void test_powershell_comment_entities() {
test_parser_verify_entity(
test_parser_sourcefile("powershell", " #comment"),
"comment", "#comment"
);
}

/* Runs every powershell parser test defined in this file; called from
 * all_parser_tests() in parser_test.h. */
void all_powershell_tests() {
test_powershell_comments();
test_powershell_comment_entities();
}