-../ +../ abbot/ - -
From c46eaa15cf3669a3dde904991b69359ff0cd23ee Mon Sep 17 00:00:00 2001 From: DmitriyLewen <91113035+DmitriyLewen@users.noreply.github.com> Date: Fri, 27 Sep 2024 17:41:31 +0600 Subject: [PATCH] fix: panic to write in close channel (#42) --- pkg/crawler/crawler.go | 28 +- pkg/crawler/crawler_test.go | 61 +- .../{ => happy}/abbot-0.12.3.jar.sha1 | 0 .../{ => happy}/abbot-0.13.0-copy.jar.sha1 | 0 .../{ => happy}/abbot-0.13.0.jar.sha1 | 0 .../{ => happy}/abbot-1.4.0-lite.jar.sha1 | 0 .../testdata/{ => happy}/abbot-1.4.0.jar.sha1 | 0 pkg/crawler/testdata/{ => happy}/abbot.html | 2 +- .../abbot.json => happy/abbot.json.golden} | 0 .../testdata/{ => happy}/abbot_abbot.html | 2 +- .../{ => happy}/abbot_abbot_0.12.3.html | 0 .../{ => happy}/abbot_abbot_0.13.0.html | 0 .../{ => happy}/abbot_abbot_1.4.0.html | 0 pkg/crawler/testdata/{ => happy}/index.html | 2 +- .../testdata/{ => happy}/maven-metadata.xml | 0 pkg/crawler/testdata/sad/abbot.html | 27 + pkg/crawler/testdata/sad/abbot_abbot.html | 26 + pkg/crawler/testdata/sad/httpclient.html | 27 + .../testdata/sad/httpclient_httpclient.html | 26 + pkg/crawler/testdata/sad/index.html | 1047 +++++++++++++++++ pkg/crawler/testdata/sad/maven-metadata.xml | 13 + 21 files changed, 1228 insertions(+), 33 deletions(-) rename pkg/crawler/testdata/{ => happy}/abbot-0.12.3.jar.sha1 (100%) rename pkg/crawler/testdata/{ => happy}/abbot-0.13.0-copy.jar.sha1 (100%) rename pkg/crawler/testdata/{ => happy}/abbot-0.13.0.jar.sha1 (100%) rename pkg/crawler/testdata/{ => happy}/abbot-1.4.0-lite.jar.sha1 (100%) rename pkg/crawler/testdata/{ => happy}/abbot-1.4.0.jar.sha1 (100%) rename pkg/crawler/testdata/{ => happy}/abbot.html (95%) rename pkg/crawler/testdata/{golden/abbot.json => happy/abbot.json.golden} (100%) rename pkg/crawler/testdata/{ => happy}/abbot_abbot.html (98%) rename pkg/crawler/testdata/{ => happy}/abbot_abbot_0.12.3.html (100%) rename pkg/crawler/testdata/{ => happy}/abbot_abbot_0.13.0.html (100%) rename pkg/crawler/testdata/{ => happy}/abbot_abbot_1.4.0.html (100%) rename pkg/crawler/testdata/{ => happy}/index.html (79%) rename pkg/crawler/testdata/{ => happy}/maven-metadata.xml (100%) create mode 100644 pkg/crawler/testdata/sad/abbot.html create mode 100644 pkg/crawler/testdata/sad/abbot_abbot.html create mode 100644 pkg/crawler/testdata/sad/httpclient.html create mode 100644 pkg/crawler/testdata/sad/httpclient_httpclient.html create mode 100644 pkg/crawler/testdata/sad/index.html create mode 100644 pkg/crawler/testdata/sad/maven-metadata.xml diff --git a/pkg/crawler/crawler.go b/pkg/crawler/crawler.go index cfe3fa6..65f2a4c 100644 --- a/pkg/crawler/crawler.go +++ b/pkg/crawler/crawler.go @@ -34,7 +34,6 @@ type Crawler struct { rootUrl string wg sync.WaitGroup urlCh chan string - errOnce sync.Once limit *semaphore.Weighted wrongSHA1Values []string } @@ -58,8 +57,12 @@ func NewCrawler(opt Option) Crawler { } } client.ErrorHandler = func(resp *http.Response, err error, numTries int) (*http.Response, error) { - logger := slog.With(slog.String("url", resp.Request.URL.String()), slog.Int("status_code", resp.StatusCode), - slog.Int("num_tries", numTries)) + logger := slog.Default() + if resp != nil { + logger = slog.With(slog.String("url", resp.Request.URL.String()), slog.Int("status_code", resp.StatusCode), + slog.Int("num_tries", numTries)) + } + if err != nil { logger = logger.With(slog.String("error", err.Error())) } @@ -81,7 +84,6 @@ func NewCrawler(opt Option) Crawler { rootUrl: opt.RootUrl, urlCh: make(chan string, opt.Limit*10), limit: semaphore.NewWeighted(opt.Limit), - errOnce: sync.Once{}, } } @@ -122,13 +124,13 @@ func (c *Crawler) Crawl(ctx context.Context) error { defer c.limit.Release(1) defer c.wg.Done() if err := c.Visit(ctx, url); err != nil { - // There might be a case where we get 2 errors at the same time. - // In this case we close `errCh` after reading the first error - // and get panic for the second error - // That's why we need to return the error once. - c.errOnce.Do(func() { - errCh <- xerrors.Errorf("visit error: %w", err) - }) + select { + // Context can be canceled if we receive an error from another Visit function. + case <-ctx.Done(): + return + case errCh <- err: + return + } } }(url) } @@ -210,8 +212,8 @@ func (c *Crawler) Visit(ctx context.Context, url string) error { // Context can be canceled if we receive an error from another Visit function. case <-ctx.Done(): return - default: - c.urlCh <- url + child + case c.urlCh <- url + child: + continue } } }() diff --git a/pkg/crawler/crawler_test.go b/pkg/crawler/crawler_test.go index 5cd9fee..69a004c 100644 --- a/pkg/crawler/crawler_test.go +++ b/pkg/crawler/crawler_test.go @@ -2,42 +2,64 @@ package crawler_test import ( "context" - "github.com/stretchr/testify/assert" "net/http" "net/http/httptest" "os" "path/filepath" "testing" + "github.com/stretchr/testify/assert" + "github.com/aquasecurity/trivy-java-db/pkg/crawler" ) func TestCrawl(t *testing.T) { tests := []struct { name string + limit int64 fileNames map[string]string goldenPath string filePath string + wantErr string }{ { - name: "happy path", + name: "happy path", + limit: 1, fileNames: map[string]string{ - "/maven2/": "testdata/index.html", - "/maven2/abbot/": "testdata/abbot.html", - "/maven2/abbot/abbot/": "testdata/abbot_abbot.html", - "/maven2/abbot/abbot/maven-metadata.xml": "testdata/maven-metadata.xml", - "/maven2/abbot/abbot/0.12.3/": "testdata/abbot_abbot_0.12.3.html", - "/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar.sha1": "testdata/abbot-0.12.3.jar.sha1", - "/maven2/abbot/abbot/0.13.0/": "testdata/abbot_abbot_0.13.0.html", - "/maven2/abbot/abbot/0.13.0/abbot-0.13.0.jar.sha1": "testdata/abbot-0.13.0.jar.sha1", - "/maven2/abbot/abbot/0.13.0/abbot-0.13.0-copy.jar.sha1": "testdata/abbot-0.13.0-copy.jar.sha1", - "/maven2/abbot/abbot/1.4.0/": "testdata/abbot_abbot_1.4.0.html", - "/maven2/abbot/abbot/1.4.0/abbot-1.4.0.jar.sha1": "testdata/abbot-1.4.0.jar.sha1", - "/maven2/abbot/abbot/1.4.0/abbot-1.4.0-lite.jar.sha1": "testdata/abbot-1.4.0-lite.jar.sha1", + "/maven2/": "testdata/happy/index.html", + "/maven2/abbot/": "testdata/happy/abbot.html", + "/maven2/abbot/abbot/": "testdata/happy/abbot_abbot.html", + "/maven2/abbot/abbot/maven-metadata.xml": "testdata/happy/maven-metadata.xml", + "/maven2/abbot/abbot/0.12.3/": "testdata/happy/abbot_abbot_0.12.3.html", + "/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar.sha1": "testdata/happy/abbot-0.12.3.jar.sha1", + "/maven2/abbot/abbot/0.13.0/": "testdata/happy/abbot_abbot_0.13.0.html", + "/maven2/abbot/abbot/0.13.0/abbot-0.13.0.jar.sha1": "testdata/happy/abbot-0.13.0.jar.sha1", + "/maven2/abbot/abbot/0.13.0/abbot-0.13.0-copy.jar.sha1": "testdata/happy/abbot-0.13.0-copy.jar.sha1", + "/maven2/abbot/abbot/1.4.0/": "testdata/happy/abbot_abbot_1.4.0.html", + "/maven2/abbot/abbot/1.4.0/abbot-1.4.0.jar.sha1": "testdata/happy/abbot-1.4.0.jar.sha1", + "/maven2/abbot/abbot/1.4.0/abbot-1.4.0-lite.jar.sha1": "testdata/happy/abbot-1.4.0-lite.jar.sha1", }, - goldenPath: "testdata/golden/abbot.json", + goldenPath: "testdata/happy/abbot.json.golden", filePath: "indexes/abbot/abbot.json", }, + { + name: "sad path", + limit: 2, + fileNames: map[string]string{ + // index.html file for this test contains many links to avoid case + // when we finish crawl and get error in one time. + // We will get a `panic` because we will try to close `urlCh` in 2 places (after the wait group and after the error) + // In real case it is impossible + "/maven2/": "testdata/sad/index.html", + "/maven2/abbot/": "testdata/sad/abbot.html", + "/maven2/abbot/abbot/": "testdata/sad/abbot_abbot.html", + "/maven2/abbot/abbot/maven-metadata.xml": "testdata/sad/maven-metadata.xml", + "/maven2/HTTPClient/": "testdata/sad/httpclient.html", + "/maven2/HTTPClient/HTTPClient/": "testdata/sad/httpclient_httpclient.html", + "/maven2/HTTPClient/maven-metadata.xml": "testdata/sad/maven-metadata.xml", + }, + wantErr: "decode error:", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -48,18 +70,23 @@ func TestCrawl(t *testing.T) { return } http.ServeFile(w, r, fileName) + w.WriteHeader(http.StatusOK) + return })) defer ts.Close() tmpDir := t.TempDir() cl := crawler.NewCrawler(crawler.Option{ RootUrl: ts.URL + "/maven2/", - Limit: 1, + Limit: tt.limit, CacheDir: tmpDir, }) err := cl.Crawl(context.Background()) - assert.NoError(t, err) + if tt.wantErr != "" { + assert.ErrorContains(t, err, tt.wantErr) + return + } got, err := os.ReadFile(filepath.Join(tmpDir, tt.filePath)) assert.NoError(t, err) diff --git a/pkg/crawler/testdata/abbot-0.12.3.jar.sha1 b/pkg/crawler/testdata/happy/abbot-0.12.3.jar.sha1 similarity index 100% rename from pkg/crawler/testdata/abbot-0.12.3.jar.sha1 rename to pkg/crawler/testdata/happy/abbot-0.12.3.jar.sha1 diff --git a/pkg/crawler/testdata/abbot-0.13.0-copy.jar.sha1 b/pkg/crawler/testdata/happy/abbot-0.13.0-copy.jar.sha1 similarity index 100% rename from pkg/crawler/testdata/abbot-0.13.0-copy.jar.sha1 rename to pkg/crawler/testdata/happy/abbot-0.13.0-copy.jar.sha1 diff --git a/pkg/crawler/testdata/abbot-0.13.0.jar.sha1 b/pkg/crawler/testdata/happy/abbot-0.13.0.jar.sha1 similarity index 100% rename from pkg/crawler/testdata/abbot-0.13.0.jar.sha1 rename to pkg/crawler/testdata/happy/abbot-0.13.0.jar.sha1 diff --git a/pkg/crawler/testdata/abbot-1.4.0-lite.jar.sha1 b/pkg/crawler/testdata/happy/abbot-1.4.0-lite.jar.sha1 similarity index 100% rename from pkg/crawler/testdata/abbot-1.4.0-lite.jar.sha1 rename to pkg/crawler/testdata/happy/abbot-1.4.0-lite.jar.sha1 diff --git a/pkg/crawler/testdata/abbot-1.4.0.jar.sha1 b/pkg/crawler/testdata/happy/abbot-1.4.0.jar.sha1 similarity index 100% rename from pkg/crawler/testdata/abbot-1.4.0.jar.sha1 rename to pkg/crawler/testdata/happy/abbot-1.4.0.jar.sha1 diff --git a/pkg/crawler/testdata/abbot.html b/pkg/crawler/testdata/happy/abbot.html similarity index 95% rename from pkg/crawler/testdata/abbot.html rename to pkg/crawler/testdata/happy/abbot.html index e6c2f99..6f9090c 100644 --- a/pkg/crawler/testdata/abbot.html +++ b/pkg/crawler/testdata/happy/abbot.html @@ -17,7 +17,7 @@
-../ +../ abbot/ - -
../ -../ +../ 0.12..../ 2005-09-20 05:44 - 0.13.0/ 2005-09-20 05:44 - 1.4.0/ 2015-09-22 16:03 - diff --git a/pkg/crawler/testdata/abbot_abbot_0.12.3.html b/pkg/crawler/testdata/happy/abbot_abbot_0.12.3.html similarity index 100% rename from pkg/crawler/testdata/abbot_abbot_0.12.3.html rename to pkg/crawler/testdata/happy/abbot_abbot_0.12.3.html diff --git a/pkg/crawler/testdata/abbot_abbot_0.13.0.html b/pkg/crawler/testdata/happy/abbot_abbot_0.13.0.html similarity index 100% rename from pkg/crawler/testdata/abbot_abbot_0.13.0.html rename to pkg/crawler/testdata/happy/abbot_abbot_0.13.0.html diff --git a/pkg/crawler/testdata/abbot_abbot_1.4.0.html b/pkg/crawler/testdata/happy/abbot_abbot_1.4.0.html similarity index 100% rename from pkg/crawler/testdata/abbot_abbot_1.4.0.html rename to pkg/crawler/testdata/happy/abbot_abbot_1.4.0.html diff --git a/pkg/crawler/testdata/index.html b/pkg/crawler/testdata/happy/index.html similarity index 79% rename from pkg/crawler/testdata/index.html rename to pkg/crawler/testdata/happy/index.html index 6fe2394..9b958f4 100644 --- a/pkg/crawler/testdata/index.html +++ b/pkg/crawler/testdata/happy/index.html @@ -1,7 +1,7 @@Index of / -../ +../ abbot/ - -
\ No newline at end of file diff --git a/pkg/crawler/testdata/maven-metadata.xml b/pkg/crawler/testdata/happy/maven-metadata.xml similarity index 100% rename from pkg/crawler/testdata/maven-metadata.xml rename to pkg/crawler/testdata/happy/maven-metadata.xml diff --git a/pkg/crawler/testdata/sad/abbot.html b/pkg/crawler/testdata/sad/abbot.html new file mode 100644 index 0000000..ffecc04 --- /dev/null +++ b/pkg/crawler/testdata/sad/abbot.html @@ -0,0 +1,27 @@ + + + +Central Repository: abbot + + + + + ++ +abbot
+
++ ++../ +abbot/ ++
+ + + \ No newline at end of file diff --git a/pkg/crawler/testdata/sad/abbot_abbot.html b/pkg/crawler/testdata/sad/abbot_abbot.html new file mode 100644 index 0000000..c1f77e5 --- /dev/null +++ b/pkg/crawler/testdata/sad/abbot_abbot.html @@ -0,0 +1,26 @@ + + + +Central Repository: abbot/abbot + + + + + ++ +abbot/abbot
+
++ +../ +maven-metadata.xml 2015-09-24 14:18 402 ++
+ + + \ No newline at end of file diff --git a/pkg/crawler/testdata/sad/httpclient.html b/pkg/crawler/testdata/sad/httpclient.html new file mode 100644 index 0000000..2812b6d --- /dev/null +++ b/pkg/crawler/testdata/sad/httpclient.html @@ -0,0 +1,27 @@ + + + +Central Repository: abbot + + + + + ++ +HTTPClient
+
++ ++../ +HTTPClient/ ++
+ + + \ No newline at end of file diff --git a/pkg/crawler/testdata/sad/httpclient_httpclient.html b/pkg/crawler/testdata/sad/httpclient_httpclient.html new file mode 100644 index 0000000..19afe9c --- /dev/null +++ b/pkg/crawler/testdata/sad/httpclient_httpclient.html @@ -0,0 +1,26 @@ + + + +Central Repository: abbot/abbot + + + + + ++ +HTTPClient/HTTPClient
+
++ +../ +maven-metadata.xml 2015-09-24 14:18 402 ++
+ + + \ No newline at end of file diff --git a/pkg/crawler/testdata/sad/index.html b/pkg/crawler/testdata/sad/index.html new file mode 100644 index 0000000..15e0cdb --- /dev/null +++ b/pkg/crawler/testdata/sad/index.html @@ -0,0 +1,1047 @@ + + + +Central Repository: + + + + + ++ + +
++ +../ +HTTPClient/ - - +abbot/ - - +academy/ - - +acegisecurity/ - - +activation/ - - +activecluster/ - - +activeio/ - - +activemq/ - - +activemq-jaxb/ - - +activesoap/ - - +activespace/ - - +adarwin/ - - +ae/ - - +aelfred/ - - +aero/ - - +africa/ - - +ag/ - - +ai/ - - +aislib/ - - +al/ - - +altrmi/ - - +am/ - - +andromda/ - - +annogen/ - - +ant/ - - +ant-contrib/ - - +ant-doxygen/ - - +ant4eclipse/ - - +antlr/ - - +anttex/ - - +aopalliance/ - - +apache-jaxme/ - - +app/ - - +aptconvert/ - - +ar/ - - +args4j/ - - +art/ - - +as/ - - +ashkay/ - - +ashkelon/ - - +asia/ - - +asm/ - - +aspectj/ - - +aspectwerkz/ - - +at/ - - +au/ - - +avalon/ - - +avalon-activation/ - - +avalon-apps/ - - +avalon-composition/ - - +avalon-cornerstone/ - - +avalon-extension/ - - +avalon-framework/ - - +avalon-http/ - - +avalon-logging/ - - +avalon-logkit/ - - +avalon-meta/ - - +avalon-phoenix/ - - +avalon-repository/ - - +avalon-util/ - - +aws/ - - +ax/ - - +axion/ - - +axis/ - - +axis2/ - - +azote/ - - +ba/ - - +backport-util-concurrent/ - - +backport175/ - - +band/ - - +barsuift/ - - +batik/ - - +bayern/ - - +bcel/ - - +bd/ - - +be/ - - +beehive/ - - +berkano/ - - +berkeleydb/ - - +berlin/ - - +best/ - - +bg/ - - +bi/ - - +bike/ - - +binky/ - - +bio/ - - +biz/ - - +black/ - - +blissed/ - - +blog/ - - +blue/ - - +boo/ - - +bot/ - - +bouncycastle/ - - +boxstuff/ - - +br/ - - +bsf/ - - +bsh/ - - +build/ - - +builders/ - - +burlap/ - - +buzz/ - - +by/ - - +bz/ - - +c10n/ - - +c3p0/ - - +ca/ - - +cactus/ - - +cafe/ - - +cam/ - - +camp/ - - +capital/ - - +care/ - - +cargo/ - - +cas/ - - +casa/ - - +cash/ - - +castor/ - - +cat/ - - +cc/ - - +cd/ - - +center/ - - +cewolf/ - - +cf/ - - +cglib/ - - +ch/ - - +charlotte/ - - +chat/ - - +checkstyle/ - - +church/ - - +city/ - - +cl/ - - +classworlds/ - - +click/ - - +clickstream/ - - +clirr/ - - +cloud/ - - +clover/ - - +club/ - - +cm/ - - +cmsdeploy/ - - +cn/ - - +co/ - - +cobertura/ - - +coconut/ - - +cocoon/ - - +code/ - - +code316/ - - +codehaus/ - - +codes/ - - +coffee/ - - +colt/ - - +com/ - - +commons-attributes/ - - +commons-beanutils/ - - +commons-betwixt/ - - +commons-chain/ - - +commons-cli/ - - +commons-codec/ - - +commons-collections/ - - +commons-compress/ - - +commons-configuration/ - - +commons-daemon/ - - +commons-dbcp/ - - +commons-dbutils/ - - +commons-digester/ - - +commons-discovery/ - - +commons-el/ - - +commons-email/ - - +commons-fileupload/ - - +commons-grafolia/ - - +commons-grant/ - - +commons-graph/ - - +commons-http/ - - +commons-httpclient/ - - +commons-i18n/ - - +commons-io/ - - +commons-jdbc2pool/ - - +commons-jelly/ - - +commons-jexl/ - - +commons-jux/ - - +commons-jxpath/ - - +commons-lang/ - - +commons-latka/ - - +commons-launcher/ - - +commons-logging/ - - +commons-math/ - - +commons-messenger/ - - +commons-modeler/ - - +commons-naming/ - - +commons-net/ - - +commons-pool/ - - +commons-primitives/ - - +commons-resources/ - - +commons-scxml/ - - +commons-services/ - - +commons-sql/ - - +commons-test/ - - +commons-threadpool/ - - +commons-transaction/ - - +commons-util/ - - +commons-validator/ - - +commons-vfs/ - - +commons-xo/ - - +community/ - - +company/ - - +computer/ - - +concurrent/ - - +consulting/ - - +continuum/ - - +controlhaus/ - - +cool/ - - +coop/ - - +cornerstone-connection/ - - +cornerstone-datasources/ - - +cornerstone-scheduler/ - - +cornerstone-sockets/ - - +cornerstone-store/ - - +cornerstone-threads/ - - +cos/ - - +crimson/ - - +cryptix/ - - +cssparser/ - - +cv/ - - +cx/ - - +cz/ - - +d-haven-event/ - - +d-haven-eventbus/ - - +d-haven-mpool/ - - +dalma/ - - +dalms/ - - +damagecontrol/ - - +dataforge/ - - +datasift/ - - +date/ - - +dbunit/ - - +de/ - - +decorutils/ - - +default/ - - +dentaku/ - - +dependency-maven-plugin/ - - +design/ - - +dev/ - - +digital/ - - +directory/ - - +directory-asn1/ - - +directory-authx/ - - +directory-clients/ - - +directory-naming/ - - +directory-network/ - - +directory-protocols/ - - +directory-shared/ - - +displaytag/ - - +ditchnet/ - - +dk/ - - +dna/ - - +dnsjava/ - - +docbook/ - - +doccheck/ - - +dom4j/ - - +domify/ - - +doxia/ - - +drone/ - - +drools/ - - +drools-examples/ - - +dsh-vocabulary/ - - +dtdparser/ - - +dumbster/ - - +dwr/ - - +dynaop/ - - +earth/ - - +easyconf/ - - +easymock/ - - +ec/ - - +echo/ - - +echo3/ - - +echo3extras/ - - +eclipse/ - - +eco/ - - +ecs/ - - +edenlib/ - - +edtftp/ - - +edu/ - - +education/ - - +ee/ - - +ehcache/ - - +ejb/ - - +el-impl/ - - +emberio/ - - +emma/ - - +energy/ - - +engineer/ - - +engineering/ - - +enterprises/ - - +ervacon/ - - +es/ - - +esper/ - - +eu/ - - +eus/ - - +events/ - - +excalibur/ - - +excalibur-altrmi/ - - +excalibur-cli/ - - +excalibur-collections/ - - +excalibur-component/ - - +excalibur-component-examples/ - - +excalibur-component-tests/ - - +excalibur-concurrent/ - - +excalibur-configuration/ - - +excalibur-containerkit/ - - +excalibur-datasource/ - - +excalibur-event/ - - +excalibur-extension/ - - +excalibur-fortress/ - - +excalibur-i18n/ - - +excalibur-instrument/ - - +excalibur-instrument-manager/ - - +excalibur-instrument-manager-interfaces/ - - +excalibur-io/ - - +excalibur-lifecycle/ - - +excalibur-logger/ - - +excalibur-monitor/ - - +excalibur-mpool/ - - +excalibur-naming/ - - +excalibur-pool/ - - +excalibur-sourceresolve/ - - +excalibur-store/ - - +excalibur-thread/ - - +excalibur-util/ - - +excalibur-xmlutil/ - - +exchange/ - - +exist/ - - +exml/ - - +exo/ - - +exolabcore/ - - +expert/ - - +exteca/ - - +external/ - - +family/ - - +fan/ - - +fans/ - - +farm/ - - +fastutil/ - - +fesi/ - - +fi/ - - +film/ - - +finance/ - - +financial/ - - +findbugs/ - - +fish/ - - +fit/ - - +flox/ - - +flux/ - - +fm/ - - +fo/ - - +fop/ - - +forehead/ - - +formproc/ - - +foundation/ - - +foxtrot/ - - +fr/ - - +freebxml/ - - +freemarker/ - - +frl/ - - +fulcrum/ - - +fun/ - - +fyi/ - - +ga/ - - +gabriel/ - - +games/ - - +garden/ - - +gay/ - - +gbean/ - - +gdn/ - - +generama/ - - +genjar/ - - +genjava/ - - +gent/ - - +geronimo/ - - +geronimo-spec/ - - +gg/ - - +github/ - - +glass/ - - +glassfish/ - - +global/ - - +gmbh/ - - +gnu/ - - +gnu-regexp/ - - +gov/ - - +gq/ - - +gr/ - - +graphics/ - - +graphlayout/ - - +green/ - - +grizzly/ - - +grizzly-cachetest/ - - +groovy/ - - +groovy-xmlrpc/ - - +group/ - - +gs/ - - +gsbase/ - - +guru/ - - +health/ - - +help/ - - +hessian/ - - +hibernate/ - - +hivemind/ - - +hk/ - - +hm/ - - +host/ - - +house/ - - +howl/ - - +hr/ - - +hsqldb/ - - +htmlunit/ - - +httpcomponents-httpcore/ - - +httpunit/ - - +hu/ - - +hudson/ - - +ical4j/ - - +icu/ - - +icu4j/ - - +id/ - - +idb/ - - +ie/ - - +iirekm/ - - +il/ - - +im/ - - +in/ - - +industries/ - - +info/ - - +informa/ - - +ink/ - - +innig/ - - +int/ - - +io/ - - +iq/ - - +ir/ - - +is/ - - +isorelax/ - - +it/ - - +itext/ - - +ivory/ - - +izpack/ - - +jaas/ - - +jackcess/ - - +jackson/ - - +jacl/ - - +jaf/ - - +jaimbot/ - - +jakarta/ - - +jakarta-regexp/ - - +jalopy/ - - +james/ - - +janino/ - - +jardiff/ - - +jarjar/ - - +jarsync/ - - +jasper-jsr199/ - - +jasperreports/ - - +java2html/ - - +java3d/ - - +java_cup/ - - +javacc/ - - +javaconfig/ - - +javadb/ - - +javadoc/ - - +javaee/ - - +javagroups/ - - +javainetlocator/ - - +javamail/ - - +javancss/ - - +javanettasks/ - - +javassist/ - - +javatar/ - - +javax/ - - +javazoom/ - - +javolution/ - - +jawin/ - - +jaxb/ - - +jaxen/ - - +jaxme/ - - +jaxr-ra/ - - +jblanket/ - - +jboss/ - - +jca/ - - +jcache/ - - +jcharts/ - - +jcifs/ - - +jcom/ - - +jcommon/ - - +jcoverage/ - - +jcs/ - - +jcs-javagroups/ - - +jcvsii/ - - +jdbc/ - - +jdbm/ - - +jdepend/ - - +jdiff/ - - +jdo/ - - +jdom/ - - +jdring/ - - +jdsl/ - - +je/ - - +jen/ - - +jencks/ - - +jep/ - - +jepi/ - - +jersey/ - - +jetty/ - - +jexcelapi/ - - +jface/ - - +jfree/ - - +jfreechart/ - - +jgen/ - - +jgoodies/ - - +jgraph/ - - +jgrapht/ - - +jgroups/ - - +jhunlang/ - - +jini/ - - +jintention/ - - +jisp/ - - +jivesoftware/ - - +jlibdiff/ - - +jline/ - - +jmagick/ - - +jmaki/ - - +jmdns/ - - +jmimemagic/ - - +jmml/ - - +jmock/ - - +jms/ - - +jmscts/ - - +jmsn/ - - +joda-time/ - - +john-test/ - - +jotm/ - - +jp/ - - +jparsec/ - - +jpl/ - - +jpox/ - - +jpox-dbcp/ - - +jpox-enhancer/ - - +jpox-java5/ - - +jrexx/ - - +jrms/ - - +jrobin/ - - +jruby/ - - +jsch/ - - +jsf-extensions/ - - +jspapi/ - - +jsptags/ - - +jstl/ - - +jstyle/ - - +jta/ - - +jtds/ - - +jtidy/ - - +juddi/ - - +jug/ - - +jung/ - - +junit/ - - +junit-addons/ - - +junit-doclet/ - - +junitperf/ - - +juno/ - - +jwebunit/ - - +jxta/ - - +jython/ - - +kawa/ - - +kg/ - - +kh/ - - +kim/ - - +kiwi/ - - +koeln/ - - +kohsuke/ - - +kr/ - - +kxml/ - - +kxml2/ - - +ky/ - - +kz/ - - +la/ - - +land/ - - +lc/ - - +ldapd-common/ - - +ldapsdk/ - - +lgbt/ - - +li/ - - +libgdx-oboe/ - - +life/ - - +lingo/ - - +link/ - - +live/ - - +lk/ - - +locc/ - - +log4j/ - - +log4unit/ - - +logkit/ - - +lol/ - - +loom/ - - +love/ - - +lpg/ - - +lt/ - - +ltd/ - - +lu/ - - +lucene/ - - +lv/ - - +ly/ - - +ma/ - - +magicGball/ - - +maps/ - - +market/ - - +marmalade/ - - +math/ - - +maven/ - - +maven-integration-test-helper/ - - +maven-javanet-plugin/ - - +maven-new/ - - +maven-plugins/ - - +maven-proxy/ - - +maven-taglib/ - - +maven-torque-plugin/ - - +maven-validator/ - - +maven-xdoclet2-plugin/ - - +maxq/ - - +mckoi/ - - +md/ - - +me/ - - +media/ - - +merlin/ - - +merlin-developer/ - - +merlin-tutorial/ - - +messenger/ - - +metaclass/ - - +mevenide/ - - +microcontainer/ - - +middlegen/ - - +mil/ - - +milyn/ - - +mk/ - - +ml/ - - +mm/ - - +mm-mysql/ - - +mn/ - - +mobi/ - - +mockcreator/ - - +mockit/ - - +mockmaker/ - - +mockobjects/ - - +mockrunner/ - - +modello/ - - +moe/ - - +monetdb/ - - +money/ - - +monster/ - - +mrj/ - - +ms/ - - +mstor/ - - +msv/ - - +mt/ - - +mu/ - - +mule/ - - +muse/ - - +mx/ - - +mx4j/ - - +myfaces/ - - +mysql/ - - +mz/ - - +name/ - - +nanning/ - - +nanocontainer/ - - +nekohtml/ - - +neo/ - - +net/ - - +netbeans/ - - +network/ - - +new/ - - +nf/ - - +ng/ - - +ninja/ - - +nl/ - - +no/ - - +norbert/ - - +np/ - - +nsuml/ - - +nu/ - - +nz/ - - +oauth/ - - +odmg/ - - +ognl/ - - +ojb/ - - +ojdbc/ - - +old/ - - +one/ - - +oness/ - - +onl/ - - +online/ - - +open/ - - +open-esb/ - - +opencypher/ - - +openejb/ - - +openim/ - - +openjms/ - - +opennms/ - - +opensymphony/ - - +oracle/ - - +org/ - - +org.elasticsearch/ - - +org.elasticsearch.client/ - - +org.elasticsearch.distribution.integ-test-zip/ - - +org.elasticsearch.distribution.zip/ - - +org.elasticsearch.gradle/ - - +org.elasticsearch.plugin/ - - +org.elasticsearch.test/ - - +oro/ - - +oscube/ - - +ovh/ - - +p2psockets/ - - +p6spy/ - - +page/ - - +party/ - - +pathwaycommons/ - - +patterntesting/ - - +payload/ - - +pcj/ - - +pdfbox/ - - +pe/ - - +penguin/ - - +pet/ - - +petridish/ - - +ph/ - - +piccolo/ - - +picocontainer/ - - +picounit/ - - +pink/ - - +pircbot/ - - +pitt/ - - +pk/ - - +pl/ - - +plexus/ - - +plj/ - - +plugin/ - - +plus/ - - +pluto-container/ - - +pm/ - - +pmd/ - - +pnuts/ - - +poi/ - - +poolman/ - - +portlet-api/ - - +postgresql/ - - +prevayler/ - - +pro/ - - +proctor/ - - +profiler/ - - +proxool/ - - +proxytoys/ - - +pt/ - - +pub/ - - +pubscribe/ - - +pull-parser/ - - +pw/ - - +qa/ - - +qdox/ - - +qfork/ - - +quartz/ - - +quilt/ - - +radeox/ - - +re/ - - +readline/ - - +red/ - - +redhill/ - - +redis/ - - +redmine/ - - +regexp/ - - +relaxngDatatype/ - - +ren/ - - +report/ - - +reportrunner/ - - +rest/ - - +rhino/ - - +ro/ - - +robo-guice/ - - +roboguice/ - - +rocks/ - - +rodeo/ - - +roller/ - - +rome/ - - +rs/ - - +rss4j/ - - +rsslibj/ - - +ru/ - - +rubygems/ - - +run/ - - +sa/ - - +sablecc/ - - +sale/ - - +sax/ - - +saxon/ - - +saxpath/ - - +sc/ - - +school/ - - +science/ - - +scout/ - - +scraping-engine/ - - +se/ - - +securityfilter/ - - +servicemix/ - - +servicemix-ws/ - - +services/ - - +servletapi/ - - +servlets/ - - +setpoint/ - - +sfx4j/ - - +sg/ - - +sh/ - - +shellix/ - - +shocks/ - - +shop/ - - +show/ - - +si/ - - +sillyexceptions/ - - +simple-jms/ - - +simple-jndi/ - - +site/ - - +sk/ - - +skaringa/ - - +ski/ - - +skinlf/ - - +slide/ - - +smartrics/ - - +sn/ - - +so/ - - +soap/ - - +social/ - - +software/ - - +soimp/ - - +solar/ - - +solarisrealm/ - - +solutions/ - - +space/ - - +speexx/ - - +spice/ - - +spring/ - - +springframework/ - - +springmodules/ - - +sqlline/ - - +srl/ - - +sshtools/ - - +sslext/ - - +st/ - - +stapler/ - - +statcvs/ - - +stax/ - - +stax-utils/ - - +store/ - - +stratum/ - - +stream/ - - +struts/ - - +struts-menu/ - - +strutstestcase/ - - +studio/ - - +stxx/ - - +su/ - - +subpersistence/ - - +subshell/ - - +suiterunner/ - - +surefire/ - - +swarmcache/ - - +swiss/ - - +swt/ - - +systems/ - - +sysunit/ - - +tablelayout/ - - +tagalog/ - - +tagishauth/ - - +taglibrarydoc/ - - +taglibs/ - - +tagsoup/ - - +tambora/ - - +tanukisoft/ - - +tapestry/ - - +tclib/ - - +team/ - - +tec/ - - +tech/ - - +technology/ - - +tel/ - - +textarea/ - - +tf/ - - +th/ - - +thaiopensource/ - - +tiffrenderer/ - - +tjdo/ - - +tk/ - - +tl/ - - +tmporb/ - - +to/ - - +tokyo/ - - +tomcat/ - - +tomcat-util/ - - +tonic/ - - +tools/ - - +top/ - - +toplink/ - - +torque/ - - +torque-gen/ - - +touchstone/ - - +toys/ - - +tr/ - - +traer/ - - +trail-taglib/ - - +tranql/ - - +travel/ - - +trove/ - - +trust/ - - +turbine/ - - +tv/ - - +tw/ - - +tyrex/ - - +tz/ - - +ua/ - - +uaihebert/ - - +ubique/ - - +ug/ - - +uispec4j/ - - +uk/ - - +uno/ - - +urbanophile/ - - +urlrewrite/ - - +us/ - - +uy/ - - +uz/ - - +vc/ - - +vdoclet/ - - +velocity/ - - +velocity-anakia/ - - +velocity-dvsl/ - - +velocity-tools/ - - +video/ - - +village/ - - +vin/ - - +vip/ - - +vision/ - - +vn/ - - +vu/ - - +wadi/ - - +wang/ - - +webmacro/ - - +website/ - - +webtest/ - - +weixinkeji/ - - +werken-xpath/ - - +werkflow/ - - +werkz/ - - +westhawk/ - - +wf/ - - +which/ - - +wicket/ - - +wiki/ - - +win/ - - +woodstox/ - - +work/ - - +works/ - - +world/ - - +wrapper/ - - +ws/ - - +ws-commons/ - - +ws-commons-java5/ - - +ws-commons-util/ - - +ws-scout/ - - +wsdl4j/ - - +wsrf/ - - +wss4j/ - - +wstx/ - - +wtf/ - - +wurfl/ - - +wutka/ - - +xajile/ - - +xalan/ - - +xbean/ - - +xdoclet/ - - +xdoclet-plugins/ - - +xerces/ - - +xercesjarv/ - - +xfire/ - - +xfire-root/ - - +xin/ - - +xjavadoc/ - - +xml-apis/ - - +xml-resolver/ - - +xml-security/ - - +xmlbeans/ - - +xmldb/ - - +xmlenc/ - - +xmlmind/ - - +xmlpull/ - - +xmlrpc/ - - +xmlrpc-helma/ - - +xmlunit/ - - +xmlwise/ - - +xmlwriter/ - - +xom/ - - +xpp3/ - - +xsddoc/ - - +xsdlib/ - - +xstream/ - - +xtc/ - - +xtiff-jai/ - - +xxl/ - - +xyz/ - - +yan/ - - +ymsg/ - - +yom/ - - +za/ - - +zone/ - - +zw/ - - +archetype-catalog.xml 2024-08-30 07:37 15719460 +archetype-catalog.xml.md5 2024-08-30 07:37 32 +archetype-catalog.xml.sha1 2024-08-30 07:37 40 +last_updated.txt 2024-09-02 12:49 29 +robots.txt 2016-01-01 19:20 26 ++
+ + + \ No newline at end of file diff --git a/pkg/crawler/testdata/sad/maven-metadata.xml b/pkg/crawler/testdata/sad/maven-metadata.xml new file mode 100644 index 0000000..047e92f --- /dev/null +++ b/pkg/crawler/testdata/sad/maven-metadata.xml @@ -0,0 +1,13 @@ + ++ \ No newline at end of fileabbot +abbot ++ +1.4.0 +1.4.0 ++ 0.12.3 +1.4.0 +20150924141841 +