Skip to content

Commit

Permalink
fix: panic to write in close channel (#42)
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitriyLewen authored Sep 27, 2024
1 parent 01e86e1 commit c46eaa1
Show file tree
Hide file tree
Showing 21 changed files with 1,228 additions and 33 deletions.
28 changes: 15 additions & 13 deletions pkg/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ type Crawler struct {
rootUrl string
wg sync.WaitGroup
urlCh chan string
errOnce sync.Once
limit *semaphore.Weighted
wrongSHA1Values []string
}
Expand All @@ -58,8 +57,12 @@ func NewCrawler(opt Option) Crawler {
}
}
client.ErrorHandler = func(resp *http.Response, err error, numTries int) (*http.Response, error) {
logger := slog.With(slog.String("url", resp.Request.URL.String()), slog.Int("status_code", resp.StatusCode),
slog.Int("num_tries", numTries))
logger := slog.Default()
if resp != nil {
logger = slog.With(slog.String("url", resp.Request.URL.String()), slog.Int("status_code", resp.StatusCode),
slog.Int("num_tries", numTries))
}

if err != nil {
logger = logger.With(slog.String("error", err.Error()))
}
Expand All @@ -81,7 +84,6 @@ func NewCrawler(opt Option) Crawler {
rootUrl: opt.RootUrl,
urlCh: make(chan string, opt.Limit*10),
limit: semaphore.NewWeighted(opt.Limit),
errOnce: sync.Once{},
}
}

Expand Down Expand Up @@ -122,13 +124,13 @@ func (c *Crawler) Crawl(ctx context.Context) error {
defer c.limit.Release(1)
defer c.wg.Done()
if err := c.Visit(ctx, url); err != nil {
// There might be a case where we get 2 errors at the same time.
// In this case we close `errCh` after reading the first error
// and get panic for the second error
// That's why we need to return the error once.
c.errOnce.Do(func() {
errCh <- xerrors.Errorf("visit error: %w", err)
})
select {
// Context can be canceled if we receive an error from another Visit function.
case <-ctx.Done():
return
case errCh <- err:
return
}
}
}(url)
}
Expand Down Expand Up @@ -210,8 +212,8 @@ func (c *Crawler) Visit(ctx context.Context, url string) error {
// Context can be canceled if we receive an error from another Visit function.
case <-ctx.Done():
return
default:
c.urlCh <- url + child
case c.urlCh <- url + child:
continue
}
}
}()
Expand Down
61 changes: 44 additions & 17 deletions pkg/crawler/crawler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,64 @@ package crawler_test

import (
"context"
"github.com/stretchr/testify/assert"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"

"github.com/aquasecurity/trivy-java-db/pkg/crawler"
)

func TestCrawl(t *testing.T) {
tests := []struct {
name string
limit int64
fileNames map[string]string
goldenPath string
filePath string
wantErr string
}{
{
name: "happy path",
name: "happy path",
limit: 1,
fileNames: map[string]string{
"/maven2/": "testdata/index.html",
"/maven2/abbot/": "testdata/abbot.html",
"/maven2/abbot/abbot/": "testdata/abbot_abbot.html",
"/maven2/abbot/abbot/maven-metadata.xml": "testdata/maven-metadata.xml",
"/maven2/abbot/abbot/0.12.3/": "testdata/abbot_abbot_0.12.3.html",
"/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar.sha1": "testdata/abbot-0.12.3.jar.sha1",
"/maven2/abbot/abbot/0.13.0/": "testdata/abbot_abbot_0.13.0.html",
"/maven2/abbot/abbot/0.13.0/abbot-0.13.0.jar.sha1": "testdata/abbot-0.13.0.jar.sha1",
"/maven2/abbot/abbot/0.13.0/abbot-0.13.0-copy.jar.sha1": "testdata/abbot-0.13.0-copy.jar.sha1",
"/maven2/abbot/abbot/1.4.0/": "testdata/abbot_abbot_1.4.0.html",
"/maven2/abbot/abbot/1.4.0/abbot-1.4.0.jar.sha1": "testdata/abbot-1.4.0.jar.sha1",
"/maven2/abbot/abbot/1.4.0/abbot-1.4.0-lite.jar.sha1": "testdata/abbot-1.4.0-lite.jar.sha1",
"/maven2/": "testdata/happy/index.html",
"/maven2/abbot/": "testdata/happy/abbot.html",
"/maven2/abbot/abbot/": "testdata/happy/abbot_abbot.html",
"/maven2/abbot/abbot/maven-metadata.xml": "testdata/happy/maven-metadata.xml",
"/maven2/abbot/abbot/0.12.3/": "testdata/happy/abbot_abbot_0.12.3.html",
"/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar.sha1": "testdata/happy/abbot-0.12.3.jar.sha1",
"/maven2/abbot/abbot/0.13.0/": "testdata/happy/abbot_abbot_0.13.0.html",
"/maven2/abbot/abbot/0.13.0/abbot-0.13.0.jar.sha1": "testdata/happy/abbot-0.13.0.jar.sha1",
"/maven2/abbot/abbot/0.13.0/abbot-0.13.0-copy.jar.sha1": "testdata/happy/abbot-0.13.0-copy.jar.sha1",
"/maven2/abbot/abbot/1.4.0/": "testdata/happy/abbot_abbot_1.4.0.html",
"/maven2/abbot/abbot/1.4.0/abbot-1.4.0.jar.sha1": "testdata/happy/abbot-1.4.0.jar.sha1",
"/maven2/abbot/abbot/1.4.0/abbot-1.4.0-lite.jar.sha1": "testdata/happy/abbot-1.4.0-lite.jar.sha1",
},
goldenPath: "testdata/golden/abbot.json",
goldenPath: "testdata/happy/abbot.json.golden",
filePath: "indexes/abbot/abbot.json",
},
{
name: "sad path",
limit: 2,
fileNames: map[string]string{
// index.html file for this test contains many links to avoid case
// when we finish crawl and get error in one time.
// We will get a `panic` because we will try to close `urlCh` in 2 places (after the wait group and after the error)
// In real case it is impossible
"/maven2/": "testdata/sad/index.html",
"/maven2/abbot/": "testdata/sad/abbot.html",
"/maven2/abbot/abbot/": "testdata/sad/abbot_abbot.html",
"/maven2/abbot/abbot/maven-metadata.xml": "testdata/sad/maven-metadata.xml",
"/maven2/HTTPClient/": "testdata/sad/httpclient.html",
"/maven2/HTTPClient/HTTPClient/": "testdata/sad/httpclient_httpclient.html",
"/maven2/HTTPClient/maven-metadata.xml": "testdata/sad/maven-metadata.xml",
},
wantErr: "decode error:",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -48,18 +70,23 @@ func TestCrawl(t *testing.T) {
return
}
http.ServeFile(w, r, fileName)
w.WriteHeader(http.StatusOK)
return
}))
defer ts.Close()

tmpDir := t.TempDir()
cl := crawler.NewCrawler(crawler.Option{
RootUrl: ts.URL + "/maven2/",
Limit: 1,
Limit: tt.limit,
CacheDir: tmpDir,
})

err := cl.Crawl(context.Background())
assert.NoError(t, err)
if tt.wantErr != "" {
assert.ErrorContains(t, err, tt.wantErr)
return
}

got, err := os.ReadFile(filepath.Join(tmpDir, tt.filePath))
assert.NoError(t, err)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ <h1>abbot</h1>
<hr>
<main>
<pre id="contents">
<a href="../">../</a>
<a href="../..">../</a>
<a href="abbot/" title="abbot/">abbot/</a> - -
</pre>
</main>
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ <h1>abbot/abbot</h1>
<hr>
<main>
<pre id="contents"><a href="https://repo.maven.apache.org/maven2/abbot/">../</a>
<a href="../">../</a>
<a href="../..">../</a>
<a href="0.12.3/" title="0.12.3/">0.12..../</a> 2005-09-20 05:44 -
<a href="0.13.0/" title="0.13.0/">0.13.0/</a> 2005-09-20 05:44 -
<a href="1.4.0/" title="1.4.0/">1.4.0/</a> 2015-09-22 16:03 -
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<html>
<head><title>Index of /</title></head>
<body>
<h1></h1><hr><pre><a href="../">../</a>
<h1></h1><hr><pre><a href="../..">../</a>
<a href="abbot/">abbot/</a> - -
</pre><hr></body>
</html>
File renamed without changes.
27 changes: 27 additions & 0 deletions pkg/crawler/testdata/sad/abbot.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!DOCTYPE html>
<html><head>
<meta http-equiv="content-type" content="text/html; charset=windows-1252">
<title>Central Repository: abbot</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
body {
background: #fff;
}
</style>
</head>

<body>
<header>
<h1>abbot</h1>
</header>
<hr>
<main>
<pre id="contents">
<a href="../..">../</a>
<a href="abbot/" title="abbot/">abbot/</a>
</pre>
</main>
<hr>


</body></html>
26 changes: 26 additions & 0 deletions pkg/crawler/testdata/sad/abbot_abbot.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<!DOCTYPE html>
<html><head>
<meta http-equiv="content-type" content="text/html; charset=windows-1252">
<title>Central Repository: abbot/abbot</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
body {
background: #fff;
}
</style>
</head>

<body>
<header>
<h1>abbot/abbot</h1>
</header>
<hr>
<main>
<pre id="contents"><a href="https://repo.maven.apache.org/maven2/abbot/">../</a>
<a href="maven-metadata.xml" title="maven-metadata.xml">maven-metadata.xml</a> 2015-09-24 14:18 402
</pre>
</main>
<hr>


</body></html>
27 changes: 27 additions & 0 deletions pkg/crawler/testdata/sad/httpclient.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!DOCTYPE html>
<html><head>
<meta http-equiv="content-type" content="text/html; charset=windows-1252">
<title>Central Repository: abbot</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
body {
background: #fff;
}
</style>
</head>

<body>
<header>
<h1>HTTPClient</h1>
</header>
<hr>
<main>
<pre id="contents">
<a href="../..">../</a>
<a href="HTTPClient/" title="HTTPClient/">HTTPClient/</a>
</pre>
</main>
<hr>


</body></html>
26 changes: 26 additions & 0 deletions pkg/crawler/testdata/sad/httpclient_httpclient.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<!DOCTYPE html>
<html><head>
<meta http-equiv="content-type" content="text/html; charset=windows-1252">
<title>Central Repository: abbot/abbot</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
body {
background: #fff;
}
</style>
</head>

<body>
<header>
<h1>HTTPClient/HTTPClient</h1>
</header>
<hr>
<main>
<pre id="contents"><a href="https://repo.maven.apache.org/maven2/HTTPClient/">../</a>
<a href="maven-metadata.xml" title="maven-metadata.xml">maven-metadata.xml</a> 2015-09-24 14:18 402
</pre>
</main>
<hr>


</body></html>
Loading

0 comments on commit c46eaa1

Please sign in to comment.