Counting different kinds of line terminator characters in files

Can someone take a look at whether this piece of code is OK? I'm most interested in whether the goroutines are used correctly, but also in general Go best practices.
package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"regexp"
)

type Endings struct {
	crlf, lf uint
}

func check(e error) {
	if e != nil {
		panic(e)
	}
}

func getFileList(searchDir string) []string {
	fileList := []string{}
	filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error {
		if !f.IsDir() {
			fileList = append(fileList, path)
		}
		return nil
	})
	return fileList
}

func getFileEndings(filename string, c chan Endings) {
	fileContent, err := ioutil.ReadFile(filename)
	check(err)
	c <- countEndings(string(fileContent))
}

func countEndings(s string) Endings {
	crlf := regexp.MustCompile("\r\n")
	lf := regexp.MustCompile("\n")
	x := len(crlf.FindAllStringIndex(s, -1))
	y := len(lf.FindAllStringIndex(s, -1))
	return Endings{uint(x), uint(y - x)}
}

func splitIntoChunks(arr []string, chunkSize int) [][]string {
	if chunkSize <= 0 {
		panic("chunk size too small")
	}
	if len(arr) <= chunkSize {
		return [][]string{arr}
	}
	numChunks := int(len(arr)/chunkSize) + 1
	chunks := make([][]string, numChunks)
	for i := 0; i < numChunks; i++ {
		l := i * chunkSize
		u := l + chunkSize
		if u > len(arr) {
			u = len(arr)
		}
		chunks[i] = arr[l:u]
	}
	return chunks
}

func main() {
	searchDir := os.Args[1]
	c := make(chan Endings)
	chunkSize := 1000
	fileList := getFileList(searchDir)
	count := Endings{0, 0}
	for _, chunk := range splitIntoChunks(fileList, chunkSize) {
		for _, file := range chunk {
			go getFileEndings(file, c)
		}
		for _ = range chunk {
			result := <-c
			count.crlf += result.crlf
			count.lf += result.lf
		}
	}
	fmt.Printf("crlf: %d, lf: %d\n", count.crlf, count.lf)
}
Tags: regex, file, go
asked Mar 1 at 14:57 by Chris, edited Mar 2 at 22:41
1 Answer
Your program works fine and you are correctly using goroutines, but the code isn't very idiomatic.
Producer/Consumer architecture
What we have here is a typical producer/consumer scenario: the producer is filepath.Walk(), which emits the list of files, and the consumers are goroutines running the regexes on file contents.
So instead of the splitIntoChunks() method, we could just have a chan string: the producer sends file names to the channel, and the consumers range over it and parse the file contents.
This has two main advantages:
- the code is easier to read
- the producer and the consumers can work in parallel

So the code would look like this (the run worker it starts is sketched just below):
results := make(chan Endings, 2) // chan to collect each worker's result
var wg sync.WaitGroup
wg.Add(2)
files := make(chan string, 1000) // channel carrying the file list
for i := 0; i < 2; i++ {
	go run(files, results, &wg) // start the consumers (workers)
}
filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error {
	if !f.IsDir() {
		files <- path // feed the channel
	}
	return nil
})
close(files)
wg.Wait()
close(results)
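For reference, the run worker started above drains the files channel and then reports its subtotal; it is the same function that appears in the final version of the code at the end of this answer:

func run(files <-chan string, results chan<- Endings, wg *sync.WaitGroup) {
	e := &Endings{}
	for f := range files {
		e.countEndings(f) // accumulate this worker's counts
	}
	results <- *e // report the subtotal once the channel is closed
	wg.Done()
}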
Work on byte rather than on string
This is general advice regarding performance: prefer working on []byte over string to avoid extra allocations.
The regexp package has methods for both strings and byte slices, so instead of
fileContent, err := ioutil.ReadFile(filename)
c <- countEndings(string(fileContent)) 
...
x := len(crlf.FindAllStringIndex(s, -1))
we can just use
fileContent, err := ioutil.ReadFile(fileName)
...
x := len(crlf.FindAllIndex(fileContent, -1))
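As an aside (my sketch, not part of this answer's final code): since "\r\n" and "\n" are fixed strings rather than real patterns, bytes.Count from the standard library would avoid regexp entirely:

import "bytes"

// countEndings counts CRLF and bare-LF line endings without regexp.
// It behaves like the regexp version for these fixed patterns.
func countEndings(content []byte) Endings {
	crlf := bytes.Count(content, []byte("\r\n"))
	lf := bytes.Count(content, []byte("\n"))
	return Endings{crlf: uint(crlf), lf: uint(lf - crlf)}
}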
Don't panic
The code shouldn't panic on every error. For example, if the user running the program doesn't have permission to read a file in searchDir, the program should not crash, but rather log the error.
So avoid a helper like this:
func check(e error) {
	if e != nil {
		panic(e)
	}
}
Instead, handle the error locally:
fileContent, err := ioutil.ReadFile(fileName)
if err != nil {
	fmt.Printf("fail to read file %s: %v\n", fileName, err)
}
Other considerations
It's better to define the regexes as global vars:
var (
	crlf = regexp.MustCompile("\r\n")
	lf   = regexp.MustCompile("\n")
)
It's also better to use the flag package to parse arguments instead of relying on os.Args.
Here is the final version of the code:
package main

import (
	"flag"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"regexp"
	"sync"
)

var (
	crlf = regexp.MustCompile("\r\n")
	lf   = regexp.MustCompile("\n")
)

type Endings struct {
	crlf, lf uint
}

func (e *Endings) countEndings(fileName string) {
	c, err := ioutil.ReadFile(fileName)
	if err != nil {
		fmt.Printf("fail to read file %s: %v\n", fileName, err)
		return // skip unreadable files
	}
	x := len(crlf.FindAllIndex(c, -1))
	y := len(lf.FindAllIndex(c, -1))
	e.crlf += uint(x)
	e.lf += uint(y - x)
}

func run(files <-chan string, results chan<- Endings, wg *sync.WaitGroup) {
	e := &Endings{}
	for f := range files {
		e.countEndings(f)
	}
	results <- *e
	wg.Done()
}

func main() {
	searchDir := flag.String("dir", "dir", "directory to search")
	nWorker := flag.Int("n", 2, "number of worker")
	flag.Parse()
	results := make(chan Endings, *nWorker)
	var wg sync.WaitGroup
	wg.Add(*nWorker)
	files := make(chan string, 1000)
	for i := 0; i < *nWorker; i++ {
		go run(files, results, &wg)
	}
	filepath.Walk(*searchDir, func(path string, f os.FileInfo, err error) error {
		if !f.IsDir() {
			files <- path
		}
		return nil
	})
	close(files)
	wg.Wait()
	close(results)
	count := &Endings{}
	for e := range results {
		count.crlf += e.crlf
		count.lf += e.lf
	}
	fmt.Printf("crlf: %d, lf: %d\n", count.crlf, count.lf)
}
Performance
The new code is slightly faster:
old:
$ time ./test ~/go/src/golang.org 
crlf: 6231, lf: 1589462
./test ~/go/src/golang.org 1,78s user 0,23s system 167% cpu 1,201 total
new:
$ time ./test -dir ~/go/src/golang.org -n 6 
crlf: 6231, lf: 1589462
./test -dir ~/go/src/golang.org -n 6 1,48s user 0,18s system 181% cpu 0,911 total
answered Mar 20 at 10:59 by felix, edited Mar 21 at 6:47