diff options
author | Hiroshi SHIBATA <[email protected]> | 2022-12-09 08:46:14 +0900 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2022-12-09 16:36:22 +0900 |
commit | 643918ecfe9c980f251247de6acd3be6280da24c () | |
tree | a5b4011c13ee3af5b110e377a839e79045266dcd /lib/csv | |
parent | 260a00d80e4dcc930b040313a99da29e4b1e6678 (diff) |
Merge csv-3.2.6
Notes: Merged: https://github.com/ruby/ruby/pull/6890
-rw-r--r-- | lib/csv/fields_converter.rb | 5 | ||||
-rw-r--r-- | lib/csv/input_record_separator.rb | 15 | ||||
-rw-r--r-- | lib/csv/parser.rb | 293 | ||||
-rw-r--r-- | lib/csv/row.rb | 229 | ||||
-rw-r--r-- | lib/csv/table.rb | 626 | ||||
-rw-r--r-- | lib/csv/version.rb | 2 | ||||
-rw-r--r-- | lib/csv/writer.rb | 10 |
7 files changed, 926 insertions, 254 deletions
@@ -44,7 +44,7 @@ class CSV @converters.empty? end - def convert(fields, headers, lineno) return fields unless need_convert? fields.collect.with_index do |field, index| @@ -63,7 +63,8 @@ class CSV else header = nil end - field = converter[field, FieldInfo.new(index, lineno, header)] end break unless field.is_a?(String) # short-circuit pipeline for speed end @@ -4,20 +4,7 @@ require "stringio" class CSV module InputRecordSeparator class << self - is_input_record_separator_deprecated = false - verbose, $VERBOSE = $VERBOSE, true - stderr, $stderr = $stderr, StringIO.new - input_record_separator = $INPUT_RECORD_SEPARATOR - begin - $INPUT_RECORD_SEPARATOR = "\r\n" - is_input_record_separator_deprecated = (not $stderr.string.empty?) - ensure - $INPUT_RECORD_SEPARATOR = input_record_separator - $stderr = stderr - $VERBOSE = verbose - end - - if is_input_record_separator_deprecated def value "\n" end @@ -2,15 +2,10 @@ require "strscan" -require_relative "delete_suffix" require_relative "input_record_separator" -require_relative "match_p" require_relative "row" require_relative "table" -using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix) -using CSV::MatchP if CSV.const_defined?(:MatchP) - class CSV # Note: Don't use this class directly. This is an internal class. class Parser @@ -27,6 +22,10 @@ class CSV class InvalidEncoding < StandardError end # # CSV::Scanner receives a CSV output, scans it and return the content. # It also controls the life cycle of the object with its methods +keep_start+, @@ -78,10 +77,10 @@ class CSV # +keep_end+, +keep_back+, +keep_drop+. # # CSV::InputsScanner.scan() tries to match with pattern at the current position. - # If there's a match, the scanner advances the “scan pointer” and returns the matched string. # Otherwise, the scanner returns nil. # - # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer). # If there is no more data (eos? = true), it returns "". 
# class InputsScanner @@ -96,11 +95,13 @@ class CSV end def each_line(row_separator) buffer = nil input = @scanner.rest position = @scanner.pos offset = 0 n_row_separator_chars = row_separator.size while true input.each_line(row_separator) do |line| @scanner.pos += line.bytesize @@ -140,25 +141,28 @@ class CSV end def scan(pattern) value = @scanner.scan(pattern) return value if @last_scanner - if value - read_chunk if @scanner.eos? - return value - else - nil - end end def scan_all(pattern) value = @scanner.scan(pattern) return value if @last_scanner return nil if value.nil? while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern)) value << sub_value end value end @@ -167,68 +171,126 @@ class CSV end def keep_start - @keeps.push([@scanner.pos, nil]) end def keep_end - start, buffer = @keeps.pop - keep = @scanner.string.byteslice(start, @scanner.pos - start) if buffer buffer << keep keep = buffer end keep end def keep_back - start, buffer = @keeps.pop if buffer string = @scanner.string - keep = string.byteslice(start, string.bytesize - start) if keep and not keep.empty? @inputs.unshift(StringIO.new(keep)) @last_scanner = false end @scanner = StringScanner.new(buffer) else @scanner.pos = start end read_chunk if @scanner.eos? end def keep_drop - @keeps.pop end def rest @scanner.rest end private - def read_chunk - return false if @last_scanner - unless @keeps.empty? - keep = @keeps.last - keep_start = keep[0] - string = @scanner.string - keep_data = string.byteslice(keep_start, @scanner.pos - keep_start) - if keep_data - keep_buffer = keep[1] - if keep_buffer - keep_buffer << keep_data - else - keep[1] = keep_data.dup - end end - keep[0] = 0 end input = @inputs.first case input when StringIO string = input.read raise InvalidEncoding unless string.valid_encoding? @scanner = StringScanner.new(string) @inputs.shift @last_scanner = @inputs.empty? 
@@ -237,6 +299,7 @@ class CSV chunk = input.gets(@row_separator, @chunk_size) if chunk raise InvalidEncoding unless chunk.valid_encoding? @scanner = StringScanner.new(chunk) if input.respond_to?(:eof?) and input.eof? @inputs.shift @@ -244,6 +307,7 @@ class CSV end true else @scanner = StringScanner.new("".encode(@encoding)) @inputs.shift @last_scanner = @inputs.empty? @@ -278,7 +342,11 @@ class CSV end def field_size_limit - @field_size_limit end def skip_lines @@ -346,6 +414,16 @@ class CSV end message = "Invalid byte sequence in #{@encoding}" raise MalformedCSVError.new(message, lineno) end end @@ -390,7 +468,7 @@ class CSV @backslash_quote = false end @unconverted_fields = @options[:unconverted_fields] - @field_size_limit = @options[:field_size_limit] @skip_blanks = @options[:skip_blanks] @fields_converter = @options[:fields_converter] @header_fields_converter = @options[:header_fields_converter] @@ -680,9 +758,10 @@ class CSV case headers when Array @raw_headers = headers @use_headers = true when String - @raw_headers = parse_headers(headers) @use_headers = true when nil, false @raw_headers = nil @@ -692,21 +771,28 @@ class CSV @use_headers = true end if @raw_headers - @headers = adjust_headers(@raw_headers) else @headers = nil end end def parse_headers(row) - CSV.parse_line(row, - col_sep: @column_separator, - row_sep: @row_separator, - quote_char: @quote_character) end - def adjust_headers(headers) - adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno) adjusted_headers.each {|h| h.freeze if h.is_a? String} adjusted_headers end @@ -729,28 +815,28 @@ class CSV sample[0, 128].index(@quote_character) end - SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes") - if SCANNER_TEST - class UnoptimizedStringIO - def initialize(string) - @io = StringIO.new(string, "rb:#{string.encoding}") - end - def gets(*args) - @io.gets(*args) - end - def each_line(*args, &block) - @io.each_line(*args, &block) - end - def eof? - @io.eof? 
- end end - SCANNER_TEST_CHUNK_SIZE = - Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10) def build_scanner inputs = @samples.collect do |sample| UnoptimizedStringIO.new(sample) @@ -760,10 +846,17 @@ class CSV else inputs << @input end InputsScanner.new(inputs, @encoding, @row_separator, - chunk_size: SCANNER_TEST_CHUNK_SIZE) end else def build_scanner @@ -826,6 +919,14 @@ class CSV end end def parse_no_quote(&block) @scanner.each_line(@row_separator) do |line| next if @skip_lines and skip_line?(line) @@ -835,9 +936,16 @@ class CSV if line.empty? next if @skip_blanks row = [] else line = strip_value(line) row = line.split(@split_column_separator, -1) n_columns = row.size i = 0 while i < n_columns @@ -846,7 +954,7 @@ class CSV end end @last_line = original_line - emit_row(row, &block) end end @@ -868,31 +976,37 @@ class CSV next end row = [] elsif line.include?(@cr) or line.include?(@lf) @scanner.keep_back @need_robust_parsing = true return parse_quotable_robust(&block) else row = line.split(@split_column_separator, -1) n_columns = row.size i = 0 while i < n_columns column = row[i] if column.empty? row[i] = nil else n_quotes = column.count(@quote_character) if n_quotes.zero? 
# no quote elsif n_quotes == 2 and column.start_with?(@quote_character) and column.end_with?(@quote_character) row[i] = column[1..-2] else @scanner.keep_back @need_robust_parsing = true return parse_quotable_robust(&block) end end i += 1 end @@ -900,13 +1014,14 @@ class CSV @scanner.keep_drop @scanner.keep_start @last_line = original_line - emit_row(row, &block) end @scanner.keep_drop end def parse_quotable_robust(&block) row = [] skip_needless_lines start_row while true @@ -916,32 +1031,39 @@ class CSV value = parse_column_value if value @scanner.scan_all(@strip_value) if @strip_value - if @field_size_limit and value.size >= @field_size_limit - ignore_broken_line - raise MalformedCSVError.new("Field size exceeded", @lineno) - end end if parse_column_end row << value elsif parse_row_end if row.empty? and value.nil? - emit_row([], &block) unless @skip_blanks else row << value - emit_row(row, &block) row = [] end skip_needless_lines start_row elsif @scanner.eos? break if row.empty? and value.nil? row << value - emit_row(row, &block) break else if @quoted_column_value ignore_broken_line - message = "Any value after quoted field isn't allowed" raise MalformedCSVError.new(message, @lineno) elsif @unquoted_column_value and (new_line = @scanner.scan(@line_end)) @@ -1034,7 +1156,7 @@ class CSV if (n_quotes % 2).zero? quotes[0, (n_quotes - 2) / 2] else - value = quotes[0, (n_quotes - 1) / 2] while true quoted_value = @scanner.scan_all(@quoted_value) value << quoted_value if quoted_value @@ -1058,11 +1180,9 @@ class CSV n_quotes = quotes.size if n_quotes == 1 break - elsif (n_quotes % 2) == 1 - value << quotes[0, (n_quotes - 1) / 2] - break else value << quotes[0, n_quotes / 2] end end value @@ -1098,18 +1218,15 @@ class CSV def strip_value(value) return value unless @strip - return nil if value.nil? 
case @strip when String - size = value.size - while value.start_with?(@strip) - size -= 1 - value = value[1, size] end - while value.end_with?(@strip) - size -= 1 - value = value[0, size] end else value.strip! @@ -1132,22 +1249,22 @@ class CSV @scanner.keep_start end - def emit_row(row, &block) @lineno += 1 raw_row = row if @use_headers if @headers.nil? - @headers = adjust_headers(row) return unless @return_headers row = Row.new(@headers, row, true) else row = Row.new(@headers, - @fields_converter.convert(raw_row, @headers, @lineno)) end else # convert fields, if needed... - row = @fields_converter.convert(raw_row, nil, @lineno) end # inject unconverted fields and accessor, if requested... @@ -3,30 +3,105 @@ require "forwardable" class CSV # - # A CSV::Row is part Array and part Hash. It retains an order for the fields - # and allows duplicates just as an Array would, but also allows you to access - # fields by name just as you could if they were in a Hash. # - # All rows returned by CSV will be constructed from this class, if header row - # processing is activated. # class Row - # - # Constructs a new CSV::Row from +headers+ and +fields+, which are expected - # to be Arrays. If one Array is shorter than the other, it will be padded - # with +nil+ objects. - # - # The optional +header_row+ parameter can be set to +true+ to indicate, via - # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header - # row. Otherwise, the row assumes to be a field row. - # - # A CSV::Row object supports the following Array methods through delegation: - # - # * empty?() - # * length() - # * size() - # def initialize(headers, fields, header_row = false) @header_row = header_row headers.each { |h| h.freeze if h.is_a? 
String } @@ -48,6 +123,10 @@ class CSV extend Forwardable def_delegators :@row, :empty?, :length, :size def initialize_copy(other) super_return_value = super @row = @row.collect(&:dup) @@ -71,7 +150,7 @@ class CSV end # :call-seq: - # row.headers # # Returns the headers for this row: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" @@ -83,9 +162,9 @@ class CSV end # :call-seq: - # field(index) - # field(header) - # field(header, offset) # # Returns the field value for the given +index+ or +header+. # @@ -137,9 +216,9 @@ class CSV # # :call-seq: - # fetch(header) - # fetch(header, default) - # fetch(header) {|row| ... } # # Returns the field value as specified by +header+. # @@ -193,7 +272,7 @@ class CSV end # :call-seq: - # row.has_key?(header) # # Returns +true+ if there is a field with the given +header+, # +false+ otherwise. @@ -320,7 +399,7 @@ class CSV end # :call-seq: - # row.push(*values) ->self # # Appends each of the given +values+ to +self+ as a field; returns +self+: # source = "Name,Name,Name\nFoo,Bar,Baz\n" @@ -403,7 +482,7 @@ class CSV end # :call-seq: - # self.fields(*specifiers) # # Returns field values per the given +specifiers+, which may be any mixture of: # - \Integer index. @@ -471,15 +550,26 @@ class CSV end alias_method :values_at, :fields - # # :call-seq: - # index( header ) - # index( header, offset ) # - # This method will return the index of a field with the provided +header+. - # The +offset+ can be used to locate duplicate header names, as described in - # CSV::Row.field(). # def index(header, minimum_index = 0) # find the pair index = headers[minimum_index..-1].index(header) @@ -487,24 +577,36 @@ class CSV index.nil? ? nil : index + minimum_index end # - # Returns +true+ if +data+ matches a field in this row, and +false+ - # otherwise. - # def field?(data) fields.include? data end include Enumerable # - # Yields each pair of the row as header and field tuples (much like - # iterating over a Hash). 
This method returns the row for chaining. - # - # If no block is given, an Enumerator is returned. - # - # Support for Enumerable. # def each(&block) return enum_for(__method__) { size } unless block_given? @@ -515,10 +617,19 @@ class CSV alias_method :each_pair, :each # - # Returns +true+ if this row contains the same headers and fields in the - # same order as +other+. - # def ==(other) return @row == other.row if other.is_a? CSV::Row @row == other @@ -548,9 +659,31 @@ class CSV end alias_method :to_hash, :to_h alias_method :to_ary, :to_a # :call-seq: # row.to_csv -> csv_string # # Returns the row as a \CSV String. Headers are not included: @@ -3,31 +3,199 @@ require "forwardable" class CSV # - # A CSV::Table is a two-dimensional data structure for representing CSV - # documents. Tables allow you to work with the data by row or column, - # manipulate the data, and even convert the results back to CSV, if needed. # - # All tables returned by CSV will be constructed from this class, if header - # row processing is activated. # class Table # - # Constructs a new CSV::Table from +array_of_rows+, which are expected - # to be CSV::Row objects. All rows are assumed to have the same headers. # - # The optional +headers+ parameter can be set to Array of headers. - # If headers aren't set, headers are fetched from CSV::Row objects. - # Otherwise, headers() method will return headers being set in - # headers argument. # - # A CSV::Table object supports the following Array methods through - # delegation: # - # * empty?() - # * length() - # * size() # def initialize(array_of_rows, headers: nil) @table = array_of_rows @headers = headers @@ -54,88 +222,141 @@ class CSV extend Forwardable def_delegators :@table, :empty?, :length, :size # - # Returns a duplicate table object, in column mode. This is handy for - # chaining in a single call without changing the table mode, but be aware - # that this method can consume a fair amount of memory for bigger data sets. 
# - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. # def by_col self.class.new(@table.dup).by_col! end # - # Switches the mode of this table to column mode. All calls to indexing and - # iteration methods will work with columns until the mode is changed again. - # - # This method returns the table and is safe to chain. - # def by_col! @mode = :col self end # - # Returns a duplicate table object, in mixed mode. This is handy for - # chaining in a single call without changing the table mode, but be aware - # that this method can consume a fair amount of memory for bigger data sets. # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. # def by_col_or_row self.class.new(@table.dup).by_col_or_row! end # - # Switches the mode of this table to mixed mode. All calls to indexing and - # iteration methods will use the default intelligent indexing system until - # the mode is changed again. In mixed mode an index is assumed to be a row - # reference while anything else is assumed to be column access by headers. - # - # This method returns the table and is safe to chain. - # def by_col_or_row! @mode = :col_or_row self end # - # Returns a duplicate table object, in row mode. This is handy for chaining - # in a single call without changing the table mode, but be aware that this - # method can consume a fair amount of memory for bigger data sets. # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. # def by_row self.class.new(@table.dup).by_row! end # - # Switches the mode of this table to row mode. All calls to indexing and - # iteration methods will work with rows until the mode is changed again. 
- # - # This method returns the table and is safe to chain. - # def by_row! @mode = :row self end # - # Returns the headers for the first row of this table (assumed to match all - # other rows). The headers Array passed to CSV::Table.new is returned for - # empty tables. # def headers if @table.empty? @headers.dup @@ -145,17 +366,21 @@ class CSV end # :call-seq: - # table[n] -> row - # table[range] -> array_of_rows - # table[header] -> array_of_fields # # Returns data from the table; does not modify the table. # # --- # - # The expression <tt>table[n]</tt>, where +n+ is a non-negative \Integer, - # returns the +n+th row of the table, if that row exists, - # and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_row! # => #<CSV::Table mode:row row_count:4> @@ -168,20 +393,45 @@ class CSV # # Returns +nil+ if +n+ is too large or too small: # table[4] # => nil - # table[-4] => nil # # Raises an exception if the access mode is <tt>:row</tt> - # and +n+ is not an - # {Integer-convertible object}[rdoc-ref:implicit_conversion.rdoc@Integer-Convertible+Objects]. # table.by_row! # => #<CSV::Table mode:row row_count:4> # # Raises TypeError (no implicit conversion of String into Integer): # table['Name'] # # --- # - # The expression <tt>table[range]</tt>, where +range+ is a Range object, - # returns rows from the table, beginning at row <tt>range.first</tt>, - # if those rows exist, and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_row! 
# => #<CSV::Table mode:row row_count:4> @@ -191,11 +441,11 @@ class CSV # rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1"> # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] # - # If there are too few rows, returns all from <tt>range.first</tt> to the end: # rows = table[1..50] # => #<CSV::Row "Name":"bar" "Value":"1"> # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] # - # Special case: if <tt>range.start == table.size</tt>, returns an empty \Array: # table[table.size..50] # => [] # # If <tt>range.end</tt> is negative, calculates the ending index from the end: @@ -211,9 +461,41 @@ class CSV # # --- # - # The expression <tt>table[header]</tt>, where +header+ is a \String, - # returns column values (\Array of \Strings) if the column exists - # and if the access mode is <tt>:col</tt> or <tt>:col_or_row</tt>: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_col! # => #<CSV::Table mode:col row_count:4> @@ -238,22 +520,132 @@ class CSV end end # - # In the default mixed mode, this method assigns rows for index access and - # columns for header access. You can force the index association by first - # calling by_col!() or by_row!(). # - # Rows may be set to an Array of values (which will inherit the table's - # headers()) or a CSV::Row. # - # Columns may be set to a single value, which is copied to each row of the - # column, or an Array of values. Arrays of values are assigned to rows top - # to bottom in row major order. Excess values are ignored and if the Array - # does not have a value for each row the extra rows will receive a +nil+. # - # Assigning to an existing column or row clobbers the data. Assigning to - # new columns creates them at the right end of the table. # def []=(index_or_header, value) if @mode == :row or # by index (@mode == :col_or_row and index_or_header.is_a? 
Integer) @@ -463,6 +855,9 @@ class CSV end end # Removes rows or columns for which the block returns a truthy value; # returns +self+. # @@ -495,9 +890,8 @@ class CSV if @mode == :row or @mode == :col_or_row # by index @table.delete_if(&block) else # by header - deleted = [] headers.each do |header| - deleted << delete(header) if yield([header, self[header]]) end end @@ -506,6 +900,9 @@ class CSV include Enumerable # Calls the block with each row or column; returns +self+. # # When the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>, @@ -534,7 +931,9 @@ class CSV return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given? if @mode == :col - headers.each { |header| yield([header, self[header]]) } else @table.each(&block) end @@ -542,6 +941,9 @@ class CSV self # for chaining end # Returns +true+ if all each row of +self+ <tt>==</tt> # the corresponding row of +other_table+, otherwise, +false+. # @@ -565,10 +967,14 @@ class CSV @table == other end # - # Returns the table as an Array of Arrays. Headers will be the first row, - # then all of the field rows will follow. - # def to_a array = [headers] @table.each do |row| @@ -578,16 +984,29 @@ class CSV array end # - # Returns the table as a complete CSV String. Headers will be listed first, - # then all of the field rows. # - # This method assumes you want the Table.headers(), unless you explicitly - # pass <tt>:write_headers => false</tt>. # - def to_csv(write_headers: true, **options) array = write_headers ? [headers.to_csv(**options)] : [] - @table.each do |row| array.push(row.fields.to_csv(**options)) unless row.header_row? end @@ -613,9 +1032,24 @@ class CSV end end - # Shows the mode and size of this table in a US-ASCII String. def inspect - "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII") end end end @@ -2,5 +2,5 @@ class CSV # The version of the installed library. 
- VERSION = "3.2.2" end @@ -1,11 +1,8 @@ # frozen_string_literal: true require_relative "input_record_separator" -require_relative "match_p" require_relative "row" -using CSV::MatchP if CSV.const_defined?(:MatchP) - class CSV # Note: Don't use this class directly. This is an internal class. class Writer @@ -42,7 +39,10 @@ class CSV @headers ||= row if @use_headers @lineno += 1 - row = @fields_converter.convert(row, nil, lineno) if @fields_converter i = -1 converted_row = row.collect do |field| @@ -97,7 +97,7 @@ class CSV return unless @headers converter = @options[:header_fields_converter] - @headers = converter.convert(@headers, nil, 0) @headers.each do |header| header.freeze if header.is_a?(String) end |