From df01e3d5f412b95fb509ab0f069d36a5f3e3488f Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Tue, 14 Aug 2018 14:16:33 -0700 Subject: [PATCH 1/8] adding rspec tests from pull request #1 --- spec/people_spec.rb | 252 ++++++++++++++++++++++++++++++++++++++++++++ spec/spec_helper.rb | 1 + 2 files changed, 253 insertions(+) create mode 100644 spec/people_spec.rb create mode 100644 spec/spec_helper.rb diff --git a/spec/people_spec.rb b/spec/people_spec.rb new file mode 100644 index 0000000..0e4c29d --- /dev/null +++ b/spec/people_spec.rb @@ -0,0 +1,252 @@ +require 'spec_helper' + +module Peoplw + describe "Parse standard name variations" do + before( :each ) do + @np = People::NameParser.new + end + + it "should parse first initial, last name" do + name = @np.parse( "M ERICSON" ) + name[:parsed].should be true + name[:parse_type].should == 1 + name[:first].should == "M" + name[:last].should == "Ericson" + end + + it "should parse first initial, middle initial, last name" do + name = @np.parse( "M E ERICSON" ) + name[:parsed].should be true + name[:parse_type].should == 2 + name[:first].should == "M" + name[:middle].should == 'E' + name[:last].should == "Ericson" + end + + it "should parse first initial with period, middle initial with period, last name" do + name = @np.parse( "M.E. ERICSON" ) + name[:parsed].should be true + name[:parse_type].should == 3 + name[:first].should == "M" + name[:middle].should == 'E' + name[:last].should == "Ericson" + end + + it "should parse first initial, two middle initials, last name" do + name = @np.parse( "M E E ERICSON" ) + name[:parsed].should be true + name[:parse_type].should == 4 + name[:first].should == "M" + name[:middle].should == 'E E' + name[:last].should == "Ericson" + end + + it "should parse first initial, middle name, last name" do + name = @np.parse( "M EDWARD ERICSON" ) + name[:parsed].should be true + name[:parse_type].should == 5 + name[:first].should == "M" + name[:middle].should == 'Edward' + name[:last].should == "Ericson" + end + + it "should parse first name, middle initial, last name" do + name = @np.parse( "MATTHEW E ERICSON" ) + name[:parsed].should be true + name[:parse_type].should == 6 + name[:first].should == "Matthew" + name[:middle].should == 'E' + name[:last].should == "Ericson" + end + + it "should parse first name, two middle initials, last name" do + name = @np.parse( "MATTHEW E E ERICSON" ) + name[:parsed].should be true + name[:parse_type].should == 7 + name[:first].should == "Matthew" + name[:middle].should == 'E E' + name[:last].should == "Ericson" + end + + it "should parse first name, two middle initials with periods, last name" do + name = @np.parse( "MATTHEW E.E. ERICSON" ) + name[:parsed].should be true + name[:parse_type].should == 8 + name[:first].should == "Matthew" + name[:middle].should == 'E.E.' + name[:last].should == "Ericson" + end + + it "should parse first name, last name" do + name = @np.parse( "MATTHEW ERICSON" ) + name[:parsed].should be true + name[:parse_type].should == 9 + name[:first].should == "Matthew" + name[:last].should == "Ericson" + end + + it "should parse first name, middle name, last name" do + name = @np.parse( "MATTHEW EDWARD ERICSON" ) + name[:parsed].should be true + name[:parse_type].should == 10 + name[:first].should == "Matthew" + name[:middle].should == 'Edward' + name[:last].should == "Ericson" + end + + it 'does not parse two middle names' do + name = @np.parse( "MATTHEW EDWARD RICHARD ERICSON" ) + name[:parsed].should be false + end + +=begin + it "should parse first name, middle initial, middle name, last name" do + pending( "Doesn't correctly parse two middle names" ) do + name = @np.parse( "MATTHEW E. SHEIE ERICSON" ) + puts name.inspect + name[:parsed].should be true + name[:parse_type].should == 11 + name[:first].should == "Matthew" + name[:middle].should == 'E. Sheie' + name[:last].should == "Ericson" + end + end +=end + + end + + describe "Parse multiple names" do + before( :each ) do + @np = People::NameParser.new( :couples => true ) + end + + it "should parse multiple first names and last name" do + name = @np.parse( "Joe and Jill Hill" ) + name[:parsed].should == true + name[:multiple].should be true + name[:parsed2].should be true + name[:parse_type].should == 9 + name[:first2].should == "Jill" + end + + it "should parse multiple first names, middle initial, last name" do + name = @np.parse( "Joe and Jill S Hill" ) + name[:parsed].should == true + name[:multiple].should be true + name[:parsed2].should be true + name[:parse_type].should == 9 + name[:first2].should == "Jill" + name[:middle2].should == 'S' + end + + it "should parse multiple first names, middle initial, last name" do + name = @np.parse( "Joe S and Jill Hill" ) + name[:parsed].should == true + name[:multiple].should be true + name[:parsed2].should be true + name[:parse_type].should == 6 + name[:first2].should == "Jill" + name[:middle].should == 'S' + end + end + + describe "Parse unusual names" do + before( :each ) do + @np = People::NameParser.new + end + + it "should parse multiple-word last name" do + name = @np.parse( "Matthew De La Hoya" ) + name[:parsed].should be true + name[:parse_type].should == 9 + name[:last].should == "De La Hoya" + end + + it "should parse last name with cammel case" do + name = @np.parse( "Matthew McIntosh" ) + name[:parsed].should be true + name[:parse_type].should == 9 + name[:last].should == "McIntosh" + end + end + + describe "Parse names with decorations" do + before( :each ) do + @np = People::NameParser.new + end + + it "should parse name with the suffix 'Jr'" do + name = @np.parse( "Matthew E Ericson Jr" ) + name[:parsed].should be true + name[:suffix].should == "Jr" + end + + it "should parse name with a roman numeral suffix" do + name = @np.parse( "Matthew E Ericson III" ) + name[:parsed].should be true + name[:suffix].should == "III" + end + +# it "should parse name with an ordinal suffix" do +# name = @np.parse( "Matthew E Ericson 2nd" ) +# name[:parsed].should be true +# name[:suffix].should == "2nd" +# end + + it "should parse name with a suffix with periods" do + name = @np.parse( "Matthew E Ericson M.D." ) + name[:parsed].should be true + name[:suffix].should == "M.D." + end + + it "should parse name with a title" do + name = @np.parse( "Mr Matthew E Ericson" ) + name[:parsed].should be true + name[:title].should == "Mr " + end + + it "should parse name with a title with a period" do + name = @np.parse( "Mr. Matthew E Ericson" ) + name[:parsed].should be true + name[:title].should == "Mr. " + end + + it "should parse name with a title, first initial" do + name = @np.parse( "Rabbi M Edward Ericson" ) + name[:parsed].should be true + name[:parse_type].should == 5 + name[:title].should == "Rabbi " + name[:first].should == 'M' + end + + it "should parse 1950s married couple name" do + name = @np.parse( "Mr. and Mrs. Matthew E Ericson" ) + name[:parsed].should be true + name[:title].should == "Mr. And Mrs. " + name[:first].should == "Matthew" + end + end + + describe "Name case options" do + it "should change upper case to proper case" do + proper_np = People::NameParser.new( :case_mode => 'proper' ) + name = proper_np.parse( "MATTHEW ERICSON" ) + name[:first].should == "Matthew" + name[:last].should == "Ericson" + end + + it "should change proper case to upper case" do + proper_np = People::NameParser.new( :case_mode => 'upper' ) + name = proper_np.parse( "Matthew Ericson" ) + name[:first].should == "MATTHEW" + name[:last].should == "ERICSON" + end + + it "should leave case as is" do + proper_np = People::NameParser.new( :case_mode => 'leave' ) + name = proper_np.parse( "mATTHEW eRicSon" ) + name[:first].should == "mATTHEW" + name[:last].should == "eRicSon" + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 0000000..bc10b6d --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1 @@ +require 'people' From 25e01e928849b6c730da6df7d4e48ce1cef42e5b Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Tue, 14 Aug 2018 14:51:22 -0700 Subject: [PATCH 2/8] updating the gemspec file to newer format; deleting empty test directory; converting RSpec tests to 3.0; updated Rakefile; moved version to standard location --- Gemfile | 4 + Gemfile.lock | 40 +++++++++ Rakefile | 24 ++++- VERSION.yml | 4 - lib/people.rb | 2 + lib/people/version.rb | 3 + people.gemspec | 48 ++++------ spec/people_spec.rb | 204 +++++++++++++++++++++--------------------- test/people_test.rb | 7 -- test/test_helper.rb | 10 --- 10 files changed, 186 insertions(+), 160 deletions(-) create mode 100644 Gemfile create mode 100644 Gemfile.lock delete mode 100644 VERSION.yml create mode 100644 lib/people/version.rb delete mode 100644 test/people_test.rb delete mode 100644 test/test_helper.rb diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..953f5c6 --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ + +source "https://rubygems.org" + +gemspec diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..21f7dff --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,40 @@ +PATH + remote: . + specs: + people (0.2.1) + +GEM + remote: https://rubygems.org/ + specs: + coderay (1.1.2) + diff-lcs (1.3) + method_source (0.9.0) + pry (0.11.3) + coderay (~> 1.1.0) + method_source (~> 0.9.0) + rake (12.3.1) + rspec (3.8.0) + rspec-core (~> 3.8.0) + rspec-expectations (~> 3.8.0) + rspec-mocks (~> 3.8.0) + rspec-core (3.8.0) + rspec-support (~> 3.8.0) + rspec-expectations (3.8.1) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.8.0) + rspec-mocks (3.8.0) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.8.0) + rspec-support (3.8.0) + +PLATFORMS + ruby + +DEPENDENCIES + people! + pry + rake + rspec + +BUNDLED WITH + 1.16.3 diff --git a/Rakefile b/Rakefile index 3898919..568cb91 100644 --- a/Rakefile +++ b/Rakefile @@ -1,6 +1,23 @@ +#!/usr/bin/env rake +require "bundler/gem_tasks" + require 'rubygems' require 'rake' +require 'rspec/core/rake_task' + +desc "Run RSpec" +RSpec::Core::RakeTask.new do |t| + t.verbose = false +end + +desc "Run specs for all test cases" +task :spec_all do + system "rake spec" +end + +task :default => :spec + begin require 'jeweler' Jeweler::Tasks.new do |gem| @@ -37,11 +54,10 @@ rescue LoadError end -task :default => :test - -require 'rake/rdoctask' -Rake::RDocTask.new do |rdoc| +require 'rdoc/task' +RDoc::Task.new do |rdoc| if File.exist?('VERSION.yml') + require 'yaml' config = YAML.load(File.read('VERSION.yml')) version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}" else diff --git a/VERSION.yml b/VERSION.yml deleted file mode 100644 index f9fa7f6..0000000 --- a/VERSION.yml +++ /dev/null @@ -1,4 +0,0 @@ ---- -:major: 0 -:minor: 2 -:patch: 0 diff --git a/lib/people.rb b/lib/people.rb index f11783e..891c941 100644 --- a/lib/people.rb +++ b/lib/people.rb @@ -1,3 +1,5 @@ +require 'people/version' + module People # Class to parse names into their components like first, middle, last, etc. diff --git a/lib/people/version.rb b/lib/people/version.rb new file mode 100644 index 0000000..030893b --- /dev/null +++ b/lib/people/version.rb @@ -0,0 +1,3 @@ +module People + VERSION = '0.2.1' +end \ No newline at end of file diff --git a/people.gemspec b/people.gemspec index ac834c1..d21fde3 100644 --- a/people.gemspec +++ b/people.gemspec @@ -2,48 +2,30 @@ # DO NOT EDIT THIS FILE # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec` # -*- encoding: utf-8 -*- +require File.expand_path('../lib/people/version', __FILE__) Gem::Specification.new do |s| - s.name = %q{people} - s.version = "0.2.0" + s.name ='people' + s.version = People::VERSION - s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= - s.authors = ["Matthew Ericson"] - s.date = %q{2010-10-29} - s.email = %q{mericson@ericson.net} + s.authors = ['Matthew Ericson'] + s.date = '2010-10-29' + s.email = 'mericson@ericson.net' s.extra_rdoc_files = [ "LICENSE", "README.rdoc" ] - s.files = [ - ".document", - ".gitignore", - "LICENSE", - "README.rdoc", - "Rakefile", - "VERSION.yml", - "lib/people.rb", - "people.gemspec", - "test/people_test.rb", - "test/test_helper.rb" - ] - s.homepage = %q{http://github.com/mericson/people} + s.files = `git ls-files`.split($\) + s.executables = s.files.grep(%r{^bin/}).map{ |f| File.basename(f) } + s.test_files = s.files.grep(%r{^(test|spec|features)/}) + + s.homepage = 'http://github.com/mericson/people' s.rdoc_options = ["--charset=UTF-8"] s.require_paths = ["lib"] s.rubygems_version = %q{1.3.7} - s.summary = %q{Matts Name Parser} - s.test_files = [ - "test/people_test.rb", - "test/test_helper.rb" - ] - - if s.respond_to? :specification_version then - current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION - s.specification_version = 3 + s.summary = 'Matts Name Parser' - if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then - else - end - else - end + s.add_development_dependency 'pry' + s.add_development_dependency 'rake' + s.add_development_dependency 'rspec' end diff --git a/spec/people_spec.rb b/spec/people_spec.rb index 0e4c29d..394457d 100644 --- a/spec/people_spec.rb +++ b/spec/people_spec.rb @@ -8,95 +8,95 @@ module Peoplw it "should parse first initial, last name" do name = @np.parse( "M ERICSON" ) - name[:parsed].should be true - name[:parse_type].should == 1 - name[:first].should == "M" - name[:last].should == "Ericson" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 1 + expect(name[:first]).to eq "M" + expect(name[:last]).to eq "Ericson" end it "should parse first initial, middle initial, last name" do name = @np.parse( "M E ERICSON" ) - name[:parsed].should be true - name[:parse_type].should == 2 - name[:first].should == "M" - name[:middle].should == 'E' - name[:last].should == "Ericson" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 2 + expect(name[:first]).to eq "M" + expect(name[:middle]).to eq 'E' + expect(name[:last]).to eq "Ericson" end it "should parse first initial with period, middle initial with period, last name" do name = @np.parse( "M.E. ERICSON" ) - name[:parsed].should be true - name[:parse_type].should == 3 - name[:first].should == "M" - name[:middle].should == 'E' - name[:last].should == "Ericson" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 3 + expect(name[:first]).to eq "M" + expect(name[:middle]).to eq 'E' + expect(name[:last]).to eq "Ericson" end it "should parse first initial, two middle initials, last name" do name = @np.parse( "M E E ERICSON" ) - name[:parsed].should be true - name[:parse_type].should == 4 - name[:first].should == "M" - name[:middle].should == 'E E' - name[:last].should == "Ericson" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 4 + expect(name[:first]).to eq "M" + expect(name[:middle]).to eq 'E E' + expect(name[:last]).to eq "Ericson" end it "should parse first initial, middle name, last name" do name = @np.parse( "M EDWARD ERICSON" ) - name[:parsed].should be true - name[:parse_type].should == 5 - name[:first].should == "M" - name[:middle].should == 'Edward' - name[:last].should == "Ericson" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 5 + expect(name[:first]).to eq "M" + expect(name[:middle]).to eq 'Edward' + expect(name[:last]).to eq "Ericson" end it "should parse first name, middle initial, last name" do name = @np.parse( "MATTHEW E ERICSON" ) - name[:parsed].should be true - name[:parse_type].should == 6 - name[:first].should == "Matthew" - name[:middle].should == 'E' - name[:last].should == "Ericson" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 6 + expect(name[:first]).to eq "Matthew" + expect(name[:middle]).to eq 'E' + expect(name[:last]).to eq "Ericson" end it "should parse first name, two middle initials, last name" do name = @np.parse( "MATTHEW E E ERICSON" ) - name[:parsed].should be true - name[:parse_type].should == 7 - name[:first].should == "Matthew" - name[:middle].should == 'E E' - name[:last].should == "Ericson" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 7 + expect(name[:first]).to eq "Matthew" + expect(name[:middle]).to eq 'E E' + expect(name[:last]).to eq "Ericson" end it "should parse first name, two middle initials with periods, last name" do name = @np.parse( "MATTHEW E.E. ERICSON" ) - name[:parsed].should be true - name[:parse_type].should == 8 - name[:first].should == "Matthew" - name[:middle].should == 'E.E.' - name[:last].should == "Ericson" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 8 + expect(name[:first]).to eq "Matthew" + expect(name[:middle]).to eq 'E.E.' + expect(name[:last]).to eq "Ericson" end it "should parse first name, last name" do name = @np.parse( "MATTHEW ERICSON" ) - name[:parsed].should be true - name[:parse_type].should == 9 - name[:first].should == "Matthew" - name[:last].should == "Ericson" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 9 + expect(name[:first]).to eq "Matthew" + expect(name[:last]).to eq "Ericson" end it "should parse first name, middle name, last name" do name = @np.parse( "MATTHEW EDWARD ERICSON" ) - name[:parsed].should be true - name[:parse_type].should == 10 - name[:first].should == "Matthew" - name[:middle].should == 'Edward' - name[:last].should == "Ericson" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 10 + expect(name[:first]).to eq "Matthew" + expect(name[:middle]).to eq 'Edward' + expect(name[:last]).to eq "Ericson" end it 'does not parse two middle names' do name = @np.parse( "MATTHEW EDWARD RICHARD ERICSON" ) - name[:parsed].should be false + expect(name[:parsed]).to be false end =begin @@ -104,11 +104,11 @@ module Peoplw pending( "Doesn't correctly parse two middle names" ) do name = @np.parse( "MATTHEW E. SHEIE ERICSON" ) puts name.inspect - name[:parsed].should be true - name[:parse_type].should == 11 - name[:first].should == "Matthew" - name[:middle].should == 'E. Sheie' - name[:last].should == "Ericson" + expect( name[:parsed]).to be true + expect( name[:parse_type]).to eq 11 + expect( name[:first]).to eq "Matthew" + expect( name[:middle]).to eq 'E. Sheie' + expect( name[:last]).to eq "Ericson" end end =end @@ -122,31 +122,31 @@ module Peoplw it "should parse multiple first names and last name" do name = @np.parse( "Joe and Jill Hill" ) - name[:parsed].should == true - name[:multiple].should be true - name[:parsed2].should be true - name[:parse_type].should == 9 - name[:first2].should == "Jill" + expect(name[:parsed]).to eq true + expect(name[:multiple]).to be true + expect(name[:parsed2]).to be true + expect(name[:parse_type]).to eq 9 + expect(name[:first2]).to eq "Jill" end it "should parse multiple first names, middle initial, last name" do name = @np.parse( "Joe and Jill S Hill" ) - name[:parsed].should == true - name[:multiple].should be true - name[:parsed2].should be true - name[:parse_type].should == 9 - name[:first2].should == "Jill" - name[:middle2].should == 'S' + expect(name[:parsed]).to eq true + expect(name[:multiple]).to be true + expect(name[:parsed2]).to be true + expect(name[:parse_type]).to eq 9 + expect(name[:first2]).to eq "Jill" + expect(name[:middle2]).to eq 'S' end it "should parse multiple first names, middle initial, last name" do name = @np.parse( "Joe S and Jill Hill" ) - name[:parsed].should == true - name[:multiple].should be true - name[:parsed2].should be true - name[:parse_type].should == 6 - name[:first2].should == "Jill" - name[:middle].should == 'S' + expect(name[:parsed]).to eq true + expect(name[:multiple]).to be true + expect(name[:parsed2]).to be true + expect(name[:parse_type]).to eq 6 + expect(name[:first2]).to eq "Jill" + expect(name[:middle]).to eq 'S' end end @@ -157,16 +157,16 @@ module Peoplw it "should parse multiple-word last name" do name = @np.parse( "Matthew De La Hoya" ) - name[:parsed].should be true - name[:parse_type].should == 9 - name[:last].should == "De La Hoya" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 9 + expect(name[:last]).to eq "De La Hoya" end it "should parse last name with cammel case" do name = @np.parse( "Matthew McIntosh" ) - name[:parsed].should be true - name[:parse_type].should == 9 - name[:last].should == "McIntosh" + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 9 + expect(name[:last]).to eq "McIntosh" end end @@ -177,53 +177,53 @@ module Peoplw it "should parse name with the suffix 'Jr'" do name = @np.parse( "Matthew E Ericson Jr" ) - name[:parsed].should be true - name[:suffix].should == "Jr" + expect(name[:parsed]).to be true + expect(name[:suffix]).to eq "Jr" end it "should parse name with a roman numeral suffix" do name = @np.parse( "Matthew E Ericson III" ) - name[:parsed].should be true - name[:suffix].should == "III" + expect(name[:parsed]).to be true + expect(name[:suffix]).to eq "III" end # it "should parse name with an ordinal suffix" do # name = @np.parse( "Matthew E Ericson 2nd" ) -# name[:parsed].should be true -# name[:suffix].should == "2nd" +# expect(name[:parsed]).to be true +# expect(name[:suffix]).to eq "2nd" # end it "should parse name with a suffix with periods" do name = @np.parse( "Matthew E Ericson M.D." ) - name[:parsed].should be true - name[:suffix].should == "M.D." + expect(name[:parsed]).to be true + expect(name[:suffix]).to eq "M.D." end it "should parse name with a title" do name = @np.parse( "Mr Matthew E Ericson" ) - name[:parsed].should be true - name[:title].should == "Mr " + expect(name[:parsed]).to be true + expect(name[:title]).to eq "Mr " end it "should parse name with a title with a period" do name = @np.parse( "Mr. Matthew E Ericson" ) - name[:parsed].should be true - name[:title].should == "Mr. " + expect(name[:parsed]).to be true + expect(name[:title]).to eq "Mr. " end it "should parse name with a title, first initial" do name = @np.parse( "Rabbi M Edward Ericson" ) - name[:parsed].should be true - name[:parse_type].should == 5 - name[:title].should == "Rabbi " - name[:first].should == 'M' + expect(name[:parsed]).to be true + expect(name[:parse_type]).to eq 5 + expect(name[:title]).to eq "Rabbi " + expect(name[:first]).to eq 'M' end it "should parse 1950s married couple name" do name = @np.parse( "Mr. and Mrs. Matthew E Ericson" ) - name[:parsed].should be true - name[:title].should == "Mr. And Mrs. " - name[:first].should == "Matthew" + expect(name[:parsed]).to be true + expect(name[:title]).to eq "Mr. And Mrs. " + expect(name[:first]).to eq "Matthew" end end @@ -231,22 +231,22 @@ module Peoplw it "should change upper case to proper case" do proper_np = People::NameParser.new( :case_mode => 'proper' ) name = proper_np.parse( "MATTHEW ERICSON" ) - name[:first].should == "Matthew" - name[:last].should == "Ericson" + expect(name[:first]).to eq "Matthew" + expect(name[:last]).to eq "Ericson" end it "should change proper case to upper case" do proper_np = People::NameParser.new( :case_mode => 'upper' ) name = proper_np.parse( "Matthew Ericson" ) - name[:first].should == "MATTHEW" - name[:last].should == "ERICSON" + expect(name[:first]).to eq "MATTHEW" + expect(name[:last]).to eq "ERICSON" end it "should leave case as is" do proper_np = People::NameParser.new( :case_mode => 'leave' ) name = proper_np.parse( "mATTHEW eRicSon" ) - name[:first].should == "mATTHEW" - name[:last].should == "eRicSon" + expect(name[:first]).to eq "mATTHEW" + expect(name[:last]).to eq "eRicSon" end end end diff --git a/test/people_test.rb b/test/people_test.rb deleted file mode 100644 index 616f036..0000000 --- a/test/people_test.rb +++ /dev/null @@ -1,7 +0,0 @@ -require 'test_helper' - -class PeopleTest < Test::Unit::TestCase - should "probably rename this file and start testing for real" do - flunk "hey buddy, you should probably rename this file and start testing for real" - end -end diff --git a/test/test_helper.rb b/test/test_helper.rb deleted file mode 100644 index f11cc62..0000000 --- a/test/test_helper.rb +++ /dev/null @@ -1,10 +0,0 @@ -require 'rubygems' -require 'test/unit' -require 'shoulda' - -$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) -$LOAD_PATH.unshift(File.dirname(__FILE__)) -require 'people' - -class Test::Unit::TestCase -end From ee535d014b16f8623a0e1a4e42ccc588123ddd71 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Tue, 14 Aug 2018 14:56:35 -0700 Subject: [PATCH 3/8] applying PR-4 --- lib/people.rb | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/people.rb b/lib/people.rb index 891c941..ef8095f 100644 --- a/lib/people.rb +++ b/lib/people.rb @@ -278,9 +278,6 @@ def parse( name ) :parsed => false, :parse_type => "", - :parsed => false, - :parse_type => "", - :parsed2 => false, :parse_type2 => "", From 622f6076e6891d66cac268cf2afd5048d6664c96 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Wed, 15 Aug 2018 10:14:12 -0700 Subject: [PATCH 4/8] simplified parsing; added some robustness against UTF-8 characters; added tests --- Gemfile.lock | 4 +- lib/people.rb | 199 ++++++++++++++++++++++++----------- lib/people/version.rb | 2 +- people.gemspec | 1 + spec/people_spec.rb | 236 +++++++++++++++++++++++++++++++++++------- spec/spec_helper.rb | 2 + 6 files changed, 343 insertions(+), 101 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 21f7dff..b4d83f6 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,11 +1,12 @@ PATH remote: . specs: - people (0.2.1) + people (0.3.0) GEM remote: https://rubygems.org/ specs: + awesome_print (1.8.0) coderay (1.1.2) diff-lcs (1.3) method_source (0.9.0) @@ -31,6 +32,7 @@ PLATFORMS ruby DEPENDENCIES + awesome_print people! pry rake diff --git a/lib/people.rb b/lib/people.rb index ef8095f..b2dd02c 100644 --- a/lib/people.rb +++ b/lib/people.rb @@ -3,6 +3,9 @@ module People # Class to parse names into their components like first, middle, last, etc. + + # How it works: https://xkcd.com/208/ + class NameParser attr_reader :seen, :parsed @@ -10,8 +13,7 @@ class NameParser # Creates a name parsing object def initialize( opts={} ) - @name_chars = "A-Za-z0-9\\-\\'" - @nc = @name_chars + @nc = @name_chars = "A-Za-z\\-" @opts = { :strip_mr => true, @@ -126,6 +128,8 @@ def initialize( opts={} ) 'Ph\.?d\.?', 'C\.?P\.?A\.?', + '1st', '2nd', '3rd', '\d+th', # numeric instead of roman + 'XI{1,3}', # 11th, 12th, 13th 'X', # 10th 'IV', # 4th @@ -137,8 +141,8 @@ def initialize( opts={} ) 'D.?M\.?D\.?' # M.D. ]; - @last_name_p = "((;.+)|(((Mc|Mac|Des|Dell[ae]|Del|De La|De Los|Da|Di|Du|La|Le|Lo|St\.|Den|Von|Van|Von Der|Van De[nr]) )?([#{@nc}]+)))"; - @mult_name_p = "((;.+)|(((Mc|Mac|Des|Dell[ae]|Del|De La|De Los|Da|Di|Du|La|Le|Lo|St\.|Den|Von|Van|Von Der|Van De[nr]) )?([#{@nc} ]+)))"; + @last_name_p = "((;.+)|(((Mc|Mac|Des|Dell[ae]|Del|De La|De Los|Da|Di|Du|La|Le|Lo|St\.|Den|Von|Van|V[ao]n De[nr]) )?([#{@nc}]+)))" + @mult_name_p = "((;.+)|(((Mc|Mac|Des|Dell[ae]|Del|De La|De Los|Da|Di|Du|La|Le|Lo|St\.|Den|Von|Van|V[ao]n De[nr]) )?([#{@nc} ]+)))" @seen = 0 @parsed = 0; @@ -257,10 +261,6 @@ def parse( name ) out[:clean] = name - - - - return { :title => "", :first => "", @@ -290,10 +290,9 @@ def parse( name ) def clean( s ) # remove illegal characters - s.gsub!( /[^A-Za-z0-9\-\'\.&\/ \,]/, "" ) - # remove repeating spaces - s.gsub!( / +/, " " ) - s.gsub!( /\s+/, " " ) +# s.gsub!( /[^A-Za-z0-9\-\'\.&\/ \,]/, "" ) # we don't want this, because it's stripping UTF-8 characters + # squish repeating spaces + s.gsub!( /\s+/, ' ' ) s.strip! s @@ -346,93 +345,167 @@ def get_name_parts( name, no_last_name = false ) parsed = false + if name.match( /^([#{@nc}]+) (#{last_name_p})$/i ) + first = $1 + middle = '' + last = $2 + parsed = true + parse_type = 9 + + # MATTHEW E ERICSON + elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? (#{last_name_p})$/i ) + first = $1 + middle = $2 + last = $3 + parsed = true + parse_type = 6 + +# it would be better to strip-off complicated lastnames first + + elsif /(?((;.+)|(((Mc|Mac|Des|Dell[ae]|Del|De La|De Los|Da|Di|Du|La|Le|Lo|St\.|Den|Von|Van|V[ao]n De[nr]) )?([\p{Word}\-\.]+))))$/i =~ name + last = ln + name.slice!(ln) + name.strip! + parsed = true + parse_type = 0 + + if /^(?[A-Za-z\-\.]+)[\s+\.](?([A-Za-z\-\.]+\s*)+)\s*$/ =~ name + first = fn + middle = mn + end + + # just as a fall-back -- nothing should need this rule + + elsif /^(?[A-Za-z\-\.]+)[\s+\.](?([A-Za-z\-\.]+\s*)+)\s+(?[A-Za-z\-\.]+)$/ =~ name + first = fn + middle = mn + last = ln + parsed = true + parse_type = 99 +=begin # M ERICSON - if name.match( /^([A-Za-z])\.? (#{last_name_p})$/i ) - first = $1; - middle = ''; - last = $2; + elsif name.match( /^([A-Za-z])\.? (#{last_name_p})$/i ) + first = $1 + middle = '' + last = $2 parsed = true - parse_type = 1; + parse_type = 1 # M E ERICSON elsif name.match( /^([A-Za-z])\.? ([A-Za-z])\.? (#{last_name_p})$/i ) - first = $1; - middle = $2; - last = $3; + first = $1 + middle = $2 + last = $3 parsed = true - parse_type = 2; + parse_type = 2 # M.E. ERICSON elsif name.match( /^([A-Za-z])\.([A-Za-z])\. (#{last_name_p})$/i ) - first = $1; - middle = $2; - last = $3; + first = $1 + middle = $2 + last = $3 parsed = true - parse_type = 3; + parse_type = 3 # M E E ERICSON elsif name.match( /^([A-Za-z])\.? ([A-Za-z])\.? ([A-Za-z])\.? (#{last_name_p})$/i ) - first = $1; - middle = $2 + ' ' + $3; - last = $4; + first = $1 + middle = $2 + ' ' + $3 + last = $4 parsed = true - parse_type = 4; + parse_type = 4 # M EDWARD ERICSON elsif name.match( /^([A-Za-z])\.? ([#{@nc}]+) (#{last_name_p})$/i ) - first = $1; - middle = $2; - last = $3; + first = $1 + middle = $2 + last = $3 parsed = true - parse_type = 5; + parse_type = 5 # MATTHEW E ERICSON elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? (#{last_name_p})$/i ) - first = $1; - middle = $2; - last = $3; + first = $1 + middle = $2 + last = $3 parsed = true - parse_type = 6; - - # MATTHEW E E ERICSON - elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? ([A-Za-z])\.? (#{last_name_p})$/i ) - first = $1; - middle = $2 + ' ' + $3; - last = $4; - parsed = true - parse_type = 7; + parse_type = 6 # MATTHEW E.E. ERICSON elsif name.match( /^([#{@nc}]+) ([A-Za-z]\.[A-Za-z]\.) (#{last_name_p})$/i ) - first = $1; - middle = $2; - last = $3; + first = $1 + middle = $2 + last = $3 + parsed = true + parse_type = 8 + + # MATTHEW E. E. ERICSON + elsif name.match( /^([#{@nc}]+) ([A-Za-z]\. )+ (#{last_name_p})$/i ) + first = $1 + middle = $2 + last = $3 + parsed = true + parse_type = 15 + + # MATTHEW E E ERICSON + elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? ([A-Za-z])\.? (#{last_name_p})$/i ) + first = $1 + middle = $2 + ' ' + $3 + last = $4 parsed = true - parse_type = 8; + parse_type = 7 # MATTHEW ERICSON elsif name.match( /^([#{@nc}]+) (#{last_name_p})$/i ) - first = $1; - middle = ''; - last = $2; + first = $1 + middle = '' + last = $2 parsed = true - parse_type = 9; + parse_type = 9 # MATTHEW EDWARD ERICSON elsif name.match( /^([#{@nc}]+) ([#{@nc}]+) (#{last_name_p})$/i ) - first = $1; - middle = $2; - last = $3; + first = $1 + middle = $2 + last = $3 parsed = true - parse_type = 10; + parse_type = 10 # MATTHEW E. SHEIE ERICSON - elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? ($multNamePat)$/i ) - first = $1; - middle = $2; - last = $3; + elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? ($mult_name_p)$/i ) + first = $1 + middle = $2 + last = $3 parsed = true - parse_type = 11; + parse_type = 11 + + + + + elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? (.*)$/i ) + first = $1 + middle = $2 + last = $3 + parsed = true + parse_type = 12 + + # Abaid Ullah A. Choudry + elsif name.match( /^([#{@nc}]+) (.* [A-Za-z])\.? (.*)$/i ) + first = $1 + middle = $2 + last = $3 + parsed = true + parse_type = 13 + + # Abdel Kader El Tal + elsif name.match( /^([#{@nc}]+) ([^ ]*)\.? (.*)$/i ) + first = $1 + middle = $2 + last = $3 + parsed = true + parse_type = 14 +=end + end last.gsub!( /;/, "" ) @@ -449,7 +522,7 @@ def proper ( name ) # Now uppercase first letter of every word. By checking on word boundaries, # we will account for apostrophes (D'Angelo) and hyphenated names - fixed.gsub!( /\b(\w+)/ ) { |m| m.match( /^[ixv]$+/i ) ? m.upcase : m.capitalize } + fixed.gsub!( /\b(\p{Word}+)/ ) { |m| m.match( /^[ixv]$+/i ) ? m.upcase : m.capitalize } # Name case Macs and Mcs # Exclude names with 1-2 letters after prefix like Mack, Macky, Mace diff --git a/lib/people/version.rb b/lib/people/version.rb index 030893b..733527c 100644 --- a/lib/people/version.rb +++ b/lib/people/version.rb @@ -1,3 +1,3 @@ module People - VERSION = '0.2.1' + VERSION = '0.3.0' end \ No newline at end of file diff --git a/people.gemspec b/people.gemspec index d21fde3..a0e0ef5 100644 --- a/people.gemspec +++ b/people.gemspec @@ -28,4 +28,5 @@ Gem::Specification.new do |s| s.add_development_dependency 'pry' s.add_development_dependency 'rake' s.add_development_dependency 'rspec' + s.add_development_dependency 'awesome_print' end diff --git a/spec/people_spec.rb b/spec/people_spec.rb index 394457d..1008c02 100644 --- a/spec/people_spec.rb +++ b/spec/people_spec.rb @@ -1,118 +1,247 @@ require 'spec_helper' -module Peoplw +=begin + +# more test cases -- the ones with "and" might be a bit excotic + +Donald Ericson +Ericson, Donald R. S +Ericson, Matthew +Matthew E. Ericson +Matt Van Ericson +Matthew E. La Ericson +M. Edward Ericson +Matthew and Ben Ericson +Mathew R. and Ben Q. Ericson +Ericson, Matthew R. and Ben Q. +MATTHEW ERICSON +MATTHEW MCDONALD +Mr. Matthew Ericson +Sir Matthew Ericson +Matthew Ericson III +Dr. Matthew Q Ericson IV +Ericson, Mr. Matthew E +Von Ericson, Dr. Matthew Edward +Angel S. Viloria III + +=end + +module People describe "Parse standard name variations" do before( :each ) do @np = People::NameParser.new end + it "should parse first initial" do + name = @np.parse( "M" ) + expect(name[:parsed]).to be true + expect(name[:first]).to eq '' + expect(name[:last]).to eq 'M' + expect(name[:parse_type]).to eq 0 + end + + it "should parse first initial last initial" do + name = @np.parse( "M E" ) + expect(name[:parsed]).to be true + expect(name[:first]).to eq 'M' + expect(name[:last]).to eq 'E' + expect(name[:parse_type]).to eq 9 + end + it "should parse first initial, last name" do name = @np.parse( "M ERICSON" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 1 expect(name[:first]).to eq "M" expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 9 # 1 end it "should parse first initial, middle initial, last name" do name = @np.parse( "M E ERICSON" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 2 expect(name[:first]).to eq "M" expect(name[:middle]).to eq 'E' expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 6 # 2 end it "should parse first initial with period, middle initial with period, last name" do name = @np.parse( "M.E. ERICSON" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 3 expect(name[:first]).to eq "M" - expect(name[:middle]).to eq 'E' + expect(name[:middle]).to eq 'E.' expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 0 # 3 end it "should parse first initial, two middle initials, last name" do name = @np.parse( "M E E ERICSON" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 4 expect(name[:first]).to eq "M" expect(name[:middle]).to eq 'E E' expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 0 # 4 + end + + it "should parse first initial, two middle initials, last name" do + name = @np.parse( "M. E. E. ERICSON" ) + expect(name[:parsed]).to be true + expect(name[:first]).to eq "M." + expect(name[:middle]).to eq 'E. E.' + expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 0 # 4 end it "should parse first initial, middle name, last name" do name = @np.parse( "M EDWARD ERICSON" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 5 expect(name[:first]).to eq "M" expect(name[:middle]).to eq 'Edward' expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 0 # 5 end it "should parse first name, middle initial, last name" do name = @np.parse( "MATTHEW E ERICSON" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 6 expect(name[:first]).to eq "Matthew" expect(name[:middle]).to eq 'E' expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 6 end it "should parse first name, two middle initials, last name" do name = @np.parse( "MATTHEW E E ERICSON" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 7 expect(name[:first]).to eq "Matthew" expect(name[:middle]).to eq 'E E' expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 0 # 7 end - it "should parse first name, two middle initials with periods, last name" do + it "should parse first name, two middle initials with periods without space, last name" do name = @np.parse( "MATTHEW E.E. ERICSON" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 8 expect(name[:first]).to eq "Matthew" expect(name[:middle]).to eq 'E.E.' expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 0 # 8 + end + + it "should parse first name, two middle initials with periods, last name" do + name = @np.parse( "MATTHEW A. B. C. ERICSON" ) + expect(name[:parsed]).to be true + expect(name[:first]).to eq "Matthew" + expect(name[:middle]).to eq 'A. B. C.' + expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 0 # 8 end it "should parse first name, last name" do name = @np.parse( "MATTHEW ERICSON" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 9 expect(name[:first]).to eq "Matthew" expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 9 + end + + it "should parse compound first name, last name" do + name = @np.parse( "MATTHEW-JOSEPH ERICSON-MILLER" ) + expect(name[:parsed]).to be true + expect(name[:first]).to eq "Matthew-Joseph" + expect(name[:last]).to eq "Ericson-Miller" + expect(name[:parse_type]).to eq 9 end it "should parse first name, middle name, last name" do name = @np.parse( "MATTHEW EDWARD ERICSON" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 10 expect(name[:first]).to eq "Matthew" expect(name[:middle]).to eq 'Edward' expect(name[:last]).to eq "Ericson" + expect(name[:parse_type]).to eq 0 # 10 end - it 'does not parse two middle names' do + it "parses first name, middle initial, middle name, last name" do + name = @np.parse( "MATTHEW E. SHEIE ERICSON" ) + expect( name[:parsed]).to be true + expect( name[:first]).to eq "Matthew" + expect( name[:middle]).to eq 'E. Sheie' + expect( name[:last]).to eq "Ericson" + expect( name[:parse_type]).to eq 0 # 11 + end + + it "parses first name, middle name, middle initial, last name" do + name = @np.parse( "MATTHEW ERWIN S. ERICSON" ) + expect( name[:parsed]).to be true + expect( name[:first]).to eq "Matthew" + expect( name[:middle]).to eq 'Erwin S.' + expect( name[:last]).to eq "Ericson" + expect( name[:parse_type]).to eq 0 # 13 + end + + it 'parses two middle names' do name = @np.parse( "MATTHEW EDWARD RICHARD ERICSON" ) - expect(name[:parsed]).to be false + expect( name[:parsed]).to be true + expect( name[:first]).to eq "Matthew" + expect( name[:middle]).to eq 'Edward Richard' + expect( name[:last]).to eq "Ericson" + expect( name[:parse_type]).to eq 0 # 11 end -=begin - it "should parse first name, middle initial, middle name, last name" do - pending( "Doesn't correctly parse two middle names" ) do - name = @np.parse( "MATTHEW E. SHEIE ERICSON" ) - puts name.inspect + it 'parses three middle names' do + name = @np.parse( "MATTHEW EDWARD RICHARD MARIA ERICSON" ) expect( name[:parsed]).to be true - expect( name[:parse_type]).to eq 11 expect( name[:first]).to eq "Matthew" - expect( name[:middle]).to eq 'E. Sheie' + expect( name[:middle]).to eq 'Edward Richard Maria' expect( name[:last]).to eq "Ericson" - end + expect( name[:parse_type]).to eq 0 # 11 end -=end + it 'parses two middle names and handles compound names' do + name = @np.parse( "MATTHEW EDWARD RICHARD MARIA-ERICSON" ) + expect( name[:parsed]).to be true + expect( name[:first]).to eq "Matthew" + expect( name[:middle]).to eq 'Edward Richard' + expect( name[:last]).to eq "Maria-Ericson" + expect( name[:parse_type]).to eq 0 # 11 + end + + it 'parses one middle names and handles compound names' do + name = @np.parse( "MATTHEW HANS-WURST MARIA-ERICSON" ) + expect( name[:parsed]).to be true + expect( name[:first]).to eq "Matthew" + expect( name[:middle]).to eq 'Hans-Wurst' + expect( name[:last]).to eq "Maria-Ericson" + expect( name[:parse_type]).to eq 0 # 10 + end + + it "should parse compound first name, last name" do + name = @np.parse( "MATTHEW-JOSEPH HANS-WURST ERICSON-MILLER" ) + expect(name[:parsed]).to be true + expect(name[:first]).to eq "Matthew-Joseph" + expect(name[:middle]).to eq 'Hans-Wurst' + expect(name[:last]).to eq "Ericson-Miller" + expect(name[:parse_type]).to eq 0 # 10 + end + + it 'parses one middle names and handles compound names' do + name = @np.parse( "HANS-WURST MATTHEW MARIA-ERICSON" ) + expect( name[:parsed]).to be true + expect( name[:first]).to eq 'Hans-Wurst' + expect( name[:middle]).to eq "Matthew" + expect( name[:last]).to eq "Maria-Ericson" + expect( name[:parse_type]).to eq 0 # 10 + end + + it 'parses one middle names and handles compound names' do + name = @np.parse( "HANS WURST MATTHEW MARIA DE LA CULPA-GARCÍA" ) + expect( name[:parsed]).to be true + expect( name[:last]).to eq "De La Culpa-García" + expect( name[:middle]).to eq 'Wurst Matthew Maria' + expect( name[:first]).to eq 'Hans' + expect( name[:parse_type]).to eq 0 + end end describe "Parse multiple names" do @@ -125,18 +254,29 @@ module Peoplw expect(name[:parsed]).to eq true expect(name[:multiple]).to be true expect(name[:parsed2]).to be true - expect(name[:parse_type]).to eq 9 expect(name[:first2]).to eq "Jill" + expect(name[:parse_type]).to eq 9 end - +=begin + it "should parse multiple first names and last name with UTF-8 characters" do + name = @np.parse("MARÍA AND JOSÉ GARCÍA-D'ANGELO" ) + expect(name[:parsed]).to eq true + expect(name[:multiple]).to be true + expect(name[:parsed2]).to be true + expect(name[:first]).to eq 'María' + expect(name[:first2]).to eq 'José' + expect( name[:last]).to eq "García-D'Angelo" + expect(name[:parse_type]).to eq 9 + end +=end it "should parse multiple first names, middle initial, last name" do name = @np.parse( "Joe and Jill S Hill" ) expect(name[:parsed]).to eq true expect(name[:multiple]).to be true expect(name[:parsed2]).to be true expect(name[:parse_type]).to eq 9 - expect(name[:first2]).to eq "Jill" expect(name[:middle2]).to eq 'S' + expect(name[:first2]).to eq "Jill" end it "should parse multiple first names, middle initial, last name" do @@ -144,9 +284,9 @@ module Peoplw expect(name[:parsed]).to eq true expect(name[:multiple]).to be true expect(name[:parsed2]).to be true - expect(name[:parse_type]).to eq 6 expect(name[:first2]).to eq "Jill" expect(name[:middle]).to eq 'S' + expect(name[:parse_type]).to eq 6 end end @@ -158,15 +298,15 @@ module Peoplw it "should parse multiple-word last name" do name = @np.parse( "Matthew De La Hoya" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 9 expect(name[:last]).to eq "De La Hoya" + expect(name[:parse_type]).to eq 9 end it "should parse last name with cammel case" do name = @np.parse( "Matthew McIntosh" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 9 expect(name[:last]).to eq "McIntosh" + expect(name[:parse_type]).to eq 9 end end @@ -187,11 +327,35 @@ module Peoplw expect(name[:suffix]).to eq "III" end -# it "should parse name with an ordinal suffix" do -# name = @np.parse( "Matthew E Ericson 2nd" ) -# expect(name[:parsed]).to be true -# expect(name[:suffix]).to eq "2nd" -# end + it "should parse name with a numerical suffix 1" do + name = @np.parse( "Matthew E Ericson 1st" ) + expect(name[:parsed]).to be true + expect(name[:suffix]).to eq "1st" + end + + it "should parse name with a numerical suffix 2" do + name = @np.parse( "Matthew E Ericson 2nd" ) + expect(name[:parsed]).to be true + expect(name[:suffix]).to eq "2nd" + end + + it "should parse name with a numerical suffix 3" do + name = @np.parse( "Matthew E Ericson 3rd" ) + expect(name[:parsed]).to be true + expect(name[:suffix]).to eq "3rd" + end + + it "should parse name with a numerical suffix 4" do + name = @np.parse( "Matthew E Ericson 4th" ) + expect(name[:parsed]).to be true + expect(name[:suffix]).to eq "4th" + end + + it "should parse name with a numerical suffix 13" do + name = @np.parse( "Matthew E Ericson 13th" ) + expect(name[:parsed]).to be true + expect(name[:suffix]).to eq "13th" + end it "should parse name with a suffix with periods" do name = @np.parse( "Matthew E Ericson M.D." ) @@ -214,9 +378,9 @@ module Peoplw it "should parse name with a title, first initial" do name = @np.parse( "Rabbi M Edward Ericson" ) expect(name[:parsed]).to be true - expect(name[:parse_type]).to eq 5 expect(name[:title]).to eq "Rabbi " expect(name[:first]).to eq 'M' + expect(name[:parse_type]).to eq 0 # 5 end it "should parse 1950s married couple name" do diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index bc10b6d..ae34ca0 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1 +1,3 @@ require 'people' +require 'pry' +require 'awesome_print' From 9cf74b01b824467f3a4f6a083f9fd4f2b3c9ed54 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Wed, 15 Aug 2018 10:23:23 -0700 Subject: [PATCH 5/8] removing unnecessary parsing rules --- lib/people.rb | 123 -------------------------------------------- spec/people_spec.rb | 5 +- 2 files changed, 3 insertions(+), 125 deletions(-) diff --git a/lib/people.rb b/lib/people.rb index b2dd02c..23ad606 100644 --- a/lib/people.rb +++ b/lib/people.rb @@ -382,129 +382,6 @@ def get_name_parts( name, no_last_name = false ) last = ln parsed = true parse_type = 99 -=begin - # M ERICSON - elsif name.match( /^([A-Za-z])\.? (#{last_name_p})$/i ) - first = $1 - middle = '' - last = $2 - parsed = true - parse_type = 1 - - # M E ERICSON - elsif name.match( /^([A-Za-z])\.? ([A-Za-z])\.? (#{last_name_p})$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 2 - - # M.E. ERICSON - elsif name.match( /^([A-Za-z])\.([A-Za-z])\. (#{last_name_p})$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 3 - - # M E E ERICSON - elsif name.match( /^([A-Za-z])\.? ([A-Za-z])\.? ([A-Za-z])\.? (#{last_name_p})$/i ) - first = $1 - middle = $2 + ' ' + $3 - last = $4 - parsed = true - parse_type = 4 - - # M EDWARD ERICSON - elsif name.match( /^([A-Za-z])\.? ([#{@nc}]+) (#{last_name_p})$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 5 - - # MATTHEW E ERICSON - elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? (#{last_name_p})$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 6 - - # MATTHEW E.E. ERICSON - elsif name.match( /^([#{@nc}]+) ([A-Za-z]\.[A-Za-z]\.) (#{last_name_p})$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 8 - - # MATTHEW E. E. ERICSON - elsif name.match( /^([#{@nc}]+) ([A-Za-z]\. )+ (#{last_name_p})$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 15 - - # MATTHEW E E ERICSON - elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? ([A-Za-z])\.? (#{last_name_p})$/i ) - first = $1 - middle = $2 + ' ' + $3 - last = $4 - parsed = true - parse_type = 7 - - # MATTHEW ERICSON - elsif name.match( /^([#{@nc}]+) (#{last_name_p})$/i ) - first = $1 - middle = '' - last = $2 - parsed = true - parse_type = 9 - - # MATTHEW EDWARD ERICSON - elsif name.match( /^([#{@nc}]+) ([#{@nc}]+) (#{last_name_p})$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 10 - - # MATTHEW E. SHEIE ERICSON - elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? ($mult_name_p)$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 11 - - - - - elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? (.*)$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 12 - - # Abaid Ullah A. Choudry - elsif name.match( /^([#{@nc}]+) (.* [A-Za-z])\.? (.*)$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 13 - - # Abdel Kader El Tal - elsif name.match( /^([#{@nc}]+) ([^ ]*)\.? (.*)$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 14 -=end end diff --git a/spec/people_spec.rb b/spec/people_spec.rb index 1008c02..2ca8413 100644 --- a/spec/people_spec.rb +++ b/spec/people_spec.rb @@ -259,13 +259,14 @@ module People end =begin it "should parse multiple first names and last name with UTF-8 characters" do - name = @np.parse("MARÍA AND JOSÉ GARCÍA-D'ANGELO" ) + name = @np.parse("MARÍA AND JOSÉ GARCÍA-D'ANGELO III" ) expect(name[:parsed]).to eq true expect(name[:multiple]).to be true expect(name[:parsed2]).to be true expect(name[:first]).to eq 'María' expect(name[:first2]).to eq 'José' - expect( name[:last]).to eq "García-D'Angelo" + expect(name[:last]).to eq "García-D'Angelo" + expect(name[:suffix]).to eq "III" expect(name[:parse_type]).to eq 9 end =end From 89d8e4edbf00846a4598af3e7688e6890416a1f4 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Wed, 15 Aug 2018 10:30:19 -0700 Subject: [PATCH 6/8] more clean-up --- lib/people.rb | 60 ++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/lib/people.rb b/lib/people.rb index 23ad606..8d2a53e 100644 --- a/lib/people.rb +++ b/lib/people.rb @@ -154,7 +154,7 @@ def parse( name ) @seen += 1 clean = '' - out = Hash.new( "" ) + out = Hash.new( '' ) out[:orig] = name.dup @@ -172,19 +172,15 @@ def parse( name ) name.gsub!( /Mr\.? \& Mrs\.?/i, "Mr. and Mrs." ) # Flip last and first if contain comma - name.gsub!( /;/, "" ) - name.gsub!( /(.+),(.+)/, "\\2 ;\\1" ) - - - name.gsub!( /,/, "" ) + name.gsub!( /;/, '' ) + name.gsub!( /(.+),(.+)/, "\\2 \\1" ) + name.gsub!( /,/, '' ) name.strip! if @opts[:couples] - name.gsub!( / +and +/i, " \& " ) + name.gsub!( /\s+and\s+/i, ' & ' ) end - - if @opts[:couples] && name.match( /\&/ ) names = name.split( / *& */ ) @@ -208,7 +204,7 @@ def parse( name ) out[:suffix] = get_suffix( a ); a.strip! - a += " " + a += ' ' parts = get_name_parts( a, true ) @@ -220,7 +216,7 @@ def parse( name ) if out[:parsed] && out[:parsed2] out[:multiple] = true else - out = Hash.new( "" ) + out = Hash.new( '' ) end @@ -262,24 +258,24 @@ def parse( name ) out[:clean] = name return { - :title => "", - :first => "", - :middle => "", - :last => "", - :suffix => "", + :title => '', + :first => '', + :middle => '', + :last => '', + :suffix => '', - :title2 => "", - :first2 => "", - :middle2 => "", - :suffix2 => "", + :title2 => '', + :first2 => '', + :middle2 => '', + :suffix2 => '', - :clean => "", + :clean => '', :parsed => false, - :parse_type => "", + :parse_type => '', :parsed2 => false, - :parse_type2 => "", + :parse_type2 => '', :multiple => false }.merge( out ) @@ -290,7 +286,7 @@ def parse( name ) def clean( s ) # remove illegal characters -# s.gsub!( /[^A-Za-z0-9\-\'\.&\/ \,]/, "" ) # we don't want this, because it's stripping UTF-8 characters +# s.gsub!( /[^A-Za-z0-9\-\'\.&\/ \,]/, '' ) # we don't want this, because it's stripping UTF-8 characters # squish repeating spaces s.gsub!( /\s+/, ' ' ) s.strip! @@ -311,7 +307,7 @@ def get_title( name ) end - return "" + return '' end def get_suffix( name ) @@ -326,14 +322,14 @@ def get_suffix( name ) end - return "" + return '' end def get_name_parts( name, no_last_name = false ) - first = "" - middle = "" - last = "" + first = '' + middle = '' + last = '' if no_last_name last_name_p = '' @@ -369,14 +365,14 @@ def get_name_parts( name, no_last_name = false ) parsed = true parse_type = 0 - if /^(?[A-Za-z\-\.]+)[\s+\.](?([A-Za-z\-\.]+\s*)+)\s*$/ =~ name + if /^(?[\p{Word}\-\.]+)[\s+\.](?([\p{Word}\-\.]+\s*)+)\s*$/ =~ name first = fn middle = mn end # just as a fall-back -- nothing should need this rule - elsif /^(?[A-Za-z\-\.]+)[\s+\.](?([A-Za-z\-\.]+\s*)+)\s+(?[A-Za-z\-\.]+)$/ =~ name + elsif /^(?[\p{Word}\-\.]+)[\s+\.](?([\p{Word}\-\.]+\s*)+)\s+(?[\p{Word}\-\.]+)$/ =~ name first = fn middle = mn last = ln @@ -385,7 +381,7 @@ def get_name_parts( name, no_last_name = false ) end - last.gsub!( /;/, "" ) + last.gsub!( /;/, '' ) return [ parsed, parse_type, first, middle, last ]; From 76e33b83de5b122084c89e87f544f63bf5c615b5 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Wed, 15 Aug 2018 11:44:19 -0700 Subject: [PATCH 7/8] adding more complex test + bugfix --- Rakefile | 3 +++ lib/people.rb | 57 ++++++++++++++++++++++++--------------------- spec/people_spec.rb | 34 +++++++++++++-------------- 3 files changed, 50 insertions(+), 44 deletions(-) diff --git a/Rakefile b/Rakefile index 568cb91..32e834a 100644 --- a/Rakefile +++ b/Rakefile @@ -18,6 +18,8 @@ end task :default => :spec +=begin + begin require 'jeweler' Jeweler::Tasks.new do |gem| @@ -70,3 +72,4 @@ RDoc::Task.new do |rdoc| rdoc.rdoc_files.include('lib/**/*.rb') end +=end diff --git a/lib/people.rb b/lib/people.rb index 8d2a53e..282804e 100644 --- a/lib/people.rb +++ b/lib/people.rb @@ -1,4 +1,4 @@ -require 'people/version' +require 'people/version' unless defined?(People::VERSION) module People @@ -183,9 +183,7 @@ def parse( name ) if @opts[:couples] && name.match( /\&/ ) - names = name.split( / *& */ ) - a = names[0] - b = names[1] + a, b, _rest = name.split( / *& */ ) out[:title2] = get_title( b ); out[:suffix2] = get_suffix( b ); @@ -200,6 +198,7 @@ def parse( name ) out[:middle2] = parts[3] out[:last] = parts[4] + out[:title] = get_title( a ); out[:suffix] = get_suffix( a ); @@ -211,7 +210,7 @@ def parse( name ) out[:parsed] = parts[0] out[:parse_type] = parts[1] out[:first] = parts[2] - out[:middle] = parts[3] + out[:middle] = parts[4] if out[:parsed] && out[:parsed2] out[:multiple] = true @@ -285,8 +284,11 @@ def parse( name ) def clean( s ) + # IMPORTANT: we don't want to remove "illegal" characters, because it's stripping off valid UTF-8 characters +=begin # remove illegal characters -# s.gsub!( /[^A-Za-z0-9\-\'\.&\/ \,]/, '' ) # we don't want this, because it's stripping UTF-8 characters + s.gsub!( /[^A-Za-z0-9\-\'\.&\/ \,]/, '' ) +=end # squish repeating spaces s.gsub!( /\s+/, ' ' ) s.strip! @@ -341,42 +343,43 @@ def get_name_parts( name, no_last_name = false ) parsed = false - if name.match( /^([#{@nc}]+) (#{last_name_p})$/i ) - first = $1 - middle = '' - last = $2 - parsed = true - parse_type = 9 - - # MATTHEW E ERICSON - elsif name.match( /^([#{@nc}]+) ([A-Za-z])\.? (#{last_name_p})$/i ) - first = $1 - middle = $2 - last = $3 - parsed = true - parse_type = 6 - -# it would be better to strip-off complicated lastnames first - - elsif /(?((;.+)|(((Mc|Mac|Des|Dell[ae]|Del|De La|De Los|Da|Di|Du|La|Le|Lo|St\.|Den|Von|Van|V[ao]n De[nr]) )?([\p{Word}\-\.]+))))$/i =~ name + # FIRST strip-off the last name + if /(?((;.+)|(((Mc|Mac|Des|Dell[ae]|Del|De La|De Los|Da|Di|Du|La|Le|Lo|St\.|Den|Von|Van|V[ao]n De[nr]) )?([\p{Word}\-\.\']+))))$/i =~ name last = ln name.slice!(ln) name.strip! parsed = true parse_type = 0 + # THEN analyze the remaining names if /^(?[\p{Word}\-\.]+)[\s+\.](?([\p{Word}\-\.]+\s*)+)\s*$/ =~ name first = fn middle = mn - end - # just as a fall-back -- nothing should need this rule + elsif /^(?[\p{Word}\-\.]+)$/ =~ name + first = fn + + else +# binding.pry + end elsif /^(?[\p{Word}\-\.]+)[\s+\.](?([\p{Word}\-\.]+\s*)+)\s+(?[\p{Word}\-\.]+)$/ =~ name first = fn middle = mn last = ln parsed = true + parse_type = 97 + + # as a fall-back -- try first last + elsif /^(?[\p{Word}\-\.]+)[\s+\.](?[\p{Word}\-\.]+)\s*$/ =~ name + first = fn + last = ln + parsed = true + parse_type = 98 + + elsif /^(?[\p{Word}\-\.]+)\s*$/ =~ name # used for Jack and Jill; parsing the first name only + first = fn + parsed = true parse_type = 99 end @@ -395,7 +398,7 @@ def proper ( name ) # Now uppercase first letter of every word. By checking on word boundaries, # we will account for apostrophes (D'Angelo) and hyphenated names - fixed.gsub!( /\b(\p{Word}+)/ ) { |m| m.match( /^[ixv]$+/i ) ? m.upcase : m.capitalize } + fixed.gsub!( /\b(\p{Word}+)/ ) { |m| m.match( /^[ixv]+$/i ) ? m.upcase : m.capitalize } # Name case Macs and Mcs # Exclude names with 1-2 letters after prefix like Mack, Macky, Mace diff --git a/spec/people_spec.rb b/spec/people_spec.rb index 2ca8413..b6d9dac 100644 --- a/spec/people_spec.rb +++ b/spec/people_spec.rb @@ -45,7 +45,7 @@ module People expect(name[:parsed]).to be true expect(name[:first]).to eq 'M' expect(name[:last]).to eq 'E' - expect(name[:parse_type]).to eq 9 + expect(name[:parse_type]).to eq 0 end it "should parse first initial, last name" do @@ -53,7 +53,7 @@ module People expect(name[:parsed]).to be true expect(name[:first]).to eq "M" expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 9 # 1 + expect(name[:parse_type]).to eq 0 end it "should parse first initial, middle initial, last name" do @@ -62,7 +62,7 @@ module People expect(name[:first]).to eq "M" expect(name[:middle]).to eq 'E' expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 6 # 2 + expect(name[:parse_type]).to eq 0 end it "should parse first initial with period, middle initial with period, last name" do @@ -107,7 +107,7 @@ module People expect(name[:first]).to eq "Matthew" expect(name[:middle]).to eq 'E' expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 6 + expect(name[:parse_type]).to eq 0 end it "should parse first name, two middle initials, last name" do @@ -142,7 +142,7 @@ module People expect(name[:parsed]).to be true expect(name[:first]).to eq "Matthew" expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 9 + expect(name[:parse_type]).to eq 0 end it "should parse compound first name, last name" do @@ -150,7 +150,7 @@ module People expect(name[:parsed]).to be true expect(name[:first]).to eq "Matthew-Joseph" expect(name[:last]).to eq "Ericson-Miller" - expect(name[:parse_type]).to eq 9 + expect(name[:parse_type]).to eq 0 end it "should parse first name, middle name, last name" do @@ -255,39 +255,39 @@ module People expect(name[:multiple]).to be true expect(name[:parsed2]).to be true expect(name[:first2]).to eq "Jill" - expect(name[:parse_type]).to eq 9 + expect(name[:parse_type]).to eq 99 end -=begin + it "should parse multiple first names and last name with UTF-8 characters" do - name = @np.parse("MARÍA AND JOSÉ GARCÍA-D'ANGELO III" ) + name = @np.parse("MARÍA AND JOSÉ GARCÍA-D'ANGELO" ) expect(name[:parsed]).to eq true expect(name[:multiple]).to be true expect(name[:parsed2]).to be true expect(name[:first]).to eq 'María' expect(name[:first2]).to eq 'José' expect(name[:last]).to eq "García-D'Angelo" - expect(name[:suffix]).to eq "III" - expect(name[:parse_type]).to eq 9 + expect(name[:parse_type]).to eq 99 end -=end + it "should parse multiple first names, middle initial, last name" do name = @np.parse( "Joe and Jill S Hill" ) expect(name[:parsed]).to eq true expect(name[:multiple]).to be true expect(name[:parsed2]).to be true - expect(name[:parse_type]).to eq 9 expect(name[:middle2]).to eq 'S' expect(name[:first2]).to eq "Jill" + expect(name[:parse_type]).to eq 99 end it "should parse multiple first names, middle initial, last name" do - name = @np.parse( "Joe S and Jill Hill" ) + name = @np.parse( "Joe S and Jill X Hill" ) expect(name[:parsed]).to eq true expect(name[:multiple]).to be true expect(name[:parsed2]).to be true expect(name[:first2]).to eq "Jill" expect(name[:middle]).to eq 'S' - expect(name[:parse_type]).to eq 6 + expect(name[:middle2]).to eq 'X' + expect(name[:parse_type]).to eq 98 end end @@ -300,14 +300,14 @@ module People name = @np.parse( "Matthew De La Hoya" ) expect(name[:parsed]).to be true expect(name[:last]).to eq "De La Hoya" - expect(name[:parse_type]).to eq 9 + expect(name[:parse_type]).to eq 0 end it "should parse last name with cammel case" do name = @np.parse( "Matthew McIntosh" ) expect(name[:parsed]).to be true expect(name[:last]).to eq "McIntosh" - expect(name[:parse_type]).to eq 9 + expect(name[:parse_type]).to eq 0 end end From 654009aad563f29eeabd3b8d82a36c156b848432 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Wed, 15 Aug 2018 15:52:49 -0700 Subject: [PATCH 8/8] adding more test cases; stripping spaces from titles and suffixes --- Rakefile | 56 ------ lib/people.rb | 59 +----- spec/people_spec.rb | 428 ++++++++++++++++++++++++-------------------- 3 files changed, 242 insertions(+), 301 deletions(-) diff --git a/Rakefile b/Rakefile index 32e834a..1cf231f 100644 --- a/Rakefile +++ b/Rakefile @@ -17,59 +17,3 @@ task :spec_all do end task :default => :spec - -=begin - -begin - require 'jeweler' - Jeweler::Tasks.new do |gem| - gem.name = "people" - gem.summary = %Q{Matts Name Parser} - gem.email = "mericson@ericson.net" - gem.homepage = "http://github.com/mericson/people" - gem.authors = ["Matthew Ericson"] - - # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings - end -rescue LoadError - puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com" -end - -require 'rake/testtask' -Rake::TestTask.new(:test) do |test| - test.libs << 'lib' << 'test' - test.pattern = 'test/**/*_test.rb' - test.verbose = true -end - -begin - require 'rcov/rcovtask' - Rcov::RcovTask.new do |test| - test.libs << 'test' - test.pattern = 'test/**/*_test.rb' - test.verbose = true - end -rescue LoadError - task :rcov do - abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov" - end -end - - -require 'rdoc/task' -RDoc::Task.new do |rdoc| - if File.exist?('VERSION.yml') - require 'yaml' - config = YAML.load(File.read('VERSION.yml')) - version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}" - else - version = "" - end - - rdoc.rdoc_dir = 'rdoc' - rdoc.title = "people #{version}" - rdoc.rdoc_files.include('README*') - rdoc.rdoc_files.include('lib/**/*.rb') -end - -=end diff --git a/lib/people.rb b/lib/people.rb index 282804e..d716e98 100644 --- a/lib/people.rb +++ b/lib/people.rb @@ -13,7 +13,7 @@ class NameParser # Creates a name parsing object def initialize( opts={} ) - @nc = @name_chars = "A-Za-z\\-" + @nc = @name_chars = 'A-Za-z\\-' @opts = { :strip_mr => true, @@ -22,8 +22,6 @@ def initialize( opts={} ) :couples => false }.merge! opts - ## constants - @titles = [ 'Mr\.? and Mrs\.? ', 'Mrs\.? ', 'M/s\.? ', @@ -107,7 +105,6 @@ def initialize( opts={} ) 'Ald(\.|erman)? ' ]; - @suffixes = [ 'Jn?r\.?,? Esq\.?', 'Sn?r\.?,? Esq\.?', @@ -141,25 +138,16 @@ def initialize( opts={} ) 'D.?M\.?D\.?' # M.D. ]; - @last_name_p = "((;.+)|(((Mc|Mac|Des|Dell[ae]|Del|De La|De Los|Da|Di|Du|La|Le|Lo|St\.|Den|Von|Van|V[ao]n De[nr]) )?([#{@nc}]+)))" - @mult_name_p = "((;.+)|(((Mc|Mac|Des|Dell[ae]|Del|De La|De Los|Da|Di|Du|La|Le|Lo|St\.|Den|Von|Van|V[ao]n De[nr]) )?([#{@nc} ]+)))" - @seen = 0 @parsed = 0; - end def parse( name ) - @seen += 1 - clean = '' - out = Hash.new( '' ) - + out = Hash.new('') out[:orig] = name.dup - name = name.dup - name = clean( name ) # strip trailing suffices @@ -169,7 +157,7 @@ def parse( name ) name.gsub!( sfx_p, "\\1 \\2" ) end - name.gsub!( /Mr\.? \& Mrs\.?/i, "Mr. and Mrs." ) + name.gsub!( /Mr\.? \& Mrs\.?/i, 'Mr. and Mrs.' ) # Flip last and first if contain comma name.gsub!( /;/, '' ) @@ -218,7 +206,6 @@ def parse( name ) out = Hash.new( '' ) end - else out[:title] = get_title( name ); @@ -231,7 +218,6 @@ def parse( name ) out[:first] = parts[2] out[:middle] = parts[3] out[:last] = parts[4] - end @@ -250,9 +236,7 @@ def parse( name ) end - if out[:parsed] - @parsed += 1 - end + @parsed += 1 if out[:parsed] out[:clean] = name @@ -278,22 +262,17 @@ def parse( name ) :multiple => false }.merge( out ) - end + private def clean( s ) + # IMPORTANT: we DO NOT want to remove "illegal" characters here, because it would be stripping off valid UTF-8 characters - # IMPORTANT: we don't want to remove "illegal" characters, because it's stripping off valid UTF-8 characters -=begin - # remove illegal characters - s.gsub!( /[^A-Za-z0-9\-\'\.&\/ \,]/, '' ) -=end # squish repeating spaces s.gsub!( /\s+/, ' ' ) s.strip! s - end def get_title( name ) @@ -301,14 +280,11 @@ def get_title( name ) @titles.each do |title| title_p = Regexp.new( "^(#{title})(.+)", true ) if m = name.match( title_p ) - title = m[1] name.replace( m[-1].strip ) - return title + return title.strip end - end - return '' end @@ -318,29 +294,17 @@ def get_suffix( name ) sfx_p = Regexp.new( "(.+) (#{sfx})$", true ) if name.match( sfx_p ) name.replace $1.strip - suffix = $2 - return $2 + return $2.strip # return the suffix end - end - return '' end def get_name_parts( name, no_last_name = false ) - first = '' middle = '' last = '' - if no_last_name - last_name_p = '' - mult_name_p = '' - else - last_name_p = @last_name_p - mult_name_p = @mult_name_p - end - parsed = false # FIRST strip-off the last name @@ -381,17 +345,13 @@ def get_name_parts( name, no_last_name = false ) first = fn parsed = true parse_type = 99 - end last.gsub!( /;/, '' ) return [ parsed, parse_type, first, middle, last ]; - end - - def proper ( name ) fixed = name.downcase @@ -430,18 +390,15 @@ def proper ( name ) fixed.gsub!( /\b(Mc)([a-z]+)/i ) do |m| $1 + $2.capitalize end - end # Exceptions (only 'Mac' name ending in 'o' ?) fixed.gsub!( /Macmurdo/i, 'MacMurdo' ) return fixed - end end - end diff --git a/spec/people_spec.rb b/spec/people_spec.rb index b6d9dac..41463d9 100644 --- a/spec/people_spec.rb +++ b/spec/people_spec.rb @@ -1,417 +1,457 @@ require 'spec_helper' -=begin - -# more test cases -- the ones with "and" might be a bit excotic - -Donald Ericson -Ericson, Donald R. S -Ericson, Matthew -Matthew E. Ericson -Matt Van Ericson -Matthew E. La Ericson -M. Edward Ericson -Matthew and Ben Ericson -Mathew R. and Ben Q. Ericson -Ericson, Matthew R. and Ben Q. -MATTHEW ERICSON -MATTHEW MCDONALD -Mr. Matthew Ericson -Sir Matthew Ericson -Matthew Ericson III -Dr. Matthew Q Ericson IV -Ericson, Mr. Matthew E -Von Ericson, Dr. Matthew Edward -Angel S. Viloria III - -=end - module People - describe "Parse standard name variations" do + describe 'Parse standard name variations' do before( :each ) do @np = People::NameParser.new end - it "should parse first initial" do - name = @np.parse( "M" ) + it 'should parse just first initial' do + name = @np.parse( 'M' ) expect(name[:parsed]).to be true expect(name[:first]).to eq '' expect(name[:last]).to eq 'M' expect(name[:parse_type]).to eq 0 end - it "should parse first initial last initial" do - name = @np.parse( "M E" ) + it 'should parse first and last initial' do + name = @np.parse( 'M E' ) expect(name[:parsed]).to be true expect(name[:first]).to eq 'M' expect(name[:last]).to eq 'E' expect(name[:parse_type]).to eq 0 end - it "should parse first initial, last name" do - name = @np.parse( "M ERICSON" ) + it 'should parse first and last initial with periods' do + name = @np.parse( 'M. E.' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "M" - expect(name[:last]).to eq "Ericson" + expect(name[:first]).to eq 'M.' + expect(name[:last]).to eq 'E.' + expect(name[:parse_type]).to eq 0 + end + + it 'should parse first initial, last name' do + name = @np.parse( 'M ERICSON' ) + expect(name[:parsed]).to be true + expect(name[:first]).to eq 'M' + expect(name[:last]).to eq 'Ericson' expect(name[:parse_type]).to eq 0 end - it "should parse first initial, middle initial, last name" do - name = @np.parse( "M E ERICSON" ) + it 'should parse first initial, middle initial, last name' do + name = @np.parse( 'M E ERICSON' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "M" + expect(name[:first]).to eq 'M' expect(name[:middle]).to eq 'E' - expect(name[:last]).to eq "Ericson" + expect(name[:last]).to eq 'Ericson' expect(name[:parse_type]).to eq 0 end - it "should parse first initial with period, middle initial with period, last name" do + it 'should parse first initial with period, middle initial with period, last name' do name = @np.parse( "M.E. ERICSON" ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "M" + expect(name[:first]).to eq 'M' expect(name[:middle]).to eq 'E.' - expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 0 # 3 + expect(name[:last]).to eq 'Ericson' + expect(name[:parse_type]).to eq 0 end - it "should parse first initial, two middle initials, last name" do - name = @np.parse( "M E E ERICSON" ) + it 'should parse first initial, two middle initials, last name' do + name = @np.parse( 'M E E ERICSON' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "M" + expect(name[:first]).to eq 'M' expect(name[:middle]).to eq 'E E' expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 0 # 4 + expect(name[:parse_type]).to eq 0 end - it "should parse first initial, two middle initials, last name" do - name = @np.parse( "M. E. E. ERICSON" ) + it 'should parse first initial, two middle initials with periods, last name' do + name = @np.parse( 'M. E. E. ERICSON' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "M." + expect(name[:first]).to eq 'M.' expect(name[:middle]).to eq 'E. E.' - expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 0 # 4 + expect(name[:last]).to eq 'Ericson' + expect(name[:parse_type]).to eq 0 end - it "should parse first initial, middle name, last name" do - name = @np.parse( "M EDWARD ERICSON" ) + it 'should parse first initial, middle name, last name' do + name = @np.parse( 'M EDWARD ERICSON' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "M" + expect(name[:first]).to eq 'M' expect(name[:middle]).to eq 'Edward' - expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 0 # 5 + expect(name[:last]).to eq 'Ericson' + expect(name[:parse_type]).to eq 0 end - it "should parse first name, middle initial, last name" do - name = @np.parse( "MATTHEW E ERICSON" ) + it 'should parse first name, middle initial, last name' do + name = @np.parse( 'MATTHEW E ERICSON' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "Matthew" + expect(name[:first]).to eq 'Matthew' expect(name[:middle]).to eq 'E' - expect(name[:last]).to eq "Ericson" + expect(name[:last]).to eq 'Ericson' expect(name[:parse_type]).to eq 0 end - it "should parse first name, two middle initials, last name" do - name = @np.parse( "MATTHEW E E ERICSON" ) + it 'should parse first name, two middle initials, last name' do + name = @np.parse( 'MATTHEW E E ERICSON' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "Matthew" + expect(name[:first]).to eq 'Matthew' expect(name[:middle]).to eq 'E E' - expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 0 # 7 + expect(name[:last]).to eq 'Ericson' + expect(name[:parse_type]).to eq 0 end - it "should parse first name, two middle initials with periods without space, last name" do - name = @np.parse( "MATTHEW E.E. ERICSON" ) + it 'should parse first name, two middle initials with periods without space, last name' do + name = @np.parse( 'MATTHEW E.E. ERICSON' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "Matthew" + expect(name[:first]).to eq 'Matthew' expect(name[:middle]).to eq 'E.E.' - expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 0 # 8 + expect(name[:last]).to eq 'Ericson' + expect(name[:parse_type]).to eq 0 end - it "should parse first name, two middle initials with periods, last name" do - name = @np.parse( "MATTHEW A. B. C. ERICSON" ) + it 'should parse first name, three middle initials with periods, last name' do + name = @np.parse( 'MATTHEW A. B. C. ERICSON' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "Matthew" + expect(name[:first]).to eq 'Matthew' expect(name[:middle]).to eq 'A. B. C.' - expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 0 # 8 + expect(name[:last]).to eq 'Ericson' + expect(name[:parse_type]).to eq 0 end - it "should parse first name, last name" do - name = @np.parse( "MATTHEW ERICSON" ) + it 'should parse first name, last name' do + name = @np.parse( 'MATTHEW ERICSON' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "Matthew" - expect(name[:last]).to eq "Ericson" + expect(name[:first]).to eq 'Matthew' + expect(name[:last]).to eq 'Ericson' expect(name[:parse_type]).to eq 0 end - it "should parse compound first name, last name" do - name = @np.parse( "MATTHEW-JOSEPH ERICSON-MILLER" ) + it 'should parse reverse order last name, first name' do + name = @np.parse( 'ERICSON, MATTHEW' ) + expect(name[:parsed]).to be true + expect(name[:first]).to eq 'Matthew' + expect(name[:last]).to eq 'Ericson' + expect(name[:parse_type]).to eq 0 + end + + it 'should parse reverse order last name, first name middle initials' do + name = @np.parse( 'DUCK, DONALD R. S' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "Matthew-Joseph" - expect(name[:last]).to eq "Ericson-Miller" + expect(name[:first]).to eq 'Donald' + expect(name[:last]).to eq 'Duck' + expect(name[:middle]).to eq 'R. S' expect(name[:parse_type]).to eq 0 end - it "should parse first name, middle name, last name" do + it 'should parse compound first name, last name' do + name = @np.parse( 'MATTHEW-JOSEPH ERICSON-MILLER' ) + expect(name[:parsed]).to be true + expect(name[:first]).to eq 'Matthew-Joseph' + expect(name[:last]).to eq 'Ericson-Miller' + expect(name[:parse_type]).to eq 0 + end + + it 'should parse first name, middle name, last name' do name = @np.parse( "MATTHEW EDWARD ERICSON" ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "Matthew" + expect(name[:first]).to eq 'Matthew' expect(name[:middle]).to eq 'Edward' - expect(name[:last]).to eq "Ericson" - expect(name[:parse_type]).to eq 0 # 10 + expect(name[:last]).to eq 'Ericson' + expect(name[:parse_type]).to eq 0 end - it "parses first name, middle initial, middle name, last name" do - name = @np.parse( "MATTHEW E. SHEIE ERICSON" ) + it 'parses first name, middle initial, middle name, last name' do + name = @np.parse( 'MATTHEW E. SHEIE ERICSON' ) expect( name[:parsed]).to be true - expect( name[:first]).to eq "Matthew" + expect( name[:first]).to eq 'Matthew' expect( name[:middle]).to eq 'E. Sheie' - expect( name[:last]).to eq "Ericson" - expect( name[:parse_type]).to eq 0 # 11 + expect( name[:last]).to eq 'Ericson' + expect( name[:parse_type]).to eq 0 end - it "parses first name, middle name, middle initial, last name" do - name = @np.parse( "MATTHEW ERWIN S. ERICSON" ) + it 'parses first name, middle name, middle initial, last name' do + name = @np.parse( 'MATTHEW ERWIN S. ERICSON' ) expect( name[:parsed]).to be true - expect( name[:first]).to eq "Matthew" + expect( name[:first]).to eq 'Matthew' expect( name[:middle]).to eq 'Erwin S.' - expect( name[:last]).to eq "Ericson" - expect( name[:parse_type]).to eq 0 # 13 + expect( name[:last]).to eq 'Ericson' + expect( name[:parse_type]).to eq 0 end it 'parses two middle names' do - name = @np.parse( "MATTHEW EDWARD RICHARD ERICSON" ) + name = @np.parse( 'MATTHEW EDWARD RICHARD ERICSON' ) expect( name[:parsed]).to be true - expect( name[:first]).to eq "Matthew" + expect( name[:first]).to eq 'Matthew' expect( name[:middle]).to eq 'Edward Richard' - expect( name[:last]).to eq "Ericson" - expect( name[:parse_type]).to eq 0 # 11 + expect( name[:last]).to eq 'Ericson' + expect( name[:parse_type]).to eq 0 end it 'parses three middle names' do name = @np.parse( "MATTHEW EDWARD RICHARD MARIA ERICSON" ) expect( name[:parsed]).to be true - expect( name[:first]).to eq "Matthew" + expect( name[:first]).to eq 'Matthew' expect( name[:middle]).to eq 'Edward Richard Maria' - expect( name[:last]).to eq "Ericson" - expect( name[:parse_type]).to eq 0 # 11 + expect( name[:last]).to eq 'Ericson' + expect( name[:parse_type]).to eq 0 end it 'parses two middle names and handles compound names' do - name = @np.parse( "MATTHEW EDWARD RICHARD MARIA-ERICSON" ) + name = @np.parse( 'MATTHEW EDWARD RICHARD MARIA-ERICSON' ) expect( name[:parsed]).to be true - expect( name[:first]).to eq "Matthew" + expect( name[:first]).to eq 'Matthew' expect( name[:middle]).to eq 'Edward Richard' - expect( name[:last]).to eq "Maria-Ericson" - expect( name[:parse_type]).to eq 0 # 11 + expect( name[:last]).to eq 'Maria-Ericson' + expect( name[:parse_type]).to eq 0 end it 'parses one middle names and handles compound names' do - name = @np.parse( "MATTHEW HANS-WURST MARIA-ERICSON" ) + name = @np.parse( 'MATTHEW HANS-WURST MARIA-ERICSON' ) expect( name[:parsed]).to be true - expect( name[:first]).to eq "Matthew" + expect( name[:first]).to eq 'Matthew' expect( name[:middle]).to eq 'Hans-Wurst' - expect( name[:last]).to eq "Maria-Ericson" - expect( name[:parse_type]).to eq 0 # 10 + expect( name[:last]).to eq 'Maria-Ericson' + expect( name[:parse_type]).to eq 0 end it "should parse compound first name, last name" do - name = @np.parse( "MATTHEW-JOSEPH HANS-WURST ERICSON-MILLER" ) + name = @np.parse( 'MATTHEW-JOSEPH HANS-WURST ERICSON-MILLER' ) expect(name[:parsed]).to be true - expect(name[:first]).to eq "Matthew-Joseph" + expect(name[:first]).to eq 'Matthew-Joseph' expect(name[:middle]).to eq 'Hans-Wurst' - expect(name[:last]).to eq "Ericson-Miller" - expect(name[:parse_type]).to eq 0 # 10 + expect(name[:last]).to eq 'Ericson-Miller' + expect(name[:parse_type]).to eq 0 end it 'parses one middle names and handles compound names' do - name = @np.parse( "HANS-WURST MATTHEW MARIA-ERICSON" ) + name = @np.parse( 'HANS-WURST MATTHEW MARIA-ERICSON' ) expect( name[:parsed]).to be true expect( name[:first]).to eq 'Hans-Wurst' - expect( name[:middle]).to eq "Matthew" - expect( name[:last]).to eq "Maria-Ericson" - expect( name[:parse_type]).to eq 0 # 10 + expect( name[:middle]).to eq 'Matthew' + expect( name[:last]).to eq 'Maria-Ericson' + expect( name[:parse_type]).to eq 0 end it 'parses one middle names and handles compound names' do - name = @np.parse( "HANS WURST MATTHEW MARIA DE LA CULPA-GARCÍA" ) + name = @np.parse( 'HANS WURST MATTHEW MARIA DE LA CULPA-GARCÍA' ) expect( name[:parsed]).to be true - expect( name[:last]).to eq "De La Culpa-García" + expect( name[:last]).to eq 'De La Culpa-García' expect( name[:middle]).to eq 'Wurst Matthew Maria' expect( name[:first]).to eq 'Hans' expect( name[:parse_type]).to eq 0 end end - describe "Parse multiple names" do + describe 'Parse multiple names' do before( :each ) do @np = People::NameParser.new( :couples => true ) end - it "should parse multiple first names and last name" do - name = @np.parse( "Joe and Jill Hill" ) + it 'should parse multiple first names and last name' do + name = @np.parse( 'Joe and Jill Hill' ) expect(name[:parsed]).to eq true expect(name[:multiple]).to be true expect(name[:parsed2]).to be true - expect(name[:first2]).to eq "Jill" + expect(name[:first2]).to eq 'Jill' expect(name[:parse_type]).to eq 99 end it "should parse multiple first names and last name with UTF-8 characters" do - name = @np.parse("MARÍA AND JOSÉ GARCÍA-D'ANGELO" ) + name = @np.parse('MARÍA AND JOSÉ GARCÍA-D\'ANGELO' ) expect(name[:parsed]).to eq true expect(name[:multiple]).to be true expect(name[:parsed2]).to be true expect(name[:first]).to eq 'María' expect(name[:first2]).to eq 'José' - expect(name[:last]).to eq "García-D'Angelo" + expect(name[:last]).to eq 'García-D\'Angelo' expect(name[:parse_type]).to eq 99 end - it "should parse multiple first names, middle initial, last name" do - name = @np.parse( "Joe and Jill S Hill" ) + it 'should parse multiple first names, middle initial, last name' do + name = @np.parse( 'Joe and Jill S Hill' ) expect(name[:parsed]).to eq true expect(name[:multiple]).to be true expect(name[:parsed2]).to be true expect(name[:middle2]).to eq 'S' - expect(name[:first2]).to eq "Jill" + expect(name[:first2]).to eq 'Jill' + expect(name[:first]).to eq 'Joe' + expect(name[:last]).to eq 'Hill' expect(name[:parse_type]).to eq 99 end - it "should parse multiple first names, middle initial, last name" do - name = @np.parse( "Joe S and Jill X Hill" ) + it 'should parse multiple first names, middle initial, last name' do + name = @np.parse( 'Joe S and Jill X Hill' ) expect(name[:parsed]).to eq true expect(name[:multiple]).to be true expect(name[:parsed2]).to be true - expect(name[:first2]).to eq "Jill" - expect(name[:middle]).to eq 'S' + expect(name[:first2]).to eq 'Jill' expect(name[:middle2]).to eq 'X' + expect(name[:first]).to eq 'Joe' + expect(name[:middle]).to eq 'S' + expect(name[:last]).to eq 'Hill' + expect(name[:parse_type]).to eq 98 + end + + it 'should parse reverse order with two first names and initials' do + name = @np.parse( 'Ericson, Matthew R. and Ben Q.' ) + expect(name[:parsed]).to eq true + expect(name[:multiple]).to be true + expect(name[:parsed2]).to be true + expect(name[:first2]).to eq 'Ben' + expect(name[:middle2]).to eq 'Q.' + expect(name[:first]).to eq 'Matthew' + expect(name[:middle]).to eq 'R.' + expect(name[:last]).to eq 'Ericson' expect(name[:parse_type]).to eq 98 end end - describe "Parse unusual names" do + describe 'Parse unusual names' do before( :each ) do @np = People::NameParser.new end - it "should parse multiple-word last name" do - name = @np.parse( "Matthew De La Hoya" ) + it 'should parse multiple-word last name' do + name = @np.parse( 'Rev Matthew De La Hoya Jr.' ) + expect(name[:parsed]).to be true + expect(name[:last]).to eq 'De La Hoya' + expect(name[:suffix]).to eq 'Jr.' + expect(name[:title]).to eq 'Rev' + expect(name[:parse_type]).to eq 0 + end + + # BUG: suffixes do not work in this format + it 'should parse reverse order name' do + name = @np.parse( 'Van Der Graf, Dr. Matthew' ) + expect(name[:parsed]).to be true + expect(name[:last]).to eq 'Van Der Graf' + expect(name[:first]).to eq 'Matthew' + expect(name[:title]).to eq 'Dr.' + expect(name[:middle]).to eq '' + expect(name[:parse_type]).to eq 0 + end + + it 'should parse long name with title and suffix' do + name = @np.parse( 'Dr. Matthew Q Ericson IV' ) expect(name[:parsed]).to be true - expect(name[:last]).to eq "De La Hoya" + expect(name[:last]).to eq 'Ericson' + expect(name[:first]).to eq 'Matthew' + expect(name[:title]).to eq 'Dr.' + expect(name[:middle]).to eq 'Q' + expect(name[:suffix]).to eq 'IV' expect(name[:parse_type]).to eq 0 end - it "should parse last name with cammel case" do - name = @np.parse( "Matthew McIntosh" ) + it 'should parse last name with cammel case' do + name = @np.parse( 'Matthew McIntosh' ) expect(name[:parsed]).to be true - expect(name[:last]).to eq "McIntosh" + expect(name[:last]).to eq 'McIntosh' expect(name[:parse_type]).to eq 0 end end - describe "Parse names with decorations" do + describe 'Parse names with decorations' do before( :each ) do @np = People::NameParser.new end it "should parse name with the suffix 'Jr'" do - name = @np.parse( "Matthew E Ericson Jr" ) + name = @np.parse( 'Matthew E Ericson Jr' ) expect(name[:parsed]).to be true - expect(name[:suffix]).to eq "Jr" + expect(name[:suffix]).to eq 'Jr' end - it "should parse name with a roman numeral suffix" do - name = @np.parse( "Matthew E Ericson III" ) + it 'should parse name with a roman numeral suffix' do + name = @np.parse( 'Matthew E Ericson III' ) expect(name[:parsed]).to be true - expect(name[:suffix]).to eq "III" + expect(name[:suffix]).to eq 'III' end - it "should parse name with a numerical suffix 1" do - name = @np.parse( "Matthew E Ericson 1st" ) + it 'should parse name with a numerical suffix 1' do + name = @np.parse( 'Matthew E Ericson 1st' ) expect(name[:parsed]).to be true - expect(name[:suffix]).to eq "1st" + expect(name[:suffix]).to eq '1st' end - it "should parse name with a numerical suffix 2" do - name = @np.parse( "Matthew E Ericson 2nd" ) + it 'should parse name with a numerical suffix 2' do + name = @np.parse( 'Matthew E Ericson 2nd' ) expect(name[:parsed]).to be true - expect(name[:suffix]).to eq "2nd" + expect(name[:suffix]).to eq '2nd' end - it "should parse name with a numerical suffix 3" do - name = @np.parse( "Matthew E Ericson 3rd" ) + it 'should parse name with a numerical suffix 3' do + name = @np.parse( 'Matthew E Ericson 3rd' ) expect(name[:parsed]).to be true - expect(name[:suffix]).to eq "3rd" + expect(name[:suffix]).to eq '3rd' end - it "should parse name with a numerical suffix 4" do - name = @np.parse( "Matthew E Ericson 4th" ) + it 'should parse name with a numerical suffix 4' do + name = @np.parse( 'Matthew E Ericson 4th' ) expect(name[:parsed]).to be true - expect(name[:suffix]).to eq "4th" + expect(name[:suffix]).to eq '4th' end - it "should parse name with a numerical suffix 13" do - name = @np.parse( "Matthew E Ericson 13th" ) + it 'should parse name with a numerical suffix 13' do + name = @np.parse( 'Matthew E Ericson 13th' ) expect(name[:parsed]).to be true - expect(name[:suffix]).to eq "13th" + expect(name[:suffix]).to eq '13th' end - it "should parse name with a suffix with periods" do - name = @np.parse( "Matthew E Ericson M.D." ) + it 'should parse name with a suffix with periods' do + name = @np.parse( 'Matthew E Ericson M.D.' ) expect(name[:parsed]).to be true - expect(name[:suffix]).to eq "M.D." + expect(name[:suffix]).to eq 'M.D.' end - it "should parse name with a title" do - name = @np.parse( "Mr Matthew E Ericson" ) + it 'should parse name with a title' do + name = @np.parse( 'Mr Matthew E Ericson' ) expect(name[:parsed]).to be true - expect(name[:title]).to eq "Mr " + expect(name[:title]).to eq 'Mr' end - it "should parse name with a title with a period" do - name = @np.parse( "Mr. Matthew E Ericson" ) + it 'should parse name with a title with a period' do + name = @np.parse( 'Mr. Matthew E Ericson' ) expect(name[:parsed]).to be true - expect(name[:title]).to eq "Mr. " + expect(name[:title]).to eq 'Mr.' end - it "should parse name with a title, first initial" do - name = @np.parse( "Rabbi M Edward Ericson" ) + it 'should parse name with a title, first initial' do + name = @np.parse( 'Rabbi M Edward Ericson' ) expect(name[:parsed]).to be true - expect(name[:title]).to eq "Rabbi " + expect(name[:title]).to eq 'Rabbi' expect(name[:first]).to eq 'M' - expect(name[:parse_type]).to eq 0 # 5 + expect(name[:parse_type]).to eq 0 end - it "should parse 1950s married couple name" do - name = @np.parse( "Mr. and Mrs. Matthew E Ericson" ) + it 'should parse 1950s married couple name' do + name = @np.parse( 'Mr. and Mrs. Matthew E Ericson' ) expect(name[:parsed]).to be true - expect(name[:title]).to eq "Mr. And Mrs. " - expect(name[:first]).to eq "Matthew" + expect(name[:title]).to eq 'Mr. And Mrs.' + expect(name[:first]).to eq 'Matthew' end end - describe "Name case options" do - it "should change upper case to proper case" do + describe 'Name case options' do + it 'should change upper case to proper case' do proper_np = People::NameParser.new( :case_mode => 'proper' ) - name = proper_np.parse( "MATTHEW ERICSON" ) - expect(name[:first]).to eq "Matthew" - expect(name[:last]).to eq "Ericson" + name = proper_np.parse( 'MATTHEW ERICSON' ) + expect(name[:first]).to eq 'Matthew' + expect(name[:last]).to eq 'Ericson' end - it "should change proper case to upper case" do + it 'should change proper case to upper case' do proper_np = People::NameParser.new( :case_mode => 'upper' ) - name = proper_np.parse( "Matthew Ericson" ) - expect(name[:first]).to eq "MATTHEW" - expect(name[:last]).to eq "ERICSON" + name = proper_np.parse( 'Matthew Ericson' ) + expect(name[:first]).to eq 'MATTHEW' + expect(name[:last]).to eq 'ERICSON' end - it "should leave case as is" do + it 'should leave case as is' do proper_np = People::NameParser.new( :case_mode => 'leave' ) - name = proper_np.parse( "mATTHEW eRicSon" ) - expect(name[:first]).to eq "mATTHEW" - expect(name[:last]).to eq "eRicSon" + name = proper_np.parse( 'mATTHEW eRicSon' ) + expect(name[:first]).to eq 'mATTHEW' + expect(name[:last]).to eq 'eRicSon' end end end