M Rakefile +6 -3
@@ 197,9 197,12 @@ GEMSPEC = Gem::Specification.new do |g
gem.add_runtime_dependency( name, version )
end
- DEVELOPMENT_DEPENDENCIES.each do |name, version|
- version = '>= 0' if version.length.zero?
- gem.add_development_dependency( name, version )
+ # Development dependencies don't work in RubyGems 1.2.0 and earlier, so skip them there
+ unless Gem::Version.new( Gem::RubyGemsVersion ) <= Gem::Version.new( "1.2.0" )
+ DEVELOPMENT_DEPENDENCIES.each do |name, version|
+ version = '>= 0' if version.length.zero?
+ gem.add_development_dependency( name, version )
+ end
end
REQUIREMENTS.each do |name, version|
M Rakefile.local +18 -2
@@ 23,8 23,8 @@ DATA_DATABASE_FILE = DATA_BUILD_DIR + 'd
### Tasks
# Add 'convert' to the default task, and the testing tasks
-Rake::Task[:default].prerequisites << :convert
-Rake::Task[:spec].prerequisites << :convert
+# Declare :convert as a prerequisite of both the :local and the :spec task
+task :local => :convert
+task :spec => :convert
### Task: convert
desc "Convert WordNet dict files to a database"
@@ 44,3 44,19 @@ task DATA_DATABASE_FILE.to_s => CONVERT_
WordNetConverter.new( DATA_BUILD_DIR ).convertdb
end
+
+task :install => DATA_DATABASE_FILE do
+  # Root directory the converted data files are copied under
+  destroot = Pathname.new( CONFIG['datadir'] )
+  log "Installing converted WordNet files in #{CONFIG['datadir']}"
+
+  # Only regular files are installed; their directories are created on demand
+  built_files = Pathname.glob( DATA_BUILD_DIR + '**/*' ).select {|entry| entry.file? }
+
+  built_files.each do |source|
+    trace " installing #{source}"
+
+    # Mirror the file's path relative to the current directory under destroot
+    relative = source.expand_path.relative_path_from( Pathname.pwd )
+    destination = destroot + relative
+    destdir = destination.dirname
+
+    unless destdir.directory?
+      FileUtils.mkpath destdir,
+        :mode => 0755, :verbose => true, :noop => $dryrun
+    end
+
+    FileUtils.install relative, destination,
+      :mode => 0644, :verbose => true, :noop => $dryrun
+  end
+end
No newline at end of file
R docs/CATALOG => +0 -15
@@ 1,15 0,0 @@
-#
-# Title: Ruby-WordNet API
-# Main: README
-# Upload: ssh://deveiate.org/usr/local/www/public/code/Ruby-WordNet/
-# Webcvs: http://devEiate.org/projects/Ruby-WordNet/browse/trunk/
-#
-# Accessors:
-# def_pointer_methods=pointer
-#
-lib/wordnet.rb
-lib/wordnet/synset.rb
-lib/wordnet/lexicon.rb
-lib/wordnet/constants.rb
-README
-TODO
R docs/makedocs.rb => +0 -204
@@ 1,204 0,0 @@
-#!/usr/bin/ruby
-#
-# RDoc Documentation Generation Script
-# $Id$
-#
-# Copyright (c) 2001-2005 The FaerieMUD Consortium.
-#
-# This is free software. You may use, modify, and/or redistribute this
-# software under the terms of the Perl Artistic License. (See
-# http://language.perl.com/misc/Artistic.html)
-#
-
-# Make sure we're in the correct directory, and if not, change there.
-BEGIN {
- basedir = File::dirname(File::dirname( File::expand_path(__FILE__) ))
- unless Dir::pwd == basedir
- Dir::chdir( basedir )
- end
- $LOAD_PATH.unshift basedir
-}
-
-# Load modules
-require 'optparse'
-require 'rdoc/rdoc'
-require 'utils'
-include UtilityFunctions
-
-def makeDocs( docsdir, template='html', diagrams=false, upload=nil, ridocs=false )
- debugMsg "docsdir = %p, template = %p, diagrams = %p, upload = %p, ridocs = %p" %
- [docsdir, template, diagrams, upload, ridocs]
-
- title = findRdocTitle()
- docs = findRdocableFiles()
- main = findRdocMain()
- webcvs = findRdocCvsURL()
- accessors = findRdocAccessors()
-
- flags = [
- '--all',
- '--inline-source',
- '--fmt', 'html',
- '--include', 'docs',
- '--template', template,
- '--op', docsdir,
- '--title', title,
- '--tab-width', 4,
- ]
-
- flags += [ '--quiet' ] unless $VERBOSE
- flags += [ '--diagram' ] if diagrams
- flags += [ '--main', main ] if main
- flags += [ '--webcvs', webcvs ] if webcvs
- for accessor in accessors
- flags += [ '--accessor', accessor ]
- end
-
- if ridocs
- header "Will create/install 'ri' source" if ridocs
- buildRi( docs )
- else
- header "Making documentation in #{docsdir}."
- header "Will upload to '#{upload}'\n" if upload
- buildDocs( flags, docs )
- uploadDocs( upload, docsdir ) if upload
- end
-end
-
-
-def buildDocs( flags, docs )
- message "Running 'rdoc #{flags.join(' ')} #{docs.join(' ')}'\n" if $VERBOSE
- unless $DEBUG
- begin
- r = RDoc::RDoc.new
- r.document( flags + docs )
- rescue RDoc::RDocError => e
- $stderr.puts e.message
- exit(1)
- end
- end
-end
-
-
-def uploadDocs( url, docsdir )
- header "Uploading new docs snapshot to #{url}."
-
- case url
-
- # SSH target
- when %r{^ssh://(.*)}
- target = $1
- if target =~ %r{^([^/]+)/(.*)}
- host, path = $1, $2
- path = "/" + path unless path =~ /^(\/|\.)/
- cmd = "tar -C #{docsdir} -cf - . | ssh #{host} 'tar -C #{path} -xvf -'"
- unless $DEBUG
- system( cmd )
- else
- message "Would have uploaded using the command:\n #{cmd}\n\n"
- end
- else
- abort "--upload ssh://host/path"
- end
- when %r{^file://(.*)}
- targetdir = $1
- targetdir.gsub!( %r{^file://}, '' )
-
- File.makedirs targetdir, true
- Dir["#{docsdir}/**/*"].each {|file|
- fname = file.gsub( %r:#{docsdir}/:, '' )
- if File.directory? file
- unless $DEBUG
- File.makedirs File.join(targetdir, fname), true
- else
- message %{File.makedirs %s, true\n} % File.join(targetdir, fname)
- end
- else
- unless $DEBUG
- File.install( file, File.join(targetdir, fname), 0444, true )
- else
- message %{File.install( %s, %s, 0444, true )\n} % [
- file,
- File.join(targetdir, fname),
- ]
- end
- end
- }
-
- else
- raise "I don't know how to upload to urls like '#{url}'."
- end
-end
-
-def buildRi( docs )
- message "Running 'rdoc -R #{docs.join(' ')}'\n" if $VERBOSE
- unless $DEBUG
- begin
- r = RDoc::RDoc.new
- r.document( ['-R'] + docs )
- rescue RDoc::RDocError => e
- $stderr.puts e.message
- exit(1)
- end
- end
-
-end
-
-
-if $0 == __FILE__
- upload = nil
- diagrams = false
- template = 'html'
- docsdir = "docs/html"
- rimode = false
-
-
- # Read command-line options
- ARGV.options do |oparser|
- oparser.banner = "Usage: #$0 [options]\n"
-
- oparser.separator "RDoc options:"
- oparser.on( "--diagrams", "-d", TrueClass, "Generate diagrams" ) do
- diagrams = true
- end
-
- oparser.on( "--output=DIR", "-o=DIR", String, "Set the output directory" ) do |val|
- docsdir = val
- end
-
- oparser.on( "--ri", "-R", TrueClass, "Generate content for 'ri' instead of HTML" ) do
- rimode = true
- end
-
- oparser.separator ""
- oparser.separator "Post-generation options:"
-
- oparser.on( "--upload=[URI]", "-u=[URI]", String, "Upload to the given URI" ) do |val|
- upload = val
- upload = findRdocUpload() if val.nil? || val.empty?
- end
-
- oparser.separator ""
- oparser.separator "Output options:"
-
- oparser.on( "--debug", "-d", TrueClass, "Output debugging information" ) do
- $VERBOSE = true
- debugMsg "Turned debugging on."
- end
-
- oparser.on( "--verbose", "-v", TrueClass, "Make progress verbose" ) do
- $VERBOSE = true
- debugMsg "Turned verbose on."
- end
-
- # Handle the 'help' option
- oparser.on( "--help", "-h", "Display this text." ) do
- $stderr.puts oparser
- exit!(0)
- end
-
- oparser.parse!
- end
-
- makeDocs( docsdir, template, diagrams, upload, rimode )
-end
M lib/wordnet.rb +2 -0
@@ 83,5 83,7 @@ module WordNet
require 'wordnet/lexicon'
require 'wordnet/synset'
+ include WordNet::Constants
+
end # module WordNet
M lib/wordnet/constants.rb +226 -230
@@ 1,5 1,7 @@
#!/usr/bin/ruby
-#
+
+require 'wordnet'
+
# This is a module containing constants used in the WordNet interface for
# Ruby. They are contained in a module to facilitate their easy inclusion in
# other namespaces. All constants in this module are also contained in the
@@ 39,263 41,257 @@
#
# $Id$
#
-module WordNet
-
- ### Constant-container module
- module Constants
+### Constant-container module
+module WordNet::Constants
- # Synset syntactic-category names -> indicators
- SYNTACTIC_CATEGORIES = {
- :noun => "n",
- :verb => "v",
- :adjective => "a",
- :adverb => "r",
- :other => "s",
- }
- # Syntactic-category indicators -> names
- SYNTACTIC_SYMBOLS = SYNTACTIC_CATEGORIES.invert
+ # Synset syntactic-category names -> indicators
+ SYNTACTIC_CATEGORIES = {
+ :noun => "n",
+ :verb => "v",
+ :adjective => "a",
+ :adverb => "r",
+ :other => "s",
+ }
+ # Syntactic-category indicators -> names
+ SYNTACTIC_SYMBOLS = SYNTACTIC_CATEGORIES.invert
- # Map the categories into their own constants (eg., Noun)
- SYNTACTIC_CATEGORIES.each do |sym,val|
- cname = sym.to_s.capitalize
- const_set( cname, val )
- end
+ # Map the categories into their own constants (eg., Noun)
+ SYNTACTIC_CATEGORIES.each do |sym,val|
+ cname = sym.to_s.capitalize
+ const_set( cname, val )
+ end
- # Information about pointer types is contained in the wninput(5WN)
- # manpage.
+ # Information about pointer types is contained in the wninput(5WN)
+ # manpage.
- # Synset pointer typenames -> indicators
- POINTER_TYPES = {
- :antonym => '!',
- :hypernym => '@',
- :entailment => '*',
- :hyponym => '~',
- :meronym => '%',
- :holonym => '#',
- :cause => '>',
- :verb_group => %{$},
- :similar_to => '&',
- :participle => '<',
- :pertainym => '\\',
- :attribute => '=',
- :derived_from => '\\',
- :see_also => '^',
- :derivation => '+',
- :domain => ';',
- :member => '-',
- }
+ # Synset pointer typenames -> indicators
+ POINTER_TYPES = {
+ :antonym => '!',
+ :hypernym => '@',
+ :entailment => '*',
+ :hyponym => '~',
+ :meronym => '%',
+ :holonym => '#',
+ :cause => '>',
+ :verb_group => '$',
+ :similar_to => '&',
+ :participle => '<',
+ :pertainym => '\\',
+ :attribute => '=',
+ :derived_from => '\\',
+ :see_also => '^',
+ :derivation => '+',
+ :domain => ';',
+ :member => '-',
+ }
- # Synset pointer indicator -> typename
- POINTER_SYMBOLS = POINTER_TYPES.invert
+ # Synset pointer indicator -> typename
+ POINTER_SYMBOLS = POINTER_TYPES.invert
- # Map the pointer types into their own symbols (eg., :verb_group => VerbGroup)
- POINTER_TYPES.each do |sym,val|
- cname = sym.to_s.gsub( /(?:^|_)(.)/ ) { $1.upcase }
- const_set( cname, val )
- end
+ # Map the pointer types into their own symbols (eg., :verb_group => VerbGroup)
+ POINTER_TYPES.each do |sym,val|
+ cname = sym.to_s.gsub( /(?:^|_)(.)/ ) { $1.upcase }
+ const_set( cname, val )
+ end
- # Hypernym synset pointer types
- HYPERNYM_TYPES = {
- nil => '@', # Install non-subtype methods, too
- :instance => '@i',
- }
+ # Hypernym synset pointer types
+ HYPERNYM_TYPES = {
+ nil => '@', # Install non-subtype methods, too
+ :instance => '@i',
+ }
- # Hypernym indicator -> type map
- HYPERNYM_SYMBOLS = HYPERNYM_TYPES.invert
+ # Hypernym indicator -> type map
+ HYPERNYM_SYMBOLS = HYPERNYM_TYPES.invert
- # Hyponym synset pointer types
- HYPONYM_TYPES = {
- nil => '~', # Install non-subtype methods, too
- :instance => '~i',
- }
+ # Hyponym synset pointer types
+ HYPONYM_TYPES = {
+ nil => '~', # Install non-subtype methods, too
+ :instance => '~i',
+ }
- # Hyponym indicator -> type map
- HYPONYM_SYMBOLS = HYPONYM_TYPES.invert
+ # Hyponym indicator -> type map
+ HYPONYM_SYMBOLS = HYPONYM_TYPES.invert
- # Meronym synset pointer types
- MERONYM_TYPES = {
- :member => '%m',
- :stuff => '%s',
- :portion => '%o',
- :component => '%p',
- :feature => '%f',
- :phase => '%a',
- :place => '%l',
- }
+ # Meronym synset pointer types
+ MERONYM_TYPES = {
+ :member => '%m',
+ :stuff => '%s',
+ :portion => '%o',
+ :component => '%p',
+ :feature => '%f',
+ :phase => '%a',
+ :place => '%l',
+ }
- # Meronym indicator -> type map
- MERONYM_SYMBOLS = MERONYM_TYPES.invert
+ # Meronym indicator -> type map
+ MERONYM_SYMBOLS = MERONYM_TYPES.invert
- # Map the meronym types into their own constants (eg., MemberMeronym)
- MERONYM_TYPES.each do |sym,val|
- cname = sym.to_s.capitalize + "Meronym"
- const_set( cname, val )
- end
+ # Map the meronym types into their own constants (eg., MemberMeronym)
+ MERONYM_TYPES.each do |sym,val|
+ cname = sym.to_s.capitalize + "Meronym"
+ const_set( cname, val )
+ end
- # Holonym synset pointer types
- HOLONYM_TYPES = {
- :member => '#m',
- :stuff => '#s',
- :portion => '#o',
- :component => '#p',
- :feature => '#f',
- :phase => '#a',
- :place => '#l',
- }
+ # Holonym synset pointer types
+ HOLONYM_TYPES = {
+ :member => '#m',
+ :stuff => '#s',
+ :portion => '#o',
+ :component => '#p',
+ :feature => '#f',
+ :phase => '#a',
+ :place => '#l',
+ }
- # Holonym indicator -> type map
- HOLONYM_SYMBOLS = HOLONYM_TYPES.invert
+ # Holonym indicator -> type map
+ HOLONYM_SYMBOLS = HOLONYM_TYPES.invert
- # Map the holonym types into their own constants (eg., MemberHolonym)
- HOLONYM_TYPES.each do |sym,val|
- cname = sym.to_s.capitalize + "Holonym"
- const_set( cname, val )
- end
+ # Map the holonym types into their own constants (eg., MemberHolonym)
+ HOLONYM_TYPES.each do |sym,val|
+ cname = sym.to_s.capitalize + "Holonym"
+ const_set( cname, val )
+ end
- # Domain synset pointer types
- DOMAIN_TYPES = {
- :category => ';c',
- :region => ';r',
- :usage => ';u',
- }
+ # Domain synset pointer types
+ DOMAIN_TYPES = {
+ :category => ';c',
+ :region => ';r',
+ :usage => ';u',
+ }
- # Domain indicator -> type map
- DomainSymbols = DOMAIN_TYPES.invert
+ # Domain indicator -> type map
+ DomainSymbols = DOMAIN_TYPES.invert
- # Map the domain types into their own constants (eg., CategoryDomain)
- DOMAIN_TYPES.each do |sym,val|
- cname = sym.to_s.capitalize + "Domain"
- const_set( cname, val )
- end
+ # Map the domain types into their own constants (eg., CategoryDomain)
+ DOMAIN_TYPES.each do |sym,val|
+ cname = sym.to_s.capitalize + "Domain"
+ const_set( cname, val )
+ end
- # Member synset pointer types
- MEMBER_TYPES = {
- :category => '-c',
- :region => '-r',
- :usage => '-u',
- }
+ # Member synset pointer types
+ MEMBER_TYPES = {
+ :category => '-c',
+ :region => '-r',
+ :usage => '-u',
+ }
- # Member indicator -> type map
- MEMBER_SYMBOLS = MEMBER_TYPES.invert
+ # Member indicator -> type map
+ MEMBER_SYMBOLS = MEMBER_TYPES.invert
- # Map the member types into their own constants (eg., CategoryMember)
- MEMBER_TYPES.each do |sym,val|
- cname = sym.to_s.capitalize + "Member"
- const_set( cname, val )
- end
+ # Map the member types into their own constants (eg., CategoryMember)
+ MEMBER_TYPES.each do |sym,val|
+ cname = sym.to_s.capitalize + "Member"
+ const_set( cname, val )
+ end
- # Map of primary types to maps of their subtypes
- POINTER_SUBTYPES = {
- :hyponym => HYPONYM_TYPES,
- :hypernym => HYPERNYM_TYPES,
- :meronym => MERONYM_TYPES,
- :holonym => HOLONYM_TYPES,
- :member => MEMBER_TYPES,
- :domain => DOMAIN_TYPES,
- }
+ # Map of primary types to maps of their subtypes
+ POINTER_SUBTYPES = {
+ :hyponym => HYPONYM_TYPES,
+ :hypernym => HYPERNYM_TYPES,
+ :meronym => MERONYM_TYPES,
+ :holonym => HOLONYM_TYPES,
+ :member => MEMBER_TYPES,
+ :domain => DOMAIN_TYPES,
+ }
- # Record-part delimiter
- DELIM = '||'
- DELIM_RE = Regexp::new( Regexp::quote(DELIM) )
+ # Record-part delimiter
+ DELIM = '||'
+ DELIM_RE = Regexp::new( Regexp::quote(DELIM) )
- # Record-subpart delimiter
- SUB_DELIM = '|'
- SUB_DELIM_RE = Regexp::new( Regexp::quote(SUB_DELIM) )
+ # Record-subpart delimiter
+ SUB_DELIM = '|'
+ SUB_DELIM_RE = Regexp::new( Regexp::quote(SUB_DELIM) )
- # Lexicographer file index -- from lexnames(5WN)
- LEXFILES = [
- "adj.all",
- "adj.pert",
- "adv.all",
- "noun.Tops",
- "noun.act",
- "noun.animal",
- "noun.artifact",
- "noun.attribute",
- "noun.body",
- "noun.cognition",
- "noun.communication",
- "noun.event",
- "noun.feeling",
- "noun.food",
- "noun.group",
- "noun.location",
- "noun.motive",
- "noun.object",
- "noun.person",
- "noun.phenomenon",
- "noun.plant",
- "noun.possession",
- "noun.process",
- "noun.quantity",
- "noun.relation",
- "noun.shape",
- "noun.state",
- "noun.substance",
- "noun.time",
- "verb.body",
- "verb.change",
- "verb.cognition",
- "verb.communication",
- "verb.competition",
- "verb.consumption",
- "verb.contact",
- "verb.creation",
- "verb.emotion",
- "verb.motion",
- "verb.perception",
- "verb.possession",
- "verb.social",
- "verb.stative",
- "verb.weather",
- "adj.ppl"
- ]
+ # Lexicographer file index -- from lexnames(5WN)
+ LEXFILES = [
+ "adj.all",
+ "adj.pert",
+ "adv.all",
+ "noun.Tops",
+ "noun.act",
+ "noun.animal",
+ "noun.artifact",
+ "noun.attribute",
+ "noun.body",
+ "noun.cognition",
+ "noun.communication",
+ "noun.event",
+ "noun.feeling",
+ "noun.food",
+ "noun.group",
+ "noun.location",
+ "noun.motive",
+ "noun.object",
+ "noun.person",
+ "noun.phenomenon",
+ "noun.plant",
+ "noun.possession",
+ "noun.process",
+ "noun.quantity",
+ "noun.relation",
+ "noun.shape",
+ "noun.state",
+ "noun.substance",
+ "noun.time",
+ "verb.body",
+ "verb.change",
+ "verb.cognition",
+ "verb.communication",
+ "verb.competition",
+ "verb.consumption",
+ "verb.contact",
+ "verb.creation",
+ "verb.emotion",
+ "verb.motion",
+ "verb.perception",
+ "verb.possession",
+ "verb.social",
+ "verb.stative",
+ "verb.weather",
+ "adj.ppl"
+ ]
- # Verb sentences (?) -- used in building verb frames.
- VERB_SENTS = [
- "",
- "Something ----s",
- "Somebody ----s",
- "It is ----ing",
- "Something is ----ing PP",
- "Something ----s something Adjective/Noun",
- "Something ----s Adjective/Noun",
- "Somebody ----s Adjective",
- "Somebody ----s something",
- "Somebody ----s somebody",
- "Something ----s somebody",
- "Something ----s something",
- "Something ----s to somebody",
- "Somebody ----s on something",
- "Somebody ----s somebody something",
- "Somebody ----s something to somebody",
- "Somebody ----s something from somebody",
- "Somebody ----s somebody with something",
- "Somebody ----s somebody of something",
- "Somebody ----s something on somebody",
- "Somebody ----s somebody PP",
- "Somebody ----s something PP",
- "Somebody ----s PP",
- "Somebody's (body part) ----s",
- "Somebody ----s somebody to INFINITIVE",
- "Somebody ----s somebody INFINITIVE",
- "Somebody ----s that CLAUSE",
- "Somebody ----s to somebody",
- "Somebody ----s to INFINITIVE",
- "Somebody ----s whether INFINITIVE",
- "Somebody ----s somebody into V-ing something",
- "Somebody ----s something with something",
- "Somebody ----s INFINITIVE",
- "Somebody ----s VERB-ing",
- "It ----s that CLAUSE",
- "Something ----s INFINITIVE"
- ]
+ # Verb sentences (?) -- used in building verb frames.
+ VERB_SENTS = [
+ "",
+ "Something ----s",
+ "Somebody ----s",
+ "It is ----ing",
+ "Something is ----ing PP",
+ "Something ----s something Adjective/Noun",
+ "Something ----s Adjective/Noun",
+ "Somebody ----s Adjective",
+ "Somebody ----s something",
+ "Somebody ----s somebody",
+ "Something ----s somebody",
+ "Something ----s something",
+ "Something ----s to somebody",
+ "Somebody ----s on something",
+ "Somebody ----s somebody something",
+ "Somebody ----s something to somebody",
+ "Somebody ----s something from somebody",
+ "Somebody ----s somebody with something",
+ "Somebody ----s somebody of something",
+ "Somebody ----s something on somebody",
+ "Somebody ----s somebody PP",
+ "Somebody ----s something PP",
+ "Somebody ----s PP",
+ "Somebody's (body part) ----s",
+ "Somebody ----s somebody to INFINITIVE",
+ "Somebody ----s somebody INFINITIVE",
+ "Somebody ----s that CLAUSE",
+ "Somebody ----s to somebody",
+ "Somebody ----s to INFINITIVE",
+ "Somebody ----s whether INFINITIVE",
+ "Somebody ----s somebody into V-ing something",
+ "Somebody ----s something with something",
+ "Somebody ----s INFINITIVE",
+ "Somebody ----s VERB-ing",
+ "It ----s that CLAUSE",
+ "Something ----s INFINITIVE"
+ ]
- end # module Constants
+end # module WordNet::Constants
- # Make the constants available under the WordNet namespace, too.
- include Constants
-
-end # module WordNet
M lib/wordnet/lexicon.rb +3 -3
@@ 90,7 90,7 @@ class WordNet::Lexicon
### octal mode (e.g., 0444) or one of (:readonly, :readwrite).
def initialize( dbenv=DEFAULT_DB_ENV, mode=:readonly )
@mode = normalize_mode( mode )
- debug_msg "Mode is: %04o" % [ mode ]
+ debug_msg "Mode is: %04o" % [ @mode ]
envflags = 0
dbflags = 0
@@ 230,7 230,7 @@ class WordNet::Lexicon
data = @data_db[ key ]
offset, part_of_speech = key.split( /%/, 2 )
- synsets << WordNet::Synset::new( self, offset, part_of_speech, nil, data )
+ synsets << WordNet::Synset.new( self, offset, part_of_speech, nil, data )
}
return *synsets
@@ 277,7 277,7 @@ class WordNet::Lexicon
### Factory method: Creates and returns a new WordNet::Synset object in
### this lexicon for the specified +word+ and +part_of_speech+.
def create_synset( word, part_of_speech )
- return WordNet::Synset::new( self, '', part_of_speech, word )
+ return WordNet::Synset.new( self, '', part_of_speech, word )
end
alias_method :new_synset, :create_synset
M lib/wordnet/synset.rb +620 -819
@@ 1,5 1,7 @@
#!/usr/bin/ruby
-#
+
+require 'wordnet/constants'
+
# WordNet synonym-set object class
#
# == Synopsis
@@ 31,878 33,677 @@
#
# $Id$
#
+class WordNet::Synset
+ include WordNet::Constants
-require 'sync'
-require 'wordnet/constants'
+ require 'wordnet/synset_pointer'
+
+ # Subversion ID
+ SVNId = %q$Id$
+
+ # Subversion Rev
+ SVNRev = %q$Rev$
+
+
+ #############################################################
+ ### C L A S S M E T H O D S
+ #############################################################
-module WordNet
+ ### Define a group of pointer methods based on +symbol+ that will fetch,
+ ### add, and delete pointer synsets of the type indicated. If no pointer
+ ### type corresponding to the given +symbol+ is found, a variant without
+ ### a trailing 's' is tried (e.g., 'def_pointer_methods :antonyms' will
+ ### create methods called #antonyms and #antonyms=, but will fetch
+ ### pointers of type :antonym). If the pointer type has subtypes
+ ### (according to WordNet::POINTER_SUBTYPES), accessors/mutators for the
+ ### subtypes will be generated as well.
+ def self::def_pointer_methods( symbol ) # :nodoc:
+   name = symbol.to_s
+   $stderr.puts '-' * 50,
+     ">>> defining pointer methods for %p" % [symbol] if $DEBUG
- ### Synset internal error class
- class SynsetError < StandardError ; end
+
+   # Resolve the pointer type: the symbol itself if it is a known type,
+   # otherwise the symbol with one trailing 's' removed.
+   if POINTER_TYPES.key?( symbol )
+     type = symbol
+   else
+     singular = name.sub( /s$/, '' ).to_sym
+     unless POINTER_TYPES.key?( singular )
+       raise ArgumentError, "Unknown pointer type %p" % symbol
+     end
+     type = singular
+   end
+
+   # Define the accessor and the mutator for the main pointer type
+   $stderr.puts "Defining accessors for %p" % [ type ] if $DEBUG
+   define_method( name.to_sym ) { self.fetch_synset_pointers(type) }
+   define_method( "#{name}=".to_sym ) do |*synsets|
+     self.set_synset_pointers( type, synsets, nil )
+   end
+
+   # A type with subtypes gets one set of methods per subtype; a type without
+   # any gets a single nil variant, which (re)defines the mutator for the main
+   # pointer type only.
+   variants = POINTER_SUBTYPES.key?( type ) ? POINTER_SUBTYPES[ type ].keys : [nil]
+
+   variants.each do |subtype|
+     # e.g. subtype :instance with name "hypernyms" gives "instance_hypernyms"
+     varname = subtype ? [subtype, name].join('_') : name
- ### "Synonym set" class - encapsulates the data for a set of words in the
- ### lexical database that are interchangeable in some context, and provides
- ### methods for accessing its relationships.
- class Synset
- include WordNet::Constants
- include CrossCase if defined?( CrossCase )
+
+     if subtype.nil?
+       # The reader for the main type was already defined above
+       $stderr.puts "No subtype for %s (subtype = %p)" %
+         [ varname, subtype ] if $DEBUG
+     else
+       $stderr.puts "Defining reader for #{varname}" if $DEBUG
+       define_method( varname ) do
+         self.fetch_synset_pointers( type, subtype )
+       end
+     end
+
+     $stderr.puts "Defining mutator for #{varname}" if $DEBUG
+     define_method( "#{varname}=" ) do |*synsets|
+       self.set_synset_pointers( type, synsets, subtype )
+     end
+   end
+ end
+
+
+ #############################################################
+ ### I N S T A N C E M E T H O D S
+ #############################################################
+
+ ### Create a new Synset object in the specified +lexicon+ for the
+ ### specified +word+ and +part_of_speech+. If +data+ is specified,
+ ### initialize the synset's other object data from it. This method
+ ### shouldn't be called directly: you should use one of the Lexicon
+ ### class's factory methods: #create_synset, #lookup_synsets, or
+ ### #lookup_synsets_by_keys.
+ def initialize( lexicon, offset, pos, word=nil, data=nil )
+   @lexicon = lexicon
- # Subversion ID
- SVNId = %q$Id$
+
+   # +pos+ may be an indicator that SYNTACTIC_SYMBOLS maps to a category name,
+   # or a key of SYNTACTIC_CATEGORIES, which is stored as given.
+   category = SYNTACTIC_SYMBOLS[ pos ]
+   unless category || SYNTACTIC_CATEGORIES.key?( pos )
+     raise ArgumentError, "No such part of speech %p" % [ pos ]
+   end
+   @part_of_speech = category || pos
+
+   @pointers = nil
+
+   @offset = offset.to_i
+   @wordlist = word || ''
+   @data = data
+
+   # Defaults for the parsed fields, used when no raw data is given
+   @filenum = nil
+   @pointerlist = ''
+   @frameslist = ''
+   @gloss = ''
+
+   # Raw data, when present, supplies all five fields at once (and replaces
+   # the wordlist built from +word+ above)
+   if data
+     @filenum, @wordlist, @pointerlist, @frameslist, @gloss = data.split( DELIM_RE )
+   end
+ end
+
+
+ ######
+ public
+ ######
+
+ # The WordNet::Lexicon that was used to look up this synset
+ attr_reader :lexicon
+
+ # The syntactic category of this Synset. The constructor stores it as one of
+ # the SYNTACTIC_CATEGORIES keys: :noun, :verb, :adjective, :adverb, or :other.
+ attr_accessor :part_of_speech
- # Subversion Rev
- SVNRev = %q$Rev$
+ # The original byte offset of the synset in the data file; acts as the
+ # unique identifier (when combined with #part_of_speech) of this Synset in
+ # the database.
+ attr_accessor :offset
+
+ # The number corresponding to the lexicographer file name containing the
+ # synset. Calling #lexInfo will return the actual filename. See the
+ # "System Description" of wngloss(7WN) for more info about this.
+ attr_accessor :filenum
+
+ # The raw list of word/lex_id pairs associated with this synset. Each
+ # word and lex_id is separated by a '%' character, and each pair is
+ # delimited with a '|'. E.g., the wordlist for "animal" is:
+ # "animal%0|animate_being%0|beast%0|brute%1|creature%0|fauna%1"
+ attr_accessor :wordlist
+
+ # The list of raw pointers to related synsets. E.g., the pointerlist for
+ # "mourning dove" is:
+ # "@ 01731700%n 0000|#m 01733452%n 0000"
+ attr_accessor :pointerlist
+
+ # The list of raw verb sentence frames for this synset.
+ attr_accessor :frameslist
+
+ # Definition and/or example sentences for the Synset.
+ attr_accessor :gloss
+
+ # The raw WordNet data that represents this synset
+ attr_reader :data
+
- # The "pointer" type that encapsulates relationships between one synset
- # and another.
- class Pointer
- include WordNet::Constants
- include CrossCase if defined?( CrossCase )
+ ### Return a human-readable representation of the Synset suitable for
+ ### debugging.
+ def inspect
+   # One "<type>s: <count>" entry per entry of the pointer map
+   count_summary = self.pointer_map.map do |ptr_type, pointers|
+     "#{ptr_type}s: #{pointers.length}"
+   end.join( ", " )
+
+   details = [
+     self.class.name,
+     self.object_id * 2,
+     self.offset,
+     self.words.join(", "),
+     self.part_of_speech,
+     self.gloss,
+     count_summary,
+   ]
+
+   return %q{#<%s:0x%08x/%s %s (%s): "%s" (%s)>} % details
+ end
+
+
+ ### Returns the Synset's unique identifier, made up of its offset and
+ ### syntactic category catenated together with a '%' symbol.
+ def key
+   # Offset first, then the value of #pos, joined by a literal '%'
+   id_parts = [ self.offset, self.pos ]
+   "%d%%%s" % id_parts
+ end
+
+
+ ### The indicator string for this synset's syntactic category, looked up in
+ ### SYNTACTIC_CATEGORIES. Will be one of "n", "v", "a", "r", or "s".
+ def pos
+ # Look up the stored part of speech in the category name -> indicator table
+ return SYNTACTIC_CATEGORIES[ @part_of_speech ]
+ end
+
- #########################################################
- ### C L A S S M E T H O D S
- #########################################################
+ ### Return each of the sentences of the gloss for this synset as an
+ ### array. The gloss is a definition of the synset, and optionally one
+ ### or more example sentences.
+ def glosses
+   # Sentences are separated by semicolons; surrounding whitespace is dropped
+   sentence_separator = /\s*;\s*/
+   self.gloss.split( sentence_separator )
+ end
+
+
+ ### Returns true if the receiver and otherSyn identify the same synset, i.e.
+ ### they have the same offset and the same part of speech. Anything that is
+ ### not a WordNet::Synset is never equal.
+ def ==( otherSyn )
+   return false unless otherSyn.kind_of?( WordNet::Synset )
+
+   # An offset only identifies a synset in combination with its part of
+   # speech, so both have to match.
+   return self.offset == otherSyn.offset &&
+     self.part_of_speech == otherSyn.part_of_speech
+ end
+
+
+ ### Returns an Array of words and/or collocations associated with this
+ ### synset.
+ def words
+   raw_entries = self.wordlist.split( SUB_DELIM_RE )
+
+   # Underscores become spaces, and everything from the first '%' on is dropped
+   raw_entries.map {|entry| entry.tr( '_', ' ' ).sub( /%.*$/, '' ) }
+ end
+ alias_method :synonyms, :words
+
+
+ ### Set the words in this synset's wordlist to +new_words+
+ def words=( *new_words )
+ # Array#join also joins nested arrays, so a single Array argument (as passed
+ # by #add_words and #delete_words) works too. Words are stored as given.
+ @wordlist = new_words.join( SUB_DELIM )
+ end
+
- ### Make an Array of WordNet::Synset::Pointer objects out of the
- ### given +pointerList+. The pointerlist is a string of pointers
- ### delimited by Constants::SUB_DELIM. Pointers are in the form:
- ### "<pointer_symbol> <synset_offset>%<pos> <source/target>"
- def self::parse( pointerString )
- type, offsetPos, ptrNums = pointerString.split(/\s+/)
- offset, pos = offsetPos.split( /%/, 2 )
- new( type, offset, pos, ptrNums[0,2], ptrNums[2,2] )
- end
+ ### Add the specified +new_words+ to this synset's wordlist.
+ def add_words( *new_words )
+ # Union of the current #words and +new_words+, written back through #words=
+ self.words |= new_words
+ end
+
+
+ ### Delete the specified +old_words+ from this synset's wordlist. Any other
+ ### words are left in place.
+ def delete_words( *old_words )
+ # Current #words minus +old_words+, written back through #words=
+ self.words -= old_words
+ end
+
+
+ ### Return the synset as a string. Alias: +overview+.
+ def to_s
+   # Comma-separated words, with any leftover "%<digit>" markers and
+   # underscores cleaned out of the joined string
+   names = self.words.join( ", " )
+   names = names.gsub( /%\d/, '' ).tr( '_', ' ' )
+
+   "#{names} [#{self.part_of_speech}] -- (#{self.gloss})"
+ end
+ alias_method :overview, :to_s
+
+
+ ### Writes any changes made to the object to the database and updates all
+ ### affected synset data and indexes. If the object passes out of scope
+ ### before #store is called, the changes are lost.
+ def store
+ # Delegates to the lexicon's #store_synset, passing this synset
+ self.lexicon.store_synset( self )
+ end
+ alias_method :write, :store
+
+
+ ### Removes this synset from the database.
+ def remove
+ # Delegates to the lexicon's #remove_synset, passing this synset
+ self.lexicon.remove_synset( self )
+ end
- #########################################################
- ### I N S T A N C E M E T H O D S
- #########################################################
+ ### Returns the synset's data in a form suitable for storage in the
+ ### lexicon's database.
+ def serialize
+   # Field order mirrors the destructuring of the raw data in #initialize
+   fields = [ @filenum, @wordlist, @pointerlist, @frameslist, @gloss ]
+
+   # DELIM comes from the included WordNet::Constants, like the DELIM_RE and
+   # SUB_DELIM constants used by the other methods of this class
+   return fields.join( DELIM )
+ end
+
+
+ ### Auto-generate synset pointer methods for the various types
+
+ # The synsets for the receiver's antonyms (opposites). E.g.,
+ # $lexicon.lookup_synsets( "opaque", :adjective, 1 ).antonyms
+ # ==> [#<WordNet::Synset:0x010a9acc/454927 clear (adjective): "free
+ # from cloudiness; allowing light to pass through; "clear water";
+ # "clear plastic bags"; "clear glass"; "the air is clear and
+ # clean"" (similar_tos: 6, attributes: 1, derivations: 2,
+ # antonyms: 1, see_alsos: 1)>]
+ def_pointer_methods :antonyms
+
+ # Synsets for the receiver's entailments (a verb X entails Y if X cannot
+ # be done unless Y is or has been done). E.g.,
+ # $lexicon.lookup_synsets( 'rasp', :verb, 1 ).entailment
+ # ==> [#<WordNet::Synset:0x010dc24c rub (verb): "move over something
+ # with pressure; "rub my hands"; "rub oil into her skin""
+ # (derivations: 2, entailments: 1, hypernyms: 1, hyponyms: 13,
+ # see_alsos: 4)>]
+ def_pointer_methods :entailment
+
+ # Get/set synsets for the receiver's cause pointers (a verb X causes Y
+ # to happen).
+ def_pointer_methods :causes
+
+ # Get/set synsets for the receiver's verb groups. Verb groups link verbs
+ # with similar senses together.
+ def_pointer_methods :verb_groups
+
+ # Get/set list of synsets for the receiver's "similar to" pointers. This
+ # type of pointer links together head adjective synsets with its
+ # satellite adjective synsets.
+ def_pointer_methods :similar_to
+
+ # Get/set synsets for the receiver's participles. Participles are
+ # non-finite forms of a verb; used adjectivally and to form compound
+ # tenses. For example, the first participle for "working" is:
+ # "function, work, operate, go, run (verb)"
+ def_pointer_methods :participles
- ### Create a new synset pointer with the given arguments. The
- ### +ptrType+ is the type of the link between synsets, and must be
- ### either a key or a value of WordNet::Constants::POINTER_TYPES. The
- ### +offset+ is the unique identifier of the target synset, and
- ### +pos+ is its part-of-speech, which must be either a key or value
- ### of WordNet::Constants::SYNTACTIC_CATEGORIES. The +source_wn+ and
- ### +target_wn+ are numerical values which distinguish lexical and
- ### semantic pointers. +source_wn+ indicates the word number in the
- ### current (source) synset, and +target_wn+ indicates the word
- ### number in the target synset. If both are 0 (the default) it
- ### means that the pointer type of the pointer represents a semantic
- ### relation between the current (source) synset and the target
- ### synset indicated by +offset+.
- def initialize( type, offset, pos=Noun, source_wn=0, target_wn=0 )
+ # Get/set synsets for the receiver's pertainyms. Pertainyms are
+ # relational adjectives. Adjectives that are pertainyms are usually
+ # defined by such phrases as "of or pertaining to" and do not have
+ # antonyms. A pertainym can point to a noun or another pertainym.
+ def_pointer_methods :pertainyms
+
+ # Get/set synsets for the receiver's attributes.
+ def_pointer_methods :attributes
+
+ # Get/set synsets for the receiver's derived_from.
+ def_pointer_methods :derived_from
+
+ # Get/set synsets for the receiver's derivations.
+ def_pointer_methods :derivations
+
+ # Get/set synsets for the receiver's see_also.
+ def_pointer_methods :see_also
+
+
+ # Auto-generate types with subtypes
- # Allow type = '!', 'antonym', or :antonym. Also handle
- # splitting of compound pointers (e.g., :memberMeronym / '%m')
- # into their correct type/subtype parts.
- @type = @subtype = nil
- if type.to_s.length == 1
- @type = POINTER_SYMBOLS[ type[0,1] ]
+ # Synsets for the receiver's hypernyms (more-general terms). E.g.,
+ # $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hypernyms
+ # ==> [#<WordNet::Synset:0x0109a644/3023321 club (noun): "stout
+ # stick that is larger at one end; "he carried a club in self
+ # defense"; "he felt as if he had been hit with a club""
+ # (derivations: 1, hypernyms: 1, hyponyms: 7)>]
+ #
+ # Also generates accessors for subtypes:
+ #
+ # [instance_hypernyms]
+ # A proper noun that refers to a particular, unique referent (as
+ # distinguished from nouns that refer to classes).
+ def_pointer_methods :hypernyms
+
+
+ # :TODO: Generate an example for this
+
+ # Get/set synsets for the receiver's hyponyms (more-specific terms). A
+ # hyponym is the specific term used to designate a member of a class: X is
+ # a hyponym of Y if X is a (kind of) Y. E.g.,
+ # $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hyponyms
+ # ==> [...]
+ #
+ # Also generates accessors for subtypes:
+ #
+ # [instance_hyponyms]
+ # A proper noun that refers to a particular, unique referent (as
+ # distinguished from nouns that refer to classes).
+ def_pointer_methods :hyponyms
+
- elsif type.to_s.length == 2
- @type = POINTER_SYMBOLS[ type[0,1] ]
- raise "No known subtypes for '%s'" % [@type] unless
- POINTER_SUBTYPES.key?( @type )
- @subtype = POINTER_SUBTYPES[ @type ].index( type ) or
- raise "Unknown subtype '%s' for '%s'" %
- [ type, @type ]
+ # Get/set synsets for the receiver's meronyms. In addition to the
+ # general accessors for all meronyms, there are also accessors for
+ # subtypes as well:
+ #
+ # [member_meronyms]
+ # Get/set synsets for the receiver's "member" meronyms (HAS MEMBER
+ # relation).
+ # [stuff_meronyms]
+ # Get/set synsets for the receiver's "stuff" meronyms (IS MADE OUT OF
+ # relation).
+ # [portion_meronyms]
+ # Get/set synsets for the receiver's "portion" meronyms (HAS PORTION
+ # relation).
+ # [component_meronyms]
+ # Get/set synsets for the receiver's "component" meronyms (HAS
+ # COMPONENT relation).
+ # [feature_meronyms]
+ # Get/set synsets for the receiver's "feature" meronyms (HAS FEATURE
+ # relation).
+ # [phase_meronyms]
+ # Get/set synsets for the receiver's "phase" meronyms (HAS PHASE
+ # relation).
+ # [place_meronyms]
+ # Get/set synsets for the receiver's "place" meronyms (HAS PLACE
+ # relation).
+ def_pointer_methods :meronyms
- else
- if POINTER_TYPES.key?( type.to_sym )
- @type = type.to_sym
- elsif /([a-z]+)([A-Z][a-z]+)/ =~ type.to_s
- subtype, maintype = $1, $2.downcase
- @type = maintype.to_sym if
- POINTER_TYPES.key?( maintype.to_sym )
- @subtype = subtype.to_sym
- end
- end
+ # Get/set synsets for the receiver's holonyms. In addition to the
+ # general accessors for all holonyms, there are also accessors for
+ # subtypes as well:
+ #
+ # [member_holonyms]
+ # Get/set synsets for the receiver's "member" holonyms (IS A MEMBER OF
+ # relation).
+ # [stuff_holonyms]
+ # Get/set synsets for the receiver's "stuff" holonyms (IS MATERIAL OF
+ # relation).
+ # [portion_holonyms]
+ # Get/set synsets for the receiver's "portion" holonyms (IS A PORTION
+ # OF relation).
+ # [component_holonyms]
+ # Get/set synsets for the receiver's "component" holonyms (IS A
+ # COMPONENT OF relation).
+ # [feature_holonyms]
+ # Get/set synsets for the receiver's "feature" holonyms (IS A FEATURE
+ # OF relation).
+ # [phase_holonyms]
+ # Get/set synsets for the receiver's "phase" holonyms (IS A PHASE OF
+ # relation).
+ # [place_holonyms]
+ # Get/set synsets for the receiver's "place" holonyms (IS A PLACE IN
+ # relation).
+ def_pointer_methods :holonyms
+
+ # Get/set synsets for the receiver's topical domain members. In addition
+ # to the general members accessor, there are also accessors for
+ # membership subtypes:
+ #
+ # [category_members]
+ # Get/set synsets for the receiver's
+ # "category" topical domain members.
+ # [region_members]
+ # Get/set synsets for the receiver's "region"
+ # topical domain members.
+ # [usage_members]
+ # Get/set synsets for the receiver's "usage"
+ # topical domain members.
+ def_pointer_methods :members
- raise ArgumentError, "No such pointer type %p" % type if
- @type.nil?
+ # Get/set synsets for the receiver's topical domains. In addition to the
+ # general domains accessor, there are also accessors for domain
+ # subtypes:
+ #
+ # [category_domains]
+ # Get/set synsets for the receiver's
+ # "category" topical domains.
+ # [region_domains]
+ # Get/set synsets for the receiver's "region"
+ # topical domains.
+ # [usage_domains]
+ # Get/set synsets for the receiver's "usage"
+ # topical domains.
+ def_pointer_methods :domains
+
+
+ ### Returns an Array of the coordinate sisters of the receiver.
+ def coordinates
+ self.hypernyms.collect {|syn| syn.hyponyms }.flatten
+ end
+
+
+ ### Return the name of the "lexicographer's file" associated with this
+ ### synset.
+ def lex_info
+ return LEXFILES[ self.filenum.to_i ]
+ end
+
- # Allow pos = 'n', 'noun', or :noun
- @part_of_speech = nil
- if pos.to_s.length == 1
- @part_of_speech = SYNTACTIC_SYMBOLS[ pos ]
- else
- @part_of_speech = pos.to_sym if
- SYNTACTIC_CATEGORIES.key?( pos.to_sym )
- end
- raise ArgumentError, "No such part of speech %p" % pos if
- @part_of_speech.nil?
+ ### Sets the "lexicographer's file" association for this synset to
+ ### +id+. The value in +id+ should correspond to one of the values in
+ ### #WordNet::LEXFILES
+ def lexInfo=( id )
+ raise ArgumentError, "Bad index: Lexinfo id must be within LEXFILES" unless
+ LEXFILES[id]
+ self.filenum = id
+ end
+
+
+ ### Returns an +Array+ of verb frame +String+s for the synset.
+ def frames
+ frarray = self.frameslist.split( WordNet::SUB_DELIM_RE )
+ verbFrames = []
- # Other attributes
- @offset = offset
- @source_wn = source_wn
- @target_wn = target_wn
+ frarray.each {|fr|
+ fnum, wnum = fr.split
+ if wnum > 0
+ wordtext = " (" + self.words[wnum] + ")"
+ verbFrames.push VERB_SENTS[ fnum ] + wordtext
+ else
+ verbFrames.push VERB_SENTS[ fnum ]
+ end
+ }
+
+ return verbFrames
+ end
+
+
+ ### Build a Proc to do recursive traversal of the specified +type+ of
+ ### relationship. The Proc takes a synset and an optional depth (treated as
+ ### 0 when nil) and returns a two-element Array: the synsets it traversed
+ ### and a flag telling whether the traversal was halted.
+ ###
+ ### The Proc yields to the block given to *this* method, passing each synset
+ ### and its depth; the traversal halts when that block returns exactly
+ ### +true+. If +include_origin+ is false, the synset at depth 0 is neither
+ ### yielded nor included in the returned synsets.
+ def build_traversal_func( type, include_origin=true )
+ func = Proc.new do |syn,depth|
+ depth ||= 0
+
+ # Flag to halt traversal
+ halt_flag = false
+
+ # Call the block if it exists and we're either past the origin or
+ # including it. Only a literal true result halts; other truthy values
+ # don't.
+ if block_given? && (include_origin || depth.nonzero?)
+ res = yield( syn, depth )
+ halt_flag = true if res.is_a? TrueClass
end
-
- ######
- public
- ######
-
- # The type of the pointer. Will be one of the keys of
- # WordNet::POINTER_TYPES (e.g., :meronym).
- attr_accessor :type
-
- # The subtype of the pointer, if any. Will be one of the keys of one
- # of the hashes in POINTER_SUBTYPES (e.g., :portion).
- attr_accessor :subtype
-
- # The offset of the target synset
- attr_accessor :offset
-
- # The part-of-speech of the target synset. Will be one of the keys
- # of WordNet::SYNTACTIC_CATEGORIES.
- attr_accessor :part_of_speech
-
- # The word number in the source synset
- attr_accessor :source_wn
-
- # The word number in the target synset
- attr_accessor :target_wn
-
+ # Make an array for holding sub-synsets we see
+ sub_syns = []
+ sub_syns << syn unless depth.zero? && !include_origin
- ### Return the Pointer as a human-readable String suitable for
- ### debugging.
- def inspect
- "#<%s:0x%08x %s %s>" % [
- self.class.name,
- self.object_id,
- @subtype ? "#@type(#@subtype)" : @type,
- self.synset,
- ]
- end
-
-
- ### Return the synset key of the target synset (i.e.,
- ### <offset>%<pos symbol>).
- def synset
- self.offset + "%" + self.pos
- end
-
-
- ### Return the syntactic category symbol for this pointer
- def pos
- return SYNTACTIC_CATEGORIES[ @part_of_speech ]
- end
-
-
- ### Return the pointer type symbol for this pointer
- def type_symbol
- unless @subtype
- return POINTER_TYPES[ @type ]
- else
- return POINTER_SUBTYPES[ @type ][ @subtype ]
+ # Iterate over each synset returned by calling the pointer on the
+ # current syn. For each one, we call ourselves recursively, and
+ # break out of the iterator as soon as a recursive call reports that
+ # the block asked to halt (by returning true).
+ unless halt_flag
+ syn.send( type ).each do |subsyn|
+ sub_sub_syns, halt_flag = func.call( subsyn, depth + 1 )
+ sub_syns += sub_sub_syns
+ break if halt_flag
end
end
-
- ### Comparison operator. Pointer are equivalent if they point at the
- ### same synset and are of the same type.
- def ==( other )
- return false unless other.is_a?( self.class )
- other.offset == self.offset &&
- other.type == self.type
- end
-
+ # The Proc's result: the synsets seen at and below this level, plus
+ # the halt flag
+ [ sub_syns, halt_flag ]
+ end
+
+ return func
+ end
+
- ### Return the pointer in its stringified form.
- def to_s
- "%s %d%%%s %02x%02x" % [
- ptr.type_symbol,
- ptr.offset,
- ptr.posSymbol,
- ptr.source_wn,
- ptr.target_wn,
- ]
- end
- end # class Pointer
+ ### Traversal iterator: Iterates depth-first over a particular
+ ### +type+ of the receiver, and all of the pointed-to synset's
+ ### pointers. If called with a block, the block is called once for each
+ ### synset with the +foundSyn+ and its +depth+ in relation to the
+ ### originating synset as arguments. The first call will be the
+ ### originating synset with a depth of +0+ unless +include_origin+ is
+ ### +false+. If the +callback+ returns +true+, the traversal is halted,
+ ### and the method returns immediately. This method returns an Array of
+ ### the synsets which were traversed if no block is given, or a flag
+ ### which indicates whether or not the traversal was interrupted if a
+ ### block is given.
+ def traverse( type, include_origin=true )
+ raise ArgumentError, "Illegal parameter 1: Must be either a String or a Symbol" unless
+ type.kind_of?( String ) || type.kind_of?( Symbol )
+
+ raise ArgumentError, "Synset doesn't support the #{type.to_s} pointer type." unless
+ self.respond_to?( type )
+
+ traversal_func = nil
+
+ # Call the iterator
+ traversal_func = self.build_traversal_func( type, include_origin )
+ traversed_sets, halt_flag = traversal_func.call( self )
+
+ # If a block was given, just return whether or not the block was halted.
+ if block_given?
+ return halt_flag
+
+ # If no block was given, return the traversed synsets
+ else
+ return traversed_sets
+ end
+ end
- #############################################################
- ### C L A S S M E T H O D S
- #############################################################
+ ### Returns the distance in pointers between the receiver and +otherSynset+
+ ### using +type+ as the search path.
+ def distance( type, otherSynset )
+ dist = nil
+ self.traverse( type ) {|syn,depth|
+ if syn == otherSynset
+ dist = depth
+ true
+ end
+ }
- ### Define a group of pointer methods based on +symbol+ that will fetch,
- ### add, and delete pointer synsets of the type indicated. If no pointer
- ### type corresponding to the given +symbol+ is found, a variant without
- ### a trailing 's' is tried (e.g., 'def_pointer_methods :antonyms' will
- ### create methods called #antonyms and #antonyms=, but will fetch
- ### pointers of type :antonym). If the pointer type has subtypes
- ### (according to WordNet::POINTER_SUBTYPES), accessors/mutators for the
- ### subtypes will be generated as well.
- def self::def_pointer_methods( symbol ) # :nodoc:
- name = symbol.to_s
- casename = name.dup
- casename[ 0,1 ] = casename[ 0,1 ].upcase
- type = nil
- $stderr.puts '-' * 50,
- ">>> defining pointer methods for %p" % [symbol] if $DEBUG
+ return dist
+ end
- if POINTER_TYPES.key?( symbol )
- type = symbol
- elsif POINTER_TYPES.key?( symbol.to_s.sub(/s$/, '').to_sym )
- type = symbol.to_s.sub(/s$/, '').to_sym
- else
- raise ArgumentError, "Unknown pointer type %p" % symbol
- end
- # Define the accessor
- $stderr.puts "Defining accessors for %p" % [ type ] if $DEBUG
- define_method( name.to_sym ) { self.fetch_synset_pointers(type) }
- define_method( "#{name}=".to_sym ) do |*synsets|
- self.set_synset_pointers( type, synsets, nil )
- end
+ ### Recursively searches all of the receiver's pointers of the specified
+ ### +type+ for +otherSynset+, returning +true+ if it is found.
+ def search( type, otherSynset )
+ self.traverse( type ) {|syn,depth|
+ syn == otherSynset
+ }
+ end
- # If the pointer is one that has subtypes, make the variants list
- # out of the subtypes. If it doesn't have subtypes, make the only
- # variant nil, which will cause the mutators to be defined for the
- # main pointer type.
- if POINTER_SUBTYPES.key?( type )
- variants = POINTER_SUBTYPES[ type ].keys
- else
- variants = [nil]
- end
- # Define a set of methods for each variant, or for the main method
- # if the variant is nil.
- variants.each do |subtype|
- varname = subtype ? [subtype, name].join('_') : name
+ ### Union: Return the least general synset that the receiver and
+ ### +otherSyn+ have in common as a hypernym, or nil if they don't share
+ ### any. The result is the first synset yielded while traversing
+ ### +otherSyn+'s :hypernyms that is also among the synsets returned by
+ ### traversing the receiver's :hypernyms.
+ def |( otherSyn )
- unless subtype.nil?
- $stderr.puts "Defining reader for #{varname}" if $DEBUG
- define_method( varname ) do
- self.fetch_synset_pointers( type, subtype )
- end
- else
- $stderr.puts "No subtype for %s (subtype = %p)" %
- [ varname, subtype ] if $DEBUG
- end
+ # Find all of this syn's hypernyms
+ hyper_syns = self.traverse( :hypernyms )
+ common_syn = nil
- $stderr.puts "Defining mutator for #{varname}" if $DEBUG
- define_method( "#{varname}=" ) do |*synsets|
- self.set_synset_pointers( type, synsets, subtype )
- end
+ # Now traverse the other synset's hypernyms looking for one of our
+ # own hypernyms. The first match is remembered and the traversal is
+ # abandoned via break.
+ otherSyn.traverse( :hypernyms ) do |syn,depth|
+ if hyper_syns.include?( syn )
+ common_syn = syn
+ break true
end
end
+ return common_syn
+ end
- #############################################################
- ### I N S T A N C E M E T H O D S
- #############################################################
- ### Create a new Synset object in the specified +lexicon+ for the
- ### specified +word+ and +part_of_speech+. If +data+ is specified,
- ### initialize the synset's other object data from it. This method
- ### shouldn't be called directly: you should use one of the Lexicon
- ### class's factory methods: #create_synset, #lookup_synsets, or
- ### #lookup_synsetsByOffset.
- def initialize( lexicon, offset, pos, word=nil, data=nil )
- @lexicon = lexicon or
- raise ArgumentError, "%p is not a WordNet::Lexicon" % lexicon
- @part_of_speech = SYNTACTIC_SYMBOLS[ pos ] or
- raise ArgumentError, "No such part of speech %p" % pos
- @mutex = Sync::new
- @pointers = []
+ ### Returns the pointers in this synset's pointerlist as an +Array+
+ def pointers
+ @pointers ||= @pointerlist.split(SUB_DELIM_RE).collect {|pstr|
+ Pointer.parse( pstr )
+ }
- if data
- @offset = offset.to_i
- @filenum, @wordlist, @pointerlist,
- @frameslist, @gloss = data.split( DELIM_RE )
- else
- @offset = 1
- @wordlist = word ? word : ''
- @filenum, @pointerlist, @frameslist, @gloss = [''] * 4
- end
- end
+ return @pointers
+ end
- ######
- public
- ######
-
- # The WordNet::Lexicon that was used to look up this synset
- attr_reader :lexicon
-
- # The syntactic category of this Synset. Will be one of "n" (noun), "v"
- # (verb), "a" (adjective), "r" (adverb), or "s" (other).
- attr_accessor :part_of_speech
-
- # The original byte offset of the synset in the data file; acts as the
- # unique identifier (when combined with #part_of_speech) of this Synset in
- # the database.
- attr_accessor :offset
-
- # The number corresponding to the lexicographer file name containing the
- # synset. Calling #lexInfo will return the actual filename. See the
- # "System Description" of wngloss(7WN) for more info about this.
- attr_accessor :filenum
-
- # The raw list of word/lex_id pairs associated with this synset. Each
- # word and lex_id is separated by a '%' character, and each pair is
- # delimited with a '|'. E.g., the wordlist for "animal" is:
- # "animal%0|animate_being%0|beast%0|brute%1|creature%0|fauna%1"
- attr_accessor :wordlist
-
- # The list of raw pointers to related synsets. E.g., the pointerlist for
- # "mourning dove" is:
- # "@ 01731700%n 0000|#m 01733452%n 0000"
- attr_accessor :pointerlist
-
- # The list of raw verb sentence frames for this synset.
- attr_accessor :frameslist
-
- # Definition and/or example sentences for the Synset.
- attr_accessor :gloss
+ ### Set the pointers in this synset's pointerlist to +new_pointers+
+ def pointers=( *new_pointers )
+ @pointerlist = new_pointers.collect {|ptr| ptr.to_s}.join( SUB_DELIM )
+ @pointers = new_pointers
+ end
- ### Return a human-readable representation of the Synset suitable for
- ### debugging.
- def inspect
- pointer_counts = self.pointer_map.collect {|type,ptrs|
- "#{type}s: #{ptrs.length}"
- }.join( ", " )
-
- %q{#<%s:0x%08x/%s %s (%s): "%s" (%s)>} % [
- self.class.name,
- self.object_id * 2,
- self.offset,
- self.words.join(", "),
- self.part_of_speech,
- self.gloss,
- pointer_counts,
- ]
- end
-
-
- ### Returns the Synset's unique identifier, made up of its offset and
- ### syntactic category catenated together with a '%' symbol.
- def key
- "%d%%%s" % [ self.offset, self.pos ]
+ ### Returns the synset's pointers in a Hash keyed by their type. Each value
+ ### is an Array of the pointers of that type, in the order they appear in
+ ### #pointers. A new Hash is built on every call.
+ def pointer_map
+ return self.pointers.inject( {} ) do |hsh,ptr|
+ hsh[ ptr.type ] ||= []
+ hsh[ ptr.type ] << ptr
+ # inject needs the accumulator back as the block's value
+ hsh
end
-
-
- ### The symbol which represents this synset's syntactic category. Will
- ### be one of :noun, :verb, :adjective, :adverb, or :other.
- def pos
- return SYNTACTIC_CATEGORIES[ @part_of_speech ]
- end
-
-
- ### Return each of the sentences of the gloss for this synset as an
- ### array. The gloss is a definition of the synset, and optionally one
- ### or more example sentences.
- def glosses
- return self.gloss.split( /\s*;\s*/ )
- end
-
-
- ### Returns true if the receiver and otherSyn are identical according to
- ### their offsets.
- def ==( otherSyn )
- return false unless otherSyn.kind_of?( WordNet::Synset )
- return self.offset == otherSyn.offset
- end
+ end
- ### Returns an Array of words and/or collocations associated with this
- ### synset.
- def words
- @mutex.synchronize( Sync::SH ) {
- self.wordlist.split( SUB_DELIM_RE ).collect do |word|
- word.gsub( /_/, ' ' ).sub( /%.*$/, '' )
- end
- }
- end
- alias_method :synonyms, :words
-
-
- ### Set the words in this synset's wordlist to +newWords+
- def words=( *newWords )
- @mutex.synchronize( Sync::EX ) {
- @wordlist = newWords.join( SUB_DELIM )
- }
- end
-
-
- ### Add the specified +newWords+ to this synset's wordlist. Alias:
- ### +add_words+.
- def add_words( *newWords )
- @mutex.synchronize( Sync::EX ) {
- self.words |= newWords
- }
- end
-
-
- ### Delete the specified +oldWords+ from this synset's wordlist. Alias:
- ### +delete_words+.
- def delete_words( *oldWords )
- @mutex.synchronize( Sync::EX ) {
- self.words -= oldWords
- }
- end
-
+ #########
+ protected
+ #########
- ### Return the synset as a string. Alias: +overview+.
- def to_s
- @mutex.synchronize( Sync::SH ) {
- wordlist = self.words.join(", ").gsub( /%\d/, '' ).gsub( /_/, ' ' )
- return "#{wordlist} [#{self.part_of_speech}] -- (#{self.gloss})"
- }
- end
- alias_method :overview, :to_s
-
-
- ### Writes any changes made to the object to the database and updates all
- ### affected synset data and indexes. If the object passes out of scope
- ### before #write is called, the changes are lost.
- def store
- @mutex.synchronize( Sync::EX ) {
- self.lexicon.store_synset( self )
- }
- end
- alias_method :write, :store
-
+ ### Returns an Array of synset objects for the receiver's pointers of the
+ ### specified +type+. If +subtype+ is non-nil, only pointers whose subtype
+ ### equals it are considered; otherwise every pointer of the type matches.
+ ### The lookup results are flattened into a single Array.
+ def fetch_synset_pointers( type, subtype=nil )
- ### Removes this synset from the database.
- def remove
- @mutex.synchronize( Sync::EX ) {
- self.lexicon.remove_synset( self )
- }
- end
-
-
- ### Returns the synset's data in a form suitable for storage in the
- ### lexicon's database.
- def serialize
- @mutex.synchronize( Sync::SH ) {
- return [
- @filenum,
- @wordlist,
- @pointerlist,
- @frameslist,
- @gloss
- ].join( WordNet::DELIM )
- }
+ # Iterate over this synset's pointers, looking for ones that match
+ # the type we're after.
+ pointers = self.pointers.
+ find_all do |ptr|
+ # 'and' binds more loosely than '||', so this reads as: the type
+ # matches, and (no subtype was requested or the subtype matches).
+ ptr.type == type and
+ subtype.nil? || ptr.subtype == subtype
end
-
- ### Auto-generate synset pointer methods for the various types
-
- # The synsets for the receiver's antonyms (opposites). E.g.,
- # $lexicon.lookup_synsets( "opaque", :adjective, 1 ).antonyms
- # ==> [#<WordNet::Synset:0x010a9acc/454927 clear (adjective): "free
- # from cloudiness; allowing light to pass through; "clear water";
- # "clear plastic bags"; "clear glass"; "the air is clear and
- # clean"" (similar_tos: 6, attributes: 1, derivations: 2,
- # antonyms: 1, see_alsos: 1)>]
- def_pointer_methods :antonyms
-
- # Synsets for the receiver's entailments (a verb X entails Y if X cannot
- # be done unless Y is or has been done). E.g.,
- # $lexicon.lookup_synsets( 'rasp', :verb, 1 ).entailment
- # ==> [#<WordNet::Synset:0x010dc24c rub (verb): "move over something
- # with pressure; "rub my hands"; "rub oil into her skin""
- # (derivations: 2, entailments: 1, hypernyms: 1, hyponyms: 13,
- # see_alsos: 4)>]
- def_pointer_methods :entailment
-
- # Get/set synsets for the receiver's cause pointers (a verb X causes Y
- # to happen).
- def_pointer_methods :causes
-
- # Get/set synsets for the receiver's verb groups. Verb groups link verbs
- # with similar senses together.
- def_pointer_methods :verb_groups
-
- # Get/set list of synsets for the receiver's "similar to" pointers. This
- # type of pointer links together head adjective synsets with its
- # satellite adjective synsets.
- def_pointer_methods :similar_to
-
- # Get/set synsets for the receiver's participles. Participles are
- # non-finite forms of a verb; used adjectivally and to form compound
- # tenses. For example, the first participle for "working" is:
- # "function, work, operate, go, run (verb)"
- def_pointer_methods :participles
-
- # Get/set synsets for the receiver's pertainyms. Pertainyms are
- # relational adjectives. Adjectives that are pertainyms are usually
- # defined by such phrases as "of or pertaining to" and do not have
- # antonyms. A pertainym can point to a noun or another pertainym.
- def_pointer_methods :pertainyms
-
- # Get/set synsets for the receiver's attributes.
- def_pointer_methods :attributes
-
- # Get/set synsets for the receiver's derived_from.
- def_pointer_methods :derived_from
-
- # Get/set synsets for the receiver's derivations.
- def_pointer_methods :derivations
-
- # Get/set synsets for the receiver's see_also.
- def_pointer_methods :see_also
-
-
- # Auto-generate types with subtypes
-
- # Synsets for the receiver's hypernyms (more-general terms). E.g.,
- # $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hypernyms
- # ==> [#<WordNet::Synset:0x0109a644/3023321 club (noun): "stout
- # stick that is larger at one end; "he carried a club in self
- # defense"; "he felt as if he had been hit with a club""
- # (derivations: 1, hypernyms: 1, hyponyms: 7)>]
- #
- # Also generates accessors for subtypes:
- #
- # [instance_hypernyms]
- # A proper noun that refers to a particular, unique referent (as
- # distinguished from nouns that refer to classes).
- def_pointer_methods :hypernyms
-
-
- # :TODO: Generate an example for this
-
- # Get/set synsets for the receiver's hyponyms (more-specific terms). E.g.,
- # $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hyponyms
- # ==> [...]
- # [instance_hyponyms]
- # The specific term used to designate a member of a class. X is a
- # hyponym of Y if X is a (kind of) Y.
- # Also generates accessors for subtypes:
- #
- # [instance_hyponyms]
- # A proper noun that refers to a particular, unique referent (as
- # distinguished from nouns that refer to classes).
- def_pointer_methods :hyponyms
+ # Ask each matching pointer for its #synset value, look each of those
+ # up through the lexicon, and merge the lookup results into one Array.
+ return pointers.
+ collect {|ptr| ptr.synset }.
+ collect {|key| @lexicon.lookup_synsets_by_key( key )}.flatten
+ end
- # Get/set synsets for the receiver's meronyms. In addition to the
- # general accessors for all meronyms, there are also accessors for
- # subtypes as well:
- #
- # [member_meronyms]
- # Get/set synsets for the receiver's "member" meronyms (HAS MEMBER
- # relation).
- # [stuff_meronyms]
- # Get/set synsets for the receiver's "stuff" meronyms (IS MADE OUT OF
- # relation).
- # [portion_meronyms]
- # Get/set synsets for the receiver's "portion" meronyms (HAS PORTION
- # relation).
- # [component_meronyms]
- # Get/set synsets for the receiver's "component" meronyms (HAS
- # COMPONENT relation).
- # [feature_meronyms]
- # Get/set synsets for the receiver's "feature" meronyms (HAS FEATURE
- # relation).
- # [phase_meronyms]
- # Get/set synsets for the receiver's "phase" meronyms (HAS PHASE
- # relation).
- # [place_meronyms]
- # Get/set synsets for the receiver's "place" meronyms (HAS PLACE
- # relation).
- def_pointer_methods :meronyms
-
- # Get/set synsets for the receiver's holonyms. In addition to the
- # general accessors for all holonyms, there are also accessors for
- # subtypes as well:
- #
- # [member_holonyms]
- # Get/set synsets for the receiver's "member" holonyms (IS A MEMBER OF
- # relation).
- # [stuff_holonyms]
- # Get/set synsets for the receiver's "stuff" holonyms (IS MATERIAL OF
- # relation).
- # [portion_holonyms]
- # Get/set synsets for the receiver's "portion" holonyms (IS A PORTION
- # OF relation).
- # [component_holonyms]
- # Get/set synsets for the receiver's "component" holonyms (IS A
- # COMPONENT OF relation).
- # [feature_holonyms]
- # Get/set synsets for the receiver's "feature" holonyms (IS A FEATURE
- # OF relation).
- # [phase_holonyms]
- # Get/set synsets for the receiver's "phase" holonyms (IS A PHASE OF
- # relation).
- # [place_holonyms]
- # Get/set synsets for the receiver's "place" holonyms (IS A PLACE IN
- # relation).
- def_pointer_methods :holonyms
-
- # Get/set synsets for the receiver's topical domain members. In addition
- # to the general members accessor, there are also accessors for
- # membership subtypes:
- #
- # [category_members]
- # Get/set synsets for the receiver's
- # "category" topical domain members.
- # [region_members]
- # Get/set synsets for the receiver's "region"
- # topical domain members.
- # [usage_members]
- # Get/set synsets for the receiver's "usage"
- # topical domain members.
- def_pointer_methods :members
-
- # Get/set synsets for the receiver's topical domain domains. In addition
- # to the general domains accessor, there are also accessors for
- # domainship subtypes:
- #
- # [category_domains]
- # Get/set synsets for the receiver's
- # "category" topical domain domains.
- # [region_domains]
- # Get/set synsets for the receiver's "region"
- # topical domain domains.
- # [usage_domains]
- # Get/set synsets for the receiver's "usage"
- # topical domain domains.
- def_pointer_methods :domains
-
-
- ### Returns an Array of the coordinate sisters of the receiver.
- def coordinates
- self.hypernyms.collect {|syn|
- syn.hyponyms
- }.flatten
- end
-
-
- ### Return the name of the "lexicographer's file" associated with this
- ### synset.
- def lex_info
- @mutex.synchronize( Sync::SH ) {
- return LEXFILES[ self.filenum.to_i ]
- }
- end
-
-
- ### Sets the "lexicographer's file" association for this synset to
- ### +id+. The value in +id+ should correspond to one of the values in
- ### #WordNet::LEXFILES
- def lexInfo=( id )
- raise ArgumentError, "Bad index: Lexinfo id must be within LEXFILES" unless
- LEXFILES[id]
- @mutex.synchronize( Sync::EX ) {
- self.filenum = id
- }
- end
-
-
- ### Returns an +Array+ of verb frame +String+s for the synset.
- def frames
- frarray = self.frameslist.split( WordNet::SUB_DELIM_RE )
- verbFrames = []
-
- @mutex.synchronize( Sync::SH ) {
- frarray.each {|fr|
- fnum, wnum = fr.split
- if wnum > 0
- wordtext = " (" + self.words[wnum] + ")"
- verbFrames.push VERB_SENTS[ fnum ] + wordtext
- else
- verbFrames.push VERB_SENTS[ fnum ]
- end
- }
- }
-
- return verbFrames
- end
+ ### Sets the receiver's synset pointers for the specified +type+ to
+ ### the specified +synsets+.
+ def set_synset_pointers( type, synsets, subtype=nil )
+ synsets = [ synsets ] unless synsets.is_a?( Array )
+ pmap = self.pointer_map
+ pmap[ type ] = synsets
+ self.pointers = pmap.values
+ end
- ### Traversal iterator: Iterates depth-first over a particular
- ### +type+ of the receiver, and all of the pointed-to synset's
- ### pointers. If called with a block, the block is called once for each
- ### synset with the +foundSyn+ and its +depth+ in relation to the
- ### originating synset as arguments. The first call will be the
- ### originating synset with a depth of +0+ unless +includeOrigin+ is
- ### +false+. If the +callback+ returns +true+, the traversal is halted,
- ### and the method returns immediately. This method returns an Array of
- ### the synsets which were traversed if no block is given, or a flag
- ### which indicates whether or not the traversal was interrupted if a
- ### block is given.
- def traverse( type, includeOrigin=true )
- raise ArgumentError, "Illegal parameter 1: Must be either a String or a Symbol" unless
- type.kind_of?( String ) || type.kind_of?( Symbol )
-
- raise ArgumentError, "Synset doesn't support the #{type.to_s} pointer type." unless
- self.respond_to?( type )
-
- foundSyns = []
- depth = 0
- traversalFunc = nil
-
- # Build a traversal function which we can call recursively. It'll return
- # the synsets it traverses.
- traversalFunc = Proc.new {|syn,newDepth|
-
- # Flag to continue traversal
- haltFlag = false
-
- # Call the block if it exists and we're either past the origin or
- # including it
- if block_given? && (newDepth > 0 || includeOrigin)
- res = yield( syn, newDepth )
- haltFlag = true if res.is_a? TrueClass
- end
-
- # Make an array for holding sub-synsets we see
- subSyns = []
- subSyns.push( syn ) unless newDepth == 0 && !includeOrigin
-
- # Iterate over each synset returned by calling the pointer on the
- # current syn. For each one, we call ourselves recursively, and
- # break out of the iterator with a false value if the block has
- # indicated we should abort by returning a false value.
- unless haltFlag
- syn.send( type ).each {|subSyn|
- subSubSyns, haltFlag = traversalFunc.call( subSyn, newDepth + 1 )
- subSyns.push( *subSubSyns ) unless subSubSyns.empty?
- break if haltFlag
- }
- end
-
- # return
- [ subSyns, haltFlag ]
- }
-
- # Call the iterator
- traversedSets, haltFlag = traversalFunc.call( self, depth )
-
- # If a block was given, just return whether or not the block was halted.
- if block_given?
- return haltFlag
-
- # If no block was given, return the traversed synsets
- else
- return traversedSets
- end
- end
-
-
- ### Returns the distance in pointers between the receiver and +otherSynset+
- ### using +type+ as the search path.
- def distance( type, otherSynset )
- dist = nil
- self.traverse( type ) {|syn,depth|
- if syn == otherSynset
- dist = depth
- true
- end
- }
-
- return dist
- end
-
-
- ### Recursively searches all of the receiver's pointers of the specified
- ### +type+ for +otherSynset+, returning +true+ if it is found.
- def search( type, otherSynset )
- self.traverse( type ) {|syn,depth|
- syn == otherSynset
- }
- end
-
+end # class WordNet::Synset
- ### Union: Return the least general synset that the receiver and
- ### +otherSynset+ have in common as a hypernym, or nil if it doesn't share
- ### any.
- def |( otherSyn )
-
- # Find all of this syn's hypernyms
- hyperSyns = self.traverse( :hypernyms )
- commonSyn = nil
-
- # Now traverse the other synset's hypernyms looking for one of our
- # own hypernyms.
- otherSyn.traverse( :hypernyms ) {|syn,depth|
- if hyperSyns.include?( syn )
- commonSyn = syn
- true
- end
- }
-
- return commonSyn
- end
-
-
- ### Returns the pointers in this synset's pointerlist as an +Array+
- def pointers
- @mutex.synchronize( Sync::SH ) {
- @mutex.synchronize( Sync::EX ) {
- @pointers = @pointerlist.split(SUB_DELIM_RE).collect {|pstr|
- Pointer::parse( pstr )
- }
- } if @pointers.empty?
- @pointers
- }
- end
-
-
- ### Set the pointers in this synset's pointerlist to +newPointers+
- def pointers=( *newPointers )
- @mutex.synchronize( Sync::EX ) {
- @pointerlist = newPointers.collect {|ptr| ptr.to_s}.join( SUB_DELIM )
- @pointers = newPointers
- }
- end
-
-
- ### Returns the synset's pointers in a Hash keyed by their type.
- def pointer_map
- return self.pointers.inject( {} ) do |hsh,ptr|
- hsh[ ptr.type ] ||= []
- hsh[ ptr.type ] << ptr
- hsh
- end
- end
-
-
-
- #########
- protected
- #########
-
- ### Returns an Array of synset objects for the receiver's pointers of the
- ### specified +type+.
- def fetch_synset_pointers( type, subtype=nil )
- synsets = nil
-
- # Iterate over this synset's pointers, looking for ones that match
- # the type we're after. When we find one, we extract its offset and
- # use that to look it up.
- @mutex.synchronize( Sync::SH ) do
- synsets = self.pointers.
- find_all {|ptr|
- ptr.type == type and
- subtype.nil? || ptr.subtype == subtype
- }.
- collect {|ptr| ptr.synset }.
- collect {|key| @lexicon.lookup_synsets_by_key( key )}
- end
-
- return synsets.flatten
- end
-
-
- ### Sets the receiver's synset pointers for the specified +type+ to
- ### the specified +synsets+.
- def set_synset_pointers( type, synsets, subtype=nil )
- synsets = [ synsets ] unless synsets.is_a?( Array )
- pmap = self.pointer_map
- pmap[ type ] = synsets
- self.pointers = pmap.values
- end
-
-
- end # class Synset
- end # module WordNet
-
A => lib/wordnet/synset_pointer.rb +218 -0
@@ 0,0 1,218 @@
+#!/usr/bin/ruby
+#
+
+require 'wordnet'
+require 'wordnet/constants'
+require 'wordnet/synset'
+
+
+# WordNet synonym-set pointer class -- the "pointer" type that encapsulates
+# relationships between one synset and another.
+#
+# == Authors
+#
+# * Michael Granger <ged@FaerieMUD.org>
+#
+# Copyright (c) 2002-2008 The FaerieMUD Consortium. All rights reserved.
+#
+# This module is free software. You may use, modify, and/or redistribute this
+# software under the terms of the Perl Artistic License. (See
+# http://language.perl.com/misc/Artistic.html)
+#
+# Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
+# by Dan Brian.
+#
+# == Version
+#
+# $Id$
+#
+class Pointer
+ include WordNet::Constants
+
+
+ #########################################################
+ ### C L A S S M E T H O D S
+ #########################################################
+
+ ### Make an Array of WordNet::Synset::Pointer objects out of the
+ ### given +pointer_string+. The pointer_string is a string of pointers
+ ### delimited by WordNet::Constants::SUB_DELIM. Pointers are in the form:
+ ### "<pointer_symbol> <synset_offset>%<pos> <source/target>"
+ def self::parse( pointer_string )
+ type, offset_pos, ptr_nums = pointer_string.split(/\s+/)
+ offset, pos = offset_pos.split( /%/, 2 )
+ return new( type, offset, pos, ptr_nums[0,2], ptr_nums[2,2] )
+ end
+
+
+ #########################################################
+ ### I N S T A N C E M E T H O D S
+ #########################################################
+
+ ### Create a new synset pointer with the given arguments. The
+ ### +type+ is the type of the link between synsets, and must be
+ ### either a key or a value of WordNet::Constants::POINTER_TYPES. The
+ ### +offset+ is the unique identifier of the target synset, and
+ ### +pos+ is its part-of-speech, which must be either a key or value
+ ### of WordNet::Constants::SYNTACTIC_CATEGORIES. The +source_wn+ and
+ ### +target_wn+ are numerical values which distinguish lexical and
+ ### semantic pointers. +source_wn+ indicates the word number in the
+ ### current (source) synset, and +target_wn+ indicates the word
+ ### number in the target synset. If both are 0 (the default) it
+ ### means that the pointer type of the pointer represents a semantic
+ ### relation between the current (source) synset and the target
+ ### synset indicated by +offset+.
+ def initialize( type, offset, pos=Noun, source_wn=0, target_wn=0 )
+ @type = @subtype = nil
+
+ @type, @subtype = self.normalize_type( type )
+ @part_of_speech = self.normalize_part_of_speech( pos )
+
+ # Other attributes
+ @offset = offset
+ @source_wn = source_wn
+ @target_wn = target_wn
+ end
+
+
+ ######
+ public
+ ######
+
+ # The type of the pointer. Will be one of the keys of
+ # WordNet::POINTER_TYPES (e.g., :meronym).
+ attr_accessor :type
+
+ # The subtype of the pointer, if any. Will be one of the keys of one
+ # of the hashes in POINTER_SUBTYPES (e.g., :portion).
+ attr_accessor :subtype
+
+ # The offset of the target synset
+ attr_accessor :offset
+
+ # The part-of-speech of the target synset. Will be one of the keys
+ # of WordNet::SYNTACTIC_CATEGORIES.
+ attr_accessor :part_of_speech
+
+ # The word number in the source synset
+ attr_accessor :source_wn
+
+ # The word number in the target synset
+ attr_accessor :target_wn
+
+
+ ### Return the Pointer as a human-readable String suitable for
+ ### debugging.
+ def inspect
+ "#<%s:0x%08x %s %s>" % [
+ self.class.name,
+ self.object_id,
+ @subtype ? "#@type(#@subtype)" : @type,
+ self.synset,
+ ]
+ end
+
+
+ ### Return the synset key of the target synset (i.e.,
+ ### <offset>%<pos symbol>).
+ def synset
+ self.offset + "%" + self.pos
+ end
+
+
+ ### Return the syntactic category symbol for this pointer
+ def pos
+ return SYNTACTIC_CATEGORIES[ @part_of_speech ]
+ end
+
+
+ ### Return the pointer type symbol for this pointer
+ def type_symbol
+ unless @subtype
+ return POINTER_TYPES[ @type ]
+ else
+ return POINTER_SUBTYPES[ @type ][ @subtype ]
+ end
+ end
+
+
+ ### Comparison operator. Pointer are equivalent if they point at the
+ ### same synset and are of the same type.
+ def ==( other )
+ return false unless other.is_a?( self.class )
+ other.offset == self.offset &&
+ other.type == self.type
+ end
+
+
+ ### Return the pointer in its stringified form.
+ def to_s
+ "%s %d%%%s %02x%02x" % [
+ ptr.type_symbol,
+ ptr.offset,
+ ptr.posSymbol,
+ ptr.source_wn,
+ ptr.target_wn,
+ ]
+ end
+
+
+ #########
+ protected
+ #########
+
+ ### Given a type description, normalize it into one of the WordNet pointer types (and
+ ### subtype, if applicable)
+ def normalize_type( typedesc )
+ type = subtype = nil
+
+ # Allow type = '!', 'antonym', or :antonym. Also handle
+ # splitting of compound pointers (e.g., :member_meronym / '%m')
+ # into their correct type/subtype parts.
+ case typedesc.to_s.length
+ when 1
+ type = POINTER_SYMBOLS[ typedesc.to_s[0,1] ]
+
+ when 2
+ type = POINTER_SYMBOLS[ typedesc.to_s[0,1] ]
+ raise "No known subtypes for '%s'" % [@type] unless
+ POINTER_SUBTYPES.key?( type )
+
+ subtype = POINTER_SUBTYPES[ type ].index( typedesc ) or
+ raise "Unknown subtype '%s' for '%s'" % [ typedesc, @type ]
+
+ else
+ if POINTER_TYPES.key?( typedesc.to_sym )
+ type = typedesc.to_sym
+
+ elsif /([a-z]+)([A-Z][a-z]+)/ =~ typedesc.to_s
+ subtype, maintype = $1, $2.downcase
+
+ type = maintype.to_sym if
+ POINTER_TYPES.key?( maintype.to_sym )
+
+ subtype = subtype.to_sym
+ end
+ end
+
+ raise ArgumentError, "No such pointer type %p" % [ typedesc ] if type.nil?
+
+ return type, subtype
+ end
+
+
+ ### Given a part of speech description, normalize it into one of the WordNet parts of speech
+ ### types.
+ def normalize_part_of_speech( pos )
+ if pos.to_s.length == 1
+ return SYNTACTIC_SYMBOLS[ pos ]
+ elsif SYNTACTIC_CATEGORIES.key?( pos.to_sym )
+ return pos.to_sym
+ end
+
+ raise ArgumentError, "No such part of speech %p" % [ pos ]
+ end
+
+end # class WordNet::Pointer
+
+
A => spec/wordnet/synset_pointer_spec.rb +38 -0
@@ 0,0 1,38 @@
+#!/usr/bin/env ruby
+
+BEGIN {
+ require 'pathname'
+ basedir = Pathname.new( __FILE__ ).dirname.parent.parent
+
+ libdir = basedir + 'lib'
+
+ $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
+}
+
+begin
+ require 'fileutils'
+ require 'tmpdir'
+ require 'bdb'
+ require 'spec/runner'
+ require 'spec/lib/helpers'
+
+ require 'wordnet/lexicon'
+ require 'wordnet/synset'
+rescue LoadError
+ unless Object.const_defined?( :Gem )
+ require 'rubygems'
+ retry
+ end
+ raise
+end
+
+
+#####################################################################
+### C O N T E X T S
+#####################################################################
+
+describe WordNet::Synset do
+
+end
+
+
M spec/wordnet/synset_spec.rb +104 -184
@@ 33,17 33,31 @@ end
describe WordNet::Synset do
- Accessors = [
- :part_of_speech,
- :offset,
- :filenum,
- :wordlist,
- :pointerlist,
- :frameslist,
- :gloss,
- ]
+ TEST_SYNSET_OFFSET = 6172789
- RelationMethods = [
+ TEST_SYNSET_POS = :noun
+
+ TEST_SYNSET_DATA = "09||linguistics%0||@ 05999797%n 0000|#p 06142861%n 0000|+ " +
+ "02843218%a 0101|+ 10264437%n 0101|-c 00111415%a 0000|-c 00111856%a 0000|-c 00120252%a " +
+ "0000|-c 00120411%a 0000|-c 00201802%a 0000|-c 00699651%a 0000|-c 00699876%a 0000|-c " +
+ "00819852%a 0000|-c 00820219%a 0000|-c 00820458%a 0000|-c 00820721%a 0000|-c 00820975%a " +
+ "0000|-c 00821208%a 0000|-c 01973823%a 0000|-c 02297664%a 0000|-c 02297966%a 0000|-c " +
+ "02298285%a 0000|-c 02298642%a 0000|-c 02298766%a 0000|-c 02478052%a 0000|-c 02482790%a " +
+ "0000|-c 02593124%a 0000|-c 02593578%a 0000|-c 02836479%a 0000|-c 02856124%a 0000|-c " +
+ "02993853%a 0000|-c 03041636%a 0000|-c 03045196%a 0000|-c 03102278%a 0000|-c 03129490%a " +
+ "0000|-c 00098051%n 0000|-c 04986883%n 0000|-c 05087664%n 0000|-c 05153897%n 0000|-c " +
+ "05850212%n 0000|~ 06168552%n 0000|~ 06168703%n 0000|~ 06168855%n 0000|~ 06169050%n 0000|-c " +
+ "06174404%n 0000|-c 06175829%n 0000|-c 06175967%n 0000|-c 06176107%n 0000|-c 06176322%n " +
+ "0000|-c 06176519%n 0000|-c 06177450%n 0000|~ 06179290%n 0000|~ 06179492%n 0000|~ 06179792%n " +
+ "0000|~ 06181123%n 0000|~ 06181284%n 0000|~ 06181448%n 0000|~ 06181584%n 0000|~ 06181893%n " +
+ "0000|-c 06249910%n 0000|-c 06250444%n 0000|-c 06290051%n 0000|-c 06290637%n 0000|-c " +
+ "06300193%n 0000|-c 06331803%n 0000|-c 06483702%n 0000|-c 06483992%n 0000|-c 06484279%n " +
+ "0000|-c 07111510%n 0000|-c 07111711%n 0000|-c 07111933%n 0000|-c 07259772%n 0000|-c " +
+ "07259984%n 0000|-c 07276018%n 0000|-c 08103635%n 0000|-c 13433061%n 0000|-c 13508333%n " +
+ "0000|-c 13802920%n 0000|-c 00587390%v 0000|-c 00587522%v 0000|-c 00634286%v 0000|-c " +
+ "01013856%v 0000|-c 01735556%v 0000||||the scientific study of language"
+
+ RELATION_METHODS = [
:antonyms,
:hypernyms,
:entailment,
@@ 87,201 101,107 @@ describe WordNet::Synset do
:usage_members,
]
- AggregateRelationMethods = [
+ AGGREGATE_RELATION_METHODS = [
:meronyms,
:holonyms,
:domains,
:members,
]
-
-
- before( :each ) do
- @blank_syn = WordNet::Synset::new( @lexicon, "1%n", WordNet::Noun )
- @traversal_syn = @lexicon.lookup_synsets( 'linguistics', :noun, 1 )
+
+
+ it "provides defaults for instances created with just a lexicon, offset, and part of speech" do
+ syn = WordNet::Synset.new( :lexicon, TEST_SYNSET_OFFSET, TEST_SYNSET_POS )
+ syn.filenum.should be_nil()
+ syn.wordlist.should == ''
+ syn.pointerlist.should == ''
+ syn.frameslist.should == ''
+ syn.gloss.should == ''
end
-
- #################################################################
- ### T E S T S
- #################################################################
-
- ### Accessors
- def test_accessors
- printTestHeader "Synset: Accessors"
- rval = nil
-
- assert_respond_to @blankSyn, :lexicon
-
- Accessors.each do |meth|
- assert_respond_to @blankSyn, meth
- assert_respond_to @blankSyn, "#{meth}="
-
- assert_nothing_raised do
- rval = @blankSyn.send( meth )
- end
- end
- end
-
- ### Relations
- def test_relations
- printTestHeader "Synset: Relation methods"
- rval = nil
-
- RelationMethods.each do |meth|
- casemeth = meth.to_s.sub( /^(\w)/ ) {|char| char.upcase }.intern
-
- assert_respond_to @blankSyn, meth
- assert_respond_to @blankSyn, "#{meth}="
-
- assert_nothing_raised {
- rval = @blankSyn.send( meth )
- }
-
- assert_instance_of Array, rval
+ it "has (generated) methods for each type of WordNet relation" do
+ RELATION_METHODS.each do |relation|
+ WordNet::Synset.instance_method( relation ).should be_an_instance_of( UnboundMethod )
end
end
- ### Aggregate relation methods
- def test_aggregate_relations
- printTestHeader "Synset: Aggregate relations"
- rval = nil
-
- AggregateRelationMethods.each {|meth|
- assert_respond_to @blankSyn, meth
-
- assert_nothing_raised {
- rval = @blankSyn.send( meth )
- }
-
- assert_instance_of Array, rval
- }
- end
-
- ### Traversal method
- def test_synset_should_respond_to_traverse_method
- printTestHeader "Synset: Traversal method"
- assert_respond_to @traversalSyn, :traverse
- end
-
- ### :TODO: This should really be split into two tests.
- ### Traversal: include origin, break loop
- def test_traversal_with_true_second_arg_should_include_origin
- printTestHeader "Synset: Traversal, including origin, break"
- rval = nil
- count = depth = 0
- sets = []
+
+ describe "instance created from synset data" do
- assert_nothing_raised {
- rval = @traversalSyn.traverse( :hyponyms, true ) {|tsyn,tdepth|
- sets << tsyn
- depth = tdepth
- count += 1
- return true
- }
- }
- assert_equal true, rval
- assert_equal 1, sets.length
- assert_equal @traversalSyn, sets[0]
- assert_equal 0, depth
- assert_equal 1, count
- end
-
- ### :TODO: This should really be split into two tests.
- ### Traversal: exclude origin, break loop
- def test_traversal_with_false_second_arg_should_not_include_origin
- printTestHeader "Synset: Traversal, excluding origin, break"
- rval = nil
- count = depth = 0
- sets = []
-
- assert_nothing_raised {
- rval = @traversalSyn.traverse( :hyponyms, false ) {|tsyn,tdepth|
- sets << tsyn
- depth = tdepth
- count += 1
- return true
- }
- }
- assert_equal true, rval
- assert_equal 1, sets.length
- assert_not_equal @traversalSyn, sets[0]
- assert_equal 1, depth
- assert_equal 1, count
- end
-
- ### Traversal: include origin, nobreak, noblock
- def test_hyponym_traversal_with_no_block_should_return_appropriate_hyponyms
- printTestHeader "Synset: Traversal, include origin, nobreak, noblock"
- sets = []
-
- assert_nothing_raised {
- sets = @traversalSyn.traverse( :hyponyms )
- }
- assert_block { sets.length > 1 }
- assert_equal @traversalSyn, sets[0]
- assert_block { sets.find {|hsyn| hsyn.words.include?( "grammar" )} }
- assert_block { sets.find {|hsyn| hsyn.words.include?( "syntax" )} }
- assert_block { sets.find {|hsyn| hsyn.words.include?( "computational linguistics" )} }
- end
+ before( :each ) do
+ @lexicon = mock( "lexicon" )
+ @synset = WordNet::Synset.new( @lexicon,
+ TEST_SYNSET_OFFSET, TEST_SYNSET_POS, 'linguistics', TEST_SYNSET_DATA )
+ end
- ### Traversal: exclude origin, nobreak, noblock
- def test_hyponym_traversal_with_no_block_and_false_second_arg_should_return_holonyms_but_not_the_origin
- printTestHeader "Synset: Traversal, exclude origin, nobreak, noblock"
- sets = []
-
- assert_nothing_raised {
- sets = @traversalSyn.traverse( :hyponyms, false )
- }
- assert_block { sets.length > 1 }
- assert_not_equal @traversalSyn, sets[0]
- assert_block { sets.find {|hsyn| hsyn.words.include?( "grammar" )} }
- assert_block { sets.find {|hsyn| hsyn.words.include?( "syntax" )} }
- assert_block { sets.find {|hsyn| hsyn.words.include?( "computational linguistics" )} }
- end
+ it "knows what part_of_speech it is" do
+ @synset.part_of_speech.should == TEST_SYNSET_POS
+ end
+
+ it "knows what offset it is" do
+ @synset.offset.should == TEST_SYNSET_OFFSET
+ end
+
+ it "knows what filenum it is" do
+ @synset.filenum.should == '09'
+ end
+
+ it "knows what its wordlist is" do
+ @synset.wordlist.should == 'linguistics%0'
+ end
+
+ POINTER_PATTERN = /(\S{2} \d+%[nvars] \d{4})/
+ LIST_OF_POINTERS = /#{POINTER_PATTERN}(\|#{POINTER_PATTERN})*/
+ it "knows what its pointerlist is" do
+ @synset.pointerlist.should =~ LIST_OF_POINTERS
+ end
+
+ it "knows what frameslist it is" do
+ @synset.frameslist.should == ''
+ end
+
+ it "knows what its gloss is" do
+ @synset.gloss.should =~ /study of language/i
+ end
- ### Traversal: include origin, nobreak, noblock
- def test_traversal_break_after_3_should_include_three_sets_plus_origin
- printTestHeader "Synset: Traversal, break after 3"
- rval = nil
- sets = Hash::new {|hsh,key| hsh[key] = []}
+ ### :TODO: Test traversal, content, storing, higher-order functions
+ describe "traversal" do
- assert_nothing_raised {
- rval = @traversalSyn.traverse( :hyponyms ) {|tsyn,tdepth|
- sets[tdepth] << tsyn
- tdepth == 3
- }
- }
- assert_equal 4, sets.keys.length
- assert_equal [0,1,2,3], sets.keys.sort
- assert_equal 1, sets[3].length
- assert rval, "Break early flag expected to be set"
- end
+ it "can traverse its relationships and return the resulting synsets" do
+ hypernym1 = mock( "hypernym of linguistics" )
+ hypernym2 = mock( "super-hypernym of linguistics" )
+
+ @lexicon.should_receive( :lookup_synsets_by_key ).with( /\d+%[nvars]/ ).
+ and_return( hypernym1 )
+ hypernym1.should_receive( :hypernyms ).and_return([ hypernym2 ])
+ hypernym2.should_receive( :hypernyms ).and_return([])
+
+ synsets = @synset.traverse( :hypernyms )
+
+ synsets.should have(3).members
+ synsets.should include( @synset, hypernym1, hypernym2 )
+ end
- ### Part of speech: part_of_speech
- def test_part_of_speech_should_return_the_symbol_part_of_speech
- printTestHeader "Synset: part_of_speech"
- rval = nil
-
- assert_nothing_raised { rval = @traversalSyn.part_of_speech }
- assert_equal :noun, rval
- end
-
+ it "can exclude its origin term from a traversal set" do
+ hypernym1 = mock( "hypernym of linguistics" )
+ hypernym2 = mock( "super-hypernym of linguistics" )
+
+ @lexicon.should_receive( :lookup_synsets_by_key ).with( /\d+%[nvars]/ ).
+ and_return( hypernym1 )
+ hypernym1.should_receive( :hypernyms ).and_return([ hypernym2 ])
+ hypernym2.should_receive( :hypernyms ).and_return([])
- ### Part of speech: pos
- def test_pos_should_return_the_synsets_singlechar_part_of_speech
- printTestHeader "Synset: pos"
- rval = nil
+ synsets = @synset.traverse( :hypernyms, false )
+
+ synsets.should have(2).members
+ synsets.should include( hypernym1, hypernym2 )
+ end
- assert_nothing_raised { rval = @traversalSyn.pos }
- assert_equal "n", rval
- end
+ end # "traversal"
-
- ### :TODO: Test traversal, content, storing, higher-order functions
-
+ end # "instance"
end