CPAN->GREP


1 to 25 of 682 distributions (7.55 seconds)
Lingua-NATools-v0.7.8/Build.PL
                      't/bin/*.o', 't/bin/*.exe',
                      't/bin/corpus', 't/bin/words',
                      'Lingua-NATools-*',
AMBS/Lingua-NATools-v0.7.8 64 more files »
Text-Mining-0.08/Text-Mining/lib/Text/Mining.pm
	sub create_corpus             { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub get
            { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub get_corpus                { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub 
         { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub delete_corpus             { my ( $self, $arg_ref ) = @_; my $corpus = Text::Mining::Corpus->new(); return $corpus
75 more matches »
ROGERHALL/Text-Mining-0.08 20 more files »
uplug-main-0.3.8/Uplug-0.3.8/bin/uplug-convert
		'language' => 1,
		'corpus' => 1,
		);
TIEDEMANN/uplug-main-0.3.8 93 more files »
Algorithm-VSM-1.42/examples/calculate_precision_and_recall_for_LSA.pl
my $corpus_dir = "corpus";                     # This is the directory containing
                                               # the corpus
my $corpus_dir = "corpus";                     # This is the directory containing
                                               # the corpus
my $corpus_dir = "corpus";                     # This is the directory containing
                                               # the corpus
#my $corpus_dir = "corpus_with_java_and_cpp";
16 more matches »
AVIKAK/Algorithm-VSM-1.42 15 more files »
Lingua-YaTeA-0.622/bin/yatea
yatea - Perl script for extracting terms from a corpus of texts and
providing a syntactic analysis in a head-modifier representation.
=item    I<file>               corpus of texts in Flemm or TreeTagger output format
YaTeA aims at extracting noun phrases that look like terms from a
corpus. It also provides their syntactic analysis in a head-modifier
format.
27 more matches »
THHAMON/Lingua-YaTeA-0.622 43 more files »
Text-SenseClusters-1.03/discriminate.pl
Removes features that occur less than F times in the training corpus.
	# check if the training file is senseval2 formatted file - if yes quit.
	if($inp_str =~ m/<corpus/i && $inp_str =~ m/<lexelt/i && $inp_str =~ m/<instance/i && $inp_str =~ m/<context/i)
	{
TPEDERSE/Text-SenseClusters-1.03 125 more files »
Lingua-Align-0.04/bin/add_english_treetags
#
# add tree tagger tags & lemmas to an english tigerXML corpus
#
TIEDEMANN/Lingua-Align-0.04 53 more files »
Lingua-BrillTagger-0.02/lib/Lingua/BrillTagger.xs
			     the training set  
			     When training on a very small corpus, better
			     performance might be obtained by setting this to
void
_load_into_corpus( self, word )
     SV   * self
KWILLIAMS/Lingua-BrillTagger-0.02 1 more file »
Text-Corpus-NewYorkTimes-1.01/lib/Text/Corpus/NewYorkTimes.pm
#12345678901234567890123456789012345678901234
#Interface to New York Times corpus.
C<Text::Corpus::NewYorkTimes> - Interface to New York Times corpus.
  Log::Log4perl->easy_init ($INFO);
  my $corpus = Text::Corpus::NewYorkTimes->new (fileList => $fileList, corpusDirectory => $corpusDirectory);
  dump $corpus->getTotalDocuments;
120 more matches »
KUBINA/Text-Corpus-NewYorkTimes-1.01 6 more files »
Text-Corpus-CNN-1.02/lib/Text/Corpus/CNN.pm
#12345678901234567890123456789012345678901234
#Make a corpus of CNN documents for research.
C<Text::Corpus::CNN> - Make a corpus of CNN documents for research.
  Log::Log4perl->easy_init ($INFO);
  my $corpusDirectory = File::Spec->catfile (getcwd(), 'corpus_cnn');
  my $corpus = Text::Corpus::CNN->new (corpusDirectory => $corpusDirectory);
65 more matches »
KUBINA/Text-Corpus-CNN-1.02 7 more files »
Text-Corpus-VoiceOfAmerica-1.03/lib/Text/Corpus/VoiceOfAmerica.pm
C<Text::Corpus::VoiceOfAmerica> - Make a corpus of VOA documents for research.
  Log::Log4perl->easy_init ($INFO);
  my $corpusDirectory = File::Spec->catfile (getcwd(), 'corpus_voa');
  my $corpus = Text::Corpus::VoiceOfAmerica->new (corpusDirectory => $corpusDirectory);
  Log::Log4perl->easy_init ($INFO);
  my $corpusDirectory = File::Spec->catfile (getcwd(), 'corpus_voa');
  my $corpus = Text::Corpus::VoiceOfAmerica->new (corpusDirectory => $corpusDirectory);
60 more matches »
KUBINA/Text-Corpus-VoiceOfAmerica-1.03 7 more files »
Dist-Zilla-5.025/dist.ini
[MetaNoIndex]
dir = corpus
dir = misc
parent  = 0 ; used by the AutoPrereq test corpus
RJBS/Dist-Zilla-5.025 40 more files »
Text-Corpus-Inspec-1.00/lib/Text/Corpus/Inspec.pm
#12345678901234567890123456789012345678901234
#Interface to Inspec abstracts corpus.
C<Text::Corpus::Inspec> - Interface to Inspec abstracts corpus.
  Log::Log4perl->easy_init ($INFO);
  my $corpus = Text::Corpus::Inspec->new (corpusDirectory => $corpusDirectory);
  dump $corpus->getTotalDocuments;
70 more matches »
KUBINA/Text-Corpus-Inspec-1.00 6 more files »
Text-Corpus-Summaries-Wikipedia-0.22/lib/Text/Corpus/Summaries/Wikipedia.pm
  use Data::Dump qw(dump);
  my $corpus = Text::Corpus::Summaries::Wikipedia->new;
  $corpus->create;
  my $corpus = Text::Corpus::Summaries::Wikipedia->new;
  $corpus->create;
  dump $corpus->getListOfXmlFiles;
  $corpus->create;
  dump $corpus->getListOfXmlFiles;
46 more matches »
KUBINA/Text-Corpus-Summaries-Wikipedia-0.22 4 more files »
Alt-CWB-CL-ambs-2.2.102.0/CL.xs
Corpus *
cl_new_corpus(registry_dir, registry_name)
    char *  registry_dir
int
cl_delete_corpus(corpus)
    Corpus *    corpus
int
cl_delete_corpus(corpus)
    Corpus *    corpus
3 more matches »
AMBS/Alt-CWB-CL-ambs-2.2.102.0 9 more files »
Unicode-Tussle-1.08/data/words.utf8
          	blood-bay [adj.] ← blood
bloodbeat             	 › blood-beat, -circulation, -clot, -corpuscle, -disease, -drop, -flow, -freezer, -gout, -mark, -spoor, -spot, -stream, -supply, -system, blood
ing, -monger, -offering, -seller, -wreaker, blood-curdling, -stirring, -stirringness ← blood
bloodcorpuscles       	 › blood-corpuscles, lymph-c, Malpighian corpuscles, splenic c, Pacinian c, c. of Vate
ler, -wreaker, blood-curdling, -stirring, -stirringness ← blood
bloodcorpuscles       	 › blood-corpuscles, lymph-c, Malpighian corpuscles, splenic c, Pacinian c, c. of Vater ← corpuscle
bloodcount   
105 more matches »
BDFOY/Unicode-Tussle-1.08
Alvis-NLPPlatform-0.6/bin/alvis-nlp-standalone
alvis-nlp-standalone - Perl script for linguistically annotating a corpus contained in a file
THHAMON/Alvis-NLPPlatform-0.6 6 more files »
Lingua-EN-Inflexion-0.000002/lib/Lingua/EN/Inflexion.pm
              # "7 formulas found"
              # "7 corpuses found"
              # "7 brothers found"
DCONWAY/Lingua-EN-Inflexion-0.000002 7 more files »
String-Sections-0.3.2/corpus/template/parse_filehandle.tpl
my $corpus;
my $parsefiles;
};
nofatals 'resolve corpus dir' => sub {
  $corpus = path($FindBin::Bin)->parent->parent->parent->child('corpus');
nofatals 'resolve corpus dir' => sub {
  $corpus = path($FindBin::Bin)->parent->parent->parent->child('corpus');
};
2 more matches »
KENTNL/String-Sections-0.3.2 16 more files »
WordNet-Similarity-2.05/doc/config.pod
information content file containing the frequency of occurrence of every
WordNet concept in a large corpus. A number of utility programs are
included in this distribution that can be used to generate an inf
TPEDERSE/WordNet-Similarity-2.05 18 more files »
Lingua-BioYaTeA-0.11/bin/bioyatea
# my $current_dir = `pwd`;
my $corpus_path = $ARGV[$#ARGV];
    open($fh, ">$preProcessingFile") or die "can not open file $preProcessingFile to record corrected file";
    $preProc->process_file($corpus_path, $fh);
    close($fh);
    close($fh);
    $corpus_path = $preProcessingFile;
}
14 more matches »
BIBLIOME/Lingua-BioYaTeA-0.11 18 more files »
Lingua-Interset-2.025/Changes
atures. Thanks to Saša Rosen, who tries to
use DZ Interset together with a multi-language parallel corpus called
Intercorp, we also created a driver for the IPI PAN Polish corpus, which in
 a multi-language parallel corpus called
Intercorp, we also created a driver for the IPI PAN Polish corpus, which in
turn caused one systemic change: o-tags (those setting the other feature) can
References). Dan added a driver for the Czech tags of the Multext East
multilingual corpus.
ZEMAN/Lingua-Interset-2.025 34 more files »
UMLS-Interface-1.41/lib/UMLS/Interface/ICFinder.pm
(P(d)) is obtained by dividing the number of times a concept is 
seen in the corpus (freq(d)) by the total number of concepts (N):
Not all of the concepts in the taxonomy will be seen in the corpus. 
We have the option to use Laplace smoothing, where the frequency 
overall probability mass of the concepts from what is actually seen 
in the corpus. 
BTMCINNES/UMLS-Interface-1.41 3 more files »
Lingua-EN-WSD-CorpusBased-0.11/Changes
	- More debug messages in Lingua::EN::WSD::CorpusBased::Corpus.
	- Documentation of the demo and test corpus.
	- Added method line() to the corpus class. Method returns a
	- Documentation of the demo and test corpus.
	- Added method line() to the corpus class. Method returns a
	  specific line of the corpus.
	- Added method line() to the corpus class. Method returns a
	  specific line of the corpus.
	- Methods of CorpusBased now return error values instead of
REITER/Lingua-EN-WSD-CorpusBased-0.11 4 more files »
CAD-Firemen-0.6.2/SIGNATURE
62238865719ffa85e6b7deb6f158029576 bin/fm_option_info
SHA1 746948ffaed8e263439725b7eefc92af120943e6 corpus/config-changed-doubles-new.pro
SHA1 370ac717b1ce0db6b912e5672095c8ce432c538a corpus/config-changed-
efc92af120943e6 corpus/config-changed-doubles-new.pro
SHA1 370ac717b1ce0db6b912e5672095c8ce432c538a corpus/config-changed-doubles-old.pro
SHA1 e179a77f4ee0a42fa0547d93c22a25ea711c6ac1 corpus/config-checkCon
095c8ce432c538a corpus/config-changed-doubles-old.pro
SHA1 e179a77f4ee0a42fa0547d93c22a25ea711c6ac1 corpus/config-checkConfig.cdb
SHA1 602451b3c5fc4427a9e3f7d5dfe24644e04acf09 corpus/config-error-absolutepa
13 more matches »
LANGENJO/CAD-Firemen-0.6.2 6 more files »
Home · About