CPAN->GREP


1 to 25 of 707 distributions (7.68 seconds)
Lingua-NATools-v0.7.8/Build.PL
                      't/bin/*.o', 't/bin/*.exe',
                      't/bin/corpus', 't/bin/words',
                      'Lingua-NATools-*',
AMBS/Lingua-NATools-v0.7.8 64 more files »
Text-Mining-0.08/Text-Mining/lib/Text/Mining.pm
	sub create_corpus             { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub get
            { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub get_corpus                { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub 
         { my ( $self, $arg_ref ) = @_; return Text::Mining::Corpus->new( $arg_ref ); }
	sub delete_corpus             { my ( $self, $arg_ref ) = @_; my $corpus = Text::Mining::Corpus->new(); return $corpus
75 more matches »
ROGERHALL/Text-Mining-0.08 20 more files »
uplug-main-0.3.8/Uplug-0.3.8/bin/uplug-convert
		'language' => 1,
		'corpus' => 1,
		);
TIEDEMANN/uplug-main-0.3.8 93 more files »
Lingua-YaTeA-0.622/bin/yatea
yatea - Perl script for extracting terms from a corpus of texts and
providing a syntactic analysis in a head-modifier representation.
=item    I<file>               corpus of texts in Flemm or TreeTagger output format
YaTeA aims at extracting noun phrases that look like terms from a
corpus. It also provides their syntactic analysis in a head-modifier
format.
27 more matches »
THHAMON/Lingua-YaTeA-0.622 43 more files »
Algorithm-VSM-1.4/examples/calculate_precision_and_recall_for_LSA.pl
my $corpus_dir = "corpus";                     # This is the directory containing
                                               # the corpus
my $corpus_dir = "corpus";                     # This is the directory containing
                                               # the corpus
my $corpus_dir = "corpus";                     # This is the directory containing
                                               # the corpus
#my $corpus_dir = "corpus_with_java_and_cpp";
16 more matches »
AVIKAK/Algorithm-VSM-1.4 15 more files »
Text-SenseClusters-1.03/discriminate.pl
Removes features that occur less than F times in the training corpus.
	# check if the training file is senseval2 formatted file - if yes quit.
	if($inp_str =~ m/<corpus/i && $inp_str =~ m/<lexelt/i && $inp_str =~ m/<instance/i && $inp_str =~ m/<context/i)
	{
TPEDERSE/Text-SenseClusters-1.03 125 more files »
Lingua-Align-0.04/bin/add_english_treetags
#
# add tree tagger tags & lemmas to an english tigerXML corpus
#
TIEDEMANN/Lingua-Align-0.04 53 more files »
Lingua-BrillTagger-0.02/lib/Lingua/BrillTagger.xs
			     the training set  
			     When training on a very small corpus, better
			     performance might be obtained by setting this to
void
_load_into_corpus( self, word )
     SV   * self
KWILLIAMS/Lingua-BrillTagger-0.02 1 more file »
Text-Corpus-CNN-1.02/lib/Text/Corpus/CNN.pm
#12345678901234567890123456789012345678901234
#Make a corpus of CNN documents for research.
C<Text::Corpus::CNN> - Make a corpus of CNN documents for research.
  Log::Log4perl->easy_init ($INFO);
  my $corpusDirectory = File::Spec->catfile (getcwd(), 'corpus_cnn');
  my $corpus = Text::Corpus::CNN->new (corpusDirectory => $corpusDirectory);
65 more matches »
KUBINA/Text-Corpus-CNN-1.02 7 more files »
Alvis-TermTagger-0.8/bin/TermTagger.pl
TermTagger.pl -- A Perl script for tagging corpus with terms
TermTagger.pl [options] corpus termlist selected_term_list
This script tags a corpus with terms. Corpus (C<corpus>) is a file
with one sentence per line. Term list (C<termlist>) is a file
1 more match »
THHAMON/Alvis-TermTagger-0.8 10 more files »
Text-Corpus-VoiceOfAmerica-1.03/lib/Text/Corpus/VoiceOfAmerica.pm
C<Text::Corpus::VoiceOfAmerica> - Make a corpus of VOA documents for research.
  Log::Log4perl->easy_init ($INFO);
  my $corpusDirectory = File::Spec->catfile (getcwd(), 'corpus_voa');
  my $corpus = Text::Corpus::VoiceOfAmerica->new (corpusDirectory => $corpusDirectory);
  Log::Log4perl->easy_init ($INFO);
  my $corpusDirectory = File::Spec->catfile (getcwd(), 'corpus_voa');
  my $corpus = Text::Corpus::VoiceOfAmerica->new (corpusDirectory => $corpusDirectory);
60 more matches »
KUBINA/Text-Corpus-VoiceOfAmerica-1.03 7 more files »
Alt-CWB-ambs-2.2.102.4/Changes
TODO:
  - implement tests for the new CWB::CQP interface, using the included VSS corpus
  - complete reorganisation of CWB/Perl modules into packages CWB (utility functions,
    corpus encoding, CQP interface) and CWB-CL (API for low-level corpus access);
    WebCqp functionality and
 packages CWB (utility functions,
    corpus encoding, CQP interface) and CWB-CL (API for low-level corpus access);
    WebCqp functionality and demo Web interface will be released as a separate package
AMBS/Alt-CWB-ambs-2.2.102.4 15 more files »
Dist-Zilla-5.021/dist.ini
[MetaNoIndex]
dir = corpus
dir = misc
parent  = 0 ; used by the AutoPrereq test corpus
RJBS/Dist-Zilla-5.021 39 more files »
Text-Corpus-Inspec-1.00/lib/Text/Corpus/Inspec.pm
#12345678901234567890123456789012345678901234
#Interface to Inspec abstracts corpus.
C<Text::Corpus::Inspec> - Interface to Inspec abstracts corpus.
  Log::Log4perl->easy_init ($INFO);
  my $corpus = Text::Corpus::Inspec->new (corpusDirectory => $corpusDirectory);
  dump $corpus->getTotalDocuments;
70 more matches »
KUBINA/Text-Corpus-Inspec-1.00 6 more files »
Text-Corpus-Summaries-Wikipedia-0.22/lib/Text/Corpus/Summaries/Wikipedia.pm
  use Data::Dump qw(dump);
  my $corpus = Text::Corpus::Summaries::Wikipedia->new;
  $corpus->create;
  my $corpus = Text::Corpus::Summaries::Wikipedia->new;
  $corpus->create;
  dump $corpus->getListOfXmlFiles;
  $corpus->create;
  dump $corpus->getListOfXmlFiles;
46 more matches »
KUBINA/Text-Corpus-Summaries-Wikipedia-0.22 4 more files »
Alt-CWB-CL-ambs-2.2.102.0/CL.xs
Corpus *
cl_new_corpus(registry_dir, registry_name)
    char *  registry_dir
int
cl_delete_corpus(corpus)
    Corpus *    corpus
int
cl_delete_corpus(corpus)
    Corpus *    corpus
3 more matches »
AMBS/Alt-CWB-CL-ambs-2.2.102.0 9 more files »
Unicode-Tussle-1.08/data/words.utf8
          	blood-bay [adj.] ← blood
bloodbeat             	 › blood-beat, -circulation, -clot, -corpuscle, -disease, -drop, -flow, -freezer, -gout, -mark, -spoor, -spot, -stream, -supply, -system, blood
ing, -monger, -offering, -seller, -wreaker, blood-curdling, -stirring, -stirringness ← blood
bloodcorpuscles       	 › blood-corpuscles, lymph-c, Malpighian corpuscles, splenic c, Pacinian c, c. of Vate
ler, -wreaker, blood-curdling, -stirring, -stirringness ← blood
bloodcorpuscles       	 › blood-corpuscles, lymph-c, Malpighian corpuscles, splenic c, Pacinian c, c. of Vater ← corpuscle
bloodcount   
105 more matches »
BDFOY/Unicode-Tussle-1.08
Text-Corpus-NewYorkTimes-1.01/lib/Text/Corpus/NewYorkTimes.pm
#12345678901234567890123456789012345678901234
#Interface to New York Times corpus.
C<Text::Corpus::NewYorkTimes> - Interface to New York Times corpus.
  Log::Log4perl->easy_init ($INFO);
  my $corpus = Text::Corpus::NewYorkTimes->new (fileList => $fileList, corpusDirectory => $corpusDirectory);
  dump $corpus->getTotalDocuments;
39 more matches »
KUBINA/Text-Corpus-NewYorkTimes-1.01 4 more files »
Alvis-NLPPlatform-0.6/bin/alvis-nlp-standalone
alvis-nlp-standalone - Perl script for linguistically annotating a corpus contained in a file
THHAMON/Alvis-NLPPlatform-0.6 6 more files »
Lingua-EN-Inflexion-0.000002/lib/Lingua/EN/Inflexion.pm
              # "7 formulas found"
              # "7 corpuses found"
              # "7 brothers found"
DCONWAY/Lingua-EN-Inflexion-0.000002 7 more files »
utf8-all-0.015/META.json
      "directory" : [
         "corpus",
         "inc",
DOHERTY/utf8-all-0.015 10 more files »
String-Sections-0.3.2/corpus/template/parse_filehandle.tpl
my $corpus;
my $parsefiles;
};
nofatals 'resolve corpus dir' => sub {
  $corpus = path($FindBin::Bin)->parent->parent->parent->child('corpus');
nofatals 'resolve corpus dir' => sub {
  $corpus = path($FindBin::Bin)->parent->parent->parent->child('corpus');
};
2 more matches »
KENTNL/String-Sections-0.3.2 16 more files »
WordNet-Similarity-2.05/doc/config.pod
information content file containing the frequency of occurrence of every
WordNet concept in a large corpus. A number of utility programs are
included in this distribution that can be used to generate an inf
TPEDERSE/WordNet-Similarity-2.05 18 more files »
Lingua-BioYaTeA-0.11/bin/bioyatea
# my $current_dir = `pwd`;
my $corpus_path = $ARGV[$#ARGV];
    open($fh, ">$preProcessingFile") or die "can not open file $preProcessingFile to record corrected file";
    $preProc->process_file($corpus_path, $fh);
    close($fh);
    close($fh);
    $corpus_path = $preProcessingFile;
}
14 more matches »
BIBLIOME/Lingua-BioYaTeA-0.11 18 more files »
UMLS-Interface-1.41/lib/UMLS/Interface/ICFinder.pm
(P(d)) is obtained by dividing the number of times a concept is 
seen in the corpus (freq(d)) by the total number of concepts (N):
Not all of the concepts in the taxonomy will be seen in the corpus. 
We have the option to use Laplace smoothing, where the frequency 
overall probability mass of the concepts from what is actually seen 
in the corpus. 
BTMCINNES/UMLS-Interface-1.41 3 more files »
Home · About