# Module of TWiki Enterprise Collaboration Platform, http://TWiki.org/ # # Copyright (C) 2000-2018 Peter Thoeny, peter[at]thoeny.org # and TWiki Contributors. All Rights Reserved. TWiki Contributors # are listed in the AUTHORS file in the root of this distribution. # NOTE: Please extend that file, not this notice. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. For # more details read LICENSE in the root of this distribution. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # As per the GPL, removal of this notice is prohibited. package TWiki::Search; =pod ---+ package TWiki::Search This module implements all the search functionality. =cut use strict; use Assert; use Error qw( :try ); require TWiki; require TWiki::Sandbox; require TWiki::Render; # SMELL: expensive my $queryParser; BEGIN { # 'Use locale' for internationalisation of Perl sorting and searching - # main locale settings are done in TWiki::setupLocale # Do a dynamic 'use locale' for this module if ( $TWiki::cfg{UseLocale} ) { require locale; import locale(); } } =pod ---++ ClassMethod new ($session) Constructor for the singleton Search engine object. =cut sub new { my ( $class, $session ) = @_; my $this = bless( { session => $session }, $class ); return $this; } =begin twiki ---++ ObjectMethod finish() Break circular references. =cut # Note to developers; please undef *all* fields in the object explicitly, # whether they are references or not. That way this method is "golden # documentation" of the live fields in the object. sub finish { my $this = shift; undef $this->{session}; } =pod ---++ StaticMethod getTextPattern ( $text, $pattern ) Sanitise search pattern - currently used for FormattedSearch only =cut sub getTextPattern { my ( $text, $pattern, $args ) = @_; $args ||= ''; # escape some special chars $pattern =~ s/([^\\])([\$\@\%\&\#\'\`\/])/$1\\$2/go; $pattern = TWiki::Sandbox::untaintUnchecked($pattern); my $OK = 0; eval { $OK = ( $text =~ s/$pattern/$1/is ); }; $text = '' unless ($OK); if ( $args =~ /\bencode:(\w+)/ ) { $text = TWiki::_encode($1, $text); } return $text; } # Split the search string into tokens depending on type of search. # Search is an 'AND' of all tokens - various syntaxes implemented # by this routine. sub _tokensFromSearchString { my ( $this, $searchString, $type ) = @_; my @tokens = (); if ( $type eq 'regex' ) { # Regular expression search Example: soap;wsdl;web service;!shampoo @tokens = split( /;/, $searchString ); } elsif ( $type eq 'literal' || $type eq 'query' ) { if( $searchString eq '' ) { # Legacy: empty search returns nothing } else { # Literal search (old style) or query $tokens[0] = $searchString; } } else { # Keyword search (Google-style) - implemented by converting # to regex format. Example: soap +wsdl +"web service" -shampoo # Prevent tokenizing on spaces in "literal string" $searchString =~ s/\"(.*?)\"/&_translateSpace($1)/geo; $searchString =~ s/[\+\-]\s+//go; # Build pattern of stop words my $prefs = $this->{session}->{prefs}; my $stopWords = $prefs->getPreferencesValue('SEARCHSTOPWORDS') || ''; $stopWords =~ s/[\s\,]+/\|/go; $stopWords =~ s/[\(\)]//go; # Tokenize string taking account of literal strings, then remove # stop words and convert '+' and '-' syntax. @tokens = map { s/^\+//o; s/^\-/\!/o; $_ } # remove +, change - to !, remove " grep { !/^($stopWords)$/i } # remove stopwords map { s/$TWiki::TranslationToken/ /go; $_ } # restore space split( /[\s]+/, $searchString ); # split on spaces } return @tokens; } # Convert spaces into translation token characters (typically NULs), # preventing tokenization. # # FIXME: Terminology confusing here! sub _translateSpace { my $text = shift; $text =~ s/\s+/$TWiki::TranslationToken/go; return $text; } # get a list of topics to search in the web, filtered by the $topic # spec sub _getTopicList { my( $this, $web, $topic, $options ) = @_; my @topicList = (); my $store = $this->{session}->{store}; if ($topic) { # limit search to topic list if ( $topic =~ /^\^\([\_\-\+$TWiki::regex{mixedAlphaNum}\|]+\)\$$/ ) { # topic list without wildcards # for speed, do not get all topics in web # but convert topic pattern into topic list my $topics = $topic; $topics =~ s/^\^\(//o; $topics =~ s/\)\$//o; # build list from topic pattern @topicList = grep( $store->topicExists($web, $_), split( /\|/, $topics )); } else { # topic list with wildcards @topicList = $store->getTopicNames($web); if ( $options->{caseSensitive} ) { # limit by topic name, @topicList = grep( /$topic/, @topicList ); } else { # Codev.SearchTopicNameAndTopicText @topicList = grep( /$topic/i, @topicList ); } } } else { @topicList = $store->getTopicNames( $web ); } return @topicList; } # Run a query over a list of topics sub _queryTopics { my( $this, $web, $query, @topicList ) = @_; my $store = $this->{session}->{store}; my $matches = $store->searchInWebMetaData( $query, $web, \@topicList ); return keys %$matches; } # Run a search over a list of topics - @tokens is a list of # search terms to be ANDed together sub _searchTopics { my ( $this, $web, $scope, $type, $options, $tokens, @topicList ) = @_; my $store = $this->{session}->{store}; # default scope is 'text' $scope = 'text' unless ( $scope =~ /^(topic|all)$/ ); # AND search - search once for each token, ANDing result together my @tokens = @$tokens; foreach my $token ( @tokens ) { my $invertSearch = 0; $invertSearch = ( $token =~ s/^\!//o ); # flag for AND NOT search my @scopeTextList = (); my @scopeTopicList = (); # scope can be 'topic' (default), 'text' or "all" # scope='text', e.g. Perl search on topic name: unless ( $scope eq 'text' ) { my $qtoken = $token; # FIXME I18N $qtoken = quotemeta($qtoken) if ( $type ne 'regex' ); if ( $options->{'caseSensitive'} ) { # fix for Codev.SearchWithNoPipe @scopeTopicList = grep( /$qtoken/, @topicList ); } else { @scopeTopicList = grep( /$qtoken/i, @topicList ); } } # scope='text', e.g. grep search on topic text: unless ( $scope eq 'topic' ) { my $matches = $store->searchInWebContent( $token, $web, \@topicList, { type => $type, scope => $scope, casesensitive => $options->{'caseSensitive'}, wordboundaries => $options->{'wordBoundaries'}, files_without_match => 1 } ); @scopeTextList = keys %$matches; } if ( @scopeTextList && @scopeTopicList ) { # join 'topic' and 'text' lists push( @scopeTextList, @scopeTopicList ); my %seen = (); # make topics unique @scopeTextList = sort grep { !$seen{$_}++ } @scopeTextList; } elsif( @scopeTopicList ) { @scopeTextList = @scopeTopicList; } if( $invertSearch ) { # do AND NOT search my %seen = (); foreach my $topic (@scopeTextList) { $seen{$topic} = 1; } @scopeTextList = (); foreach my $topic ( @topicList ) { push( @scopeTextList, $topic ) unless ( $seen{$topic} ); } } # reduced topic list for next token @topicList = @scopeTextList; } return @topicList; } sub _makeTopicPattern { my ( $topic ) = @_; return '' unless ( $topic ); # 'Web*, FooBar' ==> ( 'Web*', 'FooBar' ) ==> ( 'Web.*', "FooBar" ) my @arr = map { s/[^\*\_\-\+$TWiki::regex{mixedAlphaNum}]//go; s/\*/\.\*/go; $_ } split( /,\s*/, $topic ); return '' unless ( @arr ); # ( 'Web.*', 'FooBar' ) ==> "^(Web.*|FooBar)$" return '^(' . join( '|', @arr ) . ')$'; } sub _fixHeadingOffset { my ( $prefix, $level, $offset ) = @_; $level += $offset; $level = 1 if( $level < 1); $level = 6 if( $level > 6); return $prefix . '+' x $level; } =pod ---++ ObjectMethod searchWeb (...) Search one or more webs according to the parameters. If =_callback= is set, that means the caller wants results as soon as they are ready. =_callback_ should be set to a reference to a function which takes =_cbdata= as the first parameter and remaining parameters the same as 'print'. If =_callback= is set, the result is always undef. Otherwise the result is a string containing the rendered search results. If =inline= is set, then the results are *not* decorated with the search template head and tail blocks. The function will throw Error::Simple if it encounters any problems with the syntax of the search string. Note: If =format= is set, =template= will be ignored. Note: For legacy, if =regex= is defined, it will force type='regex' If =type="word"= it will be changed to =type="keyword"= with =wordBoundaries=1=. This will be used for searching with scope="text" only, because scope="topic" will do a Perl search on topic names. SMELL: If =template= is defined =bookview= will not work SMELL: it seems that if you define =_callback= or =inline= then you are responsible for converting the TML to HTML yourself! FIXME: =callback= cannot work with format parameter (consider format='| $topic |' =cut sub searchWeb { my $this = shift; my %params = @_; my $callback = $params{_callback}; my $cbdata = $params{_cbdata}; my $baseTopic = $params{basetopic} || $this->{session}->{topicName}; my $baseWeb = $params{baseweb} || $this->{session}->{webName}; my $doBookView = TWiki::isTrue( $params{bookview} ); my $caseSensitive = TWiki::isTrue( $params{casesensitive} ); my $excludeTopic = $params{excludetopic} || ''; my $doExpandVars = TWiki::isTrue( $params{expandvariables} ); my $format = $params{format} || ''; my $header = $params{header}; my $headingoffset = $params{headingoffset} || 0; $headingoffset =~ s/.*?([-+]?[0-9]).*/$1/ || 0; my $footer = $params{footer}; my $default = $params{default}; my $inline = $params{inline}; my $limit = $params{limit} || ''; my $start = defined($params{start}) ? int($params{start}) : ''; my $doMultiple = TWiki::isTrue( $params{multiple} ); my $nonoise = TWiki::isTrue( $params{nonoise} ); my $noEmpty = TWiki::isTrue( $params{noempty}, $nonoise ); # Note: a defined header overrides noheader my $noHeader = !defined( $header ) && TWiki::isTrue( $params{noheader}, $nonoise ) # Note: This is done for Cairo compatibility || ( !$header && $format && $inline ); # Note: a defined footer overrides nofooter my $noFooter = !defined( $footer ) && TWiki::isTrue( $params{nofooter}, $nonoise ) # Note: This is done for Cairo compatibility || ( !$footer && $format && $inline ); my $noSearch = TWiki::isTrue( $params{nosearch}, $nonoise ); my $noSummary = TWiki::isTrue( $params{nosummary}, $nonoise ); my $zeroResults = 1 - TWiki::isTrue( ( $params{zeroresults} || 'on' ), $nonoise ); my $noTotal = TWiki::isTrue( $params{nototal}, $nonoise ); my $newLine = $params{newline} || ''; my $sortOrder = $params{sort} || $params{order} || ''; my $revSort = $params{reverse} || ''; my $scope = $params{scope} || ''; my $searchString = $params{search} || ''; my $separator = $params{separator}; my $template = $params{template} || ''; my $topic = $params{topic} || ''; my $type = $params{type} || ''; my $wordBoundaries = 0; if ( $type eq 'word' ) { # 'word' is exactly the same as 'keyword', except we will be searching # with word boundaries $type = 'keyword'; $wordBoundaries = 1; } my $webName = $params{web} || ''; my $date = $params{date} || ''; my $createDate = $params{createdate} || ''; my $recurse = $params{'recurse'} || ''; my $finalTerm = $inline ? ( $params{nofinalnewline} || 0 ) : 0; my $users = $this->{session}->{users}; $baseWeb =~ s/\./\//go; my $session = $this->{session}; my $renderer = $session->renderer; # Limit search results if ( $limit =~ /(^\d+$)/o ) { # only digits, all else is the same as # an empty string. "+10" won't work. $limit = $1; } else { # change 'all' to 0, then to big number $limit = 0; } $limit = 32000 unless( $limit ); # Limit DoD attack, Item6784: DoS on bin/search whith an asterisk wildcard $limit = 64 if( $doBookView && $limit > 64 ); $type = 'regex' if( $params{regex} ); my $mixedAlpha = $TWiki::regex{mixedAlpha}; if( defined($separator) ) { $separator =~ s/\$br/
/gos; $separator =~ s/\$n\(\)/\n/gos; # expand "$n()" to new line $separator =~ s/\$n([^$mixedAlpha]|$)/\n$1/gos; } if( $newLine ) { $newLine =~ s/\$br/
/gos; $newLine =~ s/\$n\(\)/\n/gos; # expand "$n()" to new line $newLine =~ s/\$n([^$mixedAlpha]|$)/\n$1/gos; } my $searchResult = ''; my $homeWeb = $session->{webName}; my $homeTopic = $TWiki::cfg{HomeTopicName}; my $store = $session->{store}; my %excludeWeb; my @tmpWebs; # A value of 'all' or 'on' by itself gets all webs, # otherwise ignored (unless there is a web called 'All'.) $webName =~ s/\b(all|on)\b//ig if ( $TWiki::cfg{NoInAllPublicWebs} ); $webName =~ s/^[,\s]+//; $webName =~ s/[,\s]+$//; # all and on are ignored when necessary as per Item7575 my $searchAllFlag = ( $webName =~ /(^|[\,\s])(all|on)([\,\s]|$)/i ); if( $webName ) { foreach my $web ( split( /[\,\s]+/, $webName ) ) { $web =~ s#\.#/#go; # the web processing loop filters for valid web names, # so don't do it here. if( $web =~ s/^-// ) { $excludeWeb{$web} = 1; } else { push( @tmpWebs, $web ); if ( TWiki::isTrue( $recurse ) || $web =~ /^(all|on)$/i ) { my $webarg = ( $web =~ /^(all|on)$/i ) ? undef: $web; push( @tmpWebs, $store->getListOfWebs( 'user,allowed', $webarg ) ); } } } } else { # default to current web push( @tmpWebs, $session->{webName} ); if ( TWiki::isTrue( $recurse ) ) { push( @tmpWebs, $store->getListOfWebs( 'user,allowed', $session->{webName} ) ); } } my @webs; foreach my $web ( @tmpWebs ) { push( @webs, $web ) unless $excludeWeb{$web}; $excludeWeb{$web} = 1; } # E.g. "Bug*, *Patch" ==> "^(Bug.*|.*Patch)$" $topic = _makeTopicPattern( $topic ); # E.g. "Web*, FooBar" ==> "^(Web.*|FooBar)$" $excludeTopic = _makeTopicPattern( $excludeTopic ); my $output = ''; my $tmpl = ''; $searchString =~ s/$TWiki::percentSubstitute/%/go; # Item7847 my $originalSearch = $searchString; my $spacedTopic; if( $format ) { $template = 'searchformat'; } elsif( $template ) { # template definition overrides book and rename views } elsif( $doBookView ) { $template = 'searchbookview'; } else { $template = 'search'; } $tmpl = $session->templates->readTemplate( $template ); # SMELL: the only META tags in a template will be METASEARCH # Why the heck are they being filtered???? $tmpl =~ s/\%META\{.*?}\%//go; # remove %META{'parent'}% # Split template into 5 sections my ( $tmplHead, $tmplSearch, $tmplTable, $tmplNumber, $tmplTail ) = split( /%SPLIT%/, $tmpl ); # Invalid template? unless( $tmplTail ) { my $mess = CGI::h1( 'TWiki Installation Error' ) . 'Incorrect format of ' . $template . ' template (missing sections? There should be 4 %SPLIT% tags)'; if ( defined $callback ) { &$callback( $cbdata, $mess ); return undef; } else { return $mess; } } # Expand tags in template sections $tmplSearch = $session->handleCommonTags( $tmplSearch, $homeWeb, $homeTopic ); $tmplNumber = $session->handleCommonTags( $tmplNumber, $homeWeb, $homeTopic ); # If not inline search, also expand tags in head and tail sections unless( $inline ) { $tmplHead = $session->handleCommonTags( $tmplHead, $homeWeb, $homeTopic ); if ( defined $callback ) { $tmplHead = $renderer->getRenderedVersion( $tmplHead, $homeWeb, $homeTopic ); $tmplHead =~ s|||goi; # remove tags &$callback( $cbdata, $tmplHead ); } else { # don't getRenderedVersion; this will be done by a single # call at the end. $searchResult .= $tmplHead; } } # Generate 'Search:' part showing actual search string used unless( $noSearch ) { my $searchStr = $searchString; $searchStr =~ s/&/&/go; # escape entities $searchStr =~ s//>/go; # escape HTML $searchStr =~ s/%/%/go; # escape TWiki variables $searchStr =~ s/ / /go; # escape TWiki text formatting $searchStr =~ s/^\.\*$/Index/go; $tmplSearch =~ s/%SEARCHSTRING%/$searchStr/go; if( defined $callback ) { $tmplSearch = $renderer->getRenderedVersion( $tmplSearch, $homeWeb, $homeTopic ); $tmplSearch =~ s|||goi; # remove tag &$callback( $cbdata, $tmplSearch ); } else { # don't getRenderedVersion; will be done later $searchResult .= $tmplSearch; } } # Write log entry # FIXME: Move log entry further down to log actual webs searched if( ( $TWiki::cfg{Log}{search} ) && ( !$inline ) ) { my $t = join( ' ', @webs ); $session->writeLog( 'search', $t, $searchString ); } my $query; my @tokens; if( $type eq 'query' ) { unless( defined( $queryParser ) ) { require TWiki::Query::Parser; $queryParser = new TWiki::Query::Parser(); } my $error = ''; try { $query = $queryParser->parse( $searchString ); } catch TWiki::Infix::Error with { # Pass the error on to the caller throw Error::Simple( shift->stringify() ); }; return $error unless $query; } else { # Split the search string into tokens depending on type of search - # each token is ANDed together by actual search @tokens = _tokensFromSearchString( $this, $searchString, $type ); return '' unless scalar( @tokens ); } # Loop through webs my $isAdmin = $session->{users}->isAdmin( $session->{user} ); my $ttopics = 0; my $prefs = $session->{prefs}; foreach my $web ( @webs ) { $web =~ s/$TWiki::cfg{NameFilter}//go; $web = TWiki::Sandbox::untaintUnchecked( $web ); next unless $store->webExists( $web ); # can't process what ain't thar my $thisWebNoSearchAll = $prefs->getWebPreferencesValue( 'NOSEARCHALL', $web ) || ''; # make sure we can report this web on an 'all' search # DON'T filter out unless it's part of an 'all' search. next if( $searchAllFlag && !$isAdmin && ( $thisWebNoSearchAll =~ /on/i || $web =~ /^[\.\_]/ ) && $web ne $session->{webName} ); my $options = { caseSensitive => $caseSensitive, wordBoundaries => $wordBoundaries, }; # Run the search on topics in this web my @topicList = _getTopicList( $this, $web, $topic, $options ); # exclude topics, Codev.ExcludeWebTopicsFromSearch if( $caseSensitive && $excludeTopic ) { @topicList = grep( !/$excludeTopic/, @topicList ); } elsif( $excludeTopic ) { @topicList = grep( !/$excludeTopic/i, @topicList ); } next if( $noEmpty && !@topicList ); # Nothing to show for this web if( $type eq 'query' ) { @topicList = _queryTopics( $this, $web, $query, @topicList ); } else { @topicList = _searchTopics( $this, $web, $scope, $type, $options, \@tokens, @topicList ); } my $topicInfo = {}; # sort the topic list by date, author or topic name, and cache the # info extracted to do the sorting if ( $sortOrder eq 'modified' ) { # For performance: # * sort by approx time (to get a rough list) # * shorten list to the limit + some slack # * sort by rev date on shortened list to get the accurate list # SMELL: Ciaro had efficient two stage handling of modified sort. # SMELL: In Dakar this seems to be pointless since latest rev # time is taken from topic instead of dir list. my @saveShortened; my $slack = 10; if( $limit + 2 * $slack < scalar(@topicList) ) { # sort by approx latest rev time my @tmpList = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [ $store->getTopicLatestRevTime( $web, $_ ), $_ ] } @topicList; @tmpList = reverse( @tmpList ) if( TWiki::isTrue( $revSort ) ); # then shorten list and build the hashes for date and author my $idx = $limit + $slack; @topicList = @tmpList[0 .. $idx - 1]; @saveShortened = @tmpList[$idx .. $#tmpList]; # removed elements are saved for later use } # sort topics and store topic info in $topicInfo _sortTopics( $this, $topicInfo, $web, \@topicList, $sortOrder, $revSort ); if ( $start ne '' && @saveShortened ) { # for pagination, which is indicated by the presense of the # =start= parameter, the removed elements for optimization are # put back push(@topicList, @saveShortened); } } elsif ( $sortOrder =~ /\b(created?|editby|formfield\((.*?)\)|parent(\(.*?\))?)([, ]|$)/ ) { # sort by topic creation time, author, parent, and store topic info in $topicInfo _sortTopics( $this, $topicInfo, $web, \@topicList, $sortOrder, $revSort ); } else { # simple sort, see Codev.SchwartzianTransformMisused # note no extraction of topic info here, as not needed # for the sort. Instead it will be read lazily, later on. if( TWiki::isTrue( $revSort ) ) { @topicList = sort { $b cmp $a } @topicList; } else { @topicList = sort { $a cmp $b } @topicList; } } if( $date ) { require TWiki::Time; my @ends = TWiki::Time::parseInterval( $date ); my @resultList = (); foreach my $topic ( @topicList ) { # if date falls out of interval: exclude topic from result my $topicdate = $store->getTopicLatestRevTime( $web, $topic ); push( @resultList, $topic ) unless( $topicdate < $ends[0] || $topicdate > $ends[1] ); } @topicList = @resultList; } if( $createDate ) { require TWiki::Time; my @ends = TWiki::Time::parseInterval( $createDate ); my @resultList = (); foreach my $topic ( @topicList ) { # if date falls out of interval: exclude topic from result my $info = {}; $this->_getRev1Info( $web, $topic, undef, $info); push( @resultList, $topic ) unless( $info->{date} < $ends[0] || $info->{date} > $ends[1] ); } @topicList = @resultList; } # header and footer of $web my ( $beforeText, $repeatText, $afterText ) = split( /%REPEAT%/, $tmplTable ); if ( defined $header ) { $beforeText = TWiki::expandStandardEscapes($header); $beforeText =~ s/\$web/$web/gos; # expand name of web if( defined($separator) ) { $beforeText .= $separator; } else { $beforeText =~ s/([^\n])$/$1\n/os; # add new line at end if needed } } # output the list of topics in $web my $ntopics = 0; # number of topics in current web my $ntopicsExclSkipped = 0; # number of topics in current web excluding the ones skipped by # the start parameter my $tntopics = @topicList; my $nhits = 0; # number of hits (if multiple=on) in current web my $headerDone = $noHeader; if ( $start ) { if ( $start < @topicList ) { $ntopics = $nhits = $start; splice(@topicList, 0, $start); } else { $ntopics = $nhits = @topicList; @topicList = (); } } foreach my $topic ( @topicList ) { my $forceRendering = 0; unless ( exists( $topicInfo->{$topic} ) ) { # not previously cached $topicInfo->{$topic} = _extractTopicInfo( $this, $topicInfo, $web, $topic, 0 ); } my $epochSecs = $topicInfo->{$topic}->{modified}; require TWiki::Time; my $revDate = TWiki::Time::formatTime( $epochSecs ); my $isoDate = TWiki::Time::formatTime( $epochSecs, '$iso', 'gmtime' ); my $cUID = $topicInfo->{$topic}->{cUID}; my $revNum = $topicInfo->{$topic}->{revNum} || 0; # Check security my $allowView = $topicInfo->{$topic}->{allowView}; next unless $allowView; my ( $meta, $text ); # Special handling for format='...' if( $format ) { ( $meta, $text ) = _getTextAndMeta( $this, $topicInfo, $web, $topic ); if( $headingoffset ) { $text =~ s/^(---*)(\++)/_fixHeadingOffset( $1, length( $2 ), $headingoffset )/gem; } if( $doExpandVars ) { if( $web eq $baseWeb && $topic eq $baseTopic ) { # primitive way to prevent recursion $text =~ s/%SEARCH/%SEARCH/g; } $text = $session->handleCommonTags( $text, $web, $topic, $meta ); } } my @multipleHitLines = (); if( $doMultiple ) { my $pattern = $tokens[$#tokens]; # last token in an AND search $pattern = quotemeta( $pattern ) if( $type ne 'regex' ); unless( $text ) { ( $meta, $text ) = _getTextAndMeta( $this, $topicInfo, $web, $topic ); if( $headingoffset ) { $text =~ s/^(---*)(\++)/_fixHeadingOffset( $1, length( $2 ), $headingoffset )/gem; } } if ($caseSensitive) { @multipleHitLines = reverse grep { /$pattern/ } split( /[\n\r]+/, $text ); } else { @multipleHitLines = reverse grep { /$pattern/i } split( /[\n\r]+/, $text ); } } $ntopics += 1; $ntopicsExclSkipped += 1; $ttopics += 1; do { # multiple=on loop $nhits += 1; my $out = ''; $text = pop( @multipleHitLines ) if( scalar(@multipleHitLines) ); my $wikiname = $topicInfo->{$topic}->{editby}; my $wikiusername = $TWiki::cfg{UsersWebName}.'.'.$wikiname; if( $format ) { $out = $format; $out =~ s/\$web/$web/gs; $out =~ s/\$topictitle/$meta->topicTitle()/ges; $out =~ s/\$topic\(([^\)]*)\)/TWiki::Render::breakName( $topic, $1 )/ges; $out =~ s/\$topic/$topic/gs; $out =~ s/\$date/$revDate/gs; $out =~ s/\$isodate/$isoDate/gs; $out =~ s/\$rev/$revNum/gs; $out =~ s/\$wikiusername/$wikiusername/ges; $out =~ s/\$ntopics/$ntopics/gs; $out =~ s/\$tntopics/$tntopics/gs; $out =~ s/\$nwebs/scalar(@webs)/gse; $out =~ s/\$nhits/$nhits/gs; $out =~ s/\$wikiname/$wikiname/ges; my $username = $users->getLoginName( $cUID ); $username = 'unknown' unless defined $username; $out =~ s/\$username/$username/ges; my $r1info = {}; $out =~ s/\$createdate/_getRev1Info( $this, $web, $topic, 'date', $r1info )/ges; $out =~ s/\$createusername/_getRev1Info( $this, $web, $topic, 'username', $r1info )/ges; $out =~ s/\$createwikiname/_getRev1Info( $this, $web, $topic, 'wikiname', $r1info )/ges; $out =~ s/\$createwikiusername/_getRev1Info( $this, $web, $topic, 'wikiusername', $r1info )/ges; if ( $out =~ m/\$text(?:\(([^\)]*)\))?/ ) { my $textArgs = $1; $textArgs = '' unless ( defined($textArgs) ); unless( $text || $doMultiple ) { ( $meta, $text ) = _getTextAndMeta( $this, $topicInfo, $web, $topic ); if( $headingoffset ) { $text =~ s/^(---*)(\++)/_fixHeadingOffset( $1, length( $2 ), $headingoffset )/gem; } } if ( $topic eq $session->{topicName} ) { # defuse SEARCH in current topic to prevent loop $text =~ s/%SEARCH\{.*?}%/SEARCH{...}/go; } my $textInserted; if ( $textArgs =~ /\bencode:(\w+)/ ) { $textInserted = TWiki::_encode($1, $text); } else { $textInserted = $text; } $out =~ s/\$text(?:\(([^\)]*)\))?/$textInserted/gos; $forceRendering = 1 unless( $doMultiple ); } } else { $out = $repeatText; } $out =~ s/%WEB%/$web/go; $out =~ s/%TOPICNAME%/$topic/go; $out =~ s/%TIME%/$revDate/o; my $srev = 'r' . $revNum; if( $revNum eq '0' || $revNum eq '1' ) { $srev = CGI::span( { class => 'twikiNew' }, ( $this->{session}->i18n->maketext('NEW') ) ); } $out =~ s/%REVISION%/$srev/o; $out =~ s/%AUTHOR%/[[$wikiusername][$wikiname]]/; if( $doBookView ) { # BookView ( $meta, $text ) = _getTextAndMeta( $this, $topicInfo, $web, $topic ) unless $text; if( $web eq $baseWeb && $topic eq $baseTopic ) { # primitive way to prevent recursion $text =~ s/%SEARCH/%SEARCH/g; } $text = $session->handleCommonTags( $text, $web, $topic, $meta ); $text = $session->renderer->getRenderedVersion( $text, $web, $topic ); # FIXME: What about meta data rendering? $out =~ s/%TEXTHEAD%/$text/go; } elsif( $format ) { $out =~ s/\$summary(?:\(([^\)]*)\))?/$renderer->makeTopicSummary( $text, $topic, $web, $1 )/ges; $out =~ s/\$changes(?:\(([^\)]*)\))?/$renderer->summariseChanges($cUID,$web,$topic,$1,$revNum)/ges; $out =~ s/\$formfield\(\s*([^\)]*)\s*\)/displayFormField( $meta, $1 )/ges; $out =~ s/\$parent\(([^\)]*)\)/TWiki::Render::breakName( $meta->getParent(), $1 )/ges; # undocumented $breadcrumb returning list with "parents, topic", # where parents is parent breadcrumb (only if sort by parent(...)) $out =~ s/\$breadcrumb/ if( $topicInfo->{$topic}{parent} ) { $topicInfo->{$topic}{parent}; } else { my $p = $meta->getParent(); ( $p ? "$p, $topic" : $topic ); } /ges; $out =~ s/\$parent/$meta->getParent()/ges; $out =~ s/\$formname/$meta->getFormName()/ges; $out =~ s/\$count\((.*?\s*\.\*)\)/_countPattern( $text, $1 )/ges; $out =~ s/\$query\(\s*([^\)]*)\s*\)/formatQuery( $meta, $1 )/ges; # FIXME: Allow all regex characters but escape them # Note: The RE requires a .* at the end of a pattern to avoid false positives # in pattern matching $out =~ s/\$pattern\((.*?\s*\.\*)\s*(?:,\s*([^\)]*))?\)/getTextPattern( $text, $1, $2 )/ges; $out =~ s/\r?\n/$newLine/gos if( $newLine ); if( defined($separator) ) { $out .= $separator; } else { # add new line at end if needed $out =~ s/([^\n])$/$1\n/s; } $out = TWiki::expandStandardEscapes( $out ); } elsif( $noSummary ) { $out =~ s/%TEXTHEAD%//go; $out =~ s/ //go; } else { # regular search view ( $meta, $text ) = _getTextAndMeta( $this, $topicInfo, $web, $topic ) unless $text; $text = $renderer->makeTopicSummary( $text, $topic, $web ); $out =~ s/%TEXTHEAD%/$text/go; } # lazy output of header (only if needed for the first time) unless( $headerDone ) { $headerDone = 1; my $prefs = $session->{prefs}; my $thisWebBGColor = $prefs->getWebPreferencesValue( 'WEBBGCOLOR', $web ) || '#FF00FF'; $beforeText =~ s/%WEBBGCOLOR%/$thisWebBGColor/go; $beforeText =~ s/%WEB%/$web/go; $beforeText =~ s/\$ntopics/0/gs; $beforeText =~ s/\$tntopics/0/gs; $beforeText =~ s/\$nwebs/scalar(@webs)/gse; $beforeText =~ s/\$nhits/0/gs; $beforeText = $session->handleCommonTags( $beforeText, $web, $topic ); if( defined $callback ) { $beforeText = $renderer->getRenderedVersion( $beforeText, $web, $topic ); $beforeText =~ s|||goi; # remove tag &$callback( $cbdata, $beforeText ); } else { $searchResult .= $beforeText; } } #don't expand if a format is specified - it breaks tables and stuff unless( $format ) { $out = $renderer->getRenderedVersion( $out, $web, $topic ); } # output topic (or line if multiple=on) if( defined $callback ) { $out =~ s|||goi; # remove tag &$callback( $cbdata, $out ); } else { $searchResult .= $out; } } while( @multipleHitLines ); # multiple=on loop # delete topic info to clear any cached data undef $topicInfo->{$topic}; last if( $ntopicsExclSkipped >= $limit ); } # end topic loop # output footer only if hits in web, else output default if defined if( $ntopics || $default ) { $afterText = $footer if( defined $footer ); $afterText = $default if( ! $ntopics && $default ); $afterText = TWiki::expandStandardEscapes( $afterText ); $afterText =~ s/\$web/$web/gos; # expand name of web $afterText =~ s/\$ntopics/$ntopics/gs; $afterText =~ s/\$tntopics/$tntopics/gs; $afterText =~ s/\$nwebs/scalar(@webs)/gse; $afterText =~ s/\$nhits/$nhits/gs; $afterText = $session->handleCommonTags( $afterText, $web, $homeTopic ); if( $afterText && $afterText ne '' ) { if( defined( $separator ) ) { $afterText .= $separator; } else { $afterText =~ s/([^\n])$/$1\n/os; # add new line at end if needed } if( defined $callback ) { $afterText = $renderer->getRenderedVersion( $afterText, $web, $homeTopic ); $afterText =~ s|||goi; # remove tag &$callback( $cbdata, $afterText ); } else { $searchResult .= $afterText; } } } # output number of topics (only if hits in web or if # only searching one web) if( $ntopics || scalar(@webs) < 2 ) { unless( $noTotal ) { my $thisNumber = $tmplNumber; if ( $start eq '' ) { $thisNumber =~ s/%NTOPICS%/$ntopics/go; } else { $thisNumber =~ s/%NTOPICS%/$tntopics/go; } if( defined $callback ) { $thisNumber = $renderer->getRenderedVersion( $thisNumber, $web, $homeTopic ); $thisNumber =~ s|||goi; # remove tag &$callback( $cbdata, $thisNumber ); } else { $searchResult .= $thisNumber; } } } } # end of: foreach my $web ( @webs ) return '' if( $ttopics == 0 && $zeroResults ); if( $format && !$finalTerm ) { if( $separator ) { $separator = quotemeta( $separator ); $searchResult =~ s/$separator$//s; # remove separator at end } else { $searchResult =~ s/\n$//os; # remove trailing new line } } unless( $inline ) { $tmplTail = $session->handleCommonTags( $tmplTail, $homeWeb, $homeTopic ); if ( defined $callback ) { $tmplTail = $renderer->getRenderedVersion( $tmplTail, $homeWeb, $homeTopic ); $tmplTail =~ s|||goi; # remove tag &$callback( $cbdata, $tmplTail ); } else { $searchResult .= $tmplTail; } } return undef if( defined $callback ); return $searchResult if( $inline ); $searchResult = $session->handleCommonTags( $searchResult, $homeWeb, $homeTopic ); $searchResult = $renderer->getRenderedVersion( $searchResult, $homeWeb, $homeTopic ); return $searchResult; } # RE for a full-spec floating-point number my $number = qr/^[-+]?[0-9]+(\.[0-9]*)?([Ee][-+]?[0-9]+)?$/s; # extract topic info required for sorting and sort. sub _sortTopics { my ( $this, $topicInfo, $web, $topics, $sortfield, $revSort ) = @_; my $users = $this->{session}->{users}; my $topicParents = {}; # initialize parent hash, used to optimize sorting by parents foreach my $topic ( @$topics ) { $topicInfo->{$topic} = _extractTopicInfo( $this, $topicInfo, $web, $topic, $sortfield, $topicParents ); } $sortfield =~ s/\bformfield\((.*?)\)/$1/g; $sortfield =~ s/\bparent\(.*?\)/parent/g; $sortfield =~ s/^ +//; $sortfield =~ s/ +$//; my @sortTokens = reverse split( / *, */, $sortfield ); my @reverseTokens = reverse map { TWiki::isTrue( $_ ) } split( / *, */, $revSort ); # fix reverse tokens to same size like sort tokens my $diff = $#reverseTokens - $#sortTokens; if( $diff > 0 ) { splice( @reverseTokens, 0, $diff ); # cut front to make same size } elsif( $diff < 0 ) { # repeat first state at front to make same size unshift( @reverseTokens, ($reverseTokens[0]) x abs( $diff ) ); } if( $#reverseTokens > 0 ) { # xor reverse to counteract repeated reverse my $o = $reverseTokens[$#reverseTokens]; for ( my $i = $#reverseTokens-1; $i >= 0; $i-- ) { my $n = ( $o xor $reverseTokens[$i] ); $o = $reverseTokens[$i]; $reverseTokens[$i] = $n; } } # sort by multiple fields my $i = 0; foreach my $sortToken ( @sortTokens ) { @$topics = map { $_->[1] } sort { $_ = 0; if( defined $a->[0] && defined $b->[0] ) { if( $a->[0] =~ /$number/o && $b->[0] =~ /$number/o ) { # when sorting numbers do it largest first; this is just because # this is what date comparisons need. $_ = $a->[0] <=> $b->[0]; } else { $_ = $a->[0] cmp $b->[0]; } } $_; } map { [ $topicInfo->{$_}->{$sortToken}, $_ ] } @$topics; @$topics = reverse @$topics if( $reverseTokens[$i++] ); } return $topicInfo; } # extract topic info sub _extractTopicInfo { my ( $this, $topicInfo, $web, $topic, $sortfield, $topicParents ) = @_; my $info = {}; my $session = $this->{session}; my $store = $session->{store}; my $users = $this->{session}->{users}; my ( $meta, $text ) = _getTextAndMeta( $this, $topicInfo, $web, $topic ); $info->{text} = $text; $info->{meta} = $meta; my ( $revdate, $cUID, $revnum ) = $meta->getRevisionInfo(); $info->{cUID} = $cUID || 'unknown'; $info->{editby} = $users->getWikiName($cUID); $info->{modified} = $revdate; $info->{revNum} = $revnum; $info->{allowView} = $session->security->checkAccessPermission( 'VIEW', $session->{user}, $text, $meta, $topic, $web ); return $info unless( $sortfield ); # sort field can have multiple tokens, such as: sort="parent, formfield(Title)" if ( $sortfield =~ /\bcreated?\b/ ) { ( $info->{created} ) = $meta->getRevisionInfo( 1 ); } if ( $sortfield =~ /\bparent(\(([0-9]+)\))?/ ) { # sort by parent breadcrumb up to indicated level. # for example, sorting on 3 levels is done with string "GrandParent, Parent, Topic" my $level = $2 || 1; my @parents = ( $topic ); my $parent = $meta->getParent(); $parent =~ s/.*\.//; # cut web prefix if present while( $level-- >= 1 && $parent ) { $topicParents->{$topic} = $parent if( $topicParents ); # remember for later use push( @parents, $parent ); if( $level >= 1 ) { my $gParent = ''; if( $topicParents && $topicParents->{$parent} ) { $gParent = $topicParents->{$parent}; } elsif( $store->topicExists( $web, $parent ) ) { my ( $gpMeta ) = _getTextAndMeta( $this, $topicInfo, $web, $parent ); $gParent = $gpMeta->getParent(); $gParent =~ s/.*\.//; # cut web prefix if present } $gParent = '' if( $gParent eq $parent ); # stop if topic points to itself as parent $topic = $parent; $parent = $gParent || ''; } }; ( $info->{parent} ) = join( ', ', reverse @parents ); } # handle possily multiple formfield(Name) $sortfield =~ s/\bformfield\((.*?)\)/_setFormFieldInfo( $info, $meta, $1 )/ge; return $info; } # set formfield info sub _setFormFieldInfo { my ( $info, $meta, $formfield ) = @_; unless ( defined( $info->{$formfield} ) ) { $info->{$formfield} = displayFormField( $meta, $formfield, 1 ); } } # pre-compile regex my $reATTACHURL = qr/%ATTACHURL%/; my $reATTACHURLPATH = qr/%ATTACHURLPATH%/; # get the text and meta for a topic sub _getTextAndMeta { my ( $this, $topicInfo, $web, $topic ) = @_; my ( $meta, $text ); my $store = $this->{session}->{store}; # read from cache if it's there if( $topicInfo ) { $text = $topicInfo->{$topic}->{text}; $meta = $topicInfo->{$topic}->{meta}; } unless( defined $text ) { ( $meta, $text ) = $store->readTopic( undef, $web, $topic, undef ); $text =~ s/%WEB%/$web/gos; $text =~ s/%TOPIC%/$topic/gos; if ( $text =~ $reATTACHURL ) { my $attachUrl = $this->{session}->getPubUrl(1, $web, $topic); $text =~ s/$reATTACHURL/$attachUrl/gos; } if ( $text =~ $reATTACHURLPATH ) { my $attachUrlPath = $this->{session}->getPubUrl(0, $web, $topic); $text =~ s/$reATTACHURLPATH/$attachUrlPath/gos; } } return( $meta, $text ); } =pod ---++ StaticMethod formatQuery( $meta, $query ) -> $text =cut sub formatQuery { my( $meta, $queryString ) = @_; my $encodeType = ''; if ( $queryString =~ s/\s*,\s*encode:(\w+)\s*// ) { $encodeType = $1; } my $strQuote = ''; if ( $queryString =~ s/\s*,\s*quote:([^\s\)]+)\s*// ) { $strQuote = $1; } unless( defined( $queryParser ) ) { require TWiki::Query::Parser; $queryParser = new TWiki::Query::Parser(); } my $error = ''; my $query; try { $query = $queryParser->parse( $queryString ); } catch TWiki::Infix::Error with { return "\$query() format parse error " . shift->stringify(); }; return "\$query() format error $error" unless $query; my $result = $query->evaluate( tom => $meta, data => $meta ); return $encodeType ? TWiki::_encode( $encodeType, _queryResultToString( $result, $strQuote ) ) : _queryResultToString( $result, $strQuote ); } sub _queryResultToString { my( $a, $strQuote ) = @_; return 'undef' unless( defined $a ); if( ref( $a ) eq 'ARRAY' ) { return join(', ', map { _queryResultToString( $_, $strQuote ) } @$a ); } elsif( UNIVERSAL::isa( $a, 'TWiki::Meta' ) ) { return $a->stringify(); } elsif( ref( $a ) eq 'HASH' ) { return '{'.join(', ', map { "$_=>"._queryResultToString($a->{$_}, $strQuote) } keys %$a).'}' } else { return "$strQuote$a$strQuote"; } } =pod ---++ StaticMethod displayFormField( $meta, $args ) -> $text Parse the arguments to a $formfield specification and extract the relevant formfield from the given meta data. * =args= string containing name of form field In addition to the name of a field =args= can be appended with a commas followed by a string format (\d+)([,\s*]\.\.\.)?). This supports the formatted search function $formfield and is used to shorten the returned string or a hyphenated string. =cut sub displayFormField { my( $meta, $args, $skipRendering ) = @_; my $render = ''; if( $args =~ s/\s*,\s*render:(\w+)// ) { $render = $1; } my $attrs = { protectdollar => 1, showhidden => 1 }; if( $args =~ s/\s*,\s*encode:(\w+)// ) { $attrs->{encode} = $1; } my $name = $args; if( $name =~ /\,/ ) { my @params = split( /\,\s*/, $name, 2 ); if( @params > 1 ) { $name = $params[0] || ''; $attrs->{break} = $params[1] || 1; } } return '' unless $name; # Item7616: Reverting partly to TWiki-5.1's Item6082 fix for performance. # $meta->renderFormFieldForDisplay is slow, doubling the time of a SEARCH # with formfields. Let's avoid it where feasible, e.g. avoid in sorting and # if no formatting needed. if( $skipRendering || ! ( $render eq 'display' || $attrs->{break} || $attrs->{encode} ) ) { my $form = $meta->get( 'FORM' ); my $fields; if( $form ) { $fields = $meta->get( 'FIELD', $name ); unless( $fields ) { # not a valid field name, maybe it's a title. require TWiki::Form; $fields = $meta->get( 'FIELD', TWiki::Form::fieldTitle2FieldName( $name ) ); } } if( ref( $fields ) eq 'HASH' ){ # fix for Item6167, this line was not added for fixing Item6082 my $val = $fields->{value}; $val =~ s/\$(n|nop|quot|percnt|dollar)/\$$1/g; return $val; } return ''; # form field not found } return $meta->renderFormFieldForDisplay( $name, '$value', $attrs ); } # Returns the topic revision info of the base version, # attributes are 'date', 'username', 'wikiname', # 'wikiusername'. Revision info is cached in the search # object for speed. sub _getRev1Info { my ( $this, $web, $topic, $attr, $info ) = @_; my $key = $web . '.' . $topic; my $store = $this->{session}->{store}; my $users = $this->{session}->{users}; unless ( $info->{webTopic} && $info->{webTopic} eq $key ) { require TWiki::Meta; my $meta = new TWiki::Meta( $this->{session}, $web, $topic ); my ( $d, $u ) = $meta->getRevisionInfo( 1 ); $info->{date} = $d; $info->{user} = $u; $info->{webTopic} = $key; } if( $attr eq 'username' ) { return $users->getLoginName( $info->{user} ); } if( $attr eq 'wikiname' ) { return $users->getWikiName( $info->{user} ); } if( $attr eq 'wikiusername' ) { return $users->webDotWikiName( $info->{user} ); } if( $attr eq 'date' ) { require TWiki::Time; return TWiki::Time::formatTime( $info->{date} ); } return 1; } # With the same argument as $pattern, returns a number which is the count of # occurences of the pattern argument. sub _countPattern { my ( $theText, $thePattern ) = @_; $thePattern =~ s/([^\\])([\$\@\%\&\#\'\`\/])/$1\\$2/go; # escape some special chars $thePattern =~ /(.*)/; # untaint $thePattern = $1; my $OK = 0; eval { # counting hack, see: http://dev.perl.org/perl6/rfc/110.html $OK = () = $theText =~ /$thePattern/g; }; return $OK; } 1;