Texis Web Script source code for:
/texis/site/demos/rsssearch.vs
Note: Click on links to view the documentation for HTML tags
which are special to the Vortex compiler.
<SCRIPT LANGUAGE=vortex>
<uses demonav=demonav><!--simple look and feel module-->
<entryfunc=init><!-- init function for every invocation -->
<!-- ====================================================================== -->
<!-- Per-request setup: entryfunc names this function, so it runs on every invocation of the script -->
<a name=init private>
<strfmt "%s/texis" $SERVER_ROOT><db=$ret><!-- select the database located at /texis under the server root -->
</a>
<!-- ====================================================================== -->
<!-- Primary entry point: draws the page (styles, instructions, form) and, when a feed url was submitted, the results for it -->
<a name=main>
<demolook title="RSS Search"><!-- open the site demo boilerplate -->
<mystyle>
<h2>Example of RSS feed processing and real-time profiling</h2><p>
<instructions>
<p>
<searchform>
<if "" neq $feedurl><!-- a feed url was submitted -->
<rex ">>=http=s?://=[\alnum\-.]{3,}" $feedurl /><!-- expect http:// or https:// followed by 3 or more host-like characters -->
<if "" neq $ret><!-- the url passed the check -->
<div class="Results">
<h2>Results</h2><p>
<flush>
<dorss feedurl=$feedurl query=$query><!-- fetch, parse and display the feed -->
</div>
<else><!-- the url did not pass the check -->
<p><b>Please enter an http or https URL.</b></p>
</if>
</if>
</demolook><!-- close the site demo boilerplate -->
</a>
<!-- ====================================================================== -->
<!-- Output the styles custom to this demo:
     Results is the bordered box around the whole result list,
     Result is the shaded box around one feed item,
     Hit is a highlighted query match.
     Uniform per-side values are written with the border and padding
     shorthands; the rendered result is the same as listing each side. -->
<a name=mystyle>
<style type="text/css">
.Results {
border: 1px solid black;
padding: 4px;
}
.Result {
background-color: #ddddee;
padding: 0 4px 2px;
}
.Hit {
background: blue;
color: white;
}
</style>
</a>
<!-- ====================================================================== -->
<!-- Output the search form. The feedurl and query inputs are pre-filled from
     the variables of the same name, so a submitted request shows what the
     user entered. Fields submitted: feedurl, media, query. -->
<a name=searchform>
<form name=rssform method=POST action="$url"><!-- the Clear button below addresses this form by its name -->
RSS or Atom Feed URL:<br>
<input size=60 name="feedurl" value="$feedurl"><br>
Examples:<br>
<tt> http://rss.news.yahoo.com/rss/topstories<br>
</tt><br>
Render: <select name=media>
<$values="html" "text"><!-- values submitted as media -->
<$displays="full HTML and images" "just plain text"><!-- labels for those values, in the same order -->
<options $values $media $displays><!-- NOTE(review): presumably pre-selects the option equal to the current media value; confirm against the options documentation -->
</select><br>
Search for: <input size=20 name="query" value="$query"> <i>Optional</i><br>
<input type=submit value="Submit">
<input type=button value="Clear" onclick="document.rssform.feedurl.value=''; document.rssform.query.value=''"><!-- empties the two text inputs in the browser; the media choice is left alone -->
</form>
</a>
<!-- ====================================================================== -->
<!-- Tell the user how to use the form. Static text only; takes no parameters.
     The profiler tag name in the link text is written with character
     entities so it is printed as text instead of being read as a tag. -->
<a name=instructions>
Enter the URL for an
<a href="http://en.wikipedia.org/wiki/RSS_(file_format)">RSS</a>
or
<a href="http://en.wikipedia.org/wiki/Atom_%28standard%29">Atom</a>
feed and an optional query. The feed will be queried and its items
displayed. If a query is entered, only items matching the query
will be displayed.
<p>
In a larger <a href="/texis/site/tutorial/Profiling.html">profiling</a>
application one would place queries into
a table and index it for use with the
<a href="/site/vortexman/profiler.html">&lt;profiler&gt;</a>
vortex function.
<p>
Alternatively one might fetch data from the feed and place it into a
database and index it for quick retrospective searches. Or a combination
of the approaches may be used.
</a>
<!-- ====================================================================== -->
<!-- Process a feed url and optional query: download the feed, parse it into
     records, then display the items (filtered by the query when one is given).
     Parameters: feedurl is the feed address, query is the optional search text. -->
<a name=dorss feedurl query>
<fetchrss feedurl=$feedurl><!-- download the feed; the result is the xml, or an empty string after a message was printed -->
<if $ret neq ""><!-- got some feed data -->
<if "" neq $NewEtag>Etag: $NewEtag<br></if><!-- NewEtag and NewLastMod are set by fetchrss from the response headers -->
<if "" neq $NewLastMod>Last Modified: $NewLastMod<br></if>
<parserss feedurl=$feedurl data=$ret><!-- parse the xml into records -->
<if $loop gt 0><!-- there are some items; NOTE(review): this reads the loop counter left behind by parserss, which returns no value of its own; confirm it is reliable -->
<processrss query=$query><!-- display and optionally search items -->
</if>
</if>
</a>
<!-- ====================================================================== -->
<!-- Download the feed page.
     Parameter: feedurl is the address to fetch (page size capped at 4000000 bytes).
     Returns the xml with any DOCTYPE declaration removed, or an empty string
     after printing a message when the fetch reported an error or the page
     was empty.
     Side effect: on success sets NewEtag and NewLastMod from the ETag and
     Last-Modified response headers. -->
<a name=fetchrss feedurl>
<local rawrssdata rssdata>
<urlcp maxpgsize 4000000><!-- limit to 4MB -->
<fetch urls=$feedurl /><!-- get the page -->
<$rawrssdata=$ret><!-- remember the raw xml for later -->
<!-- test for a fetch error before testing for an empty page: a failed fetch
     may come back with no body, and checking emptiness first would hide
     the error number and message from the user -->
<urlinfo errnum>
<if $ret neq 0><!-- page had an error -->
<div style="background-color: #eedddd;">
Got error $ret (<urlinfo errmsg>$ret)<br>
<if $rawrssdata neq ""><!-- only show the raw section when something came back -->
<hr>raw data:<p>$rawrssdata
</if>
</div>
<return "">
</if>
<!-- remove doctype declaration so it doesn't confuse timport xml parse -->
<sandr "<\!DOCTYPE=[^>]+>=" "" $rawrssdata>
<$rssdata=$ret>
<if $rssdata eq ""><!-- page was empty -->
<div style="background-color: #eeeedd;">
Got nothing from $feedurl<br>
</div>
<return "">
</if>
<!-- remember a couple useful header values -->
<urlinfo header ETag><$NewEtag=$ret>
<urlinfo header Last-Modified><$NewLastMod=$ret>
<return $rssdata><!-- return the xml for processing -->
</a>
<!-- ====================================================================== -->
<!-- Parse the xml RSS or Atom items into records.
     Parameters: feedurl is passed through to parserssitem for every record,
     data is the xml text to parse.
     Each schema is tried in turn, first on the data as given and then, if that
     produced nothing, on the data with a leading UTF-8 byte-order mark removed.
     Prints a message when no schema produced any record.
     NOTE(review): no value is returned; the caller appears to rely on the
     loop counter left behind by this function. -->
<a name=parserss feedurl data>
<local rssschema found prevputmsg>
<!-- 3 timport schemas for the 3 major variants (2 RSS, 1 Atom).
Try them in turn to see which works. When you get some records
out you know you have the right one and can stop looking. -->
<$rssschema="
# rss feed
xml nohtml
xmldatasetlevel 2
field Title varchar rss/channel/item/title ''
field Description varchar rss/channel/item/description ''
field Link varchar rss/channel/item/link ''
field PubDate varchar rss/channel/item/pubDate ''
field dcdate varchar rss/channel/item/dc:date ''
field Author varchar rss/channel/item/author ''
"
"
# rss feed
xml nohtml
xmldatasetlevel 1
field Title varchar rdf:RDF/item/title ''
field Description varchar rdf:RDF/item/description ''
field Link varchar rdf:RDF/item/link ''
field PubDate varchar rdf:RDF/item/pubDate ''
field dcdate varchar rdf:RDF/item/dc:date ''
field Author varchar rdf:RDF/item/author ''
"
"
# atom feed
xml nohtml
xmldatasetlevel 1
field Title varchar feed/entry/title ''
field Description varchar feed/entry/summary ''
field Link varchar feed/entry/link@href ''
field PubDate varchar feed/entry/updated ''
field dcdate varchar feed/entry/published ''
field Author varchar feed/entry/author/name ''
field Content varchar feed/entry/content ''
"
>
<vxcp putmsg log off><!-- prevent logging of xml errors from bad sources -->
<$prevputmsg=$ret><!-- remember old setting for later restoration -->
<$found=0><!-- did we find a parsable format? -->
<loop $rssschema><!-- for each schema -->
<timport $rssschema $data><!-- try it -->
<parserssitem feedurl=$feedurl><!-- got a record, parse the content -->
</timport>
<$found=$loop><!-- keep the loop counter left by the timport block; presumably the number of records it produced -->
<if $loop eq 0><!-- try removing potential byte-order mark from xml -->
<sandr ">>=\xEF\xBB\xBF" "" $data><!-- the result is used only for this retry; data itself is not changed -->
<timport $rssschema $ret><!-- try same schema again -->
<parserssitem feedurl=$feedurl><!-- got a record, parse the content -->
</timport>
<$found=$loop>
</if>
<if $loop neq 0><!-- got some records from this schema -->
<break><!-- stop looking for schema -->
</if>
</loop>
<vxcp putmsg log $prevputmsg><!-- restore previous logging setting -->
<if $found eq 0><!-- the last attempt made produced no records -->
<div style="background-color: #eeeedd;">
No RSS or Atom items found in feed.<br>
</div>
</if>
</a>
<!-- ====================================================================== -->
<!-- Tidy the fields of the current RSS or Atom item: pick the description
     text, re-parse it as a standalone document relative to the feed url,
     and normalize the publication date.
     Parameter: feedurl is the url handed to fetch together with the description. -->
<a name=parserssitem feedurl>
<!-- Description wins over Content; PubDate wins over dcdate. -->
<if "" eq $Description><$Description=$Content></if><!-- atom feed: fall back to Content -->
<urlcp reparentmode abs><!-- rewrite urls to include host -->
<fetch urls=$feedurl downloaddoc=$Description /><!-- reparse Description as standalone HTML -->
<if $media eq "text"><urlinfo text></if><!-- text rendering: keep just the text to avoid the HTML overload -->
<$Description=$ret>
<if "" neq $PubDate><!-- PubDate present -->
<sandr "\." " " $PubDate><$PubDate=$ret><!-- change . to space -->
<else><!-- no PubDate, use dcdate instead -->
<$PubDate=$dcdate>
</if>
<sandr "....-..-..=T=..:..:..=.*" "\1 \3" $PubDate><$PubDate=$ret><!-- reformat date: keep the date and the time, drop the T between them and whatever follows the seconds -->
</a>
<!-- ====================================================================== -->
<!-- Search and display the downloaded and parsed records.
     Parameter: query is the optional search text; when it is empty every
     record is displayed. Prints the record count afterwards and, when a
     query was given, how many records were suppressed. -->
<a name=processrss query>
<local show>
<$skipped=0><!-- count of records not displayed -->
<loop $Title $Description $Link $PubDate $Author><!-- for each record -->
<$show=1><!-- display the record unless the query rules it out -->
<if "" neq $query><!-- there's a query -->
<!-- the fields are joined and searched together as one text; a fancier application might query each field separately -->
<sum "%s " $Title $Description $Author>
<if $ret not like $query><!-- not a match -->
<$skipped=($skipped+1)><$show=0><!-- count it and suppress the display -->
</if>
</if>
<if $show eq 1><showrssitem></if><!-- display this record -->
</loop>
$loop items processed.
<if "" neq $query>
$skipped items suppressed due to non-match.
</if>
<p>
</a>
<!-- ====================================================================== -->
<!-- Display an individual RSS or Atom record as a Result box: linked title,
     then date and author when present, then the description.
     Takes no parameters; reads the current Link, Title, PubDate, Author,
     Description and query variables. -->
<a name=showrssitem>
<div class="Result">
<!-- Link is split on newline characters and a heading is printed per value returned. NOTE(review): the original remark here said that multiple Links are all shown, but max=1 appears to limit the split; confirm which is intended -->
<split nonempty max=1 '\x0a' $Link>
<h3><a href="$ret"><showhits data=$Title query=$query></a></h3>
</split>
<!-- only show the meta fields if they have data -->
<if "" neq $PubDate>Publication date: $PubDate<br></if>
<if "" neq $Author>Author: <showhits data=$Author query=$query><br></if>
<showhits data=$Description query=$query html=y><!-- html=y selects the showhits format that leaves tag characters unescaped -->
<div style="clear: both"></div><!-- presumably ends any floats coming from the description markup -->
</div>
</a>
<!-- ====================================================================== -->
<!-- Print some text or html with the query matches highlighted.
     Parameters: data is the content to print, query is the search text,
     html defaults to n, which HTML-escapes tag characters so they are shown
     to the user; any other value leaves them alone. -->
<a name=showhits data query html=n>
<local hitfmt>
<$hitfmt="%mIhs"><!-- start with the format that leaves HTML tag chars alone -->
<if $html eq "n"><$hitfmt="%mIhH"></if><!-- plain text: switch to the format that HTML escapes tag chars -->
<strfmt $hitfmt $query $data>
<!-- swap the standard vortex hit anchors for spans of class Hit so they can be styled -->
<sandr ">><a name\=hit=\digit+ href\=#hit=\digit+>=!</a>+</a>"
"<span class=Hit>\6</span>" $ret>
<fmt "%!hV" $ret><!-- decode UTF-8, html encoding out-of-range chars -->
</a>
<!-- ====================================================================== -->
</script>
The Source Viewer is also a Code Example.
Click Here to see its source.
Copyright © 1992-1999 Thunderstone Software