One guy's take on the web, programming, cigars, politics, Philadelphia, and whatever else comes to mind.
Google has started a standardized sitemap protocol. They have a Python-based solution to automatically create a sitemap. However, I don't believe I have access to Python on this server. My solution: write my own in CFML. So here's how I do it.
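I use a CFC named sitemap.cfc that does four things: it converts a directory tree into a list of URLs (parse_directory), assigns each URL a priority and change frequency (set_google_sitemap_attributes), writes the sitemap XML file (create_google_sitemap), and runs all three steps in a single call (convert_directory_to_sitemap).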
Calling this code:
<!---
    Example call: scans application.file_root, keeps only .cfm and .xml files,
    skips the directories/files listed in dir_ex_list and file_ex_list (both must
    be set by the calling page before this runs), and writes the result to
    sitemap.xml under application.file_root.
    NOTE: convert_directory_to_sitemap is declared returntype="void", so the
    returnvariable "url_list" does not receive a value.
    NOTE: "extenstions_included" is spelled this way in the component's argument
    list; the name must match exactly.
--->
1: <cfinvoke component="cfc.sitemap" method="convert_directory_to_sitemap" returnvariable="url_list">
2: <cfinvokeargument name="file_root" value="#application.file_root#" />
3: <cfinvokeargument name="url_root" value="#application.url#/" />
4: <cfinvokeargument name="extenstions_included" value="cfm,xml" />
5: <cfinvokeargument name="directories_excluded" value="#dir_ex_list#" />
6: <cfinvokeargument name="files_excluded" value="#file_ex_list#" />
7: <cfinvokeargument name="sitemap_file" value="#application.file_root#\sitemap.xml" />
8: </cfinvoke>
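Running it writes the sitemap to the file named by sitemap_file (the method itself returns nothing). Here is the component: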
<cfcomponent hint="Encapsulates all of the functionality involved with the creation of a Google sitemap.">

    <!---
        sitemap.cfc

        parse_directory                 file tree  -> query of (url, datelastmodified)
        set_google_sitemap_attributes   adds priority / changefreq columns to that query
        create_google_sitemap           writes the query out as a sitemap XML file
        convert_directory_to_sitemap    runs the three steps above in order

        NOTE: the argument names spelled "extenstions_*" are part of the public
        interface and are kept as-is so existing callers continue to work.
        All function-local variables are var-scoped so that concurrent calls on a
        shared instance do not overwrite each other's state.
    --->

    <!---
        Lists every file under file_root (recursively), applies the optional
        include/exclude filters, and maps each remaining file to a URL under url_root.

        Filters are exact, comma-delimited value lists:
          extenstions_*  - file extension without the dot (text after the last ".")
          directories_*  - the "directory" value reported by cfdirectory
          files_*        - the bare file name
        An empty list means "no filter".

        Returns a query with columns: url, datelastmodified.
    --->
    <cffunction access="public" name="parse_directory" output="false" returntype="query" hint="Converts individual files from directory structure into urls.">
        <cfargument name="file_root" type="string" required="yes" hint="The file root of the directory to scan.">
        <cfargument name="url_root" type="string" required="yes" hint="The url root of the site.">
        <cfargument name="extenstions_excluded" type="string" required="no" default="" hint="Extenstions to Exclude.">
        <cfargument name="extenstions_included" type="string" required="no" default="" hint="Extenstions to which to restrict inclusion.">
        <cfargument name="directories_excluded" type="string" required="no" default="" hint="Directory to Exclude">
        <cfargument name="directories_included" type="string" required="no" default="" hint="Directory to which to restrict inclusion.">
        <cfargument name="files_excluded" type="string" required="no" default="" hint="Files to Exclude">
        <cfargument name="files_included" type="string" required="no" default="" hint="Files to which to restrict inclusion.">

        <cfset var files = "">
        <cfset var matching_files = "">
        <cfset var extenstionArray = ArrayNew(1)>
        <cfset var url_info_query = QueryNew("url,datelastmodified")>
        <cfset var relative_path = "">
        <!--- Strip trailing slashes once so exactly one "/" joins root and path. --->
        <cfset var url_base = REReplace(arguments.url_root, "/+$", "")>

        <cfdirectory directory="#arguments.file_root#" action="list" name="files" filter="*" recurse="yes">

        <!--- Derive each file's extension: the text after the LAST dot, any length
              ("html", "js", "cfm"); names with no dot or a trailing dot get "". --->
        <cfloop query="files">
            <cfif Find(".", files.name) AND Right(files.name, 1) NEQ ".">
                <cfset extenstionArray[files.CurrentRow] = ListLast(files.name, ".")>
            <cfelse>
                <cfset extenstionArray[files.CurrentRow] = "">
            </cfif>
        </cfloop>

        <cfset QueryAddColumn(files, "extenstion", extenstionArray)>

        <!--- Filter with a query of queries; list values are bound with
              cfqueryparam so quotes inside a value cannot break the statement. --->
        <cfquery name="matching_files" dbtype="query">
            select *
            from files
            where type != 'Dir'
            <cfif Len(arguments.extenstions_excluded)>
                and extenstion not in (<cfqueryparam value="#arguments.extenstions_excluded#" cfsqltype="cf_sql_varchar" list="yes">)
            </cfif>
            <cfif Len(arguments.extenstions_included)>
                and extenstion in (<cfqueryparam value="#arguments.extenstions_included#" cfsqltype="cf_sql_varchar" list="yes">)
            </cfif>
            <cfif Len(arguments.directories_excluded)>
                and directory not in (<cfqueryparam value="#arguments.directories_excluded#" cfsqltype="cf_sql_varchar" list="yes">)
            </cfif>
            <cfif Len(arguments.directories_included)>
                and directory in (<cfqueryparam value="#arguments.directories_included#" cfsqltype="cf_sql_varchar" list="yes">)
            </cfif>
            <cfif Len(arguments.files_excluded)>
                and name not in (<cfqueryparam value="#arguments.files_excluded#" cfsqltype="cf_sql_varchar" list="yes">)
            </cfif>
            <!--- Guard on files_included (the original tested directories_included here). --->
            <cfif Len(arguments.files_included)>
                and name in (<cfqueryparam value="#arguments.files_included#" cfsqltype="cf_sql_varchar" list="yes">)
            </cfif>
        </cfquery>

        <cfloop query="matching_files">
            <!--- Work on the path relative to file_root only, so that slash
                  normalisation can never touch the "://" of the url root. --->
            <cfset relative_path = Replace(matching_files.directory & "\" & matching_files.name, arguments.file_root, "", "ONE")>
            <cfset relative_path = Replace(relative_path, "\", "/", "ALL")>
            <cfset relative_path = REReplace(relative_path, "/{2,}", "/", "ALL")>
            <cfset relative_path = REReplace(relative_path, "^/", "")>

            <cfset QueryAddRow(url_info_query)>
            <cfset QuerySetCell(url_info_query, "url", url_base & "/" & relative_path)>
            <cfset QuerySetCell(url_info_query, "datelastmodified", matching_files.datelastmodified)>
        </cfloop>

        <cfreturn url_info_query>
    </cffunction>

    <!---
        Serialises url_list to a sitemap XML document and writes it to sitemap_file.

        url_list must have "url" and "datelastmodified" columns; "changefreq" and
        "priority" columns are optional and are emitted only for rows where they
        are non-empty. Lines end in CRLF and the file is written as UTF-8 to match
        the XML declaration.
    --->
    <cffunction access="public" name="create_google_sitemap" output="false" returntype="void" hint="Create a file based Google sitemap.">
        <cfargument name="url_list" type="query" required="no" default="" hint="The url query.">
        <cfargument name="sitemap_file" type="string" required="no" default="" hint="The filename.">

        <cfset var crlf = chr(13) & chr(10)>
        <cfset var entries = arguments.url_list>
        <!--- Check the query's own column list rather than isDefined(), which
              would also match same-named variables in URL, form or other scopes. --->
        <cfset var has_changefreq = ListFindNoCase(entries.columnList, "changefreq") GT 0>
        <cfset var has_priority = ListFindNoCase(entries.columnList, "priority") GT 0>
        <cfset var loop_nugget = "">
        <cfset var sitemap = "">

        <cfset sitemap = "<?xml version='1.0' encoding='UTF-8'?>" & crlf>
        <!--- Each attribute fragment ends with a space; without it the attributes
              run together and the document is not well-formed. --->
        <cfset sitemap = sitemap & "<urlset xmlns=""http://www.google.com/schemas/sitemap/0.84"" ">
        <cfset sitemap = sitemap & "xmlns:xsi=""http://www.w3.org/2001/XMLSchema-instance"" ">
        <cfset sitemap = sitemap & "xsi:schemaLocation=""http://www.google.com/schemas/sitemap/0.84 http://www.google.com/schemas/sitemap/0.84/sitemap.xsd"">" & crlf>

        <cfloop query="entries">
            <cfset loop_nugget = "<url>" & crlf>
            <cfset loop_nugget = loop_nugget & "<loc>" & XmlFormat(entries.url) & "</loc>" & crlf>
            <cfset loop_nugget = loop_nugget & "<lastmod>" & DateFormat(entries.datelastmodified, "yyyy-mm-dd") & "</lastmod>" & crlf>
            <cfif has_changefreq AND Len(entries.changefreq) GT 0>
                <cfset loop_nugget = loop_nugget & "<changefreq>" & XmlFormat(entries.changefreq) & "</changefreq>" & crlf>
            </cfif>
            <cfif has_priority AND Len(entries.priority) GT 0>
                <cfset loop_nugget = loop_nugget & "<priority>" & XmlFormat(entries.priority) & "</priority>" & crlf>
            </cfif>
            <cfset loop_nugget = loop_nugget & "</url>" & crlf>
            <cfset sitemap = sitemap & loop_nugget>
        </cfloop>

        <cfset sitemap = sitemap & "</urlset>" & crlf>

        <cffile action="write" addnewline="no" file="#arguments.sitemap_file#" output="#sitemap#" fixnewline="no" charset="utf-8">
    </cffunction>

    <!---
        Adds "priority" and "changefreq" columns to url_list and returns it.
        The columns are added to the query that was passed in.

        priority:   1.0 for home_url, 0.9 for any other url containing "index",
                    0.5 for everything else.
        changefreq: "never"   for archive pages that are not an index,
                    "daily"   for index pages outside the archives,
                    "monthly" for everything else (including archive indexes).

        home_url is optional and defaults to the site this component was written for.
    --->
    <cffunction access="public" name="set_google_sitemap_attributes" output="false" returntype="query" hint="Munges the query to produce priorities and changefrequency.">
        <cfargument name="url_list" type="query" required="yes" hint="The Query to munge.">
        <cfargument name="home_url" type="string" required="no" default="http://www.numtopia.com/terry/index.cfm" hint="Url of the home page, which gets the highest priority.">

        <cfset var pages = arguments.url_list>
        <cfset var priorityArray = ArrayNew(1)>
        <cfset var changeArray = ArrayNew(1)>
        <cfset var is_index = false>
        <cfset var is_archive = false>

        <cfloop query="pages">
            <cfset is_index = FindNoCase("index", pages.url) GT 0>
            <cfset is_archive = FindNoCase("archives", pages.url) GT 0>

            <cfif Len(arguments.home_url) AND FindNoCase(arguments.home_url, pages.url)>
                <cfset priorityArray[pages.CurrentRow] = 1.0>
            <cfelseif is_index>
                <cfset priorityArray[pages.CurrentRow] = 0.9>
            <cfelse>
                <cfset priorityArray[pages.CurrentRow] = 0.5>
            </cfif>

            <cfif is_archive AND NOT is_index>
                <cfset changeArray[pages.CurrentRow] = "never">
            <cfelseif is_index AND NOT is_archive>
                <cfset changeArray[pages.CurrentRow] = "daily">
            <cfelse>
                <cfset changeArray[pages.CurrentRow] = "monthly">
            </cfif>
        </cfloop>

        <cfset QueryAddColumn(pages, "priority", priorityArray)>
        <cfset QueryAddColumn(pages, "changefreq", changeArray)>

        <cfreturn pages>
    </cffunction>

    <!---
        One-call entry point: scans file_root, decorates the resulting urls with
        priority/changefreq, and writes the sitemap to sitemap_file.
        The filter arguments are passed through unchanged to parse_directory.
    --->
    <cffunction access="public" name="convert_directory_to_sitemap" output="false" returntype="void" hint="Converts the directory structure of a site into a Google sitemap. ">
        <cfargument name="file_root" type="string" required="yes" hint="The file root of the directory to scan.">
        <cfargument name="url_root" type="string" required="yes" hint="The url root of the site.">
        <cfargument name="extenstions_excluded" type="string" required="no" default="" hint="Extenstions to Exclude.">
        <cfargument name="extenstions_included" type="string" required="no" default="" hint="Extenstions to which to restrict inclusion.">
        <cfargument name="directories_excluded" type="string" required="no" default="" hint="Directory to Exclude">
        <cfargument name="directories_included" type="string" required="no" default="" hint="Directory to which to restrict inclusion.">
        <cfargument name="files_excluded" type="string" required="no" default="" hint="Files to Exclude">
        <cfargument name="files_included" type="string" required="no" default="" hint="Files to which to restrict inclusion.">
        <cfargument name="sitemap_file" type="string" required="no" default="" hint="The filename.">

        <cfset var url_list = "">

        <cfinvoke method="parse_directory" returnvariable="url_list" argumentcollection="#arguments#" />

        <cfinvoke method="set_google_sitemap_attributes" returnvariable="url_list">
            <cfinvokeargument name="url_list" value="#url_list#" />
        </cfinvoke>

        <cfinvoke method="create_google_sitemap">
            <cfinvokeargument name="url_list" value="#url_list#" />
            <cfinvokeargument name="sitemap_file" value="#arguments.sitemap_file#" />
        </cfinvoke>
    </cffunction>

</cfcomponent>