MOM6
MOM_write_cputime.F90
1 !> A module to monitor the overall CPU time used by MOM6 and project when to stop the model
3 
4 ! This file is part of MOM6. See LICENSE.md for the license.
5 
use mom_coms, only : sum_across_pes, pe_here, num_pes
use mom_error_handler, only : mom_error, mom_mesg, fatal, is_root_pe
use mom_io, only : open_file, append_file, ascii_file, writeonly_file
use mom_file_parser, only : get_param, log_param, log_version, param_file_type
use mom_time_manager, only : time_type, get_time, operator(>)
11 
implicit none
private

public :: write_cputime, mom_write_cputime_init, write_cputime_start_clock

!-----------------------------------------------------------------------

!> The SYSTEM_CLOCK count rate (its COUNT_RATE argument), in clock ticks per second.
!! The initial value is replaced by every call to SYSTEM_CLOCK made in this module.
integer, save :: clocks_per_sec = 1000
!> The largest count that SYSTEM_CLOCK can return (its COUNT_MAX argument), in clock ticks.
!! The initial value is replaced by every call to SYSTEM_CLOCK made in this module.
integer, save :: max_ticks = 1000
20 
!> The control structure holding the state used to monitor and report CPU time
type, public :: write_cputime_cs
  private
  real :: maxcpu  !< The maximum amount of time per processor for which MOM should run
                  !! before saving a restart file and quitting with a return value
                  !! that indicates that further execution is required to complete
                  !! the simulation, in wall-clock seconds.  The stopping step is
                  !! only projected when this is positive.
  type(time_type) :: start_time  !< The model time at the start of the simulation, as
                  !! passed to MOM_write_cputime_init.
  real :: startup_cputime  !< The clock ticks measured by the first call to write_cputime,
                  !! after combining across PEs.
  real :: prev_cputime = 0.0  !< The SYSTEM_CLOCK count at the most recent measurement,
                  !! in clock ticks.
  real :: dn_dcpu_min = -1.0  !< The rate of time steps per clock tick used to project
                  !! when the model should stop.  A negative value means
                  !! that no rate has been determined yet.
  real :: cputime2 = 0.0  !< The clock ticks accumulated over all calls to write_cputime,
                  !! after combining across PEs.
  integer :: previous_calls = 0  !< The number of times write_cputime has been called.
  integer :: prev_n = 0  !< The value of the time step number n from the last call.
  integer :: filecpu_ascii  !< The unit number of the CPU time file.
  character(len=200) :: cpufile  !< The name of the CPU time file.
end type write_cputime_cs
39 
40 contains
41 
!> Record the current SYSTEM_CLOCK count as the reference point for later CPU time
!! measurements, allocating the control structure first if it does not exist yet.
subroutine write_cputime_start_clock(CS)
  type(write_cputime_cs), pointer :: CS !< The control structure for this module; it is
                                        !! allocated here if it is not already associated.

  ! Local variables
  integer :: ticks_now ! The current clock count returned by SYSTEM_CLOCK [ticks]

  if (.not.associated(CS)) then
    allocate(CS)
  endif

  ! This call also refreshes the module-level clock rate and maximum clock count.
  call system_clock(count=ticks_now, count_rate=clocks_per_sec, count_max=max_ticks)
  CS%prev_cputime = ticks_now
end subroutine write_cputime_start_clock
52 
!> Initialize the MOM_write_cputime module: start the clock if that has not been done yet,
!! read and log the run-time parameters, and record the start time of the simulation.
subroutine mom_write_cputime_init(param_file, directory, Input_start_time, CS)
  type(param_file_type), intent(in) :: param_file !< A structure to parse for run-time parameters
  character(len=*),      intent(in) :: directory  !< The directory where the CPU time file goes.
                                                  !! It is prepended to the file name as-is, so
                                                  !! it should end with a path separator.
  type(time_type),       intent(in) :: Input_start_time !< The start model time of the simulation.
  type(write_cputime_cs), pointer   :: CS         !< A pointer that may be set to point to the
                                                  !! control structure for this module.

  ! Local variables
! This include declares and sets the variable "version".
#include "version_variable.h"
  character(len=40) :: mdl = 'MOM_write_cputime' ! This module's name.

  ! If write_cputime_start_clock has not been called already, allocate the control
  ! structure and record the reference clock count now.  An existing control
  ! structure keeps the reference count that it already holds.
  if (.not.associated(CS)) call write_cputime_start_clock(CS)

  ! Read all relevant parameters and write them to the model log.
  call log_version(param_file, mdl, version, "")
  call get_param(param_file, mdl, "MAXCPU", CS%maxcpu, &
                 "The maximum amount of cpu time per processor for which "//&
                 "MOM should run before saving a restart file and "//&
                 "quitting with a return value that indicates that a "//&
                 "further run is required to complete the simulation. "//&
                 "If automatic restarts are not desired, use a negative "//&
                 "value for MAXCPU.  MAXCPU has units of wall-clock "//&
                 "seconds, so the actual CPU time used is larger by a "//&
                 "factor of the number of processors used.", &
                 units="wall-clock seconds", default=-1.0)
  call get_param(param_file, mdl, "CPU_TIME_FILE", CS%CPUfile, &
                 "The file into which CPU time is written.",default="CPU_stats")
  CS%CPUfile = trim(directory)//trim(CS%CPUfile)
  call log_param(param_file, mdl, "directory/CPU_TIME_FILE", CS%CPUfile)
#ifdef STATSLABEL
  ! The macro name has to be spelled exactly as in the #ifdef above, because the
  ! preprocessor is case-sensitive and would otherwise leave an undeclared name here.
  CS%CPUfile = trim(CS%CPUfile)//"."//trim(adjustl(STATSLABEL))
#endif

  CS%Start_time = Input_start_time

end subroutine mom_write_cputime_init
96 
!> This subroutine assesses how much CPU time the model has taken, writes a record of it to the
!! CPU time file from the root PE, and, when a positive MAXCPU has been set, resets nmax so that
!! the model saves a restart file and stops itself before the CPU time runs out.
subroutine write_cputime(day, n, nmax, CS)
  type(time_type), intent(inout) :: day  !< The current model time.
  integer,         intent(in)    :: n    !< The time step number of the current execution.
  integer,         intent(inout) :: nmax !< The number of iterations after which to stop so
                                         !! that the simulation will not run out of CPU time.
  type(write_cputime_cs), pointer :: CS  !< The control structure set up by a previous
                                         !! call to MOM_write_cputime_init.

  ! Local variables
  real    :: d_cputime    ! The change in the clock count since the last call to this
                          ! subroutine, combined across PEs [ticks]
  integer :: new_cputime  ! The clock count returned by SYSTEM_CLOCK [ticks]
  real    :: reday        ! The model time expressed as a real number of days [days]
  integer :: start_of_day ! The part of the model time within the current day, treated
                          ! below as seconds [s]
  integer :: num_days     ! The whole-day part of the model time [days]

  if (.not.associated(CS)) call mom_error(fatal, &
         "write_cputime: Module must be initialized before it is used.")

  call system_clock(new_cputime, clocks_per_sec, max_ticks)
  !   The following lines extract useful information even if the clock has rolled
  ! over, assuming a 32-bit SYSTEM_CLOCK.  With more bits, rollover is essentially
  ! impossible. Negative fluctuations of less than 10 seconds are not interpreted
  ! as the clock rolling over.  This should be unnecessary but is sometimes needed
  ! on the GFDL SGI/O3k.
  if (new_cputime < CS%prev_cputime-(10.0*clocks_per_sec)) then
    d_cputime = new_cputime - CS%prev_cputime + max_ticks
  else
    d_cputime = new_cputime - CS%prev_cputime
  endif

  call sum_across_pes(d_cputime)
  if (CS%previous_calls == 0) CS%startup_cputime = d_cputime

  CS%cputime2 = CS%cputime2 + d_cputime

  if ((CS%previous_calls >= 1) .and. (CS%maxcpu > 0.0)) then
    ! Determine the slowest rate at which time steps are executed.  The stored rate is
    ! replaced when it has not been set yet (it is negative) or when the rate over the
    ! latest interval is slower, so that the projection below is a conservative one.
    if ((n > CS%prev_n) .and. (d_cputime > 0.0)) then
      if ((CS%dn_dcpu_min < 0.0) .or. &
          (CS%dn_dcpu_min*d_cputime > (n - CS%prev_n))) &
        CS%dn_dcpu_min = (n - CS%prev_n) / d_cputime
    endif
    if (CS%dn_dcpu_min >= 0.0) then
      ! Have the model stop itself after 95% of the CPU time has been used.
      ! MAXCPU is a per-processor time in seconds, so it is converted here to
      ! clock ticks summed over all PEs, to match the units of the accumulated time.
      ! NOTE(review): cputime2 already includes the interval stored in startup_cputime,
      ! so the startup time appears to be counted twice here, which errs on the side of
      ! stopping early - confirm whether this is intended.
      nmax = n + int( CS%dn_dcpu_min * &
          (0.95*CS%maxcpu * real(num_pes())*clocks_per_sec - &
           (CS%startup_cputime + CS%cputime2)) )
    endif
  endif
  CS%prev_cputime = new_cputime ; CS%prev_n = n

  call get_time(day, start_of_day, num_days)
  reday = real(num_days)+ (real(start_of_day)/86400.0)

  if (is_root_pe()) then
    if (CS%previous_calls == 0) then
      ! Reopen or create a text output file.  The file is appended to if the model
      ! time is already later than the start time of the simulation.
      if (day > CS%Start_time) then
        call open_file(CS%fileCPU_ascii, trim(CS%CPUfile), &
                       action=append_file, form=ascii_file, nohdrs=.true.)
      else
        call open_file(CS%fileCPU_ascii, trim(CS%CPUfile), &
                       action=writeonly_file, form=ascii_file, nohdrs=.true.)
      endif

      ! Write the startup time and the column headers once, on the first call.
      write(CS%fileCPU_ascii, &
        '("Startup CPU time: ", F12.3, " sec summed across", I5, " PEs.")') &
                            (CS%startup_cputime / clocks_per_sec), num_pes()
      write(CS%fileCPU_ascii,*)"        Day, Step number,     CPU time, CPU time change"
    endif
    ! Times are converted from clock ticks to seconds for output.
    write(CS%fileCPU_ascii,'(F12.3,", ",I11,", ", F12.3,", ", F12.3)') &
           reday, n, (CS%cputime2 / real(clocks_per_sec)), &
           d_cputime / real(clocks_per_sec)
  endif
  CS%previous_calls = CS%previous_calls + 1

end subroutine write_cputime
180 
181 !> \namespace mom_write_cputime
182 !!
183 !! By Robert Hallberg, May 2006.
184 !!
185 !! This file contains the subroutine (write_cputime) that writes
186 !! the summed CPU time across all processors to an output file. In
187 !! addition, write_cputime estimates how many more time steps can be
188 !! taken before 95% of the available CPU time is used, so that the
189 !! model can be checkpointed at that time.
190 
191 end module mom_write_cputime
mom_time_manager
Wraps the FMS time manager functions.
Definition: MOM_time_manager.F90:2
mom_write_cputime
A module to monitor the overall CPU time used by MOM6 and project when to stop the model.
Definition: MOM_write_cputime.F90:2
mom_file_parser::log_version
An overloaded interface to log version information about modules.
Definition: MOM_file_parser.F90:109
mom_file_parser::param_file_type
A structure that can be parsed to read and document run-time parameters.
Definition: MOM_file_parser.F90:54
mom_file_parser::get_param
An overloaded interface to read and log the values of various types of parameters.
Definition: MOM_file_parser.F90:102
mom_io
This module contains I/O framework code.
Definition: MOM_io.F90:2
mom_coms
Interfaces to non-domain-oriented communication subroutines, including the MOM6 reproducing sums facility.
Definition: MOM_coms.F90:3
mom_file_parser
The MOM6 facility to parse input files for runtime parameters.
Definition: MOM_file_parser.F90:2
mom_write_cputime::write_cputime_cs
A control structure that regulates the writing of CPU time.
Definition: MOM_write_cputime.F90:22
mom_file_parser::log_param
An overloaded interface to log the values of various types of parameters.
Definition: MOM_file_parser.F90:96
mom_error_handler
Routines for error handling and I/O management.
Definition: MOM_error_handler.F90:2